Diffstat (limited to 'test/CodeGen')
849 files changed, 40480 insertions, 9849 deletions
diff --git a/test/CodeGen/AArch64/adc.ll b/test/CodeGen/AArch64/adc.ll new file mode 100644 index 000000000000..7cb373232a2c --- /dev/null +++ b/test/CodeGen/AArch64/adc.ll @@ -0,0 +1,54 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +define i128 @test_simple(i128 %a, i128 %b, i128 %c) { +; CHECK: test_simple: + + %valadd = add i128 %a, %b +; CHECK: adds [[ADDLO:x[0-9]+]], x0, x2 +; CHECK-NEXT: adcs [[ADDHI:x[0-9]+]], x1, x3 + + %valsub = sub i128 %valadd, %c +; CHECK: subs x0, [[ADDLO]], x4 +; CHECK: sbcs x1, [[ADDHI]], x5 + + ret i128 %valsub +; CHECK: ret +} + +define i128 @test_imm(i128 %a) { +; CHECK: test_imm: + + %val = add i128 %a, 12 +; CHECK: adds x0, x0, #12 +; CHECK: adcs x1, x1, {{x[0-9]|xzr}} + + ret i128 %val +; CHECK: ret +} + +define i128 @test_shifted(i128 %a, i128 %b) { +; CHECK: test_shifted: + + %rhs = shl i128 %b, 45 + + %val = add i128 %a, %rhs +; CHECK: adds x0, x0, x2, lsl #45 +; CHECK: adcs x1, x1, {{x[0-9]}} + + ret i128 %val +; CHECK: ret +} + +define i128 @test_extended(i128 %a, i16 %b) { +; CHECK: test_extended: + + %ext = sext i16 %b to i128 + %rhs = shl i128 %ext, 3 + + %val = add i128 %a, %rhs +; CHECK: adds x0, x0, w2, sxth #3 +; CHECK: adcs x1, x1, {{x[0-9]}} + + ret i128 %val +; CHECK: ret +} diff --git a/test/CodeGen/AArch64/addsub-shifted.ll b/test/CodeGen/AArch64/addsub-shifted.ll new file mode 100644 index 000000000000..f2c74f6952b0 --- /dev/null +++ b/test/CodeGen/AArch64/addsub-shifted.ll @@ -0,0 +1,295 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +@var32 = global i32 0 +@var64 = global i64 0 + +define void @test_lsl_arith(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) { +; CHECK: test_lsl_arith: + + %rhs1 = load volatile i32* @var32 + %shift1 = shl i32 %rhs1, 18 + %val1 = add i32 %lhs32, %shift1 + store volatile i32 %val1, i32* @var32 +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #18 + + %rhs2 = load volatile i32* @var32 + %shift2 = shl i32 %rhs2, 31 + %val2 = add i32 %shift2, %lhs32 + store volatile i32 %val2, i32* @var32 +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #31 + + %rhs3 = load volatile i32* @var32 + %shift3 = shl i32 %rhs3, 5 + %val3 = sub i32 %lhs32, %shift3 + store volatile i32 %val3, i32* @var32 +; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #5 + +; Subtraction is not commutative! + %rhs4 = load volatile i32* @var32 + %shift4 = shl i32 %rhs4, 19 + %val4 = sub i32 %shift4, %lhs32 + store volatile i32 %val4, i32* @var32 +; CHECK-NOT: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #19 + + %lhs4a = load volatile i32* @var32 + %shift4a = shl i32 %lhs4a, 15 + %val4a = sub i32 0, %shift4a + store volatile i32 %val4a, i32* @var32 +; CHECK: sub {{w[0-9]+}}, wzr, {{w[0-9]+}}, lsl #15 + + %rhs5 = load volatile i64* @var64 + %shift5 = shl i64 %rhs5, 18 + %val5 = add i64 %lhs64, %shift5 + store volatile i64 %val5, i64* @var64 +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #18 + + %rhs6 = load volatile i64* @var64 + %shift6 = shl i64 %rhs6, 31 + %val6 = add i64 %shift6, %lhs64 + store volatile i64 %val6, i64* @var64 +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #31 + + %rhs7 = load volatile i64* @var64 + %shift7 = shl i64 %rhs7, 5 + %val7 = sub i64 %lhs64, %shift7 + store volatile i64 %val7, i64* @var64 +; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #5 + +; Subtraction is not commutative! 
+ %rhs8 = load volatile i64* @var64 + %shift8 = shl i64 %rhs8, 19 + %val8 = sub i64 %shift8, %lhs64 + store volatile i64 %val8, i64* @var64 +; CHECK-NOT: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #19 + + %lhs8a = load volatile i64* @var64 + %shift8a = shl i64 %lhs8a, 60 + %val8a = sub i64 0, %shift8a + store volatile i64 %val8a, i64* @var64 +; CHECK: sub {{x[0-9]+}}, xzr, {{x[0-9]+}}, lsl #60 + + ret void +; CHECK: ret +} + +define void @test_lsr_arith(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) { +; CHECK: test_lsr_arith: + + %shift1 = lshr i32 %rhs32, 18 + %val1 = add i32 %lhs32, %shift1 + store volatile i32 %val1, i32* @var32 +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #18 + + %shift2 = lshr i32 %rhs32, 31 + %val2 = add i32 %shift2, %lhs32 + store volatile i32 %val2, i32* @var32 +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #31 + + %shift3 = lshr i32 %rhs32, 5 + %val3 = sub i32 %lhs32, %shift3 + store volatile i32 %val3, i32* @var32 +; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #5 + +; Subtraction is not commutative! + %shift4 = lshr i32 %rhs32, 19 + %val4 = sub i32 %shift4, %lhs32 + store volatile i32 %val4, i32* @var32 +; CHECK-NOT: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #19 + + %shift4a = lshr i32 %lhs32, 15 + %val4a = sub i32 0, %shift4a + store volatile i32 %val4a, i32* @var32 +; CHECK: sub {{w[0-9]+}}, wzr, {{w[0-9]+}}, lsr #15 + + %shift5 = lshr i64 %rhs64, 18 + %val5 = add i64 %lhs64, %shift5 + store volatile i64 %val5, i64* @var64 +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #18 + + %shift6 = lshr i64 %rhs64, 31 + %val6 = add i64 %shift6, %lhs64 + store volatile i64 %val6, i64* @var64 +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #31 + + %shift7 = lshr i64 %rhs64, 5 + %val7 = sub i64 %lhs64, %shift7 + store volatile i64 %val7, i64* @var64 +; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #5 + +; Subtraction is not commutative! + %shift8 = lshr i64 %rhs64, 19 + %val8 = sub i64 %shift8, %lhs64 + store volatile i64 %val8, i64* @var64 +; CHECK-NOT: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #19 + + %shift8a = lshr i64 %lhs64, 45 + %val8a = sub i64 0, %shift8a + store volatile i64 %val8a, i64* @var64 +; CHECK: sub {{x[0-9]+}}, xzr, {{x[0-9]+}}, lsr #45 + + ret void +; CHECK: ret +} + +define void @test_asr_arith(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) { +; CHECK: test_asr_arith: + + %shift1 = ashr i32 %rhs32, 18 + %val1 = add i32 %lhs32, %shift1 + store volatile i32 %val1, i32* @var32 +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #18 + + %shift2 = ashr i32 %rhs32, 31 + %val2 = add i32 %shift2, %lhs32 + store volatile i32 %val2, i32* @var32 +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #31 + + %shift3 = ashr i32 %rhs32, 5 + %val3 = sub i32 %lhs32, %shift3 + store volatile i32 %val3, i32* @var32 +; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #5 + +; Subtraction is not commutative! 
+ %shift4 = ashr i32 %rhs32, 19 + %val4 = sub i32 %shift4, %lhs32 + store volatile i32 %val4, i32* @var32 +; CHECK-NOT: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #19 + + %shift4a = ashr i32 %lhs32, 15 + %val4a = sub i32 0, %shift4a + store volatile i32 %val4a, i32* @var32 +; CHECK: sub {{w[0-9]+}}, wzr, {{w[0-9]+}}, asr #15 + + %shift5 = ashr i64 %rhs64, 18 + %val5 = add i64 %lhs64, %shift5 + store volatile i64 %val5, i64* @var64 +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #18 + + %shift6 = ashr i64 %rhs64, 31 + %val6 = add i64 %shift6, %lhs64 + store volatile i64 %val6, i64* @var64 +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #31 + + %shift7 = ashr i64 %rhs64, 5 + %val7 = sub i64 %lhs64, %shift7 + store volatile i64 %val7, i64* @var64 +; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #5 + +; Subtraction is not commutative! + %shift8 = ashr i64 %rhs64, 19 + %val8 = sub i64 %shift8, %lhs64 + store volatile i64 %val8, i64* @var64 +; CHECK-NOT: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #19 + + %shift8a = ashr i64 %lhs64, 45 + %val8a = sub i64 0, %shift8a + store volatile i64 %val8a, i64* @var64 +; CHECK: sub {{x[0-9]+}}, xzr, {{x[0-9]+}}, asr #45 + + ret void +; CHECK: ret +} + +define i32 @test_cmp(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) { +; CHECK: test_cmp: + + %shift1 = shl i32 %rhs32, 13 + %tst1 = icmp uge i32 %lhs32, %shift1 + br i1 %tst1, label %t2, label %end +; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, lsl #13 + +t2: + %shift2 = lshr i32 %rhs32, 20 + %tst2 = icmp ne i32 %lhs32, %shift2 + br i1 %tst2, label %t3, label %end +; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, lsr #20 + +t3: + %shift3 = ashr i32 %rhs32, 9 + %tst3 = icmp ne i32 %lhs32, %shift3 + br i1 %tst3, label %t4, label %end +; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, asr #9 + +t4: + %shift4 = shl i64 %rhs64, 43 + %tst4 = icmp uge i64 %lhs64, %shift4 + br i1 %tst4, label %t5, label %end +; CHECK: cmp {{x[0-9]+}}, {{x[0-9]+}}, lsl #43 + +t5: + %shift5 = lshr i64 %rhs64, 20 + %tst5 = icmp ne i64 %lhs64, %shift5 + br i1 %tst5, label %t6, label %end +; CHECK: cmp {{x[0-9]+}}, {{x[0-9]+}}, lsr #20 + +t6: + %shift6 = ashr i64 %rhs64, 59 + %tst6 = icmp ne i64 %lhs64, %shift6 + br i1 %tst6, label %t7, label %end +; CHECK: cmp {{x[0-9]+}}, {{x[0-9]+}}, asr #59 + +t7: + ret i32 1 +end: + + ret i32 0 +; CHECK: ret +} + +define i32 @test_cmn(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) { +; CHECK: test_cmn: + + %shift1 = shl i32 %rhs32, 13 + %val1 = sub i32 0, %shift1 + %tst1 = icmp uge i32 %lhs32, %val1 + br i1 %tst1, label %t2, label %end + ; Important that this isn't lowered to a cmn instruction because if %rhs32 == + ; 0 then the results will differ. +; CHECK: sub [[RHS:w[0-9]+]], wzr, {{w[0-9]+}}, lsl #13 +; CHECK: cmp {{w[0-9]+}}, [[RHS]] + +t2: + %shift2 = lshr i32 %rhs32, 20 + %val2 = sub i32 0, %shift2 + %tst2 = icmp ne i32 %lhs32, %val2 + br i1 %tst2, label %t3, label %end +; CHECK: cmn {{w[0-9]+}}, {{w[0-9]+}}, lsr #20 + +t3: + %shift3 = ashr i32 %rhs32, 9 + %val3 = sub i32 0, %shift3 + %tst3 = icmp eq i32 %lhs32, %val3 + br i1 %tst3, label %t4, label %end +; CHECK: cmn {{w[0-9]+}}, {{w[0-9]+}}, asr #9 + +t4: + %shift4 = shl i64 %rhs64, 43 + %val4 = sub i64 0, %shift4 + %tst4 = icmp slt i64 %lhs64, %val4 + br i1 %tst4, label %t5, label %end + ; Again, it's important that cmn isn't used here in case %rhs64 == 0. 
+; CHECK: sub [[RHS:x[0-9]+]], xzr, {{x[0-9]+}}, lsl #43 +; CHECK: cmp {{x[0-9]+}}, [[RHS]] + +t5: + %shift5 = lshr i64 %rhs64, 20 + %val5 = sub i64 0, %shift5 + %tst5 = icmp ne i64 %lhs64, %val5 + br i1 %tst5, label %t6, label %end +; CHECK: cmn {{x[0-9]+}}, {{x[0-9]+}}, lsr #20 + +t6: + %shift6 = ashr i64 %rhs64, 59 + %val6 = sub i64 0, %shift6 + %tst6 = icmp ne i64 %lhs64, %val6 + br i1 %tst6, label %t7, label %end +; CHECK: cmn {{x[0-9]+}}, {{x[0-9]+}}, asr #59 + +t7: + ret i32 1 +end: + + ret i32 0 +; CHECK: ret +} + diff --git a/test/CodeGen/AArch64/addsub.ll b/test/CodeGen/AArch64/addsub.ll new file mode 100644 index 000000000000..5148807163c9 --- /dev/null +++ b/test/CodeGen/AArch64/addsub.ll @@ -0,0 +1,127 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +; Note that this should be refactored (for efficiency if nothing else) +; when the PCS is implemented so we don't have to worry about the +; loads and stores. + +@var_i32 = global i32 42 +@var_i64 = global i64 0 + +; Add pure 12-bit immediates: +define void @add_small() { +; CHECK: add_small: + +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #4095 + %val32 = load i32* @var_i32 + %newval32 = add i32 %val32, 4095 + store i32 %newval32, i32* @var_i32 + +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #52 + %val64 = load i64* @var_i64 + %newval64 = add i64 %val64, 52 + store i64 %newval64, i64* @var_i64 + + ret void +} + +; Add 12-bit immediates, shifted left by 12 bits +define void @add_med() { +; CHECK: add_med: + +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #3567, lsl #12 + %val32 = load i32* @var_i32 + %newval32 = add i32 %val32, 14610432 ; =0xdef000 + store i32 %newval32, i32* @var_i32 + +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #4095, lsl #12 + %val64 = load i64* @var_i64 + %newval64 = add i64 %val64, 16773120 ; =0xfff000 + store i64 %newval64, i64* @var_i64 + + ret void +} + +; Subtract 12-bit immediates +define void @sub_small() { +; CHECK: sub_small: + +; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, #4095 + %val32 = load i32* @var_i32 + %newval32 = sub i32 %val32, 4095 + store i32 %newval32, i32* @var_i32 + +; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, #52 + %val64 = load i64* @var_i64 + %newval64 = sub i64 %val64, 52 + store i64 %newval64, i64* @var_i64 + + ret void +} + +; Subtract 12-bit immediates, shifted left by 12 bits +define void @sub_med() { +; CHECK: sub_med: + +; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, #3567, lsl #12 + %val32 = load i32* @var_i32 + %newval32 = sub i32 %val32, 14610432 ; =0xdef000 + store i32 %newval32, i32* @var_i32 + +; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, #4095, lsl #12 + %val64 = load i64* @var_i64 + %newval64 = sub i64 %val64, 16773120 ; =0xfff000 + store i64 %newval64, i64* @var_i64 + + ret void +} + +define void @testing() { +; CHECK: testing: + %val = load i32* @var_i32 + +; CHECK: cmp {{w[0-9]+}}, #4095 +; CHECK: b.ne .LBB4_6 + %cmp_pos_small = icmp ne i32 %val, 4095 + br i1 %cmp_pos_small, label %ret, label %test2 + +test2: +; CHECK: cmp {{w[0-9]+}}, #3567, lsl #12 +; CHECK: b.lo .LBB4_6 + %newval2 = add i32 %val, 1 + store i32 %newval2, i32* @var_i32 + %cmp_pos_big = icmp ult i32 %val, 14610432 + br i1 %cmp_pos_big, label %ret, label %test3 + +test3: +; CHECK: cmp {{w[0-9]+}}, #123 +; CHECK: b.lt .LBB4_6 + %newval3 = add i32 %val, 2 + store i32 %newval3, i32* @var_i32 + %cmp_pos_slt = icmp slt i32 %val, 123 + br i1 %cmp_pos_slt, label %ret, label %test4 + +test4: +; CHECK: cmp {{w[0-9]+}}, #321 +; CHECK: b.gt .LBB4_6 + %newval4 = add i32 %val, 3 + store i32 %newval4, i32* @var_i32 + 
%cmp_pos_sgt = icmp sgt i32 %val, 321 + br i1 %cmp_pos_sgt, label %ret, label %test5 + +test5: +; CHECK: cmn {{w[0-9]+}}, #444 +; CHECK: b.gt .LBB4_6 + %newval5 = add i32 %val, 4 + store i32 %newval5, i32* @var_i32 + %cmp_neg_uge = icmp sgt i32 %val, -444 + br i1 %cmp_neg_uge, label %ret, label %test6 + +test6: + %newval6 = add i32 %val, 5 + store i32 %newval6, i32* @var_i32 + ret void + +ret: + ret void +} +; TODO: adds/subs diff --git a/test/CodeGen/AArch64/addsub_ext.ll b/test/CodeGen/AArch64/addsub_ext.ll new file mode 100644 index 000000000000..2dd16626ea9f --- /dev/null +++ b/test/CodeGen/AArch64/addsub_ext.ll @@ -0,0 +1,189 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +@var8 = global i8 0 +@var16 = global i16 0 +@var32 = global i32 0 +@var64 = global i64 0 + +define void @addsub_i8rhs() { +; CHECK: addsub_i8rhs: + %val8_tmp = load i8* @var8 + %lhs32 = load i32* @var32 + %lhs64 = load i64* @var64 + + ; Need this to prevent extension upon load and give a vanilla i8 operand. + %val8 = add i8 %val8_tmp, 123 + + +; Zero-extending to 32-bits + %rhs32_zext = zext i8 %val8 to i32 + %res32_zext = add i32 %lhs32, %rhs32_zext + store volatile i32 %res32_zext, i32* @var32 +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb + + %rhs32_zext_shift = shl i32 %rhs32_zext, 3 + %res32_zext_shift = add i32 %lhs32, %rhs32_zext_shift + store volatile i32 %res32_zext_shift, i32* @var32 +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb #3 + + +; Zero-extending to 64-bits + %rhs64_zext = zext i8 %val8 to i64 + %res64_zext = add i64 %lhs64, %rhs64_zext + store volatile i64 %res64_zext, i64* @var64 +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb + + %rhs64_zext_shift = shl i64 %rhs64_zext, 1 + %res64_zext_shift = add i64 %lhs64, %rhs64_zext_shift + store volatile i64 %res64_zext_shift, i64* @var64 +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb #1 + +; Sign-extending to 32-bits + %rhs32_sext = sext i8 %val8 to i32 + %res32_sext = add i32 %lhs32, %rhs32_sext + store volatile i32 %res32_sext, i32* @var32 +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxtb + + %rhs32_sext_shift = shl i32 %rhs32_sext, 1 + %res32_sext_shift = add i32 %lhs32, %rhs32_sext_shift + store volatile i32 %res32_sext_shift, i32* @var32 +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxtb #1 + +; Sign-extending to 64-bits + %rhs64_sext = sext i8 %val8 to i64 + %res64_sext = add i64 %lhs64, %rhs64_sext + store volatile i64 %res64_sext, i64* @var64 +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtb + + %rhs64_sext_shift = shl i64 %rhs64_sext, 4 + %res64_sext_shift = add i64 %lhs64, %rhs64_sext_shift + store volatile i64 %res64_sext_shift, i64* @var64 +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtb #4 + + +; CMP variants + %tst = icmp slt i32 %lhs32, %rhs32_zext + br i1 %tst, label %end, label %test2 +; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, uxtb + +test2: + %cmp_sext = sext i8 %val8 to i64 + %tst2 = icmp eq i64 %lhs64, %cmp_sext + br i1 %tst2, label %other, label %end +; CHECK: cmp {{x[0-9]+}}, {{w[0-9]+}}, sxtb + +other: + store volatile i32 %lhs32, i32* @var32 + ret void + +end: + ret void +} + +define void @addsub_i16rhs() { +; CHECK: addsub_i16rhs: + %val16_tmp = load i16* @var16 + %lhs32 = load i32* @var32 + %lhs64 = load i64* @var64 + + ; Need this to prevent extension upon load and give a vanilla i16 operand. 
+ %val16 = add i16 %val16_tmp, 123 + + +; Zero-extending to 32-bits + %rhs32_zext = zext i16 %val16 to i32 + %res32_zext = add i32 %lhs32, %rhs32_zext + store volatile i32 %res32_zext, i32* @var32 +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth + + %rhs32_zext_shift = shl i32 %rhs32_zext, 3 + %res32_zext_shift = add i32 %lhs32, %rhs32_zext_shift + store volatile i32 %res32_zext_shift, i32* @var32 +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth #3 + + +; Zero-extending to 64-bits + %rhs64_zext = zext i16 %val16 to i64 + %res64_zext = add i64 %lhs64, %rhs64_zext + store volatile i64 %res64_zext, i64* @var64 +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth + + %rhs64_zext_shift = shl i64 %rhs64_zext, 1 + %res64_zext_shift = add i64 %lhs64, %rhs64_zext_shift + store volatile i64 %res64_zext_shift, i64* @var64 +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth #1 + +; Sign-extending to 32-bits + %rhs32_sext = sext i16 %val16 to i32 + %res32_sext = add i32 %lhs32, %rhs32_sext + store volatile i32 %res32_sext, i32* @var32 +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth + + %rhs32_sext_shift = shl i32 %rhs32_sext, 1 + %res32_sext_shift = add i32 %lhs32, %rhs32_sext_shift + store volatile i32 %res32_sext_shift, i32* @var32 +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth #1 + +; Sign-extending to 64-bits + %rhs64_sext = sext i16 %val16 to i64 + %res64_sext = add i64 %lhs64, %rhs64_sext + store volatile i64 %res64_sext, i64* @var64 +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxth + + %rhs64_sext_shift = shl i64 %rhs64_sext, 4 + %res64_sext_shift = add i64 %lhs64, %rhs64_sext_shift + store volatile i64 %res64_sext_shift, i64* @var64 +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxth #4 + + +; CMP variants + %tst = icmp slt i32 %lhs32, %rhs32_zext + br i1 %tst, label %end, label %test2 +; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, uxth + +test2: + %cmp_sext = sext i16 %val16 to i64 + %tst2 = icmp eq i64 %lhs64, %cmp_sext + br i1 %tst2, label %other, label %end +; CHECK: cmp {{x[0-9]+}}, {{w[0-9]+}}, sxth + +other: + store volatile i32 %lhs32, i32* @var32 + ret void + +end: + ret void +} + +; N.b. we could probably check more here ("add w2, w3, w1, uxtw" for +; example), but the remaining instructions are probably not idiomatic +; in the face of "add/sub (shifted register)" so I don't intend to. +define void @addsub_i32rhs() { +; CHECK: addsub_i32rhs: + %val32_tmp = load i32* @var32 + %lhs64 = load i64* @var64 + + %val32 = add i32 %val32_tmp, 123 + + %rhs64_zext = zext i32 %val32 to i64 + %res64_zext = add i64 %lhs64, %rhs64_zext + store volatile i64 %res64_zext, i64* @var64 +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw + + %rhs64_zext_shift = shl i64 %rhs64_zext, 2 + %res64_zext_shift = add i64 %lhs64, %rhs64_zext_shift + store volatile i64 %res64_zext_shift, i64* @var64 +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2 + + %rhs64_sext = sext i32 %val32 to i64 + %res64_sext = add i64 %lhs64, %rhs64_sext + store volatile i64 %res64_sext, i64* @var64 +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw + + %rhs64_sext_shift = shl i64 %rhs64_sext, 2 + %res64_sext_shift = add i64 %lhs64, %rhs64_sext_shift + store volatile i64 %res64_sext_shift, i64* @var64 +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw #2 + + ret void +}
\ No newline at end of file diff --git a/test/CodeGen/AArch64/adrp-relocation.ll b/test/CodeGen/AArch64/adrp-relocation.ll new file mode 100644 index 000000000000..c33b442624a5 --- /dev/null +++ b/test/CodeGen/AArch64/adrp-relocation.ll @@ -0,0 +1,35 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -filetype=obj < %s | elf-dump | FileCheck %s + +define i64 @testfn() nounwind { +entry: + ret i64 0 +} + +define i64 @foo() nounwind { +entry: + %bar = alloca i64 ()*, align 8 + store i64 ()* @testfn, i64 ()** %bar, align 8 + %call = call i64 @testfn() + ret i64 %call +} + +; The above should produce an ADRP/ADD pair to calculate the address of +; testfn. The important point is that LLVM shouldn't think it can deal with the +; relocation on the ADRP itself (even though it knows everything about the +; relative offsets of testfn and foo) because its value depends on where this +; object file's .text section gets relocated in memory. + +; CHECK: .rela.text + +; CHECK: # Relocation 0 +; CHECK-NEXT: (('r_offset', 0x0000000000000010) +; CHECK-NEXT: ('r_sym', 0x00000007) +; CHECK-NEXT: ('r_type', 0x00000113) +; CHECK-NEXT: ('r_addend', 0x0000000000000000) +; CHECK-NEXT: ), +; CHECK-NEXT: Relocation 1 +; CHECK-NEXT: (('r_offset', 0x0000000000000014) +; CHECK-NEXT: ('r_sym', 0x00000007) +; CHECK-NEXT: ('r_type', 0x00000115) +; CHECK-NEXT: ('r_addend', 0x0000000000000000) +; CHECK-NEXT: ), diff --git a/test/CodeGen/AArch64/alloca.ll b/test/CodeGen/AArch64/alloca.ll new file mode 100644 index 000000000000..c62edf6503c6 --- /dev/null +++ b/test/CodeGen/AArch64/alloca.ll @@ -0,0 +1,134 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s + +declare void @use_addr(i8*) + +define void @test_simple_alloca(i64 %n) { +; CHECK: test_simple_alloca: + + %buf = alloca i8, i64 %n + ; Make sure we align the stack change to 16 bytes: +; CHECK: add [[SPDELTA:x[0-9]+]], x0, #15 +; CHECK: and x0, [[SPDELTA]], #0xfffffffffffffff0 + + ; Make sure we change SP. It would be surprising if anything but x0 were used + ; for the final sp, but it could be if it was then moved into x0. +; CHECK: mov [[TMP:x[0-9]+]], sp +; CHECK: sub x0, [[TMP]], [[SPDELTA]] +; CHECK: mov sp, x0 + + call void @use_addr(i8* %buf) +; CHECK: bl use_addr + + ret void + ; Make sure epilogue restores sp from fp +; CHECK: sub sp, x29, #16 +; CHECK: ldp x29, x30, [sp, #16] +; CHECK: add sp, sp, #32 +; CHECK: ret +} + +declare void @use_addr_loc(i8*, i64*) + +define i64 @test_alloca_with_local(i64 %n) { +; CHECK: test_alloca_with_local: +; CHECK: sub sp, sp, #32 +; CHECK: stp x29, x30, [sp, #16] + + %loc = alloca i64 + %buf = alloca i8, i64 %n + ; Make sure we align the stack change to 16 bytes: +; CHECK: add [[SPDELTA:x[0-9]+]], x0, #15 +; CHECK: and x0, [[SPDELTA]], #0xfffffffffffffff0 + + ; Make sure we change SP. It would be surprising if anything but x0 were used + ; for the final sp, but it could be if it was then moved into x0. 
+; CHECK: mov [[TMP:x[0-9]+]], sp +; CHECK: sub x0, [[TMP]], [[SPDELTA]] +; CHECK: mov sp, x0 + + ; Obviously suboptimal code here, but it's needed to get &local into x1 +; CHECK: sub [[TMP:x[0-9]+]], x29, [[LOC_FROM_FP:#[0-9]+]] +; CHECK: add x1, [[TMP]], #0 + + call void @use_addr_loc(i8* %buf, i64* %loc) +; CHECK: bl use_addr + + %val = load i64* %loc +; CHECK: sub x[[TMP:[0-9]+]], x29, [[LOC_FROM_FP]] +; CHECK: ldr x0, [x[[TMP]]] + + ret i64 %val + ; Make sure epilogue restores sp from fp +; CHECK: sub sp, x29, #16 +; CHECK: ldp x29, x30, [sp, #16] +; CHECK: add sp, sp, #32 +; CHECK: ret +} + +define void @test_variadic_alloca(i64 %n, ...) { +; CHECK: test_variadic_alloca: + +; CHECK: sub sp, sp, #208 +; CHECK: stp x29, x30, [sp, #192] +; CHECK: add x29, sp, #192 +; CHECK: sub [[TMP:x[0-9]+]], x29, #192 +; CHECK: add x8, [[TMP]], #0 +; CHECK: str q7, [x8, #112] +; [...] +; CHECK: str q1, [x8, #16] + + %addr = alloca i8, i64 %n + + call void @use_addr(i8* %addr) +; CHECK: bl use_addr + + ret void +; CHECK: sub sp, x29, #192 +; CHECK: ldp x29, x30, [sp, #192] +; CHECK: add sp, sp, #208 +} + +define void @test_alloca_large_frame(i64 %n) { +; CHECK: test_alloca_large_frame: + +; CHECK: sub sp, sp, #496 +; CHECK: stp x29, x30, [sp, #480] +; CHECK: add x29, sp, #480 +; CHECK: sub sp, sp, #48 +; CHECK: sub sp, sp, #1953, lsl #12 + + %addr1 = alloca i8, i64 %n + %addr2 = alloca i64, i64 1000000 + + call void @use_addr_loc(i8* %addr1, i64* %addr2) + + ret void +; CHECK: sub sp, x29, #480 +; CHECK: ldp x29, x30, [sp, #480] +; CHECK: add sp, sp, #496 +} + +declare i8* @llvm.stacksave() +declare void @llvm.stackrestore(i8*) + +define void @test_scoped_alloca(i64 %n) { +; CHECK: test_scoped_alloca +; CHECK: sub sp, sp, #32 + + %sp = call i8* @llvm.stacksave() +; CHECK: mov [[SAVED_SP:x[0-9]+]], sp + + %addr = alloca i8, i64 %n +; CHECK: and [[SPDELTA:x[0-9]+]], {{x[0-9]+}}, #0xfffffffffffffff0 +; CHECK: mov [[OLDSP:x[0-9]+]], sp +; CHECK: sub [[NEWSP:x[0-9]+]], [[OLDSP]], [[SPDELTA]] +; CHECK: mov sp, [[NEWSP]] + + call void @use_addr(i8* %addr) +; CHECK: bl use_addr + + call void @llvm.stackrestore(i8* %sp) +; CHECK: mov sp, [[SAVED_SP]] + + ret void +} diff --git a/test/CodeGen/AArch64/analyze-branch.ll b/test/CodeGen/AArch64/analyze-branch.ll new file mode 100644 index 000000000000..e10bbb0f8691 --- /dev/null +++ b/test/CodeGen/AArch64/analyze-branch.ll @@ -0,0 +1,231 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s + +; This test checks that LLVM can do basic stripping and reapplying of branches +; to basic blocks. + +declare void @test_true() +declare void @test_false() + +; !0 corresponds to a branch being taken, !1 to not being taken. 
+!0 = metadata !{metadata !"branch_weights", i32 64, i32 4} +!1 = metadata !{metadata !"branch_weights", i32 4, i32 64} + +define void @test_Bcc_fallthrough_taken(i32 %in) nounwind { +; CHECK: test_Bcc_fallthrough_taken: + %tst = icmp eq i32 %in, 42 + br i1 %tst, label %true, label %false, !prof !0 + +; CHECK: cmp {{w[0-9]+}}, #42 + +; CHECK: b.ne [[FALSE:.LBB[0-9]+_[0-9]+]] +; CHECK-NEXT: // BB# +; CHECK-NEXT: bl test_true + +; CHECK: [[FALSE]]: +; CHECK: bl test_false + +true: + call void @test_true() + ret void + +false: + call void @test_false() + ret void +} + +define void @test_Bcc_fallthrough_nottaken(i32 %in) nounwind { +; CHECK: test_Bcc_fallthrough_nottaken: + %tst = icmp eq i32 %in, 42 + br i1 %tst, label %true, label %false, !prof !1 + +; CHECK: cmp {{w[0-9]+}}, #42 + +; CHECK: b.eq [[TRUE:.LBB[0-9]+_[0-9]+]] +; CHECK-NEXT: // BB# +; CHECK-NEXT: bl test_false + +; CHECK: [[TRUE]]: +; CHECK: bl test_true + +true: + call void @test_true() + ret void + +false: + call void @test_false() + ret void +} + +define void @test_CBZ_fallthrough_taken(i32 %in) nounwind { +; CHECK: test_CBZ_fallthrough_taken: + %tst = icmp eq i32 %in, 0 + br i1 %tst, label %true, label %false, !prof !0 + +; CHECK: cbnz {{w[0-9]+}}, [[FALSE:.LBB[0-9]+_[0-9]+]] +; CHECK-NEXT: // BB# +; CHECK-NEXT: bl test_true + +; CHECK: [[FALSE]]: +; CHECK: bl test_false + +true: + call void @test_true() + ret void + +false: + call void @test_false() + ret void +} + +define void @test_CBZ_fallthrough_nottaken(i64 %in) nounwind { +; CHECK: test_CBZ_fallthrough_nottaken: + %tst = icmp eq i64 %in, 0 + br i1 %tst, label %true, label %false, !prof !1 + +; CHECK: cbz {{x[0-9]+}}, [[TRUE:.LBB[0-9]+_[0-9]+]] +; CHECK-NEXT: // BB# +; CHECK-NEXT: bl test_false + +; CHECK: [[TRUE]]: +; CHECK: bl test_true + +true: + call void @test_true() + ret void + +false: + call void @test_false() + ret void +} + +define void @test_CBNZ_fallthrough_taken(i32 %in) nounwind { +; CHECK: test_CBNZ_fallthrough_taken: + %tst = icmp ne i32 %in, 0 + br i1 %tst, label %true, label %false, !prof !0 + +; CHECK: cbz {{w[0-9]+}}, [[FALSE:.LBB[0-9]+_[0-9]+]] +; CHECK-NEXT: // BB# +; CHECK-NEXT: bl test_true + +; CHECK: [[FALSE]]: +; CHECK: bl test_false + +true: + call void @test_true() + ret void + +false: + call void @test_false() + ret void +} + +define void @test_CBNZ_fallthrough_nottaken(i64 %in) nounwind { +; CHECK: test_CBNZ_fallthrough_nottaken: + %tst = icmp ne i64 %in, 0 + br i1 %tst, label %true, label %false, !prof !1 + +; CHECK: cbnz {{x[0-9]+}}, [[TRUE:.LBB[0-9]+_[0-9]+]] +; CHECK-NEXT: // BB# +; CHECK-NEXT: bl test_false + +; CHECK: [[TRUE]]: +; CHECK: bl test_true + +true: + call void @test_true() + ret void + +false: + call void @test_false() + ret void +} + +define void @test_TBZ_fallthrough_taken(i32 %in) nounwind { +; CHECK: test_TBZ_fallthrough_taken: + %bit = and i32 %in, 32768 + %tst = icmp eq i32 %bit, 0 + br i1 %tst, label %true, label %false, !prof !0 + +; CHECK: tbnz {{w[0-9]+}}, #15, [[FALSE:.LBB[0-9]+_[0-9]+]] +; CHECK-NEXT: // BB# +; CHECK-NEXT: bl test_true + +; CHECK: [[FALSE]]: +; CHECK: bl test_false + +true: + call void @test_true() + ret void + +false: + call void @test_false() + ret void +} + +define void @test_TBZ_fallthrough_nottaken(i64 %in) nounwind { +; CHECK: test_TBZ_fallthrough_nottaken: + %bit = and i64 %in, 32768 + %tst = icmp eq i64 %bit, 0 + br i1 %tst, label %true, label %false, !prof !1 + +; CHECK: tbz {{x[0-9]+}}, #15, [[TRUE:.LBB[0-9]+_[0-9]+]] +; CHECK-NEXT: // BB# +; CHECK-NEXT: bl test_false + +; CHECK: 
[[TRUE]]: +; CHECK: bl test_true + +true: + call void @test_true() + ret void + +false: + call void @test_false() + ret void +} + + +define void @test_TBNZ_fallthrough_taken(i32 %in) nounwind { +; CHECK: test_TBNZ_fallthrough_taken: + %bit = and i32 %in, 32768 + %tst = icmp ne i32 %bit, 0 + br i1 %tst, label %true, label %false, !prof !0 + +; CHECK: tbz {{w[0-9]+}}, #15, [[FALSE:.LBB[0-9]+_[0-9]+]] +; CHECK-NEXT: // BB# +; CHECK-NEXT: bl test_true + +; CHECK: [[FALSE]]: +; CHECK: bl test_false + +true: + call void @test_true() + ret void + +false: + call void @test_false() + ret void +} + +define void @test_TBNZ_fallthrough_nottaken(i64 %in) nounwind { +; CHECK: test_TBNZ_fallthrough_nottaken: + %bit = and i64 %in, 32768 + %tst = icmp ne i64 %bit, 0 + br i1 %tst, label %true, label %false, !prof !1 + +; CHECK: tbnz {{x[0-9]+}}, #15, [[TRUE:.LBB[0-9]+_[0-9]+]] +; CHECK-NEXT: // BB# +; CHECK-NEXT: bl test_false + +; CHECK: [[TRUE]]: +; CHECK: bl test_true + +true: + call void @test_true() + ret void + +false: + call void @test_false() + ret void +} + diff --git a/test/CodeGen/AArch64/atomic-ops-not-barriers.ll b/test/CodeGen/AArch64/atomic-ops-not-barriers.ll new file mode 100644 index 000000000000..3c03e47147b0 --- /dev/null +++ b/test/CodeGen/AArch64/atomic-ops-not-barriers.ll @@ -0,0 +1,24 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s + +define i32 @foo(i32* %var, i1 %cond) { +; CHECK: foo: + br i1 %cond, label %atomic_ver, label %simple_ver +simple_ver: + %oldval = load i32* %var + %newval = add nsw i32 %oldval, -1 + store i32 %newval, i32* %var + br label %somewhere +atomic_ver: + %val = atomicrmw add i32* %var, i32 -1 seq_cst + br label %somewhere +; CHECK: dmb +; CHECK: ldxr +; CHECK: dmb + ; The key point here is that the second dmb isn't immediately followed by the + ; simple_ver basic block, which LLVM attempted to do when DMB had been marked + ; with isBarrier. For now, look for something that looks like "somewhere". +; CHECK-NEXT: mov +somewhere: + %combined = phi i32 [ %val, %atomic_ver ], [ %newval, %simple_ver] + ret i32 %combined +} diff --git a/test/CodeGen/AArch64/atomic-ops.ll b/test/CodeGen/AArch64/atomic-ops.ll new file mode 100644 index 000000000000..f3c16171cc83 --- /dev/null +++ b/test/CodeGen/AArch64/atomic-ops.ll @@ -0,0 +1,1055 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s + +@var8 = global i8 0 +@var16 = global i16 0 +@var32 = global i32 0 +@var64 = global i64 0 + +define i8 @test_atomic_load_add_i8(i8 %offset) nounwind { +; CHECK: test_atomic_load_add_i8: + %old = atomicrmw add i8* @var8, i8 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
+; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_load_add_i16(i16 %offset) nounwind { +; CHECK: test_atomic_load_add_i16: + %old = atomicrmw add i16* @var16, i16 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_load_add_i32(i32 %offset) nounwind { +; CHECK: test_atomic_load_add_i32: + %old = atomicrmw add i32* @var32, i32 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_load_add_i64(i64 %offset) nounwind { +; CHECK: test_atomic_load_add_i64: + %old = atomicrmw add i64* @var64, i64 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]] + ; x0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: add [[NEW:x[0-9]+]], x[[OLD]], x0 +; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i64 %old +} + +define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind { +; CHECK: test_atomic_load_sub_i8: + %old = atomicrmw sub i8* @var8, i8 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_load_sub_i16(i16 %offset) nounwind { +; CHECK: test_atomic_load_sub_i16: + %old = atomicrmw sub i16* @var16, i16 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
+; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_load_sub_i32(i32 %offset) nounwind { +; CHECK: test_atomic_load_sub_i32: + %old = atomicrmw sub i32* @var32, i32 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind { +; CHECK: test_atomic_load_sub_i64: + %old = atomicrmw sub i64* @var64, i64 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]] + ; x0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: sub [[NEW:x[0-9]+]], x[[OLD]], x0 +; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i64 %old +} + +define i8 @test_atomic_load_and_i8(i8 %offset) nounwind { +; CHECK: test_atomic_load_and_i8: + %old = atomicrmw and i8* @var8, i8 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_load_and_i16(i16 %offset) nounwind { +; CHECK: test_atomic_load_and_i16: + %old = atomicrmw and i16* @var16, i16 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_load_and_i32(i32 %offset) nounwind { +; CHECK: test_atomic_load_and_i32: + %old = atomicrmw and i32* @var32, i32 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
+; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_load_and_i64(i64 %offset) nounwind { +; CHECK: test_atomic_load_and_i64: + %old = atomicrmw and i64* @var64, i64 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]] + ; x0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: and [[NEW:x[0-9]+]], x[[OLD]], x0 +; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i64 %old +} + +define i8 @test_atomic_load_or_i8(i8 %offset) nounwind { +; CHECK: test_atomic_load_or_i8: + %old = atomicrmw or i8* @var8, i8 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_load_or_i16(i16 %offset) nounwind { +; CHECK: test_atomic_load_or_i16: + %old = atomicrmw or i16* @var16, i16 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_load_or_i32(i32 %offset) nounwind { +; CHECK: test_atomic_load_or_i32: + %old = atomicrmw or i32* @var32, i32 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_load_or_i64(i64 %offset) nounwind { +; CHECK: test_atomic_load_or_i64: + %old = atomicrmw or i64* @var64, i64 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]] + ; x0 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
+; CHECK-NEXT: orr [[NEW:x[0-9]+]], x[[OLD]], x0 +; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i64 %old +} + +define i8 @test_atomic_load_xor_i8(i8 %offset) nounwind { +; CHECK: test_atomic_load_xor_i8: + %old = atomicrmw xor i8* @var8, i8 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_load_xor_i16(i16 %offset) nounwind { +; CHECK: test_atomic_load_xor_i16: + %old = atomicrmw xor i16* @var16, i16 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_load_xor_i32(i32 %offset) nounwind { +; CHECK: test_atomic_load_xor_i32: + %old = atomicrmw xor i32* @var32, i32 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind { +; CHECK: test_atomic_load_xor_i64: + %old = atomicrmw xor i64* @var64, i64 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]] + ; x0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: eor [[NEW:x[0-9]+]], x[[OLD]], x0 +; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i64 %old +} + +define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind { +; CHECK: test_atomic_load_xchg_i8: + %old = atomicrmw xchg i8* @var8, i8 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
+; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], w0, [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind { +; CHECK: test_atomic_load_xchg_i16: + %old = atomicrmw xchg i16* @var16, i16 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], w0, [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind { +; CHECK: test_atomic_load_xchg_i32: + %old = atomicrmw xchg i32* @var32, i32 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], w0, [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind { +; CHECK: test_atomic_load_xchg_i64: + %old = atomicrmw xchg i64* @var64, i64 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]] + ; x0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], x0, [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i64 %old +} + + +define i8 @test_atomic_load_min_i8(i8 %offset) nounwind { +; CHECK: test_atomic_load_min_i8: + %old = atomicrmw min i8* @var8, i8 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp w0, w[[OLD]], sxtb +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt +; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_load_min_i16(i16 %offset) nounwind { +; CHECK: test_atomic_load_min_i16: + %old = atomicrmw min i16* @var16, i16 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
+; CHECK-NEXT: cmp w0, w[[OLD]], sxth +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt +; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_load_min_i32(i32 %offset) nounwind { +; CHECK: test_atomic_load_min_i32: + %old = atomicrmw min i32* @var32, i32 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp w0, w[[OLD]] +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt +; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_load_min_i64(i64 %offset) nounwind { +; CHECK: test_atomic_load_min_i64: + %old = atomicrmw min i64* @var64, i64 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]] + ; x0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp x0, x[[OLD]] +; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, gt +; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i64 %old +} + +define i8 @test_atomic_load_max_i8(i8 %offset) nounwind { +; CHECK: test_atomic_load_max_i8: + %old = atomicrmw max i8* @var8, i8 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp w0, w[[OLD]], sxtb +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt +; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_load_max_i16(i16 %offset) nounwind { +; CHECK: test_atomic_load_max_i16: + %old = atomicrmw max i16* @var16, i16 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
+; CHECK-NEXT: cmp w0, w[[OLD]], sxth +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt +; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_load_max_i32(i32 %offset) nounwind { +; CHECK: test_atomic_load_max_i32: + %old = atomicrmw max i32* @var32, i32 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp w0, w[[OLD]] +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt +; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_load_max_i64(i64 %offset) nounwind { +; CHECK: test_atomic_load_max_i64: + %old = atomicrmw max i64* @var64, i64 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]] + ; x0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp x0, x[[OLD]] +; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, lt +; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i64 %old +} + +define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind { +; CHECK: test_atomic_load_umin_i8: + %old = atomicrmw umin i8* @var8, i8 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp w0, w[[OLD]], uxtb +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi +; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_load_umin_i16(i16 %offset) nounwind { +; CHECK: test_atomic_load_umin_i16: + %old = atomicrmw umin i16* @var16, i16 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
+; CHECK-NEXT: cmp w0, w[[OLD]], uxth +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi +; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind { +; CHECK: test_atomic_load_umin_i32: + %old = atomicrmw umin i32* @var32, i32 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp w0, w[[OLD]] +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi +; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_load_umin_i64(i64 %offset) nounwind { +; CHECK: test_atomic_load_umin_i64: + %old = atomicrmw umin i64* @var64, i64 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]] + ; x0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp x0, x[[OLD]] +; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, hi +; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i64 %old +} + +define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind { +; CHECK: test_atomic_load_umax_i8: + %old = atomicrmw umax i8* @var8, i8 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp w0, w[[OLD]], uxtb +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo +; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind { +; CHECK: test_atomic_load_umax_i16: + %old = atomicrmw umax i16* @var16, i16 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
+; CHECK-NEXT: cmp w0, w[[OLD]], uxth +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo +; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind { +; CHECK: test_atomic_load_umax_i32: + %old = atomicrmw umax i32* @var32, i32 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp w0, w[[OLD]] +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo +; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind { +; CHECK: test_atomic_load_umax_i64: + %old = atomicrmw umax i64* @var64, i64 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]] + ; x0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp x0, x[[OLD]] +; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, lo +; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i64 %old +} + +define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind { +; CHECK: test_atomic_cmpxchg_i8: + %old = cmpxchg i8* @var8, i8 %wanted, i8 %new seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8 + +; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]: +; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp w[[OLD]], w0 +; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]] + ; As above, w1 is a reasonable guess. +; CHECK: stxrb [[STATUS:w[0-9]+]], w1, [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind { +; CHECK: test_atomic_cmpxchg_i16: + %old = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16 + +; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]: +; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp w[[OLD]], w0 +; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]] + ; As above, w1 is a reasonable guess. 
+; CHECK: stxrh [[STATUS:w[0-9]+]], w1, [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind { +; CHECK: test_atomic_cmpxchg_i32: + %old = cmpxchg i32* @var32, i32 %wanted, i32 %new seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32 + +; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]: +; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp w[[OLD]], w0 +; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]] + ; As above, w1 is a reasonable guess. +; CHECK: stxr [[STATUS:w[0-9]+]], w1, [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind { +; CHECK: test_atomic_cmpxchg_i64: + %old = cmpxchg i64* @var64, i64 %wanted, i64 %new seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64 + +; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]: +; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp x[[OLD]], x0 +; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]] + ; As above, w1 is a reasonable guess. +; CHECK: stxr [[STATUS:w[0-9]+]], x1, [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i64 %old +} + +define i8 @test_atomic_load_monotonic_i8() nounwind { +; CHECK: test_atomic_load_monotonic_i8: + %val = load atomic i8* @var8 monotonic, align 1 +; CHECK-NOT: dmb +; CHECK: adrp x[[HIADDR:[0-9]+]], var8 +; CHECK: ldrb w0, [x[[HIADDR]], #:lo12:var8] +; CHECK-NOT: dmb + + ret i8 %val +} + +define i8 @test_atomic_load_monotonic_regoff_i8(i64 %base, i64 %off) nounwind { +; CHECK: test_atomic_load_monotonic_regoff_i8: + %addr_int = add i64 %base, %off + %addr = inttoptr i64 %addr_int to i8* + + %val = load atomic i8* %addr monotonic, align 1 +; CHECK-NOT: dmb +; CHECK: ldrb w0, [x0, x1] +; CHECK-NOT: dmb + + ret i8 %val +} + +define i8 @test_atomic_load_acquire_i8() nounwind { +; CHECK: test_atomic_load_acquire_i8: + %val = load atomic i8* @var8 acquire, align 1 +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8 + +; CHECK: ldarb w0, [x[[ADDR]]] + ret i8 %val +} + +define i8 @test_atomic_load_seq_cst_i8() nounwind { +; CHECK: test_atomic_load_seq_cst_i8: + %val = load atomic i8* @var8 seq_cst, align 1 +; CHECK: adrp x[[HIADDR:[0-9]+]], var8 +; CHECK: ldrb w0, [x[[HIADDR]], #:lo12:var8] +; CHECK: dmb ish + ret i8 %val +} + +define i16 @test_atomic_load_monotonic_i16() nounwind { +; CHECK: test_atomic_load_monotonic_i16: + %val = load atomic i16* @var16 monotonic, align 2 +; CHECK-NOT: dmb +; CHECK: adrp x[[HIADDR:[0-9]+]], var16 +; CHECK: ldrh w0, [x[[HIADDR]], #:lo12:var16] +; CHECK-NOT: dmb + + ret i16 %val +} + +define i32 @test_atomic_load_monotonic_regoff_i32(i64 %base, i64 %off) nounwind { +; CHECK: test_atomic_load_monotonic_regoff_i32: + %addr_int = add i64 %base, %off + %addr = inttoptr i64 %addr_int to i32* + + %val = load atomic i32* %addr monotonic, align 4 +; CHECK-NOT: dmb +; CHECK: ldr w0, [x0, x1] +; CHECK-NOT: dmb + + ret i32 %val +} + +define i64 
@test_atomic_load_seq_cst_i64() nounwind { +; CHECK: test_atomic_load_seq_cst_i64: + %val = load atomic i64* @var64 seq_cst, align 8 +; CHECK: adrp x[[HIADDR:[0-9]+]], var64 +; CHECK: ldr x0, [x[[HIADDR]], #:lo12:var64] +; CHECK: dmb ish + ret i64 %val +} + +define void @test_atomic_store_monotonic_i8(i8 %val) nounwind { +; CHECK: test_atomic_store_monotonic_i8: + store atomic i8 %val, i8* @var8 monotonic, align 1 +; CHECK: adrp x[[HIADDR:[0-9]+]], var8 +; CHECK: strb w0, [x[[HIADDR]], #:lo12:var8] + + ret void +} + +define void @test_atomic_store_monotonic_regoff_i8(i64 %base, i64 %off, i8 %val) nounwind { +; CHECK: test_atomic_store_monotonic_regoff_i8: + + %addr_int = add i64 %base, %off + %addr = inttoptr i64 %addr_int to i8* + + store atomic i8 %val, i8* %addr monotonic, align 1 +; CHECK: strb w2, [x0, x1] + + ret void +} +define void @test_atomic_store_release_i8(i8 %val) nounwind { +; CHECK: test_atomic_store_release_i8: + store atomic i8 %val, i8* @var8 release, align 1 +; CHECK: adrp [[HIADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], #:lo12:var8 +; CHECK: stlrb w0, [x[[ADDR]]] + + ret void +} + +define void @test_atomic_store_seq_cst_i8(i8 %val) nounwind { +; CHECK: test_atomic_store_seq_cst_i8: + store atomic i8 %val, i8* @var8 seq_cst, align 1 +; CHECK: adrp [[HIADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], #:lo12:var8 +; CHECK: stlrb w0, [x[[ADDR]]] +; CHECK: dmb ish + + ret void +} + +define void @test_atomic_store_monotonic_i16(i16 %val) nounwind { +; CHECK: test_atomic_store_monotonic_i16: + store atomic i16 %val, i16* @var16 monotonic, align 2 +; CHECK: adrp x[[HIADDR:[0-9]+]], var16 +; CHECK: strh w0, [x[[HIADDR]], #:lo12:var16] + + ret void +} + +define void @test_atomic_store_monotonic_regoff_i32(i64 %base, i64 %off, i32 %val) nounwind { +; CHECK: test_atomic_store_monotonic_regoff_i32: + + %addr_int = add i64 %base, %off + %addr = inttoptr i64 %addr_int to i32* + + store atomic i32 %val, i32* %addr monotonic, align 4 +; CHECK: str w2, [x0, x1] + + ret void +} + +define void @test_atomic_store_release_i64(i64 %val) nounwind { +; CHECK: test_atomic_store_release_i64: + store atomic i64 %val, i64* @var64 release, align 8 +; CHECK: adrp [[HIADDR:x[0-9]+]], var64 +; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], #:lo12:var64 +; CHECK: stlr x0, [x[[ADDR]]] + + ret void +} diff --git a/test/CodeGen/AArch64/basic-pic.ll b/test/CodeGen/AArch64/basic-pic.ll new file mode 100644 index 000000000000..da94041c95ff --- /dev/null +++ b/test/CodeGen/AArch64/basic-pic.ll @@ -0,0 +1,70 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic %s -o - | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic -filetype=obj %s -o -| llvm-objdump -r - | FileCheck --check-prefix=CHECK-ELF %s + +@var = global i32 0 + +; CHECK-ELF: RELOCATION RECORDS FOR [.text] + +define i32 @get_globalvar() { +; CHECK: get_globalvar: + + %val = load i32* @var +; CHECK: adrp x[[GOTHI:[0-9]+]], :got:var +; CHECK: ldr x[[GOTLOC:[0-9]+]], [x[[GOTHI]], #:got_lo12:var] +; CHECK: ldr w0, [x[[GOTLOC]]] + +; CHECK-ELF: R_AARCH64_ADR_GOT_PAGE var +; CHECK-ELF: R_AARCH64_LD64_GOT_LO12_NC var + ret i32 %val +} + +define i32* @get_globalvaraddr() { +; CHECK: get_globalvaraddr: + + %val = load i32* @var +; CHECK: adrp x[[GOTHI:[0-9]+]], :got:var +; CHECK: ldr x0, [x[[GOTHI]], #:got_lo12:var] + +; CHECK-ELF: R_AARCH64_ADR_GOT_PAGE var +; CHECK-ELF: R_AARCH64_LD64_GOT_LO12_NC var + ret i32* @var +} + +@hiddenvar = 
hidden global i32 0 + +define i32 @get_hiddenvar() { +; CHECK: get_hiddenvar: + + %val = load i32* @hiddenvar +; CHECK: adrp x[[HI:[0-9]+]], hiddenvar +; CHECK: ldr w0, [x[[HI]], #:lo12:hiddenvar] + +; CHECK-ELF: R_AARCH64_ADR_PREL_PG_HI21 hiddenvar +; CHECK-ELF: R_AARCH64_LDST32_ABS_LO12_NC hiddenvar + ret i32 %val +} + +define i32* @get_hiddenvaraddr() { +; CHECK: get_hiddenvaraddr: + + %val = load i32* @hiddenvar +; CHECK: adrp [[HI:x[0-9]+]], hiddenvar +; CHECK: add x0, [[HI]], #:lo12:hiddenvar + +; CHECK-ELF: R_AARCH64_ADR_PREL_PG_HI21 hiddenvar +; CHECK-ELF: R_AARCH64_ADD_ABS_LO12_NC hiddenvar + ret i32* @hiddenvar +} + +define void()* @get_func() { +; CHECK: get_func: + + ret void()* bitcast(void()*()* @get_func to void()*) +; CHECK: adrp x[[GOTHI:[0-9]+]], :got:get_func +; CHECK: ldr x0, [x[[GOTHI]], #:got_lo12:get_func] + + ; Particularly important that the ADRP gets a relocation, LLVM tends to think + ; it can relax it because it knows where get_func is. It can't! +; CHECK-ELF: R_AARCH64_ADR_GOT_PAGE get_func +; CHECK-ELF: R_AARCH64_LD64_GOT_LO12_NC get_func +}
\ No newline at end of file diff --git a/test/CodeGen/AArch64/bitfield-insert-0.ll b/test/CodeGen/AArch64/bitfield-insert-0.ll new file mode 100644 index 000000000000..d1191f6aaa8a --- /dev/null +++ b/test/CodeGen/AArch64/bitfield-insert-0.ll @@ -0,0 +1,19 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -filetype=obj < %s | llvm-objdump -disassemble - | FileCheck %s + +; The encoding of lsb -> immr in the CGed bitfield instructions was wrong at one +; point, in the edge case where lsb = 0. Just make sure. + +define void @test_bfi0(i32* %existing, i32* %new) { +; CHECK: bfxil {{w[0-9]+}}, {{w[0-9]+}}, #0, #18 + + %oldval = load volatile i32* %existing + %oldval_keep = and i32 %oldval, 4294705152 ; 0xfffc_0000 + + %newval = load volatile i32* %new + %newval_masked = and i32 %newval, 262143 ; = 0x0003_ffff + + %combined = or i32 %newval_masked, %oldval_keep + store volatile i32 %combined, i32* %existing + + ret void +}
\ No newline at end of file diff --git a/test/CodeGen/AArch64/bitfield-insert.ll b/test/CodeGen/AArch64/bitfield-insert.ll new file mode 100644 index 000000000000..3e871b9a6d27 --- /dev/null +++ b/test/CodeGen/AArch64/bitfield-insert.ll @@ -0,0 +1,193 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s + +; First, a simple example from Clang. The registers could plausibly be +; different, but probably won't be. + +%struct.foo = type { i8, [2 x i8], i8 } + +define [1 x i64] @from_clang([1 x i64] %f.coerce, i32 %n) nounwind readnone { +; CHECK: from_clang: +; CHECK: bfi w0, w1, #3, #4 +; CHECK-NEXT: ret + +entry: + %f.coerce.fca.0.extract = extractvalue [1 x i64] %f.coerce, 0 + %tmp.sroa.0.0.extract.trunc = trunc i64 %f.coerce.fca.0.extract to i32 + %bf.value = shl i32 %n, 3 + %0 = and i32 %bf.value, 120 + %f.sroa.0.0.insert.ext.masked = and i32 %tmp.sroa.0.0.extract.trunc, 135 + %1 = or i32 %f.sroa.0.0.insert.ext.masked, %0 + %f.sroa.0.0.extract.trunc = zext i32 %1 to i64 + %tmp1.sroa.1.1.insert.insert = and i64 %f.coerce.fca.0.extract, 4294967040 + %tmp1.sroa.0.0.insert.insert = or i64 %f.sroa.0.0.extract.trunc, %tmp1.sroa.1.1.insert.insert + %.fca.0.insert = insertvalue [1 x i64] undef, i64 %tmp1.sroa.0.0.insert.insert, 0 + ret [1 x i64] %.fca.0.insert +} + +define void @test_whole32(i32* %existing, i32* %new) { +; CHECK: test_whole32: +; CHECK: bfi {{w[0-9]+}}, {{w[0-9]+}}, #26, #5 + + %oldval = load volatile i32* %existing + %oldval_keep = and i32 %oldval, 2214592511 ; =0x83ffffff + + %newval = load volatile i32* %new + %newval_shifted = shl i32 %newval, 26 + %newval_masked = and i32 %newval_shifted, 2080374784 ; = 0x7c000000 + + %combined = or i32 %oldval_keep, %newval_masked + store volatile i32 %combined, i32* %existing + + ret void +} + +define void @test_whole64(i64* %existing, i64* %new) { +; CHECK: test_whole64: +; CHECK: bfi {{x[0-9]+}}, {{x[0-9]+}}, #26, #14 +; CHECK-NOT: and +; CHECK: ret + + %oldval = load volatile i64* %existing + %oldval_keep = and i64 %oldval, 18446742974265032703 ; = 0xffffff0003ffffffL + + %newval = load volatile i64* %new + %newval_shifted = shl i64 %newval, 26 + %newval_masked = and i64 %newval_shifted, 1099444518912 ; = 0xfffc000000 + + %combined = or i64 %oldval_keep, %newval_masked + store volatile i64 %combined, i64* %existing + + ret void +} + +define void @test_whole32_from64(i64* %existing, i64* %new) { +; CHECK: test_whole32_from64: +; CHECK: bfi {{w[0-9]+}}, {{w[0-9]+}}, #{{0|16}}, #16 +; CHECK-NOT: and +; CHECK: ret + + %oldval = load volatile i64* %existing + %oldval_keep = and i64 %oldval, 4294901760 ; = 0xffff0000 + + %newval = load volatile i64* %new + %newval_masked = and i64 %newval, 65535 ; = 0xffff + + %combined = or i64 %oldval_keep, %newval_masked + store volatile i64 %combined, i64* %existing + + ret void +} + +define void @test_32bit_masked(i32 *%existing, i32 *%new) { +; CHECK: test_32bit_masked: +; CHECK: bfi [[INSERT:w[0-9]+]], {{w[0-9]+}}, #3, #4 +; CHECK: and {{w[0-9]+}}, [[INSERT]], #0xff + + %oldval = load volatile i32* %existing + %oldval_keep = and i32 %oldval, 135 ; = 0x87 + + %newval = load volatile i32* %new + %newval_shifted = shl i32 %newval, 3 + %newval_masked = and i32 %newval_shifted, 120 ; = 0x78 + + %combined = or i32 %oldval_keep, %newval_masked + store volatile i32 %combined, i32* %existing + + ret void +} + +define void @test_64bit_masked(i64 *%existing, i64 *%new) { +; CHECK: test_64bit_masked: +; CHECK: bfi [[INSERT:x[0-9]+]], {{x[0-9]+}}, #40, #8 +; CHECK: and {{x[0-9]+}}, [[INSERT]], 
#0xffff00000000 + + %oldval = load volatile i64* %existing + %oldval_keep = and i64 %oldval, 1095216660480 ; = 0xff_0000_0000 + + %newval = load volatile i64* %new + %newval_shifted = shl i64 %newval, 40 + %newval_masked = and i64 %newval_shifted, 280375465082880 ; = 0xff00_0000_0000 + + %combined = or i64 %newval_masked, %oldval_keep + store volatile i64 %combined, i64* %existing + + ret void +} + +; Mask is too complicated for literal ANDwwi, make sure other avenues are tried. +define void @test_32bit_complexmask(i32 *%existing, i32 *%new) { +; CHECK: test_32bit_complexmask: +; CHECK: bfi {{w[0-9]+}}, {{w[0-9]+}}, #3, #4 +; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + + %oldval = load volatile i32* %existing + %oldval_keep = and i32 %oldval, 647 ; = 0x287 + + %newval = load volatile i32* %new + %newval_shifted = shl i32 %newval, 3 + %newval_masked = and i32 %newval_shifted, 120 ; = 0x278 + + %combined = or i32 %oldval_keep, %newval_masked + store volatile i32 %combined, i32* %existing + + ret void +} + +; Neither mask is is a contiguous set of 1s. BFI can't be used +define void @test_32bit_badmask(i32 *%existing, i32 *%new) { +; CHECK: test_32bit_badmask: +; CHECK-NOT: bfi +; CHECK: ret + + %oldval = load volatile i32* %existing + %oldval_keep = and i32 %oldval, 135 ; = 0x87 + + %newval = load volatile i32* %new + %newval_shifted = shl i32 %newval, 3 + %newval_masked = and i32 %newval_shifted, 632 ; = 0x278 + + %combined = or i32 %oldval_keep, %newval_masked + store volatile i32 %combined, i32* %existing + + ret void +} + +; Ditto +define void @test_64bit_badmask(i64 *%existing, i64 *%new) { +; CHECK: test_64bit_badmask: +; CHECK-NOT: bfi +; CHECK: ret + + %oldval = load volatile i64* %existing + %oldval_keep = and i64 %oldval, 135 ; = 0x87 + + %newval = load volatile i64* %new + %newval_shifted = shl i64 %newval, 3 + %newval_masked = and i64 %newval_shifted, 664 ; = 0x278 + + %combined = or i64 %oldval_keep, %newval_masked + store volatile i64 %combined, i64* %existing + + ret void +} + +; Bitfield insert where there's a left-over shr needed at the beginning +; (e.g. result of str.bf1 = str.bf2) +define void @test_32bit_with_shr(i32* %existing, i32* %new) { +; CHECK: test_32bit_with_shr: + + %oldval = load volatile i32* %existing + %oldval_keep = and i32 %oldval, 2214592511 ; =0x83ffffff + + %newval = load i32* %new + %newval_shifted = shl i32 %newval, 12 + %newval_masked = and i32 %newval_shifted, 2080374784 ; = 0x7c000000 + + %combined = or i32 %oldval_keep, %newval_masked + store volatile i32 %combined, i32* %existing +; CHECK: lsr [[BIT:w[0-9]+]], {{w[0-9]+}}, #14 +; CHECK: bfi {{w[0-9]}}, [[BIT]], #26, #5 + + ret void +} + diff --git a/test/CodeGen/AArch64/bitfield.ll b/test/CodeGen/AArch64/bitfield.ll new file mode 100644 index 000000000000..36d337ef05ef --- /dev/null +++ b/test/CodeGen/AArch64/bitfield.ll @@ -0,0 +1,218 @@ + +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +@var32 = global i32 0 +@var64 = global i64 0 + +define void @test_extendb(i8 %var) { +; CHECK: test_extendb: + + %sxt32 = sext i8 %var to i32 + store volatile i32 %sxt32, i32* @var32 +; CHECK: sxtb {{w[0-9]+}}, {{w[0-9]+}} + + %sxt64 = sext i8 %var to i64 + store volatile i64 %sxt64, i64* @var64 +; CHECK: sxtb {{x[0-9]+}}, {{w[0-9]+}} + +; N.b. this doesn't actually produce a bitfield instruction at the +; moment, but it's still a good test to have and the semantics are +; correct. 
+ %uxt32 = zext i8 %var to i32 + store volatile i32 %uxt32, i32* @var32 +; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xff + + %uxt64 = zext i8 %var to i64 + store volatile i64 %uxt64, i64* @var64 +; CHECK: uxtb {{x[0-9]+}}, {{w[0-9]+}} + ret void +} + +define void @test_extendh(i16 %var) { +; CHECK: test_extendh: + + %sxt32 = sext i16 %var to i32 + store volatile i32 %sxt32, i32* @var32 +; CHECK: sxth {{w[0-9]+}}, {{w[0-9]+}} + + %sxt64 = sext i16 %var to i64 + store volatile i64 %sxt64, i64* @var64 +; CHECK: sxth {{x[0-9]+}}, {{w[0-9]+}} + +; N.b. this doesn't actually produce a bitfield instruction at the +; moment, but it's still a good test to have and the semantics are +; correct. + %uxt32 = zext i16 %var to i32 + store volatile i32 %uxt32, i32* @var32 +; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xffff + + %uxt64 = zext i16 %var to i64 + store volatile i64 %uxt64, i64* @var64 +; CHECK: uxth {{x[0-9]+}}, {{w[0-9]+}} + ret void +} + +define void @test_extendw(i32 %var) { +; CHECK: test_extendw: + + %sxt64 = sext i32 %var to i64 + store volatile i64 %sxt64, i64* @var64 +; CHECK: sxtw {{x[0-9]+}}, {{w[0-9]+}} + + %uxt64 = zext i32 %var to i64 + store volatile i64 %uxt64, i64* @var64 +; CHECK: ubfx {{w[0-9]+}}, {{w[0-9]+}}, #0, #32 + ret void +} + +define void @test_shifts(i32 %val32, i64 %val64) { +; CHECK: test_shifts: + + %shift1 = ashr i32 %val32, 31 + store volatile i32 %shift1, i32* @var32 +; CHECK: asr {{w[0-9]+}}, {{w[0-9]+}}, #31 + + %shift2 = lshr i32 %val32, 8 + store volatile i32 %shift2, i32* @var32 +; CHECK: lsr {{w[0-9]+}}, {{w[0-9]+}}, #8 + + %shift3 = shl i32 %val32, 1 + store volatile i32 %shift3, i32* @var32 +; CHECK: lsl {{w[0-9]+}}, {{w[0-9]+}}, #1 + + %shift4 = ashr i64 %val64, 31 + store volatile i64 %shift4, i64* @var64 +; CHECK: asr {{x[0-9]+}}, {{x[0-9]+}}, #31 + + %shift5 = lshr i64 %val64, 8 + store volatile i64 %shift5, i64* @var64 +; CHECK: lsr {{x[0-9]+}}, {{x[0-9]+}}, #8 + + %shift6 = shl i64 %val64, 63 + store volatile i64 %shift6, i64* @var64 +; CHECK: lsl {{x[0-9]+}}, {{x[0-9]+}}, #63 + + %shift7 = ashr i64 %val64, 63 + store volatile i64 %shift7, i64* @var64 +; CHECK: asr {{x[0-9]+}}, {{x[0-9]+}}, #63 + + %shift8 = lshr i64 %val64, 63 + store volatile i64 %shift8, i64* @var64 +; CHECK: lsr {{x[0-9]+}}, {{x[0-9]+}}, #63 + + %shift9 = lshr i32 %val32, 31 + store volatile i32 %shift9, i32* @var32 +; CHECK: lsr {{w[0-9]+}}, {{w[0-9]+}}, #31 + + %shift10 = shl i32 %val32, 31 + store volatile i32 %shift10, i32* @var32 +; CHECK: lsl {{w[0-9]+}}, {{w[0-9]+}}, #31 + + ret void +} + +; LLVM can produce in-register extensions taking place entirely with +; 64-bit registers too. +define void @test_sext_inreg_64(i64 %in) { +; CHECK: test_sext_inreg_64: + +; i1 doesn't have an official alias, but crops up and is handled by +; the bitfield ops. 
+ %trunc_i1 = trunc i64 %in to i1 + %sext_i1 = sext i1 %trunc_i1 to i64 + store volatile i64 %sext_i1, i64* @var64 +; CHECK: sbfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #1 + + %trunc_i8 = trunc i64 %in to i8 + %sext_i8 = sext i8 %trunc_i8 to i64 + store volatile i64 %sext_i8, i64* @var64 +; CHECK: sxtb {{x[0-9]+}}, {{w[0-9]+}} + + %trunc_i16 = trunc i64 %in to i16 + %sext_i16 = sext i16 %trunc_i16 to i64 + store volatile i64 %sext_i16, i64* @var64 +; CHECK: sxth {{x[0-9]+}}, {{w[0-9]+}} + + %trunc_i32 = trunc i64 %in to i32 + %sext_i32 = sext i32 %trunc_i32 to i64 + store volatile i64 %sext_i32, i64* @var64 +; CHECK: sxtw {{x[0-9]+}}, {{w[0-9]+}} + ret void +} + +; These instructions don't actually select to official bitfield +; operations, but it's important that we select them somehow: +define void @test_zext_inreg_64(i64 %in) { +; CHECK: test_zext_inreg_64: + + %trunc_i8 = trunc i64 %in to i8 + %zext_i8 = zext i8 %trunc_i8 to i64 + store volatile i64 %zext_i8, i64* @var64 +; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xff + + %trunc_i16 = trunc i64 %in to i16 + %zext_i16 = zext i16 %trunc_i16 to i64 + store volatile i64 %zext_i16, i64* @var64 +; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xffff + + %trunc_i32 = trunc i64 %in to i32 + %zext_i32 = zext i32 %trunc_i32 to i64 + store volatile i64 %zext_i32, i64* @var64 +; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xffffffff + + ret void +} + +define i64 @test_sext_inreg_from_32(i32 %in) { +; CHECK: test_sext_inreg_from_32: + + %small = trunc i32 %in to i1 + %ext = sext i1 %small to i64 + + ; Different registers are of course, possible, though suboptimal. This is + ; making sure that a 64-bit "(sext_inreg (anyext GPR32), i1)" uses the 64-bit + ; sbfx rather than just 32-bits. +; CHECK: sbfx x0, x0, #0, #1 + ret i64 %ext +} + + +define i32 @test_ubfx32(i32* %addr) { +; CHECK: test_ubfx32: +; CHECK: ubfx {{w[0-9]+}}, {{w[0-9]+}}, #23, #3 + + %fields = load i32* %addr + %shifted = lshr i32 %fields, 23 + %masked = and i32 %shifted, 7 + ret i32 %masked +} + +define i64 @test_ubfx64(i64* %addr) { +; CHECK: test_ubfx64: +; CHECK: ubfx {{x[0-9]+}}, {{x[0-9]+}}, #25, #10 + + %fields = load i64* %addr + %shifted = lshr i64 %fields, 25 + %masked = and i64 %shifted, 1023 + ret i64 %masked +} + +define i32 @test_sbfx32(i32* %addr) { +; CHECK: test_sbfx32: +; CHECK: sbfx {{w[0-9]+}}, {{w[0-9]+}}, #6, #3 + + %fields = load i32* %addr + %shifted = shl i32 %fields, 23 + %extended = ashr i32 %shifted, 29 + ret i32 %extended +} + +define i64 @test_sbfx64(i64* %addr) { +; CHECK: test_sbfx64: +; CHECK: sbfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #63 + + %fields = load i64* %addr + %shifted = shl i64 %fields, 1 + %extended = ashr i64 %shifted, 1 + ret i64 %extended +} diff --git a/test/CodeGen/AArch64/blockaddress.ll b/test/CodeGen/AArch64/blockaddress.ll new file mode 100644 index 000000000000..3d0a5cf96bcd --- /dev/null +++ b/test/CodeGen/AArch64/blockaddress.ll @@ -0,0 +1,18 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s + +@addr = global i8* null + +define void @test_blockaddress() { +; CHECK: test_blockaddress: + store volatile i8* blockaddress(@test_blockaddress, %block), i8** @addr + %val = load volatile i8** @addr + indirectbr i8* %val, [label %block] +; CHECK: adrp [[DEST_HI:x[0-9]+]], [[DEST_LBL:.Ltmp[0-9]+]] +; CHECK: add [[DEST:x[0-9]+]], [[DEST_HI]], #:lo12:[[DEST_LBL]] +; CHECK: str [[DEST]], +; CHECK: ldr [[NEWDEST:x[0-9]+]] +; CHECK: br [[NEWDEST]] + +block: + ret void +} diff --git a/test/CodeGen/AArch64/bool-loads.ll 
b/test/CodeGen/AArch64/bool-loads.ll new file mode 100644 index 000000000000..5c7640bc4218 --- /dev/null +++ b/test/CodeGen/AArch64/bool-loads.ll @@ -0,0 +1,55 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s + +@var = global i1 0 + +define i32 @test_sextloadi32() { +; CHECK: test_sextloadi32 + + %val = load i1* @var + %ret = sext i1 %val to i32 +; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var] +; CHECK: sbfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #1 + + ret i32 %ret +; CHECK: ret +} + +define i64 @test_sextloadi64() { +; CHECK: test_sextloadi64 + + %val = load i1* @var + %ret = sext i1 %val to i64 +; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var] +; CHECK: sbfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #1 + + ret i64 %ret +; CHECK: ret +} + +define i32 @test_zextloadi32() { +; CHECK: test_zextloadi32 + +; It's not actually necessary that "ret" is next, but as far as LLVM +; is concerned only 0 or 1 should be loadable so no extension is +; necessary. + %val = load i1* @var + %ret = zext i1 %val to i32 +; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var] + + ret i32 %ret +; CHECK-NEXT: ret +} + +define i64 @test_zextloadi64() { +; CHECK: test_zextloadi64 + +; It's not actually necessary that "ret" is next, but as far as LLVM +; is concerned only 0 or 1 should be loadable so no extension is +; necessary. + %val = load i1* @var + %ret = zext i1 %val to i64 +; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var] + + ret i64 %ret +; CHECK-NEXT: ret +} diff --git a/test/CodeGen/AArch64/breg.ll b/test/CodeGen/AArch64/breg.ll new file mode 100644 index 000000000000..38ed4734e1b4 --- /dev/null +++ b/test/CodeGen/AArch64/breg.ll @@ -0,0 +1,17 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +@stored_label = global i8* null + +define void @foo() { +; CHECK: foo: + %lab = load i8** @stored_label + indirectbr i8* %lab, [label %otherlab, label %retlab] +; CHECK: adrp {{x[0-9]+}}, stored_label +; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:stored_label] +; CHECK: br {{x[0-9]+}} + +otherlab: + ret void +retlab: + ret void +} diff --git a/test/CodeGen/AArch64/callee-save.ll b/test/CodeGen/AArch64/callee-save.ll new file mode 100644 index 000000000000..c66aa5bfc510 --- /dev/null +++ b/test/CodeGen/AArch64/callee-save.ll @@ -0,0 +1,86 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +@var = global float 0.0 + +define void @foo() { +; CHECK: foo: + +; CHECK: stp d14, d15, [sp +; CHECK: stp d12, d13, [sp +; CHECK: stp d10, d11, [sp +; CHECK: stp d8, d9, [sp + + ; Create lots of live variables to exhaust the supply of + ; caller-saved registers + %val1 = load volatile float* @var + %val2 = load volatile float* @var + %val3 = load volatile float* @var + %val4 = load volatile float* @var + %val5 = load volatile float* @var + %val6 = load volatile float* @var + %val7 = load volatile float* @var + %val8 = load volatile float* @var + %val9 = load volatile float* @var + %val10 = load volatile float* @var + %val11 = load volatile float* @var + %val12 = load volatile float* @var + %val13 = load volatile float* @var + %val14 = load volatile float* @var + %val15 = load volatile float* @var + %val16 = load volatile float* @var + %val17 = load volatile float* @var + %val18 = load volatile float* @var + %val19 = load volatile float* @var + %val20 = load volatile float* @var + %val21 = load volatile float* @var + %val22 = load volatile float* @var + %val23 = load volatile float* @var + %val24 = load volatile float* @var + %val25 = load 
volatile float* @var + %val26 = load volatile float* @var + %val27 = load volatile float* @var + %val28 = load volatile float* @var + %val29 = load volatile float* @var + %val30 = load volatile float* @var + %val31 = load volatile float* @var + %val32 = load volatile float* @var + + store volatile float %val1, float* @var + store volatile float %val2, float* @var + store volatile float %val3, float* @var + store volatile float %val4, float* @var + store volatile float %val5, float* @var + store volatile float %val6, float* @var + store volatile float %val7, float* @var + store volatile float %val8, float* @var + store volatile float %val9, float* @var + store volatile float %val10, float* @var + store volatile float %val11, float* @var + store volatile float %val12, float* @var + store volatile float %val13, float* @var + store volatile float %val14, float* @var + store volatile float %val15, float* @var + store volatile float %val16, float* @var + store volatile float %val17, float* @var + store volatile float %val18, float* @var + store volatile float %val19, float* @var + store volatile float %val20, float* @var + store volatile float %val21, float* @var + store volatile float %val22, float* @var + store volatile float %val23, float* @var + store volatile float %val24, float* @var + store volatile float %val25, float* @var + store volatile float %val26, float* @var + store volatile float %val27, float* @var + store volatile float %val28, float* @var + store volatile float %val29, float* @var + store volatile float %val30, float* @var + store volatile float %val31, float* @var + store volatile float %val32, float* @var + +; CHECK: ldp d8, d9, [sp +; CHECK: ldp d10, d11, [sp +; CHECK: ldp d12, d13, [sp +; CHECK: ldp d14, d15, [sp + ret void +} diff --git a/test/CodeGen/AArch64/compare-branch.ll b/test/CodeGen/AArch64/compare-branch.ll new file mode 100644 index 000000000000..4213110497d3 --- /dev/null +++ b/test/CodeGen/AArch64/compare-branch.ll @@ -0,0 +1,38 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +@var32 = global i32 0 +@var64 = global i64 0 + +define void @foo() { +; CHECK: foo: + + %val1 = load volatile i32* @var32 + %tst1 = icmp eq i32 %val1, 0 + br i1 %tst1, label %end, label %test2 +; CHECK: cbz {{w[0-9]+}}, .LBB + +test2: + %val2 = load volatile i32* @var32 + %tst2 = icmp ne i32 %val2, 0 + br i1 %tst2, label %end, label %test3 +; CHECK: cbnz {{w[0-9]+}}, .LBB + +test3: + %val3 = load volatile i64* @var64 + %tst3 = icmp eq i64 %val3, 0 + br i1 %tst3, label %end, label %test4 +; CHECK: cbz {{x[0-9]+}}, .LBB + +test4: + %val4 = load volatile i64* @var64 + %tst4 = icmp ne i64 %val4, 0 + br i1 %tst4, label %end, label %test5 +; CHECK: cbnz {{x[0-9]+}}, .LBB + +test5: + store volatile i64 %val4, i64* @var64 + ret void + +end: + ret void +}
\ No newline at end of file diff --git a/test/CodeGen/AArch64/cond-sel.ll b/test/CodeGen/AArch64/cond-sel.ll new file mode 100644 index 000000000000..3051cf53fdf8 --- /dev/null +++ b/test/CodeGen/AArch64/cond-sel.ll @@ -0,0 +1,213 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +@var32 = global i32 0 +@var64 = global i64 0 + +define void @test_csel(i32 %lhs32, i32 %rhs32, i64 %lhs64) { +; CHECK: test_csel: + + %tst1 = icmp ugt i32 %lhs32, %rhs32 + %val1 = select i1 %tst1, i32 42, i32 52 + store i32 %val1, i32* @var32 +; CHECK: movz [[W52:w[0-9]+]], #52 +; CHECK: movz [[W42:w[0-9]+]], #42 +; CHECK: csel {{w[0-9]+}}, [[W42]], [[W52]], hi + + %rhs64 = sext i32 %rhs32 to i64 + %tst2 = icmp sle i64 %lhs64, %rhs64 + %val2 = select i1 %tst2, i64 %lhs64, i64 %rhs64 + store i64 %val2, i64* @var64 +; CHECK: cmp [[LHS:x[0-9]+]], [[RHS:w[0-9]+]], sxtw +; CHECK: sxtw [[EXT_RHS:x[0-9]+]], [[RHS]] +; CHECK: csel {{x[0-9]+}}, [[LHS]], [[EXT_RHS]], le + + ret void +; CHECK: ret +} + +define void @test_floatcsel(float %lhs32, float %rhs32, double %lhs64, double %rhs64) { +; CHECK: test_floatcsel: + + %tst1 = fcmp one float %lhs32, %rhs32 +; CHECK: fcmp {{s[0-9]+}}, {{s[0-9]+}} + %val1 = select i1 %tst1, i32 42, i32 52 + store i32 %val1, i32* @var32 +; CHECK: movz [[W52:w[0-9]+]], #52 +; CHECK: movz [[W42:w[0-9]+]], #42 +; CHECK: csel [[MAYBETRUE:w[0-9]+]], [[W42]], [[W52]], mi +; CHECK: csel {{w[0-9]+}}, [[W42]], [[MAYBETRUE]], gt + + + %tst2 = fcmp ueq double %lhs64, %rhs64 +; CHECK: fcmp {{d[0-9]+}}, {{d[0-9]+}} + %val2 = select i1 %tst2, i64 9, i64 15 + store i64 %val2, i64* @var64 +; CHECK: movz [[CONST15:x[0-9]+]], #15 +; CHECK: movz [[CONST9:x[0-9]+]], #9 +; CHECK: csel [[MAYBETRUE:x[0-9]+]], [[CONST9]], [[CONST15]], eq +; CHECK: csel {{x[0-9]+}}, [[CONST9]], [[MAYBETRUE]], vs + + ret void +; CHECK: ret +} + + +define void @test_csinc(i32 %lhs32, i32 %rhs32, i64 %lhs64) { +; CHECK: test_csinc: + +; Note that commuting rhs and lhs in the select changes ugt to ule (i.e. hi to ls). + %tst1 = icmp ugt i32 %lhs32, %rhs32 + %inc1 = add i32 %rhs32, 1 + %val1 = select i1 %tst1, i32 %inc1, i32 %lhs32 + store volatile i32 %val1, i32* @var32 +; CHECK: cmp [[LHS:w[0-9]+]], [[RHS:w[0-9]+]] +; CHECK: csinc {{w[0-9]+}}, [[LHS]], [[RHS]], ls + + %rhs2 = add i32 %rhs32, 42 + %tst2 = icmp sle i32 %lhs32, %rhs2 + %inc2 = add i32 %rhs32, 1 + %val2 = select i1 %tst2, i32 %lhs32, i32 %inc2 + store volatile i32 %val2, i32* @var32 +; CHECK: cmp [[LHS:w[0-9]+]], {{w[0-9]+}} +; CHECK: csinc {{w[0-9]+}}, [[LHS]], {{w[0-9]+}}, le + +; Note that commuting rhs and lhs in the select changes ugt to ule (i.e. hi to ls). + %rhs3 = sext i32 %rhs32 to i64 + %tst3 = icmp ugt i64 %lhs64, %rhs3 + %inc3 = add i64 %rhs3, 1 + %val3 = select i1 %tst3, i64 %inc3, i64 %lhs64 + store volatile i64 %val3, i64* @var64 +; CHECK: cmp [[LHS:x[0-9]+]], {{w[0-9]+}} +; CHECK: csinc {{x[0-9]+}}, [[LHS]], {{x[0-9]+}}, ls + + %rhs4 = zext i32 %rhs32 to i64 + %tst4 = icmp sle i64 %lhs64, %rhs4 + %inc4 = add i64 %rhs4, 1 + %val4 = select i1 %tst4, i64 %lhs64, i64 %inc4 + store volatile i64 %val4, i64* @var64 +; CHECK: cmp [[LHS:x[0-9]+]], {{w[0-9]+}} +; CHECK: csinc {{x[0-9]+}}, [[LHS]], {{x[0-9]+}}, le + + ret void +; CHECK: ret +} + +define void @test_csinv(i32 %lhs32, i32 %rhs32, i64 %lhs64) { +; CHECK: test_csinv: + +; Note that commuting rhs and lhs in the select changes ugt to ule (i.e. hi to ls). 
+ %tst1 = icmp ugt i32 %lhs32, %rhs32 + %inc1 = xor i32 -1, %rhs32 + %val1 = select i1 %tst1, i32 %inc1, i32 %lhs32 + store volatile i32 %val1, i32* @var32 +; CHECK: cmp [[LHS:w[0-9]+]], [[RHS:w[0-9]+]] +; CHECK: csinv {{w[0-9]+}}, [[LHS]], [[RHS]], ls + + %rhs2 = add i32 %rhs32, 42 + %tst2 = icmp sle i32 %lhs32, %rhs2 + %inc2 = xor i32 -1, %rhs32 + %val2 = select i1 %tst2, i32 %lhs32, i32 %inc2 + store volatile i32 %val2, i32* @var32 +; CHECK: cmp [[LHS:w[0-9]+]], {{w[0-9]+}} +; CHECK: csinv {{w[0-9]+}}, [[LHS]], {{w[0-9]+}}, le + +; Note that commuting rhs and lhs in the select changes ugt to ule (i.e. hi to ls). + %rhs3 = sext i32 %rhs32 to i64 + %tst3 = icmp ugt i64 %lhs64, %rhs3 + %inc3 = xor i64 -1, %rhs3 + %val3 = select i1 %tst3, i64 %inc3, i64 %lhs64 + store volatile i64 %val3, i64* @var64 +; CHECK: cmp [[LHS:x[0-9]+]], {{w[0-9]+}} +; CHECK: csinv {{x[0-9]+}}, [[LHS]], {{x[0-9]+}}, ls + + %rhs4 = zext i32 %rhs32 to i64 + %tst4 = icmp sle i64 %lhs64, %rhs4 + %inc4 = xor i64 -1, %rhs4 + %val4 = select i1 %tst4, i64 %lhs64, i64 %inc4 + store volatile i64 %val4, i64* @var64 +; CHECK: cmp [[LHS:x[0-9]+]], {{w[0-9]+}} +; CHECK: csinv {{x[0-9]+}}, [[LHS]], {{x[0-9]+}}, le + + ret void +; CHECK: ret +} + +define void @test_csneg(i32 %lhs32, i32 %rhs32, i64 %lhs64) { +; CHECK: test_csneg: + +; Note that commuting rhs and lhs in the select changes ugt to ule (i.e. hi to ls). + %tst1 = icmp ugt i32 %lhs32, %rhs32 + %inc1 = sub i32 0, %rhs32 + %val1 = select i1 %tst1, i32 %inc1, i32 %lhs32 + store volatile i32 %val1, i32* @var32 +; CHECK: cmp [[LHS:w[0-9]+]], [[RHS:w[0-9]+]] +; CHECK: csneg {{w[0-9]+}}, [[LHS]], [[RHS]], ls + + %rhs2 = add i32 %rhs32, 42 + %tst2 = icmp sle i32 %lhs32, %rhs2 + %inc2 = sub i32 0, %rhs32 + %val2 = select i1 %tst2, i32 %lhs32, i32 %inc2 + store volatile i32 %val2, i32* @var32 +; CHECK: cmp [[LHS:w[0-9]+]], {{w[0-9]+}} +; CHECK: csneg {{w[0-9]+}}, [[LHS]], {{w[0-9]+}}, le + +; Note that commuting rhs and lhs in the select changes ugt to ule (i.e. hi to ls). + %rhs3 = sext i32 %rhs32 to i64 + %tst3 = icmp ugt i64 %lhs64, %rhs3 + %inc3 = sub i64 0, %rhs3 + %val3 = select i1 %tst3, i64 %inc3, i64 %lhs64 + store volatile i64 %val3, i64* @var64 +; CHECK: cmp [[LHS:x[0-9]+]], {{w[0-9]+}} +; CHECK: csneg {{x[0-9]+}}, [[LHS]], {{x[0-9]+}}, ls + + %rhs4 = zext i32 %rhs32 to i64 + %tst4 = icmp sle i64 %lhs64, %rhs4 + %inc4 = sub i64 0, %rhs4 + %val4 = select i1 %tst4, i64 %lhs64, i64 %inc4 + store volatile i64 %val4, i64* @var64 +; CHECK: cmp [[LHS:x[0-9]+]], {{w[0-9]+}} +; CHECK: csneg {{x[0-9]+}}, [[LHS]], {{x[0-9]+}}, le + + ret void +; CHECK: ret +} + +define void @test_cset(i32 %lhs, i32 %rhs, i64 %lhs64) { +; CHECK: test_cset: + +; N.b. 
code is not optimal here (32-bit csinc would be better) but +; incoming DAG is too complex + %tst1 = icmp eq i32 %lhs, %rhs + %val1 = zext i1 %tst1 to i32 + store i32 %val1, i32* @var32 +; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: csinc {{w[0-9]+}}, wzr, wzr, ne + + %rhs64 = sext i32 %rhs to i64 + %tst2 = icmp ule i64 %lhs64, %rhs64 + %val2 = zext i1 %tst2 to i64 + store i64 %val2, i64* @var64 +; CHECK: csinc {{w[0-9]+}}, wzr, wzr, hi + + ret void +; CHECK: ret +} + +define void @test_csetm(i32 %lhs, i32 %rhs, i64 %lhs64) { +; CHECK: test_csetm: + + %tst1 = icmp eq i32 %lhs, %rhs + %val1 = sext i1 %tst1 to i32 + store i32 %val1, i32* @var32 +; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: csinv {{w[0-9]+}}, wzr, wzr, ne + + %rhs64 = sext i32 %rhs to i64 + %tst2 = icmp ule i64 %lhs64, %rhs64 + %val2 = sext i1 %tst2 to i64 + store i64 %val2, i64* @var64 +; CHECK: csinv {{x[0-9]+}}, xzr, xzr, hi + + ret void +; CHECK: ret +} diff --git a/test/CodeGen/AArch64/directcond.ll b/test/CodeGen/AArch64/directcond.ll new file mode 100644 index 000000000000..f5d57593bfad --- /dev/null +++ b/test/CodeGen/AArch64/directcond.ll @@ -0,0 +1,84 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +define i32 @test_select_i32(i1 %bit, i32 %a, i32 %b) { +; CHECK: test_select_i32: + %val = select i1 %bit, i32 %a, i32 %b +; CHECK: movz [[ONE:w[0-9]+]], #1 +; CHECK: tst w0, [[ONE]] +; CHECK-NEXT: csel w0, w1, w2, ne + + ret i32 %val +} + +define i64 @test_select_i64(i1 %bit, i64 %a, i64 %b) { +; CHECK: test_select_i64: + %val = select i1 %bit, i64 %a, i64 %b +; CHECK: movz [[ONE:w[0-9]+]], #1 +; CHECK: tst w0, [[ONE]] +; CHECK-NEXT: csel x0, x1, x2, ne + + ret i64 %val +} + +define float @test_select_float(i1 %bit, float %a, float %b) { +; CHECK: test_select_float: + %val = select i1 %bit, float %a, float %b +; CHECK: movz [[ONE:w[0-9]+]], #1 +; CHECK: tst w0, [[ONE]] +; CHECK-NEXT: fcsel s0, s0, s1, ne + + ret float %val +} + +define double @test_select_double(i1 %bit, double %a, double %b) { +; CHECK: test_select_double: + %val = select i1 %bit, double %a, double %b +; CHECK: movz [[ONE:w[0-9]+]], #1 +; CHECK: tst w0, [[ONE]] +; CHECK-NEXT: fcsel d0, d0, d1, ne + + ret double %val +} + +define i32 @test_brcond(i1 %bit) { +; CHECK: test_brcond: + br i1 %bit, label %true, label %false +; CHECK: tbz {{w[0-9]+}}, #0, .LBB + +true: + ret i32 0 +false: + ret i32 42 +} + +define i1 @test_setcc_float(float %lhs, float %rhs) { +; CHECK: test_setcc_float + %val = fcmp oeq float %lhs, %rhs +; CHECK: fcmp s0, s1 +; CHECK: csinc w0, wzr, wzr, ne + ret i1 %val +} + +define i1 @test_setcc_double(double %lhs, double %rhs) { +; CHECK: test_setcc_double + %val = fcmp oeq double %lhs, %rhs +; CHECK: fcmp d0, d1 +; CHECK: csinc w0, wzr, wzr, ne + ret i1 %val +} + +define i1 @test_setcc_i32(i32 %lhs, i32 %rhs) { +; CHECK: test_setcc_i32 + %val = icmp ugt i32 %lhs, %rhs +; CHECK: cmp w0, w1 +; CHECK: csinc w0, wzr, wzr, ls + ret i1 %val +} + +define i1 @test_setcc_i64(i64 %lhs, i64 %rhs) { +; CHECK: test_setcc_i64 + %val = icmp ne i64 %lhs, %rhs +; CHECK: cmp x0, x1 +; CHECK: csinc w0, wzr, wzr, eq + ret i1 %val +} diff --git a/test/CodeGen/AArch64/dp-3source.ll b/test/CodeGen/AArch64/dp-3source.ll new file mode 100644 index 000000000000..c40d3933b44b --- /dev/null +++ b/test/CodeGen/AArch64/dp-3source.ll @@ -0,0 +1,163 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +define i32 @test_madd32(i32 %val0, i32 %val1, i32 %val2) { +; CHECK: test_madd32: 
+ %mid = mul i32 %val1, %val2 + %res = add i32 %val0, %mid +; CHECK: madd {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + ret i32 %res +} + +define i64 @test_madd64(i64 %val0, i64 %val1, i64 %val2) { +; CHECK: test_madd64: + %mid = mul i64 %val1, %val2 + %res = add i64 %val0, %mid +; CHECK: madd {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + ret i64 %res +} + +define i32 @test_msub32(i32 %val0, i32 %val1, i32 %val2) { +; CHECK: test_msub32: + %mid = mul i32 %val1, %val2 + %res = sub i32 %val0, %mid +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + ret i32 %res +} + +define i64 @test_msub64(i64 %val0, i64 %val1, i64 %val2) { +; CHECK: test_msub64: + %mid = mul i64 %val1, %val2 + %res = sub i64 %val0, %mid +; CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + ret i64 %res +} + +define i64 @test_smaddl(i64 %acc, i32 %val1, i32 %val2) { +; CHECK: test_smaddl: + %ext1 = sext i32 %val1 to i64 + %ext2 = sext i32 %val2 to i64 + %prod = mul i64 %ext1, %ext2 + %res = add i64 %acc, %prod +; CHECK: smaddl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{x[0-9]+}} + ret i64 %res +} + +define i64 @test_smsubl(i64 %acc, i32 %val1, i32 %val2) { +; CHECK: test_smsubl: + %ext1 = sext i32 %val1 to i64 + %ext2 = sext i32 %val2 to i64 + %prod = mul i64 %ext1, %ext2 + %res = sub i64 %acc, %prod +; CHECK: smsubl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{x[0-9]+}} + ret i64 %res +} + +define i64 @test_umaddl(i64 %acc, i32 %val1, i32 %val2) { +; CHECK: test_umaddl: + %ext1 = zext i32 %val1 to i64 + %ext2 = zext i32 %val2 to i64 + %prod = mul i64 %ext1, %ext2 + %res = add i64 %acc, %prod +; CHECK: umaddl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{x[0-9]+}} + ret i64 %res +} + +define i64 @test_umsubl(i64 %acc, i32 %val1, i32 %val2) { +; CHECK: test_umsubl: + %ext1 = zext i32 %val1 to i64 + %ext2 = zext i32 %val2 to i64 + %prod = mul i64 %ext1, %ext2 + %res = sub i64 %acc, %prod +; CHECK: umsubl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{x[0-9]+}} + ret i64 %res +} + +define i64 @test_smulh(i64 %lhs, i64 %rhs) { +; CHECK: test_smulh: + %ext1 = sext i64 %lhs to i128 + %ext2 = sext i64 %rhs to i128 + %res = mul i128 %ext1, %ext2 + %high = lshr i128 %res, 64 + %val = trunc i128 %high to i64 +; CHECK: smulh {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + ret i64 %val +} + +define i64 @test_umulh(i64 %lhs, i64 %rhs) { +; CHECK: test_umulh: + %ext1 = zext i64 %lhs to i128 + %ext2 = zext i64 %rhs to i128 + %res = mul i128 %ext1, %ext2 + %high = lshr i128 %res, 64 + %val = trunc i128 %high to i64 +; CHECK: umulh {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + ret i64 %val +} + +define i32 @test_mul32(i32 %lhs, i32 %rhs) { +; CHECK: test_mul32: + %res = mul i32 %lhs, %rhs +; CHECK: mul {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + ret i32 %res +} + +define i64 @test_mul64(i64 %lhs, i64 %rhs) { +; CHECK: test_mul64: + %res = mul i64 %lhs, %rhs +; CHECK: mul {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + ret i64 %res +} + +define i32 @test_mneg32(i32 %lhs, i32 %rhs) { +; CHECK: test_mneg32: + %prod = mul i32 %lhs, %rhs + %res = sub i32 0, %prod +; CHECK: mneg {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + ret i32 %res +} + +define i64 @test_mneg64(i64 %lhs, i64 %rhs) { +; CHECK: test_mneg64: + %prod = mul i64 %lhs, %rhs + %res = sub i64 0, %prod +; CHECK: mneg {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + ret i64 %res +} + +define i64 @test_smull(i32 %lhs, i32 %rhs) { +; CHECK: test_smull: + %ext1 = sext i32 %lhs to i64 + %ext2 = sext i32 %rhs to i64 + %res = mul i64 %ext1, %ext2 +; CHECK: smull {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + ret 
i64 %res +} + +define i64 @test_umull(i32 %lhs, i32 %rhs) { +; CHECK: test_umull: + %ext1 = zext i32 %lhs to i64 + %ext2 = zext i32 %rhs to i64 + %res = mul i64 %ext1, %ext2 +; CHECK: umull {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + ret i64 %res +} + +define i64 @test_smnegl(i32 %lhs, i32 %rhs) { +; CHECK: test_smnegl: + %ext1 = sext i32 %lhs to i64 + %ext2 = sext i32 %rhs to i64 + %prod = mul i64 %ext1, %ext2 + %res = sub i64 0, %prod +; CHECK: smnegl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + ret i64 %res +} + +define i64 @test_umnegl(i32 %lhs, i32 %rhs) { +; CHECK: test_umnegl: + %ext1 = zext i32 %lhs to i64 + %ext2 = zext i32 %rhs to i64 + %prod = mul i64 %ext1, %ext2 + %res = sub i64 0, %prod +; CHECK: umnegl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + ret i64 %res +} diff --git a/test/CodeGen/AArch64/dp1.ll b/test/CodeGen/AArch64/dp1.ll new file mode 100644 index 000000000000..83aa8b4f6631 --- /dev/null +++ b/test/CodeGen/AArch64/dp1.ll @@ -0,0 +1,152 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +@var32 = global i32 0 +@var64 = global i64 0 + +define void @rev_i32() { +; CHECK: rev_i32: + %val0_tmp = load i32* @var32 + %val1_tmp = call i32 @llvm.bswap.i32(i32 %val0_tmp) +; CHECK: rev {{w[0-9]+}}, {{w[0-9]+}} + store volatile i32 %val1_tmp, i32* @var32 + ret void +} + +define void @rev_i64() { +; CHECK: rev_i64: + %val0_tmp = load i64* @var64 + %val1_tmp = call i64 @llvm.bswap.i64(i64 %val0_tmp) +; CHECK: rev {{x[0-9]+}}, {{x[0-9]+}} + store volatile i64 %val1_tmp, i64* @var64 + ret void +} + +define void @rev32_i64() { +; CHECK: rev32_i64: + %val0_tmp = load i64* @var64 + %val1_tmp = shl i64 %val0_tmp, 32 + %val5_tmp = sub i64 64, 32 + %val2_tmp = lshr i64 %val0_tmp, %val5_tmp + %val3_tmp = or i64 %val1_tmp, %val2_tmp + %val4_tmp = call i64 @llvm.bswap.i64(i64 %val3_tmp) +; CHECK: rev32 {{x[0-9]+}}, {{x[0-9]+}} + store volatile i64 %val4_tmp, i64* @var64 + ret void +} + +define void @rev16_i32() { +; CHECK: rev16_i32: + %val0_tmp = load i32* @var32 + %val1_tmp = shl i32 %val0_tmp, 16 + %val2_tmp = lshr i32 %val0_tmp, 16 + %val3_tmp = or i32 %val1_tmp, %val2_tmp + %val4_tmp = call i32 @llvm.bswap.i32(i32 %val3_tmp) +; CHECK: rev16 {{w[0-9]+}}, {{w[0-9]+}} + store volatile i32 %val4_tmp, i32* @var32 + ret void +} + +define void @clz_zerodef_i32() { +; CHECK: clz_zerodef_i32: + %val0_tmp = load i32* @var32 + %val4_tmp = call i32 @llvm.ctlz.i32(i32 %val0_tmp, i1 0) +; CHECK: clz {{w[0-9]+}}, {{w[0-9]+}} + store volatile i32 %val4_tmp, i32* @var32 + ret void +} + +define void @clz_zerodef_i64() { +; CHECK: clz_zerodef_i64: + %val0_tmp = load i64* @var64 + %val4_tmp = call i64 @llvm.ctlz.i64(i64 %val0_tmp, i1 0) +; CHECK: clz {{x[0-9]+}}, {{x[0-9]+}} + store volatile i64 %val4_tmp, i64* @var64 + ret void +} + +define void @clz_zeroundef_i32() { +; CHECK: clz_zeroundef_i32: + %val0_tmp = load i32* @var32 + %val4_tmp = call i32 @llvm.ctlz.i32(i32 %val0_tmp, i1 1) +; CHECK: clz {{w[0-9]+}}, {{w[0-9]+}} + store volatile i32 %val4_tmp, i32* @var32 + ret void +} + +define void @clz_zeroundef_i64() { +; CHECK: clz_zeroundef_i64: + %val0_tmp = load i64* @var64 + %val4_tmp = call i64 @llvm.ctlz.i64(i64 %val0_tmp, i1 1) +; CHECK: clz {{x[0-9]+}}, {{x[0-9]+}} + store volatile i64 %val4_tmp, i64* @var64 + ret void +} + +define void @cttz_zerodef_i32() { +; CHECK: cttz_zerodef_i32: + %val0_tmp = load i32* @var32 + %val4_tmp = call i32 @llvm.cttz.i32(i32 %val0_tmp, i1 0) +; CHECK: rbit [[REVERSED:w[0-9]+]], {{w[0-9]+}} +; CHECK: clz {{w[0-9]+}}, [[REVERSED]] + store 
volatile i32 %val4_tmp, i32* @var32 + ret void +} + +define void @cttz_zerodef_i64() { +; CHECK: cttz_zerodef_i64: + %val0_tmp = load i64* @var64 + %val4_tmp = call i64 @llvm.cttz.i64(i64 %val0_tmp, i1 0) +; CHECK: rbit [[REVERSED:x[0-9]+]], {{x[0-9]+}} +; CHECK: clz {{x[0-9]+}}, [[REVERSED]] + store volatile i64 %val4_tmp, i64* @var64 + ret void +} + +define void @cttz_zeroundef_i32() { +; CHECK: cttz_zeroundef_i32: + %val0_tmp = load i32* @var32 + %val4_tmp = call i32 @llvm.cttz.i32(i32 %val0_tmp, i1 1) +; CHECK: rbit [[REVERSED:w[0-9]+]], {{w[0-9]+}} +; CHECK: clz {{w[0-9]+}}, [[REVERSED]] + store volatile i32 %val4_tmp, i32* @var32 + ret void +} + +define void @cttz_zeroundef_i64() { +; CHECK: cttz_zeroundef_i64: + %val0_tmp = load i64* @var64 + %val4_tmp = call i64 @llvm.cttz.i64(i64 %val0_tmp, i1 1) +; CHECK: rbit [[REVERSED:x[0-9]+]], {{x[0-9]+}} +; CHECK: clz {{x[0-9]+}}, [[REVERSED]] + store volatile i64 %val4_tmp, i64* @var64 + ret void +} + +; These two are just compilation tests really: the operation's set to Expand in +; ISelLowering. +define void @ctpop_i32() { +; CHECK: ctpop_i32: + %val0_tmp = load i32* @var32 + %val4_tmp = call i32 @llvm.ctpop.i32(i32 %val0_tmp) + store volatile i32 %val4_tmp, i32* @var32 + ret void +} + +define void @ctpop_i64() { +; CHECK: ctpop_i64: + %val0_tmp = load i64* @var64 + %val4_tmp = call i64 @llvm.ctpop.i64(i64 %val0_tmp) + store volatile i64 %val4_tmp, i64* @var64 + ret void +} + + +declare i32 @llvm.bswap.i32(i32) +declare i64 @llvm.bswap.i64(i64) +declare i32 @llvm.ctlz.i32 (i32, i1) +declare i64 @llvm.ctlz.i64 (i64, i1) +declare i32 @llvm.cttz.i32 (i32, i1) +declare i64 @llvm.cttz.i64 (i64, i1) +declare i32 @llvm.ctpop.i32 (i32) +declare i64 @llvm.ctpop.i64 (i64) + diff --git a/test/CodeGen/AArch64/dp2.ll b/test/CodeGen/AArch64/dp2.ll new file mode 100644 index 000000000000..4c740f6b8623 --- /dev/null +++ b/test/CodeGen/AArch64/dp2.ll @@ -0,0 +1,169 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +@var32_0 = global i32 0 +@var32_1 = global i32 0 +@var64_0 = global i64 0 +@var64_1 = global i64 0 + +define void @rorv_i64() { +; CHECK: rorv_i64: + %val0_tmp = load i64* @var64_0 + %val1_tmp = load i64* @var64_1 + %val2_tmp = sub i64 64, %val1_tmp + %val3_tmp = shl i64 %val0_tmp, %val2_tmp + %val4_tmp = lshr i64 %val0_tmp, %val1_tmp + %val5_tmp = or i64 %val3_tmp, %val4_tmp +; CHECK: ror {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + store volatile i64 %val5_tmp, i64* @var64_0 + ret void +} + +define void @asrv_i64() { +; CHECK: asrv_i64: + %val0_tmp = load i64* @var64_0 + %val1_tmp = load i64* @var64_1 + %val4_tmp = ashr i64 %val0_tmp, %val1_tmp +; CHECK: asr {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + store volatile i64 %val4_tmp, i64* @var64_1 + ret void +} + +define void @lsrv_i64() { +; CHECK: lsrv_i64: + %val0_tmp = load i64* @var64_0 + %val1_tmp = load i64* @var64_1 + %val4_tmp = lshr i64 %val0_tmp, %val1_tmp +; CHECK: lsr {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + store volatile i64 %val4_tmp, i64* @var64_0 + ret void +} + +define void @lslv_i64() { +; CHECK: lslv_i64: + %val0_tmp = load i64* @var64_0 + %val1_tmp = load i64* @var64_1 + %val4_tmp = shl i64 %val0_tmp, %val1_tmp +; CHECK: lsl {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + store volatile i64 %val4_tmp, i64* @var64_1 + ret void +} + +define void @udiv_i64() { +; CHECK: udiv_i64: + %val0_tmp = load i64* @var64_0 + %val1_tmp = load i64* @var64_1 + %val4_tmp = udiv i64 %val0_tmp, %val1_tmp +; CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + store 
volatile i64 %val4_tmp, i64* @var64_0 + ret void +} + +define void @sdiv_i64() { +; CHECK: sdiv_i64: + %val0_tmp = load i64* @var64_0 + %val1_tmp = load i64* @var64_1 + %val4_tmp = sdiv i64 %val0_tmp, %val1_tmp +; CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + store volatile i64 %val4_tmp, i64* @var64_1 + ret void +} + + +define void @lsrv_i32() { +; CHECK: lsrv_i32: + %val0_tmp = load i32* @var32_0 + %val1_tmp = load i32* @var32_1 + %val2_tmp = add i32 1, %val1_tmp + %val4_tmp = lshr i32 %val0_tmp, %val2_tmp +; CHECK: lsr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + store volatile i32 %val4_tmp, i32* @var32_0 + ret void +} + +define void @lslv_i32() { +; CHECK: lslv_i32: + %val0_tmp = load i32* @var32_0 + %val1_tmp = load i32* @var32_1 + %val2_tmp = add i32 1, %val1_tmp + %val4_tmp = shl i32 %val0_tmp, %val2_tmp +; CHECK: lsl {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + store volatile i32 %val4_tmp, i32* @var32_1 + ret void +} + +define void @rorv_i32() { +; CHECK: rorv_i32: + %val0_tmp = load i32* @var32_0 + %val6_tmp = load i32* @var32_1 + %val1_tmp = add i32 1, %val6_tmp + %val2_tmp = sub i32 32, %val1_tmp + %val3_tmp = shl i32 %val0_tmp, %val2_tmp + %val4_tmp = lshr i32 %val0_tmp, %val1_tmp + %val5_tmp = or i32 %val3_tmp, %val4_tmp +; CHECK: ror {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + store volatile i32 %val5_tmp, i32* @var32_0 + ret void +} + +define void @asrv_i32() { +; CHECK: asrv_i32: + %val0_tmp = load i32* @var32_0 + %val1_tmp = load i32* @var32_1 + %val2_tmp = add i32 1, %val1_tmp + %val4_tmp = ashr i32 %val0_tmp, %val2_tmp +; CHECK: asr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + store volatile i32 %val4_tmp, i32* @var32_1 + ret void +} + +define void @sdiv_i32() { +; CHECK: sdiv_i32: + %val0_tmp = load i32* @var32_0 + %val1_tmp = load i32* @var32_1 + %val4_tmp = sdiv i32 %val0_tmp, %val1_tmp +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + store volatile i32 %val4_tmp, i32* @var32_1 + ret void +} + +define void @udiv_i32() { +; CHECK: udiv_i32: + %val0_tmp = load i32* @var32_0 + %val1_tmp = load i32* @var32_1 + %val4_tmp = udiv i32 %val0_tmp, %val1_tmp +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + store volatile i32 %val4_tmp, i32* @var32_0 + ret void +} + +; The point of this test is that we may not actually see (shl GPR32:$Val, (zext GPR32:$Val2)) +; in the DAG (the RHS may be natively 64-bit), but we should still use the lsl instructions. +define i32 @test_lsl32() { +; CHECK: test_lsl32: + + %val = load i32* @var32_0 + %ret = shl i32 1, %val +; CHECK: lsl {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + + ret i32 %ret +} + +define i32 @test_lsr32() { +; CHECK: test_lsr32: + + %val = load i32* @var32_0 + %ret = lshr i32 1, %val +; CHECK: lsr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + + ret i32 %ret +} + +define i32 @test_asr32(i32 %in) { +; CHECK: test_asr32: + + %val = load i32* @var32_0 + %ret = ashr i32 %in, %val +; CHECK: asr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + + ret i32 %ret +} diff --git a/test/CodeGen/AArch64/elf-extern.ll b/test/CodeGen/AArch64/elf-extern.ll new file mode 100644 index 000000000000..ee89d8d94ba4 --- /dev/null +++ b/test/CodeGen/AArch64/elf-extern.ll @@ -0,0 +1,21 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -filetype=obj | elf-dump | FileCheck %s + +; External symbols are a different concept to global variables but should still +; get relocations and so on when used. 
+ +declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) + +define i32 @check_extern() { + call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* undef, i32 undef, i32 4, i1 0) + ret i32 0 +} + +; CHECK: .rela.text +; CHECK: ('r_sym', 0x00000009) +; CHECK-NEXT: ('r_type', 0x0000011b) + +; CHECK: .symtab +; CHECK: Symbol 9 +; CHECK-NEXT: memcpy + + diff --git a/test/CodeGen/AArch64/extern-weak.ll b/test/CodeGen/AArch64/extern-weak.ll new file mode 100644 index 000000000000..3d3d8676818a --- /dev/null +++ b/test/CodeGen/AArch64/extern-weak.ll @@ -0,0 +1,35 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -o - < %s | FileCheck %s + +declare extern_weak i32 @var() + +define i32()* @foo() { +; The usual ADRP/ADD pair can't be used for a weak reference because it must +; evaluate to 0 if the symbol is undefined. We use a litpool entry. + ret i32()* @var +; CHECK: .LCPI0_0: +; CHECK-NEXT: .xword var + +; CHECK: ldr x0, [{{x[0-9]+}}, #:lo12:.LCPI0_0] + +} + + +@arr_var = extern_weak global [10 x i32] + +define i32* @bar() { + %addr = getelementptr [10 x i32]* @arr_var, i32 0, i32 5 +; CHECK: .LCPI1_0: +; CHECK-NEXT: .xword arr_var + +; CHECK: ldr [[BASE:x[0-9]+]], [{{x[0-9]+}}, #:lo12:.LCPI1_0] +; CHECK: add x0, [[BASE]], #20 + ret i32* %addr +} + +@defined_weak_var = internal unnamed_addr global i32 0 + +define i32* @wibble() { + ret i32* @defined_weak_var +; CHECK: adrp [[BASE:x[0-9]+]], defined_weak_var +; CHECK: add x0, [[BASE]], #:lo12:defined_weak_var +}
\ No newline at end of file diff --git a/test/CodeGen/AArch64/extract.ll b/test/CodeGen/AArch64/extract.ll new file mode 100644 index 000000000000..06267816a4e1 --- /dev/null +++ b/test/CodeGen/AArch64/extract.ll @@ -0,0 +1,57 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +define i64 @ror_i64(i64 %in) { +; CHECK: ror_i64: + %left = shl i64 %in, 19 + %right = lshr i64 %in, 45 + %val5 = or i64 %left, %right +; CHECK: extr {{x[0-9]+}}, x0, x0, #45 + ret i64 %val5 +} + +define i32 @ror_i32(i32 %in) { +; CHECK: ror_i32: + %left = shl i32 %in, 9 + %right = lshr i32 %in, 23 + %val5 = or i32 %left, %right +; CHECK: extr {{w[0-9]+}}, w0, w0, #23 + ret i32 %val5 +} + +define i32 @extr_i32(i32 %lhs, i32 %rhs) { +; CHECK: extr_i32: + %left = shl i32 %lhs, 6 + %right = lshr i32 %rhs, 26 + %val = or i32 %left, %right + ; Order of lhs and rhs matters here. Regalloc would have to be very odd to use + ; something other than w0 and w1. +; CHECK: extr {{w[0-9]+}}, w0, w1, #26 + + ret i32 %val +} + +define i64 @extr_i64(i64 %lhs, i64 %rhs) { +; CHECK: extr_i64: + %right = lshr i64 %rhs, 40 + %left = shl i64 %lhs, 24 + %val = or i64 %right, %left + ; Order of lhs and rhs matters here. Regalloc would have to be very odd to use + ; something other than w0 and w1. +; CHECK: extr {{x[0-9]+}}, x0, x1, #40 + + ret i64 %val +} + +; Regression test: a bad experimental pattern crept into git which optimised +; this pattern to a single EXTR. +define i32 @extr_regress(i32 %a, i32 %b) { +; CHECK: extr_regress: + + %sh1 = shl i32 %a, 14 + %sh2 = lshr i32 %b, 14 + %val = or i32 %sh2, %sh1 +; CHECK-NOT: extr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, #{{[0-9]+}} + + ret i32 %val +; CHECK: ret +} diff --git a/test/CodeGen/AArch64/fastcc-reserved.ll b/test/CodeGen/AArch64/fastcc-reserved.ll new file mode 100644 index 000000000000..e40aa3033bde --- /dev/null +++ b/test/CodeGen/AArch64/fastcc-reserved.ll @@ -0,0 +1,58 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s + +; This test is designed to be run in the situation where the +; call-frame is not reserved (hence disable-fp-elim), but where +; callee-pop can occur (hence tailcallopt). + +declare fastcc void @will_pop([8 x i32], i32 %val) + +define fastcc void @foo(i32 %in) { +; CHECK: foo: + + %addr = alloca i8, i32 %in + +; Normal frame setup stuff: +; CHECK: sub sp, sp, +; CHECK: stp x29, x30 + +; Reserve space for call-frame: +; CHECK: sub sp, sp, #16 + + call fastcc void @will_pop([8 x i32] undef, i32 42) +; CHECK: bl will_pop + +; Since @will_pop is fastcc with tailcallopt, it will put the stack +; back where it needs to be, we shouldn't duplicate that +; CHECK-NOT: sub sp, sp, #16 +; CHECK-NOT: add sp, sp, + +; CHECK: ldp x29, x30 +; CHECK: add sp, sp, + ret void +} + +declare void @wont_pop([8 x i32], i32 %val) + +define void @foo1(i32 %in) { +; CHECK: foo1: + + %addr = alloca i8, i32 %in +; Normal frame setup again +; CHECK: sub sp, sp, +; CHECK: stp x29, x30 + +; Reserve space for call-frame +; CHECK: sub sp, sp, #16 + + call void @wont_pop([8 x i32] undef, i32 42) +; CHECK: bl wont_pop + +; This time we *do* need to unreserve the call-frame +; CHECK: add sp, sp, #16 + +; Check for epilogue (primarily to make sure sp spotted above wasn't +; part of it). 
+; CHECK: ldp x29, x30 +; CHECK: add sp, sp, + ret void +} diff --git a/test/CodeGen/AArch64/fastcc.ll b/test/CodeGen/AArch64/fastcc.ll new file mode 100644 index 000000000000..41cde94edc1c --- /dev/null +++ b/test/CodeGen/AArch64/fastcc.ll @@ -0,0 +1,123 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s -check-prefix CHECK-TAIL +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +; Without tailcallopt fastcc still means the caller cleans up the +; stack, so try to make sure this is respected. + +define fastcc void @func_stack0() { +; CHECK: func_stack0: +; CHECK: sub sp, sp, #48 + +; CHECK-TAIL: func_stack0: +; CHECK-TAIL: sub sp, sp, #48 + + + call fastcc void @func_stack8([8 x i32] undef, i32 42) +; CHECK: bl func_stack8 +; CHECK-NOT: sub sp, sp, + +; CHECK-TAIL: bl func_stack8 +; CHECK-TAIL: sub sp, sp, #16 + + + call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9) +; CHECK: bl func_stack32 +; CHECK-NOT: sub sp, sp, + +; CHECK-TAIL: bl func_stack32 +; CHECK-TAIL: sub sp, sp, #32 + + + call fastcc void @func_stack0() +; CHECK: bl func_stack0 +; CHECK-NOT: sub sp, sp + +; CHECK-TAIL: bl func_stack0 +; CHECK-TAIL-NOT: sub sp, sp + + ret void +; CHECK: add sp, sp, #48 +; CHECK-NEXT: ret + +; CHECK-TAIL: add sp, sp, #48 +; CHECK-TAIL-NEXT: ret + +} + +define fastcc void @func_stack8([8 x i32], i32 %stacked) { +; CHECK: func_stack8: +; CHECK: sub sp, sp, #48 + +; CHECK-TAIL: func_stack8: +; CHECK-TAIL: sub sp, sp, #48 + + + call fastcc void @func_stack8([8 x i32] undef, i32 42) +; CHECK: bl func_stack8 +; CHECK-NOT: sub sp, sp, + +; CHECK-TAIL: bl func_stack8 +; CHECK-TAIL: sub sp, sp, #16 + + + call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9) +; CHECK: bl func_stack32 +; CHECK-NOT: sub sp, sp, + +; CHECK-TAIL: bl func_stack32 +; CHECK-TAIL: sub sp, sp, #32 + + + call fastcc void @func_stack0() +; CHECK: bl func_stack0 +; CHECK-NOT: sub sp, sp + +; CHECK-TAIL: bl func_stack0 +; CHECK-TAIL-NOT: sub sp, sp + + ret void +; CHECK: add sp, sp, #48 +; CHECK-NEXT: ret + +; CHECK-TAIL: add sp, sp, #64 +; CHECK-TAIL-NEXT: ret +} + +define fastcc void @func_stack32([8 x i32], i128 %stacked0, i128 %stacked1) { +; CHECK: func_stack32: +; CHECK: sub sp, sp, #48 + +; CHECK-TAIL: func_stack32: +; CHECK-TAIL: sub sp, sp, #48 + + + call fastcc void @func_stack8([8 x i32] undef, i32 42) +; CHECK: bl func_stack8 +; CHECK-NOT: sub sp, sp, + +; CHECK-TAIL: bl func_stack8 +; CHECK-TAIL: sub sp, sp, #16 + + + call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9) +; CHECK: bl func_stack32 +; CHECK-NOT: sub sp, sp, + +; CHECK-TAIL: bl func_stack32 +; CHECK-TAIL: sub sp, sp, #32 + + + call fastcc void @func_stack0() +; CHECK: bl func_stack0 +; CHECK-NOT: sub sp, sp + +; CHECK-TAIL: bl func_stack0 +; CHECK-TAIL-NOT: sub sp, sp + + ret void +; CHECK: add sp, sp, #48 +; CHECK-NEXT: ret + +; CHECK-TAIL: add sp, sp, #80 +; CHECK-TAIL-NEXT: ret +} diff --git a/test/CodeGen/AArch64/fcmp.ll b/test/CodeGen/AArch64/fcmp.ll new file mode 100644 index 000000000000..ad4a903c9b25 --- /dev/null +++ b/test/CodeGen/AArch64/fcmp.ll @@ -0,0 +1,81 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +declare void @bar(i32) + +define void @test_float(float %a, float %b) { +; CHECK: test_float: + + %tst1 = fcmp oeq float %a, %b + br i1 %tst1, label %end, label %t2 +; CHECK: fcmp {{s[0-9]+}}, {{s[0-9]+}} +; CHECK: b.eq .L + +t2: + %tst2 = fcmp une float %b, 0.0 + br i1 %tst2, 
label %t3, label %end +; CHECK: fcmp {{s[0-9]+}}, #0.0 +; CHECK: b.eq .L + + +t3: +; This test can't be implemented with just one A64 conditional +; branch. LLVM converts "ordered and not equal" to "unordered or +; equal" before instruction selection, which is what we currently +; test. Obviously, other sequences are valid. + %tst3 = fcmp one float %a, %b + br i1 %tst3, label %t4, label %end +; CHECK: fcmp {{s[0-9]+}}, {{s[0-9]+}} +; CHECK-NEXT: b.eq .[[T4:LBB[0-9]+_[0-9]+]] +; CHECK-NEXT: b.vs .[[T4]] +t4: + %tst4 = fcmp uge float %a, -0.0 + br i1 %tst4, label %t5, label %end +; CHECK-NOT: fcmp {{s[0-9]+}}, #0.0 +; CHECK: b.mi .LBB + +t5: + call void @bar(i32 0) + ret void +end: + ret void + +} + +define void @test_double(double %a, double %b) { +; CHECK: test_double: + + %tst1 = fcmp oeq double %a, %b + br i1 %tst1, label %end, label %t2 +; CHECK: fcmp {{d[0-9]+}}, {{d[0-9]+}} +; CHECK: b.eq .L + +t2: + %tst2 = fcmp une double %b, 0.0 + br i1 %tst2, label %t3, label %end +; CHECK: fcmp {{d[0-9]+}}, #0.0 +; CHECK: b.eq .L + + +t3: +; This test can't be implemented with just one A64 conditional +; branch. LLVM converts "ordered and not equal" to "unordered or +; equal" before instruction selection, which is what we currently +; test. Obviously, other sequences are valid. + %tst3 = fcmp one double %a, %b + br i1 %tst3, label %t4, label %end +; CHECK: fcmp {{d[0-9]+}}, {{d[0-9]+}} +; CHECK-NEXT: b.eq .[[T4:LBB[0-9]+_[0-9]+]] +; CHECK-NEXT: b.vs .[[T4]] +t4: + %tst4 = fcmp uge double %a, -0.0 + br i1 %tst4, label %t5, label %end +; CHECK-NOT: fcmp {{d[0-9]+}}, #0.0 +; CHECK: b.mi .LBB + +t5: + call void @bar(i32 0) + ret void +end: + ret void + +} diff --git a/test/CodeGen/AArch64/fcvt-fixed.ll b/test/CodeGen/AArch64/fcvt-fixed.ll new file mode 100644 index 000000000000..0f7b95b2a48f --- /dev/null +++ b/test/CodeGen/AArch64/fcvt-fixed.ll @@ -0,0 +1,191 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 | FileCheck %s + +@var32 = global i32 0 +@var64 = global i64 0 + +define void @test_fcvtzs(float %flt, double %dbl) { +; CHECK: test_fcvtzs: + + %fix1 = fmul float %flt, 128.0 + %cvt1 = fptosi float %fix1 to i32 +; CHECK: fcvtzs {{w[0-9]+}}, {{s[0-9]+}}, #7 + store volatile i32 %cvt1, i32* @var32 + + %fix2 = fmul float %flt, 4294967296.0 + %cvt2 = fptosi float %fix2 to i32 +; CHECK: fcvtzs {{w[0-9]+}}, {{s[0-9]+}}, #32 + store volatile i32 %cvt2, i32* @var32 + + %fix3 = fmul float %flt, 128.0 + %cvt3 = fptosi float %fix3 to i64 +; CHECK: fcvtzs {{x[0-9]+}}, {{s[0-9]+}}, #7 + store volatile i64 %cvt3, i64* @var64 + + %fix4 = fmul float %flt, 18446744073709551616.0 + %cvt4 = fptosi float %fix4 to i64 +; CHECK: fcvtzs {{x[0-9]+}}, {{s[0-9]+}}, #64 + store volatile i64 %cvt4, i64* @var64 + + %fix5 = fmul double %dbl, 128.0 + %cvt5 = fptosi double %fix5 to i32 +; CHECK: fcvtzs {{w[0-9]+}}, {{d[0-9]+}}, #7 + store volatile i32 %cvt5, i32* @var32 + + %fix6 = fmul double %dbl, 4294967296.0 + %cvt6 = fptosi double %fix6 to i32 +; CHECK: fcvtzs {{w[0-9]+}}, {{d[0-9]+}}, #32 + store volatile i32 %cvt6, i32* @var32 + + %fix7 = fmul double %dbl, 128.0 + %cvt7 = fptosi double %fix7 to i64 +; CHECK: fcvtzs {{x[0-9]+}}, {{d[0-9]+}}, #7 + store volatile i64 %cvt7, i64* @var64 + + %fix8 = fmul double %dbl, 18446744073709551616.0 + %cvt8 = fptosi double %fix8 to i64 +; CHECK: fcvtzs {{x[0-9]+}}, {{d[0-9]+}}, #64 + store volatile i64 %cvt8, i64* @var64 + + ret void +} + +define void @test_fcvtzu(float %flt, double %dbl) { +; CHECK: test_fcvtzu: + + %fix1 = fmul float %flt, 128.0 + %cvt1 
= fptoui float %fix1 to i32 +; CHECK: fcvtzu {{w[0-9]+}}, {{s[0-9]+}}, #7 + store volatile i32 %cvt1, i32* @var32 + + %fix2 = fmul float %flt, 4294967296.0 + %cvt2 = fptoui float %fix2 to i32 +; CHECK: fcvtzu {{w[0-9]+}}, {{s[0-9]+}}, #32 + store volatile i32 %cvt2, i32* @var32 + + %fix3 = fmul float %flt, 128.0 + %cvt3 = fptoui float %fix3 to i64 +; CHECK: fcvtzu {{x[0-9]+}}, {{s[0-9]+}}, #7 + store volatile i64 %cvt3, i64* @var64 + + %fix4 = fmul float %flt, 18446744073709551616.0 + %cvt4 = fptoui float %fix4 to i64 +; CHECK: fcvtzu {{x[0-9]+}}, {{s[0-9]+}}, #64 + store volatile i64 %cvt4, i64* @var64 + + %fix5 = fmul double %dbl, 128.0 + %cvt5 = fptoui double %fix5 to i32 +; CHECK: fcvtzu {{w[0-9]+}}, {{d[0-9]+}}, #7 + store volatile i32 %cvt5, i32* @var32 + + %fix6 = fmul double %dbl, 4294967296.0 + %cvt6 = fptoui double %fix6 to i32 +; CHECK: fcvtzu {{w[0-9]+}}, {{d[0-9]+}}, #32 + store volatile i32 %cvt6, i32* @var32 + + %fix7 = fmul double %dbl, 128.0 + %cvt7 = fptoui double %fix7 to i64 +; CHECK: fcvtzu {{x[0-9]+}}, {{d[0-9]+}}, #7 + store volatile i64 %cvt7, i64* @var64 + + %fix8 = fmul double %dbl, 18446744073709551616.0 + %cvt8 = fptoui double %fix8 to i64 +; CHECK: fcvtzu {{x[0-9]+}}, {{d[0-9]+}}, #64 + store volatile i64 %cvt8, i64* @var64 + + ret void +} + +@varfloat = global float 0.0 +@vardouble = global double 0.0 + +define void @test_scvtf(i32 %int, i64 %long) { +; CHECK: test_scvtf: + + %cvt1 = sitofp i32 %int to float + %fix1 = fdiv float %cvt1, 128.0 +; CHECK: scvtf {{s[0-9]+}}, {{w[0-9]+}}, #7 + store volatile float %fix1, float* @varfloat + + %cvt2 = sitofp i32 %int to float + %fix2 = fdiv float %cvt2, 4294967296.0 +; CHECK: scvtf {{s[0-9]+}}, {{w[0-9]+}}, #32 + store volatile float %fix2, float* @varfloat + + %cvt3 = sitofp i64 %long to float + %fix3 = fdiv float %cvt3, 128.0 +; CHECK: scvtf {{s[0-9]+}}, {{x[0-9]+}}, #7 + store volatile float %fix3, float* @varfloat + + %cvt4 = sitofp i64 %long to float + %fix4 = fdiv float %cvt4, 18446744073709551616.0 +; CHECK: scvtf {{s[0-9]+}}, {{x[0-9]+}}, #64 + store volatile float %fix4, float* @varfloat + + %cvt5 = sitofp i32 %int to double + %fix5 = fdiv double %cvt5, 128.0 +; CHECK: scvtf {{d[0-9]+}}, {{w[0-9]+}}, #7 + store volatile double %fix5, double* @vardouble + + %cvt6 = sitofp i32 %int to double + %fix6 = fdiv double %cvt6, 4294967296.0 +; CHECK: scvtf {{d[0-9]+}}, {{w[0-9]+}}, #32 + store volatile double %fix6, double* @vardouble + + %cvt7 = sitofp i64 %long to double + %fix7 = fdiv double %cvt7, 128.0 +; CHECK: scvtf {{d[0-9]+}}, {{x[0-9]+}}, #7 + store volatile double %fix7, double* @vardouble + + %cvt8 = sitofp i64 %long to double + %fix8 = fdiv double %cvt8, 18446744073709551616.0 +; CHECK: scvtf {{d[0-9]+}}, {{x[0-9]+}}, #64 + store volatile double %fix8, double* @vardouble + + ret void +} + +define void @test_ucvtf(i32 %int, i64 %long) { +; CHECK: test_ucvtf: + + %cvt1 = uitofp i32 %int to float + %fix1 = fdiv float %cvt1, 128.0 +; CHECK: ucvtf {{s[0-9]+}}, {{w[0-9]+}}, #7 + store volatile float %fix1, float* @varfloat + + %cvt2 = uitofp i32 %int to float + %fix2 = fdiv float %cvt2, 4294967296.0 +; CHECK: ucvtf {{s[0-9]+}}, {{w[0-9]+}}, #32 + store volatile float %fix2, float* @varfloat + + %cvt3 = uitofp i64 %long to float + %fix3 = fdiv float %cvt3, 128.0 +; CHECK: ucvtf {{s[0-9]+}}, {{x[0-9]+}}, #7 + store volatile float %fix3, float* @varfloat + + %cvt4 = uitofp i64 %long to float + %fix4 = fdiv float %cvt4, 18446744073709551616.0 +; CHECK: ucvtf {{s[0-9]+}}, {{x[0-9]+}}, #64 + store volatile float 
%fix4, float* @varfloat + + %cvt5 = uitofp i32 %int to double + %fix5 = fdiv double %cvt5, 128.0 +; CHECK: ucvtf {{d[0-9]+}}, {{w[0-9]+}}, #7 + store volatile double %fix5, double* @vardouble + + %cvt6 = uitofp i32 %int to double + %fix6 = fdiv double %cvt6, 4294967296.0 +; CHECK: ucvtf {{d[0-9]+}}, {{w[0-9]+}}, #32 + store volatile double %fix6, double* @vardouble + + %cvt7 = uitofp i64 %long to double + %fix7 = fdiv double %cvt7, 128.0 +; CHECK: ucvtf {{d[0-9]+}}, {{x[0-9]+}}, #7 + store volatile double %fix7, double* @vardouble + + %cvt8 = uitofp i64 %long to double + %fix8 = fdiv double %cvt8, 18446744073709551616.0 +; CHECK: ucvtf {{d[0-9]+}}, {{x[0-9]+}}, #64 + store volatile double %fix8, double* @vardouble + + ret void +} diff --git a/test/CodeGen/AArch64/fcvt-int.ll b/test/CodeGen/AArch64/fcvt-int.ll new file mode 100644 index 000000000000..c771d683a99c --- /dev/null +++ b/test/CodeGen/AArch64/fcvt-int.ll @@ -0,0 +1,151 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +define i32 @test_floattoi32(float %in) { +; CHECK: test_floattoi32: + + %signed = fptosi float %in to i32 + %unsigned = fptoui float %in to i32 +; CHECK: fcvtzu [[UNSIG:w[0-9]+]], {{s[0-9]+}} +; CHECK: fcvtzs [[SIG:w[0-9]+]], {{s[0-9]+}} + + %res = sub i32 %signed, %unsigned +; CHECK: sub {{w[0-9]+}}, [[SIG]], [[UNSIG]] + + ret i32 %res +; CHECK: ret +} + +define i32 @test_doubletoi32(double %in) { +; CHECK: test_doubletoi32: + + %signed = fptosi double %in to i32 + %unsigned = fptoui double %in to i32 +; CHECK: fcvtzu [[UNSIG:w[0-9]+]], {{d[0-9]+}} +; CHECK: fcvtzs [[SIG:w[0-9]+]], {{d[0-9]+}} + + %res = sub i32 %signed, %unsigned +; CHECK: sub {{w[0-9]+}}, [[SIG]], [[UNSIG]] + + ret i32 %res +; CHECK: ret +} + +define i64 @test_floattoi64(float %in) { +; CHECK: test_floattoi64: + + %signed = fptosi float %in to i64 + %unsigned = fptoui float %in to i64 +; CHECK: fcvtzu [[UNSIG:x[0-9]+]], {{s[0-9]+}} +; CHECK: fcvtzs [[SIG:x[0-9]+]], {{s[0-9]+}} + + %res = sub i64 %signed, %unsigned +; CHECK: sub {{x[0-9]+}}, [[SIG]], [[UNSIG]] + + ret i64 %res +; CHECK: ret +} + +define i64 @test_doubletoi64(double %in) { +; CHECK: test_doubletoi64: + + %signed = fptosi double %in to i64 + %unsigned = fptoui double %in to i64 +; CHECK: fcvtzu [[UNSIG:x[0-9]+]], {{d[0-9]+}} +; CHECK: fcvtzs [[SIG:x[0-9]+]], {{d[0-9]+}} + + %res = sub i64 %signed, %unsigned +; CHECK: sub {{x[0-9]+}}, [[SIG]], [[UNSIG]] + + ret i64 %res +; CHECK: ret +} + +define float @test_i32tofloat(i32 %in) { +; CHECK: test_i32tofloat: + + %signed = sitofp i32 %in to float + %unsigned = uitofp i32 %in to float +; CHECK: ucvtf [[UNSIG:s[0-9]+]], {{w[0-9]+}} +; CHECK: scvtf [[SIG:s[0-9]+]], {{w[0-9]+}} + + %res = fsub float %signed, %unsigned +; CHECL: fsub {{s[0-9]+}}, [[SIG]], [[UNSIG]] + ret float %res +; CHECK: ret +} + +define double @test_i32todouble(i32 %in) { +; CHECK: test_i32todouble: + + %signed = sitofp i32 %in to double + %unsigned = uitofp i32 %in to double +; CHECK: ucvtf [[UNSIG:d[0-9]+]], {{w[0-9]+}} +; CHECK: scvtf [[SIG:d[0-9]+]], {{w[0-9]+}} + + %res = fsub double %signed, %unsigned +; CHECK: fsub {{d[0-9]+}}, [[SIG]], [[UNSIG]] + ret double %res +; CHECK: ret +} + +define float @test_i64tofloat(i64 %in) { +; CHECK: test_i64tofloat: + + %signed = sitofp i64 %in to float + %unsigned = uitofp i64 %in to float +; CHECK: ucvtf [[UNSIG:s[0-9]+]], {{x[0-9]+}} +; CHECK: scvtf [[SIG:s[0-9]+]], {{x[0-9]+}} + + %res = fsub float %signed, %unsigned +; CHECK: fsub {{s[0-9]+}}, [[SIG]], [[UNSIG]] + ret float %res +; 
CHECK: ret +} + +define double @test_i64todouble(i64 %in) { +; CHECK: test_i64todouble: + + %signed = sitofp i64 %in to double + %unsigned = uitofp i64 %in to double +; CHECK: ucvtf [[UNSIG:d[0-9]+]], {{x[0-9]+}} +; CHECK: scvtf [[SIG:d[0-9]+]], {{x[0-9]+}} + + %res = fsub double %signed, %unsigned +; CHECK: sub {{d[0-9]+}}, [[SIG]], [[UNSIG]] + ret double %res +; CHECK: ret +} + +define i32 @test_bitcastfloattoi32(float %in) { +; CHECK: test_bitcastfloattoi32: + + %res = bitcast float %in to i32 +; CHECK: fmov {{w[0-9]+}}, {{s[0-9]+}} + ret i32 %res +} + +define i64 @test_bitcastdoubletoi64(double %in) { +; CHECK: test_bitcastdoubletoi64: + + %res = bitcast double %in to i64 +; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}} + ret i64 %res +} + +define float @test_bitcasti32tofloat(i32 %in) { +; CHECK: test_bitcasti32tofloat: + + %res = bitcast i32 %in to float +; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}} + ret float %res + +} + +define double @test_bitcasti64todouble(i64 %in) { +; CHECK: test_bitcasti64todouble: + + %res = bitcast i64 %in to double +; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} + ret double %res + +} diff --git a/test/CodeGen/AArch64/flags-multiuse.ll b/test/CodeGen/AArch64/flags-multiuse.ll new file mode 100644 index 000000000000..940c146f0a9f --- /dev/null +++ b/test/CodeGen/AArch64/flags-multiuse.ll @@ -0,0 +1,35 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s + +; LLVM should be able to cope with multiple uses of the same flag-setting +; instruction at different points of a routine. Either by rematerializing the +; compare or by saving and restoring the flag register. + +declare void @bar() + +@var = global i32 0 + +define i32 @test_multiflag(i32 %n, i32 %m, i32 %o) { +; CHECK: test_multiflag: + + %test = icmp ne i32 %n, %m +; CHECK: cmp [[LHS:w[0-9]+]], [[RHS:w[0-9]+]] + + %val = zext i1 %test to i32 +; CHECK: csinc {{[xw][0-9]+}}, {{xzr|wzr}}, {{xzr|wzr}}, eq + + store i32 %val, i32* @var + + call void @bar() +; CHECK: bl bar + + ; Currently, the comparison is emitted again. An MSR/MRS pair would also be + ; acceptable, but assuming the call preserves NZCV is not. 
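; (A hedged illustration of that alternative, not what the backend currently
; emits: the flags could be copied into a callee-saved register around the
; call, roughly
;    mrs x19, NZCV
;    bl bar
;    msr NZCV, x19
; with x19 chosen only because, being callee-saved, it survives the call.)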
+ br i1 %test, label %iftrue, label %iffalse +; CHECK: cmp [[LHS]], [[RHS]] +; CHECK: b.eq + +iftrue: + ret i32 42 +iffalse: + ret i32 0 +} diff --git a/test/CodeGen/AArch64/floatdp_1source.ll b/test/CodeGen/AArch64/floatdp_1source.ll new file mode 100644 index 000000000000..c94ba9b57b5a --- /dev/null +++ b/test/CodeGen/AArch64/floatdp_1source.ll @@ -0,0 +1,138 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +@varhalf = global half 0.0 +@varfloat = global float 0.0 +@vardouble = global double 0.0 + +declare float @fabsf(float) readonly +declare double @fabs(double) readonly + +declare float @llvm.sqrt.f32(float %Val) +declare double @llvm.sqrt.f64(double %Val) + +declare float @ceilf(float) readonly +declare double @ceil(double) readonly + +declare float @floorf(float) readonly +declare double @floor(double) readonly + +declare float @truncf(float) readonly +declare double @trunc(double) readonly + +declare float @rintf(float) readonly +declare double @rint(double) readonly + +declare float @nearbyintf(float) readonly +declare double @nearbyint(double) readonly + +define void @simple_float() { +; CHECK: simple_float: + %val1 = load volatile float* @varfloat + + %valabs = call float @fabsf(float %val1) + store volatile float %valabs, float* @varfloat +; CHECK: fabs {{s[0-9]+}}, {{s[0-9]+}} + + %valneg = fsub float -0.0, %val1 + store volatile float %valneg, float* @varfloat +; CHECK: fneg {{s[0-9]+}}, {{s[0-9]+}} + + %valsqrt = call float @llvm.sqrt.f32(float %val1) + store volatile float %valsqrt, float* @varfloat +; CHECK: fsqrt {{s[0-9]+}}, {{s[0-9]+}} + + %valceil = call float @ceilf(float %val1) + store volatile float %valceil, float* @varfloat +; CHECK: frintp {{s[0-9]+}}, {{s[0-9]+}} + + %valfloor = call float @floorf(float %val1) + store volatile float %valfloor, float* @varfloat +; CHECK: frintm {{s[0-9]+}}, {{s[0-9]+}} + + %valtrunc = call float @truncf(float %val1) + store volatile float %valtrunc, float* @varfloat +; CHECK: frintz {{s[0-9]+}}, {{s[0-9]+}} + + %valrint = call float @rintf(float %val1) + store volatile float %valrint, float* @varfloat +; CHECK: frintx {{s[0-9]+}}, {{s[0-9]+}} + + %valnearbyint = call float @nearbyintf(float %val1) + store volatile float %valnearbyint, float* @varfloat +; CHECK: frinti {{s[0-9]+}}, {{s[0-9]+}} + + ret void +} + +define void @simple_double() { +; CHECK: simple_double: + %val1 = load volatile double* @vardouble + + %valabs = call double @fabs(double %val1) + store volatile double %valabs, double* @vardouble +; CHECK: fabs {{d[0-9]+}}, {{d[0-9]+}} + + %valneg = fsub double -0.0, %val1 + store volatile double %valneg, double* @vardouble +; CHECK: fneg {{d[0-9]+}}, {{d[0-9]+}} + + %valsqrt = call double @llvm.sqrt.f64(double %val1) + store volatile double %valsqrt, double* @vardouble +; CHECK: fsqrt {{d[0-9]+}}, {{d[0-9]+}} + + %valceil = call double @ceil(double %val1) + store volatile double %valceil, double* @vardouble +; CHECK: frintp {{d[0-9]+}}, {{d[0-9]+}} + + %valfloor = call double @floor(double %val1) + store volatile double %valfloor, double* @vardouble +; CHECK: frintm {{d[0-9]+}}, {{d[0-9]+}} + + %valtrunc = call double @trunc(double %val1) + store volatile double %valtrunc, double* @vardouble +; CHECK: frintz {{d[0-9]+}}, {{d[0-9]+}} + + %valrint = call double @rint(double %val1) + store volatile double %valrint, double* @vardouble +; CHECK: frintx {{d[0-9]+}}, {{d[0-9]+}} + + %valnearbyint = call double @nearbyint(double %val1) + store volatile double %valnearbyint, double* 
@vardouble +; CHECK: frinti {{d[0-9]+}}, {{d[0-9]+}} + + ret void +} + +define void @converts() { +; CHECK: converts: + + %val16 = load volatile half* @varhalf + %val32 = load volatile float* @varfloat + %val64 = load volatile double* @vardouble + + %val16to32 = fpext half %val16 to float + store volatile float %val16to32, float* @varfloat +; CHECK: fcvt {{s[0-9]+}}, {{h[0-9]+}} + + %val16to64 = fpext half %val16 to double + store volatile double %val16to64, double* @vardouble +; CHECK: fcvt {{d[0-9]+}}, {{h[0-9]+}} + + %val32to16 = fptrunc float %val32 to half + store volatile half %val32to16, half* @varhalf +; CHECK: fcvt {{h[0-9]+}}, {{s[0-9]+}} + + %val32to64 = fpext float %val32 to double + store volatile double %val32to64, double* @vardouble +; CHECK: fcvt {{d[0-9]+}}, {{s[0-9]+}} + + %val64to16 = fptrunc double %val64 to half + store volatile half %val64to16, half* @varhalf +; CHECK: fcvt {{h[0-9]+}}, {{d[0-9]+}} + + %val64to32 = fptrunc double %val64 to float + store volatile float %val64to32, float* @varfloat +; CHECK: fcvt {{s[0-9]+}}, {{d[0-9]+}} + + ret void +} diff --git a/test/CodeGen/AArch64/floatdp_2source.ll b/test/CodeGen/AArch64/floatdp_2source.ll new file mode 100644 index 000000000000..b2256b342acf --- /dev/null +++ b/test/CodeGen/AArch64/floatdp_2source.ll @@ -0,0 +1,60 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +@varfloat = global float 0.0 +@vardouble = global double 0.0 + +define void @testfloat() { +; CHECK: testfloat: + %val1 = load float* @varfloat + + %val2 = fadd float %val1, %val1 +; CHECK: fadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + + %val3 = fmul float %val2, %val1 +; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + + %val4 = fdiv float %val3, %val1 +; CHECK: fdiv {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + + %val5 = fsub float %val4, %val2 +; CHECK: fsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + + store volatile float %val5, float* @varfloat + +; These will be enabled with the implementation of floating-point litpool entries. + %val6 = fmul float %val1, %val2 + %val7 = fsub float -0.0, %val6 +; CHECK: fnmul {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + + store volatile float %val7, float* @varfloat + + ret void +} + +define void @testdouble() { +; CHECK: testdouble: + %val1 = load double* @vardouble + + %val2 = fadd double %val1, %val1 +; CHECK: fadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + + %val3 = fmul double %val2, %val1 +; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + + %val4 = fdiv double %val3, %val1 +; CHECK: fdiv {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + + %val5 = fsub double %val4, %val2 +; CHECK: fsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + + store volatile double %val5, double* @vardouble + +; These will be enabled with the implementation of floating-point litpool entries.
+ %val6 = fmul double %val1, %val2 + %val7 = fsub double -0.0, %val6 +; CHECK: fnmul {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + + store volatile double %val7, double* @vardouble + + ret void +} diff --git a/test/CodeGen/AArch64/fp-cond-sel.ll b/test/CodeGen/AArch64/fp-cond-sel.ll new file mode 100644 index 000000000000..56e8f16f9b36 --- /dev/null +++ b/test/CodeGen/AArch64/fp-cond-sel.ll @@ -0,0 +1,26 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +@varfloat = global float 0.0 +@vardouble = global double 0.0 + +define void @test_csel(i32 %lhs32, i32 %rhs32, i64 %lhs64) { +; CHECK: test_csel: + + %tst1 = icmp ugt i32 %lhs32, %rhs32 + %val1 = select i1 %tst1, float 0.0, float 1.0 + store float %val1, float* @varfloat +; CHECK: ldr [[FLT0:s[0-9]+]], [{{x[0-9]+}}, #:lo12:.LCPI +; CHECK: fmov [[FLT1:s[0-9]+]], #1.0 +; CHECK: fcsel {{s[0-9]+}}, [[FLT0]], [[FLT1]], hi + + %rhs64 = sext i32 %rhs32 to i64 + %tst2 = icmp sle i64 %lhs64, %rhs64 + %val2 = select i1 %tst2, double 1.0, double 0.0 + store double %val2, double* @vardouble +; CHECK: ldr [[FLT0:d[0-9]+]], [{{x[0-9]+}}, #:lo12:.LCPI +; CHECK: fmov [[FLT1:d[0-9]+]], #1.0 +; CHECK: fcsel {{d[0-9]+}}, [[FLT1]], [[FLT0]], le + + ret void +; CHECK: ret +} diff --git a/test/CodeGen/AArch64/fp-dp3.ll b/test/CodeGen/AArch64/fp-dp3.ll new file mode 100644 index 000000000000..39db9be15771 --- /dev/null +++ b/test/CodeGen/AArch64/fp-dp3.ll @@ -0,0 +1,102 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -fp-contract=fast | FileCheck %s + +declare float @llvm.fma.f32(float, float, float) +declare double @llvm.fma.f64(double, double, double) + +define float @test_fmadd(float %a, float %b, float %c) { +; CHECK: test_fmadd: + %val = call float @llvm.fma.f32(float %a, float %b, float %c) +; CHECK: fmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + ret float %val +} + +define float @test_fmsub(float %a, float %b, float %c) { +; CHECK: test_fmsub: + %nega = fsub float -0.0, %a + %val = call float @llvm.fma.f32(float %nega, float %b, float %c) +; CHECK: fmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + ret float %val +} + +define float @test_fnmadd(float %a, float %b, float %c) { +; CHECK: test_fnmadd: + %negc = fsub float -0.0, %c + %val = call float @llvm.fma.f32(float %a, float %b, float %negc) +; CHECK: fnmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + ret float %val +} + +define float @test_fnmsub(float %a, float %b, float %c) { +; CHECK: test_fnmsub: + %nega = fsub float -0.0, %a + %negc = fsub float -0.0, %c + %val = call float @llvm.fma.f32(float %nega, float %b, float %negc) +; CHECK: fnmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + ret float %val +} + +define double @testd_fmadd(double %a, double %b, double %c) { +; CHECK: testd_fmadd: + %val = call double @llvm.fma.f64(double %a, double %b, double %c) +; CHECK: fmadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + ret double %val +} + +define double @testd_fmsub(double %a, double %b, double %c) { +; CHECK: testd_fmsub: + %nega = fsub double -0.0, %a + %val = call double @llvm.fma.f64(double %nega, double %b, double %c) +; CHECK: fmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + ret double %val +} + +define double @testd_fnmadd(double %a, double %b, double %c) { +; CHECK: testd_fnmadd: + %negc = fsub double -0.0, %c + %val = call double @llvm.fma.f64(double %a, double %b, double %negc) +; CHECK: fnmadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + ret double %val +} + +define 
double @testd_fnmsub(double %a, double %b, double %c) { +; CHECK: testd_fnmsub: + %nega = fsub double -0.0, %a + %negc = fsub double -0.0, %c + %val = call double @llvm.fma.f64(double %nega, double %b, double %negc) +; CHECK: fnmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + ret double %val +} + +define float @test_fmadd_unfused(float %a, float %b, float %c) { +; CHECK: test_fmadd_unfused: + %prod = fmul float %b, %c + %sum = fadd float %a, %prod +; CHECK: fmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + ret float %sum +} + +define float @test_fmsub_unfused(float %a, float %b, float %c) { +; CHECK: test_fmsub_unfused: + %prod = fmul float %b, %c + %diff = fsub float %a, %prod +; CHECK: fmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + ret float %diff +} + +define float @test_fnmadd_unfused(float %a, float %b, float %c) { +; CHECK: test_fnmadd_unfused: + %nega = fsub float -0.0, %a + %prod = fmul float %b, %c + %sum = fadd float %nega, %prod +; CHECK: fnmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + ret float %sum +} + +define float @test_fnmsub_unfused(float %a, float %b, float %c) { +; CHECK: test_fnmsub_unfused: + %nega = fsub float -0.0, %a + %prod = fmul float %b, %c + %diff = fsub float %nega, %prod +; CHECK: fnmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + ret float %diff +} diff --git a/test/CodeGen/AArch64/fp128-folding.ll b/test/CodeGen/AArch64/fp128-folding.ll new file mode 100644 index 000000000000..b5bdcf4f37b4 --- /dev/null +++ b/test/CodeGen/AArch64/fp128-folding.ll @@ -0,0 +1,17 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s +declare void @bar(i8*, i8*, i32*) + +; SelectionDAG used to try to fold some fp128 operations using the ppc128 type, +; which is not supported. + +define fp128 @test_folding() { +; CHECK: test_folding: + %l = alloca i32 + store i32 42, i32* %l + %val = load i32* %l + %fpval = sitofp i32 %val to fp128 + ; If the value is loaded from a constant pool into an fp128, it's been folded + ; successfully. +; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.LCPI + ret fp128 %fpval +}
\ No newline at end of file diff --git a/test/CodeGen/AArch64/fp128.ll b/test/CodeGen/AArch64/fp128.ll new file mode 100644 index 000000000000..258d34b8f81f --- /dev/null +++ b/test/CodeGen/AArch64/fp128.ll @@ -0,0 +1,280 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s + +@lhs = global fp128 zeroinitializer +@rhs = global fp128 zeroinitializer + +define fp128 @test_add() { +; CHECK: test_add: + + %lhs = load fp128* @lhs + %rhs = load fp128* @rhs +; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs] +; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs] + + %val = fadd fp128 %lhs, %rhs +; CHECK: bl __addtf3 + ret fp128 %val +} + +define fp128 @test_sub() { +; CHECK: test_sub: + + %lhs = load fp128* @lhs + %rhs = load fp128* @rhs +; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs] +; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs] + + %val = fsub fp128 %lhs, %rhs +; CHECK: bl __subtf3 + ret fp128 %val +} + +define fp128 @test_mul() { +; CHECK: test_mul: + + %lhs = load fp128* @lhs + %rhs = load fp128* @rhs +; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs] +; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs] + + %val = fmul fp128 %lhs, %rhs +; CHECK: bl __multf3 + ret fp128 %val +} + +define fp128 @test_div() { +; CHECK: test_div: + + %lhs = load fp128* @lhs + %rhs = load fp128* @rhs +; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs] +; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs] + + %val = fdiv fp128 %lhs, %rhs +; CHECK: bl __divtf3 + ret fp128 %val +} + +@var32 = global i32 0 +@var64 = global i64 0 + +define void @test_fptosi() { +; CHECK: test_fptosi: + %val = load fp128* @lhs + + %val32 = fptosi fp128 %val to i32 + store i32 %val32, i32* @var32 +; CHECK: bl __fixtfsi + + %val64 = fptosi fp128 %val to i64 + store i64 %val64, i64* @var64 +; CHECK: bl __fixtfdi + + ret void +} + +define void @test_fptoui() { +; CHECK: test_fptoui: + %val = load fp128* @lhs + + %val32 = fptoui fp128 %val to i32 + store i32 %val32, i32* @var32 +; CHECK: bl __fixunstfsi + + %val64 = fptoui fp128 %val to i64 + store i64 %val64, i64* @var64 +; CHECK: bl __fixunstfdi + + ret void +} + +define void @test_sitofp() { +; CHECK: test_sitofp: + + %src32 = load i32* @var32 + %val32 = sitofp i32 %src32 to fp128 + store volatile fp128 %val32, fp128* @lhs +; CHECK: bl __floatsitf + + %src64 = load i64* @var64 + %val64 = sitofp i64 %src64 to fp128 + store volatile fp128 %val64, fp128* @lhs +; CHECK: bl __floatditf + + ret void +} + +define void @test_uitofp() { +; CHECK: test_uitofp: + + %src32 = load i32* @var32 + %val32 = uitofp i32 %src32 to fp128 + store volatile fp128 %val32, fp128* @lhs +; CHECK: bl __floatunsitf + + %src64 = load i64* @var64 + %val64 = uitofp i64 %src64 to fp128 + store volatile fp128 %val64, fp128* @lhs +; CHECK: bl __floatunditf + + ret void +} + +define i1 @test_setcc1() { +; CHECK: test_setcc1: + + %lhs = load fp128* @lhs + %rhs = load fp128* @rhs +; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs] +; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs] + +; Technically, everything after the call to __letf2 is redundant, but we'll let +; LLVM have its fun for now. + %val = fcmp ole fp128 %lhs, %rhs +; CHECK: bl __letf2 +; CHECK: cmp w0, #0 +; CHECK: csinc w0, wzr, wzr, gt + + ret i1 %val +; CHECK: ret +} + +define i1 @test_setcc2() { +; CHECK: test_setcc2: + + %lhs = load fp128* @lhs + %rhs = load fp128* @rhs +; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs] +; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs] + +; Technically, everything after the call to __letf2 is redundant, but we'll let +; LLVM have its fun for now. 
+ %val = fcmp ugt fp128 %lhs, %rhs +; CHECK: bl __unordtf2 +; CHECK: mov x[[UNORDERED:[0-9]+]], x0 + +; CHECK: bl __gttf2 +; CHECK: cmp w0, #0 +; CHECK: csinc [[GT:w[0-9]+]], wzr, wzr, le +; CHECK: cmp w[[UNORDERED]], #0 +; CHECK: csinc [[UNORDERED:w[0-9]+]], wzr, wzr, eq +; CHECK: orr w0, [[UNORDERED]], [[GT]] + + ret i1 %val +; CHECK: ret +} + +define i32 @test_br_cc() { +; CHECK: test_br_cc: + + %lhs = load fp128* @lhs + %rhs = load fp128* @rhs +; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs] +; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs] + + ; olt == !uge, which LLVM unfortunately "optimizes" this to. + %cond = fcmp olt fp128 %lhs, %rhs +; CHECK: bl __unordtf2 +; CHECK: mov x[[UNORDERED:[0-9]+]], x0 + +; CHECK: bl __getf2 +; CHECK: cmp w0, #0 + +; CHECK: csinc [[OGE:w[0-9]+]], wzr, wzr, lt +; CHECK: cmp w[[UNORDERED]], #0 +; CHECK: csinc [[UNORDERED:w[0-9]+]], wzr, wzr, eq +; CHECK: orr [[UGE:w[0-9]+]], [[UNORDERED]], [[OGE]] +; CHECK: cbnz [[UGE]], [[RET29:.LBB[0-9]+_[0-9]+]] + br i1 %cond, label %iftrue, label %iffalse + +iftrue: + ret i32 42 +; CHECK-NEXT: BB# +; CHECK-NEXT: movz x0, #42 +; CHECK-NEXT: b [[REALRET:.LBB[0-9]+_[0-9]+]] + +iffalse: + ret i32 29 +; CHECK: [[RET29]]: +; CHECK-NEXT: movz x0, #29 +; CHECK-NEXT: [[REALRET]]: +; CHECK: ret +} + +define void @test_select(i1 %cond, fp128 %lhs, fp128 %rhs) { +; CHECK: test_select: + + %val = select i1 %cond, fp128 %lhs, fp128 %rhs + store fp128 %val, fp128* @lhs +; CHECK: cmp w0, #0 +; CHECK: str q1, [sp] +; CHECK-NEXT: b.eq [[IFFALSE:.LBB[0-9]+_[0-9]+]] +; CHECK-NEXT: BB# +; CHECK-NEXT: str q0, [sp] +; CHECK-NEXT: [[IFFALSE]]: +; CHECK-NEXT: ldr q0, [sp] +; CHECK: str q0, [{{x[0-9]+}}, #:lo12:lhs] + ret void +; CHECK: ret +} + +@varfloat = global float 0.0 +@vardouble = global double 0.0 + +define void @test_round() { +; CHECK: test_round: + + %val = load fp128* @lhs + + %float = fptrunc fp128 %val to float + store float %float, float* @varfloat +; CHECK: bl __trunctfsf2 +; CHECK: str s0, [{{x[0-9]+}}, #:lo12:varfloat] + + %double = fptrunc fp128 %val to double + store double %double, double* @vardouble +; CHECK: bl __trunctfdf2 +; CHECK: str d0, [{{x[0-9]+}}, #:lo12:vardouble] + + ret void +} + +define void @test_extend() { +; CHECK: test_extend: + + %val = load fp128* @lhs + + %float = load float* @varfloat + %fromfloat = fpext float %float to fp128 + store volatile fp128 %fromfloat, fp128* @lhs +; CHECK: bl __extendsftf2 +; CHECK: str q0, [{{x[0-9]+}}, #:lo12:lhs] + + %double = load double* @vardouble + %fromdouble = fpext double %double to fp128 + store volatile fp128 %fromdouble, fp128* @lhs +; CHECK: bl __extenddftf2 +; CHECK: str q0, [{{x[0-9]+}}, #:lo12:lhs] + + ret void +; CHECK: ret +} + +define fp128 @test_neg(fp128 %in) { +; CHECK: [[MINUS0:.LCPI[0-9]+_0]]: +; Make sure the weird hex constant below *is* -0.0 +; CHECK-NEXT: fp128 -0 + +; CHECK: test_neg: + + ; Could in principle be optimized to fneg which we can't select, this makes + ; sure that doesn't happen. 
+ %ret = fsub fp128 0xL00000000000000008000000000000000, %in +; CHECK: str q0, [sp, #-16] +; CHECK-NEXT: ldr q1, [sp], #16 +; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:[[MINUS0]]] +; CHECK: bl __subtf3 + + ret fp128 %ret +; CHECK: ret +} diff --git a/test/CodeGen/AArch64/fpimm.ll b/test/CodeGen/AArch64/fpimm.ll new file mode 100644 index 000000000000..fd28aeef9291 --- /dev/null +++ b/test/CodeGen/AArch64/fpimm.ll @@ -0,0 +1,34 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +@varf32 = global float 0.0 +@varf64 = global double 0.0 + +define void @check_float() { +; CHECK: check_float: + + %val = load float* @varf32 + %newval1 = fadd float %val, 8.5 + store volatile float %newval1, float* @varf32 +; CHECK: fmov {{s[0-9]+}}, #8.5 + + %newval2 = fadd float %val, 128.0 + store volatile float %newval2, float* @varf32 +; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, #:lo12:.LCPI0_0 + + ret void +} + +define void @check_double() { +; CHECK: check_double: + + %val = load double* @varf64 + %newval1 = fadd double %val, 8.5 + store volatile double %newval1, double* @varf64 +; CHECK: fmov {{d[0-9]+}}, #8.5 + + %newval2 = fadd double %val, 128.0 + store volatile double %newval2, double* @varf64 +; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:.LCPI1_0 + + ret void +} diff --git a/test/CodeGen/AArch64/func-argpassing.ll b/test/CodeGen/AArch64/func-argpassing.ll new file mode 100644 index 000000000000..78fde6a3c33a --- /dev/null +++ b/test/CodeGen/AArch64/func-argpassing.ll @@ -0,0 +1,193 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +%myStruct = type { i64 , i8, i32 } + +@var8 = global i8 0 +@var32 = global i32 0 +@var64 = global i64 0 +@var128 = global i128 0 +@varfloat = global float 0.0 +@vardouble = global double 0.0 +@varstruct = global %myStruct zeroinitializer + +define void @take_i8s(i8 %val1, i8 %val2) { +; CHECK: take_i8s: + store i8 %val2, i8* @var8 + ; Not using w1 may be technically allowed, but it would indicate a + ; problem in itself. +; CHECK: strb w1, [{{x[0-9]+}}, #:lo12:var8] + ret void +} + +define void @add_floats(float %val1, float %val2) { +; CHECK: add_floats: + %newval = fadd float %val1, %val2 +; CHECK: fadd [[ADDRES:s[0-9]+]], s0, s1 + store float %newval, float* @varfloat +; CHECK: str [[ADDRES]], [{{x[0-9]+}}, #:lo12:varfloat] + ret void +} + +; byval pointers should be allocated to the stack and copied as if +; with memcpy. 
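; (Illustrative caller-side sketch, using the same IR conventions as this file
; but not part of the checked output: passing a %myStruct byval behaves as if
; the caller did
;    %copy = alloca %myStruct
;    ...memcpy the original struct into %copy...
;    call void @take_struct(%myStruct* byval %copy)
; so the callee below only ever touches its own stack copy.)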
+define void @take_struct(%myStruct* byval %structval) { +; CHECK: take_struct: + %addr0 = getelementptr %myStruct* %structval, i64 0, i32 2 + %addr1 = getelementptr %myStruct* %structval, i64 0, i32 0 + + %val0 = load i32* %addr0 + ; Some weird move means x0 is used for one access +; CHECK: ldr [[REG32:w[0-9]+]], [{{x[0-9]+|sp}}, #12] + store i32 %val0, i32* @var32 +; CHECK: str [[REG32]], [{{x[0-9]+}}, #:lo12:var32] + + %val1 = load i64* %addr1 +; CHECK: ldr [[REG64:x[0-9]+]], [{{x[0-9]+|sp}}] + store i64 %val1, i64* @var64 +; CHECK: str [[REG64]], [{{x[0-9]+}}, #:lo12:var64] + + ret void +} + +; %structval should be at sp + 16 +define void @check_byval_align(i32* byval %ignore, %myStruct* byval align 16 %structval) { +; CHECK: check_byval_align: + + %addr0 = getelementptr %myStruct* %structval, i64 0, i32 2 + %addr1 = getelementptr %myStruct* %structval, i64 0, i32 0 + + %val0 = load i32* %addr0 + ; Some weird move means x0 is used for one access +; CHECK: add x[[STRUCTVAL_ADDR:[0-9]+]], sp, #16 +; CHECK: ldr [[REG32:w[0-9]+]], [x[[STRUCTVAL_ADDR]], #12] + store i32 %val0, i32* @var32 +; CHECK: str [[REG32]], [{{x[0-9]+}}, #:lo12:var32] + + %val1 = load i64* %addr1 +; CHECK: ldr [[REG64:x[0-9]+]], [sp, #16] + store i64 %val1, i64* @var64 +; CHECK: str [[REG64]], [{{x[0-9]+}}, #:lo12:var64] + + ret void +} + +define i32 @return_int() { +; CHECK: return_int: + %val = load i32* @var32 + ret i32 %val +; CHECK: ldr w0, [{{x[0-9]+}}, #:lo12:var32] + ; Make sure epilogue follows +; CHECK-NEXT: ret +} + +define double @return_double() { +; CHECK: return_double: + ret double 3.14 +; CHECK: ldr d0, [{{x[0-9]+}}, #:lo12:.LCPI +} + +; This is the kind of IR clang will produce for returning a struct +; small enough to go into registers. Not all that pretty, but it +; works. +define [2 x i64] @return_struct() { +; CHECK: return_struct: + %addr = bitcast %myStruct* @varstruct to [2 x i64]* + %val = load [2 x i64]* %addr + ret [2 x i64] %val +; CHECK: ldr x0, [{{x[0-9]+}}, #:lo12:varstruct] + ; Odd register regex below disallows x0 which we want to be live now. +; CHECK: add {{x[1-9][0-9]*}}, {{x[1-9][0-9]*}}, #:lo12:varstruct +; CHECK-NEXT: ldr x1, [{{x[1-9][0-9]*}}, #8] + ; Make sure epilogue immediately follows +; CHECK-NEXT: ret +} + +; Large structs are passed by reference (storage allocated by caller +; to preserve value semantics) in x8. Strictly this only applies to +; structs larger than 16 bytes, but C semantics can still be provided +; if LLVM does it to %myStruct too. So this is the simplest check +define void @return_large_struct(%myStruct* sret %retval) { +; CHECK: return_large_struct: + %addr0 = getelementptr %myStruct* %retval, i64 0, i32 0 + %addr1 = getelementptr %myStruct* %retval, i64 0, i32 1 + %addr2 = getelementptr %myStruct* %retval, i64 0, i32 2 + + store i64 42, i64* %addr0 + store i8 2, i8* %addr1 + store i32 9, i32* %addr2 +; CHECK: str {{x[0-9]+}}, [x8] +; CHECK: strb {{w[0-9]+}}, [x8, #8] +; CHECK: str {{w[0-9]+}}, [x8, #12] + + ret void +} + +; This struct is just too far along to go into registers: (only x7 is +; available, but it needs two). Also make sure that %stacked doesn't +; sneak into x7 behind. 
+define i32 @struct_on_stack(i8 %var0, i16 %var1, i32 %var2, i64 %var3, i128 %var45, + i32* %var6, %myStruct* byval %struct, i32* byval %stacked, + double %notstacked) { +; CHECK: struct_on_stack: + %addr = getelementptr %myStruct* %struct, i64 0, i32 0 + %val64 = load i64* %addr + store i64 %val64, i64* @var64 + ; Currently nothing on local stack, so struct should be at sp +; CHECK: ldr [[VAL64:x[0-9]+]], [sp] +; CHECK: str [[VAL64]], [{{x[0-9]+}}, #:lo12:var64] + + store double %notstacked, double* @vardouble +; CHECK-NOT: ldr d0 +; CHECK: str d0, [{{x[0-9]+}}, #:lo12:vardouble + + %retval = load i32* %stacked + ret i32 %retval +; CHECK: ldr w0, [sp, #16] +} + +define void @stacked_fpu(float %var0, double %var1, float %var2, float %var3, + float %var4, float %var5, float %var6, float %var7, + float %var8) { +; CHECK: stacked_fpu: + store float %var8, float* @varfloat + ; Beware as above: the offset would be different on big-endian + ; machines if the first ldr were changed to use s-registers. +; CHECK: ldr d[[VALFLOAT:[0-9]+]], [sp] +; CHECK: str s[[VALFLOAT]], [{{x[0-9]+}}, #:lo12:varfloat] + + ret void +} + +; 128-bit integer types should be passed in xEVEN, xODD rather than +; the reverse. In this case x2 and x3. Nothing should use x1. +define i32 @check_i128_regalign(i32 %val0, i128 %val1, i32 %val2) { +; CHECK: check_i128_regalign + store i128 %val1, i128* @var128 +; CHECK: str x2, [{{x[0-9]+}}, #:lo12:var128] +; CHECK: str x3, [{{x[0-9]+}}, #8] + + ret i32 %val2 +; CHECK: mov x0, x4 +} + +define void @check_i128_stackalign(i32 %val0, i32 %val1, i32 %val2, i32 %val3, + i32 %val4, i32 %val5, i32 %val6, i32 %val7, + i32 %stack1, i128 %stack2) { +; CHECK: check_i128_stackalign + store i128 %stack2, i128* @var128 + ; Nothing local on stack in current codegen, so first stack is 16 away +; CHECK: ldr {{x[0-9]+}}, [sp, #16] + ; Important point is that we address sp+24 for second dword +; CHECK: add [[REG:x[0-9]+]], sp, #16 +; CHECK: ldr {{x[0-9]+}}, {{\[}}[[REG]], #8] + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) + +define i32 @test_extern() { +; CHECK: test_extern: + call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* undef, i32 undef, i32 4, i1 0) +; CHECK: bl memcpy + ret i32 0 +} diff --git a/test/CodeGen/AArch64/func-calls.ll b/test/CodeGen/AArch64/func-calls.ll new file mode 100644 index 000000000000..13b689c40886 --- /dev/null +++ b/test/CodeGen/AArch64/func-calls.ll @@ -0,0 +1,140 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +%myStruct = type { i64 , i8, i32 } + +@var8 = global i8 0 +@var8_2 = global i8 0 +@var32 = global i32 0 +@var64 = global i64 0 +@var128 = global i128 0 +@varfloat = global float 0.0 +@varfloat_2 = global float 0.0 +@vardouble = global double 0.0 +@varstruct = global %myStruct zeroinitializer +@varsmallstruct = global [2 x i64] zeroinitializer + +declare void @take_i8s(i8 %val1, i8 %val2) +declare void @take_floats(float %val1, float %val2) + +define void @simple_args() { +; CHECK: simple_args: + %char1 = load i8* @var8 + %char2 = load i8* @var8_2 + call void @take_i8s(i8 %char1, i8 %char2) +; CHECK: ldrb w0, [{{x[0-9]+}}, #:lo12:var8] +; CHECK: ldrb w1, [{{x[0-9]+}}, #:lo12:var8_2] +; CHECK: bl take_i8s + + %float1 = load float* @varfloat + %float2 = load float* @varfloat_2 + call void @take_floats(float %float1, float %float2) +; CHECK: ldr s1, [{{x[0-9]+}}, #:lo12:varfloat_2] +; CHECK: ldr s0, [{{x[0-9]+}}, #:lo12:varfloat] +; CHECK: bl take_floats + + ret void +} + +declare i32 
@return_int() +declare double @return_double() +declare [2 x i64] @return_smallstruct() +declare void @return_large_struct(%myStruct* sret %retval) + +define void @simple_rets() { +; CHECK: simple_rets: + + %int = call i32 @return_int() + store i32 %int, i32* @var32 +; CHECK: bl return_int +; CHECK: str w0, [{{x[0-9]+}}, #:lo12:var32] + + %dbl = call double @return_double() + store double %dbl, double* @vardouble +; CHECK: bl return_double +; CHECK: str d0, [{{x[0-9]+}}, #:lo12:vardouble] + + %arr = call [2 x i64] @return_smallstruct() + store [2 x i64] %arr, [2 x i64]* @varsmallstruct +; CHECK: bl return_smallstruct +; CHECK: str x1, [{{x[0-9]+}}, #8] +; CHECK: str x0, [{{x[0-9]+}}, #:lo12:varsmallstruct] + + call void @return_large_struct(%myStruct* sret @varstruct) +; CHECK: add x8, {{x[0-9]+}}, #:lo12:varstruct +; CHECK: bl return_large_struct + + ret void +} + + +declare i32 @struct_on_stack(i8 %var0, i16 %var1, i32 %var2, i64 %var3, i128 %var45, + i32* %var6, %myStruct* byval %struct, i32 %stacked, + double %notstacked) +declare void @stacked_fpu(float %var0, double %var1, float %var2, float %var3, + float %var4, float %var5, float %var6, float %var7, + float %var8) + +define void @check_stack_args() { + call i32 @struct_on_stack(i8 0, i16 12, i32 42, i64 99, i128 1, + i32* @var32, %myStruct* byval @varstruct, + i32 999, double 1.0) + ; Want to check that the final double is passed in registers and + ; that varstruct is passed on the stack. Rather dependent on how a + ; memcpy gets created, but the following works for now. +; CHECK: mov x0, sp +; CHECK: str {{w[0-9]+}}, [x0] +; CHECK: str {{w[0-9]+}}, [x0, #12] +; CHECK: fmov d0, +; CHECK: bl struct_on_stack + + call void @stacked_fpu(float -1.0, double 1.0, float 4.0, float 2.0, + float -2.0, float -8.0, float 16.0, float 1.0, + float 64.0) +; CHECK: ldr s[[STACKEDREG:[0-9]+]], [{{x[0-9]+}}, #:lo12:.LCPI +; CHECK: mov x0, sp +; CHECK: str d[[STACKEDREG]], [x0] +; CHECK: bl stacked_fpu + ret void +} + + +declare void @check_i128_stackalign(i32 %val0, i32 %val1, i32 %val2, i32 %val3, + i32 %val4, i32 %val5, i32 %val6, i32 %val7, + i32 %stack1, i128 %stack2) + +declare void @check_i128_regalign(i32 %val0, i128 %val1) + + +define void @check_i128_align() { +; CHECK: check_i128_align: + %val = load i128* @var128 + call void @check_i128_stackalign(i32 0, i32 1, i32 2, i32 3, + i32 4, i32 5, i32 6, i32 7, + i32 42, i128 %val) +; CHECK: ldr [[I128LO:x[0-9]+]], [{{x[0-9]+}}, #:lo12:var128] +; CHECK: ldr [[I128HI:x[0-9]+]], [{{x[0-9]+}}, #8] +; CHECK: mov x[[SPREG:[0-9]+]], sp +; CHECK: str [[I128HI]], [x[[SPREG]], #24] +; CHECK: str [[I128LO]], [x[[SPREG]], #16] +; CHECK: bl check_i128_stackalign + + call void @check_i128_regalign(i32 0, i128 42) +; CHECK-NOT: mov x1 +; CHECK: movz x2, #42 +; CHECK: mov x3, xzr +; CHECK: bl check_i128_regalign + + ret void +} + +@fptr = global void()* null + +define void @check_indirect_call() { +; CHECK: check_indirect_call: + %func = load void()** @fptr + call void %func() +; CHECK: ldr [[FPTR:x[0-9]+]], [{{x[0-9]+}}, #:lo12:fptr] +; CHECK: blr [[FPTR]] + + ret void +} diff --git a/test/CodeGen/AArch64/global-alignment.ll b/test/CodeGen/AArch64/global-alignment.ll new file mode 100644 index 000000000000..8ed6e551cdeb --- /dev/null +++ b/test/CodeGen/AArch64/global-alignment.ll @@ -0,0 +1,69 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s + +@var32 = global [3 x i32] zeroinitializer +@var64 = global [3 x i64] zeroinitializer +@var32_align64 = global [3 x i32] 
zeroinitializer, align 8 + +define i64 @test_align32() { +; CHECK: test_align32: + %addr = bitcast [3 x i32]* @var32 to i64* + + ; Since @var32 is only guaranteed to be aligned to 32-bits, it's invalid to + ; emit an "LDR x0, [x0, #:lo12:var32] instruction to implement this load. + %val = load i64* %addr +; CHECK: adrp [[HIBITS:x[0-9]+]], var32 +; CHECK: add x[[ADDR:[0-9]+]], [[HIBITS]], #:lo12:var32 +; CHECK: ldr x0, [x[[ADDR]]] + + ret i64 %val +} + +define i64 @test_align64() { +; CHECK: test_align64: + %addr = bitcast [3 x i64]* @var64 to i64* + + ; However, var64 *is* properly aligned and emitting an adrp/add/ldr would be + ; inefficient. + %val = load i64* %addr +; CHECK: adrp x[[HIBITS:[0-9]+]], var64 +; CHECK-NOT: add x[[HIBITS]] +; CHECK: ldr x0, [x[[HIBITS]], #:lo12:var64] + + ret i64 %val +} + +define i64 @test_var32_align64() { +; CHECK: test_var32_align64: + %addr = bitcast [3 x i32]* @var32_align64 to i64* + + ; Since @var32 is only guaranteed to be aligned to 32-bits, it's invalid to + ; emit an "LDR x0, [x0, #:lo12:var32] instruction to implement this load. + %val = load i64* %addr +; CHECK: adrp x[[HIBITS:[0-9]+]], var32_align64 +; CHECK-NOT: add x[[HIBITS]] +; CHECK: ldr x0, [x[[HIBITS]], #:lo12:var32_align64] + + ret i64 %val +} + +@yet_another_var = external global {i32, i32} + +define i64 @test_yet_another_var() { +; CHECK: test_yet_another_var: + + ; @yet_another_var has a preferred alignment of 8, but that's not enough if + ; we're going to be linking against other things. Its ABI alignment is only 4 + ; so we can't fold the load. + %val = load i64* bitcast({i32, i32}* @yet_another_var to i64*) +; CHECK: adrp [[HIBITS:x[0-9]+]], yet_another_var +; CHECK: add x[[ADDR:[0-9]+]], [[HIBITS]], #:lo12:yet_another_var +; CHECK: ldr x0, [x[[ADDR]]] + ret i64 %val +} + +define i64()* @test_functions() { +; CHECK: test_functions: + ret i64()* @test_yet_another_var +; CHECK: adrp [[HIBITS:x[0-9]+]], test_yet_another_var +; CHECK: add x0, [[HIBITS]], #:lo12:test_yet_another_var +} diff --git a/test/CodeGen/AArch64/got-abuse.ll b/test/CodeGen/AArch64/got-abuse.ll new file mode 100644 index 000000000000..c474e5845a64 --- /dev/null +++ b/test/CodeGen/AArch64/got-abuse.ll @@ -0,0 +1,23 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -filetype=obj < %s + +; LLVM gives well-defined semantics to this horrible construct (though C says +; it's undefined). Regardless, we shouldn't crash. The important feature here is +; that in general the only way to access a GOT symbol is via a 64-bit +; load. 
Neither of these alternatives has the ELF relocations required to +; support it: +; + ldr wD, [xN, #:got_lo12:func] +; + add xD, xN, #:got_lo12:func + +declare void @consume(i32) +declare void @func() + +define void @foo() nounwind { +; CHECK: foo: +entry: + call void @consume(i32 ptrtoint (void ()* @func to i32)) +; CHECK: adrp x[[ADDRHI:[0-9]+]], :got:func +; CHECK: ldr {{x[0-9]+}}, [x[[ADDRHI]], #:got_lo12:func] + ret void +} + diff --git a/test/CodeGen/AArch64/i128-align.ll b/test/CodeGen/AArch64/i128-align.ll new file mode 100644 index 000000000000..f019ea0a6706 --- /dev/null +++ b/test/CodeGen/AArch64/i128-align.ll @@ -0,0 +1,29 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s + +%struct = type { i32, i128, i8 } + +@var = global %struct zeroinitializer + +define i64 @check_size() { +; CHECK: check_size: + %starti = ptrtoint %struct* @var to i64 + + %endp = getelementptr %struct* @var, i64 1 + %endi = ptrtoint %struct* %endp to i64 + + %diff = sub i64 %endi, %starti + ret i64 %diff +; CHECK: movz x0, #48 +} + +define i64 @check_field() { +; CHECK: check_field: + %starti = ptrtoint %struct* @var to i64 + + %endp = getelementptr %struct* @var, i64 0, i32 1 + %endi = ptrtoint i128* %endp to i64 + + %diff = sub i64 %endi, %starti + ret i64 %diff +; CHECK: movz x0, #16 +}
\ No newline at end of file diff --git a/test/CodeGen/AArch64/illegal-float-ops.ll b/test/CodeGen/AArch64/illegal-float-ops.ll new file mode 100644 index 000000000000..446151b8ffac --- /dev/null +++ b/test/CodeGen/AArch64/illegal-float-ops.ll @@ -0,0 +1,221 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s + +@varfloat = global float 0.0 +@vardouble = global double 0.0 +@varfp128 = global fp128 zeroinitializer + +declare float @llvm.cos.f32(float) +declare double @llvm.cos.f64(double) +declare fp128 @llvm.cos.f128(fp128) + +define void @test_cos(float %float, double %double, fp128 %fp128) { +; CHECK: test_cos: + + %cosfloat = call float @llvm.cos.f32(float %float) + store float %cosfloat, float* @varfloat +; CHECK: bl cosf + + %cosdouble = call double @llvm.cos.f64(double %double) + store double %cosdouble, double* @vardouble +; CHECK: bl cos + + %cosfp128 = call fp128 @llvm.cos.f128(fp128 %fp128) + store fp128 %cosfp128, fp128* @varfp128 +; CHECK: bl cosl + + ret void +} + +declare float @llvm.exp.f32(float) +declare double @llvm.exp.f64(double) +declare fp128 @llvm.exp.f128(fp128) + +define void @test_exp(float %float, double %double, fp128 %fp128) { +; CHECK: test_exp: + + %expfloat = call float @llvm.exp.f32(float %float) + store float %expfloat, float* @varfloat +; CHECK: bl expf + + %expdouble = call double @llvm.exp.f64(double %double) + store double %expdouble, double* @vardouble +; CHECK: bl exp + + %expfp128 = call fp128 @llvm.exp.f128(fp128 %fp128) + store fp128 %expfp128, fp128* @varfp128 +; CHECK: bl expl + + ret void +} + +declare float @llvm.exp2.f32(float) +declare double @llvm.exp2.f64(double) +declare fp128 @llvm.exp2.f128(fp128) + +define void @test_exp2(float %float, double %double, fp128 %fp128) { +; CHECK: test_exp2: + + %exp2float = call float @llvm.exp2.f32(float %float) + store float %exp2float, float* @varfloat +; CHECK: bl exp2f + + %exp2double = call double @llvm.exp2.f64(double %double) + store double %exp2double, double* @vardouble +; CHECK: bl exp2 + + %exp2fp128 = call fp128 @llvm.exp2.f128(fp128 %fp128) + store fp128 %exp2fp128, fp128* @varfp128 +; CHECK: bl exp2l + ret void + +} + +declare float @llvm.log.f32(float) +declare double @llvm.log.f64(double) +declare fp128 @llvm.log.f128(fp128) + +define void @test_log(float %float, double %double, fp128 %fp128) { +; CHECK: test_log: + + %logfloat = call float @llvm.log.f32(float %float) + store float %logfloat, float* @varfloat +; CHECK: bl logf + + %logdouble = call double @llvm.log.f64(double %double) + store double %logdouble, double* @vardouble +; CHECK: bl log + + %logfp128 = call fp128 @llvm.log.f128(fp128 %fp128) + store fp128 %logfp128, fp128* @varfp128 +; CHECK: bl logl + + ret void +} + +declare float @llvm.log2.f32(float) +declare double @llvm.log2.f64(double) +declare fp128 @llvm.log2.f128(fp128) + +define void @test_log2(float %float, double %double, fp128 %fp128) { +; CHECK: test_log2: + + %log2float = call float @llvm.log2.f32(float %float) + store float %log2float, float* @varfloat +; CHECK: bl log2f + + %log2double = call double @llvm.log2.f64(double %double) + store double %log2double, double* @vardouble +; CHECK: bl log2 + + %log2fp128 = call fp128 @llvm.log2.f128(fp128 %fp128) + store fp128 %log2fp128, fp128* @varfp128 +; CHECK: bl log2l + ret void + +} + +declare float @llvm.log10.f32(float) +declare double @llvm.log10.f64(double) +declare fp128 @llvm.log10.f128(fp128) + +define void @test_log10(float %float, double %double, fp128 %fp128) { +; CHECK: 
test_log10: + + %log10float = call float @llvm.log10.f32(float %float) + store float %log10float, float* @varfloat +; CHECK: bl log10f + + %log10double = call double @llvm.log10.f64(double %double) + store double %log10double, double* @vardouble +; CHECK: bl log10 + + %log10fp128 = call fp128 @llvm.log10.f128(fp128 %fp128) + store fp128 %log10fp128, fp128* @varfp128 +; CHECK: bl log10l + + ret void +} + +declare float @llvm.sin.f32(float) +declare double @llvm.sin.f64(double) +declare fp128 @llvm.sin.f128(fp128) + +define void @test_sin(float %float, double %double, fp128 %fp128) { +; CHECK: test_sin: + + %sinfloat = call float @llvm.sin.f32(float %float) + store float %sinfloat, float* @varfloat +; CHECK: bl sinf + + %sindouble = call double @llvm.sin.f64(double %double) + store double %sindouble, double* @vardouble +; CHECK: bl sin + + %sinfp128 = call fp128 @llvm.sin.f128(fp128 %fp128) + store fp128 %sinfp128, fp128* @varfp128 +; CHECK: bl sinl + ret void + +} + +declare float @llvm.pow.f32(float, float) +declare double @llvm.pow.f64(double, double) +declare fp128 @llvm.pow.f128(fp128, fp128) + +define void @test_pow(float %float, double %double, fp128 %fp128) { +; CHECK: test_pow: + + %powfloat = call float @llvm.pow.f32(float %float, float %float) + store float %powfloat, float* @varfloat +; CHECK: bl powf + + %powdouble = call double @llvm.pow.f64(double %double, double %double) + store double %powdouble, double* @vardouble +; CHECK: bl pow + + %powfp128 = call fp128 @llvm.pow.f128(fp128 %fp128, fp128 %fp128) + store fp128 %powfp128, fp128* @varfp128 +; CHECK: bl powl + + ret void +} + +declare float @llvm.powi.f32(float, i32) +declare double @llvm.powi.f64(double, i32) +declare fp128 @llvm.powi.f128(fp128, i32) + +define void @test_powi(float %float, double %double, i32 %exponent, fp128 %fp128) { +; CHECK: test_powi: + + %powifloat = call float @llvm.powi.f32(float %float, i32 %exponent) + store float %powifloat, float* @varfloat +; CHECK: bl __powisf2 + + %powidouble = call double @llvm.powi.f64(double %double, i32 %exponent) + store double %powidouble, double* @vardouble +; CHECK: bl __powidf2 + + %powifp128 = call fp128 @llvm.powi.f128(fp128 %fp128, i32 %exponent) + store fp128 %powifp128, fp128* @varfp128 +; CHECK: bl __powitf2 + ret void + +} + +define void @test_frem(float %float, double %double, fp128 %fp128) { +; CHECK: test_frem: + + %fremfloat = frem float %float, %float + store float %fremfloat, float* @varfloat +; CHECK: bl fmodf + + %fremdouble = frem double %double, %double + store double %fremdouble, double* @vardouble +; CHECK: bl fmod + + %fremfp128 = frem fp128 %fp128, %fp128 + store fp128 %fremfp128, fp128* @varfp128 +; CHECK: bl fmodl + + ret void +} diff --git a/test/CodeGen/AArch64/init-array.ll b/test/CodeGen/AArch64/init-array.ll new file mode 100644 index 000000000000..d80be8f3a639 --- /dev/null +++ b/test/CodeGen/AArch64/init-array.ll @@ -0,0 +1,9 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -use-init-array < %s | FileCheck %s + +define internal void @_GLOBAL__I_a() section ".text.startup" { + ret void +} + +@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_a }] + +; CHECK: .section .init_array
\ No newline at end of file diff --git a/test/CodeGen/AArch64/inline-asm-constraints-badI.ll b/test/CodeGen/AArch64/inline-asm-constraints-badI.ll new file mode 100644 index 000000000000..c39c57f05822 --- /dev/null +++ b/test/CodeGen/AArch64/inline-asm-constraints-badI.ll @@ -0,0 +1,7 @@ +; RUN: not llc -mtriple=aarch64-none-linux-gnu < %s + +define void @foo() { + ; Out of range immediate for I. + call void asm sideeffect "add x0, x0, $0", "I"(i32 4096) + ret void +}
\ No newline at end of file diff --git a/test/CodeGen/AArch64/inline-asm-constraints-badK.ll b/test/CodeGen/AArch64/inline-asm-constraints-badK.ll new file mode 100644 index 000000000000..47c5f98bf009 --- /dev/null +++ b/test/CodeGen/AArch64/inline-asm-constraints-badK.ll @@ -0,0 +1,7 @@ +; RUN: not llc -mtriple=aarch64-none-linux-gnu < %s + +define void @foo() { + ; 32-bit bitpattern ending in 1101 can't be produced. + call void asm sideeffect "and w0, w0, $0", "K"(i32 13) + ret void +}
\ No newline at end of file diff --git a/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll b/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll new file mode 100644 index 000000000000..7a5b99e23b3d --- /dev/null +++ b/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll @@ -0,0 +1,7 @@ +; RUN: not llc -mtriple=aarch64-none-linux-gnu < %s + +define void @foo() { + ; 4294967296 doesn't fit in 32 bits, so it can't be a valid K (32-bit logical immediate) operand. + call void asm sideeffect "and w0, w0, $0", "K"(i64 4294967296) + ret void +}
\ No newline at end of file diff --git a/test/CodeGen/AArch64/inline-asm-constraints-badL.ll b/test/CodeGen/AArch64/inline-asm-constraints-badL.ll new file mode 100644 index 000000000000..4f0039865a35 --- /dev/null +++ b/test/CodeGen/AArch64/inline-asm-constraints-badL.ll @@ -0,0 +1,7 @@ +; RUN: not llc -mtriple=aarch64-none-linux-gnu < %s + +define void @foo() { + ; 64-bit bitpattern ending in 1101 can't be produced. + call void asm sideeffect "and x0, x0, $0", "L"(i32 13) + ret void +}
\ No newline at end of file diff --git a/test/CodeGen/AArch64/inline-asm-constraints.ll b/test/CodeGen/AArch64/inline-asm-constraints.ll new file mode 100644 index 000000000000..c232f3208cfa --- /dev/null +++ b/test/CodeGen/AArch64/inline-asm-constraints.ll @@ -0,0 +1,117 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s + +define i64 @test_inline_constraint_r(i64 %base, i32 %offset) { +; CHECK: test_inline_constraint_r: + %val = call i64 asm "add $0, $1, $2, sxtw", "=r,r,r"(i64 %base, i32 %offset) +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw + ret i64 %val +} + +define i16 @test_small_reg(i16 %lhs, i16 %rhs) { +; CHECK: test_small_reg: + %val = call i16 asm sideeffect "add $0, $1, $2, sxth", "=r,r,r"(i16 %lhs, i16 %rhs) +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth + ret i16 %val +} + +define i64 @test_inline_constraint_r_imm(i64 %base, i32 %offset) { +; CHECK: test_inline_constraint_r_imm: + %val = call i64 asm "add $0, $1, $2, sxtw", "=r,r,r"(i64 4, i32 12) +; CHECK: movz [[FOUR:x[0-9]+]], #4 +; CHECK: movz [[TWELVE:w[0-9]+]], #12 +; CHECK: add {{x[0-9]+}}, [[FOUR]], [[TWELVE]], sxtw + ret i64 %val +} + +; m is permitted to have a base/offset form. We don't do that +; currently though. +define i32 @test_inline_constraint_m(i32 *%ptr) { +; CHECK: test_inline_constraint_m: + %val = call i32 asm "ldr $0, $1", "=r,m"(i32 *%ptr) +; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}] + ret i32 %val +} + +@arr = global [8 x i32] zeroinitializer + +; Q should *never* have base/offset form even if given the chance. +define i32 @test_inline_constraint_Q(i32 *%ptr) { +; CHECK: test_inline_constraint_Q: + %val = call i32 asm "ldr $0, $1", "=r,Q"(i32* getelementptr([8 x i32]* @arr, i32 0, i32 1)) +; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}] + ret i32 %val +} + +@dump = global fp128 zeroinitializer + +define void @test_inline_constraint_I() { +; CHECK: test_inline_constraint_I: + call void asm sideeffect "add x0, x0, $0", "I"(i32 0) + call void asm sideeffect "add x0, x0, $0", "I"(i64 4095) +; CHECK: add x0, x0, #0 +; CHECK: add x0, x0, #4095 + + ret void +} + +; Skip J because it's useless + +define void @test_inline_constraint_K() { +; CHECK: test_inline_constraint_K: + call void asm sideeffect "and w0, w0, $0", "K"(i32 2863311530) ; = 0xaaaaaaaa + call void asm sideeffect "and w0, w0, $0", "K"(i32 65535) +; CHECK: and w0, w0, #-1431655766 +; CHECK: and w0, w0, #65535 + + ret void +} + +define void @test_inline_constraint_L() { +; CHECK: test_inline_constraint_L: + call void asm sideeffect "and x0, x0, $0", "L"(i64 4294967296) ; = 0xaaaaaaaa + call void asm sideeffect "and x0, x0, $0", "L"(i64 65535) +; CHECK: and x0, x0, #4294967296 +; CHECK: and x0, x0, #65535 + + ret void +} + +; Skip M and N because we don't support MOV pseudo-instructions yet. 
+ +@var = global i32 0 + +define void @test_inline_constraint_S() { +; CHECK: test_inline_constraint_S: + call void asm sideeffect "adrp x0, $0", "S"(i32* @var) + call void asm sideeffect "adrp x0, ${0:A}", "S"(i32* @var) + call void asm sideeffect "add x0, x0, ${0:L}", "S"(i32* @var) +; CHECK: adrp x0, var +; CHECK: adrp x0, var +; CHECK: add x0, x0, #:lo12:var + ret void +} + +define i32 @test_inline_constraint_S_label(i1 %in) { +; CHECK: test_inline_constraint_S_label: + call void asm sideeffect "adr x0, $0", "S"(i8* blockaddress(@test_inline_constraint_S_label, %loc)) +; CHECK: adr x0, .Ltmp{{[0-9]+}} + br i1 %in, label %loc, label %loc2 +loc: + ret i32 0 +loc2: + ret i32 42 +} + +define void @test_inline_constraint_Y() { +; CHECK: test_inline_constraint_Y: + call void asm sideeffect "fcmp s0, $0", "Y"(float 0.0) +; CHECK: fcmp s0, #0.0 + ret void +} + +define void @test_inline_constraint_Z() { +; CHECK: test_inline_constraint_Z: + call void asm sideeffect "cmp w0, $0", "Z"(i32 0) +; CHECK: cmp w0, #0 + ret void +}
\ No newline at end of file diff --git a/test/CodeGen/AArch64/inline-asm-modifiers.ll b/test/CodeGen/AArch64/inline-asm-modifiers.ll new file mode 100644 index 000000000000..3b55945561eb --- /dev/null +++ b/test/CodeGen/AArch64/inline-asm-modifiers.ll @@ -0,0 +1,125 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-ELF %s + +@var_simple = hidden global i32 0 +@var_got = global i32 0 +@var_tlsgd = thread_local global i32 0 +@var_tlsld = thread_local(localdynamic) global i32 0 +@var_tlsie = thread_local(initialexec) global i32 0 +@var_tlsle = thread_local(localexec) global i32 0 + +define void @test_inline_modifier_L() nounwind { +; CHECK: test_inline_modifier_L: + call void asm sideeffect "add x0, x0, ${0:L}", "S,~{x0}"(i32* @var_simple) + call void asm sideeffect "ldr x0, [x0, ${0:L}]", "S,~{x0}"(i32* @var_got) + call void asm sideeffect "add x0, x0, ${0:L}", "S,~{x0}"(i32* @var_tlsgd) + call void asm sideeffect "add x0, x0, ${0:L}", "S,~{x0}"(i32* @var_tlsld) + call void asm sideeffect "ldr x0, [x0, ${0:L}]", "S,~{x0}"(i32* @var_tlsie) + call void asm sideeffect "add x0, x0, ${0:L}", "S,~{x0}"(i32* @var_tlsle) +; CHECK: add x0, x0, #:lo12:var_simple +; CHECK: ldr x0, [x0, #:got_lo12:var_got] +; CHECK: add x0, x0, #:tlsdesc_lo12:var_tlsgd +; CHECK: add x0, x0, #:dtprel_lo12:var_tlsld +; CHECK: ldr x0, [x0, #:gottprel_lo12:var_tlsie] +; CHECK: add x0, x0, #:tprel_lo12:var_tlsle + +; CHECK-ELF: R_AARCH64_ADD_ABS_LO12_NC var_simple +; CHECK-ELF: R_AARCH64_LD64_GOT_LO12_NC var_got +; CHECK-ELF: R_AARCH64_TLSDESC_ADD_LO12_NC var_tlsgd +; CHECK-ELF: R_AARCH64_TLSLD_ADD_DTPREL_LO12 var_tlsld +; CHECK-ELF: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC var_tlsie +; CHECK-ELF: R_AARCH64_TLSLE_ADD_TPREL_LO12 var_tlsle + + ret void +} + +define void @test_inline_modifier_G() nounwind { +; CHECK: test_inline_modifier_G: + call void asm sideeffect "add x0, x0, ${0:G}, lsl #12", "S,~{x0}"(i32* @var_tlsld) + call void asm sideeffect "add x0, x0, ${0:G}, lsl #12", "S,~{x0}"(i32* @var_tlsle) +; CHECK: add x0, x0, #:dtprel_hi12:var_tlsld, lsl #12 +; CHECK: add x0, x0, #:tprel_hi12:var_tlsle, lsl #12 + +; CHECK-ELF: R_AARCH64_TLSLD_ADD_DTPREL_HI12 var_tlsld +; CHECK-ELF: R_AARCH64_TLSLE_ADD_TPREL_HI12 var_tlsle + + ret void +} + +define void @test_inline_modifier_A() nounwind { +; CHECK: test_inline_modifier_A: + call void asm sideeffect "adrp x0, ${0:A}", "S,~{x0}"(i32* @var_simple) + call void asm sideeffect "adrp x0, ${0:A}", "S,~{x0}"(i32* @var_got) + call void asm sideeffect "adrp x0, ${0:A}", "S,~{x0}"(i32* @var_tlsgd) + call void asm sideeffect "adrp x0, ${0:A}", "S,~{x0}"(i32* @var_tlsie) + ; N.b. All tprel and dtprel relocs are modified: lo12 or granules. 
+; CHECK: adrp x0, var_simple +; CHECK: adrp x0, :got:var_got +; CHECK: adrp x0, :tlsdesc:var_tlsgd +; CHECK: adrp x0, :gottprel:var_tlsie + +; CHECK-ELF: R_AARCH64_ADR_PREL_PG_HI21 var_simple +; CHECK-ELF: R_AARCH64_ADR_GOT_PAGE var_got +; CHECK-ELF: R_AARCH64_TLSDESC_ADR_PAGE var_tlsgd +; CHECK-ELF: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 var_tlsie + + ret void +} + +define void @test_inline_modifier_wx(i32 %small, i64 %big) nounwind { +; CHECK: test_inline_modifier_wx: + call i32 asm sideeffect "add $0, $0, $0", "=r,0"(i32 %small) + call i32 asm sideeffect "add ${0:w}, ${0:w}, ${0:w}", "=r,0"(i32 %small) + call i32 asm sideeffect "add ${0:x}, ${0:x}, ${0:x}", "=r,0"(i32 %small) +; CHECK: //APP +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + + call i64 asm sideeffect "add $0, $0, $0", "=r,0"(i64 %big) + call i64 asm sideeffect "add ${0:w}, ${0:w}, ${0:w}", "=r,0"(i64 %big) + call i64 asm sideeffect "add ${0:x}, ${0:x}, ${0:x}", "=r,0"(i64 %big) +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + + call i32 asm sideeffect "add ${0:w}, ${1:w}, ${1:w}", "=r,r"(i32 0) + call i32 asm sideeffect "add ${0:x}, ${1:x}, ${1:x}", "=r,r"(i32 0) +; CHECK: add {{w[0-9]+}}, wzr, wzr +; CHECK: add {{x[0-9]+}}, xzr, xzr + ret void +} + +define void @test_inline_modifier_bhsdq() nounwind { +; CHECK: test_inline_modifier_bhsdq: + call float asm sideeffect "ldr ${0:b}, [sp]", "=w"() + call float asm sideeffect "ldr ${0:h}, [sp]", "=w"() + call float asm sideeffect "ldr ${0:s}, [sp]", "=w"() + call float asm sideeffect "ldr ${0:d}, [sp]", "=w"() + call float asm sideeffect "ldr ${0:q}, [sp]", "=w"() +; CHECK: ldr b0, [sp] +; CHECK: ldr h0, [sp] +; CHECK: ldr s0, [sp] +; CHECK: ldr d0, [sp] +; CHECK: ldr q0, [sp] + + call double asm sideeffect "ldr ${0:b}, [sp]", "=w"() + call double asm sideeffect "ldr ${0:h}, [sp]", "=w"() + call double asm sideeffect "ldr ${0:s}, [sp]", "=w"() + call double asm sideeffect "ldr ${0:d}, [sp]", "=w"() + call double asm sideeffect "ldr ${0:q}, [sp]", "=w"() +; CHECK: ldr b0, [sp] +; CHECK: ldr h0, [sp] +; CHECK: ldr s0, [sp] +; CHECK: ldr d0, [sp] +; CHECK: ldr q0, [sp] + ret void +} + +define void @test_inline_modifier_c() nounwind { +; CHECK: test_inline_modifier_c: + call void asm sideeffect "adr x0, ${0:c}", "i"(i32 3) +; CHECK: adr x0, 3 + + ret void +}
\ No newline at end of file diff --git a/test/CodeGen/AArch64/jump-table.ll b/test/CodeGen/AArch64/jump-table.ll new file mode 100644 index 000000000000..dcf9f4ed455c --- /dev/null +++ b/test/CodeGen/AArch64/jump-table.ll @@ -0,0 +1,56 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -filetype=obj | elf-dump | FileCheck %s -check-prefix=CHECK-ELF + +define i32 @test_jumptable(i32 %in) { +; CHECK: test_jumptable + + switch i32 %in, label %def [ + i32 0, label %lbl1 + i32 1, label %lbl2 + i32 2, label %lbl3 + i32 4, label %lbl4 + ] +; CHECK: adrp [[JTPAGE:x[0-9]+]], .LJTI0_0 +; CHECK: add x[[JT:[0-9]+]], [[JTPAGE]], #:lo12:.LJTI0_0 +; CHECK: ldr [[DEST:x[0-9]+]], [x[[JT]], {{x[0-9]+}}, lsl #3] +; CHECK: br [[DEST]] + +def: + ret i32 0 + +lbl1: + ret i32 1 + +lbl2: + ret i32 2 + +lbl3: + ret i32 4 + +lbl4: + ret i32 8 + +} + +; CHECK: .rodata + +; CHECK: .LJTI0_0: +; CHECK-NEXT: .xword +; CHECK-NEXT: .xword +; CHECK-NEXT: .xword +; CHECK-NEXT: .xword +; CHECK-NEXT: .xword + +; ELF tests: + +; First make sure we get a page/lo12 pair in .text to pick up the jump-table +; CHECK-ELF: .rela.text +; CHECK-ELF: ('r_sym', 0x00000008) +; CHECK-ELF-NEXT: ('r_type', 0x00000113) +; CHECK-ELF: ('r_sym', 0x00000008) +; CHECK-ELF-NEXT: ('r_type', 0x00000115) + +; Also check the targets in .rodata are relocated +; CHECK-ELF: .rela.rodata +; CHECK-ELF: ('r_sym', 0x00000005) +; CHECK-ELF-NEXT: ('r_type', 0x00000101)
\ No newline at end of file diff --git a/test/CodeGen/AArch64/large-frame.ll b/test/CodeGen/AArch64/large-frame.ll new file mode 100644 index 000000000000..2b2e1295c4f6 --- /dev/null +++ b/test/CodeGen/AArch64/large-frame.ll @@ -0,0 +1,114 @@ +; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s +declare void @use_addr(i8*) + +@addr = global i8* null + +define void @test_bigframe() { +; CHECK: test_bigframe: + + %var1 = alloca i8, i32 20000000 + %var2 = alloca i8, i32 16 + %var3 = alloca i8, i32 20000000 +; CHECK: sub sp, sp, #496 +; CHECK: str x30, [sp, #488] + ; Total adjust is 39999536 +; CHECK: movz [[SUBCONST:x[0-9]+]], #22576 +; CHECK: movk [[SUBCONST]], #610, lsl #16 +; CHECK: sub sp, sp, [[SUBCONST]] + + ; Total offset is 20000024 +; CHECK: movz [[VAR1OFFSET:x[0-9]+]], #11544 +; CHECK: movk [[VAR1OFFSET]], #305, lsl #16 +; CHECK: add {{x[0-9]+}}, sp, [[VAR1OFFSET]] + store volatile i8* %var1, i8** @addr + + %var1plus2 = getelementptr i8* %var1, i32 2 + store volatile i8* %var1plus2, i8** @addr + +; CHECK: movz [[VAR2OFFSET:x[0-9]+]], #11528 +; CHECK: movk [[VAR2OFFSET]], #305, lsl #16 +; CHECK: add {{x[0-9]+}}, sp, [[VAR2OFFSET]] + store volatile i8* %var2, i8** @addr + + %var2plus2 = getelementptr i8* %var2, i32 2 + store volatile i8* %var2plus2, i8** @addr + + store volatile i8* %var3, i8** @addr + + %var3plus2 = getelementptr i8* %var3, i32 2 + store volatile i8* %var3plus2, i8** @addr + +; CHECK: movz [[ADDCONST:x[0-9]+]], #22576 +; CHECK: movk [[ADDCONST]], #610, lsl #16 +; CHECK: add sp, sp, [[ADDCONST]] + ret void +} + +define void @test_mediumframe() { +; CHECK: test_mediumframe: + %var1 = alloca i8, i32 1000000 + %var2 = alloca i8, i32 16 + %var3 = alloca i8, i32 1000000 +; CHECK: sub sp, sp, #496 +; CHECK: str x30, [sp, #488] +; CHECK: sub sp, sp, #688 +; CHECK-NEXT: sub sp, sp, #488, lsl #12 + + store volatile i8* %var1, i8** @addr +; CHECK: add [[VAR1ADDR:x[0-9]+]], sp, #600 +; CHECK: add [[VAR1ADDR]], [[VAR1ADDR]], #244, lsl #12 + + %var1plus2 = getelementptr i8* %var1, i32 2 + store volatile i8* %var1plus2, i8** @addr +; CHECK: add [[VAR1PLUS2:x[0-9]+]], {{x[0-9]+}}, #2 + + store volatile i8* %var2, i8** @addr +; CHECK: add [[VAR2ADDR:x[0-9]+]], sp, #584 +; CHECK: add [[VAR2ADDR]], [[VAR2ADDR]], #244, lsl #12 + + %var2plus2 = getelementptr i8* %var2, i32 2 + store volatile i8* %var2plus2, i8** @addr +; CHECK: add [[VAR2PLUS2:x[0-9]+]], {{x[0-9]+}}, #2 + + store volatile i8* %var3, i8** @addr + + %var3plus2 = getelementptr i8* %var3, i32 2 + store volatile i8* %var3plus2, i8** @addr + +; CHECK: add sp, sp, #688 +; CHECK: add sp, sp, #488, lsl #12 +; CHECK: ldr x30, [sp, #488] +; CHECK: add sp, sp, #496 + ret void +} + + +@bigspace = global [8 x i64] zeroinitializer + +; If temporary registers are allocated for adjustment, they should *not* clobber +; argument registers. 
+define void @test_tempallocation([8 x i64] %val) nounwind { +; CHECK: test_tempallocation: + %var = alloca i8, i32 1000000 +; CHECK: sub sp, sp, + +; Make sure the prologue is reasonably efficient +; CHECK-NEXT: stp x29, x30, [sp, +; CHECK-NEXT: stp x25, x26, [sp, +; CHECK-NEXT: stp x23, x24, [sp, +; CHECK-NEXT: stp x21, x22, [sp, +; CHECK-NEXT: stp x19, x20, [sp, + +; Make sure we don't trash an argument register +; CHECK-NOT: movz {{x[0-7],}} +; CHECK: sub sp, sp, + +; CHECK-NOT: movz {{x[0-7],}} + +; CHECK: bl use_addr + call void @use_addr(i8* %var) + + store [8 x i64] %val, [8 x i64]* @bigspace + ret void +; CHECK: ret +} diff --git a/test/CodeGen/AArch64/ldst-regoffset.ll b/test/CodeGen/AArch64/ldst-regoffset.ll new file mode 100644 index 000000000000..45935129fd7e --- /dev/null +++ b/test/CodeGen/AArch64/ldst-regoffset.ll @@ -0,0 +1,333 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +@var_8bit = global i8 0 +@var_16bit = global i16 0 +@var_32bit = global i32 0 +@var_64bit = global i64 0 + +@var_float = global float 0.0 +@var_double = global double 0.0 + +define void @ldst_8bit(i8* %base, i32 %off32, i64 %off64) { +; CHECK: ldst_8bit: + + %addr8_sxtw = getelementptr i8* %base, i32 %off32 + %val8_sxtw = load volatile i8* %addr8_sxtw + %val32_signed = sext i8 %val8_sxtw to i32 + store volatile i32 %val32_signed, i32* @var_32bit +; CHECK: ldrsb {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw] + + %addr_lsl = getelementptr i8* %base, i64 %off64 + %val8_lsl = load volatile i8* %addr_lsl + %val32_unsigned = zext i8 %val8_lsl to i32 + store volatile i32 %val32_unsigned, i32* @var_32bit +; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}] + + %addrint_uxtw = ptrtoint i8* %base to i64 + %offset_uxtw = zext i32 %off32 to i64 + %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw + %addr_uxtw = inttoptr i64 %addrint1_uxtw to i8* + %val8_uxtw = load volatile i8* %addr_uxtw + %newval8 = add i8 %val8_uxtw, 1 + store volatile i8 %newval8, i8* @var_8bit +; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw] + + ret void +} + + +define void @ldst_16bit(i16* %base, i32 %off32, i64 %off64) { +; CHECK: ldst_16bit: + + %addr8_sxtwN = getelementptr i16* %base, i32 %off32 + %val8_sxtwN = load volatile i16* %addr8_sxtwN + %val32_signed = sext i16 %val8_sxtwN to i32 + store volatile i32 %val32_signed, i32* @var_32bit +; CHECK: ldrsh {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #1] + + %addr_lslN = getelementptr i16* %base, i64 %off64 + %val8_lslN = load volatile i16* %addr_lslN + %val32_unsigned = zext i16 %val8_lslN to i32 + store volatile i32 %val32_unsigned, i32* @var_32bit +; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #1] + + %addrint_uxtw = ptrtoint i16* %base to i64 + %offset_uxtw = zext i32 %off32 to i64 + %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw + %addr_uxtw = inttoptr i64 %addrint1_uxtw to i16* + %val8_uxtw = load volatile i16* %addr_uxtw + %newval8 = add i16 %val8_uxtw, 1 + store volatile i16 %newval8, i16* @var_16bit +; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw] + + %base_sxtw = ptrtoint i16* %base to i64 + %offset_sxtw = sext i32 %off32 to i64 + %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw + %addr_sxtw = inttoptr i64 %addrint_sxtw to i16* + %val16_sxtw = load volatile i16* %addr_sxtw + %val64_signed = sext i16 %val16_sxtw to i64 + store volatile i64 %val64_signed, i64* @var_64bit +; CHECK: ldrsh {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw] + + + %base_lsl = ptrtoint i16* %base to i64 + %addrint_lsl = 
add i64 %base_lsl, %off64 + %addr_lsl = inttoptr i64 %addrint_lsl to i16* + %val16_lsl = load volatile i16* %addr_lsl + %val64_unsigned = zext i16 %val16_lsl to i64 + store volatile i64 %val64_unsigned, i64* @var_64bit +; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}] + + %base_uxtwN = ptrtoint i16* %base to i64 + %offset_uxtwN = zext i32 %off32 to i64 + %offset2_uxtwN = shl i64 %offset_uxtwN, 1 + %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN + %addr_uxtwN = inttoptr i64 %addrint_uxtwN to i16* + %val32 = load volatile i32* @var_32bit + %val16_trunc32 = trunc i32 %val32 to i16 + store volatile i16 %val16_trunc32, i16* %addr_uxtwN +; CHECK: strh {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #1] + ret void +} + +define void @ldst_32bit(i32* %base, i32 %off32, i64 %off64) { +; CHECK: ldst_32bit: + + %addr_sxtwN = getelementptr i32* %base, i32 %off32 + %val_sxtwN = load volatile i32* %addr_sxtwN + store volatile i32 %val_sxtwN, i32* @var_32bit +; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #2] + + %addr_lslN = getelementptr i32* %base, i64 %off64 + %val_lslN = load volatile i32* %addr_lslN + store volatile i32 %val_lslN, i32* @var_32bit +; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #2] + + %addrint_uxtw = ptrtoint i32* %base to i64 + %offset_uxtw = zext i32 %off32 to i64 + %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw + %addr_uxtw = inttoptr i64 %addrint1_uxtw to i32* + %val_uxtw = load volatile i32* %addr_uxtw + %newval8 = add i32 %val_uxtw, 1 + store volatile i32 %newval8, i32* @var_32bit +; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw] + + + %base_sxtw = ptrtoint i32* %base to i64 + %offset_sxtw = sext i32 %off32 to i64 + %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw + %addr_sxtw = inttoptr i64 %addrint_sxtw to i32* + %val16_sxtw = load volatile i32* %addr_sxtw + %val64_signed = sext i32 %val16_sxtw to i64 + store volatile i64 %val64_signed, i64* @var_64bit +; CHECK: ldrsw {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw] + + + %base_lsl = ptrtoint i32* %base to i64 + %addrint_lsl = add i64 %base_lsl, %off64 + %addr_lsl = inttoptr i64 %addrint_lsl to i32* + %val16_lsl = load volatile i32* %addr_lsl + %val64_unsigned = zext i32 %val16_lsl to i64 + store volatile i64 %val64_unsigned, i64* @var_64bit +; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}] + + %base_uxtwN = ptrtoint i32* %base to i64 + %offset_uxtwN = zext i32 %off32 to i64 + %offset2_uxtwN = shl i64 %offset_uxtwN, 2 + %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN + %addr_uxtwN = inttoptr i64 %addrint_uxtwN to i32* + %val32 = load volatile i32* @var_32bit + store volatile i32 %val32, i32* %addr_uxtwN +; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #2] + ret void +} + +define void @ldst_64bit(i64* %base, i32 %off32, i64 %off64) { +; CHECK: ldst_64bit: + + %addr_sxtwN = getelementptr i64* %base, i32 %off32 + %val_sxtwN = load volatile i64* %addr_sxtwN + store volatile i64 %val_sxtwN, i64* @var_64bit +; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #3] + + %addr_lslN = getelementptr i64* %base, i64 %off64 + %val_lslN = load volatile i64* %addr_lslN + store volatile i64 %val_lslN, i64* @var_64bit +; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #3] + + %addrint_uxtw = ptrtoint i64* %base to i64 + %offset_uxtw = zext i32 %off32 to i64 + %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw + %addr_uxtw = inttoptr i64 %addrint1_uxtw to i64* + %val8_uxtw = load volatile i64* %addr_uxtw + %newval8 = add i64 %val8_uxtw, 1 + store volatile i64 %newval8, 
i64* @var_64bit +; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw] + + %base_sxtw = ptrtoint i64* %base to i64 + %offset_sxtw = sext i32 %off32 to i64 + %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw + %addr_sxtw = inttoptr i64 %addrint_sxtw to i64* + %val64_sxtw = load volatile i64* %addr_sxtw + store volatile i64 %val64_sxtw, i64* @var_64bit +; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw] + + %base_lsl = ptrtoint i64* %base to i64 + %addrint_lsl = add i64 %base_lsl, %off64 + %addr_lsl = inttoptr i64 %addrint_lsl to i64* + %val64_lsl = load volatile i64* %addr_lsl + store volatile i64 %val64_lsl, i64* @var_64bit +; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}] + + %base_uxtwN = ptrtoint i64* %base to i64 + %offset_uxtwN = zext i32 %off32 to i64 + %offset2_uxtwN = shl i64 %offset_uxtwN, 3 + %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN + %addr_uxtwN = inttoptr i64 %addrint_uxtwN to i64* + %val64 = load volatile i64* @var_64bit + store volatile i64 %val64, i64* %addr_uxtwN +; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #3] + ret void +} + +define void @ldst_float(float* %base, i32 %off32, i64 %off64) { +; CHECK: ldst_float: + + %addr_sxtwN = getelementptr float* %base, i32 %off32 + %val_sxtwN = load volatile float* %addr_sxtwN + store volatile float %val_sxtwN, float* @var_float +; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #2] + + %addr_lslN = getelementptr float* %base, i64 %off64 + %val_lslN = load volatile float* %addr_lslN + store volatile float %val_lslN, float* @var_float +; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #2] + + %addrint_uxtw = ptrtoint float* %base to i64 + %offset_uxtw = zext i32 %off32 to i64 + %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw + %addr_uxtw = inttoptr i64 %addrint1_uxtw to float* + %val_uxtw = load volatile float* %addr_uxtw + store volatile float %val_uxtw, float* @var_float +; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw] + + %base_sxtw = ptrtoint float* %base to i64 + %offset_sxtw = sext i32 %off32 to i64 + %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw + %addr_sxtw = inttoptr i64 %addrint_sxtw to float* + %val64_sxtw = load volatile float* %addr_sxtw + store volatile float %val64_sxtw, float* @var_float +; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw] + + %base_lsl = ptrtoint float* %base to i64 + %addrint_lsl = add i64 %base_lsl, %off64 + %addr_lsl = inttoptr i64 %addrint_lsl to float* + %val64_lsl = load volatile float* %addr_lsl + store volatile float %val64_lsl, float* @var_float +; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}] + + %base_uxtwN = ptrtoint float* %base to i64 + %offset_uxtwN = zext i32 %off32 to i64 + %offset2_uxtwN = shl i64 %offset_uxtwN, 2 + %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN + %addr_uxtwN = inttoptr i64 %addrint_uxtwN to float* + %val64 = load volatile float* @var_float + store volatile float %val64, float* %addr_uxtwN +; CHECK: str {{s[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #2] + ret void +} + +define void @ldst_double(double* %base, i32 %off32, i64 %off64) { +; CHECK: ldst_double: + + %addr_sxtwN = getelementptr double* %base, i32 %off32 + %val_sxtwN = load volatile double* %addr_sxtwN + store volatile double %val_sxtwN, double* @var_double +; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #3] + + %addr_lslN = getelementptr double* %base, i64 %off64 + %val_lslN = load volatile double* %addr_lslN + store volatile double %val_lslN, double* @var_double +; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, 
{{x[0-9]+}}, lsl #3] + + %addrint_uxtw = ptrtoint double* %base to i64 + %offset_uxtw = zext i32 %off32 to i64 + %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw + %addr_uxtw = inttoptr i64 %addrint1_uxtw to double* + %val_uxtw = load volatile double* %addr_uxtw + store volatile double %val_uxtw, double* @var_double +; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw] + + %base_sxtw = ptrtoint double* %base to i64 + %offset_sxtw = sext i32 %off32 to i64 + %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw + %addr_sxtw = inttoptr i64 %addrint_sxtw to double* + %val64_sxtw = load volatile double* %addr_sxtw + store volatile double %val64_sxtw, double* @var_double +; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw] + + %base_lsl = ptrtoint double* %base to i64 + %addrint_lsl = add i64 %base_lsl, %off64 + %addr_lsl = inttoptr i64 %addrint_lsl to double* + %val64_lsl = load volatile double* %addr_lsl + store volatile double %val64_lsl, double* @var_double +; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}] + + %base_uxtwN = ptrtoint double* %base to i64 + %offset_uxtwN = zext i32 %off32 to i64 + %offset2_uxtwN = shl i64 %offset_uxtwN, 3 + %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN + %addr_uxtwN = inttoptr i64 %addrint_uxtwN to double* + %val64 = load volatile double* @var_double + store volatile double %val64, double* %addr_uxtwN +; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #3] + ret void +} + + +define void @ldst_128bit(fp128* %base, i32 %off32, i64 %off64) { +; CHECK: ldst_128bit: + + %addr_sxtwN = getelementptr fp128* %base, i32 %off32 + %val_sxtwN = load volatile fp128* %addr_sxtwN + store volatile fp128 %val_sxtwN, fp128* %base +; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #4] + + %addr_lslN = getelementptr fp128* %base, i64 %off64 + %val_lslN = load volatile fp128* %addr_lslN + store volatile fp128 %val_lslN, fp128* %base +; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #4] + + %addrint_uxtw = ptrtoint fp128* %base to i64 + %offset_uxtw = zext i32 %off32 to i64 + %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw + %addr_uxtw = inttoptr i64 %addrint1_uxtw to fp128* + %val_uxtw = load volatile fp128* %addr_uxtw + store volatile fp128 %val_uxtw, fp128* %base +; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw] + + %base_sxtw = ptrtoint fp128* %base to i64 + %offset_sxtw = sext i32 %off32 to i64 + %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw + %addr_sxtw = inttoptr i64 %addrint_sxtw to fp128* + %val64_sxtw = load volatile fp128* %addr_sxtw + store volatile fp128 %val64_sxtw, fp128* %base +; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw] + + %base_lsl = ptrtoint fp128* %base to i64 + %addrint_lsl = add i64 %base_lsl, %off64 + %addr_lsl = inttoptr i64 %addrint_lsl to fp128* + %val64_lsl = load volatile fp128* %addr_lsl + store volatile fp128 %val64_lsl, fp128* %base +; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}] + + %base_uxtwN = ptrtoint fp128* %base to i64 + %offset_uxtwN = zext i32 %off32 to i64 + %offset2_uxtwN = shl i64 %offset_uxtwN, 4 + %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN + %addr_uxtwN = inttoptr i64 %addrint_uxtwN to fp128* + %val64 = load volatile fp128* %base + store volatile fp128 %val64, fp128* %addr_uxtwN +; CHECK: str {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #4] + ret void +} diff --git a/test/CodeGen/AArch64/ldst-unscaledimm.ll b/test/CodeGen/AArch64/ldst-unscaledimm.ll new file mode 100644 index 000000000000..78a3c83c3dd8 --- /dev/null +++ 
b/test/CodeGen/AArch64/ldst-unscaledimm.ll @@ -0,0 +1,218 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +@var_8bit = global i8 0 +@var_16bit = global i16 0 +@var_32bit = global i32 0 +@var_64bit = global i64 0 + +@var_float = global float 0.0 +@var_double = global double 0.0 + +@varptr = global i8* null + +define void @ldst_8bit() { +; CHECK: ldst_8bit: + +; No architectural support for loads to 16-bit or 8-bit since we +; promote i8 during lowering. + %addr_8bit = load i8** @varptr + +; match a sign-extending load 8-bit -> 32-bit + %addr_sext32 = getelementptr i8* %addr_8bit, i64 -256 + %val8_sext32 = load volatile i8* %addr_sext32 + %val32_signed = sext i8 %val8_sext32 to i32 + store volatile i32 %val32_signed, i32* @var_32bit +; CHECK: ldursb {{w[0-9]+}}, [{{x[0-9]+}}, #-256] + +; match a zero-extending load volatile 8-bit -> 32-bit + %addr_zext32 = getelementptr i8* %addr_8bit, i64 -12 + %val8_zext32 = load volatile i8* %addr_zext32 + %val32_unsigned = zext i8 %val8_zext32 to i32 + store volatile i32 %val32_unsigned, i32* @var_32bit +; CHECK: ldurb {{w[0-9]+}}, [{{x[0-9]+}}, #-12] + +; match an any-extending load volatile 8-bit -> 32-bit + %addr_anyext = getelementptr i8* %addr_8bit, i64 -1 + %val8_anyext = load volatile i8* %addr_anyext + %newval8 = add i8 %val8_anyext, 1 + store volatile i8 %newval8, i8* @var_8bit +; CHECK: ldurb {{w[0-9]+}}, [{{x[0-9]+}}, #-1] + +; match a sign-extending load volatile 8-bit -> 64-bit + %addr_sext64 = getelementptr i8* %addr_8bit, i64 -5 + %val8_sext64 = load volatile i8* %addr_sext64 + %val64_signed = sext i8 %val8_sext64 to i64 + store volatile i64 %val64_signed, i64* @var_64bit +; CHECK: ldursb {{x[0-9]+}}, [{{x[0-9]+}}, #-5] + +; match a zero-extending load volatile 8-bit -> 64-bit. +; This uses the fact that ldrb w0, [x0] will zero out the high 32-bits +; of x0 so it's identical to load volatileing to 32-bits. + %addr_zext64 = getelementptr i8* %addr_8bit, i64 -9 + %val8_zext64 = load volatile i8* %addr_zext64 + %val64_unsigned = zext i8 %val8_zext64 to i64 + store volatile i64 %val64_unsigned, i64* @var_64bit +; CHECK: ldurb {{w[0-9]+}}, [{{x[0-9]+}}, #-9] + +; truncating store volatile 32-bits to 8-bits + %addr_trunc32 = getelementptr i8* %addr_8bit, i64 -256 + %val32 = load volatile i32* @var_32bit + %val8_trunc32 = trunc i32 %val32 to i8 + store volatile i8 %val8_trunc32, i8* %addr_trunc32 +; CHECK: sturb {{w[0-9]+}}, [{{x[0-9]+}}, #-256] + +; truncating store volatile 64-bits to 8-bits + %addr_trunc64 = getelementptr i8* %addr_8bit, i64 -1 + %val64 = load volatile i64* @var_64bit + %val8_trunc64 = trunc i64 %val64 to i8 + store volatile i8 %val8_trunc64, i8* %addr_trunc64 +; CHECK: sturb {{w[0-9]+}}, [{{x[0-9]+}}, #-1] + + ret void +} + +define void @ldst_16bit() { +; CHECK: ldst_16bit: + +; No architectural support for loads to 16-bit or 16-bit since we +; promote i16 during lowering. + %addr_8bit = load i8** @varptr + +; match a sign-extending load 16-bit -> 32-bit + %addr8_sext32 = getelementptr i8* %addr_8bit, i64 -256 + %addr_sext32 = bitcast i8* %addr8_sext32 to i16* + %val16_sext32 = load volatile i16* %addr_sext32 + %val32_signed = sext i16 %val16_sext32 to i32 + store volatile i32 %val32_signed, i32* @var_32bit +; CHECK: ldursh {{w[0-9]+}}, [{{x[0-9]+}}, #-256] + +; match a zero-extending load volatile 16-bit -> 32-bit. With offset that would be unaligned. 
+ %addr8_zext32 = getelementptr i8* %addr_8bit, i64 15 + %addr_zext32 = bitcast i8* %addr8_zext32 to i16* + %val16_zext32 = load volatile i16* %addr_zext32 + %val32_unsigned = zext i16 %val16_zext32 to i32 + store volatile i32 %val32_unsigned, i32* @var_32bit +; CHECK: ldurh {{w[0-9]+}}, [{{x[0-9]+}}, #15] + +; match an any-extending load volatile 16-bit -> 32-bit + %addr8_anyext = getelementptr i8* %addr_8bit, i64 -1 + %addr_anyext = bitcast i8* %addr8_anyext to i16* + %val16_anyext = load volatile i16* %addr_anyext + %newval16 = add i16 %val16_anyext, 1 + store volatile i16 %newval16, i16* @var_16bit +; CHECK: ldurh {{w[0-9]+}}, [{{x[0-9]+}}, #-1] + +; match a sign-extending load volatile 16-bit -> 64-bit + %addr8_sext64 = getelementptr i8* %addr_8bit, i64 -5 + %addr_sext64 = bitcast i8* %addr8_sext64 to i16* + %val16_sext64 = load volatile i16* %addr_sext64 + %val64_signed = sext i16 %val16_sext64 to i64 + store volatile i64 %val64_signed, i64* @var_64bit +; CHECK: ldursh {{x[0-9]+}}, [{{x[0-9]+}}, #-5] + +; match a zero-extending load volatile 16-bit -> 64-bit. +; This uses the fact that ldrb w0, [x0] will zero out the high 32-bits +; of x0 so it's identical to load volatileing to 32-bits. + %addr8_zext64 = getelementptr i8* %addr_8bit, i64 9 + %addr_zext64 = bitcast i8* %addr8_zext64 to i16* + %val16_zext64 = load volatile i16* %addr_zext64 + %val64_unsigned = zext i16 %val16_zext64 to i64 + store volatile i64 %val64_unsigned, i64* @var_64bit +; CHECK: ldurh {{w[0-9]+}}, [{{x[0-9]+}}, #9] + +; truncating store volatile 32-bits to 16-bits + %addr8_trunc32 = getelementptr i8* %addr_8bit, i64 -256 + %addr_trunc32 = bitcast i8* %addr8_trunc32 to i16* + %val32 = load volatile i32* @var_32bit + %val16_trunc32 = trunc i32 %val32 to i16 + store volatile i16 %val16_trunc32, i16* %addr_trunc32 +; CHECK: sturh {{w[0-9]+}}, [{{x[0-9]+}}, #-256] + +; truncating store volatile 64-bits to 16-bits + %addr8_trunc64 = getelementptr i8* %addr_8bit, i64 -1 + %addr_trunc64 = bitcast i8* %addr8_trunc64 to i16* + %val64 = load volatile i64* @var_64bit + %val16_trunc64 = trunc i64 %val64 to i16 + store volatile i16 %val16_trunc64, i16* %addr_trunc64 +; CHECK: sturh {{w[0-9]+}}, [{{x[0-9]+}}, #-1] + + ret void +} + +define void @ldst_32bit() { +; CHECK: ldst_32bit: + + %addr_8bit = load i8** @varptr + +; Straight 32-bit load/store + %addr32_8_noext = getelementptr i8* %addr_8bit, i64 1 + %addr32_noext = bitcast i8* %addr32_8_noext to i32* + %val32_noext = load volatile i32* %addr32_noext + store volatile i32 %val32_noext, i32* %addr32_noext +; CHECK: ldur {{w[0-9]+}}, [{{x[0-9]+}}, #1] +; CHECK: stur {{w[0-9]+}}, [{{x[0-9]+}}, #1] + +; Zero-extension to 64-bits + %addr32_8_zext = getelementptr i8* %addr_8bit, i64 -256 + %addr32_zext = bitcast i8* %addr32_8_zext to i32* + %val32_zext = load volatile i32* %addr32_zext + %val64_unsigned = zext i32 %val32_zext to i64 + store volatile i64 %val64_unsigned, i64* @var_64bit +; CHECK: ldur {{w[0-9]+}}, [{{x[0-9]+}}, #-256] +; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_64bit] + +; Sign-extension to 64-bits + %addr32_8_sext = getelementptr i8* %addr_8bit, i64 -12 + %addr32_sext = bitcast i8* %addr32_8_sext to i32* + %val32_sext = load volatile i32* %addr32_sext + %val64_signed = sext i32 %val32_sext to i64 + store volatile i64 %val64_signed, i64* @var_64bit +; CHECK: ldursw {{x[0-9]+}}, [{{x[0-9]+}}, #-12] +; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_64bit] + +; Truncation from 64-bits + %addr64_8_trunc = getelementptr i8* %addr_8bit, i64 255 + 
%addr64_trunc = bitcast i8* %addr64_8_trunc to i64* + %addr32_8_trunc = getelementptr i8* %addr_8bit, i64 -20 + %addr32_trunc = bitcast i8* %addr32_8_trunc to i32* + + %val64_trunc = load volatile i64* %addr64_trunc + %val32_trunc = trunc i64 %val64_trunc to i32 + store volatile i32 %val32_trunc, i32* %addr32_trunc +; CHECK: ldur {{x[0-9]+}}, [{{x[0-9]+}}, #255] +; CHECK: stur {{w[0-9]+}}, [{{x[0-9]+}}, #-20] + + ret void +} + +define void @ldst_float() { +; CHECK: ldst_float: + + %addr_8bit = load i8** @varptr + %addrfp_8 = getelementptr i8* %addr_8bit, i64 -5 + %addrfp = bitcast i8* %addrfp_8 to float* + + %valfp = load volatile float* %addrfp +; CHECK: ldur {{s[0-9]+}}, [{{x[0-9]+}}, #-5] + + store volatile float %valfp, float* %addrfp +; CHECK: stur {{s[0-9]+}}, [{{x[0-9]+}}, #-5] + + ret void +} + +define void @ldst_double() { +; CHECK: ldst_double: + + %addr_8bit = load i8** @varptr + %addrfp_8 = getelementptr i8* %addr_8bit, i64 4 + %addrfp = bitcast i8* %addrfp_8 to double* + + %valfp = load volatile double* %addrfp +; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #4] + + store volatile double %valfp, double* %addrfp +; CHECK: stur {{d[0-9]+}}, [{{x[0-9]+}}, #4] + + ret void +} diff --git a/test/CodeGen/AArch64/ldst-unsignedimm.ll b/test/CodeGen/AArch64/ldst-unsignedimm.ll new file mode 100644 index 000000000000..1e7540d9be0a --- /dev/null +++ b/test/CodeGen/AArch64/ldst-unsignedimm.ll @@ -0,0 +1,251 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +@var_8bit = global i8 0 +@var_16bit = global i16 0 +@var_32bit = global i32 0 +@var_64bit = global i64 0 + +@var_float = global float 0.0 +@var_double = global double 0.0 + +define void @ldst_8bit() { +; CHECK: ldst_8bit: + +; No architectural support for loads to 16-bit or 8-bit since we +; promote i8 during lowering. + +; match a sign-extending load 8-bit -> 32-bit + %val8_sext32 = load volatile i8* @var_8bit + %val32_signed = sext i8 %val8_sext32 to i32 + store volatile i32 %val32_signed, i32* @var_32bit +; CHECK: adrp {{x[0-9]+}}, var_8bit +; CHECK: ldrsb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit] + +; match a zero-extending load volatile 8-bit -> 32-bit + %val8_zext32 = load volatile i8* @var_8bit + %val32_unsigned = zext i8 %val8_zext32 to i32 + store volatile i32 %val32_unsigned, i32* @var_32bit +; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit] + +; match an any-extending load volatile 8-bit -> 32-bit + %val8_anyext = load volatile i8* @var_8bit + %newval8 = add i8 %val8_anyext, 1 + store volatile i8 %newval8, i8* @var_8bit +; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit] + +; match a sign-extending load volatile 8-bit -> 64-bit + %val8_sext64 = load volatile i8* @var_8bit + %val64_signed = sext i8 %val8_sext64 to i64 + store volatile i64 %val64_signed, i64* @var_64bit +; CHECK: ldrsb {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit] + +; match a zero-extending load volatile 8-bit -> 64-bit. +; This uses the fact that ldrb w0, [x0] will zero out the high 32-bits +; of x0 so it's identical to load volatileing to 32-bits. 
+ %val8_zext64 = load volatile i8* @var_8bit + %val64_unsigned = zext i8 %val8_zext64 to i64 + store volatile i64 %val64_unsigned, i64* @var_64bit +; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit] + +; truncating store volatile 32-bits to 8-bits + %val32 = load volatile i32* @var_32bit + %val8_trunc32 = trunc i32 %val32 to i8 + store volatile i8 %val8_trunc32, i8* @var_8bit +; CHECK: strb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit] + +; truncating store volatile 64-bits to 8-bits + %val64 = load volatile i64* @var_64bit + %val8_trunc64 = trunc i64 %val64 to i8 + store volatile i8 %val8_trunc64, i8* @var_8bit +; CHECK: strb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit] + + ret void +} + +define void @ldst_16bit() { +; CHECK: ldst_16bit: + +; No architectural support for load volatiles to 16-bit promote i16 during +; lowering. + +; match a sign-extending load volatile 16-bit -> 32-bit + %val16_sext32 = load volatile i16* @var_16bit + %val32_signed = sext i16 %val16_sext32 to i32 + store volatile i32 %val32_signed, i32* @var_32bit +; CHECK: adrp {{x[0-9]+}}, var_16bit +; CHECK: ldrsh {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit] + +; match a zero-extending load volatile 16-bit -> 32-bit + %val16_zext32 = load volatile i16* @var_16bit + %val32_unsigned = zext i16 %val16_zext32 to i32 + store volatile i32 %val32_unsigned, i32* @var_32bit +; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit] + +; match an any-extending load volatile 16-bit -> 32-bit + %val16_anyext = load volatile i16* @var_16bit + %newval16 = add i16 %val16_anyext, 1 + store volatile i16 %newval16, i16* @var_16bit +; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit] + +; match a sign-extending load volatile 16-bit -> 64-bit + %val16_sext64 = load volatile i16* @var_16bit + %val64_signed = sext i16 %val16_sext64 to i64 + store volatile i64 %val64_signed, i64* @var_64bit +; CHECK: ldrsh {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit] + +; match a zero-extending load volatile 16-bit -> 64-bit. +; This uses the fact that ldrb w0, [x0] will zero out the high 32-bits +; of x0 so it's identical to load volatileing to 32-bits. 
+ %val16_zext64 = load volatile i16* @var_16bit + %val64_unsigned = zext i16 %val16_zext64 to i64 + store volatile i64 %val64_unsigned, i64* @var_64bit +; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit] + +; truncating store volatile 32-bits to 16-bits + %val32 = load volatile i32* @var_32bit + %val16_trunc32 = trunc i32 %val32 to i16 + store volatile i16 %val16_trunc32, i16* @var_16bit +; CHECK: strh {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit] + +; truncating store volatile 64-bits to 16-bits + %val64 = load volatile i64* @var_64bit + %val16_trunc64 = trunc i64 %val64 to i16 + store volatile i16 %val16_trunc64, i16* @var_16bit +; CHECK: strh {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit] + + ret void +} + +define void @ldst_32bit() { +; CHECK: ldst_32bit: + +; Straight 32-bit load/store + %val32_noext = load volatile i32* @var_32bit + store volatile i32 %val32_noext, i32* @var_32bit +; CHECK: adrp {{x[0-9]+}}, var_32bit +; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_32bit] +; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_32bit] + +; Zero-extension to 64-bits + %val32_zext = load volatile i32* @var_32bit + %val64_unsigned = zext i32 %val32_zext to i64 + store volatile i64 %val64_unsigned, i64* @var_64bit +; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_32bit] +; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_64bit] + +; Sign-extension to 64-bits + %val32_sext = load volatile i32* @var_32bit + %val64_signed = sext i32 %val32_sext to i64 + store volatile i64 %val64_signed, i64* @var_64bit +; CHECK: ldrsw {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_32bit] +; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_64bit] + +; Truncation from 64-bits + %val64_trunc = load volatile i64* @var_64bit + %val32_trunc = trunc i64 %val64_trunc to i32 + store volatile i32 %val32_trunc, i32* @var_32bit +; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_64bit] +; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_32bit] + + ret void +} + +@arr8 = global i8* null +@arr16 = global i16* null +@arr32 = global i32* null +@arr64 = global i64* null + +; Now check that our selection copes with accesses more complex than a +; single symbol. Permitted offsets should be folded into the loads and +; stores. Since all forms use the same Operand it's only necessary to +; check the various access-sizes involved. 
+ +define void @ldst_complex_offsets() { +; CHECK: ldst_complex_offsets + %arr8_addr = load volatile i8** @arr8 +; CHECK: adrp {{x[0-9]+}}, arr8 +; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:arr8] + + %arr8_sub1_addr = getelementptr i8* %arr8_addr, i64 1 + %arr8_sub1 = load volatile i8* %arr8_sub1_addr + store volatile i8 %arr8_sub1, i8* @var_8bit +; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #1] + + %arr8_sub4095_addr = getelementptr i8* %arr8_addr, i64 4095 + %arr8_sub4095 = load volatile i8* %arr8_sub4095_addr + store volatile i8 %arr8_sub4095, i8* @var_8bit +; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #4095] + + + %arr16_addr = load volatile i16** @arr16 +; CHECK: adrp {{x[0-9]+}}, arr16 +; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:arr16] + + %arr16_sub1_addr = getelementptr i16* %arr16_addr, i64 1 + %arr16_sub1 = load volatile i16* %arr16_sub1_addr + store volatile i16 %arr16_sub1, i16* @var_16bit +; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, #2] + + %arr16_sub4095_addr = getelementptr i16* %arr16_addr, i64 4095 + %arr16_sub4095 = load volatile i16* %arr16_sub4095_addr + store volatile i16 %arr16_sub4095, i16* @var_16bit +; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, #8190] + + + %arr32_addr = load volatile i32** @arr32 +; CHECK: adrp {{x[0-9]+}}, arr32 +; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:arr32] + + %arr32_sub1_addr = getelementptr i32* %arr32_addr, i64 1 + %arr32_sub1 = load volatile i32* %arr32_sub1_addr + store volatile i32 %arr32_sub1, i32* @var_32bit +; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #4] + + %arr32_sub4095_addr = getelementptr i32* %arr32_addr, i64 4095 + %arr32_sub4095 = load volatile i32* %arr32_sub4095_addr + store volatile i32 %arr32_sub4095, i32* @var_32bit +; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #16380] + + + %arr64_addr = load volatile i64** @arr64 +; CHECK: adrp {{x[0-9]+}}, arr64 +; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:arr64] + + %arr64_sub1_addr = getelementptr i64* %arr64_addr, i64 1 + %arr64_sub1 = load volatile i64* %arr64_sub1_addr + store volatile i64 %arr64_sub1, i64* @var_64bit +; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #8] + + %arr64_sub4095_addr = getelementptr i64* %arr64_addr, i64 4095 + %arr64_sub4095 = load volatile i64* %arr64_sub4095_addr + store volatile i64 %arr64_sub4095, i64* @var_64bit +; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #32760] + + ret void +} + +define void @ldst_float() { +; CHECK: ldst_float: + + %valfp = load volatile float* @var_float +; CHECK: adrp {{x[0-9]+}}, var_float +; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_float] + + store volatile float %valfp, float* @var_float +; CHECK: str {{s[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_float] + + ret void +} + +define void @ldst_double() { +; CHECK: ldst_double: + + %valfp = load volatile double* @var_double +; CHECK: adrp {{x[0-9]+}}, var_double +; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_double] + + store volatile double %valfp, double* @var_double +; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_double] + + ret void +} diff --git a/test/CodeGen/CellSPU/lit.local.cfg b/test/CodeGen/AArch64/lit.local.cfg index ea00867701b2..c5ce2411ed48 100644 --- a/test/CodeGen/CellSPU/lit.local.cfg +++ b/test/CodeGen/AArch64/lit.local.cfg @@ -1,6 +1,6 @@ config.suffixes = ['.ll', '.c', '.cpp'] targets = set(config.root.targets_to_build.split()) -if not 'CellSPU' in targets: +if not 'AArch64' in targets: config.unsupported = True diff --git a/test/CodeGen/AArch64/literal_pools.ll b/test/CodeGen/AArch64/literal_pools.ll new file mode 100644 index 000000000000..e09084148fdf --- 
/dev/null +++ b/test/CodeGen/AArch64/literal_pools.ll @@ -0,0 +1,55 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +@var32 = global i32 0 +@var64 = global i64 0 + +define void @foo() { +; CHECK: foo: + %val32 = load i32* @var32 + %val64 = load i64* @var64 + + %val32_lit32 = and i32 %val32, 123456785 + store volatile i32 %val32_lit32, i32* @var32 +; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI0_[0-9]+]] +; CHECK: ldr {{w[0-9]+}}, [x[[LITBASE]], #:lo12:[[CURLIT]]] + + %val64_lit32 = and i64 %val64, 305402420 + store volatile i64 %val64_lit32, i64* @var64 +; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI0_[0-9]+]] +; CHECK: ldr {{w[0-9]+}}, [x[[LITBASE]], #:lo12:[[CURLIT]]] + + %val64_lit32signed = and i64 %val64, -12345678 + store volatile i64 %val64_lit32signed, i64* @var64 +; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI0_[0-9]+]] +; CHECK: ldrsw {{x[0-9]+}}, [x[[LITBASE]], #:lo12:[[CURLIT]]] + + %val64_lit64 = and i64 %val64, 1234567898765432 + store volatile i64 %val64_lit64, i64* @var64 +; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI0_[0-9]+]] +; CHECK: ldr {{x[0-9]+}}, [x[[LITBASE]], #:lo12:[[CURLIT]]] + + ret void +} + +@varfloat = global float 0.0 +@vardouble = global double 0.0 + +define void @floating_lits() { +; CHECK: floating_lits: + + %floatval = load float* @varfloat + %newfloat = fadd float %floatval, 128.0 +; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI1_[0-9]+]] +; CHECK: ldr {{s[0-9]+}}, [x[[LITBASE]], #:lo12:[[CURLIT]]] +; CHECK: fadd + store float %newfloat, float* @varfloat + + %doubleval = load double* @vardouble + %newdouble = fadd double %doubleval, 129.0 +; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI1_[0-9]+]] +; CHECK: ldr {{d[0-9]+}}, [x[[LITBASE]], #:lo12:[[CURLIT]]] +; CHECK: fadd + store double %newdouble, double* @vardouble + + ret void +} diff --git a/test/CodeGen/AArch64/local_vars.ll b/test/CodeGen/AArch64/local_vars.ll new file mode 100644 index 000000000000..5cbf5a37ec54 --- /dev/null +++ b/test/CodeGen/AArch64/local_vars.ll @@ -0,0 +1,57 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 -disable-fp-elim | FileCheck -check-prefix CHECK-WITHFP %s + +; Make sure a reasonably sane prologue and epilogue are +; generated. This test is not robust in the face of an frame-handling +; evolving, but still has value for unrelated changes, I +; believe. +; +; In particular, it will fail when ldp/stp are used for frame setup, +; when FP-elim is implemented, and when addressing from FP is +; implemented. + +@var = global i64 0 +@local_addr = global i64* null + +declare void @foo() + +define void @trivial_func() nounwind { +; CHECK: trivial_func: // @trivial_func +; CHECK-NEXT: // BB#0 +; CHECK-NEXT: ret + + ret void +} + +define void @trivial_fp_func() { +; CHECK-WITHFP: trivial_fp_func: + +; CHECK-WITHFP: sub sp, sp, #16 +; CHECK-WITHFP: stp x29, x30, [sp] +; CHECK-WITHFP-NEXT: mov x29, sp + +; Dont't really care, but it would be a Bad Thing if this came after the epilogue. 
+; CHECK: bl foo + call void @foo() + ret void + +; CHECK-WITHFP: ldp x29, x30, [sp] +; CHECK-WITHFP: add sp, sp, #16 + +; CHECK-WITHFP: ret +} + +define void @stack_local() { + %local_var = alloca i64 +; CHECK: stack_local: +; CHECK: sub sp, sp, #16 + + %val = load i64* @var + store i64 %val, i64* %local_var +; CHECK: str {{x[0-9]+}}, [sp, #{{[0-9]+}}] + + store i64* %local_var, i64** @local_addr +; CHECK: add {{x[0-9]+}}, sp, #{{[0-9]+}} + + ret void +} diff --git a/test/CodeGen/AArch64/logical-imm.ll b/test/CodeGen/AArch64/logical-imm.ll new file mode 100644 index 000000000000..5f3f4da0cdad --- /dev/null +++ b/test/CodeGen/AArch64/logical-imm.ll @@ -0,0 +1,84 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +@var32 = global i32 0 +@var64 = global i64 0 + +define void @test_and(i32 %in32, i64 %in64) { +; CHECK: test_and: + + %val0 = and i32 %in32, 2863311530 + store volatile i32 %val0, i32* @var32 +; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xaaaaaaaa + + %val1 = and i32 %in32, 4293984240 + store volatile i32 %val1, i32* @var32 +; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xfff0fff0 + + %val2 = and i64 %in64, 9331882296111890817 + store volatile i64 %val2, i64* @var64 +; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0x8181818181818181 + + %val3 = and i64 %in64, 18429855317404942275 + store volatile i64 %val3, i64* @var64 +; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xffc3ffc3ffc3ffc3 + + ret void +} + +define void @test_orr(i32 %in32, i64 %in64) { +; CHECK: test_orr: + + %val0 = or i32 %in32, 2863311530 + store volatile i32 %val0, i32* @var32 +; CHECK: orr {{w[0-9]+}}, {{w[0-9]+}}, #0xaaaaaaaa + + %val1 = or i32 %in32, 4293984240 + store volatile i32 %val1, i32* @var32 +; CHECK: orr {{w[0-9]+}}, {{w[0-9]+}}, #0xfff0fff0 + + %val2 = or i64 %in64, 9331882296111890817 + store volatile i64 %val2, i64* @var64 +; CHECK: orr {{x[0-9]+}}, {{x[0-9]+}}, #0x8181818181818181 + + %val3 = or i64 %in64, 18429855317404942275 + store volatile i64 %val3, i64* @var64 +; CHECK: orr {{x[0-9]+}}, {{x[0-9]+}}, #0xffc3ffc3ffc3ffc3 + + ret void +} + +define void @test_eor(i32 %in32, i64 %in64) { +; CHECK: test_eor: + + %val0 = xor i32 %in32, 2863311530 + store volatile i32 %val0, i32* @var32 +; CHECK: eor {{w[0-9]+}}, {{w[0-9]+}}, #0xaaaaaaaa + + %val1 = xor i32 %in32, 4293984240 + store volatile i32 %val1, i32* @var32 +; CHECK: eor {{w[0-9]+}}, {{w[0-9]+}}, #0xfff0fff0 + + %val2 = xor i64 %in64, 9331882296111890817 + store volatile i64 %val2, i64* @var64 +; CHECK: eor {{x[0-9]+}}, {{x[0-9]+}}, #0x8181818181818181 + + %val3 = xor i64 %in64, 18429855317404942275 + store volatile i64 %val3, i64* @var64 +; CHECK: eor {{x[0-9]+}}, {{x[0-9]+}}, #0xffc3ffc3ffc3ffc3 + + ret void +} + +define void @test_mov(i32 %in32, i64 %in64) { +; CHECK: test_mov: + %val0 = add i32 %in32, 2863311530 + store i32 %val0, i32* @var32 +; CHECK: orr {{w[0-9]+}}, wzr, #0xaaaaaaaa + + %val1 = add i64 %in64, 11068046444225730969 + store i64 %val1, i64* @var64 +; CHECK: orr {{x[0-9]+}}, xzr, #0x9999999999999999 + + ret void +; CHECK: ret +} diff --git a/test/CodeGen/AArch64/logical_shifted_reg.ll b/test/CodeGen/AArch64/logical_shifted_reg.ll new file mode 100644 index 000000000000..bbbfcc1b9118 --- /dev/null +++ b/test/CodeGen/AArch64/logical_shifted_reg.ll @@ -0,0 +1,224 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 | FileCheck %s + +@var1_32 = global i32 0 +@var2_32 = global i32 0 + +@var1_64 = global i64 0 +@var2_64 = global i64 0 + +define void @logical_32bit() { +; CHECK: 
logical_32bit: + %val1 = load i32* @var1_32 + %val2 = load i32* @var2_32 + + ; First check basic and/bic/or/orn/eor/eon patterns with no shift + %neg_val2 = xor i32 -1, %val2 + + %and_noshift = and i32 %val1, %val2 +; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + store volatile i32 %and_noshift, i32* @var1_32 + %bic_noshift = and i32 %neg_val2, %val1 +; CHECK: bic {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + store volatile i32 %bic_noshift, i32* @var1_32 + + %or_noshift = or i32 %val1, %val2 +; CHECK: orr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + store volatile i32 %or_noshift, i32* @var1_32 + %orn_noshift = or i32 %neg_val2, %val1 +; CHECK: orn {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + store volatile i32 %orn_noshift, i32* @var1_32 + + %xor_noshift = xor i32 %val1, %val2 +; CHECK: eor {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + store volatile i32 %xor_noshift, i32* @var1_32 + %xorn_noshift = xor i32 %neg_val2, %val1 +; CHECK: eon {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + store volatile i32 %xorn_noshift, i32* @var1_32 + + ; Check the maximum shift on each + %operand_lsl31 = shl i32 %val2, 31 + %neg_operand_lsl31 = xor i32 -1, %operand_lsl31 + + %and_lsl31 = and i32 %val1, %operand_lsl31 +; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #31 + store volatile i32 %and_lsl31, i32* @var1_32 + %bic_lsl31 = and i32 %val1, %neg_operand_lsl31 +; CHECK: bic {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #31 + store volatile i32 %bic_lsl31, i32* @var1_32 + + %or_lsl31 = or i32 %val1, %operand_lsl31 +; CHECK: orr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #31 + store volatile i32 %or_lsl31, i32* @var1_32 + %orn_lsl31 = or i32 %val1, %neg_operand_lsl31 +; CHECK: orn {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #31 + store volatile i32 %orn_lsl31, i32* @var1_32 + + %xor_lsl31 = xor i32 %val1, %operand_lsl31 +; CHECK: eor {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #31 + store volatile i32 %xor_lsl31, i32* @var1_32 + %xorn_lsl31 = xor i32 %val1, %neg_operand_lsl31 +; CHECK: eon {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #31 + store volatile i32 %xorn_lsl31, i32* @var1_32 + + ; Check other shifts on a subset + %operand_asr10 = ashr i32 %val2, 10 + %neg_operand_asr10 = xor i32 -1, %operand_asr10 + + %bic_asr10 = and i32 %val1, %neg_operand_asr10 +; CHECK: bic {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #10 + store volatile i32 %bic_asr10, i32* @var1_32 + %xor_asr10 = xor i32 %val1, %operand_asr10 +; CHECK: eor {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #10 + store volatile i32 %xor_asr10, i32* @var1_32 + + %operand_lsr1 = lshr i32 %val2, 1 + %neg_operand_lsr1 = xor i32 -1, %operand_lsr1 + + %orn_lsr1 = or i32 %val1, %neg_operand_lsr1 +; CHECK: orn {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #1 + store volatile i32 %orn_lsr1, i32* @var1_32 + %xor_lsr1 = xor i32 %val1, %operand_lsr1 +; CHECK: eor {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #1 + store volatile i32 %xor_lsr1, i32* @var1_32 + + %operand_ror20_big = shl i32 %val2, 12 + %operand_ror20_small = lshr i32 %val2, 20 + %operand_ror20 = or i32 %operand_ror20_big, %operand_ror20_small + %neg_operand_ror20 = xor i32 -1, %operand_ror20 + + %xorn_ror20 = xor i32 %val1, %neg_operand_ror20 +; CHECK: eon {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, ror #20 + store volatile i32 %xorn_ror20, i32* @var1_32 + %and_ror20 = and i32 %val1, %operand_ror20 +; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, ror #20 + store volatile i32 %and_ror20, i32* @var1_32 + + ret void +} + +define void @logical_64bit() { +; CHECK: logical_64bit: + %val1 = load i64* @var1_64 + %val2 = load i64* 
@var2_64 + + ; First check basic and/bic/or/orn/eor/eon patterns with no shift + %neg_val2 = xor i64 -1, %val2 + + %and_noshift = and i64 %val1, %val2 +; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + store volatile i64 %and_noshift, i64* @var1_64 + %bic_noshift = and i64 %neg_val2, %val1 +; CHECK: bic {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + store volatile i64 %bic_noshift, i64* @var1_64 + + %or_noshift = or i64 %val1, %val2 +; CHECK: orr {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + store volatile i64 %or_noshift, i64* @var1_64 + %orn_noshift = or i64 %neg_val2, %val1 +; CHECK: orn {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + store volatile i64 %orn_noshift, i64* @var1_64 + + %xor_noshift = xor i64 %val1, %val2 +; CHECK: eor {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + store volatile i64 %xor_noshift, i64* @var1_64 + %xorn_noshift = xor i64 %neg_val2, %val1 +; CHECK: eon {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + store volatile i64 %xorn_noshift, i64* @var1_64 + + ; Check the maximum shift on each + %operand_lsl63 = shl i64 %val2, 63 + %neg_operand_lsl63 = xor i64 -1, %operand_lsl63 + + %and_lsl63 = and i64 %val1, %operand_lsl63 +; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #63 + store volatile i64 %and_lsl63, i64* @var1_64 + %bic_lsl63 = and i64 %val1, %neg_operand_lsl63 +; CHECK: bic {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #63 + store volatile i64 %bic_lsl63, i64* @var1_64 + + %or_lsl63 = or i64 %val1, %operand_lsl63 +; CHECK: orr {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #63 + store volatile i64 %or_lsl63, i64* @var1_64 + %orn_lsl63 = or i64 %val1, %neg_operand_lsl63 +; CHECK: orn {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #63 + store volatile i64 %orn_lsl63, i64* @var1_64 + + %xor_lsl63 = xor i64 %val1, %operand_lsl63 +; CHECK: eor {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #63 + store volatile i64 %xor_lsl63, i64* @var1_64 + %xorn_lsl63 = xor i64 %val1, %neg_operand_lsl63 +; CHECK: eon {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #63 + store volatile i64 %xorn_lsl63, i64* @var1_64 + + ; Check other shifts on a subset + %operand_asr10 = ashr i64 %val2, 10 + %neg_operand_asr10 = xor i64 -1, %operand_asr10 + + %bic_asr10 = and i64 %val1, %neg_operand_asr10 +; CHECK: bic {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #10 + store volatile i64 %bic_asr10, i64* @var1_64 + %xor_asr10 = xor i64 %val1, %operand_asr10 +; CHECK: eor {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #10 + store volatile i64 %xor_asr10, i64* @var1_64 + + %operand_lsr1 = lshr i64 %val2, 1 + %neg_operand_lsr1 = xor i64 -1, %operand_lsr1 + + %orn_lsr1 = or i64 %val1, %neg_operand_lsr1 +; CHECK: orn {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #1 + store volatile i64 %orn_lsr1, i64* @var1_64 + %xor_lsr1 = xor i64 %val1, %operand_lsr1 +; CHECK: eor {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #1 + store volatile i64 %xor_lsr1, i64* @var1_64 + + ; Construct a rotate-right from a bunch of other logical + ; operations. 
DAGCombiner should ensure we the ROTR during + ; selection + %operand_ror20_big = shl i64 %val2, 44 + %operand_ror20_small = lshr i64 %val2, 20 + %operand_ror20 = or i64 %operand_ror20_big, %operand_ror20_small + %neg_operand_ror20 = xor i64 -1, %operand_ror20 + + %xorn_ror20 = xor i64 %val1, %neg_operand_ror20 +; CHECK: eon {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, ror #20 + store volatile i64 %xorn_ror20, i64* @var1_64 + %and_ror20 = and i64 %val1, %operand_ror20 +; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, ror #20 + store volatile i64 %and_ror20, i64* @var1_64 + + ret void +} + +define void @flag_setting() { +; CHECK: flag_setting: + %val1 = load i64* @var1_64 + %val2 = load i64* @var2_64 + +; CHECK: tst {{x[0-9]+}}, {{x[0-9]+}} +; CHECK: b.gt .L + %simple_and = and i64 %val1, %val2 + %tst1 = icmp sgt i64 %simple_and, 0 + br i1 %tst1, label %ret, label %test2 + +test2: +; CHECK: tst {{x[0-9]+}}, {{x[0-9]+}}, lsl #63 +; CHECK: b.lt .L + %shifted_op = shl i64 %val2, 63 + %shifted_and = and i64 %val1, %shifted_op + %tst2 = icmp slt i64 %shifted_and, 0 + br i1 %tst2, label %ret, label %test3 + +test3: +; CHECK: tst {{x[0-9]+}}, {{x[0-9]+}}, asr #12 +; CHECK: b.gt .L + %asr_op = ashr i64 %val2, 12 + %asr_and = and i64 %asr_op, %val1 + %tst3 = icmp sgt i64 %asr_and, 0 + br i1 %tst3, label %ret, label %other_exit + +other_exit: + store volatile i64 %val1, i64* @var1_64 + ret void +ret: + ret void +} diff --git a/test/CodeGen/AArch64/logical_shifted_reg.s b/test/CodeGen/AArch64/logical_shifted_reg.s new file mode 100644 index 000000000000..89aea580119b --- /dev/null +++ b/test/CodeGen/AArch64/logical_shifted_reg.s @@ -0,0 +1,208 @@ + .file "/home/timnor01/a64-trunk/llvm/test/CodeGen/AArch64/logical_shifted_reg.ll" + .text + .globl logical_32bit + .type logical_32bit,@function +logical_32bit: // @logical_32bit + .cfi_startproc +// BB#0: + adrp x0, var1_32 + ldr w1, [x0, #:lo12:var1_32] + adrp x0, var2_32 + ldr w2, [x0, #:lo12:var2_32] + and w3, w1, w2 + adrp x0, var1_32 + str w3, [x0, #:lo12:var1_32] + bic w3, w1, w2 + adrp x0, var1_32 + str w3, [x0, #:lo12:var1_32] + orr w3, w1, w2 + adrp x0, var1_32 + str w3, [x0, #:lo12:var1_32] + orn w3, w1, w2 + adrp x0, var1_32 + str w3, [x0, #:lo12:var1_32] + eor w3, w1, w2 + adrp x0, var1_32 + str w3, [x0, #:lo12:var1_32] + eon w3, w2, w1 + adrp x0, var1_32 + str w3, [x0, #:lo12:var1_32] + and w3, w1, w2, lsl #31 + adrp x0, var1_32 + str w3, [x0, #:lo12:var1_32] + bic w3, w1, w2, lsl #31 + adrp x0, var1_32 + str w3, [x0, #:lo12:var1_32] + orr w3, w1, w2, lsl #31 + adrp x0, var1_32 + str w3, [x0, #:lo12:var1_32] + orn w3, w1, w2, lsl #31 + adrp x0, var1_32 + str w3, [x0, #:lo12:var1_32] + eor w3, w1, w2, lsl #31 + adrp x0, var1_32 + str w3, [x0, #:lo12:var1_32] + eon w3, w1, w2, lsl #31 + adrp x0, var1_32 + str w3, [x0, #:lo12:var1_32] + bic w3, w1, w2, asr #10 + adrp x0, var1_32 + str w3, [x0, #:lo12:var1_32] + eor w3, w1, w2, asr #10 + adrp x0, var1_32 + str w3, [x0, #:lo12:var1_32] + orn w3, w1, w2, lsr #1 + adrp x0, var1_32 + str w3, [x0, #:lo12:var1_32] + eor w3, w1, w2, lsr #1 + adrp x0, var1_32 + str w3, [x0, #:lo12:var1_32] + eon w3, w1, w2, ror #20 + adrp x0, var1_32 + str w3, [x0, #:lo12:var1_32] + and w1, w1, w2, ror #20 + adrp x0, var1_32 + str w1, [x0, #:lo12:var1_32] + ret +.Ltmp0: + .size logical_32bit, .Ltmp0-logical_32bit + .cfi_endproc + + .globl logical_64bit + .type logical_64bit,@function +logical_64bit: // @logical_64bit + .cfi_startproc +// BB#0: + adrp x0, var1_64 + ldr x0, [x0, #:lo12:var1_64] + adrp x1, var2_64 + ldr x1, 
[x1, #:lo12:var2_64] + and x2, x0, x1 + adrp x3, var1_64 + str x2, [x3, #:lo12:var1_64] + bic x2, x0, x1 + adrp x3, var1_64 + str x2, [x3, #:lo12:var1_64] + orr x2, x0, x1 + adrp x3, var1_64 + str x2, [x3, #:lo12:var1_64] + orn x2, x0, x1 + adrp x3, var1_64 + str x2, [x3, #:lo12:var1_64] + eor x2, x0, x1 + adrp x3, var1_64 + str x2, [x3, #:lo12:var1_64] + eon x2, x1, x0 + adrp x3, var1_64 + str x2, [x3, #:lo12:var1_64] + and x2, x0, x1, lsl #63 + adrp x3, var1_64 + str x2, [x3, #:lo12:var1_64] + bic x2, x0, x1, lsl #63 + adrp x3, var1_64 + str x2, [x3, #:lo12:var1_64] + orr x2, x0, x1, lsl #63 + adrp x3, var1_64 + str x2, [x3, #:lo12:var1_64] + orn x2, x0, x1, lsl #63 + adrp x3, var1_64 + str x2, [x3, #:lo12:var1_64] + eor x2, x0, x1, lsl #63 + adrp x3, var1_64 + str x2, [x3, #:lo12:var1_64] + eon x2, x0, x1, lsl #63 + adrp x3, var1_64 + str x2, [x3, #:lo12:var1_64] + bic x2, x0, x1, asr #10 + adrp x3, var1_64 + str x2, [x3, #:lo12:var1_64] + eor x2, x0, x1, asr #10 + adrp x3, var1_64 + str x2, [x3, #:lo12:var1_64] + orn x2, x0, x1, lsr #1 + adrp x3, var1_64 + str x2, [x3, #:lo12:var1_64] + eor x2, x0, x1, lsr #1 + adrp x3, var1_64 + str x2, [x3, #:lo12:var1_64] + eon x2, x0, x1, ror #20 + adrp x3, var1_64 + str x2, [x3, #:lo12:var1_64] + and x0, x0, x1, ror #20 + adrp x1, var1_64 + str x0, [x1, #:lo12:var1_64] + ret +.Ltmp1: + .size logical_64bit, .Ltmp1-logical_64bit + .cfi_endproc + + .globl flag_setting + .type flag_setting,@function +flag_setting: // @flag_setting + .cfi_startproc +// BB#0: + sub sp, sp, #16 + adrp x0, var1_64 + ldr x0, [x0, #:lo12:var1_64] + adrp x1, var2_64 + ldr x1, [x1, #:lo12:var2_64] + tst x0, x1 + str x0, [sp, #8] // 8-byte Folded Spill + str x1, [sp] // 8-byte Folded Spill + b.gt .LBB2_4 + b .LBB2_1 +.LBB2_1: // %test2 + ldr x0, [sp, #8] // 8-byte Folded Reload + ldr x1, [sp] // 8-byte Folded Reload + tst x0, x1, lsl #63 + b.lt .LBB2_4 + b .LBB2_2 +.LBB2_2: // %test3 + ldr x0, [sp, #8] // 8-byte Folded Reload + ldr x1, [sp] // 8-byte Folded Reload + tst x0, x1, asr #12 + b.gt .LBB2_4 + b .LBB2_3 +.LBB2_3: // %other_exit + adrp x0, var1_64 + ldr x1, [sp, #8] // 8-byte Folded Reload + str x1, [x0, #:lo12:var1_64] + add sp, sp, #16 + ret +.LBB2_4: // %ret + add sp, sp, #16 + ret +.Ltmp2: + .size flag_setting, .Ltmp2-flag_setting + .cfi_endproc + + .type var1_32,@object // @var1_32 + .bss + .globl var1_32 + .align 2 +var1_32: + .word 0 // 0x0 + .size var1_32, 4 + + .type var2_32,@object // @var2_32 + .globl var2_32 + .align 2 +var2_32: + .word 0 // 0x0 + .size var2_32, 4 + + .type var1_64,@object // @var1_64 + .globl var1_64 + .align 3 +var1_64: + .xword 0 // 0x0 + .size var1_64, 8 + + .type var2_64,@object // @var2_64 + .globl var2_64 + .align 3 +var2_64: + .xword 0 // 0x0 + .size var2_64, 8 + + diff --git a/test/CodeGen/AArch64/movw-consts.ll b/test/CodeGen/AArch64/movw-consts.ll new file mode 100644 index 000000000000..b8a5fb932202 --- /dev/null +++ b/test/CodeGen/AArch64/movw-consts.ll @@ -0,0 +1,124 @@ +; RUN: llc -verify-machineinstrs -O0 < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +define i64 @test0() { +; CHECK: test0: +; Not produced by move wide instructions, but good to make sure we can return 0 anyway: +; CHECK: mov x0, xzr + ret i64 0 +} + +define i64 @test1() { +; CHECK: test1: +; CHECK: movz x0, #1 + ret i64 1 +} + +define i64 @test2() { +; CHECK: test2: +; CHECK: movz x0, #65535 + ret i64 65535 +} + +define i64 @test3() { +; CHECK: test3: +; CHECK: movz x0, #1, lsl #16 + ret i64 65536 +} + +define i64 @test4() { +; CHECK: test4: +; CHECK: 
movz x0, #65535, lsl #16 + ret i64 4294901760 +} + +define i64 @test5() { +; CHECK: test5: +; CHECK: movz x0, #1, lsl #32 + ret i64 4294967296 +} + +define i64 @test6() { +; CHECK: test6: +; CHECK: movz x0, #65535, lsl #32 + ret i64 281470681743360 +} + +define i64 @test7() { +; CHECK: test7: +; CHECK: movz x0, #1, lsl #48 + ret i64 281474976710656 +} + +; A 32-bit MOVN can generate some 64-bit patterns that a 64-bit one +; couldn't. Useful even for i64 +define i64 @test8() { +; CHECK: test8: +; CHECK: movn w0, #60875 + ret i64 4294906420 +} + +define i64 @test9() { +; CHECK: test9: +; CHECK: movn x0, #0 + ret i64 -1 +} + +define i64 @test10() { +; CHECK: test10: +; CHECK: movn x0, #60875, lsl #16 + ret i64 18446744069720047615 +} + +; For reasonably legitimate reasons returning an i32 results in the +; selection of an i64 constant, so we need a different idiom to test that selection +@var32 = global i32 0 + +define void @test11() { +; CHECK: test11: +; CHECK: mov {{w[0-9]+}}, wzr + store i32 0, i32* @var32 + ret void +} + +define void @test12() { +; CHECK: test12: +; CHECK: movz {{w[0-9]+}}, #1 + store i32 1, i32* @var32 + ret void +} + +define void @test13() { +; CHECK: test13: +; CHECK: movz {{w[0-9]+}}, #65535 + store i32 65535, i32* @var32 + ret void +} + +define void @test14() { +; CHECK: test14: +; CHECK: movz {{w[0-9]+}}, #1, lsl #16 + store i32 65536, i32* @var32 + ret void +} + +define void @test15() { +; CHECK: test15: +; CHECK: movz {{w[0-9]+}}, #65535, lsl #16 + store i32 4294901760, i32* @var32 + ret void +} + +define void @test16() { +; CHECK: test16: +; CHECK: movn {{w[0-9]+}}, #0 + store i32 -1, i32* @var32 + ret void +} + +define i64 @test17() { +; CHECK: test17: + + ; Mustn't MOVN w0 here. +; CHECK: movn x0, #2 + ret i64 -3 +} diff --git a/test/CodeGen/AArch64/pic-eh-stubs.ll b/test/CodeGen/AArch64/pic-eh-stubs.ll new file mode 100644 index 000000000000..77bf691cbcbd --- /dev/null +++ b/test/CodeGen/AArch64/pic-eh-stubs.ll @@ -0,0 +1,60 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -o - %s | FileCheck %s + +; Make sure exception-handling PIC code can be linked correctly. An alternative +; to the sequence described below would have .gcc_except_table itself writable +; and not use the indirection, but this isn't what LLVM does right now. + + ; There should be a read-only .gcc_except_table section... +; CHECK: .section .gcc_except_table,"a" + + ; ... referring indirectly to stubs for its typeinfo ... +; CHECK: // @TType Encoding = indirect pcrel sdata8 + ; ... one of which is "int"'s typeinfo +; CHECK: .Ltmp9: +; CHECK-NEXT: .xword .L_ZTIi.DW.stub-.Ltmp9 + + ; .. and which is properly defined (in a writable section for the dynamic loader) later. 
+; CHECK: .section .data.rel,"aw" +; CHECK: .L_ZTIi.DW.stub: +; CHECK-NEXT: .xword _ZTIi + +@_ZTIi = external constant i8* + +define i32 @_Z3barv() { +entry: + invoke void @_Z3foov() + to label %return unwind label %lpad + +lpad: ; preds = %entry + %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + catch i8* bitcast (i8** @_ZTIi to i8*) + %1 = extractvalue { i8*, i32 } %0, 1 + %2 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) nounwind + %matches = icmp eq i32 %1, %2 + br i1 %matches, label %catch, label %eh.resume + +catch: ; preds = %lpad + %3 = extractvalue { i8*, i32 } %0, 0 + %4 = tail call i8* @__cxa_begin_catch(i8* %3) nounwind + %5 = bitcast i8* %4 to i32* + %exn.scalar = load i32* %5, align 4 + tail call void @__cxa_end_catch() nounwind + br label %return + +return: ; preds = %entry, %catch + %retval.0 = phi i32 [ %exn.scalar, %catch ], [ 42, %entry ] + ret i32 %retval.0 + +eh.resume: ; preds = %lpad + resume { i8*, i32 } %0 +} + +declare void @_Z3foov() + +declare i32 @__gxx_personality_v0(...) + +declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone + +declare i8* @__cxa_begin_catch(i8*) + +declare void @__cxa_end_catch()
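For readers unfamiliar with the harness: the RUN line at the top of pic-eh-stubs.ll is executed by llvm-lit, which substitutes %s with the path of the test file itself, so the compiler output is piped straight into FileCheck and matched against the CHECK lines embedded in the same file. A rough manual equivalent, assuming the file has been saved as pic-eh-stubs.ll in the current directory, is:

  llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -o - pic-eh-stubs.ll | FileCheck pic-eh-stubs.ll

FileCheck reads the assembly on stdin and the expected patterns from the file named on its command line, so one .ll file serves as both the input and the specification.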
\ No newline at end of file diff --git a/test/CodeGen/AArch64/regress-bitcast-formals.ll b/test/CodeGen/AArch64/regress-bitcast-formals.ll new file mode 100644 index 000000000000..28dc9a7e2515 --- /dev/null +++ b/test/CodeGen/AArch64/regress-bitcast-formals.ll @@ -0,0 +1,11 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s + +; CallingConv.td requires a bitcast for vector arguments. Make sure we're +; actually capable of that (the test was omitted from LowerFormalArguments). + +define void @test_bitcast_lower(<2 x i32> %a) { +; CHECK: test_bitcast_lower: + + ret void +; CHECK: ret +} diff --git a/test/CodeGen/AArch64/regress-f128csel-flags.ll b/test/CodeGen/AArch64/regress-f128csel-flags.ll new file mode 100644 index 000000000000..b35185ccd6f3 --- /dev/null +++ b/test/CodeGen/AArch64/regress-f128csel-flags.ll @@ -0,0 +1,27 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s + +; We used to not mark NZCV as being used in the continuation basic-block +; when lowering a 128-bit "select" to branches. This meant a subsequent use +; of the same flags gave an internal fault here. + +declare void @foo(fp128) + +define double @test_f128csel_flags(i32 %lhs, fp128 %a, fp128 %b) nounwind { +; CHECK: test_f128csel_flags + + %tst = icmp ne i32 %lhs, 42 + %val = select i1 %tst, fp128 %a, fp128 %b +; CHECK: cmp w0, #42 +; CHECK: b.eq .LBB0 + + call void @foo(fp128 %val) + %retval = select i1 %tst, double 4.0, double 5.0 + + ; It's also reasonably important that the actual fcsel comes before the + ; function call since bl may corrupt NZCV. We were doing the right thing anyway, + ; but just as well test it while we're here. +; CHECK: fcsel {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, ne +; CHECK: bl foo + + ret double %retval +} diff --git a/test/CodeGen/AArch64/regress-tail-livereg.ll b/test/CodeGen/AArch64/regress-tail-livereg.ll new file mode 100644 index 000000000000..8d5485cae4c8 --- /dev/null +++ b/test/CodeGen/AArch64/regress-tail-livereg.ll @@ -0,0 +1,19 @@ +; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s +@var = global void()* zeroinitializer + +declare void @bar() + +define void @foo() { +; CHECK: foo: + %func = load void()** @var + + ; Calling a function encourages @foo to use a callee-saved register, + ; which makes it a natural choice for the tail call itself. But we don't + ; want that: the final "br xN" has to use a temporary or argument + ; register. + call void @bar() + + tail call void %func() +; CHECK: br {{x([0-79]|1[0-8])}} + ret void +}
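The final CHECK in regress-tail-livereg.ll is easier to follow with the failure mode spelled out. The sketch below is purely illustrative (the registers and frame layout are assumptions, not taken from real output): if the loaded function pointer were kept in a callee-saved register across the call to @bar, the epilogue would restore the caller's copy of that register just before the final branch, and the tail call would jump through a stale value.

  ldr  x19, [x8, #:lo12:var]   // hypothetical: target held in callee-saved x19
  bl   bar                     // x19 survives the call, which makes it tempting
  ldp  x19, x30, [sp], #16     // epilogue restores the caller's x19 and lr
  br   x19                     // branches through the restored, wrong, value

Hence the test's comment and regex restrict the br operand to temporary and argument registers.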
\ No newline at end of file diff --git a/test/CodeGen/AArch64/regress-tblgen-chains.ll b/test/CodeGen/AArch64/regress-tblgen-chains.ll new file mode 100644 index 000000000000..e54552fd8edf --- /dev/null +++ b/test/CodeGen/AArch64/regress-tblgen-chains.ll @@ -0,0 +1,36 @@ +; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s + +; When generating DAG selection tables, TableGen used to only flag an +; instruction as needing a chain on its own account if it had a built-in pattern +; which used the chain. This meant that the AArch64 load/stores weren't +; recognised and so both loads from %locvar below were coalesced into a single +; LS8_LDR instruction (same operands other than the non-existent chain) and the +; increment was lost at return. + +; This was obviously a Bad Thing. + +declare void @bar(i8*) + +define i64 @test_chains() { +; CHECK: test_chains: + + %locvar = alloca i8 + + call void @bar(i8* %locvar) +; CHECK: bl bar + + %inc.1 = load i8* %locvar + %inc.2 = zext i8 %inc.1 to i64 + %inc.3 = add i64 %inc.2, 1 + %inc.4 = trunc i64 %inc.3 to i8 + store i8 %inc.4, i8* %locvar +; CHECK: ldrb {{w[0-9]+}}, [sp, [[LOCADDR:#[0-9]+]]] +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #1 +; CHECK: strb {{w[0-9]+}}, [sp, [[LOCADDR]]] +; CHECK: ldrb {{w[0-9]+}}, [sp, [[LOCADDR]]] + + %ret.1 = load i8* %locvar + %ret.2 = zext i8 %ret.1 to i64 + ret i64 %ret.2 +; CHECK: ret +} diff --git a/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll b/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll new file mode 100644 index 000000000000..980e2ffef901 --- /dev/null +++ b/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll @@ -0,0 +1,37 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-fp-elim < %s | FileCheck %s +@var = global i32 0 + +declare void @bar() + +define void @test_w29_reserved() { +; CHECK: test_w29_reserved: +; CHECK: add x29, sp, #{{[0-9]+}} + + %val1 = load volatile i32* @var + %val2 = load volatile i32* @var + %val3 = load volatile i32* @var + %val4 = load volatile i32* @var + %val5 = load volatile i32* @var + %val6 = load volatile i32* @var + %val7 = load volatile i32* @var + %val8 = load volatile i32* @var + %val9 = load volatile i32* @var + +; CHECK-NOT: ldr w29, + + ; Call to prevent fp-elim that occurs regardless in leaf functions. + call void @bar() + + store volatile i32 %val1, i32* @var + store volatile i32 %val2, i32* @var + store volatile i32 %val3, i32* @var + store volatile i32 %val4, i32* @var + store volatile i32 %val5, i32* @var + store volatile i32 %val6, i32* @var + store volatile i32 %val7, i32* @var + store volatile i32 %val8, i32* @var + store volatile i32 %val9, i32* @var + + ret void +; CHECK: ret +} diff --git a/test/CodeGen/AArch64/regress-wzr-allocatable.ll b/test/CodeGen/AArch64/regress-wzr-allocatable.ll new file mode 100644 index 000000000000..764d2bc44f0d --- /dev/null +++ b/test/CodeGen/AArch64/regress-wzr-allocatable.ll @@ -0,0 +1,41 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 + +; When WZR wasn't marked as reserved, this function tried to allocate +; it at O0 and then generated an internal fault (mostly incidentally) +; when it discovered that it was already in use for a multiplication. + +; I'm not really convinced this is a good test since it could easily +; stop testing what it does now with no-one any the wiser. However, I +; can't think of a better way to force the allocator to use WZR +; specifically. 
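Some background on why reserving WZR is the right fix rather than merely discouraging its use: reads of WZR/XZR always return zero and writes to them are discarded, so the register cannot carry a live value at all. In a hypothetical allocation like the one sketched below (not real output), the multiply in the function that follows would silently read 0 instead of the value the allocator believed it had stored there:

  mov  wzr, w0        // the write is simply dropped
  mul  w1, wzr, w2    // reads back zero, not the earlier value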
+ +define void @test() nounwind { +entry: + br label %for.cond + +for.cond: ; preds = %for.body, %entry + br i1 undef, label %for.body, label %for.end + +for.body: ; preds = %for.cond + br label %for.cond + +for.end: ; preds = %for.cond + br label %for.cond6 + +for.cond6: ; preds = %for.body9, %for.end + br i1 undef, label %for.body9, label %while.cond30 + +for.body9: ; preds = %for.cond6 + store i16 0, i16* undef, align 2 + %0 = load i32* undef, align 4 + %1 = load i32* undef, align 4 + %mul15 = mul i32 %0, %1 + %add16 = add i32 %mul15, 32768 + %div = udiv i32 %add16, 65535 + %add17 = add i32 %div, 1 + store i32 %add17, i32* undef, align 4 + br label %for.cond6 + +while.cond30: ; preds = %for.cond6 + ret void +} diff --git a/test/CodeGen/AArch64/setcc-takes-i32.ll new file mode 100644 index 000000000000..d2eb77ab1b54 --- /dev/null +++ b/test/CodeGen/AArch64/setcc-takes-i32.ll @@ -0,0 +1,22 @@ +; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s + +; Most important point here is that the promotion of the i1 works +; correctly. Previously LLVM thought that i64 was the appropriate SetCC output, +; which meant it proceeded in two steps and produced an i64 -> i64 any_ext which +; couldn't be selected and faulted. + +; It was expecting the smallest legal promotion of i1 to be the preferred SetCC +; type, so we'll satisfy it (this actually arguably gives better code anyway, +; with flag-manipulation operations allowed to use W-registers). + +declare {i64, i1} @llvm.umul.with.overflow.i64(i64, i64) + +define i64 @test_select(i64 %lhs, i64 %rhs) { +; CHECK: test_select: + + %res = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %lhs, i64 %rhs) + %flag = extractvalue {i64, i1} %res, 1 + %retval = select i1 %flag, i64 %lhs, i64 %rhs + ret i64 %retval +; CHECK: ret +}
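The promotion problem described in setcc-takes-i32.ll is not specific to the overflow intrinsic: any i1 produced by a comparison goes through the same choice of SetCC result type (presumably the target's getSetCCResultType hook, though the test does not name the mechanism). A minimal sketch of the generic pattern, written for illustration and not part of the test suite:

  define i32 @select_from_icmp(i64 %a, i64 %b) {
    %tst = icmp ult i64 %a, %b          ; produces an i1
    %res = select i1 %tst, i32 1, i32 0 ; the i1 is promoted to the SetCC type
    ret i32 %res
  }

With the fix, the i1 is promoted to the smallest legal type, i32, so the flag value can live in a W register; choosing i64 is what previously left the unselectable i64 -> i64 any_ext in the DAG.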
\ No newline at end of file diff --git a/test/CodeGen/AArch64/sibling-call.ll b/test/CodeGen/AArch64/sibling-call.ll new file mode 100644 index 000000000000..a1ec618b03ba --- /dev/null +++ b/test/CodeGen/AArch64/sibling-call.ll @@ -0,0 +1,97 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +declare void @callee_stack0() +declare void @callee_stack8([8 x i32], i64) +declare void @callee_stack16([8 x i32], i64, i64) + +define void @caller_to0_from0() nounwind { +; CHECK: caller_to0_from0: +; CHECK-NEXT: // BB + tail call void @callee_stack0() + ret void +; CHECK-NEXT: b callee_stack0 +} + +define void @caller_to0_from8([8 x i32], i64) nounwind{ +; CHECK: caller_to0_from8: +; CHECK-NEXT: // BB + + tail call void @callee_stack0() + ret void +; CHECK-NEXT: b callee_stack0 +} + +define void @caller_to8_from0() { +; CHECK: caller_to8_from0: + +; Caller isn't going to clean up any extra stack we allocate, so it +; can't be a tail call. + tail call void @callee_stack8([8 x i32] undef, i64 42) + ret void +; CHECK: bl callee_stack8 +} + +define void @caller_to8_from8([8 x i32], i64 %a) { +; CHECK: caller_to8_from8: +; CHECK-NOT: sub sp, sp, + +; This should reuse our stack area for the 42 + tail call void @callee_stack8([8 x i32] undef, i64 42) + ret void +; CHECK: str {{x[0-9]+}}, [sp] +; CHECK-NEXT: b callee_stack8 +} + +define void @caller_to16_from8([8 x i32], i64 %a) { +; CHECK: caller_to16_from8: + +; Shouldn't be a tail call: we can't use SP+8 because our caller might +; have something there. This may sound obvious but implementation does +; some funky aligning. + tail call void @callee_stack16([8 x i32] undef, i64 undef, i64 undef) +; CHECK: bl callee_stack16 + ret void +} + +define void @caller_to8_from24([8 x i32], i64 %a, i64 %b, i64 %c) { +; CHECK: caller_to8_from24: +; CHECK-NOT: sub sp, sp + +; Reuse our area, putting "42" at incoming sp + tail call void @callee_stack8([8 x i32] undef, i64 42) + ret void +; CHECK: str {{x[0-9]+}}, [sp] +; CHECK-NEXT: b callee_stack8 +} + +define void @caller_to16_from16([8 x i32], i64 %a, i64 %b) { +; CHECK: caller_to16_from16: +; CHECK-NOT: sub sp, sp, + +; Here we want to make sure that both loads happen before the stores: +; otherwise either %a or %b will be wrongly clobbered. + tail call void @callee_stack16([8 x i32] undef, i64 %b, i64 %a) + ret void + +; CHECK: ldr x0, +; CHECK: ldr x1, +; CHECK: str x1, +; CHECK: str x0, + +; CHECK-NOT: add sp, sp, +; CHECK: b callee_stack16 +} + +@func = global void(i32)* null + +define void @indirect_tail() { +; CHECK: indirect_tail: +; CHECK-NOT: sub sp, sp + + %fptr = load void(i32)** @func + tail call void %fptr(i32 42) + ret void +; CHECK: movz w0, #42 +; CHECK: ldr [[FPTR:x[1-9]+]], [{{x[0-9]+}}, #:lo12:func] +; CHECK: br [[FPTR]] +}
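A compact way to read the sibling-call cases above: a tail call may only use stack slots that already belong to the caller's incoming argument area, because after the final "b" there is no frame left and nobody would clean up anything extra the caller allocated. Roughly, for caller_to8_from8 (the layout is illustrative; exact offsets depend on the real frame):

  ;  ... caller's caller's frame ...
  ;  [sp, #0]  incoming i64 %a   <- reused: 42 is stored here, then "b callee_stack8"
  ;  (nothing may be freshly allocated below sp for the callee's arguments)

caller_to8_from0 fails exactly this condition, so it stays a normal bl, and caller_to16_from8 cannot touch [sp, #8] because the caller's caller may own that slot.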
\ No newline at end of file diff --git a/test/CodeGen/AArch64/sincos-expansion.ll b/test/CodeGen/AArch64/sincos-expansion.ll new file mode 100644 index 000000000000..c7a392b78c24 --- /dev/null +++ b/test/CodeGen/AArch64/sincos-expansion.ll @@ -0,0 +1,35 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s + +define float @test_sincos_f32(float %f) { + %sin = call float @sinf(float %f) readnone + %cos = call float @cosf(float %f) readnone +; CHECK: bl cosf +; CHECK: bl sinf + %val = fadd float %sin, %cos + ret float %val +} + +define double @test_sincos_f64(double %f) { + %sin = call double @sin(double %f) readnone + %cos = call double @cos(double %f) readnone + %val = fadd double %sin, %cos +; CHECK: bl cos +; CHECK: bl sin + ret double %val +} + +define fp128 @test_sincos_f128(fp128 %f) { + %sin = call fp128 @sinl(fp128 %f) readnone + %cos = call fp128 @cosl(fp128 %f) readnone + %val = fadd fp128 %sin, %cos +; CHECK: bl cosl +; CHECK: bl sinl + ret fp128 %val +} + +declare float @sinf(float) readonly +declare double @sin(double) readonly +declare fp128 @sinl(fp128) readonly +declare float @cosf(float) readonly +declare double @cos(double) readonly +declare fp128 @cosl(fp128) readonly
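For context on what sincos-expansion.ll appears to be guarding (the CHECK lines themselves only require that both libcalls survive for each type): when sin and cos of the same operand occur together, the optimizer may merge them into one combined operation, and the backend must then expand that back into the two separate calls wherever no combined routine is used. On targets that do provide the GNU sincosf extension, the merged form would look roughly like the sketch below; the function and its use here are an assumption for illustration, not something this test checks:

  declare void @sincosf(float, float*, float*)

  define float @merged_form(float %f) {
    %sinp = alloca float
    %cosp = alloca float
    call void @sincosf(float %f, float* %sinp, float* %cosp)
    %sin = load float* %sinp
    %cos = load float* %cosp
    %val = fadd float %sin, %cos
    ret float %val
  }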
\ No newline at end of file diff --git a/test/CodeGen/AArch64/tail-call.ll b/test/CodeGen/AArch64/tail-call.ll new file mode 100644 index 000000000000..f323b151ad1e --- /dev/null +++ b/test/CodeGen/AArch64/tail-call.ll @@ -0,0 +1,94 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s + +declare fastcc void @callee_stack0() +declare fastcc void @callee_stack8([8 x i32], i64) +declare fastcc void @callee_stack16([8 x i32], i64, i64) + +define fastcc void @caller_to0_from0() nounwind { +; CHECK: caller_to0_from0: +; CHECK-NEXT: // BB + tail call fastcc void @callee_stack0() + ret void +; CHECK-NEXT: b callee_stack0 +} + +define fastcc void @caller_to0_from8([8 x i32], i64) { +; CHECK: caller_to0_from8: + + tail call fastcc void @callee_stack0() + ret void +; CHECK: add sp, sp, #16 +; CHECK-NEXT: b callee_stack0 +} + +define fastcc void @caller_to8_from0() { +; CHECK: caller_to8_from0: +; CHECK: sub sp, sp, #32 + +; Key point is that the "42" should go #16 below incoming stack +; pointer (we didn't have arg space to reuse). + tail call fastcc void @callee_stack8([8 x i32] undef, i64 42) + ret void +; CHECK: str {{x[0-9]+}}, [sp, #16] +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: b callee_stack8 +} + +define fastcc void @caller_to8_from8([8 x i32], i64 %a) { +; CHECK: caller_to8_from8: +; CHECK: sub sp, sp, #16 + +; Key point is that the "%a" should go where at SP on entry. + tail call fastcc void @callee_stack8([8 x i32] undef, i64 42) + ret void +; CHECK: str {{x[0-9]+}}, [sp, #16] +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: b callee_stack8 +} + +define fastcc void @caller_to16_from8([8 x i32], i64 %a) { +; CHECK: caller_to16_from8: +; CHECK: sub sp, sp, #16 + +; Important point is that the call reuses the "dead" argument space +; above %a on the stack. If it tries to go below incoming-SP then the +; callee will not deallocate the space, even in fastcc. + tail call fastcc void @callee_stack16([8 x i32] undef, i64 42, i64 2) +; CHECK: str {{x[0-9]+}}, [sp, #24] +; CHECK: str {{x[0-9]+}}, [sp, #16] +; CHECK: add sp, sp, #16 +; CHECK: b callee_stack16 + ret void +} + + +define fastcc void @caller_to8_from24([8 x i32], i64 %a, i64 %b, i64 %c) { +; CHECK: caller_to8_from24: +; CHECK: sub sp, sp, #16 + +; Key point is that the "%a" should go where at #16 above SP on entry. + tail call fastcc void @callee_stack8([8 x i32] undef, i64 42) + ret void +; CHECK: str {{x[0-9]+}}, [sp, #32] +; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: b callee_stack8 +} + + +define fastcc void @caller_to16_from16([8 x i32], i64 %a, i64 %b) { +; CHECK: caller_to16_from16: +; CHECK: sub sp, sp, #16 + +; Here we want to make sure that both loads happen before the stores: +; otherwise either %a or %b will be wrongly clobbered. + tail call fastcc void @callee_stack16([8 x i32] undef, i64 %b, i64 %a) + ret void + +; CHECK: ldr x0, +; CHECK: ldr x1, +; CHECK: str x1, +; CHECK: str x0, + +; CHECK: add sp, sp, #16 +; CHECK: b callee_stack16 +} diff --git a/test/CodeGen/AArch64/tls-dynamic-together.ll b/test/CodeGen/AArch64/tls-dynamic-together.ll new file mode 100644 index 000000000000..bad2298c8a65 --- /dev/null +++ b/test/CodeGen/AArch64/tls-dynamic-together.ll @@ -0,0 +1,18 @@ +; RUN: llc -O0 -mtriple=aarch64-none-linux-gnu -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s + +; If the .tlsdesccall and blr parts are emitted completely separately (even with +; glue) then LLVM will separate them quite happily (with a spill at O0, hence +; the option). 
This is definitely wrong, so we make sure they are emitted +; together. + +@general_dynamic_var = external thread_local global i32 + +define i32 @test_generaldynamic() { +; CHECK: test_generaldynamic: + + %val = load i32* @general_dynamic_var + ret i32 %val + +; CHECK: .tlsdesccall general_dynamic_var +; CHECK-NEXT: blr {{x[0-9]+}} +} diff --git a/test/CodeGen/AArch64/tls-dynamics.ll b/test/CodeGen/AArch64/tls-dynamics.ll new file mode 100644 index 000000000000..cdfd11783c23 --- /dev/null +++ b/test/CodeGen/AArch64/tls-dynamics.ll @@ -0,0 +1,121 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-RELOC %s + +@general_dynamic_var = external thread_local global i32 + +define i32 @test_generaldynamic() { +; CHECK: test_generaldynamic: + + %val = load i32* @general_dynamic_var + ret i32 %val + +; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var +; CHECK: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:general_dynamic_var +; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:general_dynamic_var] +; CHECK: .tlsdesccall general_dynamic_var +; CHECK-NEXT: blr [[CALLEE]] + +; CHECK: mrs x[[TP:[0-9]+]], tpidr_el0 +; CHECK: ldr w0, [x[[TP]], x0] + +; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE +; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC +; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC +; CHECK-RELOC: R_AARCH64_TLSDESC_CALL + +} + +define i32* @test_generaldynamic_addr() { +; CHECK: test_generaldynamic_addr: + + ret i32* @general_dynamic_var + +; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var +; CHECK: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:general_dynamic_var +; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:general_dynamic_var] +; CHECK: .tlsdesccall general_dynamic_var +; CHECK-NEXT: blr [[CALLEE]] + +; CHECK: mrs [[TP:x[0-9]+]], tpidr_el0 +; CHECK: add x0, [[TP]], x0 + +; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE +; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC +; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC +; CHECK-RELOC: R_AARCH64_TLSDESC_CALL + +} + +@local_dynamic_var = external thread_local(localdynamic) global i32 + +define i32 @test_localdynamic() { +; CHECK: test_localdynamic: + + %val = load i32* @local_dynamic_var + ret i32 %val + +; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_ +; CHECK: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_ +; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_] +; CHECK: .tlsdesccall _TLS_MODULE_BASE_ +; CHECK-NEXT: blr [[CALLEE]] + +; CHECK: movz [[DTP_OFFSET:x[0-9]+]], #:dtprel_g1:local_dynamic_var +; CHECK: movk [[DTP_OFFSET]], #:dtprel_g0_nc:local_dynamic_var + +; CHECK: ldr w0, [x0, [[DTP_OFFSET]]] + +; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE +; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC +; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC +; CHECK-RELOC: R_AARCH64_TLSDESC_CALL + +} + +define i32* @test_localdynamic_addr() { +; CHECK: test_localdynamic_addr: + + ret i32* @local_dynamic_var + +; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_ +; CHECK: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_ +; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_] +; CHECK: .tlsdesccall _TLS_MODULE_BASE_ +; CHECK-NEXT: blr [[CALLEE]] + +; CHECK: movz [[DTP_OFFSET:x[0-9]+]], #:dtprel_g1:local_dynamic_var +; CHECK: movk 
[[DTP_OFFSET]], #:dtprel_g0_nc:local_dynamic_var + +; CHECK: add x0, x0, [[DTP_OFFSET]] + +; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE +; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC +; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC +; CHECK-RELOC: R_AARCH64_TLSDESC_CALL + +} + +; The entire point of the local-dynamic access model is to have a single call to +; the expensive resolver. Make sure we achieve that goal. + +@local_dynamic_var2 = external thread_local(localdynamic) global i32 + +define i32 @test_localdynamic_deduplicate() { +; CHECK: test_localdynamic_deduplicate: + + %val = load i32* @local_dynamic_var + %val2 = load i32* @local_dynamic_var2 + + %sum = add i32 %val, %val2 + ret i32 %sum + +; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_ +; CHECK: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_ +; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_] +; CHECK: .tlsdesccall _TLS_MODULE_BASE_ +; CHECK-NEXT: blr [[CALLEE]] + +; CHECK-NOT: _TLS_MODULE_BASE_ + +; CHECK: ret +} diff --git a/test/CodeGen/AArch64/tls-execs.ll b/test/CodeGen/AArch64/tls-execs.ll new file mode 100644 index 000000000000..a66588422793 --- /dev/null +++ b/test/CodeGen/AArch64/tls-execs.ll @@ -0,0 +1,63 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-RELOC %s + +@initial_exec_var = external thread_local(initialexec) global i32 + +define i32 @test_initial_exec() { +; CHECK: test_initial_exec: + %val = load i32* @initial_exec_var + +; CHECK: adrp x[[GOTADDR:[0-9]+]], :gottprel:initial_exec_var +; CHECK: ldr x[[TP_OFFSET:[0-9]+]], [x[[GOTADDR]], #:gottprel_lo12:initial_exec_var] +; CHECK: mrs x[[TP:[0-9]+]], tpidr_el0 +; CHECK: ldr w0, [x[[TP]], x[[TP_OFFSET]]] + +; CHECK-RELOC: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 +; CHECK-RELOC: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC + + ret i32 %val +} + +define i32* @test_initial_exec_addr() { +; CHECK: test_initial_exec_addr: + ret i32* @initial_exec_var + +; CHECK: adrp x[[GOTADDR:[0-9]+]], :gottprel:initial_exec_var +; CHECK: ldr [[TP_OFFSET:x[0-9]+]], [x[[GOTADDR]], #:gottprel_lo12:initial_exec_var] +; CHECK: mrs [[TP:x[0-9]+]], tpidr_el0 +; CHECK: add x0, [[TP]], [[TP_OFFSET]] + +; CHECK-RELOC: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 +; CHECK-RELOC: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC + +} + +@local_exec_var = thread_local(initialexec) global i32 0 + +define i32 @test_local_exec() { +; CHECK: test_local_exec: + %val = load i32* @local_exec_var + +; CHECK: movz [[TP_OFFSET:x[0-9]+]], #:tprel_g1:local_exec_var +; CHECK: movk [[TP_OFFSET]], #:tprel_g0_nc:local_exec_var +; CHECK: mrs x[[TP:[0-9]+]], tpidr_el0 +; CHECK: ldr w0, [x[[TP]], [[TP_OFFSET]]] + +; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G1 +; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC + + ret i32 %val +} + +define i32* @test_local_exec_addr() { +; CHECK: test_local_exec_addr: + ret i32* @local_exec_var + +; CHECK: movz [[TP_OFFSET:x[0-9]+]], #:tprel_g1:local_exec_var +; CHECK: movk [[TP_OFFSET]], #:tprel_g0_nc:local_exec_var +; CHECK: mrs [[TP:x[0-9]+]], tpidr_el0 +; CHECK: add x0, [[TP]], [[TP_OFFSET]] + +; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G1 +; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC +} diff --git a/test/CodeGen/AArch64/tst-br.ll b/test/CodeGen/AArch64/tst-br.ll new file mode 100644 index 000000000000..65c1fda49e2d --- /dev/null +++ b/test/CodeGen/AArch64/tst-br.ll @@ -0,0 +1,48 @@ +; RUN: llc 
-verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +; We've got the usual issues with LLVM reordering blocks here. The +; tests are correct for the current order, but who knows when that +; will change. Beware! +@var32 = global i32 0 +@var64 = global i64 0 + +define i32 @test_tbz() { +; CHECK: test_tbz: + + %val = load i32* @var32 + %val64 = load i64* @var64 + + %tbit0 = and i32 %val, 32768 + %tst0 = icmp ne i32 %tbit0, 0 + br i1 %tst0, label %test1, label %end1 +; CHECK: tbz {{w[0-9]+}}, #15, [[LBL_end1:.LBB0_[0-9]+]] + +test1: + %tbit1 = and i32 %val, 4096 + %tst1 = icmp ne i32 %tbit1, 0 + br i1 %tst1, label %test2, label %end1 +; CHECK: tbz {{w[0-9]+}}, #12, [[LBL_end1]] + +test2: + %tbit2 = and i64 %val64, 32768 + %tst2 = icmp ne i64 %tbit2, 0 + br i1 %tst2, label %test3, label %end1 +; CHECK: tbz {{x[0-9]+}}, #15, [[LBL_end1]] + +test3: + %tbit3 = and i64 %val64, 4096 + %tst3 = icmp ne i64 %tbit3, 0 + br i1 %tst3, label %end2, label %end1 +; CHECK: tbz {{x[0-9]+}}, #12, [[LBL_end1]] + +end2: +; CHECK: movz x0, #1 +; CHECK-NEXT: ret + ret i32 1 + +end1: +; CHECK: [[LBL_end1]]: +; CHECK-NEXT: mov x0, xzr +; CHECK-NEXT: ret + ret i32 0 +} diff --git a/test/CodeGen/AArch64/variadic.ll b/test/CodeGen/AArch64/variadic.ll new file mode 100644 index 000000000000..c5d319eb112b --- /dev/null +++ b/test/CodeGen/AArch64/variadic.ll @@ -0,0 +1,144 @@ +; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s + +%va_list = type {i8*, i8*, i8*, i32, i32} + +@var = global %va_list zeroinitializer + +declare void @llvm.va_start(i8*) + +define void @test_simple(i32 %n, ...) { +; CHECK: test_simple: +; CHECK: sub sp, sp, #[[STACKSIZE:[0-9]+]] +; CHECK: mov x[[FPRBASE:[0-9]+]], sp +; CHECK: str q7, [x[[FPRBASE]], #112] +; CHECK: add x[[GPRBASE:[0-9]+]], sp, #[[GPRFROMSP:[0-9]+]] +; CHECK: str x7, [x[[GPRBASE]], #48] + +; Omit the middle ones + +; CHECK: str q0, [sp] +; CHECK: str x1, [sp, #[[GPRFROMSP]]] + + %addr = bitcast %va_list* @var to i8* + call void @llvm.va_start(i8* %addr) +; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var +; CHECK: movn [[VR_OFFS:w[0-9]+]], #127 +; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28] +; CHECK: movn [[GR_OFFS:w[0-9]+]], #55 +; CHECK: str [[GR_OFFS]], [x[[VA_LIST]], #24] +; CHECK: add [[VR_TOP:x[0-9]+]], x[[FPRBASE]], #128 +; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16] +; CHECK: add [[GR_TOP:x[0-9]+]], x[[GPRBASE]], #56 +; CHECK: str [[GR_TOP]], [x[[VA_LIST]], #8] +; CHECK: add [[STACK:x[0-9]+]], sp, #[[STACKSIZE]] +; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var] + + ret void +} + +define void @test_fewargs(i32 %n, i32 %n1, i32 %n2, float %m, ...) 
{ +; CHECK: test_fewargs: +; CHECK: sub sp, sp, #[[STACKSIZE:[0-9]+]] +; CHECK: mov x[[FPRBASE:[0-9]+]], sp +; CHECK: str q7, [x[[FPRBASE]], #96] +; CHECK: add x[[GPRBASE:[0-9]+]], sp, #[[GPRFROMSP:[0-9]+]] +; CHECK: str x7, [x[[GPRBASE]], #32] + +; Omit the middle ones + +; CHECK: str q1, [sp] +; CHECK: str x3, [sp, #[[GPRFROMSP]]] + + %addr = bitcast %va_list* @var to i8* + call void @llvm.va_start(i8* %addr) +; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var +; CHECK: movn [[VR_OFFS:w[0-9]+]], #111 +; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28] +; CHECK: movn [[GR_OFFS:w[0-9]+]], #39 +; CHECK: str [[GR_OFFS]], [x[[VA_LIST]], #24] +; CHECK: add [[VR_TOP:x[0-9]+]], x[[FPRBASE]], #112 +; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16] +; CHECK: add [[GR_TOP:x[0-9]+]], x[[GPRBASE]], #40 +; CHECK: str [[GR_TOP]], [x[[VA_LIST]], #8] +; CHECK: add [[STACK:x[0-9]+]], sp, #[[STACKSIZE]] +; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var] + + ret void +} + +define void @test_nospare([8 x i64], [8 x float], ...) { +; CHECK: test_nospare: + + %addr = bitcast %va_list* @var to i8* + call void @llvm.va_start(i8* %addr) +; CHECK-NOT: sub sp, sp +; CHECK: mov [[STACK:x[0-9]+]], sp +; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var] + + ret void +} + +; If there are non-variadic arguments on the stack (here two i64s) then the +; __stack field should point just past them. +define void @test_offsetstack([10 x i64], [3 x float], ...) { +; CHECK: test_offsetstack: +; CHECK: sub sp, sp, #80 +; CHECK: mov x[[FPRBASE:[0-9]+]], sp +; CHECK: str q7, [x[[FPRBASE]], #64] + +; CHECK-NOT: str x{{[0-9]+}}, +; Omit the middle ones + +; CHECK: str q3, [sp] + + %addr = bitcast %va_list* @var to i8* + call void @llvm.va_start(i8* %addr) +; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var +; CHECK: movn [[VR_OFFS:w[0-9]+]], #79 +; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28] +; CHECK: str wzr, [x[[VA_LIST]], #24] +; CHECK: add [[VR_TOP:x[0-9]+]], x[[FPRBASE]], #80 +; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16] +; CHECK: add [[STACK:x[0-9]+]], sp, #96 +; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var] + + ret void +} + +declare void @llvm.va_end(i8*) + +define void @test_va_end() nounwind { +; CHECK: test_va_end: +; CHECK-NEXT: BB#0 + + %addr = bitcast %va_list* @var to i8* + call void @llvm.va_end(i8* %addr) + + ret void +; CHECK-NEXT: ret +} + +declare void @llvm.va_copy(i8* %dest, i8* %src) + +@second_list = global %va_list zeroinitializer + +define void @test_va_copy() { +; CHECK: test_va_copy: + %srcaddr = bitcast %va_list* @var to i8* + %dstaddr = bitcast %va_list* @second_list to i8* + call void @llvm.va_copy(i8* %dstaddr, i8* %srcaddr) + +; Check beginning and end again: + +; CHECK: ldr [[BLOCK:x[0-9]+]], [{{x[0-9]+}}, #:lo12:var] +; CHECK: str [[BLOCK]], [{{x[0-9]+}}, #:lo12:second_list] + +; CHECK: add x[[DEST_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:second_list +; CHECK: add x[[SRC_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var + +; CHECK: ldr [[BLOCK:x[0-9]+]], [x[[SRC_LIST]], #24] +; CHECK: str [[BLOCK]], [x[[DEST_LIST]], #24] + + ret void +; CHECK: ret +} diff --git a/test/CodeGen/AArch64/zero-reg.ll b/test/CodeGen/AArch64/zero-reg.ll new file mode 100644 index 000000000000..fef0437ae7f3 --- /dev/null +++ b/test/CodeGen/AArch64/zero-reg.ll @@ -0,0 +1,31 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +@var32 = global i32 0 +@var64 = global i64 0 + +define void @test_zr() { +; CHECK: test_zr: + + store i32 0, i32* @var32 +; CHECK: str wzr, [{{x[0-9]+}}, #:lo12:var32] + store 
i64 0, i64* @var64 +; CHECK: str xzr, [{{x[0-9]+}}, #:lo12:var64] + + ret void +; CHECK: ret +} + +define void @test_sp(i32 %val) { +; CHECK: test_sp: + +; Important correctness point here is that LLVM doesn't try to use xzr +; as an addressing register: "str w0, [xzr]" is not a valid A64 +; instruction (0b11111 in the Rn field would mean "sp"). + %addr = getelementptr i32* null, i64 0 + store i32 %val, i32* %addr +; CHECK: mov x[[NULL:[0-9]+]], xzr +; CHECK: str {{w[0-9]+}}, [x[[NULL]]] + + ret void +; CHECK: ret +}
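The comment in test_sp comes down to an encoding rule worth spelling out: register number 31 does not name a single register. In the base-register (Rn) field of loads and stores it encodes sp, while in most data-processing operand positions it encodes the zero register, so "str w0, [xzr]" simply has no encoding. For example (illustrative instructions, not taken from the test output):

  str  w0, [sp]       // Rn = 31 in a load/store base means sp
  add  x0, x1, xzr    // register 31 as a shifted-register operand is xzr

That is why the test wants the null pointer materialised into an ordinary general register (mov x[[NULL]], xzr) before it is used as an address.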
\ No newline at end of file diff --git a/test/CodeGen/ARM/2007-03-13-InstrSched.ll b/test/CodeGen/ARM/2007-03-13-InstrSched.ll index a63cdd46e2d8..4783f3707690 100644 --- a/test/CodeGen/ARM/2007-03-13-InstrSched.ll +++ b/test/CodeGen/ARM/2007-03-13-InstrSched.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic \ ; RUN: -mattr=+v6 | grep r9 ; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic \ diff --git a/test/CodeGen/ARM/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/ARM/2010-04-07-DbgValueOtherTargets.ll deleted file mode 100644 index 642268992062..000000000000 --- a/test/CodeGen/ARM/2010-04-07-DbgValueOtherTargets.ll +++ /dev/null @@ -1,28 +0,0 @@ -; RUN: llc -O0 -march=arm -asm-verbose < %s | FileCheck %s -; Check that DEBUG_VALUE comments come through on a variety of targets. - -define i32 @main() nounwind ssp { -entry: -; CHECK: DEBUG_VALUE - call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9 - ret i32 0, !dbg !10 -} - -declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone - -declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone - -!llvm.dbg.sp = !{!0} - -!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] -!4 = metadata !{metadata !5} -!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 0} -!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] -!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] -!9 = metadata !{i32 3, i32 11, metadata !8, null} -!10 = metadata !{i32 4, i32 2, metadata !8, null} - diff --git a/test/CodeGen/ARM/2010-08-04-StackVariable.ll b/test/CodeGen/ARM/2010-08-04-StackVariable.ll index 6aeaa26cebd1..91a9903f3852 100644 --- a/test/CodeGen/ARM/2010-08-04-StackVariable.ll +++ b/test/CodeGen/ARM/2010-08-04-StackVariable.ll @@ -74,51 +74,54 @@ return: ; preds = %entry declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone -!llvm.dbg.sp = !{!0, !9, !16, !17, !20} +!llvm.dbg.cu = !{!3} -!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"", metadata !2, i32 11, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 524307, metadata !2, metadata !"SVal", metadata !2, i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_structure_type ] -!2 = metadata !{i32 524329, metadata !"small.cc", metadata !"/Users/manav/R8248330", metadata !3} ; [ DW_TAG_file_type ] -!3 = metadata !{i32 524305, i32 0, i32 4, metadata !"small.cc", metadata !"/Users/manav/R8248330", metadata !"4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786478, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"", metadata !2, i32 11, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786451, metadata !2, metadata !"SVal", metadata !2, i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_structure_type ] +!2 = metadata !{i32 786473, metadata !48} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 786449, i32 4, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !47, metadata !47, metadata !46, metadata !47, metadata !""} ; [ DW_TAG_compile_unit ] !4 = metadata !{metadata !5, metadata !7, metadata !0, metadata !9} -!5 = metadata !{i32 524301, metadata !1, metadata !"Data", metadata !2, i32 7, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ] -!6 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] -!7 = metadata !{i32 524301, metadata !1, metadata !"Kind", metadata !2, i32 8, i64 32, i64 32, i64 64, i32 0, metadata !8} ; [ DW_TAG_member ] -!8 = metadata !{i32 524324, metadata !2, metadata !"unsigned int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] -!9 = metadata !{i32 524334, i32 0, metadata !1, metadata !"~SVal", metadata !"~SVal", metadata !"", metadata !2, i32 12, metadata !10, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] -!10 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ] +!5 = metadata !{i32 786445, metadata !1, metadata !"Data", metadata !2, i32 7, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ] +!6 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] +!7 = metadata !{i32 786445, metadata !1, metadata !"Kind", metadata !2, i32 8, i64 32, i64 32, i64 64, i32 0, metadata !8} ; [ DW_TAG_member ] +!8 = metadata !{i32 786468, metadata !2, metadata !"unsigned int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] +!9 = metadata !{i32 786478, metadata !1, metadata !"~SVal", metadata !"~SVal", metadata !"", metadata !2, i32 12, metadata !10, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!10 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ] !11 = metadata !{null, metadata !12, metadata !13} -!12 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !1} ; [ DW_TAG_pointer_type ] -!13 = metadata !{i32 524324, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!14 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null} ; [ DW_TAG_subroutine_type ] +!12 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !1} ; [ DW_TAG_pointer_type ] +!13 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ 
DW_TAG_base_type ] +!14 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null} ; [ DW_TAG_subroutine_type ] !15 = metadata !{null, metadata !12} -!16 = metadata !{i32 524334, i32 0, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"_ZN4SValC1Ev", metadata !2, i32 11, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void (%struct.SVal*)* @_ZN4SValC1Ev} ; [ DW_TAG_subprogram ] -!17 = metadata !{i32 524334, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3fooi4SVal", metadata !2, i32 16, metadata !18, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, %struct.SVal*)* @_Z3fooi4SVal} ; [ DW_TAG_subprogram ] -!18 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !19, i32 0, null} ; [ DW_TAG_subroutine_type ] +!16 = metadata !{i32 786478, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"_ZN4SValC1Ev", metadata !2, i32 11, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void (%struct.SVal*)* @_ZN4SValC1Ev} ; [ DW_TAG_subprogram ] +!17 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3fooi4SVal", metadata !2, i32 16, metadata !18, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, %struct.SVal*)* @_Z3fooi4SVal} ; [ DW_TAG_subprogram ] +!18 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !19, i32 0, null} ; [ DW_TAG_subroutine_type ] !19 = metadata !{metadata !13, metadata !13, metadata !1} -!20 = metadata !{i32 524334, i32 0, metadata !2, metadata !"main", metadata !"main", metadata !"main", metadata !2, i32 23, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ] -!21 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, null} ; [ DW_TAG_subroutine_type ] +!20 = metadata !{i32 786478, metadata !2, metadata !"main", metadata !"main", metadata !"main", metadata !2, i32 23, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ] +!21 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, null} ; [ DW_TAG_subroutine_type ] !22 = metadata !{metadata !13} -!23 = metadata !{i32 524545, metadata !17, metadata !"i", metadata !2, i32 16, metadata !13} ; [ DW_TAG_arg_variable ] +!23 = metadata !{i32 786689, metadata !17, metadata !"i", metadata !2, i32 16, metadata !13, i32 0, i32 0} ; [ DW_TAG_arg_variable ] !24 = metadata !{i32 16, i32 0, metadata !17, null} -!25 = metadata !{i32 524545, metadata !17, metadata !"location", metadata !2, i32 16, metadata !26} ; [ DW_TAG_arg_variable ] -!26 = metadata !{i32 524304, metadata !2, metadata !"SVal", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_reference_type ] +!25 = metadata !{i32 786689, metadata !17, metadata !"location", metadata !2, i32 16, metadata !26, i32 0, i32 0} ; [ DW_TAG_arg_variable ] +!26 = metadata !{i32 786448, metadata !2, metadata !"SVal", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_reference_type ] !27 = metadata !{i32 17, i32 0, metadata !28, null} -!28 = metadata !{i32 524299, metadata !17, i32 16, i32 0, metadata !2, i32 2} ; [ DW_TAG_lexical_block ] +!28 = 
metadata !{i32 786443, metadata !2, metadata !17, i32 16, i32 0, i32 2} ; [ DW_TAG_lexical_block ] !29 = metadata !{i32 18, i32 0, metadata !28, null} !30 = metadata !{i32 20, i32 0, metadata !28, null} -!31 = metadata !{i32 524545, metadata !16, metadata !"this", metadata !2, i32 11, metadata !32} ; [ DW_TAG_arg_variable ] -!32 = metadata !{i32 524326, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !33} ; [ DW_TAG_const_type ] -!33 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_pointer_type ] +!31 = metadata !{i32 786689, metadata !16, metadata !"this", metadata !2, i32 11, metadata !32, i32 0, i32 0} ; [ DW_TAG_arg_variable ] +!32 = metadata !{i32 786470, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !33} ; [ DW_TAG_const_type ] +!33 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_pointer_type ] !34 = metadata !{i32 11, i32 0, metadata !16, null} !35 = metadata !{i32 11, i32 0, metadata !36, null} -!36 = metadata !{i32 524299, metadata !37, i32 11, i32 0, metadata !2, i32 1} ; [ DW_TAG_lexical_block ] -!37 = metadata !{i32 524299, metadata !16, i32 11, i32 0, metadata !2, i32 0} ; [ DW_TAG_lexical_block ] -!38 = metadata !{i32 524544, metadata !39, metadata !"v", metadata !2, i32 24, metadata !1} ; [ DW_TAG_auto_variable ] -!39 = metadata !{i32 524299, metadata !40, i32 23, i32 0, metadata !2, i32 4} ; [ DW_TAG_lexical_block ] -!40 = metadata !{i32 524299, metadata !20, i32 23, i32 0, metadata !2, i32 3} ; [ DW_TAG_lexical_block ] +!36 = metadata !{i32 786443, metadata !2, metadata !37, i32 11, i32 0, i32 1} ; [ DW_TAG_lexical_block ] +!37 = metadata !{i32 786443, metadata !2, metadata !16, i32 11, i32 0, i32 0} ; [ DW_TAG_lexical_block ] +!38 = metadata !{i32 786688, metadata !39, metadata !"v", metadata !2, i32 24, metadata !1, i32 0, i32 0} ; [ DW_TAG_auto_variable ] +!39 = metadata !{i32 786443, metadata !2, metadata !40, i32 23, i32 0, i32 4} ; [ DW_TAG_lexical_block ] +!40 = metadata !{i32 786443, metadata !2, metadata !20, i32 23, i32 0, i32 3} ; [ DW_TAG_lexical_block ] !41 = metadata !{i32 24, i32 0, metadata !39, null} !42 = metadata !{i32 25, i32 0, metadata !39, null} !43 = metadata !{i32 26, i32 0, metadata !39, null} -!44 = metadata !{i32 524544, metadata !39, metadata !"k", metadata !2, i32 26, metadata !13} ; [ DW_TAG_auto_variable ] +!44 = metadata !{i32 786688, metadata !39, metadata !"k", metadata !2, i32 26, metadata !13, i32 0, i32 0} ; [ DW_TAG_auto_variable ] !45 = metadata !{i32 27, i32 0, metadata !39, null} +!46 = metadata !{metadata !0, metadata !9, metadata !16, metadata !17, metadata !20} +!47 = metadata !{i32 0} +!48 = metadata !{metadata !"small.cc", metadata !"/Users/manav/R8248330"} diff --git a/test/CodeGen/ARM/2010-11-30-reloc-movt.ll b/test/CodeGen/ARM/2010-11-30-reloc-movt.ll index 8b164c5d91f8..94a05412f5d4 100644 --- a/test/CodeGen/ARM/2010-11-30-reloc-movt.ll +++ b/test/CodeGen/ARM/2010-11-30-reloc-movt.ll @@ -23,7 +23,7 @@ entry: ; OBJ: Relocation 0 ; OBJ-NEXT: 'r_offset', 0x00000004 -; OBJ-NEXT: 'r_sym', 0x000007 +; OBJ-NEXT: 'r_sym', 0x000009 ; OBJ-NEXT: 'r_type', 0x2b ; OBJ: Relocation 1 @@ -33,7 +33,7 @@ entry: ; OBJ: # Relocation 2 ; OBJ-NEXT: 'r_offset', 0x0000000c -; OBJ-NEXT: 'r_sym', 0x000008 +; OBJ-NEXT: 'r_sym', 0x00000a ; OBJ-NEXT: 'r_type', 0x1c } diff --git a/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll 
b/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll index 5cfbb4f944f7..1272a257931d 100644 --- a/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll +++ b/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll @@ -10,7 +10,8 @@ @STRIDE = internal global i32 8 ; ASM: .type array00,%object @ @array00 -; ASM-NEXT: .lcomm array00,80 +; ASM-NEXT: .local array00 +; ASM-NEXT: .comm array00,80,1 ; ASM-NEXT: .type _MergedGlobals,%object @ @_MergedGlobals diff --git a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll index ca88eedcea60..1d1b89a34f9a 100644 --- a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll +++ b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll @@ -75,44 +75,38 @@ entry: ret i8 %0, !dbg !39 } -!llvm.dbg.sp = !{!0, !6, !7, !8, !9} -!llvm.dbg.lv.get1 = !{!10, !11} -!llvm.dbg.gv = !{!13, !14, !15, !16, !17} -!llvm.dbg.lv.get2 = !{!18, !19} -!llvm.dbg.lv.get3 = !{!21, !22} -!llvm.dbg.lv.get4 = !{!24, !25} -!llvm.dbg.lv.get5 = !{!27, !28} +!llvm.dbg.cu = !{!2} -!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"get1", metadata !"get1", metadata !"get1", metadata !1, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get1} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 589865, metadata !"foo.c", metadata !"/tmp/", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 589841, i32 0, i32 1, metadata !"foo.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2369.8)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!0 = metadata !{i32 786478, metadata !1, metadata !"get1", metadata !"get1", metadata !"get1", metadata !1, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get1, null, null, metadata !42, i32 4} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786473, metadata !47} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, metadata !47, i32 1, metadata !"4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build 2369.8)", i1 true, metadata !"", i32 0, null, null, metadata !40, metadata !41, metadata !""} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 786453, metadata !1, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5, metadata !5} -!5 = metadata !{i32 589860, metadata !1, metadata !"_Bool", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 589870, i32 0, metadata !1, metadata !"get2", metadata !"get2", metadata !"get2", metadata !1, i32 7, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get2} ; [ DW_TAG_subprogram ] -!7 = metadata !{i32 589870, i32 0, metadata !1, metadata !"get3", metadata !"get3", metadata !"get3", metadata !1, i32 10, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get3} ; [ DW_TAG_subprogram ] -!8 = metadata !{i32 589870, i32 0, metadata !1, metadata !"get4", metadata !"get4", metadata !"get4", metadata !1, i32 13, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get4} ; [ DW_TAG_subprogram ] -!9 = metadata !{i32 589870, i32 0, metadata !1, metadata !"get5", metadata !"get5", metadata !"get5", metadata !1, i32 16, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get5} ; [ DW_TAG_subprogram ] -!10 = metadata !{i32 590081, metadata !0, metadata !"a", metadata !1, i32 4, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] -!11 = metadata !{i32 590080, metadata !12, metadata !"b", metadata !1, i32 4, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] -!12 = metadata !{i32 589835, metadata !0, i32 4, i32 0, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] -!13 = metadata !{i32 589876, i32 0, metadata !1, metadata !"x1", metadata !"x1", metadata !"", metadata !1, i32 3, metadata !5, i1 true, i1 true, i8* @x1} ; [ DW_TAG_variable ] -!14 = metadata !{i32 589876, i32 0, metadata !1, metadata !"x2", metadata !"x2", metadata !"", metadata !1, i32 6, metadata !5, i1 true, i1 true, i8* @x2} ; [ DW_TAG_variable ] -!15 = metadata !{i32 589876, i32 0, metadata !1, metadata !"x3", metadata !"x3", metadata !"", metadata !1, i32 9, metadata !5, i1 true, i1 true, i8* @x3} ; [ DW_TAG_variable ] -!16 = metadata !{i32 589876, i32 0, metadata !1, metadata !"x4", metadata !"x4", metadata !"", metadata !1, i32 12, metadata !5, i1 true, i1 true, i8* @x4} ; [ DW_TAG_variable ] -!17 = metadata !{i32 589876, i32 0, metadata !1, metadata !"x5", metadata !"x5", metadata !"", metadata !1, i32 15, metadata !5, i1 false, i1 true, i8* @x5} ; [ DW_TAG_variable ] -!18 = metadata !{i32 590081, metadata !6, metadata !"a", metadata !1, i32 7, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] -!19 = metadata !{i32 590080, metadata !20, metadata !"b", metadata !1, i32 7, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] -!20 = metadata !{i32 589835, metadata !6, i32 7, i32 0, metadata !1, i32 1} ; [ DW_TAG_lexical_block ] -!21 = metadata !{i32 590081, metadata !7, metadata !"a", metadata !1, i32 10, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] -!22 = metadata !{i32 590080, metadata !23, metadata !"b", metadata !1, i32 10, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] -!23 = metadata !{i32 589835, metadata !7, i32 10, i32 0, metadata !1, i32 2} ; [ DW_TAG_lexical_block ] -!24 = metadata !{i32 590081, metadata !8, metadata !"a", metadata !1, i32 13, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] -!25 = metadata !{i32 590080, metadata !26, 
metadata !"b", metadata !1, i32 13, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] -!26 = metadata !{i32 589835, metadata !8, i32 13, i32 0, metadata !1, i32 3} ; [ DW_TAG_lexical_block ] -!27 = metadata !{i32 590081, metadata !9, metadata !"a", metadata !1, i32 16, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] -!28 = metadata !{i32 590080, metadata !29, metadata !"b", metadata !1, i32 16, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] -!29 = metadata !{i32 589835, metadata !9, i32 16, i32 0, metadata !1, i32 4} ; [ DW_TAG_lexical_block ] +!5 = metadata !{i32 786468, metadata !1, metadata !1, metadata !"_Bool", i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 786478, metadata !1, metadata !"get2", metadata !"get2", metadata !"get2", metadata !1, i32 7, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get2, null, null, metadata !43, i32 7} ; [ DW_TAG_subprogram ] +!7 = metadata !{i32 786478, metadata !1, metadata !"get3", metadata !"get3", metadata !"get3", metadata !1, i32 10, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get3, null, null, metadata !44, i32 10} ; [ DW_TAG_subprogram ] +!8 = metadata !{i32 786478, metadata !1, metadata !"get4", metadata !"get4", metadata !"get4", metadata !1, i32 13, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get4, null, null, metadata !45, i32 13} ; [ DW_TAG_subprogram ] +!9 = metadata !{i32 786478, metadata !1, metadata !"get5", metadata !"get5", metadata !"get5", metadata !1, i32 16, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get5, null, null, metadata !46, i32 16} ; [ DW_TAG_subprogram ] +!10 = metadata !{i32 786689, metadata !0, metadata !"a", metadata !1, i32 4, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ] +!11 = metadata !{i32 786688, metadata !12, metadata !"b", metadata !1, i32 4, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ] +!12 = metadata !{i32 786443, metadata !0, i32 4, i32 0, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!13 = metadata !{i32 786484, i32 0, metadata !1, metadata !"x1", metadata !"x1", metadata !"", metadata !1, i32 3, metadata !5, i1 true, i1 true, i8* @x1, null} ; [ DW_TAG_variable ] +!14 = metadata !{i32 786484, i32 0, metadata !1, metadata !"x2", metadata !"x2", metadata !"", metadata !1, i32 6, metadata !5, i1 true, i1 true, i8* @x2, null} ; [ DW_TAG_variable ] +!15 = metadata !{i32 786484, i32 0, metadata !1, metadata !"x3", metadata !"x3", metadata !"", metadata !1, i32 9, metadata !5, i1 true, i1 true, i8* @x3, null} ; [ DW_TAG_variable ] +!16 = metadata !{i32 786484, i32 0, metadata !1, metadata !"x4", metadata !"x4", metadata !"", metadata !1, i32 12, metadata !5, i1 true, i1 true, i8* @x4, null} ; [ DW_TAG_variable ] +!17 = metadata !{i32 786484, i32 0, metadata !1, metadata !"x5", metadata !"x5", metadata !"", metadata !1, i32 15, metadata !5, i1 false, i1 true, i8* @x5, null} ; [ DW_TAG_variable ] +!18 = metadata !{i32 786689, metadata !6, metadata !"a", metadata !1, i32 7, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ] +!19 = metadata !{i32 786688, metadata !20, metadata !"b", metadata !1, i32 7, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ] +!20 = metadata !{i32 786443, metadata !6, i32 7, i32 0, metadata !1, i32 1} ; [ DW_TAG_lexical_block ] +!21 = metadata !{i32 786689, metadata !7, metadata !"a", metadata !1, i32 10, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ] +!22 = metadata !{i32 786688, metadata 
!23, metadata !"b", metadata !1, i32 10, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ] +!23 = metadata !{i32 786443, metadata !7, i32 10, i32 0, metadata !1, i32 2} ; [ DW_TAG_lexical_block ] +!24 = metadata !{i32 786689, metadata !8, metadata !"a", metadata !1, i32 13, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ] +!25 = metadata !{i32 786688, metadata !26, metadata !"b", metadata !1, i32 13, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ] +!26 = metadata !{i32 786443, metadata !8, i32 13, i32 0, metadata !1, i32 3} ; [ DW_TAG_lexical_block ] +!27 = metadata !{i32 786689, metadata !9, metadata !"a", metadata !1, i32 16, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ] +!28 = metadata !{i32 786688, metadata !29, metadata !"b", metadata !1, i32 16, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ] +!29 = metadata !{i32 786443, metadata !9, i32 16, i32 0, metadata !1, i32 4} ; [ DW_TAG_lexical_block ] !30 = metadata !{i32 4, i32 0, metadata !0, null} !31 = metadata !{i32 4, i32 0, metadata !12, null} !32 = metadata !{i32 7, i32 0, metadata !6, null} @@ -123,3 +117,11 @@ entry: !37 = metadata !{i32 13, i32 0, metadata !26, null} !38 = metadata !{i32 16, i32 0, metadata !9, null} !39 = metadata !{i32 16, i32 0, metadata !29, null} +!40 = metadata !{metadata !0, metadata !6, metadata !7, metadata !8, metadata !9} +!41 = metadata !{metadata !13, metadata !14, metadata !15, metadata !16, metadata !17} +!42 = metadata !{metadata !10, metadata !11} +!43 = metadata !{metadata !18, metadata !19} +!44 = metadata !{metadata !21, metadata !22} +!45 = metadata !{metadata !24, metadata !25} +!46 = metadata !{metadata !27, metadata !28} +!47 = metadata !{metadata !"foo.c", metadata !"/tmp/"} diff --git a/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll index f2b0c5d7d090..266609b8ce69 100644 --- a/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll +++ b/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll @@ -8,7 +8,7 @@ ; DW_OP_constu ; offset -;CHECK: .long Lset33 +;CHECK: .long Lset8 ;CHECK-NEXT: @ DW_AT_type ;CHECK-NEXT: @ DW_AT_decl_file ;CHECK-NEXT: @ DW_AT_decl_line @@ -73,44 +73,37 @@ define i32 @get5(i32 %a) nounwind optsize ssp { declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0} -!llvm.dbg.sp = !{!1, !6, !7, !8, !9} -!llvm.dbg.lv.get1 = !{!10, !11} -!llvm.dbg.lv.get2 = !{!13, !14} -!llvm.dbg.lv.get3 = !{!16, !17} -!llvm.dbg.lv.get4 = !{!19, !20} -!llvm.dbg.gv = !{!22, !23, !24, !25, !26} -!llvm.dbg.lv.get5 = !{!27, !28} -!0 = metadata !{i32 589841, i32 0, i32 12, metadata !"ss3.c", metadata !"/private/tmp", metadata !"clang", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!1 = metadata !{i32 589870, i32 0, metadata !2, metadata !"get1", metadata !"get1", metadata !"", metadata !2, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get1, null, null} ; [ DW_TAG_subprogram ] -!2 = metadata !{i32 589865, metadata !"ss3.c", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ] -!3 = metadata !{i32 589845, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!0 = metadata !{i32 786449, metadata !47, i32 12, metadata !"clang", i1 true, metadata !"", i32 0, null, null, metadata !40, metadata !41, null} ; [ DW_TAG_compile_unit ] +!1 = metadata !{i32 786478, metadata !2, metadata !"get1", metadata !"get1", metadata !"", metadata !2, i32 5, metadata !3, i1 
false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get1, null, null, metadata !42, i32 5} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 786473, metadata !47} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 786453, metadata !2, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5} -!5 = metadata !{i32 589860, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 589870, i32 0, metadata !2, metadata !"get2", metadata !"get2", metadata !"", metadata !2, i32 8, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get2, null, null} ; [ DW_TAG_subprogram ] -!7 = metadata !{i32 589870, i32 0, metadata !2, metadata !"get3", metadata !"get3", metadata !"", metadata !2, i32 11, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get3, null, null} ; [ DW_TAG_subprogram ] -!8 = metadata !{i32 589870, i32 0, metadata !2, metadata !"get4", metadata !"get4", metadata !"", metadata !2, i32 14, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get4, null, null} ; [ DW_TAG_subprogram ] -!9 = metadata !{i32 589870, i32 0, metadata !2, metadata !"get5", metadata !"get5", metadata !"", metadata !2, i32 17, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get5, null, null} ; [ DW_TAG_subprogram ] -!10 = metadata !{i32 590081, metadata !1, metadata !"a", metadata !2, i32 16777221, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] -!11 = metadata !{i32 590080, metadata !12, metadata !"b", metadata !2, i32 5, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] -!12 = metadata !{i32 589835, metadata !1, i32 5, i32 19, metadata !2, i32 0} ; [ DW_TAG_lexical_block ] -!13 = metadata !{i32 590081, metadata !6, metadata !"a", metadata !2, i32 16777224, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] -!14 = metadata !{i32 590080, metadata !15, metadata !"b", metadata !2, i32 8, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] -!15 = metadata !{i32 589835, metadata !6, i32 8, i32 17, metadata !2, i32 1} ; [ DW_TAG_lexical_block ] -!16 = metadata !{i32 590081, metadata !7, metadata !"a", metadata !2, i32 16777227, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] -!17 = metadata !{i32 590080, metadata !18, metadata !"b", metadata !2, i32 11, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] -!18 = metadata !{i32 589835, metadata !7, i32 11, i32 19, metadata !2, i32 2} ; [ DW_TAG_lexical_block ] -!19 = metadata !{i32 590081, metadata !8, metadata !"a", metadata !2, i32 16777230, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] -!20 = metadata !{i32 590080, metadata !21, metadata !"b", metadata !2, i32 14, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] -!21 = metadata !{i32 589835, metadata !8, i32 14, i32 19, metadata !2, i32 3} ; [ DW_TAG_lexical_block ] -!22 = metadata !{i32 589876, i32 0, metadata !0, metadata !"x5", metadata !"x5", metadata !"", metadata !2, i32 16, metadata !5, i32 0, i32 1, i32* @x5} ; [ DW_TAG_variable ] -!23 = metadata !{i32 589876, i32 0, metadata !0, metadata !"x4", metadata !"x4", metadata !"", metadata !2, i32 13, metadata !5, i32 1, i32 1, i32* @x4} ; [ DW_TAG_variable ] -!24 = metadata !{i32 589876, i32 0, metadata !0, metadata !"x3", metadata !"x3", metadata !"", metadata !2, i32 10, metadata !5, i32 1, i32 1, i32* @x3} ; [ DW_TAG_variable ] -!25 = metadata !{i32 589876, i32 0, metadata !0, metadata !"x2", 
metadata !"x2", metadata !"", metadata !2, i32 7, metadata !5, i32 1, i32 1, i32* @x2} ; [ DW_TAG_variable ] -!26 = metadata !{i32 589876, i32 0, metadata !0, metadata !"x1", metadata !"x1", metadata !"", metadata !2, i32 4, metadata !5, i32 1, i32 1, i32* @x1} ; [ DW_TAG_variable ] -!27 = metadata !{i32 590081, metadata !9, metadata !"a", metadata !2, i32 16777233, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] -!28 = metadata !{i32 590080, metadata !29, metadata !"b", metadata !2, i32 17, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] -!29 = metadata !{i32 589835, metadata !9, i32 17, i32 19, metadata !2, i32 4} ; [ DW_TAG_lexical_block ] +!5 = metadata !{i32 786468, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 786478, metadata !2, metadata !"get2", metadata !"get2", metadata !"", metadata !2, i32 8, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get2, null, null, metadata !43, i32 8} ; [ DW_TAG_subprogram ] +!7 = metadata !{i32 786478, metadata !2, metadata !"get3", metadata !"get3", metadata !"", metadata !2, i32 11, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get3, null, null, metadata !44, i32 11} ; [ DW_TAG_subprogram ] +!8 = metadata !{i32 786478, metadata !2, metadata !"get4", metadata !"get4", metadata !"", metadata !2, i32 14, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get4, null, null, metadata !45, i32 14} ; [ DW_TAG_subprogram ] +!9 = metadata !{i32 786478, metadata !2, metadata !"get5", metadata !"get5", metadata !"", metadata !2, i32 17, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get5, null, null, metadata !46, i32 17} ; [ DW_TAG_subprogram ] +!10 = metadata !{i32 786689, metadata !1, metadata !"a", metadata !2, i32 16777221, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ] +!11 = metadata !{i32 786688, metadata !12, metadata !"b", metadata !2, i32 5, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ] +!12 = metadata !{i32 786443, metadata !1, i32 5, i32 19, metadata !2, i32 0} ; [ DW_TAG_lexical_block ] +!13 = metadata !{i32 786689, metadata !6, metadata !"a", metadata !2, i32 16777224, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ] +!14 = metadata !{i32 786688, metadata !15, metadata !"b", metadata !2, i32 8, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ] +!15 = metadata !{i32 786443, metadata !6, i32 8, i32 17, metadata !2, i32 1} ; [ DW_TAG_lexical_block ] +!16 = metadata !{i32 786689, metadata !7, metadata !"a", metadata !2, i32 16777227, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ] +!17 = metadata !{i32 786688, metadata !18, metadata !"b", metadata !2, i32 11, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ] +!18 = metadata !{i32 786443, metadata !7, i32 11, i32 19, metadata !2, i32 2} ; [ DW_TAG_lexical_block ] +!19 = metadata !{i32 786689, metadata !8, metadata !"a", metadata !2, i32 16777230, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ] +!20 = metadata !{i32 786688, metadata !21, metadata !"b", metadata !2, i32 14, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ] +!21 = metadata !{i32 786443, metadata !8, i32 14, i32 19, metadata !2, i32 3} ; [ DW_TAG_lexical_block ] +!22 = metadata !{i32 786484, i32 0, metadata !0, metadata !"x5", metadata !"x5", metadata !"", metadata !2, i32 16, metadata !5, i32 0, i32 1, i32* @x5, null} ; [ DW_TAG_variable ] +!23 = metadata !{i32 786484, i32 0, metadata !0, 
metadata !"x4", metadata !"x4", metadata !"", metadata !2, i32 13, metadata !5, i32 1, i32 1, i32* @x4, null} ; [ DW_TAG_variable ] +!24 = metadata !{i32 786484, i32 0, metadata !0, metadata !"x3", metadata !"x3", metadata !"", metadata !2, i32 10, metadata !5, i32 1, i32 1, i32* @x3, null} ; [ DW_TAG_variable ] +!25 = metadata !{i32 786484, i32 0, metadata !0, metadata !"x2", metadata !"x2", metadata !"", metadata !2, i32 7, metadata !5, i32 1, i32 1, i32* @x2, null} ; [ DW_TAG_variable ] +!26 = metadata !{i32 786484, i32 0, metadata !0, metadata !"x1", metadata !"x1", metadata !"", metadata !2, i32 4, metadata !5, i32 1, i32 1, i32* @x1, null} ; [ DW_TAG_variable ] +!27 = metadata !{i32 786689, metadata !9, metadata !"a", metadata !2, i32 16777233, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ] +!28 = metadata !{i32 786688, metadata !29, metadata !"b", metadata !2, i32 17, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ] +!29 = metadata !{i32 786443, metadata !9, i32 17, i32 19, metadata !2, i32 4} ; [ DW_TAG_lexical_block ] !30 = metadata !{i32 5, i32 16, metadata !1, null} !31 = metadata !{i32 5, i32 32, metadata !12, null} !32 = metadata !{i32 8, i32 14, metadata !6, null} @@ -121,3 +114,11 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !37 = metadata !{i32 14, i32 32, metadata !21, null} !38 = metadata !{i32 17, i32 16, metadata !9, null} !39 = metadata !{i32 17, i32 32, metadata !29, null} +!40 = metadata !{metadata !1, metadata !6, metadata !7, metadata !8, metadata !9} +!41 = metadata !{metadata !22, metadata !23, metadata !24, metadata !25, metadata !26} +!42 = metadata !{metadata !10, metadata !11} +!43 = metadata !{metadata !13, metadata !14} +!44 = metadata !{metadata !16, metadata !17} +!45 = metadata !{metadata !19, metadata !20} +!46 = metadata !{metadata !27, metadata !28} +!47 = metadata !{metadata !"ss3.c", metadata !"/private/tmp"} diff --git a/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll b/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll index 6e0ef9619657..f563eeef0180 100644 --- a/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll +++ b/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll @@ -1,13 +1,5 @@ ; RUN: llc -march=arm -mcpu=cortex-a8 < %s | FileCheck %s -; Should trigger a NEON store. -; CHECK: vstr -define void @f_0_12(i8* nocapture %c) nounwind optsize { -entry: - call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 12, i32 8, i1 false) - ret void -} - ; Trigger multiple NEON stores. 
; CHECK: vst1.64 ; CHECK-NEXT: vst1.64 diff --git a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll index f9ede7401a3c..0d0d03b23e86 100644 --- a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll +++ b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll @@ -8,7 +8,7 @@ define void @test_sqrt(<4 x float>* %X) nounwind { ; CHECK: movw r1, :lower16:{{.*}} ; CHECK: movt r1, :upper16:{{.*}} -; CHECK: vld1.64 {{.*}}, [r1, :128] +; CHECK: vld1.64 {{.*}}, [r1:128] ; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}} ; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}} ; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}} @@ -252,7 +252,7 @@ define void @test_powi(<4 x float>* %X) nounwind { ; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}} ; CHECK: movt [[reg0]], :upper16:{{.*}} -; CHECK: vld1.64 {{.*}}, :128 +; CHECK: vld1.64 {{.*}}:128 ; CHECK: vmul.f32 {{.*}} ; CHECK: vst1.64 diff --git a/test/CodeGen/ARM/2011-12-14-machine-sink.ll b/test/CodeGen/ARM/2011-12-14-machine-sink.ll index b21bb006e327..1b21f7571d8e 100644 --- a/test/CodeGen/ARM/2011-12-14-machine-sink.ll +++ b/test/CodeGen/ARM/2011-12-14-machine-sink.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -o /dev/null -stats 2>&1 | FileCheck %s -check-prefix=STATS ; Radar 10266272 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" diff --git a/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll b/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll index b05ec6367ee4..ca0964a05933 100644 --- a/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll +++ b/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll @@ -13,6 +13,7 @@ ; CHECK-NOT: ch SU ; CHECK: ch SU(2): Latency=1 ; CHECK-NOT: ch SU +; CHECK: Successors: ; CHECK: ** List Scheduling ; CHECK: SU(2){{.*}}STR{{.*}} ; CHECK-NOT: ch SU @@ -22,6 +23,7 @@ ; CHECK-NOT: ch SU ; CHECK: ch SU(2): Latency=1 ; CHECK-NOT: ch SU +; CHECK: Successors: define i32 @f1(i32* nocapture %p1, i32* nocapture %p2) nounwind { entry: store volatile i32 65540, i32* %p1, align 4, !tbaa !0 diff --git a/test/CodeGen/ARM/2012-08-09-neon-extload.ll b/test/CodeGen/ARM/2012-08-09-neon-extload.ll index b55f1cae7fe6..764c58f2e159 100644 --- a/test/CodeGen/ARM/2012-08-09-neon-extload.ll +++ b/test/CodeGen/ARM/2012-08-09-neon-extload.ll @@ -18,7 +18,7 @@ define void @test_v2i8tov2i32() { %i32val = sext <2 x i8> %i8val to <2 x i32> store <2 x i32> %i32val, <2 x i32>* @var_v2i32 -; CHECK: vld1.16 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}, :16] +; CHECK: vld1.16 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}:16] ; CHECK: vmovl.s8 {{q[0-9]+}}, d[[LOAD]] ; CHECK: vmovl.s16 {{q[0-9]+}}, {{d[0-9]+}} @@ -32,7 +32,7 @@ define void @test_v2i8tov2i64() { %i64val = sext <2 x i8> %i8val to <2 x i64> store <2 x i64> %i64val, <2 x i64>* @var_v2i64 -; CHECK: vld1.16 {d{{[0-9]+}}[0]}, [{{r[0-9]+}}, :16] +; CHECK: vld1.16 {d{{[0-9]+}}[0]}, [{{r[0-9]+}}:16] ; CHECK: vmovl.s8 {{q[0-9]+}}, d[[LOAD]] ; CHECK: vmovl.s16 {{q[0-9]+}}, {{d[0-9]+}} ; CHECK: vmovl.s32 {{q[0-9]+}}, {{d[0-9]+}} @@ -50,7 +50,7 @@ define void @test_v4i8tov4i16() { %i16val = sext <4 x i8> %i8val to <4 x i16> store <4 x i16> %i16val, <4 x i16>* @var_v4i16 -; CHECK: vld1.32 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}, :32] +; CHECK: vld1.32 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}:32] ; CHECK: vmovl.s8 {{q[0-9]+}}, d[[LOAD]] ; CHECK-NOT: vmovl.s16 @@ -65,7 +65,7 @@ define void @test_v4i8tov4i32() { %i16val = sext <4 x i8> %i8val to <4 x i32> store <4 x i32> %i16val, <4 x i32>* @var_v4i32 -; CHECK: vld1.32 
{d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}, :32] +; CHECK: vld1.32 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}:32] ; CHECK: vmovl.s8 {{q[0-9]+}}, d[[LOAD]] ; CHECK: vmovl.s16 {{q[0-9]+}}, {{d[0-9]+}} @@ -79,7 +79,7 @@ define void @test_v2i16tov2i32() { %i32val = sext <2 x i16> %i16val to <2 x i32> store <2 x i32> %i32val, <2 x i32>* @var_v2i32 -; CHECK: vld1.32 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}, :32] +; CHECK: vld1.32 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}:32] ; CHECK: vmovl.s16 {{q[0-9]+}}, d[[LOAD]] ; CHECK-NOT: vmovl @@ -94,7 +94,7 @@ define void @test_v2i16tov2i64() { %i64val = sext <2 x i16> %i16val to <2 x i64> store <2 x i64> %i64val, <2 x i64>* @var_v2i64 -; CHECK: vld1.32 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}, :32] +; CHECK: vld1.32 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}:32] ; CHECK: vmovl.s16 {{q[0-9]+}}, d[[LOAD]] ; CHECK: vmovl.s32 {{q[0-9]+}}, d[[LOAD]] diff --git a/test/CodeGen/ARM/2012-08-23-legalize-vmull.ll b/test/CodeGen/ARM/2012-08-23-legalize-vmull.ll new file mode 100644 index 000000000000..2f55204aa407 --- /dev/null +++ b/test/CodeGen/ARM/2012-08-23-legalize-vmull.ll @@ -0,0 +1,150 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +; PR12281 +; Test generataion of code for vmull instruction when multiplying 128-bit +; vectors that were created by sign-extending smaller vector sizes. +; +; The vmull operation requires 64-bit vectors, so we must extend the original +; vector size to 64 bits for vmull operation. +; Previously failed with an assertion because the <4 x i8> vector was too small +; for vmull. + +; Vector x Constant +; v4i8 +; +define void @sextload_v4i8_c(<4 x i8>* %v) nounwind { +;CHECK: sextload_v4i8_c: +entry: + %0 = load <4 x i8>* %v, align 8 + %v0 = sext <4 x i8> %0 to <4 x i32> +;CHECK: vmull + %v1 = mul <4 x i32> %v0, <i32 3, i32 3, i32 3, i32 3> + store <4 x i32> %v1, <4 x i32>* undef, align 8 + ret void; +} + +; v2i8 +; +define void @sextload_v2i8_c(<2 x i8>* %v) nounwind { +;CHECK: sextload_v2i8_c: +entry: + %0 = load <2 x i8>* %v, align 8 + %v0 = sext <2 x i8> %0 to <2 x i64> +;CHECK: vmull + %v1 = mul <2 x i64> %v0, <i64 3, i64 3> + store <2 x i64> %v1, <2 x i64>* undef, align 8 + ret void; +} + +; v2i16 +; +define void @sextload_v2i16_c(<2 x i16>* %v) nounwind { +;CHECK: sextload_v2i16_c: +entry: + %0 = load <2 x i16>* %v, align 8 + %v0 = sext <2 x i16> %0 to <2 x i64> +;CHECK: vmull + %v1 = mul <2 x i64> %v0, <i64 3, i64 3> + store <2 x i64> %v1, <2 x i64>* undef, align 8 + ret void; +} + + +; Vector x Vector +; v4i8 +; +define void @sextload_v4i8_v(<4 x i8>* %v, <4 x i8>* %p) nounwind { +;CHECK: sextload_v4i8_v: +entry: + %0 = load <4 x i8>* %v, align 8 + %v0 = sext <4 x i8> %0 to <4 x i32> + + %1 = load <4 x i8>* %p, align 8 + %v2 = sext <4 x i8> %1 to <4 x i32> +;CHECK: vmull + %v1 = mul <4 x i32> %v0, %v2 + store <4 x i32> %v1, <4 x i32>* undef, align 8 + ret void; +} + +; v2i8 +; +define void @sextload_v2i8_v(<2 x i8>* %v, <2 x i8>* %p) nounwind { +;CHECK: sextload_v2i8_v: +entry: + %0 = load <2 x i8>* %v, align 8 + %v0 = sext <2 x i8> %0 to <2 x i64> + + %1 = load <2 x i8>* %p, align 8 + %v2 = sext <2 x i8> %1 to <2 x i64> +;CHECK: vmull + %v1 = mul <2 x i64> %v0, %v2 + store <2 x i64> %v1, <2 x i64>* undef, align 8 + ret void; +} + +; v2i16 +; +define void @sextload_v2i16_v(<2 x i16>* %v, <2 x i16>* %p) nounwind { +;CHECK: sextload_v2i16_v: +entry: + %0 = load <2 x i16>* %v, align 8 + %v0 = sext <2 x i16> %0 to <2 x i64> + + %1 = load <2 x i16>* %p, align 8 + %v2 = sext <2 x i16> %1 to <2 x i64> +;CHECK: vmull + %v1 = mul <2 x i64> %v0, %v2 + 
store <2 x i64> %v1, <2 x i64>* undef, align 8 + ret void; +} + + +; Vector(small) x Vector(big) +; v4i8 x v4i16 +; +define void @sextload_v4i8_vs(<4 x i8>* %v, <4 x i16>* %p) nounwind { +;CHECK: sextload_v4i8_vs: +entry: + %0 = load <4 x i8>* %v, align 8 + %v0 = sext <4 x i8> %0 to <4 x i32> + + %1 = load <4 x i16>* %p, align 8 + %v2 = sext <4 x i16> %1 to <4 x i32> +;CHECK: vmull + %v1 = mul <4 x i32> %v0, %v2 + store <4 x i32> %v1, <4 x i32>* undef, align 8 + ret void; +} + +; v2i8 +; v2i8 x v2i16 +define void @sextload_v2i8_vs(<2 x i8>* %v, <2 x i16>* %p) nounwind { +;CHECK: sextload_v2i8_vs: +entry: + %0 = load <2 x i8>* %v, align 8 + %v0 = sext <2 x i8> %0 to <2 x i64> + + %1 = load <2 x i16>* %p, align 8 + %v2 = sext <2 x i16> %1 to <2 x i64> +;CHECK: vmull + %v1 = mul <2 x i64> %v0, %v2 + store <2 x i64> %v1, <2 x i64>* undef, align 8 + ret void; +} + +; v2i16 +; v2i16 x v2i32 +define void @sextload_v2i16_vs(<2 x i16>* %v, <2 x i32>* %p) nounwind { +;CHECK: sextload_v2i16_vs: +entry: + %0 = load <2 x i16>* %v, align 8 + %v0 = sext <2 x i16> %0 to <2 x i64> + + %1 = load <2 x i32>* %p, align 8 + %v2 = sext <2 x i32> %1 to <2 x i64> +;CHECK: vmull + %v1 = mul <2 x i64> %v0, %v2 + store <2 x i64> %v1, <2 x i64>* undef, align 8 + ret void; +} diff --git a/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll b/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll index b5f6d311cb9c..b0644d17431d 100644 --- a/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll +++ b/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll @@ -19,7 +19,7 @@ entry: ; CHECK: bfc [[REG]], #0, #3 %0 = va_arg i8** %g, double call void @llvm.va_end(i8* %g1) - + ret void } diff --git a/test/CodeGen/ARM/2012-11-14-subs_carry.ll b/test/CodeGen/ARM/2012-11-14-subs_carry.ll new file mode 100644 index 000000000000..38700f3a8d10 --- /dev/null +++ b/test/CodeGen/ARM/2012-11-14-subs_carry.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s + +;CHECK: foo +;CHECK: adds +;CHECK-NEXT: adc +;CHECK-NEXT: bx + +;rdar://12028498 + +define i32 @foo() nounwind ssp { +entry: + %tmp2 = zext i32 3 to i64 + br label %bug_block + +bug_block: + %tmp410 = and i64 1031, 1647010 + %tmp411 = and i64 %tmp2, -211 + %tmp412 = shl i64 %tmp410, %tmp2 + %tmp413 = shl i64 %tmp411, %tmp2 + %tmp415 = and i64 %tmp413, 1 + %tmp420 = xor i64 0, %tmp415 + %tmp421 = and i64 %tmp412, %tmp415 + %tmp422 = shl i64 %tmp421, 1 + br label %finish + +finish: + %tmp423 = lshr i64 %tmp422, 32 + %tmp424 = trunc i64 %tmp423 to i32 + ret i32 %tmp424 +} + diff --git a/test/CodeGen/ARM/2013-01-21-PR14992.ll b/test/CodeGen/ARM/2013-01-21-PR14992.ll new file mode 100644 index 000000000000..38b9e0e8f086 --- /dev/null +++ b/test/CodeGen/ARM/2013-01-21-PR14992.ll @@ -0,0 +1,28 @@ +;PR14492 - Tablegen incorrectly converts ARM tLDMIA_UPD pseudo to tLDMIA +;RUN: llc -mtriple=thumbv7 < %s | FileCheck -check-prefix=EXPECTED %s +;RUN: llc -mtriple=thumbv7 < %s | FileCheck %s + +;EXPECTED: foo: +;CHECK: foo: +define i32 @foo(i32* %a) nounwind optsize { +entry: + %0 = load i32* %a, align 4, !tbaa !0 + %arrayidx1 = getelementptr inbounds i32* %a, i32 1 + %1 = load i32* %arrayidx1, align 4, !tbaa !0 + %arrayidx2 = getelementptr inbounds i32* %a, i32 2 + %2 = load i32* %arrayidx2, align 4, !tbaa !0 + %add.ptr = getelementptr inbounds i32* %a, i32 3 +;Make sure we do not have a duplicated register in the front of the reg list +;EXPECTED: ldm [[BASE:r[0-9]+]]!, {[[REG:r[0-9]+]], {{r[0-9]+}}, +;CHECK-NOT: ldm [[BASE:r[0-9]+]]!, {[[REG:r[0-9]+]], [[REG]], + tail call void 
@bar(i32* %add.ptr) nounwind optsize + %add = add nsw i32 %1, %0 + %add3 = add nsw i32 %add, %2 + ret i32 %add3 +} + +declare void @bar(i32*) optsize + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/CodeGen/ARM/2013-02-27-expand-vfma.ll b/test/CodeGen/ARM/2013-02-27-expand-vfma.ll new file mode 100644 index 000000000000..0e3bf2371061 --- /dev/null +++ b/test/CodeGen/ARM/2013-02-27-expand-vfma.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple=armv7s-apple-darwin | FileCheck %s -check-prefix=VFP4 + +define <4 x float> @muladd(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind { +; CHECK: muladd: +; CHECK: fmaf +; CHECK: fmaf +; CHECK: fmaf +; CHECK: fmaf +; CHECK-NOT: fmaf + +; CHECK-VFP4: vfma.f32 + %tmp = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %c, <4 x float> %a) #2 + ret <4 x float> %tmp +} + +declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #1 + +define <2 x float> @muladd2(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind { +; CHECK: muladd2: +; CHECK: fmaf +; CHECK: fmaf +; CHECK-NOT: fmaf + +; CHECK-VFP4: vfma.f32 + %tmp = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> %c, <2 x float> %a) #2 + ret <2 x float> %tmp +} + +declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) #1 + diff --git a/test/CodeGen/ARM/2013-04-05-overridden-loads-PR14824.ll b/test/CodeGen/ARM/2013-04-05-overridden-loads-PR14824.ll new file mode 100644 index 000000000000..2561686c1f83 --- /dev/null +++ b/test/CodeGen/ARM/2013-04-05-overridden-loads-PR14824.ll @@ -0,0 +1,110 @@ +; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabi -mcpu=cortex-a9 -mattr=+neon,+neonfp | FileCheck %s +; The test is presented by Jiangning Liu. 
+;CHECK-NOT: vldmia + +define void @sample_test(<8 x i64> * %secondSource, <8 x i64> * %source, <8 x i64> * %dest) nounwind { +entry: + %s0 = load <8 x i64> * %source, align 64 + %s1 = load <8 x i64> * %secondSource, align 64 + %s2 = bitcast <8 x i64> %s0 to i512 + %data.i.i.48.extract.shift = lshr i512 %s2, 384 + %data.i.i.48.extract.trunc = trunc i512 %data.i.i.48.extract.shift to i64 + %arrayidx64 = getelementptr inbounds <8 x i64> * %source, i32 6 + %s120 = load <8 x i64> * %arrayidx64, align 64 + %arrayidx67 = getelementptr inbounds <8 x i64> * %secondSource, i32 6 + %s121 = load <8 x i64> * %arrayidx67, align 64 + %s122 = bitcast <8 x i64> %s120 to i512 + %data.i.i677.48.extract.shift = lshr i512 %s122, 384 + %data.i.i677.48.extract.trunc = trunc i512 %data.i.i677.48.extract.shift to i64 + %s123 = insertelement <8 x i64> undef, i64 %data.i.i677.48.extract.trunc, i32 0 + %data.i.i677.32.extract.shift = lshr i512 %s122, 256 + %data.i.i677.32.extract.trunc = trunc i512 %data.i.i677.32.extract.shift to i64 + %s124 = insertelement <8 x i64> %s123, i64 %data.i.i677.32.extract.trunc, i32 1 + %data.i.i677.16.extract.shift = lshr i512 %s122, 128 + %data.i.i677.16.extract.trunc = trunc i512 %data.i.i677.16.extract.shift to i64 + %s125 = insertelement <8 x i64> %s124, i64 %data.i.i677.16.extract.trunc, i32 2 + %data.i.i677.56.extract.shift = lshr i512 %s122, 448 + %data.i.i677.56.extract.trunc = trunc i512 %data.i.i677.56.extract.shift to i64 + %s126 = insertelement <8 x i64> %s125, i64 %data.i.i677.56.extract.trunc, i32 3 + %data.i.i677.24.extract.shift = lshr i512 %s122, 192 + %data.i.i677.24.extract.trunc = trunc i512 %data.i.i677.24.extract.shift to i64 + %s127 = insertelement <8 x i64> %s126, i64 %data.i.i677.24.extract.trunc, i32 4 + %s128 = insertelement <8 x i64> %s127, i64 %data.i.i677.32.extract.trunc, i32 5 + %s129 = insertelement <8 x i64> %s128, i64 %data.i.i677.16.extract.trunc, i32 6 + %s130 = insertelement <8 x i64> %s129, i64 %data.i.i677.56.extract.trunc, i32 7 + %s131 = bitcast <8 x i64> %s121 to i512 + %data.i1.i676.48.extract.shift = lshr i512 %s131, 384 + %data.i1.i676.48.extract.trunc = trunc i512 %data.i1.i676.48.extract.shift to i64 + %s132 = insertelement <8 x i64> undef, i64 %data.i1.i676.48.extract.trunc, i32 0 + %data.i1.i676.32.extract.shift = lshr i512 %s131, 256 + %data.i1.i676.32.extract.trunc = trunc i512 %data.i1.i676.32.extract.shift to i64 + %s133 = insertelement <8 x i64> %s132, i64 %data.i1.i676.32.extract.trunc, i32 1 + %data.i1.i676.16.extract.shift = lshr i512 %s131, 128 + %data.i1.i676.16.extract.trunc = trunc i512 %data.i1.i676.16.extract.shift to i64 + %s134 = insertelement <8 x i64> %s133, i64 %data.i1.i676.16.extract.trunc, i32 2 + %data.i1.i676.56.extract.shift = lshr i512 %s131, 448 + %data.i1.i676.56.extract.trunc = trunc i512 %data.i1.i676.56.extract.shift to i64 + %s135 = insertelement <8 x i64> %s134, i64 %data.i1.i676.56.extract.trunc, i32 3 + %data.i1.i676.24.extract.shift = lshr i512 %s131, 192 + %data.i1.i676.24.extract.trunc = trunc i512 %data.i1.i676.24.extract.shift to i64 + %s136 = insertelement <8 x i64> %s135, i64 %data.i1.i676.24.extract.trunc, i32 4 + %s137 = insertelement <8 x i64> %s136, i64 %data.i1.i676.32.extract.trunc, i32 5 + %s138 = insertelement <8 x i64> %s137, i64 %data.i1.i676.16.extract.trunc, i32 6 + %s139 = insertelement <8 x i64> %s138, i64 %data.i1.i676.56.extract.trunc, i32 7 + %vecinit28.i.i699 = shufflevector <8 x i64> %s139, <8 x i64> %s130, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 undef, i32 undef, 
i32 undef> + %vecinit35.i.i700 = shufflevector <8 x i64> %vecinit28.i.i699, <8 x i64> %s139, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 13, i32 undef, i32 undef> + %vecinit42.i.i701 = shufflevector <8 x i64> %vecinit35.i.i700, <8 x i64> %s139, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 14, i32 undef> + %vecinit49.i.i702 = shufflevector <8 x i64> %vecinit42.i.i701, <8 x i64> %s130, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15> + %arrayidx72 = getelementptr inbounds <8 x i64> * %dest, i32 6 + store <8 x i64> %vecinit49.i.i702, <8 x i64> * %arrayidx72, align 64 + %arrayidx75 = getelementptr inbounds <8 x i64> * %source, i32 7 + %s140 = load <8 x i64> * %arrayidx75, align 64 + %arrayidx78 = getelementptr inbounds <8 x i64> * %secondSource, i32 7 + %s141 = load <8 x i64> * %arrayidx78, align 64 + %s142 = bitcast <8 x i64> %s140 to i512 + %data.i.i650.32.extract.shift = lshr i512 %s142, 256 + %data.i.i650.32.extract.trunc = trunc i512 %data.i.i650.32.extract.shift to i64 + %s143 = insertelement <8 x i64> undef, i64 %data.i.i650.32.extract.trunc, i32 0 + %s144 = insertelement <8 x i64> %s143, i64 %data.i.i650.32.extract.trunc, i32 1 + %data.i.i650.16.extract.shift = lshr i512 %s142, 128 + %data.i.i650.16.extract.trunc = trunc i512 %data.i.i650.16.extract.shift to i64 + %s145 = insertelement <8 x i64> %s144, i64 %data.i.i650.16.extract.trunc, i32 2 + %data.i.i650.8.extract.shift = lshr i512 %s142, 64 + %data.i.i650.8.extract.trunc = trunc i512 %data.i.i650.8.extract.shift to i64 + %s146 = insertelement <8 x i64> %s145, i64 %data.i.i650.8.extract.trunc, i32 3 + %s147 = insertelement <8 x i64> %s146, i64 %data.i.i650.8.extract.trunc, i32 4 + %data.i.i650.48.extract.shift = lshr i512 %s142, 384 + %data.i.i650.48.extract.trunc = trunc i512 %data.i.i650.48.extract.shift to i64 + %s148 = insertelement <8 x i64> %s147, i64 %data.i.i650.48.extract.trunc, i32 5 + %s149 = insertelement <8 x i64> %s148, i64 %data.i.i650.16.extract.trunc, i32 6 + %data.i.i650.0.extract.trunc = trunc i512 %s142 to i64 + %s150 = insertelement <8 x i64> %s149, i64 %data.i.i650.0.extract.trunc, i32 7 + %s151 = bitcast <8 x i64> %s141 to i512 + %data.i1.i649.32.extract.shift = lshr i512 %s151, 256 + %data.i1.i649.32.extract.trunc = trunc i512 %data.i1.i649.32.extract.shift to i64 + %s152 = insertelement <8 x i64> undef, i64 %data.i1.i649.32.extract.trunc, i32 0 + %s153 = insertelement <8 x i64> %s152, i64 %data.i1.i649.32.extract.trunc, i32 1 + %data.i1.i649.16.extract.shift = lshr i512 %s151, 128 + %data.i1.i649.16.extract.trunc = trunc i512 %data.i1.i649.16.extract.shift to i64 + %s154 = insertelement <8 x i64> %s153, i64 %data.i1.i649.16.extract.trunc, i32 2 + %data.i1.i649.8.extract.shift = lshr i512 %s151, 64 + %data.i1.i649.8.extract.trunc = trunc i512 %data.i1.i649.8.extract.shift to i64 + %s155 = insertelement <8 x i64> %s154, i64 %data.i1.i649.8.extract.trunc, i32 3 + %s156 = insertelement <8 x i64> %s155, i64 %data.i1.i649.8.extract.trunc, i32 4 + %data.i1.i649.48.extract.shift = lshr i512 %s151, 384 + %data.i1.i649.48.extract.trunc = trunc i512 %data.i1.i649.48.extract.shift to i64 + %s157 = insertelement <8 x i64> %s156, i64 %data.i1.i649.48.extract.trunc, i32 5 + %s158 = insertelement <8 x i64> %s157, i64 %data.i1.i649.16.extract.trunc, i32 6 + %data.i1.i649.0.extract.trunc = trunc i512 %s151 to i64 + %s159 = insertelement <8 x i64> %s158, i64 %data.i1.i649.0.extract.trunc, i32 7 + %vecinit7.i.i669 = shufflevector <8 x i64> %s159, <8 x i64> %s150, <8 x i32> <i32 0, i32 9, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %vecinit14.i.i670 = shufflevector <8 x i64> %vecinit7.i.i669, <8 x i64> %s150, <8 x i32> <i32 0, i32 1, i32 10, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %vecinit21.i.i671 = shufflevector <8 x i64> %vecinit14.i.i670, <8 x i64> %s150, <8 x i32> <i32 0, i32 1, i32 2, i32 11, i32 undef, i32 undef, i32 undef, i32 undef> + %vecinit28.i.i672 = shufflevector <8 x i64> %vecinit21.i.i671, <8 x i64> %s150, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 undef, i32 undef, i32 undef> + %vecinit35.i.i673 = shufflevector <8 x i64> %vecinit28.i.i672, <8 x i64> %s159, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 13, i32 undef, i32 undef> + %vecinit42.i.i674 = shufflevector <8 x i64> %vecinit35.i.i673, <8 x i64> %s159, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 14, i32 undef> + %vecinit49.i.i675 = shufflevector <8 x i64> %vecinit42.i.i674, <8 x i64> %s159, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15> + %arrayidx83 = getelementptr inbounds <8 x i64> * %dest, i32 7 + store <8 x i64> %vecinit49.i.i675, <8 x i64> * %arrayidx83, align 64 + ret void +} diff --git a/test/CodeGen/ARM/DbgValueOtherTargets.test b/test/CodeGen/ARM/DbgValueOtherTargets.test new file mode 100644 index 000000000000..bf90891de0a7 --- /dev/null +++ b/test/CodeGen/ARM/DbgValueOtherTargets.test @@ -0,0 +1 @@ +RUN: llc -O0 -march=arm -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll diff --git a/test/CodeGen/ARM/MergeConsecutiveStores.ll b/test/CodeGen/ARM/MergeConsecutiveStores.ll new file mode 100644 index 000000000000..06c87e986a83 --- /dev/null +++ b/test/CodeGen/ARM/MergeConsecutiveStores.ll @@ -0,0 +1,98 @@ +; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s + +; Make sure that we merge the consecutive load/store sequence below and use a +; word (16 bit) instead of a byte copy. +; CHECK: MergeLoadStoreBaseIndexOffset +; CHECK: ldrh [[REG:r[0-9]+]], [{{.*}}] +; CHECK: strh [[REG]], [r1], #2 +define void @MergeLoadStoreBaseIndexOffset(i32* %a, i8* %b, i8* %c, i32 %n) { + br label %1 + +; <label>:1 + %.09 = phi i32 [ %n, %0 ], [ %11, %1 ] + %.08 = phi i8* [ %b, %0 ], [ %10, %1 ] + %.0 = phi i32* [ %a, %0 ], [ %2, %1 ] + %2 = getelementptr inbounds i32* %.0, i32 1 + %3 = load i32* %.0, align 1 + %4 = getelementptr inbounds i8* %c, i32 %3 + %5 = load i8* %4, align 1 + %6 = add i32 %3, 1 + %7 = getelementptr inbounds i8* %c, i32 %6 + %8 = load i8* %7, align 1 + store i8 %5, i8* %.08, align 1 + %9 = getelementptr inbounds i8* %.08, i32 1 + store i8 %8, i8* %9, align 1 + %10 = getelementptr inbounds i8* %.08, i32 2 + %11 = add nsw i32 %.09, -1 + %12 = icmp eq i32 %11, 0 + br i1 %12, label %13, label %1 + +; <label>:13 + ret void +} + +; Make sure that we merge the consecutive load/store sequence below and use a +; word (16 bit) instead of a byte copy even if there are intermediate sign +; extensions. 
+; CHECK: MergeLoadStoreBaseIndexOffsetSext +; CHECK: ldrh [[REG:r[0-9]+]], [{{.*}}] +; CHECK: strh [[REG]], [r1], #2 +define void @MergeLoadStoreBaseIndexOffsetSext(i8* %a, i8* %b, i8* %c, i32 %n) { + br label %1 + +; <label>:1 + %.09 = phi i32 [ %n, %0 ], [ %12, %1 ] + %.08 = phi i8* [ %b, %0 ], [ %11, %1 ] + %.0 = phi i8* [ %a, %0 ], [ %2, %1 ] + %2 = getelementptr inbounds i8* %.0, i32 1 + %3 = load i8* %.0, align 1 + %4 = sext i8 %3 to i32 + %5 = getelementptr inbounds i8* %c, i32 %4 + %6 = load i8* %5, align 1 + %7 = add i32 %4, 1 + %8 = getelementptr inbounds i8* %c, i32 %7 + %9 = load i8* %8, align 1 + store i8 %6, i8* %.08, align 1 + %10 = getelementptr inbounds i8* %.08, i32 1 + store i8 %9, i8* %10, align 1 + %11 = getelementptr inbounds i8* %.08, i32 2 + %12 = add nsw i32 %.09, -1 + %13 = icmp eq i32 %12, 0 + br i1 %13, label %14, label %1 + +; <label>:14 + ret void +} + +; However, we can only merge ignore sign extensions when they are on all memory +; computations; +; CHECK: loadStoreBaseIndexOffsetSextNoSex +; CHECK-NOT: ldrh [[REG:r[0-9]+]], [{{.*}}] +; CHECK-NOT: strh [[REG]], [r1], #2 +define void @loadStoreBaseIndexOffsetSextNoSex(i8* %a, i8* %b, i8* %c, i32 %n) { + br label %1 + +; <label>:1 + %.09 = phi i32 [ %n, %0 ], [ %12, %1 ] + %.08 = phi i8* [ %b, %0 ], [ %11, %1 ] + %.0 = phi i8* [ %a, %0 ], [ %2, %1 ] + %2 = getelementptr inbounds i8* %.0, i32 1 + %3 = load i8* %.0, align 1 + %4 = sext i8 %3 to i32 + %5 = getelementptr inbounds i8* %c, i32 %4 + %6 = load i8* %5, align 1 + %7 = add i8 %3, 1 + %wrap.4 = sext i8 %7 to i32 + %8 = getelementptr inbounds i8* %c, i32 %wrap.4 + %9 = load i8* %8, align 1 + store i8 %6, i8* %.08, align 1 + %10 = getelementptr inbounds i8* %.08, i32 1 + store i8 %9, i8* %10, align 1 + %11 = getelementptr inbounds i8* %.08, i32 2 + %12 = add nsw i32 %.09, -1 + %13 = icmp eq i32 %12, 0 + br i1 %13, label %14, label %1 + +; <label>:14 + ret void +} diff --git a/test/CodeGen/ARM/PR15053.ll b/test/CodeGen/ARM/PR15053.ll new file mode 100644 index 000000000000..706a90efe3a8 --- /dev/null +++ b/test/CodeGen/ARM/PR15053.ll @@ -0,0 +1,13 @@ +; RUN: llc -mtriple=armv7 < %s +; PR15053 + +declare i32 @llvm.arm.strexd(i32, i32, i8*) nounwind +declare { i32, i32 } @llvm.arm.ldrexd(i8*) nounwind readonly + +define void @foo() { +entry: + %0 = tail call { i32, i32 } @llvm.arm.ldrexd(i8* undef) nounwind + %1 = extractvalue { i32, i32 } %0, 0 + %2 = tail call i32 @llvm.arm.strexd(i32 %1, i32 undef, i8* undef) nounwind + ret void +} diff --git a/test/CodeGen/ARM/a15-SD-dep.ll b/test/CodeGen/ARM/a15-SD-dep.ll new file mode 100644 index 000000000000..a52468e5be9e --- /dev/null +++ b/test/CodeGen/ARM/a15-SD-dep.ll @@ -0,0 +1,58 @@ +; RUN: llc -O1 -mcpu=cortex-a15 -mtriple=armv7-linux-gnueabi -disable-a15-sd-optimization -verify-machineinstrs < %s | FileCheck -check-prefix=DISABLED %s +; RUN: llc -O1 -mcpu=cortex-a15 -mtriple=armv7-linux-gnueabi -verify-machineinstrs < %s | FileCheck -check-prefix=ENABLED %s + +; CHECK-ENABLED: t1: +; CHECK-DISABLED: t1: +define <2 x float> @t1(float %f) { + ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d0[0] + ; CHECK-DISABLED-NOT: vdup.32 d{{[0-9]*}}, d0[0] + %i1 = insertelement <2 x float> undef, float %f, i32 1 + %i2 = fadd <2 x float> %i1, %i1 + ret <2 x float> %i2 +} + +; CHECK-ENABLED: t2: +; CHECK-DISABLED: t2: +define <4 x float> @t2(float %g, float %f) { + ; CHECK-ENABLED: vdup.32 q{{[0-9]*}}, d0[0] + ; CHECK-DISABLED-NOT: vdup.32 d{{[0-9]*}}, d0[0] + %i1 = insertelement <4 x float> undef, float %f, i32 1 + %i2 = fadd <4 x 
float> %i1, %i1 + ret <4 x float> %i2 +} + +; CHECK-ENABLED: t3: +; CHECK-DISABLED: t3: +define arm_aapcs_vfpcc <2 x float> @t3(float %f) { + ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d0[0] + ; CHECK-DISABLED-NOT: vdup.32 d{{[0-9]*}}, d0[0] + %i1 = insertelement <2 x float> undef, float %f, i32 1 + %i2 = fadd <2 x float> %i1, %i1 + ret <2 x float> %i2 +} + +; CHECK-ENABLED: t4: +; CHECK-DISABLED: t4: +define <2 x float> @t4(float %f) { + ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d0[0] + ; CHECK-DISABLED-NOT: vdup + %i1 = insertelement <2 x float> undef, float %f, i32 1 + br label %b + + ; Block %b has an S-reg as live-in. +b: + %i2 = fadd <2 x float> %i1, %i1 + ret <2 x float> %i2 +} + +; CHECK-ENABLED: t5: +; CHECK-DISABLED: t5: +define arm_aapcs_vfpcc <4 x float> @t5(<4 x float> %q, float %f) { + ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d{{[0-9]*}}[0] + ; CHECK-ENABLED: vadd.f32 + ; CHECK-ENABLED-NEXT: bx lr + ; CHECK-DISABLED-NOT: vdup + %i1 = insertelement <4 x float> %q, float %f, i32 1 + %i2 = fadd <4 x float> %i1, %i1 + ret <4 x float> %i2 +} diff --git a/test/CodeGen/ARM/a15-partial-update.ll b/test/CodeGen/ARM/a15-partial-update.ll new file mode 100644 index 000000000000..6306790d15f0 --- /dev/null +++ b/test/CodeGen/ARM/a15-partial-update.ll @@ -0,0 +1,38 @@ +; RUN: llc -O1 -mcpu=cortex-a15 -mtriple=armv7-linux-gnueabi -verify-machineinstrs < %s | FileCheck %s + +; CHECK: t1: +define <2 x float> @t1(float* %A, <2 x float> %B) { +; The generated code for this test uses a vld1.32 instruction +; to write the lane 1 of a D register containing the value of +; <2 x float> %B. Since the D register is defined, it would +; be incorrect to fully write it (with a vmov.f64) before the +; vld1.32 instruction. The test checks that a vmov.f64 was not +; generated. + +; CHECK-NOT: vmov.{{.*}} d{{[0-9]+}}, + %tmp2 = load float* %A, align 4 + %tmp3 = insertelement <2 x float> %B, float %tmp2, i32 1 + ret <2 x float> %tmp3 +} + +; CHECK: t2: +define void @t2(<4 x i8> *%in, <4 x i8> *%out, i32 %n) { +entry: + br label %loop +loop: +; The code generated by this test uses a vld1.32 instruction. +; We check that a dependency breaking vmov* instruction was +; generated. + +; CHECK: vmov.{{.*}} d{{[0-9]+}}, + %oldcount = phi i32 [0, %entry], [%newcount, %loop] + %newcount = add i32 %oldcount, 1 + %p1 = getelementptr <4 x i8> *%in, i32 %newcount + %p2 = getelementptr <4 x i8> *%out, i32 %newcount + %tmp1 = load <4 x i8> *%p1, align 4 + store <4 x i8> %tmp1, <4 x i8> *%p2 + %cmp = icmp eq i32 %newcount, %n + br i1 %cmp, label %loop, label %ret +ret: + ret void +} diff --git a/test/CodeGen/ARM/addrmode.ll b/test/CodeGen/ARM/addrmode.ll index 6da90897b94b..748d25804447 100644 --- a/test/CodeGen/ARM/addrmode.ll +++ b/test/CodeGen/ARM/addrmode.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -march=arm -stats 2>&1 | grep asm-printer | grep 4 define i32 @t1(i32 %a) { diff --git a/test/CodeGen/ARM/alloc-no-stack-realign.ll b/test/CodeGen/ARM/alloc-no-stack-realign.ll new file mode 100644 index 000000000000..273041dee34e --- /dev/null +++ b/test/CodeGen/ARM/alloc-no-stack-realign.ll @@ -0,0 +1,48 @@ +; RUN: llc < %s -mtriple=armv7-apple-ios -O0 -realign-stack=0 | FileCheck %s -check-prefix=NO-REALIGN +; RUN: llc < %s -mtriple=armv7-apple-ios -O0 | FileCheck %s + +; rdar://12713765 +; When realign-stack is set to false, make sure we are not creating stack +; objects that are assumed to be 64-byte aligned. 
+@T3_retval = common global <16 x float> zeroinitializer, align 16 + +define void @test(<16 x float>* noalias sret %agg.result) nounwind ssp { +entry: +; CHECK: test +; CHECK: bic sp, sp, #63 +; CHECK: orr [[R2:r[0-9]+]], [[R1:r[0-9]+]], #48 +; CHECK: vst1.64 +; CHECK: orr [[R2:r[0-9]+]], [[R1:r[0-9]+]], #32 +; CHECK: vst1.64 +; CHECK: orr [[R2:r[0-9]+]], [[R1:r[0-9]+]], #16 +; CHECK: vst1.64 +; CHECK: vst1.64 +; CHECK: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #48 +; CHECK: vst1.64 +; CHECK: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #32 +; CHECK: vst1.64 +; CHECK: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #16 +; CHECK: vst1.64 +; CHECK: vst1.64 +; NO-REALIGN: test +; NO-REALIGN: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #48 +; NO-REALIGN: vst1.64 +; NO-REALIGN: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #32 +; NO-REALIGN: vst1.64 +; NO-REALIGN: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #16 +; NO-REALIGN: vst1.64 +; NO-REALIGN: vst1.64 +; NO-REALIGN: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #48 +; NO-REALIGN: vst1.64 +; NO-REALIGN: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #32 +; NO-REALIGN: vst1.64 +; NO-REALIGN: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #16 +; NO-REALIGN: vst1.64 +; NO-REALIGN: vst1.64 + %retval = alloca <16 x float>, align 16 + %0 = load <16 x float>* @T3_retval, align 16 + store <16 x float> %0, <16 x float>* %retval + %1 = load <16 x float>* %retval + store <16 x float> %1, <16 x float>* %agg.result, align 16 + ret void +} diff --git a/test/CodeGen/ARM/arm-modifier.ll b/test/CodeGen/ARM/arm-modifier.ll index 5e12d8e03555..c74701663459 100644 --- a/test/CodeGen/ARM/arm-modifier.ll +++ b/test/CodeGen/ARM/arm-modifier.ll @@ -61,8 +61,7 @@ ret void define i64 @f4(i64* %val) nounwind { entry: ;CHECK: f4 - ;CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], {{r[0-9]?[13579]}}, [r0] - ;CHECK: mov r0, [[REG1]] + ;CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], {{r[0-9]?[13579]}}, [r{{[0-9]+}}] %0 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [$1]", "=&r,r,*Qo"(i64* %val, i64* %val) nounwind ret i64 %0 } diff --git a/test/CodeGen/ARM/arm-ttype-target2.ll b/test/CodeGen/ARM/arm-ttype-target2.ll new file mode 100644 index 000000000000..8b5087f89c04 --- /dev/null +++ b/test/CodeGen/ARM/arm-ttype-target2.ll @@ -0,0 +1,44 @@ +; RUN: llc -mtriple=armv7-none-linux-gnueabi -arm-enable-ehabi -arm-enable-ehabi-descriptors < %s | FileCheck %s + +@_ZTVN10__cxxabiv117__class_type_infoE = external global i8* +@_ZTS3Foo = linkonce_odr constant [5 x i8] c"3Foo\00" +@_ZTI3Foo = linkonce_odr unnamed_addr constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8** @_ZTVN10__cxxabiv117__class_type_infoE, i32 2) to i8*), i8* getelementptr inbounds ([5 x i8]* @_ZTS3Foo, i32 0, i32 0) } + +define i32 @main() { +entry: + invoke void @_Z3foov() + to label %return unwind label %lpad + +lpad: ; preds = %entry + %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + catch i8* bitcast ({ i8*, i8* }* @_ZTI3Foo to i8*) + %1 = extractvalue { i8*, i32 } %0, 1 + %2 = tail call i32 @llvm.eh.typeid.for(i8* bitcast ({ i8*, i8* }* @_ZTI3Foo to i8*)) nounwind +; CHECK: _ZTI3Foo(target2) + + %matches = icmp eq i32 %1, %2 + br i1 %matches, label %catch, label %eh.resume + +catch: ; preds = %lpad + %3 = extractvalue { i8*, i32 } %0, 0 + %4 = tail call i8* @__cxa_begin_catch(i8* %3) nounwind + tail call void @__cxa_end_catch() + br label %return + +return: ; preds = %entry, %catch + %retval.0 = phi i32 [ 1, %catch ], [ 0, %entry ] + ret i32 %retval.0 + +eh.resume: ; preds = %lpad + resume { i8*, i32 } %0 +} + +declare void 
@_Z3foov() + +declare i32 @__gxx_personality_v0(...) + +declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone + +declare i8* @__cxa_begin_catch(i8*) + +declare void @__cxa_end_catch() diff --git a/test/CodeGen/ARM/atomic-64bit.ll b/test/CodeGen/ARM/atomic-64bit.ll index e9609ac0f9ef..f2c7305ff33a 100644 --- a/test/CodeGen/ARM/atomic-64bit.ll +++ b/test/CodeGen/ARM/atomic-64bit.ll @@ -1,98 +1,176 @@ ; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabihf | FileCheck %s --check-prefix=CHECK-THUMB define i64 @test1(i64* %ptr, i64 %val) { -; CHECK: test1 +; CHECK: test1: ; CHECK: dmb ish -; CHECK: ldrexd r2, r3 -; CHECK: adds r0, r2 -; CHECK: adc r1, r3 -; CHECK: strexd {{[a-z0-9]+}}, r0, r1 +; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] +; CHECK: adds [[REG3:(r[0-9]?[02468])]], [[REG1]] +; CHECK: adc [[REG4:(r[0-9]?[13579])]], [[REG2]] +; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] ; CHECK: cmp ; CHECK: bne ; CHECK: dmb ish + +; CHECK-THUMB: test1: +; CHECK-THUMB: dmb ish +; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] +; CHECK-THUMB: adds.w [[REG3:[a-z0-9]+]], [[REG1]] +; CHECK-THUMB: adc.w [[REG4:[a-z0-9]+]], [[REG2]] +; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] +; CHECK-THUMB: cmp +; CHECK-THUMB: bne +; CHECK-THUMB: dmb ish + %r = atomicrmw add i64* %ptr, i64 %val seq_cst ret i64 %r } define i64 @test2(i64* %ptr, i64 %val) { -; CHECK: test2 +; CHECK: test2: ; CHECK: dmb ish -; CHECK: ldrexd r2, r3 -; CHECK: subs r0, r2 -; CHECK: sbc r1, r3 -; CHECK: strexd {{[a-z0-9]+}}, r0, r1 +; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] +; CHECK: subs [[REG3:(r[0-9]?[02468])]], [[REG1]] +; CHECK: sbc [[REG4:(r[0-9]?[13579])]], [[REG2]] +; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] ; CHECK: cmp ; CHECK: bne ; CHECK: dmb ish + +; CHECK-THUMB: test2: +; CHECK-THUMB: dmb ish +; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] +; CHECK-THUMB: subs.w [[REG3:[a-z0-9]+]], [[REG1]] +; CHECK-THUMB: sbc.w [[REG4:[a-z0-9]+]], [[REG2]] +; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] +; CHECK-THUMB: cmp +; CHECK-THUMB: bne +; CHECK-THUMB: dmb ish + %r = atomicrmw sub i64* %ptr, i64 %val seq_cst ret i64 %r } define i64 @test3(i64* %ptr, i64 %val) { -; CHECK: test3 +; CHECK: test3: ; CHECK: dmb ish -; CHECK: ldrexd r2, r3 -; CHECK: and r0, r2 -; CHECK: and r1, r3 -; CHECK: strexd {{[a-z0-9]+}}, r0, r1 +; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] +; CHECK: and [[REG3:(r[0-9]?[02468])]], [[REG1]] +; CHECK: and [[REG4:(r[0-9]?[13579])]], [[REG2]] +; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] ; CHECK: cmp ; CHECK: bne ; CHECK: dmb ish + +; CHECK-THUMB: test3: +; CHECK-THUMB: dmb ish +; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] +; CHECK-THUMB: and.w [[REG3:[a-z0-9]+]], [[REG1]] +; CHECK-THUMB: and.w [[REG4:[a-z0-9]+]], [[REG2]] +; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] +; CHECK-THUMB: cmp +; CHECK-THUMB: bne +; CHECK-THUMB: dmb ish + %r = atomicrmw and i64* %ptr, i64 %val seq_cst ret i64 %r } define i64 @test4(i64* %ptr, i64 %val) { -; CHECK: test4 +; CHECK: test4: ; CHECK: dmb ish -; CHECK: ldrexd r2, r3 -; CHECK: orr r0, r2 -; CHECK: orr r1, r3 -; CHECK: strexd {{[a-z0-9]+}}, r0, r1 +; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] +; CHECK: orr [[REG3:(r[0-9]?[02468])]], [[REG1]] +; CHECK: orr [[REG4:(r[0-9]?[13579])]], [[REG2]] +; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] ; 
CHECK: cmp ; CHECK: bne ; CHECK: dmb ish + +; CHECK-THUMB: test4: +; CHECK-THUMB: dmb ish +; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] +; CHECK-THUMB: orr.w [[REG3:[a-z0-9]+]], [[REG1]] +; CHECK-THUMB: orr.w [[REG4:[a-z0-9]+]], [[REG2]] +; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] +; CHECK-THUMB: cmp +; CHECK-THUMB: bne +; CHECK-THUMB: dmb ish + %r = atomicrmw or i64* %ptr, i64 %val seq_cst ret i64 %r } define i64 @test5(i64* %ptr, i64 %val) { -; CHECK: test5 +; CHECK: test5: ; CHECK: dmb ish -; CHECK: ldrexd r2, r3 -; CHECK: eor r0, r2 -; CHECK: eor r1, r3 -; CHECK: strexd {{[a-z0-9]+}}, r0, r1 +; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] +; CHECK: eor [[REG3:(r[0-9]?[02468])]], [[REG1]] +; CHECK: eor [[REG4:(r[0-9]?[13579])]], [[REG2]] +; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] ; CHECK: cmp ; CHECK: bne ; CHECK: dmb ish + +; CHECK-THUMB: test5: +; CHECK-THUMB: dmb ish +; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] +; CHECK-THUMB: eor.w [[REG3:[a-z0-9]+]], [[REG1]] +; CHECK-THUMB: eor.w [[REG4:[a-z0-9]+]], [[REG2]] +; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] +; CHECK-THUMB: cmp +; CHECK-THUMB: bne +; CHECK-THUMB: dmb ish + %r = atomicrmw xor i64* %ptr, i64 %val seq_cst ret i64 %r } define i64 @test6(i64* %ptr, i64 %val) { -; CHECK: test6 +; CHECK: test6: ; CHECK: dmb ish -; CHECK: ldrexd r2, r3 -; CHECK: strexd {{[a-z0-9]+}}, r0, r1 +; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] +; CHECK: strexd {{[a-z0-9]+}}, {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}} ; CHECK: cmp ; CHECK: bne ; CHECK: dmb ish + +; CHECK-THUMB: test6: +; CHECK-THUMB: dmb ish +; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] +; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}} +; CHECK-THUMB: cmp +; CHECK-THUMB: bne +; CHECK-THUMB: dmb ish + %r = atomicrmw xchg i64* %ptr, i64 %val seq_cst ret i64 %r } define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) { -; CHECK: test7 +; CHECK: test7: ; CHECK: dmb ish -; CHECK: ldrexd r2, r3 -; CHECK: cmp r2 -; CHECK: cmpeq r3 +; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] +; CHECK: cmp [[REG1]] +; CHECK: cmpeq [[REG2]] ; CHECK: bne -; CHECK: strexd {{[a-z0-9]+}}, r0, r1 +; CHECK: strexd {{[a-z0-9]+}}, {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}} ; CHECK: cmp ; CHECK: bne ; CHECK: dmb ish + +; CHECK-THUMB: test7: +; CHECK-THUMB: dmb ish +; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] +; CHECK-THUMB: cmp [[REG1]] +; CHECK-THUMB: it eq +; CHECK-THUMB: cmpeq [[REG2]] +; CHECK-THUMB: bne +; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}} +; CHECK-THUMB: cmp +; CHECK-THUMB: bne +; CHECK-THUMB: dmb ish + %r = cmpxchg i64* %ptr, i64 %val1, i64 %val2 seq_cst ret i64 %r } @@ -100,15 +178,27 @@ define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) { ; Compiles down to cmpxchg ; FIXME: Should compile to a single ldrexd define i64 @test8(i64* %ptr) { -; CHECK: test8 -; CHECK: ldrexd r2, r3 -; CHECK: cmp r2 -; CHECK: cmpeq r3 +; CHECK: test8: +; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] +; CHECK: cmp [[REG1]] +; CHECK: cmpeq [[REG2]] ; CHECK: bne -; CHECK: strexd {{[a-z0-9]+}}, r0, r1 +; CHECK: strexd {{[a-z0-9]+}}, {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}} ; CHECK: cmp ; CHECK: bne ; CHECK: dmb ish + +; CHECK-THUMB: test8: +; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] +; CHECK-THUMB: cmp [[REG1]] +; CHECK-THUMB: it eq +; CHECK-THUMB: cmpeq [[REG2]] +; CHECK-THUMB: bne +; CHECK-THUMB: 
strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}} +; CHECK-THUMB: cmp +; CHECK-THUMB: bne +; CHECK-THUMB: dmb ish + %r = load atomic i64* %ptr seq_cst, align 8 ret i64 %r } @@ -116,13 +206,131 @@ define i64 @test8(i64* %ptr) { ; Compiles down to atomicrmw xchg; there really isn't any more efficient ; way to write it. define void @test9(i64* %ptr, i64 %val) { -; CHECK: test9 +; CHECK: test9: ; CHECK: dmb ish -; CHECK: ldrexd r2, r3 -; CHECK: strexd {{[a-z0-9]+}}, r0, r1 +; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] +; CHECK: strexd {{[a-z0-9]+}}, {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}} ; CHECK: cmp ; CHECK: bne ; CHECK: dmb ish + +; CHECK-THUMB: test9: +; CHECK-THUMB: dmb ish +; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] +; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}} +; CHECK-THUMB: cmp +; CHECK-THUMB: bne +; CHECK-THUMB: dmb ish + store atomic i64 %val, i64* %ptr seq_cst, align 8 ret void } + +define i64 @test10(i64* %ptr, i64 %val) { +; CHECK: test10: +; CHECK: dmb ish +; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] +; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]] +; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]] +; CHECK: blt +; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] +; CHECK: cmp +; CHECK: bne +; CHECK: dmb ish + +; CHECK-THUMB: test10: +; CHECK-THUMB: dmb ish +; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] +; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]] +; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]] +; CHECK-THUMB: blt +; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] +; CHECK-THUMB: cmp +; CHECK-THUMB: bne +; CHECK-THUMB: dmb ish + + %r = atomicrmw min i64* %ptr, i64 %val seq_cst + ret i64 %r +} + +define i64 @test11(i64* %ptr, i64 %val) { +; CHECK: test11: +; CHECK: dmb ish +; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] +; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]] +; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]] +; CHECK: blo +; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] +; CHECK: cmp +; CHECK: bne +; CHECK: dmb ish + + +; CHECK-THUMB: test11: +; CHECK-THUMB: dmb ish +; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] +; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]] +; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]] +; CHECK-THUMB: blo +; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] +; CHECK-THUMB: cmp +; CHECK-THUMB: bne +; CHECK-THUMB: dmb ish + + %r = atomicrmw umin i64* %ptr, i64 %val seq_cst + ret i64 %r +} + +define i64 @test12(i64* %ptr, i64 %val) { +; CHECK: test12: +; CHECK: dmb ish +; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] +; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]] +; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]] +; CHECK: bge +; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] +; CHECK: cmp +; CHECK: bne +; CHECK: dmb ish + +; CHECK-THUMB: test12: +; CHECK-THUMB: dmb ish +; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] +; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]] +; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]] +; CHECK-THUMB: bge +; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] +; CHECK-THUMB: cmp +; CHECK-THUMB: bne +; CHECK-THUMB: dmb ish + + %r = atomicrmw max i64* %ptr, i64 %val seq_cst + ret i64 %r +} + +define i64 @test13(i64* %ptr, i64 %val) 
{ +; CHECK: test13: +; CHECK: dmb ish +; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] +; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]] +; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]] +; CHECK: bhs +; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] +; CHECK: cmp +; CHECK: bne +; CHECK: dmb ish + +; CHECK-THUMB: test13: +; CHECK-THUMB: dmb ish +; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] +; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]] +; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]] +; CHECK-THUMB: bhs +; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] +; CHECK-THUMB: cmp +; CHECK-THUMB: bne +; CHECK-THUMB: dmb ish + %r = atomicrmw umax i64* %ptr, i64 %val seq_cst + ret i64 %r +} + diff --git a/test/CodeGen/ARM/avoid-cpsr-rmw.ll b/test/CodeGen/ARM/avoid-cpsr-rmw.ll index 96e83dd88e92..c5d00a0f8a4c 100644 --- a/test/CodeGen/ARM/avoid-cpsr-rmw.ll +++ b/test/CodeGen/ARM/avoid-cpsr-rmw.ll @@ -49,3 +49,68 @@ while.body: while.end: ret void } + +; Allow partial CPSR dependency when code size is the priority. +; rdar://12878928 +define void @t3(i32* nocapture %ptr1, i32* %ptr2, i32 %c) nounwind minsize { +entry: +; CHECK: t3: + %tobool7 = icmp eq i32* %ptr2, null + br i1 %tobool7, label %while.end, label %while.body + +while.body: +; CHECK: while.body +; CHECK: mul r{{[0-9]+}} +; CHECK: muls + %ptr1.addr.09 = phi i32* [ %add.ptr, %while.body ], [ %ptr1, %entry ] + %ptr2.addr.08 = phi i32* [ %incdec.ptr, %while.body ], [ %ptr2, %entry ] + %0 = load i32* %ptr1.addr.09, align 4 + %arrayidx1 = getelementptr inbounds i32* %ptr1.addr.09, i32 1 + %1 = load i32* %arrayidx1, align 4 + %arrayidx3 = getelementptr inbounds i32* %ptr1.addr.09, i32 2 + %2 = load i32* %arrayidx3, align 4 + %arrayidx4 = getelementptr inbounds i32* %ptr1.addr.09, i32 3 + %3 = load i32* %arrayidx4, align 4 + %add.ptr = getelementptr inbounds i32* %ptr1.addr.09, i32 4 + %mul = mul i32 %1, %0 + %mul5 = mul i32 %mul, %2 + %mul6 = mul i32 %mul5, %3 + store i32 %mul6, i32* %ptr2.addr.08, align 4 + %incdec.ptr = getelementptr inbounds i32* %ptr2.addr.08, i32 -1 + %tobool = icmp eq i32* %incdec.ptr, null + br i1 %tobool, label %while.end, label %while.body + +while.end: + ret void +} + +; Avoid producing tMOVi8 after a high-latency flag-setting operation. 
+; <rdar://problem/13468102> +define void @t4(i32* nocapture %p, double* nocapture %q) { +entry: +; CHECK: t4 +; CHECK: vmrs APSR_nzcv, fpscr +; CHECK: if.then +; CHECK-NOT: movs + %0 = load double* %q, align 4 + %cmp = fcmp olt double %0, 1.000000e+01 + %incdec.ptr1 = getelementptr inbounds i32* %p, i32 1 + br i1 %cmp, label %if.then, label %if.else + +if.then: + store i32 7, i32* %p, align 4 + %incdec.ptr2 = getelementptr inbounds i32* %p, i32 2 + store i32 8, i32* %incdec.ptr1, align 4 + store i32 9, i32* %incdec.ptr2, align 4 + br label %if.end + +if.else: + store i32 3, i32* %p, align 4 + %incdec.ptr5 = getelementptr inbounds i32* %p, i32 2 + store i32 5, i32* %incdec.ptr1, align 4 + store i32 6, i32* %incdec.ptr5, align 4 + br label %if.end + +if.end: + ret void +} diff --git a/test/CodeGen/ARM/bfx.ll b/test/CodeGen/ARM/bfx.ll index 519c1353a387..394da9e157ff 100644 --- a/test/CodeGen/ARM/bfx.ll +++ b/test/CodeGen/ARM/bfx.ll @@ -26,3 +26,28 @@ define i32 @ubfx2(i32 %a) { ret i32 %t2 } +; rdar://12870177 +define i32 @ubfx_opt(i32* nocapture %ctx, i32 %x) nounwind readonly ssp { +entry: +; CHECK: ubfx_opt +; CHECK: lsr [[REG1:(lr|r[0-9]+)]], r1, #24 +; CHECK: ldr {{lr|r[0-9]+}}, [r0, [[REG1]], lsl #2] +; CHECK: ubfx [[REG2:(lr|r[0-9]+)]], r1, #16, #8 +; CHECK: ldr {{lr|r[0-9]+}}, [r0, [[REG2]], lsl #2] +; CHECK: ubfx [[REG3:(lr|r[0-9]+)]], r1, #8, #8 +; CHECK: ldr {{lr|r[0-9]+}}, [r0, [[REG3]], lsl #2] + %and = lshr i32 %x, 8 + %shr = and i32 %and, 255 + %and1 = lshr i32 %x, 16 + %shr2 = and i32 %and1, 255 + %shr4 = lshr i32 %x, 24 + %arrayidx = getelementptr inbounds i32* %ctx, i32 %shr4 + %0 = load i32* %arrayidx, align 4 + %arrayidx5 = getelementptr inbounds i32* %ctx, i32 %shr2 + %1 = load i32* %arrayidx5, align 4 + %add = add i32 %1, %0 + %arrayidx6 = getelementptr inbounds i32* %ctx, i32 %shr + %2 = load i32* %arrayidx6, align 4 + %add7 = add i32 %add, %2 + ret i32 %add7 +} diff --git a/test/CodeGen/ARM/call-tc.ll b/test/CodeGen/ARM/call-tc.ll index 94edff5c0be5..58fbbda0f6bd 100644 --- a/test/CodeGen/ARM/call-tc.ll +++ b/test/CodeGen/ARM/call-tc.ll @@ -103,7 +103,6 @@ define i32 @t8(i32 %x) nounwind ssp { entry: ; CHECKT2D: t8: ; CHECKT2D-NOT: push -; CHECKT2D-NOT %and = and i32 %x, 1 %tobool = icmp eq i32 %and, 0 br i1 %tobool, label %if.end, label %if.then diff --git a/test/CodeGen/ARM/call_nolink.ll b/test/CodeGen/ARM/call_nolink.ll index 00b16888f389..5ec7f74a605f 100644 --- a/test/CodeGen/ARM/call_nolink.ll +++ b/test/CodeGen/ARM/call_nolink.ll @@ -1,5 +1,4 @@ -; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \ -; RUN: not grep "bx lr" +; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | FileCheck %s %struct.anon = type { i32 (i32, i32, i32)*, i32, i32, [3 x i32], i8*, i8*, i8* } @r = external global [14 x i32] ; <[14 x i32]*> [#uses=4] @@ -8,6 +7,8 @@ @numi = external global i32 ; <i32*> [#uses=1] @counter = external global [2 x i32] ; <[2 x i32]*> [#uses=1] +; CHECK: main_bb_2E_i_bb205_2E_i_2E_i_bb115_2E_i_2E_i: +; CHECK-NOT: bx lr define void @main_bb_2E_i_bb205_2E_i_2E_i_bb115_2E_i_2E_i() { newFuncRoot: @@ -50,3 +51,12 @@ bb115.i.i: ; preds = %bb115.i.i.bb115.i.i_crit_edge, %newFuncRoot icmp slt i32 %tmp166.i.i, %tmp168.i.i ; <i1>:0 [#uses=1] br i1 %0, label %bb115.i.i.bb115.i.i_crit_edge, label %bb115.i.i.bb170.i.i_crit_edge.exitStub } + +define void @PR15520(void ()* %fn) { + call void %fn() + ret void + +; CHECK: PR15520: +; CHECK: mov lr, pc +; CHECK: mov pc, r0 +} diff --git a/test/CodeGen/ARM/coalesce-subregs.ll b/test/CodeGen/ARM/coalesce-subregs.ll
index 3ba947579a3a..e7bd5f41bb4b 100644 --- a/test/CodeGen/ARM/coalesce-subregs.ll +++ b/test/CodeGen/ARM/coalesce-subregs.ll @@ -147,7 +147,7 @@ if.end: ; preds = %entry, %if.then ; CHECK: vmov.f32 {{.*}}, #1.0 ; CHECK-NOT: vmov ; CHECK-NOT: vorr -; CHECK: %if.end +; CHECK: bx ; We may leave the last insertelement in the if.end block. ; It is inserting the %add value into a dead lane, but %add causes interference ; in the entry block, and we don't do dead lane checks across basic blocks. diff --git a/test/CodeGen/ARM/commute-movcc.ll b/test/CodeGen/ARM/commute-movcc.ll index 7316452cd617..769ba55eb9eb 100644 --- a/test/CodeGen/ARM/commute-movcc.ll +++ b/test/CodeGen/ARM/commute-movcc.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=thumbv7-apple-ios -disable-code-place < %s | FileCheck %s -; RUN: llc -mtriple=armv7-apple-ios -disable-code-place < %s | FileCheck %s +; RUN: llc -mtriple=thumbv7-apple-ios -disable-block-placement < %s | FileCheck %s +; RUN: llc -mtriple=armv7-apple-ios -disable-block-placement < %s | FileCheck %s ; LLVM IR optimizers canonicalize icmp+select this way. ; Make sure that TwoAddressInstructionPass can commute the corresponding diff --git a/test/CodeGen/ARM/crash.ll b/test/CodeGen/ARM/crash.ll index 0f6f33e0448e..4e3e2010b07a 100644 --- a/test/CodeGen/ARM/crash.ll +++ b/test/CodeGen/ARM/crash.ll @@ -69,3 +69,26 @@ bb: store <4 x float> %tmp154, <4 x float>* undef, align 16 ret void } + +; <rdar://problem/12721258> +%A = type { %B } +%B = type { i32 } + +define void @_Z3Foov() ssp { +entry: + br i1 true, label %exit, label %false + +false: + invoke void undef(%A* undef) + to label %exit unwind label %lpad + +lpad: + %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + catch i8* null + unreachable + +exit: + ret void +} + +declare i32 @__gxx_personality_sj0(...) 
diff --git a/test/CodeGen/ARM/debug-info-arg.ll b/test/CodeGen/ARM/debug-info-arg.ll index a7b44e6fe709..33c8e9daae69 100644 --- a/test/CodeGen/ARM/debug-info-arg.ll +++ b/test/CodeGen/ARM/debug-info-arg.ll @@ -30,29 +30,27 @@ declare void @foobar(i64, i64) declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0} -!llvm.dbg.sp = !{!1} -!llvm.dbg.lv.foo = !{!5, !13, !14, !17, !18, !19} -!0 = metadata !{i32 589841, i32 0, i32 12, metadata !"one.c", metadata !"/Volumes/Athwagate/R10048772", metadata !"Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!1 = metadata !{i32 589870, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 11, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void (%struct.tag_s*, %struct.tag_s*, i64, i64, %struct.tag_s*, %struct.tag_s*)* @foo, null, null} ; [ DW_TAG_subprogram ] -!2 = metadata !{i32 589865, metadata !"one.c", metadata !"/Volumes/Athwagate/R10048772", metadata !0} ; [ DW_TAG_file_type ] -!3 = metadata !{i32 589845, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!0 = metadata !{i32 786449, metadata !32, i32 12, metadata !"Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", i1 true, metadata !"", i32 0, null, null, metadata !30, null, null} ; [ DW_TAG_compile_unit ] +!1 = metadata !{i32 786478, metadata !2, metadata !2, metadata !"foo", metadata !"foo", metadata !"", i32 11, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void (%struct.tag_s*, %struct.tag_s*, i64, i64, %struct.tag_s*, %struct.tag_s*)* @foo, null, null, metadata !31, i32 11} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 786473, metadata !32} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 786453, metadata !32, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !4 = metadata !{null} -!5 = metadata !{i32 590081, metadata !1, metadata !"this", metadata !2, i32 16777227, metadata !6, i32 0} ; [ DW_TAG_arg_variable ] -!6 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !7} ; [ DW_TAG_pointer_type ] -!7 = metadata !{i32 589843, metadata !0, metadata !"tag_s", metadata !2, i32 5, i64 96, i64 32, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!5 = metadata !{i32 786689, metadata !1, metadata !"this", metadata !2, i32 16777227, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ] +!6 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !7} ; [ DW_TAG_pointer_type ] +!7 = metadata !{i32 786451, metadata !32, metadata !0, metadata !"tag_s", i32 5, i64 96, i64 32, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_structure_type ] !8 = metadata !{metadata !9, metadata !11, metadata !12} -!9 = metadata !{i32 589837, metadata !7, metadata !"x", metadata !2, i32 6, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ] -!10 = metadata !{i32 589860, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!11 = metadata !{i32 589837, metadata !7, metadata !"y", metadata !2, i32 7, i64 32, i64 32, i64 32, i32 0, metadata !10} ; [ DW_TAG_member ] -!12 = metadata !{i32 589837, metadata !7, metadata !"z", metadata !2, i32 8, i64 
32, i64 32, i64 64, i32 0, metadata !10} ; [ DW_TAG_member ] -!13 = metadata !{i32 590081, metadata !1, metadata !"c", metadata !2, i32 33554443, metadata !6, i32 0} ; [ DW_TAG_arg_variable ] -!14 = metadata !{i32 590081, metadata !1, metadata !"x", metadata !2, i32 50331659, metadata !15, i32 0} ; [ DW_TAG_arg_variable ] -!15 = metadata !{i32 589846, metadata !0, metadata !"UInt64", metadata !2, i32 1, i64 0, i64 0, i64 0, i32 0, metadata !16} ; [ DW_TAG_typedef ] -!16 = metadata !{i32 589860, metadata !0, metadata !"long long unsigned int", null, i32 0, i64 64, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] -!17 = metadata !{i32 590081, metadata !1, metadata !"y", metadata !2, i32 67108875, metadata !15, i32 0} ; [ DW_TAG_arg_variable ] -!18 = metadata !{i32 590081, metadata !1, metadata !"ptr1", metadata !2, i32 83886091, metadata !6, i32 0} ; [ DW_TAG_arg_variable ] -!19 = metadata !{i32 590081, metadata !1, metadata !"ptr2", metadata !2, i32 100663307, metadata !6, i32 0} ; [ DW_TAG_arg_variable ] +!9 = metadata !{i32 786445, metadata !32, metadata !7, metadata !"x", i32 6, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ] +!10 = metadata !{i32 786468, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!11 = metadata !{i32 786445, metadata !32, metadata !7, metadata !"y", i32 7, i64 32, i64 32, i64 32, i32 0, metadata !10} ; [ DW_TAG_member ] +!12 = metadata !{i32 786445, metadata !32, metadata !7, metadata !"z", i32 8, i64 32, i64 32, i64 64, i32 0, metadata !10} ; [ DW_TAG_member ] +!13 = metadata !{i32 786689, metadata !1, metadata !"c", metadata !2, i32 33554443, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ] +!14 = metadata !{i32 786689, metadata !1, metadata !"x", metadata !2, i32 50331659, metadata !15, i32 0, null} ; [ DW_TAG_arg_variable ] +!15 = metadata !{i32 786454, metadata !32, metadata !0, metadata !"UInt64", i32 1, i64 0, i64 0, i64 0, i32 0, metadata !16} ; [ DW_TAG_typedef ] +!16 = metadata !{i32 786468, null, metadata !0, metadata !"long long unsigned int", i32 0, i64 64, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] +!17 = metadata !{i32 786689, metadata !1, metadata !"y", metadata !2, i32 67108875, metadata !15, i32 0, null} ; [ DW_TAG_arg_variable ] +!18 = metadata !{i32 786689, metadata !1, metadata !"ptr1", metadata !2, i32 83886091, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ] +!19 = metadata !{i32 786689, metadata !1, metadata !"ptr2", metadata !2, i32 100663307, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ] !20 = metadata !{i32 11, i32 24, metadata !1, null} !21 = metadata !{i32 11, i32 44, metadata !1, null} !22 = metadata !{i32 11, i32 54, metadata !1, null} @@ -60,6 +58,9 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !24 = metadata !{i32 11, i32 81, metadata !1, null} !25 = metadata !{i32 11, i32 101, metadata !1, null} !26 = metadata !{i32 12, i32 3, metadata !27, null} -!27 = metadata !{i32 589835, metadata !1, i32 11, i32 107, metadata !2, i32 0} ; [ DW_TAG_lexical_block ] +!27 = metadata !{i32 786443, metadata !1, i32 11, i32 107, metadata !2, i32 0} ; [ DW_TAG_lexical_block ] !28 = metadata !{i32 13, i32 5, metadata !27, null} !29 = metadata !{i32 14, i32 1, metadata !27, null} +!30 = metadata !{metadata !1} +!31 = metadata !{metadata !5, metadata !13, metadata !14, metadata !17, metadata !18, metadata!19} +!32 = metadata !{metadata !"one.c", metadata !"/Volumes/Athwagate/R10048772"} diff --git 
a/test/CodeGen/ARM/debug-info-blocks.ll b/test/CodeGen/ARM/debug-info-blocks.ll index 0ad0a15ca3d9..d0bfecc5af41 100644 --- a/test/CodeGen/ARM/debug-info-blocks.ll +++ b/test/CodeGen/ARM/debug-info-blocks.ll @@ -93,153 +93,166 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load } !llvm.dbg.cu = !{!0} -!llvm.dbg.enum = !{!1, !1, !5, !5, !9, !14, !19, !19, !14, !14, !14, !19, !19, !19} -!llvm.dbg.sp = !{!23} -!0 = metadata !{i32 589841, i32 0, i32 16, metadata !"MyLibrary.i", metadata !"/Volumes/Sandbox/llvm", metadata !"Apple clang version 2.1", i1 true, i1 false, metadata !"", i32 2} ; [ DW_TAG_compile_unit ] -!1 = metadata !{i32 589828, metadata !0, metadata !"", metadata !2, i32 248, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !3, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] -!2 = metadata !{i32 589865, metadata !"header.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ] +!0 = metadata !{i32 786449, i32 16, metadata !40, metadata !"Apple clang version 2.1", i1 false, metadata !"", i32 2, metadata !147, null, metadata !148, null, metadata !""} ; [ DW_TAG_compile_unit ] +!1 = metadata !{i32 786433, metadata !160, metadata !0, metadata !"", i32 248, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !3, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] +!2 = metadata !{i32 786473, metadata !160} ; [ DW_TAG_file_type ] !3 = metadata !{metadata !4} -!4 = metadata !{i32 589864, metadata !"Ver1", i64 0} ; [ DW_TAG_enumerator ] -!5 = metadata !{i32 589828, metadata !0, metadata !"Mode", metadata !6, i32 79, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !7, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] -!6 = metadata !{i32 589865, metadata !"header2.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ] +!4 = metadata !{i32 786472, metadata !"Ver1", i64 0} ; [ DW_TAG_enumerator ] +!5 = metadata !{i32 786433, metadata !160, metadata !0, metadata !"Mode", i32 79, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !7, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] +!6 = metadata !{i32 786473, metadata !161} ; [ DW_TAG_file_type ] !7 = metadata !{metadata !8} -!8 = metadata !{i32 589864, metadata !"One", i64 0} ; [ DW_TAG_enumerator ] -!9 = metadata !{i32 589828, metadata !0, metadata !"", metadata !10, i32 15, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !11, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] -!10 = metadata !{i32 589865, metadata !"header3.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ] +!8 = metadata !{i32 786472, metadata !"One", i64 0} ; [ DW_TAG_enumerator ] +!9 = metadata !{i32 786433, metadata !149, metadata !0, metadata !"", i32 15, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !11, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] +!10 = metadata !{i32 786473, metadata !149} ; [ DW_TAG_file_type ] !11 = metadata !{metadata !12, metadata !13} -!12 = metadata !{i32 589864, metadata !"Unknown", i64 0} ; [ DW_TAG_enumerator ] -!13 = metadata !{i32 589864, metadata !"Known", i64 1} ; [ DW_TAG_enumerator ] -!14 = metadata !{i32 589828, metadata !0, metadata !"", metadata !15, i32 20, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !16, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] -!15 = metadata !{i32 589865, metadata !"Private.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ] +!12 = metadata !{i32 786472, metadata !"Unknown", i64 0} ; [ DW_TAG_enumerator ] +!13 = metadata !{i32 786472, metadata !"Known", i64 1} ; [ DW_TAG_enumerator ] +!14 = metadata !{i32 786433, metadata !150, metadata 
!0, metadata !"", i32 20, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !16, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] +!15 = metadata !{i32 786473, metadata !150} ; [ DW_TAG_file_type ] !16 = metadata !{metadata !17, metadata !18} -!17 = metadata !{i32 589864, metadata !"Single", i64 0} ; [ DW_TAG_enumerator ] -!18 = metadata !{i32 589864, metadata !"Double", i64 1} ; [ DW_TAG_enumerator ] -!19 = metadata !{i32 589828, metadata !0, metadata !"", metadata !20, i32 14, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !21, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] -!20 = metadata !{i32 589865, metadata !"header4.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ] +!17 = metadata !{i32 786472, metadata !"Single", i64 0} ; [ DW_TAG_enumerator ] +!18 = metadata !{i32 786472, metadata !"Double", i64 1} ; [ DW_TAG_enumerator ] +!19 = metadata !{i32 786433, metadata !151, metadata !0, metadata !"", i32 14, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !21, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] +!20 = metadata !{i32 786473, metadata !151} ; [ DW_TAG_file_type ] !21 = metadata !{metadata !22} -!22 = metadata !{i32 589864, metadata !"Eleven", i64 0} ; [ DW_TAG_enumerator ] -!23 = metadata !{i32 589870, i32 0, metadata !24, metadata !"foobar_func_block_invoke_0", metadata !"foobar_func_block_invoke_0", metadata !"", metadata !24, i32 609, metadata !25, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (i8*, %0*, [4 x i32], [4 x i32])* @foobar_func_block_invoke_0, null, null} ; [ DW_TAG_subprogram ] -!24 = metadata !{i32 589865, metadata !"MyLibrary.m", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ] -!25 = metadata !{i32 589845, metadata !24, metadata !"", metadata !24, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !26, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!22 = metadata !{i32 786472, metadata !"Eleven", i64 0} ; [ DW_TAG_enumerator ] +!23 = metadata !{i32 786478, metadata !24, metadata !"foobar_func_block_invoke_0", metadata !"foobar_func_block_invoke_0", metadata !"", metadata !24, i32 609, metadata !25, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (i8*, %0*, [4 x i32], [4 x i32])* @foobar_func_block_invoke_0, null, null, null, i32 609} ; [ DW_TAG_subprogram ] +!24 = metadata !{i32 786473, metadata !152} ; [ DW_TAG_file_type ] +!25 = metadata !{i32 786453, metadata !152, metadata !24, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !26, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !26 = metadata !{null} -!27 = metadata !{i32 590081, metadata !23, metadata !".block_descriptor", metadata !24, i32 16777825, metadata !28, i32 64} ; [ DW_TAG_arg_variable ] -!28 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 0, i64 0, i32 0, metadata !29} ; [ DW_TAG_pointer_type ] -!29 = metadata !{i32 589843, metadata !24, metadata !"__block_literal_14", metadata !24, i32 609, i64 256, i64 32, i32 0, i32 0, i32 0, metadata !30, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!27 = metadata !{i32 786689, metadata !23, metadata !".block_descriptor", metadata !24, i32 16777825, metadata !28, i32 64, null} ; [ DW_TAG_arg_variable ] +!28 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 0, i64 0, i32 0, metadata !29} ; [ DW_TAG_pointer_type ] +!29 = metadata !{i32 786451, metadata !152, metadata !24, metadata !"__block_literal_14", i32 609, i64 256, i64 32, i32 0, i32 0, i32 0, metadata !30, i32 0, i32 0} ; [ DW_TAG_structure_type ] !30 = metadata !{metadata !31, 
metadata !33, metadata !35, metadata !36, metadata !37, metadata !48, metadata !89, metadata !124} -!31 = metadata !{i32 589837, metadata !24, metadata !"__isa", metadata !24, i32 609, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_member ] -!32 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] -!33 = metadata !{i32 589837, metadata !24, metadata !"__flags", metadata !24, i32 609, i64 32, i64 32, i64 32, i32 0, metadata !34} ; [ DW_TAG_member ] -!34 = metadata !{i32 589860, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!35 = metadata !{i32 589837, metadata !24, metadata !"__reserved", metadata !24, i32 609, i64 32, i64 32, i64 64, i32 0, metadata !34} ; [ DW_TAG_member ] -!36 = metadata !{i32 589837, metadata !24, metadata !"__FuncPtr", metadata !24, i32 609, i64 32, i64 32, i64 96, i32 0, metadata !32} ; [ DW_TAG_member ] -!37 = metadata !{i32 589837, metadata !24, metadata !"__descriptor", metadata !24, i32 609, i64 32, i64 32, i64 128, i32 0, metadata !38} ; [ DW_TAG_member ] -!38 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !39} ; [ DW_TAG_pointer_type ] -!39 = metadata !{i32 589843, metadata !0, metadata !"__block_descriptor_withcopydispose", metadata !40, i32 307, i64 128, i64 32, i32 0, i32 0, i32 0, metadata !41, i32 0, i32 0} ; [ DW_TAG_structure_type ] -!40 = metadata !{i32 589865, metadata !"MyLibrary.i", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ] +!31 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__isa", i32 609, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_member ] +!32 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] +!33 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__flags", i32 609, i64 32, i64 32, i64 32, i32 0, metadata !34} ; [ DW_TAG_member ] +!34 = metadata !{i32 786468, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!35 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__reserved", i32 609, i64 32, i64 32, i64 64, i32 0, metadata !34} ; [ DW_TAG_member ] +!36 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__FuncPtr", i32 609, i64 32, i64 32, i64 96, i32 0, metadata !32} ; [ DW_TAG_member ] +!37 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__descriptor", i32 609, i64 32, i64 32, i64 128, i32 0, metadata !38} ; [ DW_TAG_member ] +!38 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !39} ; [ DW_TAG_pointer_type ] +!39 = metadata !{i32 786451, metadata !153, metadata !0, metadata !"__block_descriptor_withcopydispose", i32 307, i64 128, i64 32, i32 0, i32 0, i32 0, metadata !41, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!40 = metadata !{i32 786473, metadata !153} ; [ DW_TAG_file_type ] !41 = metadata !{metadata !42, metadata !44, metadata !45, metadata !47} -!42 = metadata !{i32 589837, metadata !40, metadata !"reserved", metadata !40, i32 307, i64 32, i64 32, i64 0, i32 0, metadata !43} ; [ DW_TAG_member ] -!43 = metadata !{i32 589860, metadata !0, metadata !"long unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] -!44 = metadata !{i32 589837, metadata !40, metadata !"Size", metadata !40, i32 307, i64 32, i64 32, i64 32, 
i32 0, metadata !43} ; [ DW_TAG_member ] -!45 = metadata !{i32 589837, metadata !40, metadata !"CopyFuncPtr", metadata !40, i32 307, i64 32, i64 32, i64 64, i32 0, metadata !46} ; [ DW_TAG_member ] -!46 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_pointer_type ] -!47 = metadata !{i32 589837, metadata !40, metadata !"DestroyFuncPtr", metadata !40, i32 307, i64 32, i64 32, i64 96, i32 0, metadata !46} ; [ DW_TAG_member ] -!48 = metadata !{i32 589837, metadata !24, metadata !"mydata", metadata !24, i32 609, i64 32, i64 32, i64 160, i32 0, metadata !49} ; [ DW_TAG_member ] -!49 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 0, i64 0, i32 0, metadata !50} ; [ DW_TAG_pointer_type ] -!50 = metadata !{i32 589843, metadata !24, metadata !"", metadata !24, i32 0, i64 224, i64 0, i32 0, i32 16, i32 0, metadata !51, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!42 = metadata !{i32 786445, metadata !153, metadata !40, metadata !"reserved", i32 307, i64 32, i64 32, i64 0, i32 0, metadata !43} ; [ DW_TAG_member ] +!43 = metadata !{i32 786468, null, metadata !0, metadata !"long unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] +!44 = metadata !{i32 786445, metadata !153, metadata !40, metadata !"Size", i32 307, i64 32, i64 32, i64 32, i32 0, metadata !43} ; [ DW_TAG_member ] +!45 = metadata !{i32 786445, metadata !153, metadata !40, metadata !"CopyFuncPtr", i32 307, i64 32, i64 32, i64 64, i32 0, metadata !46} ; [ DW_TAG_member ] +!46 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_pointer_type ] +!47 = metadata !{i32 786445, metadata !153, metadata !40, metadata !"DestroyFuncPtr", i32 307, i64 32, i64 32, i64 96, i32 0, metadata !46} ; [ DW_TAG_member ] +!48 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"mydata", i32 609, i64 32, i64 32, i64 160, i32 0, metadata !49} ; [ DW_TAG_member ] +!49 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 0, i64 0, i32 0, metadata !50} ; [ DW_TAG_pointer_type ] +!50 = metadata !{i32 786451, metadata !152, metadata !24, metadata !"", i32 0, i64 224, i64 0, i32 0, i32 16, i32 0, metadata !51, i32 0, i32 0} ; [ DW_TAG_structure_type ] !51 = metadata !{metadata !52, metadata !53, metadata !54, metadata !55, metadata !56, metadata !57, metadata !58} -!52 = metadata !{i32 589837, metadata !24, metadata !"__isa", metadata !24, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_member ] -!53 = metadata !{i32 589837, metadata !24, metadata !"__forwarding", metadata !24, i32 0, i64 32, i64 32, i64 32, i32 0, metadata !32} ; [ DW_TAG_member ] -!54 = metadata !{i32 589837, metadata !24, metadata !"__flags", metadata !24, i32 0, i64 32, i64 32, i64 64, i32 0, metadata !34} ; [ DW_TAG_member ] -!55 = metadata !{i32 589837, metadata !24, metadata !"__size", metadata !24, i32 0, i64 32, i64 32, i64 96, i32 0, metadata !34} ; [ DW_TAG_member ] -!56 = metadata !{i32 589837, metadata !24, metadata !"__copy_helper", metadata !24, i32 0, i64 32, i64 32, i64 128, i32 0, metadata !32} ; [ DW_TAG_member ] -!57 = metadata !{i32 589837, metadata !24, metadata !"__destroy_helper", metadata !24, i32 0, i64 32, i64 32, i64 160, i32 0, metadata !32} ; [ DW_TAG_member ] -!58 = metadata !{i32 589837, metadata !24, metadata !"mydata", metadata !24, i32 0, i64 32, i64 32, i64 192, i32 0, metadata !59} ; [ DW_TAG_member ] -!59 = metadata !{i32 
589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !60} ; [ DW_TAG_pointer_type ] -!60 = metadata !{i32 589843, metadata !24, metadata !"UIMydata", metadata !61, i32 26, i64 128, i64 32, i32 0, i32 0, i32 0, metadata !62, i32 16, i32 0} ; [ DW_TAG_structure_type ] -!61 = metadata !{i32 589865, metadata !"header11.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ] +!52 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__isa", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_member ] +!53 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__forwarding", i32 0, i64 32, i64 32, i64 32, i32 0, metadata !32} ; [ DW_TAG_member ] +!54 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__flags", i32 0, i64 32, i64 32, i64 64, i32 0, metadata !34} ; [ DW_TAG_member ] +!55 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__size", i32 0, i64 32, i64 32, i64 96, i32 0, metadata !34} ; [ DW_TAG_member ] +!56 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__copy_helper", i32 0, i64 32, i64 32, i64 128, i32 0, metadata !32} ; [ DW_TAG_member ] +!57 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__destroy_helper", i32 0, i64 32, i64 32, i64 160, i32 0, metadata !32} ; [ DW_TAG_member ] +!58 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"mydata", i32 0, i64 32, i64 32, i64 192, i32 0, metadata !59} ; [ DW_TAG_member ] +!59 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !60} ; [ DW_TAG_pointer_type ] +!60 = metadata !{i32 786451, metadata !154, metadata !24, metadata !"UIMydata", i32 26, i64 128, i64 32, i32 0, i32 0, i32 0, metadata !62, i32 16, i32 0} ; [ DW_TAG_structure_type ] +!61 = metadata !{i32 786473, metadata !154} ; [ DW_TAG_file_type ] !62 = metadata !{metadata !63, metadata !71, metadata !75, metadata !79} -!63 = metadata !{i32 589852, metadata !60, null, metadata !61, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !64} ; [ DW_TAG_inheritance ] -!64 = metadata !{i32 589843, metadata !40, metadata !"NSO", metadata !65, i32 66, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !66, i32 16, i32 0} ; [ DW_TAG_structure_type ] -!65 = metadata !{i32 589865, metadata !"NSO.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ] +!63 = metadata !{i32 786460, metadata !60, null, metadata !61, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !64} ; [ DW_TAG_inheritance ] +!64 = metadata !{i32 786451, metadata !155, metadata !40, metadata !"NSO", i32 66, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !66, i32 16, i32 0} ; [ DW_TAG_structure_type ] +!65 = metadata !{i32 786473, metadata !155} ; [ DW_TAG_file_type ] !66 = metadata !{metadata !67} -!67 = metadata !{i32 589837, metadata !65, metadata !"isa", metadata !65, i32 67, i64 32, i64 32, i64 0, i32 2, metadata !68, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] -!68 = metadata !{i32 589846, metadata !0, metadata !"Class", metadata !40, i32 197, i64 0, i64 0, i64 0, i32 0, metadata !69} ; [ DW_TAG_typedef ] -!69 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !70} ; [ DW_TAG_pointer_type ] -!70 = metadata !{i32 589843, metadata !0, metadata !"objc_class", metadata !40, i32 0, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ] -!71 = metadata !{i32 589837, metadata !61, metadata !"_mydataRef", metadata 
!61, i32 28, i64 32, i64 32, i64 32, i32 0, metadata !72, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] -!72 = metadata !{i32 589846, metadata !0, metadata !"CFTypeRef", metadata !24, i32 313, i64 0, i64 0, i64 0, i32 0, metadata !73} ; [ DW_TAG_typedef ] -!73 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !74} ; [ DW_TAG_pointer_type ] -!74 = metadata !{i32 589862, metadata !0, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, null} ; [ DW_TAG_const_type ] -!75 = metadata !{i32 589837, metadata !61, metadata !"_scale", metadata !61, i32 29, i64 32, i64 32, i64 64, i32 0, metadata !76, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] -!76 = metadata !{i32 589846, metadata !0, metadata !"Float", metadata !77, i32 89, i64 0, i64 0, i64 0, i32 0, metadata !78} ; [ DW_TAG_typedef ] -!77 = metadata !{i32 589865, metadata !"header12.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ] -!78 = metadata !{i32 589860, metadata !0, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] -!79 = metadata !{i32 589837, metadata !61, metadata !"_mydataFlags", metadata !61, i32 37, i64 8, i64 8, i64 96, i32 0, metadata !80, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] -!80 = metadata !{i32 589843, metadata !0, metadata !"", metadata !61, i32 30, i64 8, i64 8, i32 0, i32 0, i32 0, metadata !81, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!67 = metadata !{i32 786445, metadata !155, metadata !65, metadata !"isa", i32 67, i64 32, i64 32, i64 0, i32 2, metadata !68, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] +!68 = metadata !{i32 786454, metadata !0, metadata !"Class", metadata !40, i32 197, i64 0, i64 0, i64 0, i32 0, metadata !69} ; [ DW_TAG_typedef ] +!69 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !70} ; [ DW_TAG_pointer_type ] +!70 = metadata !{i32 786451, metadata !40, metadata !0, metadata !"objc_class", i32 0, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!71 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"_mydataRef", i32 28, i64 32, i64 32, i64 32, i32 0, metadata !72, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] +!72 = metadata !{i32 786454, metadata !0, metadata !"CFTypeRef", metadata !24, i32 313, i64 0, i64 0, i64 0, i32 0, metadata !73} ; [ DW_TAG_typedef ] +!73 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !74} ; [ DW_TAG_pointer_type ] +!74 = metadata !{i32 786470, null, metadata !0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null} ; [ DW_TAG_const_type ] +!75 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"_scale", i32 29, i64 32, i64 32, i64 64, i32 0, metadata !76, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] +!76 = metadata !{i32 786454, metadata !0, metadata !"Float", metadata !77, i32 89, i64 0, i64 0, i64 0, i32 0, metadata !78} ; [ DW_TAG_typedef ] +!77 = metadata !{i32 786473, metadata !156} ; [ DW_TAG_file_type ] +!78 = metadata !{i32 786468, null, metadata !0, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] +!79 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"_mydataFlags", i32 37, i64 8, i64 8, i64 96, i32 0, metadata !80, metadata !"", metadata !"", metadata !"", i32 0} ; [ 
DW_TAG_member ] +!80 = metadata !{i32 786451, metadata !154, metadata !0, metadata !"", i32 30, i64 8, i64 8, i32 0, i32 0, i32 0, metadata !81, i32 0, i32 0} ; [ DW_TAG_structure_type ] !81 = metadata !{metadata !82, metadata !84, metadata !85, metadata !86, metadata !87, metadata !88} -!82 = metadata !{i32 589837, metadata !61, metadata !"named", metadata !61, i32 31, i64 1, i64 32, i64 0, i32 0, metadata !83} ; [ DW_TAG_member ] -!83 = metadata !{i32 589860, metadata !0, metadata !"unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] -!84 = metadata !{i32 589837, metadata !61, metadata !"mydataO", metadata !61, i32 32, i64 3, i64 32, i64 1, i32 0, metadata !83} ; [ DW_TAG_member ] -!85 = metadata !{i32 589837, metadata !61, metadata !"cached", metadata !61, i32 33, i64 1, i64 32, i64 4, i32 0, metadata !83} ; [ DW_TAG_member ] -!86 = metadata !{i32 589837, metadata !61, metadata !"hasBeenCached", metadata !61, i32 34, i64 1, i64 32, i64 5, i32 0, metadata !83} ; [ DW_TAG_member ] -!87 = metadata !{i32 589837, metadata !61, metadata !"hasPattern", metadata !61, i32 35, i64 1, i64 32, i64 6, i32 0, metadata !83} ; [ DW_TAG_member ] -!88 = metadata !{i32 589837, metadata !61, metadata !"isCIMydata", metadata !61, i32 36, i64 1, i64 32, i64 7, i32 0, metadata !83} ; [ DW_TAG_member ] -!89 = metadata !{i32 589837, metadata !24, metadata !"self", metadata !24, i32 609, i64 32, i64 32, i64 192, i32 0, metadata !90} ; [ DW_TAG_member ] -!90 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !91} ; [ DW_TAG_pointer_type ] -!91 = metadata !{i32 589843, metadata !40, metadata !"MyWork", metadata !24, i32 36, i64 384, i64 32, i32 0, i32 0, i32 0, metadata !92, i32 16, i32 0} ; [ DW_TAG_structure_type ] +!82 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"named", i32 31, i64 1, i64 32, i64 0, i32 0, metadata !83} ; [ DW_TAG_member ] +!83 = metadata !{i32 786468, null, metadata !0, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] +!84 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"mydataO", i32 32, i64 3, i64 32, i64 1, i32 0, metadata !83} ; [ DW_TAG_member ] +!85 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"cached", i32 33, i64 1, i64 32, i64 4, i32 0, metadata !83} ; [ DW_TAG_member ] +!86 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"hasBeenCached", i32 34, i64 1, i64 32, i64 5, i32 0, metadata !83} ; [ DW_TAG_member ] +!87 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"hasPattern", i32 35, i64 1, i64 32, i64 6, i32 0, metadata !83} ; [ DW_TAG_member ] +!88 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"isCIMydata", i32 36, i64 1, i64 32, i64 7, i32 0, metadata !83} ; [ DW_TAG_member ] +!89 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"self", i32 609, i64 32, i64 32, i64 192, i32 0, metadata !90} ; [ DW_TAG_member ] +!90 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !91} ; [ DW_TAG_pointer_type ] +!91 = metadata !{i32 786451, metadata !152, metadata !40, metadata !"MyWork", i32 36, i64 384, i64 32, i32 0, i32 0, i32 0, metadata !92, i32 16, i32 0} ; [ DW_TAG_structure_type ] !92 = metadata !{metadata !93, metadata !98, metadata !101, metadata !107, metadata !123} -!93 = metadata !{i32 589852, metadata !91, null, metadata !24, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !94} ; [ 
DW_TAG_inheritance ] -!94 = metadata !{i32 589843, metadata !40, metadata !"twork", metadata !95, i32 43, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !96, i32 16, i32 0} ; [ DW_TAG_structure_type ] -!95 = metadata !{i32 589865, metadata !"header13.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ] +!93 = metadata !{i32 786460, metadata !91, null, metadata !24, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !94} ; [ DW_TAG_inheritance ] +!94 = metadata !{i32 786451, metadata !157, metadata !40, metadata !"twork", i32 43, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !96, i32 16, i32 0} ; [ DW_TAG_structure_type ] +!95 = metadata !{i32 786473, metadata !157} ; [ DW_TAG_file_type ] !96 = metadata !{metadata !97} -!97 = metadata !{i32 589852, metadata !94, null, metadata !95, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !64} ; [ DW_TAG_inheritance ] -!98 = metadata !{i32 589837, metadata !24, metadata !"_itemID", metadata !24, i32 38, i64 64, i64 32, i64 32, i32 1, metadata !99, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] -!99 = metadata !{i32 589846, metadata !0, metadata !"uint64_t", metadata !40, i32 55, i64 0, i64 0, i64 0, i32 0, metadata !100} ; [ DW_TAG_typedef ] -!100 = metadata !{i32 589860, metadata !0, metadata !"long long unsigned int", null, i32 0, i64 64, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] -!101 = metadata !{i32 589837, metadata !24, metadata !"_library", metadata !24, i32 39, i64 32, i64 32, i64 96, i32 1, metadata !102, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] -!102 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !103} ; [ DW_TAG_pointer_type ] -!103 = metadata !{i32 589843, metadata !40, metadata !"MyLibrary2", metadata !104, i32 22, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !105, i32 16, i32 0} ; [ DW_TAG_structure_type ] -!104 = metadata !{i32 589865, metadata !"header14.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ] +!97 = metadata !{i32 786460, metadata !94, null, metadata !95, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !64} ; [ DW_TAG_inheritance ] +!98 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"_itemID", i32 38, i64 64, i64 32, i64 32, i32 1, metadata !99, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] +!99 = metadata !{i32 786454, metadata !0, metadata !"uint64_t", metadata !40, i32 55, i64 0, i64 0, i64 0, i32 0, metadata !100} ; [ DW_TAG_typedef ] +!100 = metadata !{i32 786468, null, metadata !0, metadata !"long long unsigned int", i32 0, i64 64, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] +!101 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"_library", i32 39, i64 32, i64 32, i64 96, i32 1, metadata !102, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] +!102 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !103} ; [ DW_TAG_pointer_type ] +!103 = metadata !{i32 786451, metadata !158, metadata !40, metadata !"MyLibrary2", i32 22, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !105, i32 16, i32 0} ; [ DW_TAG_structure_type ] +!104 = metadata !{i32 786473, metadata !158} ; [ DW_TAG_file_type ] !105 = metadata !{metadata !106} -!106 = metadata !{i32 589852, metadata !103, null, metadata !104, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !64} ; [ DW_TAG_inheritance ] -!107 = metadata !{i32 589837, metadata !24, metadata !"_bounds", metadata !24, i32 40, 
i64 128, i64 32, i64 128, i32 1, metadata !108, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] -!108 = metadata !{i32 589846, metadata !0, metadata !"CR", metadata !40, i32 33, i64 0, i64 0, i64 0, i32 0, metadata !109} ; [ DW_TAG_typedef ] -!109 = metadata !{i32 589843, metadata !0, metadata !"CR", metadata !77, i32 29, i64 128, i64 32, i32 0, i32 0, i32 0, metadata !110, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!106 = metadata !{i32 786460, metadata !103, null, metadata !104, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !64} ; [ DW_TAG_inheritance ] +!107 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"_bounds", i32 40, i64 128, i64 32, i64 128, i32 1, metadata !108, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] +!108 = metadata !{i32 786454, metadata !0, metadata !"CR", metadata !40, i32 33, i64 0, i64 0, i64 0, i32 0, metadata !109} ; [ DW_TAG_typedef ] +!109 = metadata !{i32 786451, metadata !156, metadata !0, metadata !"CR", i32 29, i64 128, i64 32, i32 0, i32 0, i32 0, metadata !110, i32 0, i32 0} ; [ DW_TAG_structure_type ] !110 = metadata !{metadata !111, metadata !117} -!111 = metadata !{i32 589837, metadata !77, metadata !"origin", metadata !77, i32 30, i64 64, i64 32, i64 0, i32 0, metadata !112} ; [ DW_TAG_member ] -!112 = metadata !{i32 589846, metadata !0, metadata !"CP", metadata !77, i32 17, i64 0, i64 0, i64 0, i32 0, metadata !113} ; [ DW_TAG_typedef ] -!113 = metadata !{i32 589843, metadata !0, metadata !"CP", metadata !77, i32 13, i64 64, i64 32, i32 0, i32 0, i32 0, metadata !114, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!111 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"origin", i32 30, i64 64, i64 32, i64 0, i32 0, metadata !112} ; [ DW_TAG_member ] +!112 = metadata !{i32 786454, metadata !0, metadata !"CP", metadata !77, i32 17, i64 0, i64 0, i64 0, i32 0, metadata !113} ; [ DW_TAG_typedef ] +!113 = metadata !{i32 786451, metadata !156, metadata !0, metadata !"CP", i32 13, i64 64, i64 32, i32 0, i32 0, i32 0, metadata !114, i32 0, i32 0} ; [ DW_TAG_structure_type ] !114 = metadata !{metadata !115, metadata !116} -!115 = metadata !{i32 589837, metadata !77, metadata !"x", metadata !77, i32 14, i64 32, i64 32, i64 0, i32 0, metadata !76} ; [ DW_TAG_member ] -!116 = metadata !{i32 589837, metadata !77, metadata !"y", metadata !77, i32 15, i64 32, i64 32, i64 32, i32 0, metadata !76} ; [ DW_TAG_member ] -!117 = metadata !{i32 589837, metadata !77, metadata !"size", metadata !77, i32 31, i64 64, i64 32, i64 64, i32 0, metadata !118} ; [ DW_TAG_member ] -!118 = metadata !{i32 589846, metadata !0, metadata !"Size", metadata !77, i32 25, i64 0, i64 0, i64 0, i32 0, metadata !119} ; [ DW_TAG_typedef ] -!119 = metadata !{i32 589843, metadata !0, metadata !"Size", metadata !77, i32 21, i64 64, i64 32, i32 0, i32 0, i32 0, metadata !120, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!115 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"x", i32 14, i64 32, i64 32, i64 0, i32 0, metadata !76} ; [ DW_TAG_member ] +!116 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"y", i32 15, i64 32, i64 32, i64 32, i32 0, metadata !76} ; [ DW_TAG_member ] +!117 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"size", i32 31, i64 64, i64 32, i64 64, i32 0, metadata !118} ; [ DW_TAG_member ] +!118 = metadata !{i32 786454, metadata !0, metadata !"Size", metadata !77, i32 25, i64 0, i64 0, i64 0, i32 0, metadata !119} ; [ DW_TAG_typedef ] +!119 = metadata !{i32 786451, 
metadata !156, metadata !0, metadata !"Size", i32 21, i64 64, i64 32, i32 0, i32 0, i32 0, metadata !120, i32 0, i32 0} ; [ DW_TAG_structure_type ] !120 = metadata !{metadata !121, metadata !122} -!121 = metadata !{i32 589837, metadata !77, metadata !"width", metadata !77, i32 22, i64 32, i64 32, i64 0, i32 0, metadata !76} ; [ DW_TAG_member ] -!122 = metadata !{i32 589837, metadata !77, metadata !"height", metadata !77, i32 23, i64 32, i64 32, i64 32, i32 0, metadata !76} ; [ DW_TAG_member ] -!123 = metadata !{i32 589837, metadata !24, metadata !"_data", metadata !24, i32 40, i64 128, i64 32, i64 256, i32 1, metadata !108, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] -!124 = metadata !{i32 589837, metadata !24, metadata !"semi", metadata !24, i32 609, i64 32, i64 32, i64 224, i32 0, metadata !125} ; [ DW_TAG_member ] -!125 = metadata !{i32 589846, metadata !0, metadata !"d_t", metadata !24, i32 35, i64 0, i64 0, i64 0, i32 0, metadata !126} ; [ DW_TAG_typedef ] -!126 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !127} ; [ DW_TAG_pointer_type ] -!127 = metadata !{i32 589843, metadata !0, metadata !"my_struct", metadata !128, i32 49, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ] -!128 = metadata !{i32 589865, metadata !"header15.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ] +!121 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"width", i32 22, i64 32, i64 32, i64 0, i32 0, metadata !76} ; [ DW_TAG_member ] +!122 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"height", i32 23, i64 32, i64 32, i64 32, i32 0, metadata !76} ; [ DW_TAG_member ] +!123 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"_data", i32 40, i64 128, i64 32, i64 256, i32 1, metadata !108, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] +!124 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"semi", i32 609, i64 32, i64 32, i64 224, i32 0, metadata !125} ; [ DW_TAG_member ] +!125 = metadata !{i32 786454, metadata !0, metadata !"d_t", metadata !24, i32 35, i64 0, i64 0, i64 0, i32 0, metadata !126} ; [ DW_TAG_typedef ] +!126 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !127} ; [ DW_TAG_pointer_type ] +!127 = metadata !{i32 786451, metadata !159, metadata !0, metadata !"my_struct", i32 49, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!128 = metadata !{i32 786473, metadata !159} ; [ DW_TAG_file_type ] !129 = metadata !{i32 609, i32 144, metadata !23, null} -!130 = metadata !{i32 590081, metadata !23, metadata !"loadedMydata", metadata !24, i32 33555041, metadata !59, i32 0} ; [ DW_TAG_arg_variable ] +!130 = metadata !{i32 786689, metadata !23, metadata !"loadedMydata", metadata !24, i32 33555041, metadata !59, i32 0, null} ; [ DW_TAG_arg_variable ] !131 = metadata !{i32 609, i32 155, metadata !23, null} -!132 = metadata !{i32 590081, metadata !23, metadata !"bounds", metadata !24, i32 50332257, metadata !108, i32 0} ; [ DW_TAG_arg_variable ] +!132 = metadata !{i32 786689, metadata !23, metadata !"bounds", metadata !24, i32 50332257, metadata !108, i32 0, null} ; [ DW_TAG_arg_variable ] !133 = metadata !{i32 609, i32 175, metadata !23, null} -!134 = metadata !{i32 590081, metadata !23, metadata !"data", metadata !24, i32 67109473, metadata !108, i32 0} ; [ DW_TAG_arg_variable ] +!134 = metadata !{i32 
786689, metadata !23, metadata !"data", metadata !24, i32 67109473, metadata !108, i32 0, null} ; [ DW_TAG_arg_variable ] !135 = metadata !{i32 609, i32 190, metadata !23, null} -!136 = metadata !{i32 590080, metadata !23, metadata !"mydata", metadata !24, i32 604, metadata !50, i32 0, i64 1, i64 20, i64 2, i64 1, i64 4, i64 2, i64 1, i64 24} ; [ DW_TAG_auto_variable ] +!136 = metadata !{i32 786688, metadata !23, metadata !"mydata", metadata !24, i32 604, metadata !50, i32 0, null, i64 1, i64 20, i64 2, i64 1, i64 4, i64 2, i64 1, i64 24} ; [ DW_TAG_auto_variable ] !137 = metadata !{i32 604, i32 49, metadata !23, null} -!138 = metadata !{i32 590080, metadata !23, metadata !"self", metadata !40, i32 604, metadata !90, i32 0, i64 1, i64 24} ; [ DW_TAG_auto_variable ] -!139 = metadata !{i32 590080, metadata !23, metadata !"semi", metadata !24, i32 607, metadata !125, i32 0, i64 1, i64 28} ; [ DW_TAG_auto_variable ] +!138 = metadata !{i32 786688, metadata !23, metadata !"self", metadata !40, i32 604, metadata !90, i32 0, null, i64 1, i64 24} ; [ DW_TAG_auto_variable ] +!139 = metadata !{i32 786688, metadata !23, metadata !"semi", metadata !24, i32 607, metadata !125, i32 0, null, i64 1, i64 28} ; [ DW_TAG_auto_variable ] !140 = metadata !{i32 607, i32 30, metadata !23, null} !141 = metadata !{i32 610, i32 17, metadata !142, null} -!142 = metadata !{i32 589835, metadata !23, i32 609, i32 200, metadata !24, i32 94} ; [ DW_TAG_lexical_block ] +!142 = metadata !{i32 786443, metadata !23, i32 609, i32 200, metadata !24, i32 94} ; [ DW_TAG_lexical_block ] !143 = metadata !{i32 611, i32 17, metadata !142, null} !144 = metadata !{i32 612, i32 17, metadata !142, null} !145 = metadata !{i32 613, i32 17, metadata !142, null} !146 = metadata !{i32 615, i32 13, metadata !142, null} +!147 = metadata !{metadata !1, metadata !1, metadata !5, metadata !5, metadata !9, metadata !14, metadata !19, metadata !19, metadata !14, metadata !14, metadata !14, metadata !19, metadata !19, metadata !19} +!148 = metadata !{metadata !23} +!149 = metadata !{metadata !"header3.h", metadata !"/Volumes/Sandbox/llvm"} +!150 = metadata !{metadata !"Private.h", metadata !"/Volumes/Sandbox/llvm"} +!151 = metadata !{metadata !"header4.h", metadata !"/Volumes/Sandbox/llvm"} +!152 = metadata !{metadata !"MyLibrary.m", metadata !"/Volumes/Sandbox/llvm"} +!153 = metadata !{metadata !"MyLibrary.i", metadata !"/Volumes/Sandbox/llvm"} +!154 = metadata !{metadata !"header11.h", metadata !"/Volumes/Sandbox/llvm"} +!155 = metadata !{metadata !"NSO.h", metadata !"/Volumes/Sandbox/llvm"} +!156 = metadata !{metadata !"header12.h", metadata !"/Volumes/Sandbox/llvm"} +!157 = metadata !{metadata !"header13.h", metadata !"/Volumes/Sandbox/llvm"} +!158 = metadata !{metadata !"header14.h", metadata !"/Volumes/Sandbox/llvm"} +!159 = metadata !{metadata !"header15.h", metadata !"/Volumes/Sandbox/llvm"} +!160 = metadata !{metadata !"header.h", metadata !"/Volumes/Sandbox/llvm"} +!161 = metadata !{metadata !"header2.h", metadata !"/Volumes/Sandbox/llvm"} diff --git a/test/CodeGen/ARM/debug-info-branch-folding.ll b/test/CodeGen/ARM/debug-info-branch-folding.ll index 4f4ff8e81707..95e6cf2554a0 100644 --- a/test/CodeGen/ARM/debug-info-branch-folding.ll +++ b/test/CodeGen/ARM/debug-info-branch-folding.ll @@ -3,7 +3,7 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32- target triple = "thumbv7-apple-macosx10.6.7" ;CHECK: vadd.f32 q4, q8, q8 -;CHECK-NEXT: Ltmp1 +;CHECK-NEXT: LBB0_1 ;CHECK:@DEBUG_VALUE: x <- Q4+0 
;CHECK-NEXT:@DEBUG_VALUE: y <- Q4+0 @@ -38,58 +38,59 @@ declare i32 @printf(i8* nocapture, ...) nounwind declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone -!llvm.dbg.sp = !{!0, !10, !14} -!llvm.dbg.lv.test0001 = !{!18} -!llvm.dbg.lv.main = !{!19, !20, !24, !26, !27, !28, !29} -!llvm.dbg.lv.printFV = !{!30} - -!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"test0001", metadata !"test0001", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, <4 x float> (float)* @test0001, null} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 589865, metadata !"build2.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"build2.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 129915)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"test0001", metadata !"test0001", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, <4 x float> (float)* @test0001, null, null, metadata !51, i32 0} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786473, metadata !54} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, metadata !54, i32 12, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, null, null, metadata !50, null, null} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 786453, metadata !54, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5} -!5 = metadata !{i32 589846, metadata !2, metadata !"v4f32", metadata !1, i32 14, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ] -!6 = metadata !{i32 590083, metadata !2, metadata !"", metadata !2, i32 0, i64 128, i64 128, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_vector_type ] -!7 = metadata !{i32 589860, metadata !2, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] +!5 = metadata !{i32 786454, metadata !54, metadata !2, metadata !"v4f32", i32 14, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ] +!6 = metadata !{i32 786691, metadata !2, metadata !"", metadata !2, i32 0, i64 128, i64 128, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_vector_type ] +!7 = metadata !{i32 786468, null, metadata !2, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] !8 = metadata !{metadata !9} -!9 = metadata !{i32 589857, i64 0, i64 3} ; [ DW_TAG_subrange_type ] -!10 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**, i1)* @main, null} ; [ DW_TAG_subprogram ] -!11 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!9 = metadata !{i32 786465, i64 0, i64 4} ; [ DW_TAG_subrange_type ] +!10 = metadata !{i32 786478, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**, i1)* @main, null, null, metadata !52, 
i32 0} ; [ DW_TAG_subprogram ] +!11 = metadata !{i32 786453, metadata !54, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !12 = metadata !{metadata !13} -!13 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!14 = metadata !{i32 589870, i32 0, metadata !15, metadata !"printFV", metadata !"printFV", metadata !"", metadata !15, i32 41, metadata !16, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null, null} ; [ DW_TAG_subprogram ] -!15 = metadata !{i32 589865, metadata !"/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/UnitTests/Vector/helpers.h", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] -!16 = metadata !{i32 589845, metadata !15, metadata !"", metadata !15, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !17, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!13 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!14 = metadata !{i32 786478, i32 0, metadata !15, metadata !"printFV", metadata !"printFV", metadata !"", metadata !15, i32 41, metadata !16, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null, null, null, metadata !53, i32 0} ; [ DW_TAG_subprogram ] +!15 = metadata !{i32 786473, metadata !55} ; [ DW_TAG_file_type ] +!16 = metadata !{i32 786453, metadata !55, metadata !15, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !17, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !17 = metadata !{null} -!18 = metadata !{i32 590081, metadata !0, metadata !"a", metadata !1, i32 16777219, metadata !7, i32 0} ; [ DW_TAG_arg_variable ] -!19 = metadata !{i32 590081, metadata !10, metadata !"argc", metadata !1, i32 16777275, metadata !13, i32 0} ; [ DW_TAG_arg_variable ] -!20 = metadata !{i32 590081, metadata !10, metadata !"argv", metadata !1, i32 33554491, metadata !21, i32 0} ; [ DW_TAG_arg_variable ] -!21 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ] -!22 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ] -!23 = metadata !{i32 589860, metadata !2, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] -!24 = metadata !{i32 590080, metadata !25, metadata !"i", metadata !1, i32 60, metadata !13, i32 0} ; [ DW_TAG_auto_variable ] -!25 = metadata !{i32 589835, metadata !10, i32 59, i32 33, metadata !1, i32 14} ; [ DW_TAG_lexical_block ] -!26 = metadata !{i32 590080, metadata !25, metadata !"j", metadata !1, i32 60, metadata !13, i32 0} ; [ DW_TAG_auto_variable ] -!27 = metadata !{i32 590080, metadata !25, metadata !"x", metadata !1, i32 61, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] -!28 = metadata !{i32 590080, metadata !25, metadata !"y", metadata !1, i32 62, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] -!29 = metadata !{i32 590080, metadata !25, metadata !"z", metadata !1, i32 63, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] -!30 = metadata !{i32 590081, metadata !14, metadata !"F", metadata !15, i32 16777257, metadata !31, i32 0} ; [ DW_TAG_arg_variable ] -!31 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_pointer_type ] -!32 = metadata !{i32 589846, metadata !2, metadata !"FV", metadata !15, i32 25, i64 0, i64 0, i64 
0, i32 0, metadata !33} ; [ DW_TAG_typedef ] -!33 = metadata !{i32 589847, metadata !2, metadata !"", metadata !15, i32 22, i64 128, i64 128, i64 0, i32 0, i32 0, metadata !34, i32 0, i32 0} ; [ DW_TAG_union_type ] +!18 = metadata !{i32 786689, metadata !0, metadata !"a", metadata !1, i32 16777219, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ] +!19 = metadata !{i32 786689, metadata !10, metadata !"argc", metadata !1, i32 16777275, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ] +!20 = metadata !{i32 786689, metadata !10, metadata !"argv", metadata !1, i32 33554491, metadata !21, i32 0, null} ; [ DW_TAG_arg_variable ] +!21 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ] +!22 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ] +!23 = metadata !{i32 786468, null, metadata !2, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] +!24 = metadata !{i32 786688, metadata !25, metadata !"i", metadata !1, i32 60, metadata !13, i32 0, null} ; [ DW_TAG_auto_variable ] +!25 = metadata !{i32 786443, metadata !1, metadata !10, i32 59, i32 33, i32 14} ; [ DW_TAG_lexical_block ] +!26 = metadata !{i32 786688, metadata !25, metadata !"j", metadata !1, i32 60, metadata !13, i32 0, null} ; [ DW_TAG_auto_variable ] +!27 = metadata !{i32 786688, metadata !25, metadata !"x", metadata !1, i32 61, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ] +!28 = metadata !{i32 786688, metadata !25, metadata !"y", metadata !1, i32 62, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ] +!29 = metadata !{i32 786688, metadata !25, metadata !"z", metadata !1, i32 63, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ] +!30 = metadata !{i32 786689, metadata !14, metadata !"F", metadata !15, i32 16777257, metadata !31, i32 0, null} ; [ DW_TAG_arg_variable ] +!31 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_pointer_type ] +!32 = metadata !{i32 786454, metadata !55, metadata !2, metadata !"FV", i32 25, i64 0, i64 0, i64 0, i32 0, metadata !33} ; [ DW_TAG_typedef ] +!33 = metadata !{i32 786455, metadata !55, metadata !2, metadata !"", i32 22, i64 128, i64 128, i64 0, i32 0, i32 0, metadata !34, i32 0, i32 0} ; [ DW_TAG_union_type ] !34 = metadata !{metadata !35, metadata !37} -!35 = metadata !{i32 589837, metadata !15, metadata !"V", metadata !15, i32 23, i64 128, i64 128, i64 0, i32 0, metadata !36} ; [ DW_TAG_member ] -!36 = metadata !{i32 589846, metadata !2, metadata !"v4sf", metadata !15, i32 3, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ] -!37 = metadata !{i32 589837, metadata !15, metadata !"A", metadata !15, i32 24, i64 128, i64 32, i64 0, i32 0, metadata !38} ; [ DW_TAG_member ] -!38 = metadata !{i32 589825, metadata !2, metadata !"", metadata !2, i32 0, i64 128, i64 32, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_array_type ] +!35 = metadata !{i32 786445, metadata !55, metadata !15, metadata !"V", i32 23, i64 128, i64 128, i64 0, i32 0, metadata !36} ; [ DW_TAG_member ] +!36 = metadata !{i32 786454, metadata !55, metadata !2, metadata !"v4sf", i32 3, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ] +!37 = metadata !{i32 786445, metadata !55, metadata !15, metadata !"A", i32 24, i64 128, i64 32, i64 0, i32 0, metadata !38} ; [ DW_TAG_member ] +!38 = metadata !{i32 786433, null, metadata 
!2, metadata !"", i32 0, i64 128, i64 32, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_array_type ] !39 = metadata !{i32 79, i32 7, metadata !40, null} -!40 = metadata !{i32 589835, metadata !41, i32 75, i32 35, metadata !1, i32 18} ; [ DW_TAG_lexical_block ] -!41 = metadata !{i32 589835, metadata !42, i32 75, i32 5, metadata !1, i32 17} ; [ DW_TAG_lexical_block ] -!42 = metadata !{i32 589835, metadata !43, i32 71, i32 32, metadata !1, i32 16} ; [ DW_TAG_lexical_block ] -!43 = metadata !{i32 589835, metadata !25, i32 71, i32 3, metadata !1, i32 15} ; [ DW_TAG_lexical_block ] +!40 = metadata !{i32 786443, metadata !1, metadata !41, i32 75, i32 35, i32 18} ; [ DW_TAG_lexical_block ] +!41 = metadata !{i32 786443, metadata !1, metadata !42, i32 75, i32 5, i32 17} ; [ DW_TAG_lexical_block ] +!42 = metadata !{i32 786443, metadata !1, metadata !43, i32 71, i32 32, i32 16} ; [ DW_TAG_lexical_block ] +!43 = metadata !{i32 786443, metadata !1, metadata !25, i32 71, i32 3, i32 15} ; [ DW_TAG_lexical_block ] !44 = metadata !{i32 75, i32 5, metadata !42, null} !45 = metadata !{i32 42, i32 2, metadata !46, metadata !48} -!46 = metadata !{i32 589835, metadata !47, i32 42, i32 2, metadata !15, i32 20} ; [ DW_TAG_lexical_block ] -!47 = metadata !{i32 589835, metadata !14, i32 41, i32 28, metadata !15, i32 19} ; [ DW_TAG_lexical_block ] +!46 = metadata !{i32 786443, metadata !15, metadata !47, i32 42, i32 2, i32 20} ; [ DW_TAG_lexical_block ] +!47 = metadata !{i32 786443, metadata !15, metadata !14, i32 41, i32 28, i32 19} ; [ DW_TAG_lexical_block ] !48 = metadata !{i32 95, i32 3, metadata !25, null} !49 = metadata !{i32 99, i32 3, metadata !25, null} +!50 = metadata !{metadata !0, metadata !10, metadata !14} +!51 = metadata !{metadata !18} +!52 = metadata !{metadata !19, metadata !20, metadata !24, metadata !26, metadata !27, metadata !28, metadata !29} +!53 = metadata !{metadata !30} +!54 = metadata !{metadata !"build2.c", metadata !"/private/tmp"} +!55 = metadata !{metadata !"/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/UnitTests/Vector/helpers.h", metadata !"/private/tmp"} diff --git a/test/CodeGen/ARM/debug-info-d16-reg.ll b/test/CodeGen/ARM/debug-info-d16-reg.ll index 325eea00c8d6..e3e4d068932e 100644 --- a/test/CodeGen/ARM/debug-info-d16-reg.ll +++ b/test/CodeGen/ARM/debug-info-d16-reg.ll @@ -56,44 +56,41 @@ entry: declare i32 @puts(i8* nocapture) nounwind -!llvm.dbg.sp = !{!0, !9, !10} -!llvm.dbg.lv.printer = !{!16, !17, !18} -!llvm.dbg.lv.inlineprinter = !{!19, !20, !21} -!llvm.dbg.lv.main = !{!22, !23, !24} +!llvm.dbg.cu = !{!2} -!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"printer", metadata !"printer", metadata !"printer", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, double, i8)* @printer} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 589865, metadata !"a.c", metadata !"/tmp/", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 589841, i32 0, i32 1, metadata !"/tmp/a.c", metadata !"/tmp", metadata !"(LLVM build 00)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!0 = metadata !{i32 786478, metadata !1, metadata !"printer", metadata !"printer", metadata !"printer", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, double, i8)* @printer, null, null, metadata 
!43, i32 12} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786473, metadata !46} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, i32 1, metadata !1, metadata !"(LLVM build 00)", i1 true, metadata !"", i32 0, null, null, metadata !42, null, metadata !""} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5, metadata !6, metadata !7, metadata !8} -!5 = metadata !{i32 589860, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 589839, metadata !1, metadata !"", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] -!7 = metadata !{i32 589860, metadata !1, metadata !"double", metadata !1, i32 0, i64 64, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] -!8 = metadata !{i32 589860, metadata !1, metadata !"unsigned char", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ] -!9 = metadata !{i32 589870, i32 0, metadata !1, metadata !"inlineprinter", metadata !"inlineprinter", metadata !"inlineprinter", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, double, i8)* @inlineprinter} ; [ DW_TAG_subprogram ] -!10 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 18, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main} ; [ DW_TAG_subprogram ] -!11 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ] +!5 = metadata !{i32 786468, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 786447, metadata !1, metadata !"", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] +!7 = metadata !{i32 786468, metadata !1, metadata !"double", metadata !1, i32 0, i64 64, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] +!8 = metadata !{i32 786468, metadata !1, metadata !"unsigned char", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ] +!9 = metadata !{i32 786478, metadata !1, metadata !"inlineprinter", metadata !"inlineprinter", metadata !"inlineprinter", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, double, i8)* @inlineprinter, null, null, metadata !44, i32 5} ; [ DW_TAG_subprogram ] +!10 = metadata !{i32 786478, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 18, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !45, i32 18} ; [ DW_TAG_subprogram ] +!11 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ] !12 = metadata !{metadata !5, metadata !5, metadata !13} -!13 = metadata !{i32 589839, metadata !1, metadata !"", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ] -!14 = metadata !{i32 589839, metadata !1, metadata !"", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !15} ; [ DW_TAG_pointer_type ] -!15 = metadata !{i32 589860, metadata !1, metadata !"char", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 
6} ; [ DW_TAG_base_type ] -!16 = metadata !{i32 590081, metadata !0, metadata !"ptr", metadata !1, i32 11, metadata !6, i32 0} ; [ DW_TAG_arg_variable ] -!17 = metadata !{i32 590081, metadata !0, metadata !"val", metadata !1, i32 11, metadata !7, i32 0} ; [ DW_TAG_arg_variable ] -!18 = metadata !{i32 590081, metadata !0, metadata !"c", metadata !1, i32 11, metadata !8, i32 0} ; [ DW_TAG_arg_variable ] -!19 = metadata !{i32 590081, metadata !9, metadata !"ptr", metadata !1, i32 4, metadata !6, i32 0} ; [ DW_TAG_arg_variable ] -!20 = metadata !{i32 590081, metadata !9, metadata !"val", metadata !1, i32 4, metadata !7, i32 0} ; [ DW_TAG_arg_variable ] -!21 = metadata !{i32 590081, metadata !9, metadata !"c", metadata !1, i32 4, metadata !8, i32 0} ; [ DW_TAG_arg_variable ] -!22 = metadata !{i32 590081, metadata !10, metadata !"argc", metadata !1, i32 17, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] -!23 = metadata !{i32 590081, metadata !10, metadata !"argv", metadata !1, i32 17, metadata !13, i32 0} ; [ DW_TAG_arg_variable ] -!24 = metadata !{i32 590080, metadata !25, metadata !"dval", metadata !1, i32 19, metadata !7, i32 0} ; [ DW_TAG_auto_variable ] -!25 = metadata !{i32 589835, metadata !10, i32 18, i32 0, metadata !1, i32 2} ; [ DW_TAG_lexical_block ] +!13 = metadata !{i32 786447, metadata !1, metadata !"", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ] +!14 = metadata !{i32 786447, metadata !1, metadata !"", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !15} ; [ DW_TAG_pointer_type ] +!15 = metadata !{i32 786468, metadata !1, metadata !"char", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] +!16 = metadata !{i32 786689, metadata !0, metadata !"ptr", metadata !1, i32 11, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ] +!17 = metadata !{i32 786689, metadata !0, metadata !"val", metadata !1, i32 11, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ] +!18 = metadata !{i32 786689, metadata !0, metadata !"c", metadata !1, i32 11, metadata !8, i32 0, null} ; [ DW_TAG_arg_variable ] +!19 = metadata !{i32 786689, metadata !9, metadata !"ptr", metadata !1, i32 4, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ] +!20 = metadata !{i32 786689, metadata !9, metadata !"val", metadata !1, i32 4, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ] +!21 = metadata !{i32 786689, metadata !9, metadata !"c", metadata !1, i32 4, metadata !8, i32 0, null} ; [ DW_TAG_arg_variable ] +!22 = metadata !{i32 786689, metadata !10, metadata !"argc", metadata !1, i32 17, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ] +!23 = metadata !{i32 786689, metadata !10, metadata !"argv", metadata !1, i32 17, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ] +!24 = metadata !{i32 786688, metadata !25, metadata !"dval", metadata !1, i32 19, metadata !7, i32 0, null} ; [ DW_TAG_auto_variable ] +!25 = metadata !{i32 786443, metadata !1, metadata !10, i32 18, i32 0, i32 2} ; [ DW_TAG_lexical_block ] !26 = metadata !{i32 4, i32 0, metadata !9, null} !27 = metadata !{i32 6, i32 0, metadata !28, null} -!28 = metadata !{i32 589835, metadata !9, i32 5, i32 0, metadata !1, i32 1} ; [ DW_TAG_lexical_block ] +!28 = metadata !{i32 786443, metadata !1, metadata !9, i32 5, i32 0, i32 1} ; [ DW_TAG_lexical_block ] !29 = metadata !{i32 7, i32 0, metadata !28, null} !30 = metadata !{i32 11, i32 0, metadata !0, null} !31 = metadata !{i32 13, i32 0, metadata !32, null} -!32 = metadata !{i32 589835, metadata !0, i32 12, i32 0, metadata !1, i32 0} 
; [ DW_TAG_lexical_block ] +!32 = metadata !{i32 786443, metadata !1, metadata !0, i32 12, i32 0, i32 0} ; [ DW_TAG_lexical_block ] !33 = metadata !{i32 14, i32 0, metadata !32, null} !34 = metadata !{i32 17, i32 0, metadata !10, null} !35 = metadata !{i32 19, i32 0, metadata !25, null} @@ -103,3 +100,8 @@ declare i32 @puts(i8* nocapture) nounwind !39 = metadata !{i32 6, i32 0, metadata !28, metadata !37} !40 = metadata !{i32 22, i32 0, metadata !25, null} !41 = metadata !{i32 23, i32 0, metadata !25, null} +!42 = metadata !{metadata !0, metadata !9, metadata !10} +!43 = metadata !{metadata !16, metadata !17, metadata !18} +!44 = metadata !{metadata !19, metadata !20, metadata !21} +!45 = metadata !{metadata !22, metadata !23, metadata !24} +!46 = metadata !{metadata !"a.c", metadata !"/tmp/"} diff --git a/test/CodeGen/ARM/debug-info-qreg.ll b/test/CodeGen/ARM/debug-info-qreg.ll index 97c9c66c58aa..038c2296cdbe 100644 --- a/test/CodeGen/ARM/debug-info-qreg.ll +++ b/test/CodeGen/ARM/debug-info-qreg.ll @@ -35,58 +35,61 @@ declare i32 @printf(i8* nocapture, ...) nounwind declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone -!llvm.dbg.sp = !{!0, !10, !14} -!llvm.dbg.lv.test0001 = !{!18} -!llvm.dbg.lv.main = !{!19, !20, !24, !26, !27, !28, !29} -!llvm.dbg.lv.printFV = !{!30} +!llvm.dbg.cu = !{!2} -!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"test0001", metadata !"test0001", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, <4 x float> (float)* @test0001, null} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 589865, metadata !"build2.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"build2.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 129915)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!0 = metadata !{i32 786478, metadata !1, metadata !"test0001", metadata !"test0001", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, <4 x float> (float)* @test0001, null, null, metadata !51, i32 3} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786473, metadata !54} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, metadata !54, i32 12, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, null, null, metadata !50, null, null} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 786453, metadata !54, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5} -!5 = metadata !{i32 589846, metadata !2, metadata !"v4f32", metadata !1, i32 14, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ] -!6 = metadata !{i32 590083, metadata !2, metadata !"", metadata !2, i32 0, i64 128, i64 128, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_vector_type ] -!7 = metadata !{i32 589860, metadata !2, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] +!5 = metadata !{i32 786454, metadata !54, metadata !2, metadata !"v4f32", i32 14, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ] +!6 = metadata !{i32 786691, metadata !2, metadata !"", metadata !2, i32 0, i64 128, i64 128, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; 
[ DW_TAG_vector_type ] +!7 = metadata !{i32 786468, null, metadata !2, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] !8 = metadata !{metadata !9} -!9 = metadata !{i32 589857, i64 0, i64 3} ; [ DW_TAG_subrange_type ] -!10 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**)* @main, null} ; [ DW_TAG_subprogram ] -!11 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!9 = metadata !{i32 786465, i64 0, i64 4} ; [ DW_TAG_subrange_type ] +!10 = metadata !{i32 786478, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !52, i32 59} ; [ DW_TAG_subprogram ] +!11 = metadata !{i32 786453, metadata !54, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !12 = metadata !{metadata !13} -!13 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!14 = metadata !{i32 589870, i32 0, metadata !15, metadata !"printFV", metadata !"printFV", metadata !"", metadata !15, i32 41, metadata !16, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null, null} ; [ DW_TAG_subprogram ] -!15 = metadata !{i32 589865, metadata !"/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/UnitTests/Vector/helpers.h", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] -!16 = metadata !{i32 589845, metadata !15, metadata !"", metadata !15, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !17, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!13 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!14 = metadata !{i32 786478, metadata !15, metadata !"printFV", metadata !"printFV", metadata !"", metadata !15, i32 41, metadata !16, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null, null, null, metadata !53, i32 41} ; [ DW_TAG_subprogram ] +!15 = metadata !{i32 786473, metadata !55} ; [ DW_TAG_file_type ] +!16 = metadata !{i32 786453, metadata !55, metadata !15, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !17, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !17 = metadata !{null} -!18 = metadata !{i32 590081, metadata !0, metadata !"a", metadata !1, i32 16777219, metadata !7, i32 0} ; [ DW_TAG_arg_variable ] -!19 = metadata !{i32 590081, metadata !10, metadata !"argc", metadata !1, i32 16777275, metadata !13, i32 0} ; [ DW_TAG_arg_variable ] -!20 = metadata !{i32 590081, metadata !10, metadata !"argv", metadata !1, i32 33554491, metadata !21, i32 0} ; [ DW_TAG_arg_variable ] -!21 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ] -!22 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ] -!23 = metadata !{i32 589860, metadata !2, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] -!24 = metadata !{i32 590080, metadata !25, metadata !"i", metadata !1, i32 60, metadata !13, i32 0} ; [ DW_TAG_auto_variable ] -!25 = metadata 
!{i32 589835, metadata !10, i32 59, i32 33, metadata !1, i32 14} ; [ DW_TAG_lexical_block ] -!26 = metadata !{i32 590080, metadata !25, metadata !"j", metadata !1, i32 60, metadata !13, i32 0} ; [ DW_TAG_auto_variable ] -!27 = metadata !{i32 590080, metadata !25, metadata !"x", metadata !1, i32 61, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] -!28 = metadata !{i32 590080, metadata !25, metadata !"y", metadata !1, i32 62, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] -!29 = metadata !{i32 590080, metadata !25, metadata !"z", metadata !1, i32 63, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] -!30 = metadata !{i32 590081, metadata !14, metadata !"F", metadata !15, i32 16777257, metadata !31, i32 0} ; [ DW_TAG_arg_variable ] -!31 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_pointer_type ] -!32 = metadata !{i32 589846, metadata !2, metadata !"FV", metadata !15, i32 25, i64 0, i64 0, i64 0, i32 0, metadata !33} ; [ DW_TAG_typedef ] -!33 = metadata !{i32 589847, metadata !2, metadata !"", metadata !15, i32 22, i64 128, i64 128, i64 0, i32 0, i32 0, metadata !34, i32 0, i32 0} ; [ DW_TAG_union_type ] +!18 = metadata !{i32 786689, metadata !0, metadata !"a", metadata !1, i32 16777219, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ] +!19 = metadata !{i32 786689, metadata !10, metadata !"argc", metadata !1, i32 16777275, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ] +!20 = metadata !{i32 786689, metadata !10, metadata !"argv", metadata !1, i32 33554491, metadata !21, i32 0, null} ; [ DW_TAG_arg_variable ] +!21 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ] +!22 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ] +!23 = metadata !{i32 786468, null, metadata !2, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] +!24 = metadata !{i32 786688, metadata !25, metadata !"i", metadata !1, i32 60, metadata !13, i32 0, null} ; [ DW_TAG_auto_variable ] +!25 = metadata !{i32 786443, metadata !1, metadata !10, i32 59, i32 33, i32 14} ; [ DW_TAG_lexical_block ] +!26 = metadata !{i32 786688, metadata !25, metadata !"j", metadata !1, i32 60, metadata !13, i32 0, null} ; [ DW_TAG_auto_variable ] +!27 = metadata !{i32 786688, metadata !25, metadata !"x", metadata !1, i32 61, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ] +!28 = metadata !{i32 786688, metadata !25, metadata !"y", metadata !1, i32 62, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ] +!29 = metadata !{i32 786688, metadata !25, metadata !"z", metadata !1, i32 63, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ] +!30 = metadata !{i32 786689, metadata !14, metadata !"F", metadata !15, i32 16777257, metadata !31, i32 0, null} ; [ DW_TAG_arg_variable ] +!31 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_pointer_type ] +!32 = metadata !{i32 786454, metadata !55, metadata !2, metadata !"FV", i32 25, i64 0, i64 0, i64 0, i32 0, metadata !33} ; [ DW_TAG_typedef ] +!33 = metadata !{i32 786455, metadata !55, metadata !2, metadata !"", i32 22, i64 128, i64 128, i64 0, i32 0, i32 0, metadata !34, i32 0, i32 0} ; [ DW_TAG_union_type ] !34 = metadata !{metadata !35, metadata !37} -!35 = metadata !{i32 589837, metadata !15, metadata !"V", metadata !15, i32 23, i64 128, i64 128, i64 0, i32 0, 
metadata !36} ; [ DW_TAG_member ] -!36 = metadata !{i32 589846, metadata !2, metadata !"v4sf", metadata !15, i32 3, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ] -!37 = metadata !{i32 589837, metadata !15, metadata !"A", metadata !15, i32 24, i64 128, i64 32, i64 0, i32 0, metadata !38} ; [ DW_TAG_member ] -!38 = metadata !{i32 589825, metadata !2, metadata !"", metadata !2, i32 0, i64 128, i64 32, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_array_type ] +!35 = metadata !{i32 786445, metadata !55, metadata !15, metadata !"V", i32 23, i64 128, i64 128, i64 0, i32 0, metadata !36} ; [ DW_TAG_member ] +!36 = metadata !{i32 786454, metadata !55, metadata !2, metadata !"v4sf", i32 3, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ] +!37 = metadata !{i32 786445, metadata !55, metadata !15, metadata !"A", i32 24, i64 128, i64 32, i64 0, i32 0, metadata !38} ; [ DW_TAG_member ] +!38 = metadata !{i32 786433, null, metadata !2, metadata !"", i32 0, i64 128, i64 32, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_array_type ] !39 = metadata !{i32 79, i32 7, metadata !40, null} -!40 = metadata !{i32 589835, metadata !41, i32 75, i32 35, metadata !1, i32 18} ; [ DW_TAG_lexical_block ] -!41 = metadata !{i32 589835, metadata !42, i32 75, i32 5, metadata !1, i32 17} ; [ DW_TAG_lexical_block ] -!42 = metadata !{i32 589835, metadata !43, i32 71, i32 32, metadata !1, i32 16} ; [ DW_TAG_lexical_block ] -!43 = metadata !{i32 589835, metadata !25, i32 71, i32 3, metadata !1, i32 15} ; [ DW_TAG_lexical_block ] +!40 = metadata !{i32 786443, metadata !1, metadata !41, i32 75, i32 35, i32 18} ; [ DW_TAG_lexical_block ] +!41 = metadata !{i32 786443, metadata !1, metadata !42, i32 75, i32 5, i32 17} ; [ DW_TAG_lexical_block ] +!42 = metadata !{i32 786443, metadata !1, metadata !43, i32 71, i32 32, i32 16} ; [ DW_TAG_lexical_block ] +!43 = metadata !{i32 786443, metadata !1, metadata !25, i32 71, i32 3, i32 15} ; [ DW_TAG_lexical_block ] !44 = metadata !{i32 75, i32 5, metadata !42, null} !45 = metadata !{i32 42, i32 2, metadata !46, metadata !48} -!46 = metadata !{i32 589835, metadata !47, i32 42, i32 2, metadata !15, i32 20} ; [ DW_TAG_lexical_block ] -!47 = metadata !{i32 589835, metadata !14, i32 41, i32 28, metadata !15, i32 19} ; [ DW_TAG_lexical_block ] +!46 = metadata !{i32 786443, metadata !15, metadata !47, i32 42, i32 2, i32 20} ; [ DW_TAG_lexical_block ] +!47 = metadata !{i32 786443, metadata !15, metadata !14, i32 41, i32 28, i32 19} ; [ DW_TAG_lexical_block ] !48 = metadata !{i32 95, i32 3, metadata !25, null} !49 = metadata !{i32 99, i32 3, metadata !25, null} +!50 = metadata !{metadata !0, metadata !10, metadata !14} +!51 = metadata !{metadata !18} +!52 = metadata !{metadata !19, metadata !20, metadata !24, metadata !26, metadata !27, metadata !28, metadata !29} +!53 = metadata !{metadata !30} +!54 = metadata !{metadata !"build2.c", metadata !"/private/tmp"} +!55 = metadata !{metadata !"/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/UnitTests/Vector/helpers.h", metadata !"/private/tmp"} diff --git a/test/CodeGen/ARM/debug-info-s16-reg.ll b/test/CodeGen/ARM/debug-info-s16-reg.ll index db41143fb3b1..f3af0b93c69c 100644 --- a/test/CodeGen/ARM/debug-info-s16-reg.ll +++ b/test/CodeGen/ARM/debug-info-s16-reg.ll @@ -61,46 +61,43 @@ declare i32 @puts(i8* nocapture) nounwind optsize declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone -!llvm.dbg.sp = !{!0, !6, !7} -!llvm.dbg.lv.inlineprinter = !{!8, !10, !12} 
-!llvm.dbg.lv.printer = !{!14, !15, !16} -!llvm.dbg.lv.main = !{!17, !18, !22} +!llvm.dbg.cu = !{!2} -!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"inlineprinter", metadata !"inlineprinter", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i8*, float, i8)* @inlineprinter, null} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 589865, metadata !"a.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"a.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 129915)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!0 = metadata !{i32 786478, metadata !1, metadata !"inlineprinter", metadata !"inlineprinter", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i8*, float, i8)* @inlineprinter, null, null, metadata !48, i32 5} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786473, metadata !51} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, null, null, metadata !47, null, null} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5} -!5 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 589870, i32 0, metadata !1, metadata !"printer", metadata !"printer", metadata !"", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i8*, float, i8)* @printer, null} ; [ DW_TAG_subprogram ] -!7 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 18, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**)* @main, null} ; [ DW_TAG_subprogram ] -!8 = metadata !{i32 590081, metadata !0, metadata !"ptr", metadata !1, i32 16777220, metadata !9, i32 0} ; [ DW_TAG_arg_variable ] -!9 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] -!10 = metadata !{i32 590081, metadata !0, metadata !"val", metadata !1, i32 33554436, metadata !11, i32 0} ; [ DW_TAG_arg_variable ] -!11 = metadata !{i32 589860, metadata !2, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] -!12 = metadata !{i32 590081, metadata !0, metadata !"c", metadata !1, i32 50331652, metadata !13, i32 0} ; [ DW_TAG_arg_variable ] -!13 = metadata !{i32 589860, metadata !2, metadata !"unsigned char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ] -!14 = metadata !{i32 590081, metadata !6, metadata !"ptr", metadata !1, i32 16777227, metadata !9, i32 0} ; [ DW_TAG_arg_variable ] -!15 = metadata !{i32 590081, metadata !6, metadata !"val", metadata !1, i32 33554443, metadata !11, i32 0} ; [ DW_TAG_arg_variable ] -!16 = metadata !{i32 590081, metadata !6, metadata !"c", metadata !1, i32 50331659, metadata !13, i32 0} ; [ DW_TAG_arg_variable ] -!17 = metadata !{i32 590081, metadata !7, metadata !"argc", metadata !1, i32 16777233, metadata !5, i32 0} ; [ 
DW_TAG_arg_variable ] -!18 = metadata !{i32 590081, metadata !7, metadata !"argv", metadata !1, i32 33554449, metadata !19, i32 0} ; [ DW_TAG_arg_variable ] -!19 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !20} ; [ DW_TAG_pointer_type ] -!20 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !21} ; [ DW_TAG_pointer_type ] -!21 = metadata !{i32 589860, metadata !2, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] -!22 = metadata !{i32 590080, metadata !23, metadata !"dval", metadata !1, i32 19, metadata !11, i32 0} ; [ DW_TAG_auto_variable ] -!23 = metadata !{i32 589835, metadata !7, i32 18, i32 1, metadata !1, i32 2} ; [ DW_TAG_lexical_block ] +!5 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 786478, metadata !1, metadata !"printer", metadata !"printer", metadata !"", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i8*, float, i8)* @printer, null, null, metadata !49, i32 12} ; [ DW_TAG_subprogram ] +!7 = metadata !{i32 786478, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 18, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !50, i32 18} ; [ DW_TAG_subprogram ] +!8 = metadata !{i32 786689, metadata !0, metadata !"ptr", metadata !1, i32 16777220, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ] +!9 = metadata !{i32 786447, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] +!10 = metadata !{i32 786689, metadata !0, metadata !"val", metadata !1, i32 33554436, metadata !11, i32 0, null} ; [ DW_TAG_arg_variable ] +!11 = metadata !{i32 786468, metadata !2, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] +!12 = metadata !{i32 786689, metadata !0, metadata !"c", metadata !1, i32 50331652, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ] +!13 = metadata !{i32 786468, metadata !2, metadata !"unsigned char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ] +!14 = metadata !{i32 786689, metadata !6, metadata !"ptr", metadata !1, i32 16777227, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ] +!15 = metadata !{i32 786689, metadata !6, metadata !"val", metadata !1, i32 33554443, metadata !11, i32 0, null} ; [ DW_TAG_arg_variable ] +!16 = metadata !{i32 786689, metadata !6, metadata !"c", metadata !1, i32 50331659, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ] +!17 = metadata !{i32 786689, metadata !7, metadata !"argc", metadata !1, i32 16777233, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ] +!18 = metadata !{i32 786689, metadata !7, metadata !"argv", metadata !1, i32 33554449, metadata !19, i32 0, null} ; [ DW_TAG_arg_variable ] +!19 = metadata !{i32 786447, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !20} ; [ DW_TAG_pointer_type ] +!20 = metadata !{i32 786447, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !21} ; [ DW_TAG_pointer_type ] +!21 = metadata !{i32 786468, metadata !2, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] +!22 = metadata !{i32 786688, metadata !23, metadata !"dval", metadata !1, i32 19, metadata !11, i32 0, null} ; [ DW_TAG_auto_variable ] 
+!23 = metadata !{i32 786443, metadata !1, metadata !7, i32 18, i32 1, i32 2} ; [ DW_TAG_lexical_block ] !24 = metadata !{i32 4, i32 22, metadata !0, null} !25 = metadata !{i32 4, i32 33, metadata !0, null} !26 = metadata !{i32 4, i32 52, metadata !0, null} !27 = metadata !{i32 6, i32 3, metadata !28, null} -!28 = metadata !{i32 589835, metadata !0, i32 5, i32 1, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!28 = metadata !{i32 786443, metadata !1, metadata !0, i32 5, i32 1, i32 0} ; [ DW_TAG_lexical_block ] !29 = metadata !{i32 7, i32 3, metadata !28, null} !30 = metadata !{i32 11, i32 42, metadata !6, null} !31 = metadata !{i32 11, i32 53, metadata !6, null} !32 = metadata !{i32 11, i32 72, metadata !6, null} !33 = metadata !{i32 13, i32 3, metadata !34, null} -!34 = metadata !{i32 589835, metadata !6, i32 12, i32 1, metadata !1, i32 1} ; [ DW_TAG_lexical_block ] +!34 = metadata !{i32 786443, metadata !1, metadata !6, i32 12, i32 1, i32 1} ; [ DW_TAG_lexical_block ] !35 = metadata !{i32 14, i32 3, metadata !34, null} !36 = metadata !{i32 17, i32 15, metadata !7, null} !37 = metadata !{i32 17, i32 28, metadata !7, null} @@ -113,3 +110,8 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !44 = metadata !{i32 6, i32 3, metadata !28, metadata !40} !45 = metadata !{i32 22, i32 3, metadata !23, null} !46 = metadata !{i32 23, i32 1, metadata !23, null} +!47 = metadata !{metadata !0, metadata !6, metadata !7} +!48 = metadata !{metadata !8, metadata !10, metadata !12} +!49 = metadata !{metadata !14, metadata !15, metadata !16} +!50 = metadata !{metadata !17, metadata !18, metadata !22} +!51 = metadata !{metadata !"a.c", metadata !"/private/tmp"} diff --git a/test/CodeGen/ARM/debug-info-sreg2.ll b/test/CodeGen/ARM/debug-info-sreg2.ll index ae7af0afad50..ae02a245b432 100644 --- a/test/CodeGen/ARM/debug-info-sreg2.ll +++ b/test/CodeGen/ARM/debug-info-sreg2.ll @@ -40,22 +40,23 @@ declare float @_Z2f3f(float) optsize declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0} -!llvm.dbg.sp = !{!1} -!llvm.dbg.lv._Z3foov = !{!5, !8} -!0 = metadata !{i32 589841, i32 0, i32 4, metadata !"k.cc", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 130845)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!1 = metadata !{i32 589870, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3foov", metadata !2, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void ()* @_Z3foov, null, null} ; [ DW_TAG_subprogram ] -!2 = metadata !{i32 589865, metadata !"k.cc", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ] -!3 = metadata !{i32 589845, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!0 = metadata !{i32 786449, i32 4, metadata !2, metadata !"clang version 3.0 (trunk 130845)", i1 true, metadata !"", i32 0, null, null, metadata !16, null, null} ; [ DW_TAG_compile_unit ] +!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3foov", metadata !2, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void ()* @_Z3foov, null, null, metadata !17, i32 5} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 786473, metadata !18} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !4 = metadata !{null} -!5 = 
metadata !{i32 590080, metadata !6, metadata !"k", metadata !2, i32 6, metadata !7, i32 0} ; [ DW_TAG_auto_variable ] -!6 = metadata !{i32 589835, metadata !1, i32 5, i32 12, metadata !2, i32 0} ; [ DW_TAG_lexical_block ] -!7 = metadata !{i32 589860, metadata !0, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] -!8 = metadata !{i32 590080, metadata !9, metadata !"y", metadata !2, i32 8, metadata !7, i32 0} ; [ DW_TAG_auto_variable ] -!9 = metadata !{i32 589835, metadata !10, i32 7, i32 25, metadata !2, i32 2} ; [ DW_TAG_lexical_block ] -!10 = metadata !{i32 589835, metadata !6, i32 7, i32 3, metadata !2, i32 1} ; [ DW_TAG_lexical_block ] +!5 = metadata !{i32 786688, metadata !6, metadata !"k", metadata !2, i32 6, metadata !7, i32 0, null} ; [ DW_TAG_auto_variable ] +!6 = metadata !{i32 786443, metadata !2, metadata !1, i32 5, i32 12, i32 0} ; [ DW_TAG_lexical_block ] +!7 = metadata !{i32 786468, metadata !0, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] +!8 = metadata !{i32 786688, metadata !9, metadata !"y", metadata !2, i32 8, metadata !7, i32 0, null} ; [ DW_TAG_auto_variable ] +!9 = metadata !{i32 786443, metadata !2, metadata !10, i32 7, i32 25, i32 2} ; [ DW_TAG_lexical_block ] +!10 = metadata !{i32 786443, metadata !2, metadata !6, i32 7, i32 3, i32 1} ; [ DW_TAG_lexical_block ] !11 = metadata !{i32 6, i32 18, metadata !6, null} !12 = metadata !{i32 7, i32 3, metadata !6, null} !13 = metadata !{i32 8, i32 20, metadata !9, null} !14 = metadata !{i32 7, i32 20, metadata !10, null} !15 = metadata !{i32 10, i32 1, metadata !6, null} +!16 = metadata !{metadata !1} +!17 = metadata !{metadata !5, metadata !8} +!18 = metadata !{metadata !"k.cc", metadata !"/private/tmp"} diff --git a/test/CodeGen/ARM/domain-conv-vmovs.ll b/test/CodeGen/ARM/domain-conv-vmovs.ll index a5c41144584c..b5586cc99fc1 100644 --- a/test/CodeGen/ARM/domain-conv-vmovs.ll +++ b/test/CodeGen/ARM/domain-conv-vmovs.ll @@ -78,7 +78,7 @@ define float @test_ineligible(float, float %in) { ; use-def chains would be messed up. Primarily a compile-test (we used to ; internal fault). call void @bar() -; CHECL: bl bar +; CHECK: bl bar ; CHECK: vext.32 ; CHECK: vext.32 ret float %val @@ -98,3 +98,23 @@ define i32 @test_vmovs_no_sreg(i32 %in) { ret i32 %resi } + + +; The point of this test is: +; + Make sure s1 is live before the BL +; + Make sure s1 is clobbered by the BL +; + Convince LLVM to emit a VMOV to S0 +; + Convince LLVM to domain-convert this. + +; When all of those are satisfied, LLVM should *not* mark s1 as an implicit-use +; because it's dead. 
+ +declare float @clobbers_s1(float, float) + +define <2 x float> @test_clobbers_recognised(<2 x float> %invec, float %val) { + %elt = call float @clobbers_s1(float %val, float %val) + + %vec = insertelement <2 x float> %invec, float %elt, i32 0 + %res = fadd <2 x float> %vec, %vec + ret <2 x float> %res +} diff --git a/test/CodeGen/ARM/eh-dispcont.ll b/test/CodeGen/ARM/eh-dispcont.ll new file mode 100644 index 000000000000..935965bbdf8b --- /dev/null +++ b/test/CodeGen/ARM/eh-dispcont.ll @@ -0,0 +1,89 @@ +; RUN: llc -mtriple armv7-apple-ios -relocation-model=pic -o - %s | FileCheck %s -check-prefix=ARM-PIC +; RUN: llc -mtriple armv7-apple-ios -relocation-model=static -o - %s | FileCheck %s -check-prefix=ARM-NOPIC +; RUN: llc -mtriple armv7-apple-ios -relocation-model=dynamic-no-pic -o - %s | FileCheck %s -check-prefix=ARM-NOPIC +; RUN: llc -mtriple thumbv6-apple-ios -relocation-model=pic -o - %s | FileCheck %s -check-prefix=THUMB1-PIC +; RUN: llc -mtriple thumbv6-apple-ios -relocation-model=static -o - %s | FileCheck %s -check-prefix=THUMB1-NOPIC +; RUN: llc -mtriple thumbv6-apple-ios -relocation-model=dynamic-no-pic -o - %s | FileCheck %s -check-prefix=THUMB1-NOPIC + +@_ZTIi = external constant i8* + +define i32 @main() #0 { +entry: + %exception = tail call i8* @__cxa_allocate_exception(i32 4) #1 + %0 = bitcast i8* %exception to i32* + store i32 1, i32* %0, align 4 + invoke void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #2 + to label %unreachable unwind label %lpad + +lpad: ; preds = %entry + %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + catch i8* null + %2 = extractvalue { i8*, i32 } %1, 0 + %3 = tail call i8* @__cxa_begin_catch(i8* %2) #1 + tail call void @__cxa_end_catch() + ret i32 0 + +unreachable: ; preds = %entry + unreachable +} + +declare i8* @__cxa_allocate_exception(i32) + +declare void @__cxa_throw(i8*, i8*, i8*) + +declare i8* @__cxa_begin_catch(i8*) + +declare void @__cxa_end_catch() + +declare i32 @__gxx_personality_sj0(...) 
+ +attributes #0 = { ssp } +attributes #1 = { nounwind } +attributes #2 = { noreturn } + +; ARM-PIC: cxa_throw +; ARM-PIC: trap +; ARM-PIC: adr [[REG1:r[0-9]+]], [[LJTI:.*]] +; ARM-PIC: ldr [[REG0:r[0-9]+]], [r{{[0-9]+}}, [[REG1]]] +; ARM-PIC: add pc, [[REG0]], [[REG1]] +; ARM-PIC: [[LJTI]] +; ARM-PIC: .data_region jt32 +; ARM-PIC: .long [[LABEL:LBB0_[0-9]]]-[[LJTI]] +; ARM-PIC: .end_data_region +; ARM-PIC: [[LABEL]] + +; ARM-NOPIC: cxa_throw +; ARM-NOPIC: trap +; ARM-NOPIC: adr [[REG1:r[0-9]+]], [[LJTI:.*]] +; ARM-NOPIC: ldr [[REG0:r[0-9]+]], [r{{[0-9]+}}, [[REG1]]] +; ARM-NOPIC: mov pc, [[REG0]] +; ARM-NOPIC: [[LJTI]] +; ARM-NOPIC: .data_region jt32 +; ARM-NOPIC: .long [[LABEL:LBB0_[0-9]]] +; ARM-NOPIC: .end_data_region +; ARM-NOPIC: [[LABEL]] + +; THUMB1-PIC: cxa_throw +; THUMB1-PIC: trap +; THUMB1-PIC: adr [[REG0:r[0-9]+]], [[LJTI:.*]] +; THUMB1-PIC: adds [[REG1:r[0-9]+]], [[REG1]], [[REG0]] +; THUMB1-PIC: ldr [[REG1]] +; THUMB1-PIC: adds [[REG0]], [[REG1]], [[REG0]] +; THUMB1-PIC: mov pc, [[REG0]] +; THUMB1-PIC: [[LJTI]] +; THUMB1-PIC: .data_region jt32 +; THUMB1-PIC: .long [[LABEL:LBB0_[0-9]]]-[[LJTI]] +; THUMB1-PIC: .end_data_region +; THUMB1-PIC: [[LABEL]] + +; THUMB1-NOPIC: cxa_throw +; THUMB1-NOPIC: trap +; THUMB1-NOPIC: adr [[REG1:r[0-9]+]], [[LJTI:.*]] +; THUMB1-NOPIC: adds [[REG0:r[0-9]+]], [[REG0]], [[REG1]] +; THUMB1-NOPIC: ldr [[REG0]] +; THUMB1-NOPIC: mov pc, [[REG0]] +; THUMB1-NOPIC: [[LJTI]] +; THUMB1-NOPIC: .data_region jt32 +; THUMB1-NOPIC: .long [[LABEL:LBB0_[0-9]]]+1 +; THUMB1-NOPIC: .end_data_region +; THUMB1-NOPIC: [[LABEL]] diff --git a/test/CodeGen/ARM/ehabi-filters.ll b/test/CodeGen/ARM/ehabi-filters.ll new file mode 100644 index 000000000000..c42839d9fe3d --- /dev/null +++ b/test/CodeGen/ARM/ehabi-filters.ll @@ -0,0 +1,77 @@ +; RUN: llc -arm-enable-ehabi -arm-enable-ehabi-descriptors < %s | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" +target triple = "armv7-none-linux-gnueabi" + +@_ZTIi = external constant i8* + +declare void @_Z3foov() noreturn; + +declare i8* @__cxa_allocate_exception(i32) + +declare i32 @__gxx_personality_v0(...) 
+ +declare void @__cxa_throw(i8*, i8*, i8*) + +declare void @__cxa_call_unexpected(i8*) + +define i32 @main() { +; CHECK: main: +entry: + %exception.i = tail call i8* @__cxa_allocate_exception(i32 4) nounwind + %0 = bitcast i8* %exception.i to i32* + store i32 42, i32* %0, align 4, !tbaa !0 + invoke void @__cxa_throw(i8* %exception.i, i8* bitcast (i8** @_ZTIi to i8*), i8* null) noreturn + to label %unreachable.i unwind label %lpad.i + +lpad.i: ; preds = %entry + %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + filter [1 x i8*] [i8* bitcast (i8** @_ZTIi to i8*)] + catch i8* bitcast (i8** @_ZTIi to i8*) +; CHECK: .long _ZTIi(target2) @ TypeInfo 1 +; CHECK: .long _ZTIi(target2) @ FilterInfo -1 + %2 = extractvalue { i8*, i32 } %1, 1 + %ehspec.fails.i = icmp slt i32 %2, 0 + br i1 %ehspec.fails.i, label %ehspec.unexpected.i, label %lpad.body + +ehspec.unexpected.i: ; preds = %lpad.i + %3 = extractvalue { i8*, i32 } %1, 0 + invoke void @__cxa_call_unexpected(i8* %3) noreturn + to label %.noexc unwind label %lpad + +.noexc: ; preds = %ehspec.unexpected.i + unreachable + +unreachable.i: ; preds = %entry + unreachable + +lpad: ; preds = %ehspec.unexpected.i + %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + catch i8* bitcast (i8** @_ZTIi to i8*) + br label %lpad.body + +lpad.body: ; preds = %lpad.i, %lpad + %eh.lpad-body = phi { i8*, i32 } [ %4, %lpad ], [ %1, %lpad.i ] + %5 = extractvalue { i8*, i32 } %eh.lpad-body, 1 + %6 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) nounwind + %matches = icmp eq i32 %5, %6 + br i1 %matches, label %try.cont, label %eh.resume + +try.cont: ; preds = %lpad.body + %7 = extractvalue { i8*, i32 } %eh.lpad-body, 0 + %8 = tail call i8* @__cxa_begin_catch(i8* %7) nounwind + tail call void @__cxa_end_catch() nounwind + ret i32 0 + +eh.resume: ; preds = %lpad.body + resume { i8*, i32 } %eh.lpad-body +} + +declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone + +declare i8* @__cxa_begin_catch(i8*) + +declare void @__cxa_end_catch() + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/CodeGen/ARM/ehabi-mc-cantunwind.ll b/test/CodeGen/ARM/ehabi-mc-cantunwind.ll new file mode 100644 index 000000000000..698d76e56580 --- /dev/null +++ b/test/CodeGen/ARM/ehabi-mc-cantunwind.ll @@ -0,0 +1,14 @@ +; RUN: llc -mtriple arm-unknown-linux-gnueabi \ +; RUN: -arm-enable-ehabi -arm-enable-ehabi-descriptors \ +; RUN: -filetype=obj -o - %s \ +; RUN: | llvm-objdump -s - \ +; RUN: | FileCheck %s + +define void @test() nounwind { +entry: + ret void +} + +; CHECK: section .text +; CHECK: section .ARM.exidx +; CHECK-NEXT: 0000 00000000 01000000 diff --git a/test/CodeGen/ARM/ehabi-mc-section-group.ll b/test/CodeGen/ARM/ehabi-mc-section-group.ll new file mode 100644 index 000000000000..5e4b5096c494 --- /dev/null +++ b/test/CodeGen/ARM/ehabi-mc-section-group.ll @@ -0,0 +1,79 @@ +; Test section group of the function with linkonce_odr + +; The instantiation of C++ function template will come with linkonce_odr, +; which indicates that the linker can remove the duplicated instantiation. +; However, to make this feature work, we have to group the section properly. +; .text, .ARM.extab, and .ARM.exidx should be grouped together. 
+ +; RUN: llc -mtriple arm-unknown-linux-gnueabi \ +; RUN: -arm-enable-ehabi -arm-enable-ehabi-descriptors \ +; RUN: -filetype=obj -o - %s \ +; RUN: | elf-dump --dump-section-data \ +; RUN: | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" +target triple = "armv4t--linux-gnueabi" + +define void @_Z11instantiatev() { +entry: + tail call void @_Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_(i32 1, i32 2, i32 3, i32 4, i32 5, double 1.000000e-01, double 2.000000e-01, double 3.000000e-01, double 4.000000e-01, double 5.000000e-01) + ret void +} + +define linkonce_odr void @_Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_(i32 %u1, i32 %u2, i32 %u3, i32 %u4, i32 %u5, double %v1, double %v2, double %v3, double %v4, double %v5) { +entry: + invoke void @_Z5printiiiii(i32 %u1, i32 %u2, i32 %u3, i32 %u4, i32 %u5) + to label %try.cont unwind label %lpad + +lpad: ; preds = %entry + %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + catch i8* null + %1 = extractvalue { i8*, i32 } %0, 0 + %2 = tail call i8* @__cxa_begin_catch(i8* %1) nounwind + invoke void @_Z5printddddd(double %v1, double %v2, double %v3, double %v4, double %v5) + to label %invoke.cont2 unwind label %lpad1 + +invoke.cont2: ; preds = %lpad + tail call void @__cxa_end_catch() + br label %try.cont + +try.cont: ; preds = %entry, %invoke.cont2 + ret void + +lpad1: ; preds = %lpad + %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + cleanup + invoke void @__cxa_end_catch() + to label %eh.resume unwind label %terminate.lpad + +eh.resume: ; preds = %lpad1 + resume { i8*, i32 } %3 + +terminate.lpad: ; preds = %lpad1 + %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + catch i8* null + tail call void @_ZSt9terminatev() noreturn nounwind + unreachable +} + +declare void @_Z5printiiiii(i32, i32, i32, i32, i32) + +declare i32 @__gxx_personality_v0(...) 
+ +declare i8* @__cxa_begin_catch(i8*) + +declare void @_Z5printddddd(double, double, double, double, double) + +declare void @__cxa_end_catch() + +declare void @_ZSt9terminatev() + +; CHECK: # Section 1 +; CHECK-NEXT: (('sh_name', 0x0000002f) # '.group' +; CHECK: ('_section_data', '01000000 0a000000 0c000000 0e000000') +; CHECK: # Section 10 +; CHECK-NEXT: (('sh_name', 0x000000e1) # '.text._Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_' +; CHECK: # Section 12 +; CHECK-NEXT: (('sh_name', 0x000000d7) # '.ARM.extab.text._Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_' +; CHECK: # Section 14 +; CHECK-NEXT: (('sh_name', 0x00000065) # '.ARM.exidx.text._Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_' diff --git a/test/CodeGen/ARM/ehabi-mc-section.ll b/test/CodeGen/ARM/ehabi-mc-section.ll new file mode 100644 index 000000000000..fc51b240ff3d --- /dev/null +++ b/test/CodeGen/ARM/ehabi-mc-section.ll @@ -0,0 +1,59 @@ +; RUN: llc -mtriple arm-unknown-linux-gnueabi \ +; RUN: -arm-enable-ehabi -arm-enable-ehabi-descriptors \ +; RUN: -filetype=obj -o - %s \ +; RUN: | llvm-objdump -s - \ +; RUN: | FileCheck %s + +define void @_Z4testiiiiiddddd(i32 %u1, i32 %u2, i32 %u3, i32 %u4, i32 %u5, double %v1, double %v2, double %v3, double %v4, double %v5) section ".test_section" { +entry: + invoke void @_Z5printiiiii(i32 %u1, i32 %u2, i32 %u3, i32 %u4, i32 %u5) + to label %try.cont unwind label %lpad + +lpad: ; preds = %entry + %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + catch i8* null + %1 = extractvalue { i8*, i32 } %0, 0 + %2 = tail call i8* @__cxa_begin_catch(i8* %1) nounwind + invoke void @_Z5printddddd(double %v1, double %v2, double %v3, double %v4, double %v5) + to label %invoke.cont2 unwind label %lpad1 + +invoke.cont2: ; preds = %lpad + tail call void @__cxa_end_catch() + br label %try.cont + +try.cont: ; preds = %entry, %invoke.cont2 + ret void + +lpad1: ; preds = %lpad + %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + cleanup + invoke void @__cxa_end_catch() + to label %eh.resume unwind label %terminate.lpad + +eh.resume: ; preds = %lpad1 + resume { i8*, i32 } %3 + +terminate.lpad: ; preds = %lpad1 + %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + catch i8* null + tail call void @_ZSt9terminatev() noreturn nounwind + unreachable +} + +declare void @_Z5printiiiii(i32, i32, i32, i32, i32) + +declare i32 @__gxx_personality_v0(...) + +declare i8* @__cxa_begin_catch(i8*) + +declare void @_Z5printddddd(double, double, double, double, double) + +declare void @__cxa_end_catch() + +declare void @_ZSt9terminatev() + +; CHECK: section .test_section +; CHECK: section .ARM.extab.test_section +; CHECK-NEXT: 0000 00000000 b0b0b000 +; CHECK: section .ARM.exidx.test_section +; CHECK-NEXT: 0000 00000000 00000000 diff --git a/test/CodeGen/ARM/ehabi-mc-sh_link.ll b/test/CodeGen/ARM/ehabi-mc-sh_link.ll new file mode 100644 index 000000000000..f90e5f384c1e --- /dev/null +++ b/test/CodeGen/ARM/ehabi-mc-sh_link.ll @@ -0,0 +1,47 @@ +; Test the sh_link in Elf32_Shdr. + +; The .ARM.exidx section should be linked with corresponding text section. +; The sh_link in Elf32_Shdr should be filled with the section index of +; the text section. 
+ +; RUN: llc -mtriple arm-unknown-linux-gnueabi \ +; RUN: -arm-enable-ehabi -arm-enable-ehabi-descriptors \ +; RUN: -filetype=obj -o - %s \ +; RUN: | elf-dump --dump-section-data \ +; RUN: | FileCheck %s + +define void @test1() nounwind { +entry: + ret void +} + +define void @test2() nounwind section ".test_section" { +entry: + ret void +} + +; CHECK: # Section 1 +; CHECK-NEXT: (('sh_name', 0x00000010) # '.text' + +; CHECK: (('sh_name', 0x00000005) # '.ARM.exidx' +; CHECK-NEXT: ('sh_type', 0x70000001) +; CHECK-NEXT: ('sh_flags', 0x00000082) +; CHECK-NEXT: ('sh_addr', 0x00000000) +; CHECK-NEXT: ('sh_offset', 0x0000005c) +; CHECK-NEXT: ('sh_size', 0x00000008) +; CHECK-NEXT: ('sh_link', 0x00000001) +; CHECK-NEXT: ('sh_info', 0x00000000) +; CHECK-NEXT: ('sh_addralign', 0x00000004) + +; CHECK: # Section 7 +; CHECK-NEXT: (('sh_name', 0x00000039) # '.test_section' + +; CHECK: (('sh_name', 0x0000002f) # '.ARM.exidx.test_section' +; CHECK-NEXT: ('sh_type', 0x70000001) +; CHECK-NEXT: ('sh_flags', 0x00000082) +; CHECK-NEXT: ('sh_addr', 0x00000000) +; CHECK-NEXT: ('sh_offset', 0x00000068) +; CHECK-NEXT: ('sh_size', 0x00000008) +; CHECK-NEXT: ('sh_link', 0x00000007) +; CHECK-NEXT: ('sh_info', 0x00000000) +; CHECK-NEXT: ('sh_addralign', 0x00000004) diff --git a/test/CodeGen/ARM/ehabi-mc.ll b/test/CodeGen/ARM/ehabi-mc.ll new file mode 100644 index 000000000000..0dc2ef7838f0 --- /dev/null +++ b/test/CodeGen/ARM/ehabi-mc.ll @@ -0,0 +1,59 @@ +; RUN: llc -mtriple arm-unknown-linux-gnueabi \ +; RUN: -arm-enable-ehabi -arm-enable-ehabi-descriptors \ +; RUN: -filetype=obj -o - %s \ +; RUN: | llvm-objdump -s - \ +; RUN: | FileCheck %s + +define void @_Z4testiiiiiddddd(i32 %u1, i32 %u2, i32 %u3, i32 %u4, i32 %u5, double %v1, double %v2, double %v3, double %v4, double %v5) { +entry: + invoke void @_Z5printiiiii(i32 %u1, i32 %u2, i32 %u3, i32 %u4, i32 %u5) + to label %try.cont unwind label %lpad + +lpad: ; preds = %entry + %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + catch i8* null + %1 = extractvalue { i8*, i32 } %0, 0 + %2 = tail call i8* @__cxa_begin_catch(i8* %1) nounwind + invoke void @_Z5printddddd(double %v1, double %v2, double %v3, double %v4, double %v5) + to label %invoke.cont2 unwind label %lpad1 + +invoke.cont2: ; preds = %lpad + tail call void @__cxa_end_catch() + br label %try.cont + +try.cont: ; preds = %entry, %invoke.cont2 + ret void + +lpad1: ; preds = %lpad + %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + cleanup + invoke void @__cxa_end_catch() + to label %eh.resume unwind label %terminate.lpad + +eh.resume: ; preds = %lpad1 + resume { i8*, i32 } %3 + +terminate.lpad: ; preds = %lpad1 + %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + catch i8* null + tail call void @_ZSt9terminatev() noreturn nounwind + unreachable +} + +declare void @_Z5printiiiii(i32, i32, i32, i32, i32) + +declare i32 @__gxx_personality_v0(...) 
+ +declare i8* @__cxa_begin_catch(i8*) + +declare void @_Z5printddddd(double, double, double, double, double) + +declare void @__cxa_end_catch() + +declare void @_ZSt9terminatev() + +; CHECK: section .text +; CHECK: section .ARM.extab +; CHECK-NEXT: 0000 00000000 b0b0b000 +; CHECK: section .ARM.exidx +; CHECK-NEXT: 0000 00000000 00000000 diff --git a/test/CodeGen/ARM/ehabi-no-landingpad.ll b/test/CodeGen/ARM/ehabi-no-landingpad.ll new file mode 100644 index 000000000000..ac0dff421a6f --- /dev/null +++ b/test/CodeGen/ARM/ehabi-no-landingpad.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s -mtriple=armv7-unknown-linux-gnueabi \ +; RUN: -arm-enable-ehabi -arm-enable-ehabi-descriptors | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" +target triple = "armv7-unknown-linux-gnueabi" + +define void @_Z4testv() { +; CHECK: _Z4testv +; CHECK: .fnstart +; CHECK: .size +; CHECK-NOT: .handlerdata +; CHECK: .fnend +entry: + call void @_Z15throw_exceptionv() + ret void +} + +declare void @_Z15throw_exceptionv() diff --git a/test/CodeGen/ARM/elf-lcomm-align.ll b/test/CodeGen/ARM/elf-lcomm-align.ll index 46792990e593..a98b3c06f5e2 100644 --- a/test/CodeGen/ARM/elf-lcomm-align.ll +++ b/test/CodeGen/ARM/elf-lcomm-align.ll @@ -4,8 +4,9 @@ @c = internal global i8 0, align 1 @x = internal global i32 0, align 4 -; CHECK: .lcomm c,1 -; .lcomm doesn't support alignment. +; .lcomm doesn't support alignment, so we always use .local/.comm. +; CHECK: .local c +; CHECK-NEXT: .comm c,1,1 ; CHECK: .local x ; CHECK-NEXT: .comm x,4,4 diff --git a/test/CodeGen/ARM/extload-knownzero.ll b/test/CodeGen/ARM/extload-knownzero.ll new file mode 100644 index 000000000000..8fd6b6bd777a --- /dev/null +++ b/test/CodeGen/ARM/extload-knownzero.ll @@ -0,0 +1,26 @@ +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s +; rdar://12771555 + +define void @foo(i16* %ptr, i32 %a) nounwind { +entry: +; CHECK: foo: + %tmp1 = icmp ult i32 %a, 100 + br i1 %tmp1, label %bb1, label %bb2 +bb1: +; CHECK: ldrh + %tmp2 = load i16* %ptr, align 2 + br label %bb2 +bb2: +; CHECK-NOT: uxth +; CHECK: cmp + %tmp3 = phi i16 [ 0, %entry ], [ %tmp2, %bb1 ] + %cmp = icmp ult i16 %tmp3, 24 + br i1 %cmp, label %bb3, label %exit +bb3: + call void @bar() nounwind + br label %exit +exit: + ret void +} + +declare void @bar () diff --git a/test/CodeGen/ARM/fabs-neon.ll b/test/CodeGen/ARM/fabs-neon.ll new file mode 100644 index 000000000000..614117ff7bca --- /dev/null +++ b/test/CodeGen/ARM/fabs-neon.ll @@ -0,0 +1,17 @@ +; RUN: llc < %s -mtriple=armv7-eabi -float-abi=hard -mcpu=cortex-a8 | FileCheck %s + +; CHECK: test: +; CHECK: vabs.f32 q0, q0 +define <4 x float> @test(<4 x float> %a) { + %foo = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a) + ret <4 x float> %foo +} +declare <4 x float> @llvm.fabs.v4f32(<4 x float> %a) + +; CHECK: test2: +; CHECK: vabs.f32 d0, d0 +define <2 x float> @test2(<2 x float> %a) { + %foo = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a) + ret <2 x float> %foo +} +declare <2 x float> @llvm.fabs.v2f32(<2 x float> %a) diff --git a/test/CodeGen/ARM/fabss.ll b/test/CodeGen/ARM/fabss.ll index 46c2f1c65fe5..c3e00ce47019 100644 --- a/test/CodeGen/ARM/fabss.ll +++ b/test/CodeGen/ARM/fabss.ll @@ -14,12 +14,12 @@ entry: declare float @fabsf(float) ; VFP2: test: -; VFP2: vabs.f32 s2, s2 +; VFP2: vabs.f32 s ; NFP1: test: -; NFP1: vabs.f32 d1, d1 +; NFP1: vabs.f32 d ; NFP0: test: -; NFP0: vabs.f32 s2, s2 +; NFP0: vabs.f32 s ; CORTEXA8: test: ; CORTEXA8: 
vadd.f32 [[D1:d[0-9]+]] diff --git a/test/CodeGen/ARM/fadds.ll b/test/CodeGen/ARM/fadds.ll index 48ef5ed88fb0..c7e2f5d094b8 100644 --- a/test/CodeGen/ARM/fadds.ll +++ b/test/CodeGen/ARM/fadds.ll @@ -1,6 +1,8 @@ ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0 -; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 +; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 +; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CORTEXA8U +; RUN: llc < %s -mtriple=arm-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8U ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9 define float @test(float %a, float %b) { @@ -18,6 +20,8 @@ entry: ; NFP0: vadd.f32 s ; CORTEXA8: test: -; CORTEXA8: vadd.f32 d +; CORTEXA8: vadd.f32 s +; CORTEXA8U: test: +; CORTEXA8U: vadd.f32 d ; CORTEXA9: test: -; CORTEXA9: vadd.f32 s{{.}}, s{{.}}, s{{.}} +; CORTEXA9: vadd.f32 s diff --git a/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll b/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll index dbb634df0a1e..60bc6a62f5d3 100644 --- a/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll +++ b/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB %struct.A = type { i32, [2 x [2 x i32]], i8, [3 x [3 x [3 x i32]]] } %struct.B = type { i32, [2 x [2 x [2 x %struct.A]]] } diff --git a/test/CodeGen/ARM/fast-isel-br-const.ll b/test/CodeGen/ARM/fast-isel-br-const.ll index 7c532d5fba38..4e6efd248997 100644 --- a/test/CodeGen/ARM/fast-isel-br-const.ll +++ b/test/CodeGen/ARM/fast-isel-br-const.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB define i32 @t1(i32 %a, i32 %b) nounwind uwtable ssp { entry: diff --git a/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll b/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll index 14721a4d8024..b6f201728c2b 100644 --- a/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll +++ b/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -verify-machineinstrs -relocation-model=dynamic-no-pic 
-mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB ; Fast-isel can't handle non-double multi-reg retvals. ; This test just check to make sure we don't hit the assert in FinishCall. diff --git a/test/CodeGen/ARM/fast-isel-crash.ll b/test/CodeGen/ARM/fast-isel-crash.ll index 370c70f174fd..8fb4b66b7dd4 100644 --- a/test/CodeGen/ARM/fast-isel-crash.ll +++ b/test/CodeGen/ARM/fast-isel-crash.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O0 -mtriple=thumbv7-apple-darwin +; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=thumbv7-apple-darwin %union.anon = type { <16 x i32> } diff --git a/test/CodeGen/ARM/fast-isel-crash2.ll b/test/CodeGen/ARM/fast-isel-crash2.ll index aa0629928846..f245168a8e30 100644 --- a/test/CodeGen/ARM/fast-isel-crash2.ll +++ b/test/CodeGen/ARM/fast-isel-crash2.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O0 -mtriple=thumbv7-apple-darwin +; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=thumbv7-apple-darwin ; rdar://9515076 ; (Make sure this doesn't crash.) diff --git a/test/CodeGen/ARM/fast-isel-deadcode.ll b/test/CodeGen/ARM/fast-isel-deadcode.ll index 7e147c7b4d7d..3a943d854b4a 100644 --- a/test/CodeGen/ARM/fast-isel-deadcode.ll +++ b/test/CodeGen/ARM/fast-isel-deadcode.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB ; Target-specific selector can't properly handle the double because it isn't ; being passed via a register, so the materialized arguments become dead code. diff --git a/test/CodeGen/ARM/fast-isel-fold.ll b/test/CodeGen/ARM/fast-isel-fold.ll index 61bd18504c5c..7a65295f01b6 100644 --- a/test/CodeGen/ARM/fast-isel-fold.ll +++ b/test/CodeGen/ARM/fast-isel-fold.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB @a = global i8 1, align 1 @b = global i16 2, align 2 diff --git a/test/CodeGen/ARM/fast-isel-frameaddr.ll b/test/CodeGen/ARM/fast-isel-frameaddr.ll index 8f7b2943b56d..c256e73ab98c 100644 --- a/test/CodeGen/ARM/fast-isel-frameaddr.ll +++ b/test/CodeGen/ARM/fast-isel-frameaddr.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=DARWIN-ARM -; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=LINUX-ARM -; RUN: llc < %s -O0 -fast-isel-abort -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=DARWIN-THUMB2 -; RUN: llc < %s -O0 -fast-isel-abort -mtriple=thumbv7-linux-gnueabi | FileCheck %s --check-prefix=LINUX-THUMB2 +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=DARWIN-ARM +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=armv7-linux-gnueabi | FileCheck %s 
--check-prefix=LINUX-ARM +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=DARWIN-THUMB2 +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=thumbv7-linux-gnueabi | FileCheck %s --check-prefix=LINUX-THUMB2 define i8* @frameaddr_index0() nounwind { entry: diff --git a/test/CodeGen/ARM/fast-isel-icmp.ll b/test/CodeGen/ARM/fast-isel-icmp.ll index 8764bef7dab9..8357ed5c549c 100644 --- a/test/CodeGen/ARM/fast-isel-icmp.ll +++ b/test/CodeGen/ARM/fast-isel-icmp.ll @@ -1,6 +1,21 @@ ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB +define i32 @icmp_i16_signed(i16 %a, i16 %b) nounwind { +entry: +; ARM: icmp_i16_signed +; ARM: sxth r0, r0 +; ARM: sxth r1, r1 +; ARM: cmp r0, r1 +; THUMB: icmp_i16_signed +; THUMB: sxth r0, r0 +; THUMB: sxth r1, r1 +; THUMB: cmp r0, r1 + %cmp = icmp slt i16 %a, %b + %conv2 = zext i1 %cmp to i32 + ret i32 %conv2 +} + define i32 @icmp_i16_unsigned(i16 %a, i16 %b) nounwind { entry: ; ARM: icmp_i16_unsigned @@ -31,6 +46,21 @@ entry: ret i32 %conv2 } +define i32 @icmp_i8_unsigned(i8 %a, i8 %b) nounwind { +entry: +; ARM: icmp_i8_unsigned +; ARM: uxtb r0, r0 +; ARM: uxtb r1, r1 +; ARM: cmp r0, r1 +; THUMB: icmp_i8_unsigned +; THUMB: uxtb r0, r0 +; THUMB: uxtb r1, r1 +; THUMB: cmp r0, r1 + %cmp = icmp ugt i8 %a, %b + %conv2 = zext i1 %cmp to i32 + ret i32 %conv2 +} + define i32 @icmp_i1_unsigned(i1 %a, i1 %b) nounwind { entry: ; ARM: icmp_i1_unsigned diff --git a/test/CodeGen/ARM/fast-isel-indirectbr.ll b/test/CodeGen/ARM/fast-isel-indirectbr.ll index be8035ec794d..ebc0e8426d55 100644 --- a/test/CodeGen/ARM/fast-isel-indirectbr.ll +++ b/test/CodeGen/ARM/fast-isel-indirectbr.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB define void @t1(i8* %x) { entry: diff --git a/test/CodeGen/ARM/fast-isel-intrinsic.ll b/test/CodeGen/ARM/fast-isel-intrinsic.ll index b73fceff6cd0..48105dd3893b 100644 --- a/test/CodeGen/ARM/fast-isel-intrinsic.ll +++ b/test/CodeGen/ARM/fast-isel-intrinsic.ll @@ -35,7 +35,7 @@ define void @t1() nounwind ssp { ; THUMB-LONG: movt r3, :upper16:L_memset$non_lazy_ptr ; THUMB-LONG: ldr r3, [r3] ; THUMB-LONG: blx r3 - call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @message1, i32 0, i32 5), i8 64, i32 10, i32 1, i1 false) + call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @message1, i32 0, i32 5), i8 64, i32 10, i32 4, i1 false) ret void } @@ -73,7 +73,7 @@ define void @t2() nounwind ssp { ; THUMB-LONG: movt r3, :upper16:L_memcpy$non_lazy_ptr ; THUMB-LONG: ldr r3, [r3] ; THUMB-LONG: blx r3 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 17, i32 1, i1 false) + call void 
@llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 17, i32 4, i1 false) ret void } @@ -125,6 +125,7 @@ define void @t4() nounwind ssp { ; ARM: ldrh r1, [r0, #24] ; ARM: strh r1, [r0, #12] ; ARM: bx lr +; THUMB: t4 ; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr ; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr ; THUMB: ldr r0, [r0] @@ -135,8 +136,117 @@ define void @t4() nounwind ssp { ; THUMB: ldrh r1, [r0, #24] ; THUMB: strh r1, [r0, #12] ; THUMB: bx lr - call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 4, i1 false) ret void } declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind + +define void @t5() nounwind ssp { +; ARM: t5 +; ARM: movw r0, :lower16:L_temp$non_lazy_ptr +; ARM: movt r0, :upper16:L_temp$non_lazy_ptr +; ARM: ldr r0, [r0] +; ARM: ldrh r1, [r0, #16] +; ARM: strh r1, [r0, #4] +; ARM: ldrh r1, [r0, #18] +; ARM: strh r1, [r0, #6] +; ARM: ldrh r1, [r0, #20] +; ARM: strh r1, [r0, #8] +; ARM: ldrh r1, [r0, #22] +; ARM: strh r1, [r0, #10] +; ARM: ldrh r1, [r0, #24] +; ARM: strh r1, [r0, #12] +; ARM: bx lr +; THUMB: t5 +; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr +; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr +; THUMB: ldr r0, [r0] +; THUMB: ldrh r1, [r0, #16] +; THUMB: strh r1, [r0, #4] +; THUMB: ldrh r1, [r0, #18] +; THUMB: strh r1, [r0, #6] +; THUMB: ldrh r1, [r0, #20] +; THUMB: strh r1, [r0, #8] +; THUMB: ldrh r1, [r0, #22] +; THUMB: strh r1, [r0, #10] +; THUMB: ldrh r1, [r0, #24] +; THUMB: strh r1, [r0, #12] +; THUMB: bx lr + call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 2, i1 false) + ret void +} + +define void @t6() nounwind ssp { +; ARM: t6 +; ARM: movw r0, :lower16:L_temp$non_lazy_ptr +; ARM: movt r0, :upper16:L_temp$non_lazy_ptr +; ARM: ldr r0, [r0] +; ARM: ldrb r1, [r0, #16] +; ARM: strb r1, [r0, #4] +; ARM: ldrb r1, [r0, #17] +; ARM: strb r1, [r0, #5] +; ARM: ldrb r1, [r0, #18] +; ARM: strb r1, [r0, #6] +; ARM: ldrb r1, [r0, #19] +; ARM: strb r1, [r0, #7] +; ARM: ldrb r1, [r0, #20] +; ARM: strb r1, [r0, #8] +; ARM: ldrb r1, [r0, #21] +; ARM: strb r1, [r0, #9] +; ARM: ldrb r1, [r0, #22] +; ARM: strb r1, [r0, #10] +; ARM: ldrb r1, [r0, #23] +; ARM: strb r1, [r0, #11] +; ARM: ldrb r1, [r0, #24] +; ARM: strb r1, [r0, #12] +; ARM: ldrb r1, [r0, #25] +; ARM: strb r1, [r0, #13] +; ARM: bx lr +; THUMB: t6 +; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr +; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr +; THUMB: ldr r0, [r0] +; THUMB: ldrb r1, [r0, #16] +; THUMB: strb r1, [r0, #4] +; THUMB: ldrb r1, [r0, #17] +; THUMB: strb r1, [r0, #5] +; THUMB: ldrb r1, [r0, #18] +; THUMB: strb r1, [r0, #6] +; THUMB: ldrb r1, [r0, #19] +; THUMB: strb r1, [r0, #7] +; THUMB: ldrb r1, [r0, #20] +; THUMB: strb r1, [r0, #8] +; THUMB: ldrb r1, [r0, #21] +; THUMB: strb r1, [r0, #9] +; THUMB: ldrb r1, [r0, #22] +; THUMB: strb r1, [r0, #10] +; THUMB: ldrb r1, [r0, #23] +; THUMB: strb r1, [r0, #11] +; THUMB: ldrb r1, [r0, #24] +; THUMB: strb r1, [r0, #12] +; THUMB: ldrb r1, [r0, #25] +; THUMB: strb r1, [r0, #13] +; THUMB: bx lr + call 
void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false) + ret void +} + +; rdar://13202135 +define void @t7() nounwind ssp { +; Just make sure this doesn't assert when we have an odd length and an alignment of 2. + call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 3, i32 2, i1 false) + ret void +} + +define i32 @t8(i32 %x) nounwind { +entry: +; ARM: t8 +; ARM-NOT: FastISel missed call: %expval = call i32 @llvm.expect.i32(i32 %x, i32 1) +; THUMB: t8 +; THUMB-NOT: FastISel missed call: %expval = call i32 @llvm.expect.i32(i32 %x, i32 1) + %expval = call i32 @llvm.expect.i32(i32 %x, i32 1) + ret i32 %expval +} + +declare i32 @llvm.expect.i32(i32, i32) nounwind readnone diff --git a/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll b/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll index e8cc2b238dff..0b5267ddc973 100644 --- a/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll +++ b/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll @@ -103,7 +103,7 @@ entry: ; ARM: t11 %add.ptr = getelementptr inbounds i16* %a, i64 8 store i16 0, i16* %add.ptr, align 2 -; ARM strh r{{[1-9]}}, [r0, #16] +; ARM: strh r{{[1-9]}}, [r0, #16] ret void } diff --git a/test/CodeGen/ARM/fast-isel-pred.ll b/test/CodeGen/ARM/fast-isel-pred.ll index 8de54ad5332b..27731def1f57 100644 --- a/test/CodeGen/ARM/fast-isel-pred.ll +++ b/test/CodeGen/ARM/fast-isel-pred.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -mtriple=armv7-apple-darwin < %s +; RUN: llc -O0 -verify-machineinstrs -mtriple=armv7-apple-darwin < %s define i32 @main() nounwind ssp { entry: diff --git a/test/CodeGen/ARM/fast-isel-redefinition.ll b/test/CodeGen/ARM/fast-isel-redefinition.ll index e50c3a4954e1..563880dab0a9 100644 --- a/test/CodeGen/ARM/fast-isel-redefinition.ll +++ b/test/CodeGen/ARM/fast-isel-redefinition.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -optimize-regalloc -regalloc=basic < %s +; RUN: llc -O0 -verify-machineinstrs -optimize-regalloc -regalloc=basic < %s ; This isn't exactly a useful set of command-line options, but check that it ; doesn't crash. (It was crashing because a register was getting redefined.) 
diff --git a/test/CodeGen/ARM/fast-isel-static.ll b/test/CodeGen/ARM/fast-isel-static.ll index a86e3251f715..e8759a7fc4ce 100644 --- a/test/CodeGen/ARM/fast-isel-static.ll +++ b/test/CodeGen/ARM/fast-isel-static.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -relocation-model=static -arm-long-calls | FileCheck -check-prefix=LONG %s -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -relocation-model=static | FileCheck -check-prefix=NORM %s +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -verify-machineinstrs -relocation-model=static -arm-long-calls | FileCheck -check-prefix=LONG %s +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -verify-machineinstrs -relocation-model=static | FileCheck -check-prefix=NORM %s define void @myadd(float* %sum, float* %addend) nounwind { entry: diff --git a/test/CodeGen/ARM/fdivs.ll b/test/CodeGen/ARM/fdivs.ll index 8fab00213585..8f13f395e078 100644 --- a/test/CodeGen/ARM/fdivs.ll +++ b/test/CodeGen/ARM/fdivs.ll @@ -10,14 +10,14 @@ entry: } ; VFP2: test: -; VFP2: vdiv.f32 s0, s2, s0 +; VFP2: vdiv.f32 s{{.}}, s{{.}}, s{{.}} ; NFP1: test: -; NFP1: vdiv.f32 s0, s2, s0 +; NFP1: vdiv.f32 s{{.}}, s{{.}}, s{{.}} ; NFP0: test: -; NFP0: vdiv.f32 s0, s2, s0 +; NFP0: vdiv.f32 s{{.}}, s{{.}}, s{{.}} ; CORTEXA8: test: -; CORTEXA8: vdiv.f32 s0, s2, s0 +; CORTEXA8: vdiv.f32 s{{.}}, s{{.}}, s{{.}} ; CORTEXA9: test: ; CORTEXA9: vdiv.f32 s{{.}}, s{{.}}, s{{.}} diff --git a/test/CodeGen/ARM/fmuls.ll b/test/CodeGen/ARM/fmuls.ll index 1566a9272db1..f5245c946398 100644 --- a/test/CodeGen/ARM/fmuls.ll +++ b/test/CodeGen/ARM/fmuls.ll @@ -1,6 +1,8 @@ ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0 -; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 +; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 +; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CORTEXA8U +; RUN: llc < %s -mtriple=arm-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8U ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9 define float @test(float %a, float %b) { @@ -18,9 +20,11 @@ entry: ; NFP0: vmul.f32 s ; CORTEXA8: test: -; CORTEXA8: vmul.f32 d +; CORTEXA8: vmul.f32 s +; CORTEXA8U: test: +; CORTEXA8U: vmul.f32 d ; CORTEXA9: test: -; CORTEXA9: vmul.f32 s{{.}}, s{{.}}, s{{.}} +; CORTEXA9: vmul.f32 s ; VFP2: test2 define float @test2(float %a) nounwind { diff --git a/test/CodeGen/ARM/fnegs.ll b/test/CodeGen/ARM/fnegs.ll index 418b59803d30..d84690ba4e4b 100644 --- a/test/CodeGen/ARM/fnegs.ll +++ b/test/CodeGen/ARM/fnegs.ll @@ -1,6 +1,8 @@ ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0 -; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 +; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 +; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CORTEXA8U +; RUN: llc < %s -mtriple=arm-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8U ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9 define float @test1(float* %a) { @@ -22,7 +24,10 @@ entry: ; NFP0: vneg.f32 s{{.*}}, s{{.*}} ; CORTEXA8: test1: -; CORTEXA8: vneg.f32 d{{.*}}, d{{.*}} +; CORTEXA8: vneg.f32 s{{.*}}, s{{.*}} + +; CORTEXA8U: 
test1: +; CORTEXA8U: vneg.f32 d{{.*}}, d{{.*}} ; CORTEXA9: test1: ; CORTEXA9: vneg.f32 s{{.*}}, s{{.*}} @@ -46,7 +51,10 @@ entry: ; NFP0: vneg.f32 s{{.*}}, s{{.*}} ; CORTEXA8: test2: -; CORTEXA8: vneg.f32 d{{.*}}, d{{.*}} +; CORTEXA8: vneg.f32 s{{.*}}, s{{.*}} + +; CORTEXA8U: test2: +; CORTEXA8U: vneg.f32 d{{.*}}, d{{.*}} ; CORTEXA9: test2: ; CORTEXA9: vneg.f32 s{{.*}}, s{{.*}} diff --git a/test/CodeGen/ARM/fnmscs.ll b/test/CodeGen/ARM/fnmscs.ll index 6081712829a2..c30806173428 100644 --- a/test/CodeGen/ARM/fnmscs.ll +++ b/test/CodeGen/ARM/fnmscs.ll @@ -1,7 +1,9 @@ ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON -; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8 -; RUN: llc < %s -march=arm -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=A8 +; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8 +; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=A8 +; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=A8U +; RUN: llc < %s -mtriple=arm-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8U define float @t1(float %acc, float %a, float %b) nounwind { entry: @@ -11,9 +13,13 @@ entry: ; NEON: t1: ; NEON: vnmla.f32 +; A8U: t1: +; A8U: vnmul.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}} +; A8U: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}} + ; A8: t1: ; A8: vnmul.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}} -; A8: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}} +; A8: vsub.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}} %0 = fmul float %a, %b %1 = fsub float -0.0, %0 %2 = fsub float %1, %acc @@ -28,9 +34,13 @@ entry: ; NEON: t2: ; NEON: vnmla.f32 +; A8U: t2: +; A8U: vnmul.f32 s{{[01234]}}, s{{[01234]}}, s{{[01234]}} +; A8U: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}} + ; A8: t2: ; A8: vnmul.f32 s{{[01234]}}, s{{[01234]}}, s{{[01234]}} -; A8: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}} +; A8: vsub.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}} %0 = fmul float %a, %b %1 = fmul float -1.0, %0 %2 = fsub float %1, %acc @@ -45,9 +55,13 @@ entry: ; NEON: t3: ; NEON: vnmla.f64 +; A8U: t3: +; A8U: vnmul.f64 d +; A8U: vsub.f64 d + ; A8: t3: -; A8: vnmul.f64 d1{{[67]}}, d1{{[67]}}, d1{{[67]}} -; A8: vsub.f64 d1{{[67]}}, d1{{[67]}}, d1{{[67]}} +; A8: vnmul.f64 d +; A8: vsub.f64 d %0 = fmul double %a, %b %1 = fsub double -0.0, %0 %2 = fsub double %1, %acc @@ -62,9 +76,13 @@ entry: ; NEON: t4: ; NEON: vnmla.f64 +; A8U: t4: +; A8U: vnmul.f64 d +; A8U: vsub.f64 d + ; A8: t4: -; A8: vnmul.f64 d1{{[67]}}, d1{{[67]}}, d1{{[67]}} -; A8: vsub.f64 d1{{[67]}}, d1{{[67]}}, d1{{[67]}} +; A8: vnmul.f64 d +; A8: vsub.f64 d %0 = fmul double %a, %b %1 = fmul double -1.0, %0 %2 = fsub double %1, %acc diff --git a/test/CodeGen/ARM/fp_convert.ll b/test/CodeGen/ARM/fp_convert.ll index 44298b9c5d8d..3c47eb580ff1 100644 --- a/test/CodeGen/ARM/fp_convert.ll +++ b/test/CodeGen/ARM/fp_convert.ll @@ -1,6 +1,8 @@ ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=VFP2 -; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=NEON +; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 | FileCheck %s -check-prefix=VFP2 +; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=NEON +; RUN: llc < %s -mtriple=arm-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=NEON ; 
RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=VFP2 define i32 @test1(float %a, float %b) { diff --git a/test/CodeGen/ARM/fsubs.ll b/test/CodeGen/ARM/fsubs.ll index f039e74c8ee6..617b01881a2e 100644 --- a/test/CodeGen/ARM/fsubs.ll +++ b/test/CodeGen/ARM/fsubs.ll @@ -1,5 +1,7 @@ ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 -; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=NFP1 +; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 | FileCheck %s -check-prefix=NFP1 +; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=NFP1U +; RUN: llc < %s -mtriple=arm-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=NFP1U ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0 define float @test(float %a, float %b) { @@ -9,5 +11,6 @@ entry: } ; VFP2: vsub.f32 s -; NFP1: vsub.f32 d +; NFP1U: vsub.f32 d +; NFP1: vsub.f32 s ; NFP0: vsub.f32 s diff --git a/test/CodeGen/ARM/global-merge-addrspace.ll b/test/CodeGen/ARM/global-merge-addrspace.ll new file mode 100644 index 000000000000..0efa690bde28 --- /dev/null +++ b/test/CodeGen/ARM/global-merge-addrspace.ll @@ -0,0 +1,12 @@ +; RUN: llc < %s -mtriple=thumb-apple-darwin | FileCheck %s +; Test the GlobalMerge pass. Check that the pass does not crash when using +; multiple address spaces. + +; CHECK: _MergedGlobals: +@g1 = internal addrspace(1) global i32 1 +@g2 = internal addrspace(1) global i32 2 + + +; CHECK: _MergedGlobals1: +@g3 = internal addrspace(2) global i32 3 +@g4 = internal addrspace(2) global i32 4 diff --git a/test/CodeGen/ARM/global-merge.ll b/test/CodeGen/ARM/global-merge.ll index 1732df3fa5ef..f88e92796196 100644 --- a/test/CodeGen/ARM/global-merge.ll +++ b/test/CodeGen/ARM/global-merge.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumb-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple=thumb-apple-darwin -global-merge-on-const=true | FileCheck %s ; Test the ARMGlobalMerge pass. Use -march=thumb because it has a small ; value for the maximum offset (127). 
@@ -6,6 +6,52 @@ ; CHECK: g0: @g0 = internal global [32 x i32] [ i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2 ] +; Global variables marked with "used" attribute must be kept +; CHECK: g8 +@g8 = internal global i32 0 +@llvm.used = appending global [1 x i8*] [i8* bitcast (i32* @g8 to i8*)], section "llvm.metadata" + +; Global used in landing pad instruction must be kept +; CHECK: ZTIi +@_ZTIi = internal global i8* null + +define i32 @_Z9exceptioni(i32 %arg) { +bb: + %tmp = invoke i32 @_Z14throwSomethingi(i32 %arg) + to label %bb9 unwind label %bb1 + +bb1: ; preds = %bb + %tmp2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + catch i8* bitcast (i8** @_ZTIi to i8*) + %tmp3 = extractvalue { i8*, i32 } %tmp2, 1 + %tmp4 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) + %tmp5 = icmp eq i32 %tmp3, %tmp4 + br i1 %tmp5, label %bb6, label %bb10 + +bb6: ; preds = %bb1 + %tmp7 = extractvalue { i8*, i32 } %tmp2, 0 + %tmp8 = tail call i8* @__cxa_begin_catch(i8* %tmp7) + tail call void @__cxa_end_catch() + br label %bb9 + +bb9: ; preds = %bb6, %bb + %res.0 = phi i32 [ 0, %bb6 ], [ %tmp, %bb ] + ret i32 %res.0 + +bb10: ; preds = %bb1 + resume { i8*, i32 } %tmp2 +} + +declare i32 @_Z14throwSomethingi(i32) + +declare i32 @__gxx_personality_sj0(...) + +declare i32 @llvm.eh.typeid.for(i8*) + +declare i8* @__cxa_begin_catch(i8*) + +declare void @__cxa_end_catch() + ; CHECK: _MergedGlobals: @g1 = internal global i32 1 @g2 = internal global i32 2 @@ -21,3 +67,8 @@ ; CHECK: _MergedGlobals2 @g4 = internal global i32 0 @g5 = internal global i32 0 + +; Global variables that are constant can be merged together +; CHECK: _MergedGlobals3 +@g6 = internal constant [12 x i32] zeroinitializer, align 4 +@g7 = internal constant [12 x i32] zeroinitializer, align 4 diff --git a/test/CodeGen/ARM/indirect-reg-input.ll b/test/CodeGen/ARM/indirect-reg-input.ll new file mode 100644 index 000000000000..86728fa61934 --- /dev/null +++ b/test/CodeGen/ARM/indirect-reg-input.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -march=arm -mcpu=cortex-a8 2>&1 | FileCheck %s + +; Check for error message: +; CHECK: error: inline asm not supported yet: don't know how to handle tied indirect register inputs + +%struct.my_stack = type { %struct.myjmp_buf } +%struct.myjmp_buf = type { [6 x i32] } + +define void @switch_to_stack(%struct.my_stack* %stack) nounwind { +entry: + %regs = getelementptr inbounds %struct.my_stack* %stack, i32 0, i32 0 + tail call void asm "\0A", "=*r,*0"(%struct.myjmp_buf* %regs, %struct.myjmp_buf* %regs) + ret void +} diff --git a/test/CodeGen/ARM/inlineasm-64bit.ll b/test/CodeGen/ARM/inlineasm-64bit.ll new file mode 100644 index 000000000000..be5eb8157317 --- /dev/null +++ b/test/CodeGen/ARM/inlineasm-64bit.ll @@ -0,0 +1,54 @@ +; RUN: llc < %s -O3 -mtriple=arm-linux-gnueabi | FileCheck %s + +; check if regs are passing correctly +define void @i64_write(i64* %p, i64 %val) nounwind { +; CHECK: i64_write: +; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], {{r[0-9]?[13579]}}, [r{{[0-9]+}}] +; CHECK: strexd [[REG1]], {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}} + %1 = tail call i64 asm sideeffect "1: ldrexd $0, ${0:H}, [$2]\0A strexd $0, $3, ${3:H}, [$2]\0A teq $0, #0\0A bne 1b", "=&r,=*Qo,r,r,~{cc}"(i64* %p, i64* %p, i64 %val) nounwind + ret void +} + +; check if register allocation can reuse the registers 
+define void @multi_writes(i64* %p, i64 %val1, i64 %val2, i64 %val3, i64 %val4, i64 %val5, i64 %val6) nounwind { +entry: +; CHECK: multi_writes: +; check: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}] +; check: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}] +; check: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}] +; check: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}] +; check: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}] +; check: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}] + +; check: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}] +; check: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}] +; check: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}] +; check: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}] +; check: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}] +; check: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}] + +; check: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}] +; check: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}] +; check: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}] +; check: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}] +; check: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}] +; check: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}] + + tail call void asm sideeffect " strexd $1, ${1:H}, [$0]\0A strexd $2, ${2:H}, [$0]\0A strexd $3, ${3:H}, [$0]\0A strexd $4, ${4:H}, [$0]\0A strexd $5, ${5:H}, [$0]\0A strexd $6, ${6:H}, [$0]\0A", "r,r,r,r,r,r,r"(i64* %p, i64 %val1, i64 %val2, i64 %val3, i64 %val4, i64 %val5, i64 %val6) nounwind + %incdec.ptr = getelementptr inbounds i64* %p, i32 1 + tail call void asm sideeffect " strexd $1, ${1:H}, [$0]\0A strexd $2, ${2:H}, [$0]\0A strexd $3, ${3:H}, [$0]\0A strexd $4, ${4:H}, [$0]\0A strexd $5, ${5:H}, [$0]\0A strexd $6, ${6:H}, [$0]\0A", "r,r,r,r,r,r,r"(i64* %incdec.ptr, i64 %val1, i64 %val2, i64 %val3, i64 %val4, i64 %val5, i64 %val6) nounwind + tail call void asm sideeffect " strexd $1, ${1:H}, [$0]\0A strexd $2, ${2:H}, [$0]\0A strexd $3, ${3:H}, [$0]\0A strexd $4, ${4:H}, [$0]\0A strexd $5, ${5:H}, [$0]\0A strexd $6, ${6:H}, [$0]\0A", "r,r,r,r,r,r,r"(i64* %incdec.ptr, i64 %val1, i64 %val2, i64 %val3, i64 %val4, i64 %val5, i64 %val6) nounwind + ret void +} + + +; check if callee-saved registers used by inline asm are saved/restored +define void @foo(i64* %p, i64 %i) nounwind { +; CHECK:foo: +; CHECK: push {{{r[4-9]|r10|r11}} +; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], {{r[0-9]?[13579]}}, [r{{[0-9]+}}] +; CHECK: strexd [[REG1]], {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}} +; CHECK: pop {{{r[4-9]|r10|r11}} + %1 = tail call { i64, i64 } asm sideeffect "@ atomic64_set\0A1: ldrexd $0, ${0:H}, [$3]\0Aldrexd $1, ${1:H}, [$3]\0A strexd $0, $4, ${4:H}, [$3]\0A teq $0, #0\0A bne 1b", "=&r,=&r,=*Qo,r,r,~{cc}"(i64* %p, i64* %p, i64 %i) nounwind + ret void +} diff --git a/test/CodeGen/ARM/inlineasm3.ll b/test/CodeGen/ARM/inlineasm3.ll index 2fcc45f4af9c..390a44e375b9 100644 --- a/test/CodeGen/ARM/inlineasm3.ll +++ b/test/CodeGen/ARM/inlineasm3.ll @@ -30,7 +30,7 @@ entry: define hidden void @conv4_8_E() nounwind { entry: -%asmtmp31 = call %0 asm "vld1.u8 {$0}, [$1, :128]!\0A", "=w,=r,1"(<16 x i8>* undef) nounwind +%asmtmp31 = call %0 asm "vld1.u8 {$0}, [$1:128]!\0A", "=w,=r,1"(<16 x i8>* undef) nounwind unreachable } diff --git a/test/CodeGen/ARM/invoke-donothing-assert.ll 
b/test/CodeGen/ARM/invoke-donothing-assert.ll new file mode 100644 index 000000000000..0b607f7edf38 --- /dev/null +++ b/test/CodeGen/ARM/invoke-donothing-assert.ll @@ -0,0 +1,73 @@ +; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s +; This testcase makes sure we can handle invoke @llvm.donothing without +; assertion failure. +; <rdar://problem/13228754> & <rdar://problem/13316637> + +; CHECK: .globl _foo +define void @foo() { +invoke.cont: + invoke void @callA() + to label %invoke.cont25 unwind label %lpad2 +invoke.cont25: + invoke void @llvm.donothing() + to label %invoke.cont27 unwind label %lpad15 + +invoke.cont27: + invoke void @callB() + to label %invoke.cont75 unwind label %lpad15 + +invoke.cont75: + ret void + +lpad2: + %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + br label %eh.resume + +lpad15: + %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + br label %eh.resume + +eh.resume: + resume { i8*, i32 } zeroinitializer +} + +; CHECK: .globl _bar +define linkonce_odr void @bar(i32* %a) { +if.end.i.i.i: + invoke void @llvm.donothing() + to label %call.i.i.i.noexc unwind label %eh.resume + +call.i.i.i.noexc: + br i1 false, label %cleanup, label %new.notnull.i.i + +new.notnull.i.i: + br label %cleanup + +cleanup: + %0 = load i32* %a, align 4 + %inc294 = add nsw i32 %0, 4 + store i32 %inc294, i32* %a, align 4 + br i1 false, label %_ZN3lol5ArrayIivvvvvvvED1Ev.exit, label %delete.notnull.i.i.i1409 + +delete.notnull.i.i.i1409: + br label %_ZN3lol5ArrayIivvvvvvvED1Ev.exit + +_ZN3lol5ArrayIivvvvvvvED1Ev.exit: + ret void + +eh.resume: + %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + %2 = extractvalue { i8*, i32 } %1, 0 + %3 = extractvalue { i8*, i32 } %1, 1 + %lpad.val = insertvalue { i8*, i32 } undef, i8* %2, 0 + %lpad.val395 = insertvalue { i8*, i32 } %lpad.val, i32 %3, 1 + resume { i8*, i32 } %lpad.val395 +} + +declare void @callA() +declare void @callB() +declare void @llvm.donothing() nounwind readnone +declare i32 @__gxx_personality_sj0(...) diff --git a/test/CodeGen/ARM/lit.local.cfg b/test/CodeGen/ARM/lit.local.cfg index cb77b09ef4ad..4d75f581a1d2 100644 --- a/test/CodeGen/ARM/lit.local.cfg +++ b/test/CodeGen/ARM/lit.local.cfg @@ -1,4 +1,4 @@ -config.suffixes = ['.ll', '.c', '.cpp'] +config.suffixes = ['.ll', '.c', '.cpp', '.test'] targets = set(config.root.targets_to_build.split()) if not 'ARM' in targets: diff --git a/test/CodeGen/ARM/lsr-icmp-imm.ll b/test/CodeGen/ARM/lsr-icmp-imm.ll index 5283f5747d96..248c4bd1beea 100644 --- a/test/CodeGen/ARM/lsr-icmp-imm.ll +++ b/test/CodeGen/ARM/lsr-icmp-imm.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=thumbv7-apple-ios -disable-code-place < %s | FileCheck %s -; RUN: llc -mtriple=armv7-apple-ios -disable-code-place < %s | FileCheck %s +; RUN: llc -mtriple=thumbv7-apple-ios -disable-block-placement < %s | FileCheck %s +; RUN: llc -mtriple=armv7-apple-ios -disable-block-placement < %s | FileCheck %s ; LSR should compare against the post-incremented induction variable. ; In this case, the immediate value is -2 which requires a cmn instruction. 
diff --git a/test/CodeGen/ARM/machine-cse-cmp.ll b/test/CodeGen/ARM/machine-cse-cmp.ll index 3ac7d77d6f79..03abd762a261 100644 --- a/test/CodeGen/ARM/machine-cse-cmp.ll +++ b/test/CodeGen/ARM/machine-cse-cmp.ll @@ -45,3 +45,35 @@ for.cond1.preheader: ; preds = %entry } declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind + +; rdar://12462006 +define i8* @f3(i8* %base, i32* nocapture %offset, i32 %size) nounwind { +entry: +; CHECK: f3: +; CHECK-NOT: sub +; CHECK: cmp +; CHECK: blt +%0 = load i32* %offset, align 4 +%cmp = icmp slt i32 %0, %size +%s = sub nsw i32 %0, %size +%size2 = sub nsw i32 %size, 0 +br i1 %cmp, label %return, label %if.end + +if.end: +; We are checking cse between %sub here and %s in entry block. +%sub = sub nsw i32 %0, %size2 +%s2 = sub nsw i32 %s, %size +%s3 = sub nsw i32 %sub, %s2 +; CHECK: sub [[R1:r[0-9]+]], [[R2:r[0-9]+]], r2 +; CHECK: sub [[R3:r[0-9]+]], [[R1]], r2 +; CHECK: sub [[R4:r[0-9]+]], [[R1]], [[R3]] +; CHECK-NOT: sub +; CHECK: str +store i32 %s3, i32* %offset, align 4 +%add.ptr = getelementptr inbounds i8* %base, i32 %sub +br label %return + +return: +%retval.0 = phi i8* [ %add.ptr, %if.end ], [ null, %entry ] +ret i8* %retval.0 +} diff --git a/test/CodeGen/ARM/memcpy-inline.ll b/test/CodeGen/ARM/memcpy-inline.ll index dc772827f270..d846e5cb268b 100644 --- a/test/CodeGen/ARM/memcpy-inline.ll +++ b/test/CodeGen/ARM/memcpy-inline.ll @@ -1,18 +1,115 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -disable-post-ra | FileCheck %s - -; CHECK: ldrd -; CHECK: strd -; CHECK: ldrb +; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -pre-RA-sched=source -disable-post-ra | FileCheck %s %struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 } @src = external global %struct.x @dst = external global %struct.x -define i32 @t() { +@.str1 = private unnamed_addr constant [31 x i8] c"DHRYSTONE PROGRAM, SOME STRING\00", align 1 +@.str2 = private unnamed_addr constant [36 x i8] c"DHRYSTONE PROGRAM, SOME STRING BLAH\00", align 1 +@.str3 = private unnamed_addr constant [24 x i8] c"DHRYSTONE PROGRAM, SOME\00", align 1 +@.str4 = private unnamed_addr constant [18 x i8] c"DHRYSTONE PROGR \00", align 1 +@.str5 = private unnamed_addr constant [7 x i8] c"DHRYST\00", align 1 +@.str6 = private unnamed_addr constant [14 x i8] c"/tmp/rmXXXXXX\00", align 1 +@spool.splbuf = internal global [512 x i8] zeroinitializer, align 16 + +define i32 @t0() { entry: +; CHECK: t0: +; CHECK: vldr [[REG1:d[0-9]+]], +; CHECK: vstr [[REG1]], call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds (%struct.x* @dst, i32 0, i32 0), i8* getelementptr inbounds (%struct.x* @src, i32 0, i32 0), i32 11, i32 8, i1 false) ret i32 0 } +define void @t1(i8* nocapture %C) nounwind { +entry: +; CHECK: t1: +; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] +; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0] +; CHECK: adds r0, #15 +; CHECK: adds r1, #15 +; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] +; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0] + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([31 x i8]* @.str1, i64 0, i64 0), i64 31, i32 1, i1 false) + ret void +} + +define void @t2(i8* nocapture %C) nounwind { +entry: +; CHECK: t2: +; CHECK: ldr [[REG2:r[0-9]+]], [r1, #32] +; CHECK: str [[REG2]], [r0, #32] +; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] +; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0] +; CHECK: adds r0, #16 +; CHECK: adds r1, #16 +; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] +; CHECK: vst1.8 {d{{[0-9]+}}, 
d{{[0-9]+}}}, [r0] + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8]* @.str2, i64 0, i64 0), i64 36, i32 1, i1 false) + ret void +} + +define void @t3(i8* nocapture %C) nounwind { +entry: +; CHECK: t3: +; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] +; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0] +; CHECK: adds r0, #16 +; CHECK: adds r1, #16 +; CHECK: vld1.8 {d{{[0-9]+}}}, [r1] +; CHECK: vst1.8 {d{{[0-9]+}}}, [r0] + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([24 x i8]* @.str3, i64 0, i64 0), i64 24, i32 1, i1 false) + ret void +} + +define void @t4(i8* nocapture %C) nounwind { +entry: +; CHECK: t4: +; CHECK: vld1.8 {[[REG3:d[0-9]+]], [[REG4:d[0-9]+]]}, [r1] +; CHECK: vst1.8 {[[REG3]], [[REG4]]}, [r0] + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8]* @.str4, i64 0, i64 0), i64 18, i32 1, i1 false) + ret void +} + +define void @t5(i8* nocapture %C) nounwind { +entry: +; CHECK: t5: +; CHECK: movs [[REG5:r[0-9]+]], #0 +; CHECK: strb [[REG5]], [r0, #6] +; CHECK: movw [[REG6:r[0-9]+]], #21587 +; CHECK: strh [[REG6]], [r0, #4] +; CHECK: ldr [[REG7:r[0-9]+]], +; CHECK: str [[REG7]] + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8]* @.str5, i64 0, i64 0), i64 7, i32 1, i1 false) + ret void +} + +define void @t6() nounwind { +entry: +; CHECK: t6: +; CHECK: vld1.8 {[[REG8:d[0-9]+]]}, [r0] +; CHECK: vstr [[REG8]], [r1] +; CHECK: adds r1, #6 +; CHECK: adds r0, #6 +; CHECK: vld1.8 +; CHECK: vst1.16 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([512 x i8]* @spool.splbuf, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8]* @.str6, i64 0, i64 0), i64 14, i32 1, i1 false) + ret void +} + +%struct.Foo = type { i32, i32, i32, i32 } + +define void @t7(%struct.Foo* nocapture %a, %struct.Foo* nocapture %b) nounwind { +entry: +; CHECK: t7 +; CHECK: vld1.32 +; CHECK: vst1.32 + %0 = bitcast %struct.Foo* %a to i8* + %1 = bitcast %struct.Foo* %b to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 16, i32 4, i1 false) + ret void +} + declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind diff --git a/test/CodeGen/ARM/memset-inline.ll b/test/CodeGen/ARM/memset-inline.ll new file mode 100644 index 000000000000..ee8c36433885 --- /dev/null +++ b/test/CodeGen/ARM/memset-inline.ll @@ -0,0 +1,30 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -pre-RA-sched=source -disable-post-ra | FileCheck %s + +define void @t1(i8* nocapture %c) nounwind optsize { +entry: +; CHECK: t1: +; CHECK: movs r1, #0 +; CHECK: str r1, [r0] +; CHECK: str r1, [r0, #4] +; CHECK: str r1, [r0, #8] + call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 12, i32 8, i1 false) + ret void +} + +define void @t2() nounwind ssp { +entry: +; CHECK: t2: +; CHECK: add.w r1, r0, #10 +; CHECK: vmov.i32 {{q[0-9]+}}, #0x0 +; CHECK: vst1.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] +; CHECK: vst1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0] + %buf = alloca [26 x i8], align 1 + %0 = getelementptr inbounds [26 x i8]* %buf, i32 0, i32 0 + call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i32 1, i1 false) + call void @something(i8* %0) nounwind + ret void +} + +declare void @something(i8*) nounwind +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) 
nounwind diff --git a/test/CodeGen/ARM/neon-spfp.ll b/test/CodeGen/ARM/neon-spfp.ll new file mode 100644 index 000000000000..c00f0d17c9f5 --- /dev/null +++ b/test/CodeGen/ARM/neon-spfp.ll @@ -0,0 +1,76 @@ +; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a5 | FileCheck %s -check-prefix=LINUXA5 +; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a8 | FileCheck %s -check-prefix=LINUXA8 +; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a9 | FileCheck %s -check-prefix=LINUXA9 +; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a15 | FileCheck %s -check-prefix=LINUXA15 +; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=swift | FileCheck %s -check-prefix=LINUXSWIFT + +; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a5 --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFEA5 +; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFEA8 +; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a9 --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFEA9 +; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a15 --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFEA15 +; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=swift --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFESWIFT + +; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a5 | FileCheck %s -check-prefix=DARWINA5 +; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=DARWINA8 +; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a9 | FileCheck %s -check-prefix=DARWINA9 +; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a15 | FileCheck %s -check-prefix=DARWINA15 +; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=swift | FileCheck %s -check-prefix=DARWINSWIFT + +; This test makes sure we're not lowering VMUL.f32 D* (aka. NEON) for single-prec. FP ops, since +; NEON is not fully IEEE 754 compliant, unless unsafe-math is selected. 
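For context on the check lines that follow: the VFP form of the multiply operates on s registers and follows IEEE 754, while the NEON form operates on d/q registers and, among other deviations, flushes denormals to zero, which is why it is only acceptable under unsafe-fp-math (or on Swift, which the checks below treat as always unsafe). A minimal scalar reproducer, separate from the test in this patch and with an illustrative name, would be:

; Sketch only: under the default FP model llc is expected to select the
; s-register VFP multiply here; with --enable-unsafe-fp-math the NEON
; d-register form becomes an acceptable lowering as well.
define float @scalar_fmul(float %a, float %b) {
entry:
  %mul = fmul float %a, %b
  ret float %mul
}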
+ +@.str = private unnamed_addr constant [12 x i8] c"S317\09%.5g \0A\00", align 1 + +; CHECK-LINUXA5: main: +; CHECK-LINUXA8: main: +; CHECK-LINUXA9: main: +; CHECK-LINUXA15: main: +; CHECK-LINUXSWIFT: main: +; CHECK-UNSAFEA5: main: +; CHECK-UNSAFEA8: main: +; CHECK-UNSAFEA9: main: +; CHECK-UNSAFEA15: main: +; CHECK-UNSAFESWIFT: main: +; CHECK-DARWINA5: main: +; CHECK-DARWINA8: main: +; CHECK-DARWINA9: main: +; CHECK-DARWINA15: main: +; CHECK-DARWINSWIFT: main: +define i32 @main() { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.04 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %q.03 = phi float [ 1.000000e+00, %entry ], [ %mul, %for.body ] + %mul = fmul float %q.03, 0x3FEFAE1480000000 +; CHECK-LINUXA5: vmul.f32 s{{[0-9]*}} +; CHECK-LINUXA8: vmul.f32 s{{[0-9]*}} +; CHECK-LINUXA9: vmul.f32 s{{[0-9]*}} +; CHECK-LINUXA15: vmul.f32 s{{[0-9]*}} +; Swift is *always* unsafe +; CHECK-LINUXSWIFT: vmul.f32 d{{[0-9]*}} + +; CHECK-UNSAFEA5: vmul.f32 d{{[0-9]*}} +; CHECK-UNSAFEA8: vmul.f32 d{{[0-9]*}} +; A9 and A15 don't need this +; CHECK-UNSAFEA9: vmul.f32 s{{[0-9]*}} +; CHECK-UNSAFEA15: vmul.f32 s{{[0-9]*}} +; CHECK-UNSAFESWIFT: vmul.f32 d{{[0-9]*}} + +; CHECK-DARWINA5: vmul.f32 d{{[0-9]*}} +; CHECK-DARWINA8: vmul.f32 d{{[0-9]*}} +; CHECK-DARWINA9: vmul.f32 s{{[0-9]*}} +; CHECK-DARWINA15: vmul.f32 s{{[0-9]*}} +; CHECK-DARWINSWIFT: vmul.f32 d{{[0-9]*}} + %conv = fpext float %mul to double + %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0), double %conv) #1 + %inc = add nsw i32 %i.04, 1 + %exitcond = icmp eq i32 %inc, 16000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret i32 0 +} + +declare i32 @printf(i8* nocapture, ...) diff --git a/test/CodeGen/ARM/neon_cmp.ll b/test/CodeGen/ARM/neon_cmp.ll new file mode 100644 index 000000000000..046b5da22899 --- /dev/null +++ b/test/CodeGen/ARM/neon_cmp.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s +; bug 15283 +; radar://13191881 +; CHECK: vfcmp +define void @vfcmp(<2 x double>* %a, <2 x double>* %b) { + %wide.load = load <2 x double>* %a, align 4 + %wide.load2 = load <2 x double>* %b, align 4 +; CHECK-NOT: vdup.32 +; CHECK-NOT: vmovn.i64 + %v1 = fcmp olt <2 x double> %wide.load, %wide.load2 + %v2 = zext <2 x i1> %v1 to <2 x i32> + %v3 = sitofp <2 x i32> %v2 to <2 x double> + store <2 x double> %v3, <2 x double>* %b, align 4 + ret void +} diff --git a/test/CodeGen/ARM/neon_fpconv.ll b/test/CodeGen/ARM/neon_fpconv.ll new file mode 100644 index 000000000000..149f4c777003 --- /dev/null +++ b/test/CodeGen/ARM/neon_fpconv.ll @@ -0,0 +1,42 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +; PR12540: ARM backend lowering of FP_ROUND v2f64 to v2f32. +define <2 x float> @vtrunc(<2 x double> %a) { +; CHECK: vcvt.f32.f64 [[S0:s[0-9]+]], [[D0:d[0-9]+]] +; CHECK: vcvt.f32.f64 [[S1:s[0-9]+]], [[D1:d[0-9]+]] + %vt = fptrunc <2 x double> %a to <2 x float> + ret <2 x float> %vt +} + +define <2 x double> @vextend(<2 x float> %a) { +; CHECK: vcvt.f64.f32 [[D0:d[0-9]+]], [[S0:s[0-9]+]] +; CHECK: vcvt.f64.f32 [[D1:d[0-9]+]], [[S1:s[0-9]+]] + %ve = fpext <2 x float> %a to <2 x double> + ret <2 x double> %ve +} + +; We used to generate vmovs between scalar and vfp/neon registers. 
+; CHECK: vsitofp_double +define void @vsitofp_double(<2 x i32>* %loadaddr, + <2 x double>* %storeaddr) { + %v0 = load <2 x i32>* %loadaddr +; CHECK: vldr +; CHECK-NEXT: vcvt.f64.s32 +; CHECK-NEXT: vcvt.f64.s32 +; CHECK-NEXT: vst + %r = sitofp <2 x i32> %v0 to <2 x double> + store <2 x double> %r, <2 x double>* %storeaddr + ret void +} +; CHECK: vuitofp_double +define void @vuitofp_double(<2 x i32>* %loadaddr, + <2 x double>* %storeaddr) { + %v0 = load <2 x i32>* %loadaddr +; CHECK: vldr +; CHECK-NEXT: vcvt.f64.u32 +; CHECK-NEXT: vcvt.f64.u32 +; CHECK-NEXT: vst + %r = uitofp <2 x i32> %v0 to <2 x double> + store <2 x double> %r, <2 x double>* %storeaddr + ret void +} diff --git a/test/CodeGen/ARM/neon_ld2.ll b/test/CodeGen/ARM/neon_ld2.ll index 497619ed746a..25a670b09778 100644 --- a/test/CodeGen/ARM/neon_ld2.ll +++ b/test/CodeGen/ARM/neon_ld2.ll @@ -7,10 +7,10 @@ ; CHECK: vadd.i64 q ; CHECK: vst1.64 ; SWIFT: t1 -; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+, :128\]}} -; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+, :128\]}} +; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+:128\]}} +; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+:128\]}} ; SWIFT: vadd.i64 q -; SWIFT: vst1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+, :128\]}} +; SWIFT: vst1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+:128\]}} define void @t1(<4 x i32>* %r, <2 x i64>* %a, <2 x i64>* %b) nounwind { entry: %0 = load <2 x i64>* %a, align 16 ; <<2 x i64>> [#uses=1] @@ -28,8 +28,8 @@ entry: ; CHECK: vmov r0, r1, d ; CHECK: vmov r2, r3, d ; SWIFT: t2 -; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+, :128\]}} -; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+, :128\]}} +; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+:128\]}} +; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+:128\]}} ; SWIFT: vsub.i64 q ; SWIFT: vmov r0, r1, d ; SWIFT: vmov r2, r3, d diff --git a/test/CodeGen/ARM/neon_minmax.ll b/test/CodeGen/ARM/neon_minmax.ll index d301c6a4ca90..0a7c8b2b6aae 100644 --- a/test/CodeGen/ARM/neon_minmax.ll +++ b/test/CodeGen/ARM/neon_minmax.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s +; RUN: llc < %s -march=arm -mcpu=swift | FileCheck %s define float @fmin_ole(float %x) nounwind { ;CHECK: fmin_ole: diff --git a/test/CodeGen/ARM/popcnt.ll b/test/CodeGen/ARM/popcnt.ll new file mode 100644 index 000000000000..0b9c9467c206 --- /dev/null +++ b/test/CodeGen/ARM/popcnt.ll @@ -0,0 +1,191 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; Implement ctpop with vcnt + +define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind { +;CHECK: vcnt8: +;CHECK: vcnt.8 {{d[0-9]+}}, {{d[0-9]+}} + %tmp1 = load <8 x i8>* %A + %tmp2 = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %tmp1) + ret <8 x i8> %tmp2 +} + +define <16 x i8> @vcntQ8(<16 x i8>* %A) nounwind { +;CHECK: vcntQ8: +;CHECK: vcnt.8 {{q[0-9]+}}, {{q[0-9]+}} + %tmp1 = load <16 x i8>* %A + %tmp2 = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %tmp1) + ret <16 x i8> %tmp2 +} + +define <4 x i16> @vcnt16(<4 x i16>* %A) nounwind { +; CHECK: vcnt16: +; CHECK: vcnt.8 {{d[0-9]+}}, {{d[0-9]+}} +; CHECK: vrev16.8 {{d[0-9]+}}, {{d[0-9]+}} +; CHECK: vadd.i8 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +; CHECK: vuzp.8 {{d[0-9]+}}, {{d[0-9]+}} +; CHECK: vmovl.u8 {{q[0-9]+}}, {{d[0-9]+}} + %tmp1 = load <4 x i16>* %A + %tmp2 = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> %tmp1) + ret <4 x i16> %tmp2 +} + +define <8 x i16> @vcntQ16(<8 x i16>* %A) nounwind { +; CHECK: vcntQ16: +; CHECK: vcnt.8 {{q[0-9]+}}, {{q[0-9]+}} +; CHECK: vrev16.8 {{q[0-9]+}}, {{q[0-9]+}} +; CHECK: vadd.i8 {{q[0-9]+}}, {{q[0-9]+}}, 
{{q[0-9]+}} +; CHECK: vuzp.8 {{q[0-9]+}}, {{q[0-9]+}} +; CHECK: vmovl.u8 {{q[0-9]+}}, {{d[0-9]+}} + %tmp1 = load <8 x i16>* %A + %tmp2 = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %tmp1) + ret <8 x i16> %tmp2 +} + +define <2 x i32> @vcnt32(<2 x i32>* %A) nounwind { +; CHECK: vcnt32: +; CHECK: vcnt.8 {{d[0-9]+}}, {{d[0-9]+}} +; CHECK: vrev16.8 {{d[0-9]+}}, {{d[0-9]+}} +; CHECK: vadd.i8 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +; CHECK: vuzp.8 {{d[0-9]+}}, {{d[0-9]+}} +; CHECK: vmovl.u8 {{q[0-9]+}}, {{d[0-9]+}} +; CHECK: vrev32.16 {{d[0-9]+}}, {{d[0-9]+}} +; CHECK: vuzp.16 {{d[0-9]+}}, {{d[0-9]+}} +; CHECK: vmovl.u16 {{q[0-9]+}}, {{d[0-9]+}} + %tmp1 = load <2 x i32>* %A + %tmp2 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %tmp1) + ret <2 x i32> %tmp2 +} + +define <4 x i32> @vcntQ32(<4 x i32>* %A) nounwind { +; CHECK: vcntQ32: +; CHECK: vcnt.8 {{q[0-9]+}}, {{q[0-9]+}} +; CHECK: vrev16.8 {{q[0-9]+}}, {{q[0-9]+}} +; CHECK: vadd.i8 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}} +; CHECK: vuzp.8 {{q[0-9]+}}, {{q[0-9]+}} +; CHECK: vmovl.u8 {{q[0-9]+}}, {{d[0-9]+}} +; CHECK: vrev32.16 {{q[0-9]+}}, {{q[0-9]+}} +; CHECK: vuzp.16 {{q[0-9]+}}, {{q[0-9]+}} +; CHECK: vmovl.u16 {{q[0-9]+}}, {{d[0-9]+}} + %tmp1 = load <4 x i32>* %A + %tmp2 = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %tmp1) + ret <4 x i32> %tmp2 +} + +declare <8 x i8> @llvm.ctpop.v8i8(<8 x i8>) nounwind readnone +declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) nounwind readnone +declare <4 x i16> @llvm.ctpop.v4i16(<4 x i16>) nounwind readnone +declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>) nounwind readnone +declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>) nounwind readnone +declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) nounwind readnone + +define <8 x i8> @vclz8(<8 x i8>* %A) nounwind { +;CHECK: vclz8: +;CHECK: vclz.i8 {{d[0-9]+}}, {{d[0-9]+}} + %tmp1 = load <8 x i8>* %A + %tmp2 = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %tmp1, i1 0) + ret <8 x i8> %tmp2 +} + +define <4 x i16> @vclz16(<4 x i16>* %A) nounwind { +;CHECK: vclz16: +;CHECK: vclz.i16 {{d[0-9]+}}, {{d[0-9]+}} + %tmp1 = load <4 x i16>* %A + %tmp2 = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %tmp1, i1 0) + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vclz32(<2 x i32>* %A) nounwind { +;CHECK: vclz32: +;CHECK: vclz.i32 {{d[0-9]+}}, {{d[0-9]+}} + %tmp1 = load <2 x i32>* %A + %tmp2 = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %tmp1, i1 0) + ret <2 x i32> %tmp2 +} + +define <16 x i8> @vclzQ8(<16 x i8>* %A) nounwind { +;CHECK: vclzQ8: +;CHECK: vclz.i8 {{q[0-9]+}}, {{q[0-9]+}} + %tmp1 = load <16 x i8>* %A + %tmp2 = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %tmp1, i1 0) + ret <16 x i8> %tmp2 +} + +define <8 x i16> @vclzQ16(<8 x i16>* %A) nounwind { +;CHECK: vclzQ16: +;CHECK: vclz.i16 {{q[0-9]+}}, {{q[0-9]+}} + %tmp1 = load <8 x i16>* %A + %tmp2 = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %tmp1, i1 0) + ret <8 x i16> %tmp2 +} + +define <4 x i32> @vclzQ32(<4 x i32>* %A) nounwind { +;CHECK: vclzQ32: +;CHECK: vclz.i32 {{q[0-9]+}}, {{q[0-9]+}} + %tmp1 = load <4 x i32>* %A + %tmp2 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %tmp1, i1 0) + ret <4 x i32> %tmp2 +} + +declare <8 x i8> @llvm.ctlz.v8i8(<8 x i8>, i1) nounwind readnone +declare <4 x i16> @llvm.ctlz.v4i16(<4 x i16>, i1) nounwind readnone +declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone + +declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) nounwind readnone +declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) nounwind readnone +declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone + +define <8 x i8> @vclss8(<8 x i8>* %A) 
nounwind { +;CHECK: vclss8: +;CHECK: vcls.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %tmp1) + ret <8 x i8> %tmp2 +} + +define <4 x i16> @vclss16(<4 x i16>* %A) nounwind { +;CHECK: vclss16: +;CHECK: vcls.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %tmp1) + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vclss32(<2 x i32>* %A) nounwind { +;CHECK: vclss32: +;CHECK: vcls.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %tmp1) + ret <2 x i32> %tmp2 +} + +define <16 x i8> @vclsQs8(<16 x i8>* %A) nounwind { +;CHECK: vclsQs8: +;CHECK: vcls.s8 + %tmp1 = load <16 x i8>* %A + %tmp2 = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %tmp1) + ret <16 x i8> %tmp2 +} + +define <8 x i16> @vclsQs16(<8 x i16>* %A) nounwind { +;CHECK: vclsQs16: +;CHECK: vcls.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %tmp1) + ret <8 x i16> %tmp2 +} + +define <4 x i32> @vclsQs32(<4 x i32>* %A) nounwind { +;CHECK: vclsQs32: +;CHECK: vcls.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %tmp1) + ret <4 x i32> %tmp2 +} + +declare <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32>) nounwind readnone diff --git a/test/CodeGen/ARM/reg_asc_order.ll b/test/CodeGen/ARM/reg_asc_order.ll deleted file mode 100644 index d1d0ee5f3e7b..000000000000 --- a/test/CodeGen/ARM/reg_asc_order.ll +++ /dev/null @@ -1,16 +0,0 @@ -; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -; Check that memcpy gets lowered to ldm/stm, at least in this very smple case. 
- -%struct.Foo = type { i32, i32, i32, i32 } - -define void @_Z10CopyStructP3FooS0_(%struct.Foo* nocapture %a, %struct.Foo* nocapture %b) nounwind { -entry: -;CHECK: ldm -;CHECK: stm - %0 = bitcast %struct.Foo* %a to i8* - %1 = bitcast %struct.Foo* %b to i8* - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 16, i32 4, i1 false) - ret void -} - -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll index 6d6586e4f283..fd2083cf9f41 100644 --- a/test/CodeGen/ARM/reg_sequence.ll +++ b/test/CodeGen/ARM/reg_sequence.ll @@ -242,8 +242,8 @@ define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind { ; CHECK: vldr ; CHECK-NOT: vmov d{{.*}}, d16 ; CHECK: vmov.i32 d17 -; CHECK-NEXT: vst1.64 {d16, d17}, [r0, :128] -; CHECK-NEXT: vst1.64 {d16, d17}, [r0, :128] +; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128] +; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128] %3 = bitcast double 0.000000e+00 to <2 x float> ; <<2 x float>> [#uses=2] %4 = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1] store <4 x float> %4, <4 x float>* undef, align 16 diff --git a/test/CodeGen/ARM/ret_sret_vector.ll b/test/CodeGen/ARM/ret_sret_vector.ll new file mode 100644 index 000000000000..9bb3519555e8 --- /dev/null +++ b/test/CodeGen/ARM/ret_sret_vector.ll @@ -0,0 +1,12 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" +target triple = "thumbv7-apple-ios3.0.0" + +define <4 x double> @PR14337(<4 x double> %a, <4 x double> %b) { + %foo = fadd <4 x double> %a, %b + ret <4 x double> %foo +; CHECK: PR14337: +; CHECK: vst1.64 +; CHECK: vst1.64 +} diff --git a/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll b/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll new file mode 100644 index 000000000000..d8241d0dc380 --- /dev/null +++ b/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll @@ -0,0 +1,67 @@ +; RUN: llc < %s -O1 -mtriple thumbv7-apple-ios6 +; Just make sure no one tries to make the assumption that the normal edge of an +; invoke is never a critical edge. Previously, this code would assert. 
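To make the property being tested concrete: an edge is critical when its source block has several successors and its destination has several predecessors, and an invoke always has two successors, so its normal edge is critical whenever the normal destination is also reachable some other way. A minimal illustration, not taken from this patch (all names here are made up):

declare void @may_throw()
declare i32 @__gxx_personality_sj0(...)

define void @normal_edge_is_critical(i1 %c) {
entry:
  br i1 %c, label %do.call, label %skip

do.call:
  ; Two successors from here (%cont and %lpad) ...
  invoke void @may_throw()
          to label %cont unwind label %lpad

skip:
  br label %cont

cont:
  ; ... and two predecessors here, so the edge %do.call -> %cont is critical.
  ret void

lpad:
  %vals = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
          cleanup
  resume { i8*, i32 } %vals
}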
+ +%struct.__CFString = type opaque + +declare void @bar(%struct.__CFString*, %struct.__CFString*) + +define noalias i8* @foo(i8* nocapture %inRefURL) noreturn ssp { +entry: + %call = tail call %struct.__CFString* @bar3() + %call2 = invoke i8* @bar2() + to label %for.cond unwind label %lpad + +for.cond: ; preds = %entry, %for.cond + invoke void @bar(%struct.__CFString* undef, %struct.__CFString* null) + to label %for.cond unwind label %lpad5 + +lpad: ; preds = %entry + %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + %1 = extractvalue { i8*, i32 } %0, 0 + %2 = extractvalue { i8*, i32 } %0, 1 + br label %ehcleanup + +lpad5: ; preds = %for.cond + %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + %4 = extractvalue { i8*, i32 } %3, 0 + %5 = extractvalue { i8*, i32 } %3, 1 + invoke void @release(i8* %call2) + to label %ehcleanup unwind label %terminate.lpad.i.i16 + +terminate.lpad.i.i16: ; preds = %lpad5 + %6 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + catch i8* null + tail call void @terminatev() noreturn nounwind + unreachable + +ehcleanup: ; preds = %lpad5, %lpad + %exn.slot.0 = phi i8* [ %1, %lpad ], [ %4, %lpad5 ] + %ehselector.slot.0 = phi i32 [ %2, %lpad ], [ %5, %lpad5 ] + %7 = bitcast %struct.__CFString* %call to i8* + invoke void @release(i8* %7) + to label %_ZN5SmartIPK10__CFStringED1Ev.exit unwind label %terminate.lpad.i.i + +terminate.lpad.i.i: ; preds = %ehcleanup + %8 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + catch i8* null + tail call void @terminatev() noreturn nounwind + unreachable + +_ZN5SmartIPK10__CFStringED1Ev.exit: ; preds = %ehcleanup + %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn.slot.0, 0 + %lpad.val12 = insertvalue { i8*, i32 } %lpad.val, i32 %ehselector.slot.0, 1 + resume { i8*, i32 } %lpad.val12 +} + +declare %struct.__CFString* @bar3() + +declare i8* @bar2() + +declare i32 @__gxx_personality_sj0(...) 
+ +declare void @release(i8*) + +declare void @terminatev() diff --git a/test/CodeGen/ARM/spill-q.ll b/test/CodeGen/ARM/spill-q.ll index 057ea11389ac..e93cdbc10a46 100644 --- a/test/CodeGen/ARM/spill-q.ll +++ b/test/CodeGen/ARM/spill-q.ll @@ -12,8 +12,8 @@ declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly define void @aaa(%quuz* %this, i8* %block) { ; CHECK: aaa: ; CHECK: bic {{.*}}, #15 -; CHECK: vst1.64 {{.*}}sp, :128 -; CHECK: vld1.64 {{.*}}sp, :128 +; CHECK: vst1.64 {{.*}}sp:128 +; CHECK: vld1.64 {{.*}}sp:128 entry: %aligned_vec = alloca <4 x float>, align 16 %"alloca point" = bitcast i32 0 to i32 diff --git a/test/CodeGen/ARM/subreg-remat.ll b/test/CodeGen/ARM/subreg-remat.ll index 455bfce0f2e5..1bc0315354cb 100644 --- a/test/CodeGen/ARM/subreg-remat.ll +++ b/test/CodeGen/ARM/subreg-remat.ll @@ -12,7 +12,7 @@ target triple = "thumbv7-apple-ios" ; ; CHECK: f1 ; CHECK: vmov d0, r0, r0 -; CHECK: vldr s0, LCPI +; CHECK: vldr s1, LCPI ; The vector must be spilled: ; CHECK: vstr d0, ; CHECK: asm clobber d0 @@ -20,8 +20,8 @@ target triple = "thumbv7-apple-ios" ; CHECK: vldr [[D16:d[0-9]+]], ; CHECK: vstr [[D16]], [r1] define void @f1(float %x, <2 x float>* %p) { - %v1 = insertelement <2 x float> undef, float %x, i32 1 - %v2 = insertelement <2 x float> %v1, float 0x400921FB60000000, i32 0 + %v1 = insertelement <2 x float> undef, float %x, i32 0 + %v2 = insertelement <2 x float> %v1, float 0x400921FB60000000, i32 1 %y = call double asm sideeffect "asm clobber $0", "=w,0,~{d1},~{d2},~{d3},~{d4},~{d5},~{d6},~{d7},~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15},~{d16},~{d17},~{d18},~{d19},~{d20},~{d21},~{d22},~{d23},~{d24},~{d25},~{d26},~{d27},~{d28},~{d29},~{d30},~{d31}"(<2 x float> %v2) nounwind store <2 x float> %v2, <2 x float>* %p, align 8 ret void diff --git a/test/CodeGen/ARM/trap.ll b/test/CodeGen/ARM/trap.ll index 21865f8e4aed..a4e3c3c0efa9 100644 --- a/test/CodeGen/ARM/trap.ll +++ b/test/CodeGen/ARM/trap.ll @@ -1,5 +1,23 @@ ; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=INSTR ; RUN: llc < %s -mtriple=arm-apple-darwin -trap-func=_trap | FileCheck %s -check-prefix=FUNC +; RUN: llc -mtriple=armv7-unknown-nacl -filetype=obj %s -o - \ +; RUN: | llvm-objdump -disassemble -triple armv7-unknown-nacl - \ +; RUN: | FileCheck %s -check-prefix=ENCODING-NACL +; RUN: llc -mtriple=armv7-unknown-nacl -filetype=obj %s -o - \ +; RUN: | llvm-objdump -disassemble -triple armv7 -mattr=+nacl-trap - \ +; RUN: | FileCheck %s -check-prefix=ENCODING-NACL +; RUN: llc -mtriple=armv7 -mattr=+nacl-trap -filetype=obj %s -o - \ +; RUN: | llvm-objdump -disassemble -triple armv7 -mattr=+nacl-trap - \ +; RUN: | FileCheck %s -check-prefix=ENCODING-NACL +; RUN: llc -fast-isel -mtriple=armv7-unknown-nacl -filetype=obj %s -o - \ +; RUN: | llvm-objdump -disassemble -triple armv7-unknown-nacl - \ +; RUN: | FileCheck %s -check-prefix=ENCODING-NACL +; RUN: llc -mtriple=armv7 -filetype=obj %s -o - \ +; RUN: | llvm-objdump -disassemble -triple armv7 - \ +; RUN: | FileCheck %s -check-prefix=ENCODING-ALL +; RUN: llc -fast-isel -mtriple=armv7 -filetype=obj %s -o - \ +; RUN: | llvm-objdump -disassemble -triple armv7 - \ +; RUN: | FileCheck %s -check-prefix=ENCODING-ALL ; rdar://7961298 ; rdar://9249183 @@ -10,6 +28,11 @@ entry: ; FUNC: t: ; FUNC: bl __trap + +; ENCODING-NACL: f0 de fe e7 + +; ENCODING-ALL: fe de ff e7 + call void @llvm.trap() unreachable } @@ -21,6 +44,11 @@ entry: ; FUNC: t2: ; FUNC: bl __trap + +; ENCODING-NACL: f0 de fe e7 + +; ENCODING-ALL: fe de ff e7 + call 
void @llvm.debugtrap() unreachable } diff --git a/test/CodeGen/ARM/vcvt.ll b/test/CodeGen/ARM/vcvt.ll index c078f493094b..e67b4788a37d 100644 --- a/test/CodeGen/ARM/vcvt.ll +++ b/test/CodeGen/ARM/vcvt.ll @@ -156,3 +156,175 @@ define <4 x i16> @vcvt_f32tof16(<4 x float>* %A) nounwind { declare <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16>) nounwind readnone declare <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float>) nounwind readnone + +; We currently estimate the cost of sext/zext/trunc v8(v16)i32 <-> v8(v16)i8 +; instructions as expensive. If lowering is improved the cost model needs to +; change. +; RUN: opt < %s -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -march=arm -mcpu=cortex-a8 | FileCheck %s --check-prefix=COST +%T0_5 = type <8 x i8> +%T1_5 = type <8 x i32> +; CHECK: func_cvt5: +define void @func_cvt5(%T0_5* %loadaddr, %T1_5* %storeaddr) { +; CHECK: vmovl.s8 +; CHECK: vmovl.s16 +; CHECK: vmovl.s16 + %v0 = load %T0_5* %loadaddr +; COST: func_cvt5 +; COST: cost of 3 {{.*}} sext + %r = sext %T0_5 %v0 to %T1_5 + store %T1_5 %r, %T1_5* %storeaddr + ret void +} +;; We currently estimate the cost of this instruction as expensive. If lowering +;; is improved the cost needs to change. +%TA0_5 = type <8 x i8> +%TA1_5 = type <8 x i32> +; CHECK: func_cvt1: +define void @func_cvt1(%TA0_5* %loadaddr, %TA1_5* %storeaddr) { +; CHECK: vmovl.u8 +; CHECK: vmovl.u16 +; CHECK: vmovl.u16 + %v0 = load %TA0_5* %loadaddr +; COST: func_cvt1 +; COST: cost of 3 {{.*}} zext + %r = zext %TA0_5 %v0 to %TA1_5 + store %TA1_5 %r, %TA1_5* %storeaddr + ret void +} +;; We currently estimate the cost of this instruction as expensive. If lowering +;; is improved the cost needs to change. +%T0_51 = type <8 x i32> +%T1_51 = type <8 x i8> +; CHECK: func_cvt51: +define void @func_cvt51(%T0_51* %loadaddr, %T1_51* %storeaddr) { +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb + %v0 = load %T0_51* %loadaddr +; COST: func_cvt51 +; COST: cost of 19 {{.*}} trunc + %r = trunc %T0_51 %v0 to %T1_51 + store %T1_51 %r, %T1_51* %storeaddr + ret void +} +;; We currently estimate the cost of this instruction as expensive. If lowering +;; is improved the cost needs to change. +%TT0_5 = type <16 x i8> +%TT1_5 = type <16 x i32> +; CHECK: func_cvt52: +define void @func_cvt52(%TT0_5* %loadaddr, %TT1_5* %storeaddr) { +; CHECK: vmovl.s16 +; CHECK: vmovl.s16 +; CHECK: vmovl.s16 +; CHECK: vmovl.s16 + %v0 = load %TT0_5* %loadaddr +; COST: func_cvt52 +; COST: cost of 6 {{.*}} sext + %r = sext %TT0_5 %v0 to %TT1_5 + store %TT1_5 %r, %TT1_5* %storeaddr + ret void +} +;; We currently estimate the cost of this instruction as expensive. If lowering +;; is improved the cost needs to change. +%TTA0_5 = type <16 x i8> +%TTA1_5 = type <16 x i32> +; CHECK: func_cvt12: +define void @func_cvt12(%TTA0_5* %loadaddr, %TTA1_5* %storeaddr) { +; CHECK: vmovl.u16 +; CHECK: vmovl.u16 +; CHECK: vmovl.u16 +; CHECK: vmovl.u16 + %v0 = load %TTA0_5* %loadaddr +; COST: func_cvt12 +; COST: cost of 6 {{.*}} zext + %r = zext %TTA0_5 %v0 to %TTA1_5 + store %TTA1_5 %r, %TTA1_5* %storeaddr + ret void +} +;; We currently estimate the cost of this instruction as expensive. If lowering +;; is improved the cost needs to change. 
+%TT0_51 = type <16 x i32> +%TT1_51 = type <16 x i8> +; CHECK: func_cvt512: +define void @func_cvt512(%TT0_51* %loadaddr, %TT1_51* %storeaddr) { +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb + %v0 = load %TT0_51* %loadaddr +; COST: func_cvt512 +; COST: cost of 38 {{.*}} trunc + %r = trunc %TT0_51 %v0 to %TT1_51 + store %TT1_51 %r, %TT1_51* %storeaddr + ret void +} + +; CHECK: sext_v4i16_v4i64: +define void @sext_v4i16_v4i64(<4 x i16>* %loadaddr, <4 x i64>* %storeaddr) { +; CHECK: vmovl.s32 +; CHECK: vmovl.s32 + %v0 = load <4 x i16>* %loadaddr +; COST: sext_v4i16_v4i64 +; COST: cost of 3 {{.*}} sext + %r = sext <4 x i16> %v0 to <4 x i64> + store <4 x i64> %r, <4 x i64>* %storeaddr + ret void +} + +; CHECK: zext_v4i16_v4i64: +define void @zext_v4i16_v4i64(<4 x i16>* %loadaddr, <4 x i64>* %storeaddr) { +; CHECK: vmovl.u32 +; CHECK: vmovl.u32 + %v0 = load <4 x i16>* %loadaddr +; COST: zext_v4i16_v4i64 +; COST: cost of 3 {{.*}} zext + %r = zext <4 x i16> %v0 to <4 x i64> + store <4 x i64> %r, <4 x i64>* %storeaddr + ret void +} + +; CHECK: sext_v8i16_v8i64: +define void @sext_v8i16_v8i64(<8 x i16>* %loadaddr, <8 x i64>* %storeaddr) { +; CHECK: vmovl.s32 +; CHECK: vmovl.s32 +; CHECK: vmovl.s32 +; CHECK: vmovl.s32 + %v0 = load <8 x i16>* %loadaddr +; COST: sext_v8i16_v8i64 +; COST: cost of 6 {{.*}} sext + %r = sext <8 x i16> %v0 to <8 x i64> + store <8 x i64> %r, <8 x i64>* %storeaddr + ret void +} + +; CHECK: zext_v8i16_v8i64: +define void @zext_v8i16_v8i64(<8 x i16>* %loadaddr, <8 x i64>* %storeaddr) { +; CHECK: vmovl.u32 +; CHECK: vmovl.u32 +; CHECK: vmovl.u32 +; CHECK: vmovl.u32 + %v0 = load <8 x i16>* %loadaddr +; COST: zext_v8i16_v8i64 +; COST: cost of 6 {{.*}} zext + %r = zext <8 x i16> %v0 to <8 x i64> + store <8 x i64> %r, <8 x i64>* %storeaddr + ret void +} + diff --git a/test/CodeGen/ARM/vector-DAGCombine.ll b/test/CodeGen/ARM/vector-DAGCombine.ll index a38a0feae042..42964deb0b5e 100644 --- a/test/CodeGen/ARM/vector-DAGCombine.ll +++ b/test/CodeGen/ARM/vector-DAGCombine.ll @@ -133,3 +133,30 @@ define i16 @foldBuildVectors() { %3 = extractelement <8 x i16> %2, i32 0 ret i16 %3 } + +; Test that we are generating vrev and vext for reverse shuffles of v8i16 +; shuffles. +; CHECK: reverse_v8i16 +define void @reverse_v8i16(<8 x i16>* %loadaddr, <8 x i16>* %storeaddr) { + %v0 = load <8 x i16>* %loadaddr + ; CHECK: vrev64.16 + ; CHECK: vext.16 + %v1 = shufflevector <8 x i16> %v0, <8 x i16> undef, + <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + store <8 x i16> %v1, <8 x i16>* %storeaddr + ret void +} + +; Test that we are generating vrev and vext for reverse shuffles of v16i8 +; shuffles. 
+; CHECK: reverse_v16i8 +define void @reverse_v16i8(<16 x i8>* %loadaddr, <16 x i8>* %storeaddr) { + %v0 = load <16 x i8>* %loadaddr + ; CHECK: vrev64.8 + ; CHECK: vext.8 + %v1 = shufflevector <16 x i8> %v0, <16 x i8> undef, + <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, + i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + store <16 x i8> %v1, <16 x i8>* %storeaddr + ret void +} diff --git a/test/CodeGen/ARM/vfloatintrinsics.ll b/test/CodeGen/ARM/vfloatintrinsics.ll new file mode 100644 index 000000000000..6f53b2ccd96c --- /dev/null +++ b/test/CodeGen/ARM/vfloatintrinsics.ll @@ -0,0 +1,377 @@ +; RUN: llc -mcpu=swift -march=arm < %s | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" +target triple = "thumbv7-apple-ios6.1.0" + +;;; Float vectors + +%v2f32 = type <2 x float> +; CHECK: test_v2f32.sqrt: +define %v2f32 @test_v2f32.sqrt(%v2f32 %a) { + ; CHECK: sqrt + %1 = call %v2f32 @llvm.sqrt.v2f32(%v2f32 %a) + ret %v2f32 %1 +} +; CHECK: test_v2f32.powi: +define %v2f32 @test_v2f32.powi(%v2f32 %a, i32 %b) { + ; CHECK: pow + %1 = call %v2f32 @llvm.powi.v2f32(%v2f32 %a, i32 %b) + ret %v2f32 %1 +} +; CHECK: test_v2f32.sin: +define %v2f32 @test_v2f32.sin(%v2f32 %a) { + ; CHECK: sin + %1 = call %v2f32 @llvm.sin.v2f32(%v2f32 %a) + ret %v2f32 %1 +} +; CHECK: test_v2f32.cos: +define %v2f32 @test_v2f32.cos(%v2f32 %a) { + ; CHECK: cos + %1 = call %v2f32 @llvm.cos.v2f32(%v2f32 %a) + ret %v2f32 %1 +} +; CHECK: test_v2f32.pow: +define %v2f32 @test_v2f32.pow(%v2f32 %a, %v2f32 %b) { + ; CHECK: pow + %1 = call %v2f32 @llvm.pow.v2f32(%v2f32 %a, %v2f32 %b) + ret %v2f32 %1 +} +; CHECK: test_v2f32.exp: +define %v2f32 @test_v2f32.exp(%v2f32 %a) { + ; CHECK: exp + %1 = call %v2f32 @llvm.exp.v2f32(%v2f32 %a) + ret %v2f32 %1 +} +; CHECK: test_v2f32.exp2: +define %v2f32 @test_v2f32.exp2(%v2f32 %a) { + ; CHECK: exp + %1 = call %v2f32 @llvm.exp2.v2f32(%v2f32 %a) + ret %v2f32 %1 +} +; CHECK: test_v2f32.log: +define %v2f32 @test_v2f32.log(%v2f32 %a) { + ; CHECK: log + %1 = call %v2f32 @llvm.log.v2f32(%v2f32 %a) + ret %v2f32 %1 +} +; CHECK: test_v2f32.log10: +define %v2f32 @test_v2f32.log10(%v2f32 %a) { + ; CHECK: log + %1 = call %v2f32 @llvm.log10.v2f32(%v2f32 %a) + ret %v2f32 %1 +} +; CHECK: test_v2f32.log2: +define %v2f32 @test_v2f32.log2(%v2f32 %a) { + ; CHECK: log + %1 = call %v2f32 @llvm.log2.v2f32(%v2f32 %a) + ret %v2f32 %1 +} +; CHECK: test_v2f32.fma: +define %v2f32 @test_v2f32.fma(%v2f32 %a, %v2f32 %b, %v2f32 %c) { + ; CHECK: fma + %1 = call %v2f32 @llvm.fma.v2f32(%v2f32 %a, %v2f32 %b, %v2f32 %c) + ret %v2f32 %1 +} +; CHECK: test_v2f32.fabs: +define %v2f32 @test_v2f32.fabs(%v2f32 %a) { + ; CHECK: fabs + %1 = call %v2f32 @llvm.fabs.v2f32(%v2f32 %a) + ret %v2f32 %1 +} +; CHECK: test_v2f32.floor: +define %v2f32 @test_v2f32.floor(%v2f32 %a) { + ; CHECK: floor + %1 = call %v2f32 @llvm.floor.v2f32(%v2f32 %a) + ret %v2f32 %1 +} +; CHECK: test_v2f32.fceil: +define %v2f32 @test_v2f32.fceil(%v2f32 %a) { + ; CHECK: ceil + %1 = call %v2f32 @llvm.fceil.v2f32(%v2f32 %a) + ret %v2f32 %1 +} +; CHECK: test_v2f32.trunc: +define %v2f32 @test_v2f32.trunc(%v2f32 %a) { + ; CHECK: trunc + %1 = call %v2f32 @llvm.trunc.v2f32(%v2f32 %a) + ret %v2f32 %1 +} +; CHECK: test_v2f32.rint: +define %v2f32 @test_v2f32.rint(%v2f32 %a) { + ; CHECK: rint + %1 = call %v2f32 @llvm.rint.v2f32(%v2f32 %a) + ret %v2f32 %1 +} +; CHECK: test_v2f32.nearbyint: +define %v2f32 @test_v2f32.nearbyint(%v2f32 %a) { + ; CHECK: 
nearbyint + %1 = call %v2f32 @llvm.nearbyint.v2f32(%v2f32 %a) + ret %v2f32 %1 +} + +declare %v2f32 @llvm.sqrt.v2f32(%v2f32) #0 +declare %v2f32 @llvm.powi.v2f32(%v2f32, i32) #0 +declare %v2f32 @llvm.sin.v2f32(%v2f32) #0 +declare %v2f32 @llvm.cos.v2f32(%v2f32) #0 +declare %v2f32 @llvm.pow.v2f32(%v2f32, %v2f32) #0 +declare %v2f32 @llvm.exp.v2f32(%v2f32) #0 +declare %v2f32 @llvm.exp2.v2f32(%v2f32) #0 +declare %v2f32 @llvm.log.v2f32(%v2f32) #0 +declare %v2f32 @llvm.log10.v2f32(%v2f32) #0 +declare %v2f32 @llvm.log2.v2f32(%v2f32) #0 +declare %v2f32 @llvm.fma.v2f32(%v2f32, %v2f32, %v2f32) #0 +declare %v2f32 @llvm.fabs.v2f32(%v2f32) #0 +declare %v2f32 @llvm.floor.v2f32(%v2f32) #0 +declare %v2f32 @llvm.fceil.v2f32(%v2f32) #0 +declare %v2f32 @llvm.trunc.v2f32(%v2f32) #0 +declare %v2f32 @llvm.rint.v2f32(%v2f32) #0 +declare %v2f32 @llvm.nearbyint.v2f32(%v2f32) #0 + +;;; + +%v4f32 = type <4 x float> +; CHECK: test_v4f32.sqrt: +define %v4f32 @test_v4f32.sqrt(%v4f32 %a) { + ; CHECK: sqrt + %1 = call %v4f32 @llvm.sqrt.v4f32(%v4f32 %a) + ret %v4f32 %1 +} +; CHECK: test_v4f32.powi: +define %v4f32 @test_v4f32.powi(%v4f32 %a, i32 %b) { + ; CHECK: pow + %1 = call %v4f32 @llvm.powi.v4f32(%v4f32 %a, i32 %b) + ret %v4f32 %1 +} +; CHECK: test_v4f32.sin: +define %v4f32 @test_v4f32.sin(%v4f32 %a) { + ; CHECK: sin + %1 = call %v4f32 @llvm.sin.v4f32(%v4f32 %a) + ret %v4f32 %1 +} +; CHECK: test_v4f32.cos: +define %v4f32 @test_v4f32.cos(%v4f32 %a) { + ; CHECK: cos + %1 = call %v4f32 @llvm.cos.v4f32(%v4f32 %a) + ret %v4f32 %1 +} +; CHECK: test_v4f32.pow: +define %v4f32 @test_v4f32.pow(%v4f32 %a, %v4f32 %b) { + ; CHECK: pow + %1 = call %v4f32 @llvm.pow.v4f32(%v4f32 %a, %v4f32 %b) + ret %v4f32 %1 +} +; CHECK: test_v4f32.exp: +define %v4f32 @test_v4f32.exp(%v4f32 %a) { + ; CHECK: exp + %1 = call %v4f32 @llvm.exp.v4f32(%v4f32 %a) + ret %v4f32 %1 +} +; CHECK: test_v4f32.exp2: +define %v4f32 @test_v4f32.exp2(%v4f32 %a) { + ; CHECK: exp + %1 = call %v4f32 @llvm.exp2.v4f32(%v4f32 %a) + ret %v4f32 %1 +} +; CHECK: test_v4f32.log: +define %v4f32 @test_v4f32.log(%v4f32 %a) { + ; CHECK: log + %1 = call %v4f32 @llvm.log.v4f32(%v4f32 %a) + ret %v4f32 %1 +} +; CHECK: test_v4f32.log10: +define %v4f32 @test_v4f32.log10(%v4f32 %a) { + ; CHECK: log + %1 = call %v4f32 @llvm.log10.v4f32(%v4f32 %a) + ret %v4f32 %1 +} +; CHECK: test_v4f32.log2: +define %v4f32 @test_v4f32.log2(%v4f32 %a) { + ; CHECK: log + %1 = call %v4f32 @llvm.log2.v4f32(%v4f32 %a) + ret %v4f32 %1 +} +; CHECK: test_v4f32.fma: +define %v4f32 @test_v4f32.fma(%v4f32 %a, %v4f32 %b, %v4f32 %c) { + ; CHECK: fma + %1 = call %v4f32 @llvm.fma.v4f32(%v4f32 %a, %v4f32 %b, %v4f32 %c) + ret %v4f32 %1 +} +; CHECK: test_v4f32.fabs: +define %v4f32 @test_v4f32.fabs(%v4f32 %a) { + ; CHECK: fabs + %1 = call %v4f32 @llvm.fabs.v4f32(%v4f32 %a) + ret %v4f32 %1 +} +; CHECK: test_v4f32.floor: +define %v4f32 @test_v4f32.floor(%v4f32 %a) { + ; CHECK: floor + %1 = call %v4f32 @llvm.floor.v4f32(%v4f32 %a) + ret %v4f32 %1 +} +; CHECK: test_v4f32.fceil: +define %v4f32 @test_v4f32.fceil(%v4f32 %a) { + ; CHECK: ceil + %1 = call %v4f32 @llvm.fceil.v4f32(%v4f32 %a) + ret %v4f32 %1 +} +; CHECK: test_v4f32.trunc: +define %v4f32 @test_v4f32.trunc(%v4f32 %a) { + ; CHECK: trunc + %1 = call %v4f32 @llvm.trunc.v4f32(%v4f32 %a) + ret %v4f32 %1 +} +; CHECK: test_v4f32.rint: +define %v4f32 @test_v4f32.rint(%v4f32 %a) { + ; CHECK: rint + %1 = call %v4f32 @llvm.rint.v4f32(%v4f32 %a) + ret %v4f32 %1 +} +; CHECK: test_v4f32.nearbyint: +define %v4f32 @test_v4f32.nearbyint(%v4f32 %a) { + ; CHECK: nearbyint + %1 = call %v4f32 
@llvm.nearbyint.v4f32(%v4f32 %a) + ret %v4f32 %1 +} + +declare %v4f32 @llvm.sqrt.v4f32(%v4f32) #0 +declare %v4f32 @llvm.powi.v4f32(%v4f32, i32) #0 +declare %v4f32 @llvm.sin.v4f32(%v4f32) #0 +declare %v4f32 @llvm.cos.v4f32(%v4f32) #0 +declare %v4f32 @llvm.pow.v4f32(%v4f32, %v4f32) #0 +declare %v4f32 @llvm.exp.v4f32(%v4f32) #0 +declare %v4f32 @llvm.exp2.v4f32(%v4f32) #0 +declare %v4f32 @llvm.log.v4f32(%v4f32) #0 +declare %v4f32 @llvm.log10.v4f32(%v4f32) #0 +declare %v4f32 @llvm.log2.v4f32(%v4f32) #0 +declare %v4f32 @llvm.fma.v4f32(%v4f32, %v4f32, %v4f32) #0 +declare %v4f32 @llvm.fabs.v4f32(%v4f32) #0 +declare %v4f32 @llvm.floor.v4f32(%v4f32) #0 +declare %v4f32 @llvm.fceil.v4f32(%v4f32) #0 +declare %v4f32 @llvm.trunc.v4f32(%v4f32) #0 +declare %v4f32 @llvm.rint.v4f32(%v4f32) #0 +declare %v4f32 @llvm.nearbyint.v4f32(%v4f32) #0 + +;;; Double vector + +%v2f64 = type <2 x double> +; CHECK: test_v2f64.sqrt: +define %v2f64 @test_v2f64.sqrt(%v2f64 %a) { + ; CHECK: sqrt + %1 = call %v2f64 @llvm.sqrt.v2f64(%v2f64 %a) + ret %v2f64 %1 +} +; CHECK: test_v2f64.powi: +define %v2f64 @test_v2f64.powi(%v2f64 %a, i32 %b) { + ; CHECK: pow + %1 = call %v2f64 @llvm.powi.v2f64(%v2f64 %a, i32 %b) + ret %v2f64 %1 +} +; CHECK: test_v2f64.sin: +define %v2f64 @test_v2f64.sin(%v2f64 %a) { + ; CHECK: sin + %1 = call %v2f64 @llvm.sin.v2f64(%v2f64 %a) + ret %v2f64 %1 +} +; CHECK: test_v2f64.cos: +define %v2f64 @test_v2f64.cos(%v2f64 %a) { + ; CHECK: cos + %1 = call %v2f64 @llvm.cos.v2f64(%v2f64 %a) + ret %v2f64 %1 +} +; CHECK: test_v2f64.pow: +define %v2f64 @test_v2f64.pow(%v2f64 %a, %v2f64 %b) { + ; CHECK: pow + %1 = call %v2f64 @llvm.pow.v2f64(%v2f64 %a, %v2f64 %b) + ret %v2f64 %1 +} +; CHECK: test_v2f64.exp: +define %v2f64 @test_v2f64.exp(%v2f64 %a) { + ; CHECK: exp + %1 = call %v2f64 @llvm.exp.v2f64(%v2f64 %a) + ret %v2f64 %1 +} +; CHECK: test_v2f64.exp2: +define %v2f64 @test_v2f64.exp2(%v2f64 %a) { + ; CHECK: exp + %1 = call %v2f64 @llvm.exp2.v2f64(%v2f64 %a) + ret %v2f64 %1 +} +; CHECK: test_v2f64.log: +define %v2f64 @test_v2f64.log(%v2f64 %a) { + ; CHECK: log + %1 = call %v2f64 @llvm.log.v2f64(%v2f64 %a) + ret %v2f64 %1 +} +; CHECK: test_v2f64.log10: +define %v2f64 @test_v2f64.log10(%v2f64 %a) { + ; CHECK: log + %1 = call %v2f64 @llvm.log10.v2f64(%v2f64 %a) + ret %v2f64 %1 +} +; CHECK: test_v2f64.log2: +define %v2f64 @test_v2f64.log2(%v2f64 %a) { + ; CHECK: log + %1 = call %v2f64 @llvm.log2.v2f64(%v2f64 %a) + ret %v2f64 %1 +} +; CHECK: test_v2f64.fma: +define %v2f64 @test_v2f64.fma(%v2f64 %a, %v2f64 %b, %v2f64 %c) { + ; CHECK: fma + %1 = call %v2f64 @llvm.fma.v2f64(%v2f64 %a, %v2f64 %b, %v2f64 %c) + ret %v2f64 %1 +} +; CHECK: test_v2f64.fabs: +define %v2f64 @test_v2f64.fabs(%v2f64 %a) { + ; CHECK: fabs + %1 = call %v2f64 @llvm.fabs.v2f64(%v2f64 %a) + ret %v2f64 %1 +} +; CHECK: test_v2f64.floor: +define %v2f64 @test_v2f64.floor(%v2f64 %a) { + ; CHECK: floor + %1 = call %v2f64 @llvm.floor.v2f64(%v2f64 %a) + ret %v2f64 %1 +} +; CHECK: test_v2f64.fceil: +define %v2f64 @test_v2f64.fceil(%v2f64 %a) { + ; CHECK: ceil + %1 = call %v2f64 @llvm.fceil.v2f64(%v2f64 %a) + ret %v2f64 %1 +} +; CHECK: test_v2f64.trunc: +define %v2f64 @test_v2f64.trunc(%v2f64 %a) { + ; CHECK: trunc + %1 = call %v2f64 @llvm.trunc.v2f64(%v2f64 %a) + ret %v2f64 %1 +} +; CHECK: test_v2f64.rint: +define %v2f64 @test_v2f64.rint(%v2f64 %a) { + ; CHECK: rint + %1 = call %v2f64 @llvm.rint.v2f64(%v2f64 %a) + ret %v2f64 %1 +} +; CHECK: test_v2f64.nearbyint: +define %v2f64 @test_v2f64.nearbyint(%v2f64 %a) { + ; CHECK: nearbyint + %1 = call %v2f64 
@llvm.nearbyint.v2f64(%v2f64 %a) + ret %v2f64 %1 +} + +declare %v2f64 @llvm.sqrt.v2f64(%v2f64) #0 +declare %v2f64 @llvm.powi.v2f64(%v2f64, i32) #0 +declare %v2f64 @llvm.sin.v2f64(%v2f64) #0 +declare %v2f64 @llvm.cos.v2f64(%v2f64) #0 +declare %v2f64 @llvm.pow.v2f64(%v2f64, %v2f64) #0 +declare %v2f64 @llvm.exp.v2f64(%v2f64) #0 +declare %v2f64 @llvm.exp2.v2f64(%v2f64) #0 +declare %v2f64 @llvm.log.v2f64(%v2f64) #0 +declare %v2f64 @llvm.log10.v2f64(%v2f64) #0 +declare %v2f64 @llvm.log2.v2f64(%v2f64) #0 +declare %v2f64 @llvm.fma.v2f64(%v2f64, %v2f64, %v2f64) #0 +declare %v2f64 @llvm.fabs.v2f64(%v2f64) #0 +declare %v2f64 @llvm.floor.v2f64(%v2f64) #0 +declare %v2f64 @llvm.fceil.v2f64(%v2f64) #0 +declare %v2f64 @llvm.trunc.v2f64(%v2f64) #0 +declare %v2f64 @llvm.rint.v2f64(%v2f64) #0 +declare %v2f64 @llvm.nearbyint.v2f64(%v2f64) #0 + +attributes #0 = { nounwind readonly } diff --git a/test/CodeGen/ARM/vld1.ll b/test/CodeGen/ARM/vld1.ll index e524395c501a..994f05dacb84 100644 --- a/test/CodeGen/ARM/vld1.ll +++ b/test/CodeGen/ARM/vld1.ll @@ -4,7 +4,7 @@ define <8 x i8> @vld1i8(i8* %A) nounwind { ;CHECK: vld1i8: ;Check the alignment value. Max for this instruction is 64 bits: -;CHECK: vld1.8 {d16}, [r0, :64] +;CHECK: vld1.8 {d16}, [r0:64] %tmp1 = call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %A, i32 16) ret <8 x i8> %tmp1 } @@ -68,7 +68,7 @@ define <1 x i64> @vld1i64(i64* %A) nounwind { define <16 x i8> @vld1Qi8(i8* %A) nounwind { ;CHECK: vld1Qi8: ;Check the alignment value. Max for this instruction is 128 bits: -;CHECK: vld1.8 {d16, d17}, [r0, :64] +;CHECK: vld1.8 {d16, d17}, [r0:64] %tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A, i32 8) ret <16 x i8> %tmp1 } @@ -76,7 +76,7 @@ define <16 x i8> @vld1Qi8(i8* %A) nounwind { ;Check for a post-increment updating load. define <16 x i8> @vld1Qi8_update(i8** %ptr) nounwind { ;CHECK: vld1Qi8_update: -;CHECK: vld1.8 {d16, d17}, [{{r[0-9]+}}, :64]! +;CHECK: vld1.8 {d16, d17}, [{{r[0-9]+}}:64]! %A = load i8** %ptr %tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A, i32 8) %tmp2 = getelementptr i8* %A, i32 16 @@ -87,7 +87,7 @@ define <16 x i8> @vld1Qi8_update(i8** %ptr) nounwind { define <8 x i16> @vld1Qi16(i16* %A) nounwind { ;CHECK: vld1Qi16: ;Check the alignment value. Max for this instruction is 128 bits: -;CHECK: vld1.16 {d16, d17}, [r0, :128] +;CHECK: vld1.16 {d16, d17}, [r0:128] %tmp0 = bitcast i16* %A to i8* %tmp1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %tmp0, i32 32) ret <8 x i16> %tmp1 diff --git a/test/CodeGen/ARM/vld2.ll b/test/CodeGen/ARM/vld2.ll index 29b379465db5..caa016e929d8 100644 --- a/test/CodeGen/ARM/vld2.ll +++ b/test/CodeGen/ARM/vld2.ll @@ -14,7 +14,7 @@ define <8 x i8> @vld2i8(i8* %A) nounwind { ;CHECK: vld2i8: ;Check the alignment value. Max for this instruction is 128 bits: -;CHECK: vld2.8 {d16, d17}, [r0, :64] +;CHECK: vld2.8 {d16, d17}, [r0:64] %tmp1 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8* %A, i32 8) %tmp2 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 1 @@ -25,7 +25,7 @@ define <8 x i8> @vld2i8(i8* %A) nounwind { define <4 x i16> @vld2i16(i16* %A) nounwind { ;CHECK: vld2i16: ;Check the alignment value. 
Max for this instruction is 128 bits: -;CHECK: vld2.16 {d16, d17}, [r0, :128] +;CHECK: vld2.16 {d16, d17}, [r0:128] %tmp0 = bitcast i16* %A to i8* %tmp1 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8* %tmp0, i32 32) %tmp2 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 0 @@ -74,7 +74,7 @@ define <2 x float> @vld2f_update(float** %ptr) nounwind { define <1 x i64> @vld2i64(i64* %A) nounwind { ;CHECK: vld2i64: ;Check the alignment value. Max for this instruction is 128 bits: -;CHECK: vld1.64 {d16, d17}, [r0, :128] +;CHECK: vld1.64 {d16, d17}, [r0:128] %tmp0 = bitcast i64* %A to i8* %tmp1 = call %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8* %tmp0, i32 32) %tmp2 = extractvalue %struct.__neon_int64x1x2_t %tmp1, 0 @@ -86,7 +86,7 @@ define <1 x i64> @vld2i64(i64* %A) nounwind { define <16 x i8> @vld2Qi8(i8* %A) nounwind { ;CHECK: vld2Qi8: ;Check the alignment value. Max for this instruction is 256 bits: -;CHECK: vld2.8 {d16, d17, d18, d19}, [r0, :64] +;CHECK: vld2.8 {d16, d17, d18, d19}, [r0:64] %tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A, i32 8) %tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1 @@ -97,7 +97,7 @@ define <16 x i8> @vld2Qi8(i8* %A) nounwind { ;Check for a post-increment updating load with register increment. define <16 x i8> @vld2Qi8_update(i8** %ptr, i32 %inc) nounwind { ;CHECK: vld2Qi8_update: -;CHECK: vld2.8 {d16, d17, d18, d19}, [r2, :128], r1 +;CHECK: vld2.8 {d16, d17, d18, d19}, [r2:128], r1 %A = load i8** %ptr %tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A, i32 16) %tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0 @@ -111,7 +111,7 @@ define <16 x i8> @vld2Qi8_update(i8** %ptr, i32 %inc) nounwind { define <8 x i16> @vld2Qi16(i16* %A) nounwind { ;CHECK: vld2Qi16: ;Check the alignment value. Max for this instruction is 256 bits: -;CHECK: vld2.16 {d16, d17, d18, d19}, [r0, :128] +;CHECK: vld2.16 {d16, d17, d18, d19}, [r0:128] %tmp0 = bitcast i16* %A to i8* %tmp1 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8* %tmp0, i32 16) %tmp2 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 0 @@ -123,7 +123,7 @@ define <8 x i16> @vld2Qi16(i16* %A) nounwind { define <4 x i32> @vld2Qi32(i32* %A) nounwind { ;CHECK: vld2Qi32: ;Check the alignment value. Max for this instruction is 256 bits: -;CHECK: vld2.32 {d16, d17, d18, d19}, [r0, :256] +;CHECK: vld2.32 {d16, d17, d18, d19}, [r0:256] %tmp0 = bitcast i32* %A to i8* %tmp1 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp0, i32 64) %tmp2 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 0 diff --git a/test/CodeGen/ARM/vld3.ll b/test/CodeGen/ARM/vld3.ll index b495319830b0..ad63e1f716b2 100644 --- a/test/CodeGen/ARM/vld3.ll +++ b/test/CodeGen/ARM/vld3.ll @@ -15,7 +15,7 @@ define <8 x i8> @vld3i8(i8* %A) nounwind { ;CHECK: vld3i8: ;Check the alignment value. Max for this instruction is 64 bits: -;CHECK: vld3.8 {d16, d17, d18}, [r0, :64] +;CHECK: vld3.8 {d16, d17, d18}, [r0:64] %tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A, i32 32) %tmp2 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 2 @@ -74,7 +74,7 @@ define <2 x float> @vld3f(float* %A) nounwind { define <1 x i64> @vld3i64(i64* %A) nounwind { ;CHECK: vld3i64: ;Check the alignment value. 
Max for this instruction is 64 bits: -;CHECK: vld1.64 {d16, d17, d18}, [r0, :64] +;CHECK: vld1.64 {d16, d17, d18}, [r0:64] %tmp0 = bitcast i64* %A to i8* %tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8* %tmp0, i32 16) %tmp2 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 0 @@ -86,8 +86,8 @@ define <1 x i64> @vld3i64(i64* %A) nounwind { define <16 x i8> @vld3Qi8(i8* %A) nounwind { ;CHECK: vld3Qi8: ;Check the alignment value. Max for this instruction is 64 bits: -;CHECK: vld3.8 {d16, d18, d20}, [r0, :64]! -;CHECK: vld3.8 {d17, d19, d21}, [r0, :64] +;CHECK: vld3.8 {d16, d18, d20}, [r0:64]! +;CHECK: vld3.8 {d17, d19, d21}, [r0:64] %tmp1 = call %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8* %A, i32 32) %tmp2 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 2 diff --git a/test/CodeGen/ARM/vld4.ll b/test/CodeGen/ARM/vld4.ll index 59a73db3187e..9ee5fe46eea2 100644 --- a/test/CodeGen/ARM/vld4.ll +++ b/test/CodeGen/ARM/vld4.ll @@ -14,7 +14,7 @@ define <8 x i8> @vld4i8(i8* %A) nounwind { ;CHECK: vld4i8: ;Check the alignment value. Max for this instruction is 256 bits: -;CHECK: vld4.8 {d16, d17, d18, d19}, [r0, :64] +;CHECK: vld4.8 {d16, d17, d18, d19}, [r0:64] %tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A, i32 8) %tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 2 @@ -25,7 +25,7 @@ define <8 x i8> @vld4i8(i8* %A) nounwind { ;Check for a post-increment updating load with register increment. define <8 x i8> @vld4i8_update(i8** %ptr, i32 %inc) nounwind { ;CHECK: vld4i8_update: -;CHECK: vld4.8 {d16, d17, d18, d19}, [r2, :128], r1 +;CHECK: vld4.8 {d16, d17, d18, d19}, [r2:128], r1 %A = load i8** %ptr %tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A, i32 16) %tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0 @@ -39,7 +39,7 @@ define <8 x i8> @vld4i8_update(i8** %ptr, i32 %inc) nounwind { define <4 x i16> @vld4i16(i16* %A) nounwind { ;CHECK: vld4i16: ;Check the alignment value. Max for this instruction is 256 bits: -;CHECK: vld4.16 {d16, d17, d18, d19}, [r0, :128] +;CHECK: vld4.16 {d16, d17, d18, d19}, [r0:128] %tmp0 = bitcast i16* %A to i8* %tmp1 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8* %tmp0, i32 16) %tmp2 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 0 @@ -51,7 +51,7 @@ define <4 x i16> @vld4i16(i16* %A) nounwind { define <2 x i32> @vld4i32(i32* %A) nounwind { ;CHECK: vld4i32: ;Check the alignment value. Max for this instruction is 256 bits: -;CHECK: vld4.32 {d16, d17, d18, d19}, [r0, :256] +;CHECK: vld4.32 {d16, d17, d18, d19}, [r0:256] %tmp0 = bitcast i32* %A to i8* %tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8* %tmp0, i32 32) %tmp2 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 0 @@ -74,7 +74,7 @@ define <2 x float> @vld4f(float* %A) nounwind { define <1 x i64> @vld4i64(i64* %A) nounwind { ;CHECK: vld4i64: ;Check the alignment value. Max for this instruction is 256 bits: -;CHECK: vld1.64 {d16, d17, d18, d19}, [r0, :256] +;CHECK: vld1.64 {d16, d17, d18, d19}, [r0:256] %tmp0 = bitcast i64* %A to i8* %tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8* %tmp0, i32 64) %tmp2 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 0 @@ -86,8 +86,8 @@ define <1 x i64> @vld4i64(i64* %A) nounwind { define <16 x i8> @vld4Qi8(i8* %A) nounwind { ;CHECK: vld4Qi8: ;Check the alignment value. 
Max for this instruction is 256 bits: -;CHECK: vld4.8 {d16, d18, d20, d22}, [r0, :256]! -;CHECK: vld4.8 {d17, d19, d21, d23}, [r0, :256] +;CHECK: vld4.8 {d16, d18, d20, d22}, [r0:256]! +;CHECK: vld4.8 {d17, d19, d21, d23}, [r0:256] %tmp1 = call %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8* %A, i32 64) %tmp2 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 2 @@ -111,8 +111,8 @@ define <8 x i16> @vld4Qi16(i16* %A) nounwind { ;Check for a post-increment updating load. define <8 x i16> @vld4Qi16_update(i16** %ptr) nounwind { ;CHECK: vld4Qi16_update: -;CHECK: vld4.16 {d16, d18, d20, d22}, [r1, :64]! -;CHECK: vld4.16 {d17, d19, d21, d23}, [r1, :64]! +;CHECK: vld4.16 {d16, d18, d20, d22}, [r1:64]! +;CHECK: vld4.16 {d17, d19, d21, d23}, [r1:64]! %A = load i16** %ptr %tmp0 = bitcast i16* %A to i8* %tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8* %tmp0, i32 8) diff --git a/test/CodeGen/ARM/vlddup.ll b/test/CodeGen/ARM/vlddup.ll index c69473f87f98..7c7319c090ba 100644 --- a/test/CodeGen/ARM/vlddup.ll +++ b/test/CodeGen/ARM/vlddup.ll @@ -13,7 +13,7 @@ define <8 x i8> @vld1dupi8(i8* %A) nounwind { define <4 x i16> @vld1dupi16(i16* %A) nounwind { ;CHECK: vld1dupi16: ;Check the alignment value. Max for this instruction is 16 bits: -;CHECK: vld1.16 {d16[]}, [r0, :16] +;CHECK: vld1.16 {d16[]}, [r0:16] %tmp1 = load i16* %A, align 8 %tmp2 = insertelement <4 x i16> undef, i16 %tmp1, i32 0 %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> undef, <4 x i32> zeroinitializer @@ -23,7 +23,7 @@ define <4 x i16> @vld1dupi16(i16* %A) nounwind { define <2 x i32> @vld1dupi32(i32* %A) nounwind { ;CHECK: vld1dupi32: ;Check the alignment value. Max for this instruction is 32 bits: -;CHECK: vld1.32 {d16[]}, [r0, :32] +;CHECK: vld1.32 {d16[]}, [r0:32] %tmp1 = load i32* %A, align 8 %tmp2 = insertelement <2 x i32> undef, i32 %tmp1, i32 0 %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> undef, <2 x i32> zeroinitializer @@ -32,7 +32,7 @@ define <2 x i32> @vld1dupi32(i32* %A) nounwind { define <2 x float> @vld1dupf(float* %A) nounwind { ;CHECK: vld1dupf: -;CHECK: vld1.32 {d16[]}, [r0, :32] +;CHECK: vld1.32 {d16[]}, [r0:32] %tmp0 = load float* %A %tmp1 = insertelement <2 x float> undef, float %tmp0, i32 0 %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer @@ -51,7 +51,7 @@ define <16 x i8> @vld1dupQi8(i8* %A) nounwind { define <4 x float> @vld1dupQf(float* %A) nounwind { ;CHECK: vld1dupQf: -;CHECK: vld1.32 {d16[], d17[]}, [r0, :32] +;CHECK: vld1.32 {d16[], d17[]}, [r0:32] %tmp0 = load float* %A %tmp1 = insertelement <4 x float> undef, float %tmp0, i32 0 %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer @@ -109,7 +109,7 @@ define <4 x i16> @vld2dupi16_update(i16** %ptr) nounwind { define <2 x i32> @vld2dupi32(i8* %A) nounwind { ;CHECK: vld2dupi32: ;Check the alignment value. Max for this instruction is 64 bits: -;CHECK: vld2.32 {d16[], d17[]}, [r0, :64] +;CHECK: vld2.32 {d16[], d17[]}, [r0:64] %tmp0 = tail call %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %A, <2 x i32> undef, <2 x i32> undef, i32 0, i32 16) %tmp1 = extractvalue %struct.__neon_int2x32x2_t %tmp0, 0 %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer @@ -194,7 +194,7 @@ define <2 x i32> @vld4dupi32(i8* %A) nounwind { ;CHECK: vld4dupi32: ;Check the alignment value. 
An 8-byte alignment is allowed here even though ;it is smaller than the total size of the memory being loaded. -;CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [r0, :64] +;CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [r0:64] %tmp0 = tail call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %A, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 8) %tmp1 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 0 %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer diff --git a/test/CodeGen/ARM/vldlane.ll b/test/CodeGen/ARM/vldlane.ll index 7bd0cbda02b1..f35fa92f5dc7 100644 --- a/test/CodeGen/ARM/vldlane.ll +++ b/test/CodeGen/ARM/vldlane.ll @@ -14,7 +14,7 @@ define <8 x i8> @vld1lanei8(i8* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vld1lanei16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK: vld1lanei16: ;Check the alignment value. Max for this instruction is 16 bits: -;CHECK: vld1.16 {d16[2]}, [r0, :16] +;CHECK: vld1.16 {d16[2]}, [r0:16] %tmp1 = load <4 x i16>* %B %tmp2 = load i16* %A, align 8 %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 2 @@ -24,7 +24,7 @@ define <4 x i16> @vld1lanei16(i16* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vld1lanei32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK: vld1lanei32: ;Check the alignment value. Max for this instruction is 32 bits: -;CHECK: vld1.32 {d16[1]}, [r0, :32] +;CHECK: vld1.32 {d16[1]}, [r0:32] %tmp1 = load <2 x i32>* %B %tmp2 = load i32* %A, align 8 %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1 @@ -34,7 +34,7 @@ define <2 x i32> @vld1lanei32(i32* %A, <2 x i32>* %B) nounwind { define <2 x i32> @vld1lanei32a32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK: vld1lanei32a32: ;Check the alignment value. Legal values are none or :32. -;CHECK: vld1.32 {d16[1]}, [r0, :32] +;CHECK: vld1.32 {d16[1]}, [r0:32] %tmp1 = load <2 x i32>* %B %tmp2 = load i32* %A, align 4 %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1 @@ -43,7 +43,7 @@ define <2 x i32> @vld1lanei32a32(i32* %A, <2 x i32>* %B) nounwind { define <2 x float> @vld1lanef(float* %A, <2 x float>* %B) nounwind { ;CHECK: vld1lanef: -;CHECK: vld1.32 {d16[1]}, [r0, :32] +;CHECK: vld1.32 {d16[1]}, [r0:32] %tmp1 = load <2 x float>* %B %tmp2 = load float* %A, align 4 %tmp3 = insertelement <2 x float> %tmp1, float %tmp2, i32 1 @@ -61,7 +61,7 @@ define <16 x i8> @vld1laneQi8(i8* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vld1laneQi16(i16* %A, <8 x i16>* %B) nounwind { ;CHECK: vld1laneQi16: -;CHECK: vld1.16 {d17[1]}, [r0, :16] +;CHECK: vld1.16 {d17[1]}, [r0:16] %tmp1 = load <8 x i16>* %B %tmp2 = load i16* %A, align 8 %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 5 @@ -70,7 +70,7 @@ define <8 x i16> @vld1laneQi16(i16* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vld1laneQi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK: vld1laneQi32: -;CHECK: vld1.32 {d17[1]}, [r0, :32] +;CHECK: vld1.32 {d17[1]}, [r0:32] %tmp1 = load <4 x i32>* %B %tmp2 = load i32* %A, align 8 %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 3 @@ -79,7 +79,7 @@ define <4 x i32> @vld1laneQi32(i32* %A, <4 x i32>* %B) nounwind { define <4 x float> @vld1laneQf(float* %A, <4 x float>* %B) nounwind { ;CHECK: vld1laneQf: -;CHECK: vld1.32 {d16[0]}, [r0, :32] +;CHECK: vld1.32 {d16[0]}, [r0:32] %tmp1 = load <4 x float>* %B %tmp2 = load float* %A %tmp3 = insertelement <4 x float> %tmp1, float %tmp2, i32 0 @@ -98,7 +98,7 @@ define <4 x float> @vld1laneQf(float* %A, <4 x float>* %B) nounwind { define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: 
vld2lanei8: ;Check the alignment value. Max for this instruction is 16 bits: -;CHECK: vld2.8 {d16[1], d17[1]}, [r0, :16] +;CHECK: vld2.8 {d16[1], d17[1]}, [r0:16] %tmp1 = load <8 x i8>* %B %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4) %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0 @@ -110,7 +110,7 @@ define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vld2lanei16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK: vld2lanei16: ;Check the alignment value. Max for this instruction is 32 bits: -;CHECK: vld2.16 {d16[1], d17[1]}, [r0, :32] +;CHECK: vld2.16 {d16[1], d17[1]}, [r0:32] %tmp0 = bitcast i16* %A to i8* %tmp1 = load <4 x i16>* %B %tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8) @@ -176,7 +176,7 @@ define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK: vld2laneQi32: ;Check the alignment value. Max for this instruction is 64 bits: -;CHECK: vld2.32 {d17[0], d19[0]}, [{{r[0-9]+}}, :64] +;CHECK: vld2.32 {d17[0], d19[0]}, [{{r[0-9]+}}:64] %tmp0 = bitcast i32* %A to i8* %tmp1 = load <4 x i32>* %B %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16) @@ -354,7 +354,7 @@ declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x flo define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vld4lanei8: ;Check the alignment value. Max for this instruction is 32 bits: -;CHECK: vld4.8 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}, :32] +;CHECK: vld4.8 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}:32] %tmp1 = load <8 x i8>* %B %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8) %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0 @@ -370,7 +370,7 @@ define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind { ;Check for a post-increment updating load. define <8 x i8> @vld4lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind { ;CHECK: vld4lanei8_update: -;CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}, :32]! +;CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}:32]! %A = load i8** %ptr %tmp1 = load <8 x i8>* %B %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8) @@ -408,7 +408,7 @@ define <2 x i32> @vld4lanei32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK: vld4lanei32: ;Check the alignment value. An 8-byte alignment is allowed here even though ;it is smaller than the total size of the memory being loaded. -;CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}, :64] +;CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}:64] %tmp0 = bitcast i32* %A to i8* %tmp1 = load <2 x i32>* %B %tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 8) @@ -441,7 +441,7 @@ define <2 x float> @vld4lanef(float* %A, <2 x float>* %B) nounwind { define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind { ;CHECK: vld4laneQi16: ;Check the alignment value. 
Max for this instruction is 64 bits: -;CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [{{r[0-9]+}}, :64] +;CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [{{r[0-9]+}}:64] %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 16) diff --git a/test/CodeGen/ARM/vselect_imax.ll b/test/CodeGen/ARM/vselect_imax.ll index f5994046de4b..7e79d6c68c2b 100644 --- a/test/CodeGen/ARM/vselect_imax.ll +++ b/test/CodeGen/ARM/vselect_imax.ll @@ -10,3 +10,114 @@ define void @vmax_v4i32(<4 x i32>* %m, <4 x i32> %a, <4 x i32> %b) { ret void } +; We adjusted the cost model of the following selects. When we improve code +; lowering we also need to adjust the cost. +%T0_10 = type <16 x i16> +%T1_10 = type <16 x i1> +; CHECK: func_blend10: +define void @func_blend10(%T0_10* %loadaddr, %T0_10* %loadaddr2, + %T1_10* %blend, %T0_10* %storeaddr) { + %v0 = load %T0_10* %loadaddr + %v1 = load %T0_10* %loadaddr2 + %c = icmp slt %T0_10 %v0, %v1 +; CHECK: vst1 +; CHECK: vst1 +; CHECK: vst1 +; CHECK: vst1 +; CHECK: vld +; COST: func_blend10 +; COST: cost of 40 {{.*}} select + %r = select %T1_10 %c, %T0_10 %v0, %T0_10 %v1 + store %T0_10 %r, %T0_10* %storeaddr + ret void +} +%T0_14 = type <8 x i32> +%T1_14 = type <8 x i1> +; CHECK: func_blend14: +define void @func_blend14(%T0_14* %loadaddr, %T0_14* %loadaddr2, + %T1_14* %blend, %T0_14* %storeaddr) { + %v0 = load %T0_14* %loadaddr + %v1 = load %T0_14* %loadaddr2 + %c = icmp slt %T0_14 %v0, %v1 +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb +; COST: func_blend14 +; COST: cost of 41 {{.*}} select + %r = select %T1_14 %c, %T0_14 %v0, %T0_14 %v1 + store %T0_14 %r, %T0_14* %storeaddr + ret void +} +%T0_15 = type <16 x i32> +%T1_15 = type <16 x i1> +; CHECK: func_blend15: +define void @func_blend15(%T0_15* %loadaddr, %T0_15* %loadaddr2, + %T1_15* %blend, %T0_15* %storeaddr) { + %v0 = load %T0_15* %loadaddr + %v1 = load %T0_15* %loadaddr2 + %c = icmp slt %T0_15 %v0, %v1 +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb +; COST: func_blend15 +; COST: cost of 82 {{.*}} select + %r = select %T1_15 %c, %T0_15 %v0, %T0_15 %v1 + store %T0_15 %r, %T0_15* %storeaddr + ret void +} +%T0_18 = type <4 x i64> +%T1_18 = type <4 x i1> +; CHECK: func_blend18: +define void @func_blend18(%T0_18* %loadaddr, %T0_18* %loadaddr2, + %T1_18* %blend, %T0_18* %storeaddr) { + %v0 = load %T0_18* %loadaddr + %v1 = load %T0_18* %loadaddr2 + %c = icmp slt %T0_18 %v0, %v1 +; CHECK: strh +; CHECK: strh +; CHECK: strh +; CHECK: strh +; COST: func_blend18 +; COST: cost of 19 {{.*}} select + %r = select %T1_18 %c, %T0_18 %v0, %T0_18 %v1 + store %T0_18 %r, %T0_18* %storeaddr + ret void +} +%T0_19 = type <8 x i64> +%T1_19 = type <8 x i1> +; CHECK: func_blend19: +define void @func_blend19(%T0_19* %loadaddr, %T0_19* %loadaddr2, + %T1_19* %blend, %T0_19* %storeaddr) { + %v0 = load %T0_19* %loadaddr + %v1 = load %T0_19* %loadaddr2 + %c = icmp slt %T0_19 %v0, %v1 +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb +; COST: func_blend19 +; COST: cost of 50 {{.*}} select + %r = select %T1_19 %c, %T0_19 %v0, %T0_19 %v1 + store %T0_19 %r, %T0_19* %storeaddr + ret void +} +%T0_20 = type <16 x i64> +%T1_20 = type <16 x i1> +; CHECK: func_blend20: +define void @func_blend20(%T0_20* %loadaddr, %T0_20* %loadaddr2, + %T1_20* %blend, %T0_20* %storeaddr) { + %v0 = load %T0_20* %loadaddr + %v1 = load %T0_20* %loadaddr2 + %c = icmp slt %T0_20 
%v0, %v1 +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb +; COST: func_blend20 +; COST: cost of 100 {{.*}} select + %r = select %T1_20 %c, %T0_20 %v0, %T0_20 %v1 + store %T0_20 %r, %T0_20* %storeaddr + ret void +} diff --git a/test/CodeGen/ARM/vst1.ll b/test/CodeGen/ARM/vst1.ll index 364d44b7116f..e1f3e8890724 100644 --- a/test/CodeGen/ARM/vst1.ll +++ b/test/CodeGen/ARM/vst1.ll @@ -3,7 +3,7 @@ define void @vst1i8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vst1i8: ;Check the alignment value. Max for this instruction is 64 bits: -;CHECK: vst1.8 {d16}, [r0, :64] +;CHECK: vst1.8 {d16}, [r0:64] %tmp1 = load <8 x i8>* %B call void @llvm.arm.neon.vst1.v8i8(i8* %A, <8 x i8> %tmp1, i32 16) ret void @@ -61,7 +61,7 @@ define void @vst1i64(i64* %A, <1 x i64>* %B) nounwind { define void @vst1Qi8(i8* %A, <16 x i8>* %B) nounwind { ;CHECK: vst1Qi8: ;Check the alignment value. Max for this instruction is 128 bits: -;CHECK: vst1.8 {d16, d17}, [r0, :64] +;CHECK: vst1.8 {d16, d17}, [r0:64] %tmp1 = load <16 x i8>* %B call void @llvm.arm.neon.vst1.v16i8(i8* %A, <16 x i8> %tmp1, i32 8) ret void @@ -70,7 +70,7 @@ define void @vst1Qi8(i8* %A, <16 x i8>* %B) nounwind { define void @vst1Qi16(i16* %A, <8 x i16>* %B) nounwind { ;CHECK: vst1Qi16: ;Check the alignment value. Max for this instruction is 128 bits: -;CHECK: vst1.16 {d16, d17}, [r0, :128] +;CHECK: vst1.16 {d16, d17}, [r0:128] %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %tmp1, i32 32) @@ -80,7 +80,7 @@ define void @vst1Qi16(i16* %A, <8 x i16>* %B) nounwind { ;Check for a post-increment updating store with register increment. define void @vst1Qi16_update(i16** %ptr, <8 x i16>* %B, i32 %inc) nounwind { ;CHECK: vst1Qi16_update: -;CHECK: vst1.16 {d16, d17}, [r1, :64], r2 +;CHECK: vst1.16 {d16, d17}, [r1:64], r2 %A = load i16** %ptr %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B diff --git a/test/CodeGen/ARM/vst2.ll b/test/CodeGen/ARM/vst2.ll index fb05a20f6695..a31f8635fe3b 100644 --- a/test/CodeGen/ARM/vst2.ll +++ b/test/CodeGen/ARM/vst2.ll @@ -3,7 +3,7 @@ define void @vst2i8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vst2i8: ;Check the alignment value. Max for this instruction is 128 bits: -;CHECK: vst2.8 {d16, d17}, [r0, :64] +;CHECK: vst2.8 {d16, d17}, [r0:64] %tmp1 = load <8 x i8>* %B call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 8) ret void @@ -24,7 +24,7 @@ define void @vst2i8_update(i8** %ptr, <8 x i8>* %B, i32 %inc) nounwind { define void @vst2i16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK: vst2i16: ;Check the alignment value. Max for this instruction is 128 bits: -;CHECK: vst2.16 {d16, d17}, [r0, :128] +;CHECK: vst2.16 {d16, d17}, [r0:128] %tmp0 = bitcast i16* %A to i8* %tmp1 = load <4 x i16>* %B call void @llvm.arm.neon.vst2.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 32) @@ -52,7 +52,7 @@ define void @vst2f(float* %A, <2 x float>* %B) nounwind { define void @vst2i64(i64* %A, <1 x i64>* %B) nounwind { ;CHECK: vst2i64: ;Check the alignment value. Max for this instruction is 128 bits: -;CHECK: vst1.64 {d16, d17}, [r0, :128] +;CHECK: vst1.64 {d16, d17}, [r0:128] %tmp0 = bitcast i64* %A to i8* %tmp1 = load <1 x i64>* %B call void @llvm.arm.neon.vst2.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 32) @@ -62,7 +62,7 @@ define void @vst2i64(i64* %A, <1 x i64>* %B) nounwind { ;Check for a post-increment updating store. 
define void @vst2i64_update(i64** %ptr, <1 x i64>* %B) nounwind { ;CHECK: vst2i64_update: -;CHECK: vst1.64 {d16, d17}, [r1, :64]! +;CHECK: vst1.64 {d16, d17}, [r1:64]! %A = load i64** %ptr %tmp0 = bitcast i64* %A to i8* %tmp1 = load <1 x i64>* %B @@ -75,7 +75,7 @@ define void @vst2i64_update(i64** %ptr, <1 x i64>* %B) nounwind { define void @vst2Qi8(i8* %A, <16 x i8>* %B) nounwind { ;CHECK: vst2Qi8: ;Check the alignment value. Max for this instruction is 256 bits: -;CHECK: vst2.8 {d16, d17, d18, d19}, [r0, :64] +;CHECK: vst2.8 {d16, d17, d18, d19}, [r0:64] %tmp1 = load <16 x i8>* %B call void @llvm.arm.neon.vst2.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 8) ret void @@ -84,7 +84,7 @@ define void @vst2Qi8(i8* %A, <16 x i8>* %B) nounwind { define void @vst2Qi16(i16* %A, <8 x i16>* %B) nounwind { ;CHECK: vst2Qi16: ;Check the alignment value. Max for this instruction is 256 bits: -;CHECK: vst2.16 {d16, d17, d18, d19}, [r0, :128] +;CHECK: vst2.16 {d16, d17, d18, d19}, [r0:128] %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B call void @llvm.arm.neon.vst2.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 16) @@ -94,7 +94,7 @@ define void @vst2Qi16(i16* %A, <8 x i16>* %B) nounwind { define void @vst2Qi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK: vst2Qi32: ;Check the alignment value. Max for this instruction is 256 bits: -;CHECK: vst2.32 {d16, d17, d18, d19}, [r0, :256] +;CHECK: vst2.32 {d16, d17, d18, d19}, [r0:256] %tmp0 = bitcast i32* %A to i8* %tmp1 = load <4 x i32>* %B call void @llvm.arm.neon.vst2.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 64) diff --git a/test/CodeGen/ARM/vst3.ll b/test/CodeGen/ARM/vst3.ll index f117ab205d41..281bb730feb7 100644 --- a/test/CodeGen/ARM/vst3.ll +++ b/test/CodeGen/ARM/vst3.ll @@ -4,7 +4,7 @@ define void @vst3i8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vst3i8: ;Check the alignment value. Max for this instruction is 64 bits: ;This test runs at -O0 so do not check for specific register numbers. -;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}, :64] +;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}:64] %tmp1 = load <8 x i8>* %B call void @llvm.arm.neon.vst3.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 32) ret void @@ -54,7 +54,7 @@ define void @vst3i64(i64* %A, <1 x i64>* %B) nounwind { ;CHECK: vst3i64: ;Check the alignment value. Max for this instruction is 64 bits: ;This test runs at -O0 so do not check for specific register numbers. -;CHECK: vst1.64 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}, :64] +;CHECK: vst1.64 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}:64] %tmp0 = bitcast i64* %A to i8* %tmp1 = load <1 x i64>* %B call void @llvm.arm.neon.vst3.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 16) @@ -65,8 +65,8 @@ define void @vst3Qi8(i8* %A, <16 x i8>* %B) nounwind { ;CHECK: vst3Qi8: ;Check the alignment value. Max for this instruction is 64 bits: ;This test runs at -O0 so do not check for specific register numbers. -;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}, :64]! -;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}, :64] +;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}:64]! 
+;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}:64] %tmp1 = load <16 x i8>* %B call void @llvm.arm.neon.vst3.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 32) ret void diff --git a/test/CodeGen/ARM/vst4.ll b/test/CodeGen/ARM/vst4.ll index e94acb66bf2e..7dedb2fafee2 100644 --- a/test/CodeGen/ARM/vst4.ll +++ b/test/CodeGen/ARM/vst4.ll @@ -3,7 +3,7 @@ define void @vst4i8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vst4i8: ;Check the alignment value. Max for this instruction is 256 bits: -;CHECK: vst4.8 {d16, d17, d18, d19}, [r0, :64] +;CHECK: vst4.8 {d16, d17, d18, d19}, [r0:64] %tmp1 = load <8 x i8>* %B call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 8) ret void @@ -12,7 +12,7 @@ define void @vst4i8(i8* %A, <8 x i8>* %B) nounwind { ;Check for a post-increment updating store with register increment. define void @vst4i8_update(i8** %ptr, <8 x i8>* %B, i32 %inc) nounwind { ;CHECK: vst4i8_update: -;CHECK: vst4.8 {d16, d17, d18, d19}, [r1, :128], r2 +;CHECK: vst4.8 {d16, d17, d18, d19}, [r1:128], r2 %A = load i8** %ptr %tmp1 = load <8 x i8>* %B call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 16) @@ -24,7 +24,7 @@ define void @vst4i8_update(i8** %ptr, <8 x i8>* %B, i32 %inc) nounwind { define void @vst4i16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK: vst4i16: ;Check the alignment value. Max for this instruction is 256 bits: -;CHECK: vst4.16 {d16, d17, d18, d19}, [r0, :128] +;CHECK: vst4.16 {d16, d17, d18, d19}, [r0:128] %tmp0 = bitcast i16* %A to i8* %tmp1 = load <4 x i16>* %B call void @llvm.arm.neon.vst4.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 16) @@ -34,7 +34,7 @@ define void @vst4i16(i16* %A, <4 x i16>* %B) nounwind { define void @vst4i32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK: vst4i32: ;Check the alignment value. Max for this instruction is 256 bits: -;CHECK: vst4.32 {d16, d17, d18, d19}, [r0, :256] +;CHECK: vst4.32 {d16, d17, d18, d19}, [r0:256] %tmp0 = bitcast i32* %A to i8* %tmp1 = load <2 x i32>* %B call void @llvm.arm.neon.vst4.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 32) @@ -53,7 +53,7 @@ define void @vst4f(float* %A, <2 x float>* %B) nounwind { define void @vst4i64(i64* %A, <1 x i64>* %B) nounwind { ;CHECK: vst4i64: ;Check the alignment value. Max for this instruction is 256 bits: -;CHECK: vst1.64 {d16, d17, d18, d19}, [r0, :256] +;CHECK: vst1.64 {d16, d17, d18, d19}, [r0:256] %tmp0 = bitcast i64* %A to i8* %tmp1 = load <1 x i64>* %B call void @llvm.arm.neon.vst4.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 64) @@ -63,8 +63,8 @@ define void @vst4i64(i64* %A, <1 x i64>* %B) nounwind { define void @vst4Qi8(i8* %A, <16 x i8>* %B) nounwind { ;CHECK: vst4Qi8: ;Check the alignment value. Max for this instruction is 256 bits: -;CHECK: vst4.8 {d16, d18, d20, d22}, [r0, :256]! -;CHECK: vst4.8 {d17, d19, d21, d23}, [r0, :256] +;CHECK: vst4.8 {d16, d18, d20, d22}, [r0:256]! 
+;CHECK: vst4.8 {d17, d19, d21, d23}, [r0:256] %tmp1 = load <16 x i8>* %B call void @llvm.arm.neon.vst4.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 64) ret void diff --git a/test/CodeGen/ARM/vstlane.ll b/test/CodeGen/ARM/vstlane.ll index 758b355736d0..67f251f70689 100644 --- a/test/CodeGen/ARM/vstlane.ll +++ b/test/CodeGen/ARM/vstlane.ll @@ -26,7 +26,7 @@ define void @vst1lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind { define void @vst1lanei16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK: vst1lanei16: ;Check the alignment value. Max for this instruction is 16 bits: -;CHECK: vst1.16 {d16[2]}, [r0, :16] +;CHECK: vst1.16 {d16[2]}, [r0:16] %tmp1 = load <4 x i16>* %B %tmp2 = extractelement <4 x i16> %tmp1, i32 2 store i16 %tmp2, i16* %A, align 8 @@ -36,7 +36,7 @@ define void @vst1lanei16(i16* %A, <4 x i16>* %B) nounwind { define void @vst1lanei32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK: vst1lanei32: ;Check the alignment value. Max for this instruction is 32 bits: -;CHECK: vst1.32 {d16[1]}, [r0, :32] +;CHECK: vst1.32 {d16[1]}, [r0:32] %tmp1 = load <2 x i32>* %B %tmp2 = extractelement <2 x i32> %tmp1, i32 1 store i32 %tmp2, i32* %A, align 8 @@ -45,7 +45,7 @@ define void @vst1lanei32(i32* %A, <2 x i32>* %B) nounwind { define void @vst1lanef(float* %A, <2 x float>* %B) nounwind { ;CHECK: vst1lanef: -;CHECK: vst1.32 {d16[1]}, [r0, :32] +;CHECK: vst1.32 {d16[1]}, [r0:32] %tmp1 = load <2 x float>* %B %tmp2 = extractelement <2 x float> %tmp1, i32 1 store float %tmp2, float* %A @@ -64,7 +64,7 @@ define void @vst1laneQi8(i8* %A, <16 x i8>* %B) nounwind { define void @vst1laneQi16(i16* %A, <8 x i16>* %B) nounwind { ;CHECK: vst1laneQi16: -;CHECK: vst1.16 {d17[1]}, [r0, :16] +;CHECK: vst1.16 {d17[1]}, [r0:16] %tmp1 = load <8 x i16>* %B %tmp2 = extractelement <8 x i16> %tmp1, i32 5 store i16 %tmp2, i16* %A, align 8 @@ -74,7 +74,7 @@ define void @vst1laneQi16(i16* %A, <8 x i16>* %B) nounwind { define void @vst1laneQi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK: vst1laneQi32: ; // Can use scalar load. No need to use vectors. -; // CHE-CK: vst1.32 {d17[1]}, [r0, :32] +; // CHE-CK: vst1.32 {d17[1]}, [r0:32] %tmp1 = load <4 x i32>* %B %tmp2 = extractelement <4 x i32> %tmp1, i32 3 store i32 %tmp2, i32* %A, align 8 @@ -85,7 +85,7 @@ define void @vst1laneQi32(i32* %A, <4 x i32>* %B) nounwind { define void @vst1laneQi32_update(i32** %ptr, <4 x i32>* %B) nounwind { ;CHECK: vst1laneQi32_update: ; // Can use scalar load. No need to use vectors. -; // CHE-CK: vst1.32 {d17[1]}, [r1, :32]! +; // CHE-CK: vst1.32 {d17[1]}, [r1:32]! %A = load i32** %ptr %tmp1 = load <4 x i32>* %B %tmp2 = extractelement <4 x i32> %tmp1, i32 3 @@ -108,7 +108,7 @@ define void @vst1laneQf(float* %A, <4 x float>* %B) nounwind { define void @vst2lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vst2lanei8: ;Check the alignment value. Max for this instruction is 16 bits: -;CHECK: vst2.8 {d16[1], d17[1]}, [r0, :16] +;CHECK: vst2.8 {d16[1], d17[1]}, [r0:16] %tmp1 = load <8 x i8>* %B call void @llvm.arm.neon.vst2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4) ret void @@ -117,7 +117,7 @@ define void @vst2lanei8(i8* %A, <8 x i8>* %B) nounwind { define void @vst2lanei16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK: vst2lanei16: ;Check the alignment value. 
Max for this instruction is 32 bits: -;CHECK: vst2.16 {d16[1], d17[1]}, [r0, :32] +;CHECK: vst2.16 {d16[1], d17[1]}, [r0:32] %tmp0 = bitcast i16* %A to i8* %tmp1 = load <4 x i16>* %B call void @llvm.arm.neon.vst2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8) @@ -168,7 +168,7 @@ define void @vst2laneQi16(i16* %A, <8 x i16>* %B) nounwind { define void @vst2laneQi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK: vst2laneQi32: ;Check the alignment value. Max for this instruction is 64 bits: -;CHECK: vst2.32 {d17[0], d19[0]}, [r0, :64] +;CHECK: vst2.32 {d17[0], d19[0]}, [r0:64] %tmp0 = bitcast i32* %A to i8* %tmp1 = load <4 x i32>* %B call void @llvm.arm.neon.vst2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16) @@ -283,7 +283,7 @@ declare void @llvm.arm.neon.vst3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x f define void @vst4lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vst4lanei8: ;Check the alignment value. Max for this instruction is 32 bits: -;CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32] +;CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0:32] %tmp1 = load <8 x i8>* %B call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8) ret void @@ -292,7 +292,7 @@ define void @vst4lanei8(i8* %A, <8 x i8>* %B) nounwind { ;Check for a post-increment updating store. define void @vst4lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind { ;CHECK: vst4lanei8_update: -;CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32]! +;CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1:32]! %A = load i8** %ptr %tmp1 = load <8 x i8>* %B call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8) @@ -313,7 +313,7 @@ define void @vst4lanei16(i16* %A, <4 x i16>* %B) nounwind { define void @vst4lanei32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK: vst4lanei32: ;Check the alignment value. Max for this instruction is 128 bits: -;CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :128] +;CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0:128] %tmp0 = bitcast i32* %A to i8* %tmp1 = load <2 x i32>* %B call void @llvm.arm.neon.vst4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 16) @@ -332,7 +332,7 @@ define void @vst4lanef(float* %A, <2 x float>* %B) nounwind { define void @vst4laneQi16(i16* %A, <8 x i16>* %B) nounwind { ;CHECK: vst4laneQi16: ;Check the alignment value. Max for this instruction is 64 bits: -;CHECK: vst4.16 {d17[3], d19[3], d21[3], d23[3]}, [r0, :64] +;CHECK: vst4.16 {d17[3], d19[3], d21[3], d23[3]}, [r0:64] %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B call void @llvm.arm.neon.vst4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 7, i32 16) diff --git a/test/CodeGen/ARM/widen-vmovs.ll b/test/CodeGen/ARM/widen-vmovs.ll index 679e3f434733..1efbc73650d8 100644 --- a/test/CodeGen/ARM/widen-vmovs.ll +++ b/test/CodeGen/ARM/widen-vmovs.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -widen-vmovs -mcpu=cortex-a8 -verify-machineinstrs -disable-code-place | FileCheck %s +; RUN: llc < %s -widen-vmovs -mcpu=cortex-a8 -verify-machineinstrs -disable-block-placement | FileCheck %s target triple = "thumbv7-apple-ios" ; The 1.0e+10 constant is loaded from the constant pool and kept in a register. 
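; Editorial sketch, not part of this patch: the vst/vld CHECK updates above all track the
; same assembler change, namely the NEON address-alignment qualifier now printing as
; [rN:bits] with no comma. A minimal self-contained test in the new style, mirroring
; vst1i8 from vst1.ll above; the function name is hypothetical and the RUN line is
; assumed to match that file's.
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define void @vst1i8_sketch(i8* %A, <8 x i8>* %B) nounwind {
; CHECK: vst1.8 {d{{[0-9]+}}}, [r0:64]
  %tmp1 = load <8 x i8>* %B
  call void @llvm.arm.neon.vst1.v8i8(i8* %A, <8 x i8> %tmp1, i32 16)
  ret void
}
declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32) nounwind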
diff --git a/test/CodeGen/ARM/zextload_demandedbits.ll b/test/CodeGen/ARM/zextload_demandedbits.ll new file mode 100644 index 000000000000..3d3269cae236 --- /dev/null +++ b/test/CodeGen/ARM/zextload_demandedbits.ll @@ -0,0 +1,35 @@ +; RUN: llc < %s -march=arm -mtriple="thumbv7-apple-ios3.0.0" | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" + +%struct.eggs = type { %struct.spam, i16 } +%struct.spam = type { [3 x i32] } +%struct.barney = type { [2 x i32], [2 x i32] } + +; Make sure that the sext op does not get lost due to ComputeMaskedBits. +; CHECK: quux +; CHECK: lsl +; CHECK: asr +; CHECK: bl +; CHECK: pop +define void @quux(%struct.eggs* %arg) { +bb: + %tmp1 = getelementptr inbounds %struct.eggs* %arg, i32 0, i32 1 + %0 = load i16* %tmp1, align 2 + %tobool = icmp eq i16 %0, 0 + br i1 %tobool, label %bb16, label %bb3 + +bb3: ; preds = %bb + %tmp4 = bitcast i16* %tmp1 to i8* + %tmp5 = ptrtoint i16* %tmp1 to i32 + %tmp6 = shl i32 %tmp5, 20 + %tmp7 = ashr exact i32 %tmp6, 20 + %tmp14 = getelementptr inbounds %struct.barney* undef, i32 %tmp7 + %tmp15 = tail call i32 @widget(%struct.barney* %tmp14, i8* %tmp4, i32 %tmp7) + br label %bb16 + +bb16: ; preds = %bb3, %bb + ret void +} + +declare i32 @widget(%struct.barney*, i8*, i32) diff --git a/test/CodeGen/CPP/2007-06-16-Funcname.ll b/test/CodeGen/CPP/2007-06-16-Funcname.ll index 16e9798481fe..71fea12d9c2c 100644 --- a/test/CodeGen/CPP/2007-06-16-Funcname.ll +++ b/test/CodeGen/CPP/2007-06-16-Funcname.ll @@ -5,4 +5,3 @@ define void @foo() { ret void } - diff --git a/test/CodeGen/CellSPU/2009-01-01-BrCond.ll b/test/CodeGen/CellSPU/2009-01-01-BrCond.ll deleted file mode 100644 index 35422311c574..000000000000 --- a/test/CodeGen/CellSPU/2009-01-01-BrCond.ll +++ /dev/null @@ -1,31 +0,0 @@ -; RUN: llc < %s -march=cellspu -o - | grep brz -; PR3274 - -target datalayout = "E-p:32:32:128-i1:8:128-i8:8:128-i16:16:128-i32:32:128-i64:32:128-f32:32:128-f64:64:128-v64:64:64-v128:128:128-a0:0:128-s0:128:128" -target triple = "spu" - %struct.anon = type { i64 } - %struct.fp_number_type = type { i32, i32, i32, [4 x i8], %struct.anon } - -define double @__floatunsidf(i32 %arg_a) nounwind { -entry: - %in = alloca %struct.fp_number_type, align 16 - %0 = getelementptr %struct.fp_number_type* %in, i32 0, i32 1 - store i32 0, i32* %0, align 4 - %1 = icmp eq i32 %arg_a, 0 - %2 = getelementptr %struct.fp_number_type* %in, i32 0, i32 0 - br i1 %1, label %bb, label %bb1 - -bb: ; preds = %entry - store i32 2, i32* %2, align 8 - br label %bb7 - -bb1: ; preds = %entry - ret double 0.0 - -bb7: ; preds = %bb5, %bb1, %bb - ret double 1.0 -} - -; declare i32 @llvm.ctlz.i32(i32) nounwind readnone - -declare double @__pack_d(%struct.fp_number_type*) diff --git a/test/CodeGen/CellSPU/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/CellSPU/2010-04-07-DbgValueOtherTargets.ll deleted file mode 100644 index 401399face9a..000000000000 --- a/test/CodeGen/CellSPU/2010-04-07-DbgValueOtherTargets.ll +++ /dev/null @@ -1,28 +0,0 @@ -; RUN: llc -O0 -march=cellspu -asm-verbose < %s | FileCheck %s -; Check that DEBUG_VALUE comments come through on a variety of targets. 
- -define i32 @main() nounwind ssp { -entry: -; CHECK: DEBUG_VALUE - call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9 - ret i32 0, !dbg !10 -} - -declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone - -declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone - -!llvm.dbg.sp = !{!0} - -!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] -!4 = metadata !{metadata !5} -!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 0} -!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] -!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] -!9 = metadata !{i32 3, i32 11, metadata !8, null} -!10 = metadata !{i32 4, i32 2, metadata !8, null} - diff --git a/test/CodeGen/CellSPU/and_ops.ll b/test/CodeGen/CellSPU/and_ops.ll deleted file mode 100644 index 4203e91068d0..000000000000 --- a/test/CodeGen/CellSPU/and_ops.ll +++ /dev/null @@ -1,282 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep and %t1.s | count 234 -; RUN: grep andc %t1.s | count 85 -; RUN: grep andi %t1.s | count 37 -; RUN: grep andhi %t1.s | count 30 -; RUN: grep andbi %t1.s | count 4 - -; CellSPU legalization is over-sensitive to Legalize's traversal order. 
-; XFAIL: * - -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -; AND instruction generation: -define <4 x i32> @and_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { - %A = and <4 x i32> %arg1, %arg2 - ret <4 x i32> %A -} - -define <4 x i32> @and_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) { - %A = and <4 x i32> %arg2, %arg1 - ret <4 x i32> %A -} - -define <8 x i16> @and_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) { - %A = and <8 x i16> %arg1, %arg2 - ret <8 x i16> %A -} - -define <8 x i16> @and_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) { - %A = and <8 x i16> %arg2, %arg1 - ret <8 x i16> %A -} - -define <16 x i8> @and_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) { - %A = and <16 x i8> %arg2, %arg1 - ret <16 x i8> %A -} - -define <16 x i8> @and_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) { - %A = and <16 x i8> %arg1, %arg2 - ret <16 x i8> %A -} - -define i32 @and_i32_1(i32 %arg1, i32 %arg2) { - %A = and i32 %arg2, %arg1 - ret i32 %A -} - -define i32 @and_i32_2(i32 %arg1, i32 %arg2) { - %A = and i32 %arg1, %arg2 - ret i32 %A -} - -define i16 @and_i16_1(i16 %arg1, i16 %arg2) { - %A = and i16 %arg2, %arg1 - ret i16 %A -} - -define i16 @and_i16_2(i16 %arg1, i16 %arg2) { - %A = and i16 %arg1, %arg2 - ret i16 %A -} - -define i8 @and_i8_1(i8 %arg1, i8 %arg2) { - %A = and i8 %arg2, %arg1 - ret i8 %A -} - -define i8 @and_i8_2(i8 %arg1, i8 %arg2) { - %A = and i8 %arg1, %arg2 - ret i8 %A -} - -; ANDC instruction generation: -define <4 x i32> @andc_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { - %A = xor <4 x i32> %arg2, < i32 -1, i32 -1, i32 -1, i32 -1 > - %B = and <4 x i32> %arg1, %A - ret <4 x i32> %B -} - -define <4 x i32> @andc_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) { - %A = xor <4 x i32> %arg1, < i32 -1, i32 -1, i32 -1, i32 -1 > - %B = and <4 x i32> %arg2, %A - ret <4 x i32> %B -} - -define <4 x i32> @andc_v4i32_3(<4 x i32> %arg1, <4 x i32> %arg2) { - %A = xor <4 x i32> %arg1, < i32 -1, i32 -1, i32 -1, i32 -1 > - %B = and <4 x i32> %A, %arg2 - ret <4 x i32> %B -} - -define <8 x i16> @andc_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) { - %A = xor <8 x i16> %arg2, < i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1, i16 -1, i16 -1 > - %B = and <8 x i16> %arg1, %A - ret <8 x i16> %B -} - -define <8 x i16> @andc_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) { - %A = xor <8 x i16> %arg1, < i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1, i16 -1, i16 -1 > - %B = and <8 x i16> %arg2, %A - ret <8 x i16> %B -} - -define <16 x i8> @andc_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) { - %A = xor <16 x i8> %arg1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - %B = and <16 x i8> %arg2, %A - ret <16 x i8> %B -} - -define <16 x i8> @andc_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) { - %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - %B = and <16 x i8> %arg1, %A - ret <16 x i8> %B -} - -define <16 x i8> @andc_v16i8_3(<16 x i8> %arg1, <16 x i8> %arg2) { - %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - %B = and <16 x i8> %A, %arg1 - ret <16 x i8> %B -} - -define i32 @andc_i32_1(i32 %arg1, i32 %arg2) { - %A = xor i32 %arg2, -1 - %B = and i32 %A, %arg1 - ret i32 %B -} - -define i32 @andc_i32_2(i32 %arg1, i32 %arg2) { - %A = xor i32 %arg1, -1 - %B = and i32 
%A, %arg2 - ret i32 %B -} - -define i32 @andc_i32_3(i32 %arg1, i32 %arg2) { - %A = xor i32 %arg2, -1 - %B = and i32 %arg1, %A - ret i32 %B -} - -define i16 @andc_i16_1(i16 %arg1, i16 %arg2) { - %A = xor i16 %arg2, -1 - %B = and i16 %A, %arg1 - ret i16 %B -} - -define i16 @andc_i16_2(i16 %arg1, i16 %arg2) { - %A = xor i16 %arg1, -1 - %B = and i16 %A, %arg2 - ret i16 %B -} - -define i16 @andc_i16_3(i16 %arg1, i16 %arg2) { - %A = xor i16 %arg2, -1 - %B = and i16 %arg1, %A - ret i16 %B -} - -define i8 @andc_i8_1(i8 %arg1, i8 %arg2) { - %A = xor i8 %arg2, -1 - %B = and i8 %A, %arg1 - ret i8 %B -} - -define i8 @andc_i8_2(i8 %arg1, i8 %arg2) { - %A = xor i8 %arg1, -1 - %B = and i8 %A, %arg2 - ret i8 %B -} - -define i8 @andc_i8_3(i8 %arg1, i8 %arg2) { - %A = xor i8 %arg2, -1 - %B = and i8 %arg1, %A - ret i8 %B -} - -; ANDI instruction generation (i32 data type): -define <4 x i32> @andi_v4i32_1(<4 x i32> %in) { - %tmp2 = and <4 x i32> %in, < i32 511, i32 511, i32 511, i32 511 > - ret <4 x i32> %tmp2 -} - -define <4 x i32> @andi_v4i32_2(<4 x i32> %in) { - %tmp2 = and <4 x i32> %in, < i32 510, i32 510, i32 510, i32 510 > - ret <4 x i32> %tmp2 -} - -define <4 x i32> @andi_v4i32_3(<4 x i32> %in) { - %tmp2 = and <4 x i32> %in, < i32 -1, i32 -1, i32 -1, i32 -1 > - ret <4 x i32> %tmp2 -} - -define <4 x i32> @andi_v4i32_4(<4 x i32> %in) { - %tmp2 = and <4 x i32> %in, < i32 -512, i32 -512, i32 -512, i32 -512 > - ret <4 x i32> %tmp2 -} - -define zeroext i32 @andi_u32(i32 zeroext %in) { - %tmp37 = and i32 %in, 37 - ret i32 %tmp37 -} - -define signext i32 @andi_i32(i32 signext %in) { - %tmp38 = and i32 %in, 37 - ret i32 %tmp38 -} - -define i32 @andi_i32_1(i32 %in) { - %tmp37 = and i32 %in, 37 - ret i32 %tmp37 -} - -; ANDHI instruction generation (i16 data type): -define <8 x i16> @andhi_v8i16_1(<8 x i16> %in) { - %tmp2 = and <8 x i16> %in, < i16 511, i16 511, i16 511, i16 511, - i16 511, i16 511, i16 511, i16 511 > - ret <8 x i16> %tmp2 -} - -define <8 x i16> @andhi_v8i16_2(<8 x i16> %in) { - %tmp2 = and <8 x i16> %in, < i16 510, i16 510, i16 510, i16 510, - i16 510, i16 510, i16 510, i16 510 > - ret <8 x i16> %tmp2 -} - -define <8 x i16> @andhi_v8i16_3(<8 x i16> %in) { - %tmp2 = and <8 x i16> %in, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1, i16 -1 > - ret <8 x i16> %tmp2 -} - -define <8 x i16> @andhi_v8i16_4(<8 x i16> %in) { - %tmp2 = and <8 x i16> %in, < i16 -512, i16 -512, i16 -512, i16 -512, - i16 -512, i16 -512, i16 -512, i16 -512 > - ret <8 x i16> %tmp2 -} - -define zeroext i16 @andhi_u16(i16 zeroext %in) { - %tmp37 = and i16 %in, 37 ; <i16> [#uses=1] - ret i16 %tmp37 -} - -define signext i16 @andhi_i16(i16 signext %in) { - %tmp38 = and i16 %in, 37 ; <i16> [#uses=1] - ret i16 %tmp38 -} - -; i8 data type (s/b ANDBI if 8-bit registers were supported): -define <16 x i8> @and_v16i8(<16 x i8> %in) { - ; ANDBI generated for vector types - %tmp2 = and <16 x i8> %in, < i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, - i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, - i8 42, i8 42, i8 42, i8 42 > - ret <16 x i8> %tmp2 -} - -define zeroext i8 @and_u8(i8 zeroext %in) { - ; ANDBI generated: - %tmp37 = and i8 %in, 37 - ret i8 %tmp37 -} - -define signext i8 @and_sext8(i8 signext %in) { - ; ANDBI generated - %tmp38 = and i8 %in, 37 - ret i8 %tmp38 -} - -define i8 @and_i8(i8 %in) { - ; ANDBI generated - %tmp38 = and i8 %in, 205 - ret i8 %tmp38 -} diff --git a/test/CodeGen/CellSPU/arg_ret.ll b/test/CodeGen/CellSPU/arg_ret.ll deleted file mode 100644 index 7410b724d6fc..000000000000 --- 
a/test/CodeGen/CellSPU/arg_ret.ll +++ /dev/null @@ -1,34 +0,0 @@ -; Test parameter passing and return values -;RUN: llc --march=cellspu %s -o - | FileCheck %s - -; this fits into registers r3-r74 -%paramstruct = type { i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32, - i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32, - i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32, - i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32, - i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32, - i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32} -define ccc i32 @test_regs( %paramstruct %prm ) -{ -;CHECK: lr $3, $74 -;CHECK: bi $lr - %1 = extractvalue %paramstruct %prm, 71 - ret i32 %1 -} - -define ccc i32 @test_regs_and_stack( %paramstruct %prm, i32 %stackprm ) -{ -;CHECK-NOT: a $3, $74, $75 - %1 = extractvalue %paramstruct %prm, 71 - %2 = add i32 %1, %stackprm - ret i32 %2 -} - -define ccc %paramstruct @test_return( i32 %param, %paramstruct %prm ) -{ -;CHECK: lqd {{\$[0-9]+}}, 80($sp) -;CHECK-NOT: ori {{\$[0-9]+, \$[0-9]+, 0}} -;CHECK: lr $3, $4 - ret %paramstruct %prm -} - diff --git a/test/CodeGen/CellSPU/bigstack.ll b/test/CodeGen/CellSPU/bigstack.ll deleted file mode 100644 index 63293e2aecb1..000000000000 --- a/test/CodeGen/CellSPU/bigstack.ll +++ /dev/null @@ -1,17 +0,0 @@ -; RUN: llc < %s -march=cellspu -o %t1.s -; RUN: grep lqx %t1.s | count 3 -; RUN: grep il %t1.s | grep -v file | count 5 -; RUN: grep stqx %t1.s | count 1 - -define i32 @bigstack() nounwind { -entry: - %avar = alloca i32 - %big_data = alloca [2048 x i32] - store i32 3840, i32* %avar, align 4 - br label %return - -return: - %retval = load i32* %avar - ret i32 %retval -} - diff --git a/test/CodeGen/CellSPU/bss.ll b/test/CodeGen/CellSPU/bss.ll deleted file mode 100644 index 327800d09cbf..000000000000 --- a/test/CodeGen/CellSPU/bss.ll +++ /dev/null @@ -1,11 +0,0 @@ -; RUN: llc < %s -march=cellspu | FileCheck %s - -@bssVar = global i32 zeroinitializer -; CHECK: .section .bss -; CHECK-NEXT: .globl - -@localVar= internal global i32 zeroinitializer -; CHECK-NOT: .lcomm -; CHECK: .local -; CHECK-NEXT: .comm - diff --git a/test/CodeGen/CellSPU/call.ll b/test/CodeGen/CellSPU/call.ll deleted file mode 100644 index 11cf770145ba..000000000000 --- a/test/CodeGen/CellSPU/call.ll +++ /dev/null @@ -1,49 +0,0 @@ -; RUN: llc < %s -march=cellspu | FileCheck %s - -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -define i32 @main() { -entry: - %a = call i32 @stub_1(i32 1, float 0x400921FA00000000) - call void @extern_stub_1(i32 %a, i32 4) - ret i32 %a -} - -declare void @extern_stub_1(i32, i32) - -define i32 @stub_1(i32 %x, float %y) { - ; CHECK: il $3, 0 - ; CHECK: bi $lr -entry: - ret i32 0 -} - -; vararg call: ensure that all caller-saved registers are spilled to the -; stack: -define i32 @stub_2(...) 
{ -entry: - ret i32 0 -} - -; check that struct is passed in r3-> -; assert this by changing the second field in the struct -%0 = type { i32, i32, i32 } -declare %0 @callee() -define %0 @test_structret() -{ -;CHECK: stqd $lr, 16($sp) -;CHECK: stqd $sp, -48($sp) -;CHECK: ai $sp, $sp, -48 -;CHECK: brasl $lr, callee - %rv = call %0 @callee() -;CHECK: ai $4, $4, 1 -;CHECK: lqd $lr, 64($sp) -;CHECK: ai $sp, $sp, 48 -;CHECK: bi $lr - %oldval = extractvalue %0 %rv, 1 - %newval = add i32 %oldval,1 - %newrv = insertvalue %0 %rv, i32 %newval, 1 - ret %0 %newrv -} - diff --git a/test/CodeGen/CellSPU/crash.ll b/test/CodeGen/CellSPU/crash.ll deleted file mode 100644 index cc2ab71db3b3..000000000000 --- a/test/CodeGen/CellSPU/crash.ll +++ /dev/null @@ -1,8 +0,0 @@ -; RUN: llc %s -march=cellspu -o - -declare i8 @return_i8() -declare i16 @return_i16() -define void @testfunc() { - %rv1 = call i8 @return_i8() - %rv2 = call i16 @return_i16() - ret void -}
\ No newline at end of file diff --git a/test/CodeGen/CellSPU/ctpop.ll b/test/CodeGen/CellSPU/ctpop.ll deleted file mode 100644 index e1a6cd829260..000000000000 --- a/test/CodeGen/CellSPU/ctpop.ll +++ /dev/null @@ -1,30 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep cntb %t1.s | count 3 -; RUN: grep andi %t1.s | count 3 -; RUN: grep rotmi %t1.s | count 2 -; RUN: grep rothmi %t1.s | count 1 -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -declare i8 @llvm.ctpop.i8(i8) -declare i16 @llvm.ctpop.i16(i16) -declare i32 @llvm.ctpop.i32(i32) - -define i32 @test_i8(i8 %X) { - call i8 @llvm.ctpop.i8(i8 %X) - %Y = zext i8 %1 to i32 - ret i32 %Y -} - -define i32 @test_i16(i16 %X) { - call i16 @llvm.ctpop.i16(i16 %X) - %Y = zext i16 %1 to i32 - ret i32 %Y -} - -define i32 @test_i32(i32 %X) { - call i32 @llvm.ctpop.i32(i32 %X) - %Y = bitcast i32 %1 to i32 - ret i32 %Y -} - diff --git a/test/CodeGen/CellSPU/div_ops.ll b/test/CodeGen/CellSPU/div_ops.ll deleted file mode 100644 index 0c93d83ca76d..000000000000 --- a/test/CodeGen/CellSPU/div_ops.ll +++ /dev/null @@ -1,22 +0,0 @@ -; RUN: llc --march=cellspu %s -o - | FileCheck %s - -; signed division rounds towards zero, rotma don't. -define i32 @sdivide (i32 %val ) -{ -; CHECK: rotmai -; CHECK: rotmi -; CHECK: a -; CHECK: rotmai -; CHECK: bi $lr - %rv = sdiv i32 %val, 4 - ret i32 %rv -} - -define i32 @udivide (i32 %val ) -{ -; CHECK: rotmi -; CHECK: bi $lr - %rv = udiv i32 %val, 4 - ret i32 %rv -} - diff --git a/test/CodeGen/CellSPU/dp_farith.ll b/test/CodeGen/CellSPU/dp_farith.ll deleted file mode 100644 index 66bff3eb7835..000000000000 --- a/test/CodeGen/CellSPU/dp_farith.ll +++ /dev/null @@ -1,102 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep dfa %t1.s | count 2 -; RUN: grep dfs %t1.s | count 2 -; RUN: grep dfm %t1.s | count 6 -; RUN: grep dfma %t1.s | count 2 -; RUN: grep dfms %t1.s | count 2 -; RUN: grep dfnms %t1.s | count 4 -; -; This file includes double precision floating point arithmetic instructions -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -define double @fadd(double %arg1, double %arg2) { - %A = fadd double %arg1, %arg2 - ret double %A -} - -define <2 x double> @fadd_vec(<2 x double> %arg1, <2 x double> %arg2) { - %A = fadd <2 x double> %arg1, %arg2 - ret <2 x double> %A -} - -define double @fsub(double %arg1, double %arg2) { - %A = fsub double %arg1, %arg2 - ret double %A -} - -define <2 x double> @fsub_vec(<2 x double> %arg1, <2 x double> %arg2) { - %A = fsub <2 x double> %arg1, %arg2 - ret <2 x double> %A -} - -define double @fmul(double %arg1, double %arg2) { - %A = fmul double %arg1, %arg2 - ret double %A -} - -define <2 x double> @fmul_vec(<2 x double> %arg1, <2 x double> %arg2) { - %A = fmul <2 x double> %arg1, %arg2 - ret <2 x double> %A -} - -define double @fma(double %arg1, double %arg2, double %arg3) { - %A = fmul double %arg1, %arg2 - %B = fadd double %A, %arg3 - ret double %B -} - -define <2 x double> @fma_vec(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) { - %A = fmul <2 x double> %arg1, %arg2 - %B = fadd <2 x double> %A, %arg3 - ret <2 x double> %B -} - -define double @fms(double %arg1, double %arg2, double %arg3) { - %A = fmul double %arg1, %arg2 - %B = fsub double %A, %arg3 - ret double %B -} - -define <2 x double> @fms_vec(<2 x double> %arg1, 
<2 x double> %arg2, <2 x double> %arg3) { - %A = fmul <2 x double> %arg1, %arg2 - %B = fsub <2 x double> %A, %arg3 - ret <2 x double> %B -} - -; - (a * b - c) -define double @d_fnms_1(double %arg1, double %arg2, double %arg3) { - %A = fmul double %arg1, %arg2 - %B = fsub double %A, %arg3 - %C = fsub double -0.000000e+00, %B ; <double> [#uses=1] - ret double %C -} - -; Annother way of getting fnms -; - ( a * b ) + c => c - (a * b) -define double @d_fnms_2(double %arg1, double %arg2, double %arg3) { - %A = fmul double %arg1, %arg2 - %B = fsub double %arg3, %A - ret double %B -} - -; FNMS: - (a * b - c) => c - (a * b) -define <2 x double> @d_fnms_vec_1(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) { - %A = fmul <2 x double> %arg1, %arg2 - %B = fsub <2 x double> %arg3, %A - ret <2 x double> %B -} - -; Another way to get fnms using a constant vector -; - ( a * b - c) -define <2 x double> @d_fnms_vec_2(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) { - %A = fmul <2 x double> %arg1, %arg2 ; <<2 x double>> [#uses=1] - %B = fsub <2 x double> %A, %arg3 ; <<2 x double>> [#uses=1] - %C = fsub <2 x double> < double -0.00000e+00, double -0.00000e+00 >, %B - ret <2 x double> %C -} - -;define double @fdiv_1(double %arg1, double %arg2) { -; %A = fdiv double %arg1, %arg2 ; <double> [#uses=1] -; ret double %A -;} diff --git a/test/CodeGen/CellSPU/eqv.ll b/test/CodeGen/CellSPU/eqv.ll deleted file mode 100644 index 79676814f282..000000000000 --- a/test/CodeGen/CellSPU/eqv.ll +++ /dev/null @@ -1,152 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep eqv %t1.s | count 18 -; RUN: grep xshw %t1.s | count 6 -; RUN: grep xsbh %t1.s | count 3 -; RUN: grep andi %t1.s | count 3 - -; Test the 'eqv' instruction, whose boolean expression is: -; (a & b) | (~a & ~b), which simplifies to -; (a & b) | ~(a | b) -; Alternatively, a ^ ~b, which the compiler will also match. 
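; Editorial sketch, not part of the deleted test: a quick check of the identity stated in
; the comment above. By De Morgan, ~a & ~b = ~(a | b), which gives the second form; and
; since a ^ b = (a & ~b) | (~a & b), its complement is ~(a ^ b) = (a & b) | (~a & ~b),
; so eqv(a, b) can equally be written a ^ ~b. The function below (hypothetical name,
; plain LLVM IR) spells out that last form, the pattern the tests expect to match into
; a single eqv:
define i32 @eqv_sketch(i32 %a, i32 %b) {
  %notb = xor i32 %b, -1        ; ~b
  %r = xor i32 %a, %notb        ; a ^ ~b  ==  (a & b) | (~a & ~b)
  ret i32 %r
}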
- -; ModuleID = 'eqv.bc' -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -define <4 x i32> @equiv_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { - %A = and <4 x i32> %arg1, %arg2 - %B = or <4 x i32> %arg1, %arg2 - %Bnot = xor <4 x i32> %B, < i32 -1, i32 -1, i32 -1, i32 -1 > - %C = or <4 x i32> %A, %Bnot - ret <4 x i32> %C -} - -define <4 x i32> @equiv_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) { - %B = or <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1] - %Bnot = xor <4 x i32> %B, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] - %A = and <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1] - %C = or <4 x i32> %A, %Bnot ; <<4 x i32>> [#uses=1] - ret <4 x i32> %C -} - -define <4 x i32> @equiv_v4i32_3(<4 x i32> %arg1, <4 x i32> %arg2) { - %B = or <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1] - %A = and <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1] - %Bnot = xor <4 x i32> %B, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] - %C = or <4 x i32> %A, %Bnot ; <<4 x i32>> [#uses=1] - ret <4 x i32> %C -} - -define <4 x i32> @equiv_v4i32_4(<4 x i32> %arg1, <4 x i32> %arg2) { - %arg2not = xor <4 x i32> %arg2, < i32 -1, i32 -1, i32 -1, i32 -1 > - %C = xor <4 x i32> %arg1, %arg2not - ret <4 x i32> %C -} - -define i32 @equiv_i32_1(i32 %arg1, i32 %arg2) { - %A = and i32 %arg1, %arg2 ; <i32> [#uses=1] - %B = or i32 %arg1, %arg2 ; <i32> [#uses=1] - %Bnot = xor i32 %B, -1 ; <i32> [#uses=1] - %C = or i32 %A, %Bnot ; <i32> [#uses=1] - ret i32 %C -} - -define i32 @equiv_i32_2(i32 %arg1, i32 %arg2) { - %B = or i32 %arg1, %arg2 ; <i32> [#uses=1] - %Bnot = xor i32 %B, -1 ; <i32> [#uses=1] - %A = and i32 %arg1, %arg2 ; <i32> [#uses=1] - %C = or i32 %A, %Bnot ; <i32> [#uses=1] - ret i32 %C -} - -define i32 @equiv_i32_3(i32 %arg1, i32 %arg2) { - %B = or i32 %arg1, %arg2 ; <i32> [#uses=1] - %A = and i32 %arg1, %arg2 ; <i32> [#uses=1] - %Bnot = xor i32 %B, -1 ; <i32> [#uses=1] - %C = or i32 %A, %Bnot ; <i32> [#uses=1] - ret i32 %C -} - -define i32 @equiv_i32_4(i32 %arg1, i32 %arg2) { - %arg2not = xor i32 %arg2, -1 - %C = xor i32 %arg1, %arg2not - ret i32 %C -} - -define i32 @equiv_i32_5(i32 %arg1, i32 %arg2) { - %arg1not = xor i32 %arg1, -1 - %C = xor i32 %arg2, %arg1not - ret i32 %C -} - -define signext i16 @equiv_i16_1(i16 signext %arg1, i16 signext %arg2) { - %A = and i16 %arg1, %arg2 ; <i16> [#uses=1] - %B = or i16 %arg1, %arg2 ; <i16> [#uses=1] - %Bnot = xor i16 %B, -1 ; <i16> [#uses=1] - %C = or i16 %A, %Bnot ; <i16> [#uses=1] - ret i16 %C -} - -define signext i16 @equiv_i16_2(i16 signext %arg1, i16 signext %arg2) { - %B = or i16 %arg1, %arg2 ; <i16> [#uses=1] - %Bnot = xor i16 %B, -1 ; <i16> [#uses=1] - %A = and i16 %arg1, %arg2 ; <i16> [#uses=1] - %C = or i16 %A, %Bnot ; <i16> [#uses=1] - ret i16 %C -} - -define signext i16 @equiv_i16_3(i16 signext %arg1, i16 signext %arg2) { - %B = or i16 %arg1, %arg2 ; <i16> [#uses=1] - %A = and i16 %arg1, %arg2 ; <i16> [#uses=1] - %Bnot = xor i16 %B, -1 ; <i16> [#uses=1] - %C = or i16 %A, %Bnot ; <i16> [#uses=1] - ret i16 %C -} - -define signext i8 @equiv_i8_1(i8 signext %arg1, i8 signext %arg2) { - %A = and i8 %arg1, %arg2 ; <i8> [#uses=1] - %B = or i8 %arg1, %arg2 ; <i8> [#uses=1] - %Bnot = xor i8 %B, -1 ; <i8> [#uses=1] - %C = or i8 %A, %Bnot ; <i8> [#uses=1] - ret i8 %C -} - -define signext i8 @equiv_i8_2(i8 signext %arg1, i8 signext %arg2) { - %B = or i8 %arg1, %arg2 ; <i8> [#uses=1] - %Bnot = xor i8 %B, -1 ; <i8> [#uses=1] - %A = and i8 
%arg1, %arg2 ; <i8> [#uses=1] - %C = or i8 %A, %Bnot ; <i8> [#uses=1] - ret i8 %C -} - -define signext i8 @equiv_i8_3(i8 signext %arg1, i8 signext %arg2) { - %B = or i8 %arg1, %arg2 ; <i8> [#uses=1] - %A = and i8 %arg1, %arg2 ; <i8> [#uses=1] - %Bnot = xor i8 %B, -1 ; <i8> [#uses=1] - %C = or i8 %A, %Bnot ; <i8> [#uses=1] - ret i8 %C -} - -define zeroext i8 @equiv_u8_1(i8 zeroext %arg1, i8 zeroext %arg2) { - %A = and i8 %arg1, %arg2 ; <i8> [#uses=1] - %B = or i8 %arg1, %arg2 ; <i8> [#uses=1] - %Bnot = xor i8 %B, -1 ; <i8> [#uses=1] - %C = or i8 %A, %Bnot ; <i8> [#uses=1] - ret i8 %C -} - -define zeroext i8 @equiv_u8_2(i8 zeroext %arg1, i8 zeroext %arg2) { - %B = or i8 %arg1, %arg2 ; <i8> [#uses=1] - %Bnot = xor i8 %B, -1 ; <i8> [#uses=1] - %A = and i8 %arg1, %arg2 ; <i8> [#uses=1] - %C = or i8 %A, %Bnot ; <i8> [#uses=1] - ret i8 %C -} - -define zeroext i8 @equiv_u8_3(i8 zeroext %arg1, i8 zeroext %arg2) { - %B = or i8 %arg1, %arg2 ; <i8> [#uses=1] - %A = and i8 %arg1, %arg2 ; <i8> [#uses=1] - %Bnot = xor i8 %B, -1 ; <i8> [#uses=1] - %C = or i8 %A, %Bnot ; <i8> [#uses=1] - ret i8 %C -} diff --git a/test/CodeGen/CellSPU/extract_elt.ll b/test/CodeGen/CellSPU/extract_elt.ll deleted file mode 100644 index 0ac971c58c5b..000000000000 --- a/test/CodeGen/CellSPU/extract_elt.ll +++ /dev/null @@ -1,277 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep shufb %t1.s | count 39 -; RUN: grep ilhu %t1.s | count 27 -; RUN: grep iohl %t1.s | count 27 -; RUN: grep lqa %t1.s | count 10 -; RUN: grep shlqby %t1.s | count 12 -; RUN: grep 515 %t1.s | count 1 -; RUN: grep 1029 %t1.s | count 2 -; RUN: grep 1543 %t1.s | count 2 -; RUN: grep 2057 %t1.s | count 2 -; RUN: grep 2571 %t1.s | count 2 -; RUN: grep 3085 %t1.s | count 2 -; RUN: grep 3599 %t1.s | count 2 -; RUN: grep 32768 %t1.s | count 1 -; RUN: grep 32769 %t1.s | count 1 -; RUN: grep 32770 %t1.s | count 1 -; RUN: grep 32771 %t1.s | count 1 -; RUN: grep 32772 %t1.s | count 1 -; RUN: grep 32773 %t1.s | count 1 -; RUN: grep 32774 %t1.s | count 1 -; RUN: grep 32775 %t1.s | count 1 -; RUN: grep 32776 %t1.s | count 1 -; RUN: grep 32777 %t1.s | count 1 -; RUN: grep 32778 %t1.s | count 1 -; RUN: grep 32779 %t1.s | count 1 -; RUN: grep 32780 %t1.s | count 1 -; RUN: grep 32781 %t1.s | count 1 -; RUN: grep 32782 %t1.s | count 1 -; RUN: grep 32783 %t1.s | count 1 -; RUN: grep 32896 %t1.s | count 24 - -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -define i32 @i32_extract_0(<4 x i32> %v) { -entry: - %a = extractelement <4 x i32> %v, i32 0 - ret i32 %a -} - -define i32 @i32_extract_1(<4 x i32> %v) { -entry: - %a = extractelement <4 x i32> %v, i32 1 - ret i32 %a -} - -define i32 @i32_extract_2(<4 x i32> %v) { -entry: - %a = extractelement <4 x i32> %v, i32 2 - ret i32 %a -} - -define i32 @i32_extract_3(<4 x i32> %v) { -entry: - %a = extractelement <4 x i32> %v, i32 3 - ret i32 %a -} - -define i16 @i16_extract_0(<8 x i16> %v) { -entry: - %a = extractelement <8 x i16> %v, i32 0 - ret i16 %a -} - -define i16 @i16_extract_1(<8 x i16> %v) { -entry: - %a = extractelement <8 x i16> %v, i32 1 - ret i16 %a -} - -define i16 @i16_extract_2(<8 x i16> %v) { -entry: - %a = extractelement <8 x i16> %v, i32 2 - ret i16 %a -} - -define i16 @i16_extract_3(<8 x i16> %v) { -entry: - %a = extractelement <8 x i16> %v, i32 3 - ret i16 %a -} - -define i16 @i16_extract_4(<8 x i16> %v) { -entry: - %a = extractelement <8 x i16> %v, i32 4 - ret i16 %a -} - -define i16 
@i16_extract_5(<8 x i16> %v) { -entry: - %a = extractelement <8 x i16> %v, i32 5 - ret i16 %a -} - -define i16 @i16_extract_6(<8 x i16> %v) { -entry: - %a = extractelement <8 x i16> %v, i32 6 - ret i16 %a -} - -define i16 @i16_extract_7(<8 x i16> %v) { -entry: - %a = extractelement <8 x i16> %v, i32 7 - ret i16 %a -} - -define i8 @i8_extract_0(<16 x i8> %v) { -entry: - %a = extractelement <16 x i8> %v, i32 0 - ret i8 %a -} - -define i8 @i8_extract_1(<16 x i8> %v) { -entry: - %a = extractelement <16 x i8> %v, i32 1 - ret i8 %a -} - -define i8 @i8_extract_2(<16 x i8> %v) { -entry: - %a = extractelement <16 x i8> %v, i32 2 - ret i8 %a -} - -define i8 @i8_extract_3(<16 x i8> %v) { -entry: - %a = extractelement <16 x i8> %v, i32 3 - ret i8 %a -} - -define i8 @i8_extract_4(<16 x i8> %v) { -entry: - %a = extractelement <16 x i8> %v, i32 4 - ret i8 %a -} - -define i8 @i8_extract_5(<16 x i8> %v) { -entry: - %a = extractelement <16 x i8> %v, i32 5 - ret i8 %a -} - -define i8 @i8_extract_6(<16 x i8> %v) { -entry: - %a = extractelement <16 x i8> %v, i32 6 - ret i8 %a -} - -define i8 @i8_extract_7(<16 x i8> %v) { -entry: - %a = extractelement <16 x i8> %v, i32 7 - ret i8 %a -} - -define i8 @i8_extract_8(<16 x i8> %v) { -entry: - %a = extractelement <16 x i8> %v, i32 8 - ret i8 %a -} - -define i8 @i8_extract_9(<16 x i8> %v) { -entry: - %a = extractelement <16 x i8> %v, i32 9 - ret i8 %a -} - -define i8 @i8_extract_10(<16 x i8> %v) { -entry: - %a = extractelement <16 x i8> %v, i32 10 - ret i8 %a -} - -define i8 @i8_extract_11(<16 x i8> %v) { -entry: - %a = extractelement <16 x i8> %v, i32 11 - ret i8 %a -} - -define i8 @i8_extract_12(<16 x i8> %v) { -entry: - %a = extractelement <16 x i8> %v, i32 12 - ret i8 %a -} - -define i8 @i8_extract_13(<16 x i8> %v) { -entry: - %a = extractelement <16 x i8> %v, i32 13 - ret i8 %a -} - -define i8 @i8_extract_14(<16 x i8> %v) { -entry: - %a = extractelement <16 x i8> %v, i32 14 - ret i8 %a -} - -define i8 @i8_extract_15(<16 x i8> %v) { -entry: - %a = extractelement <16 x i8> %v, i32 15 - ret i8 %a -} - -;;-------------------------------------------------------------------------- -;; extract element, variable index: -;;-------------------------------------------------------------------------- - -define i8 @extract_varadic_i8(i32 %i) nounwind readnone { -entry: - %0 = extractelement <16 x i8> < i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, i32 %i - ret i8 %0 -} - -define i8 @extract_varadic_i8_1(<16 x i8> %v, i32 %i) nounwind readnone { -entry: - %0 = extractelement <16 x i8> %v, i32 %i - ret i8 %0 -} - -define i16 @extract_varadic_i16(i32 %i) nounwind readnone { -entry: - %0 = extractelement <8 x i16> < i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, i32 %i - ret i16 %0 -} - -define i16 @extract_varadic_i16_1(<8 x i16> %v, i32 %i) nounwind readnone { -entry: - %0 = extractelement <8 x i16> %v, i32 %i - ret i16 %0 -} - -define i32 @extract_varadic_i32(i32 %i) nounwind readnone { -entry: - %0 = extractelement <4 x i32> < i32 0, i32 1, i32 2, i32 3>, i32 %i - ret i32 %0 -} - -define i32 @extract_varadic_i32_1(<4 x i32> %v, i32 %i) nounwind readnone { -entry: - %0 = extractelement <4 x i32> %v, i32 %i - ret i32 %0 -} - -define float @extract_varadic_f32(i32 %i) nounwind readnone { -entry: - %0 = extractelement <4 x float> < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >, i32 %i - ret float %0 -} - -define float @extract_varadic_f32_1(<4 x float> %v, i32 %i) nounwind 
readnone { -entry: - %0 = extractelement <4 x float> %v, i32 %i - ret float %0 -} - -define i64 @extract_varadic_i64(i32 %i) nounwind readnone { -entry: - %0 = extractelement <2 x i64> < i64 0, i64 1>, i32 %i - ret i64 %0 -} - -define i64 @extract_varadic_i64_1(<2 x i64> %v, i32 %i) nounwind readnone { -entry: - %0 = extractelement <2 x i64> %v, i32 %i - ret i64 %0 -} - -define double @extract_varadic_f64(i32 %i) nounwind readnone { -entry: - %0 = extractelement <2 x double> < double 1.000000e+00, double 2.000000e+00>, i32 %i - ret double %0 -} - -define double @extract_varadic_f64_1(<2 x double> %v, i32 %i) nounwind readnone { -entry: - %0 = extractelement <2 x double> %v, i32 %i - ret double %0 -} diff --git a/test/CodeGen/CellSPU/fcmp32.ll b/test/CodeGen/CellSPU/fcmp32.ll deleted file mode 100644 index f6b028dbb88a..000000000000 --- a/test/CodeGen/CellSPU/fcmp32.ll +++ /dev/null @@ -1,36 +0,0 @@ -; RUN: llc --mtriple=cellspu-unknown-elf %s -o - | FileCheck %s - -; Exercise the floating point comparison operators for f32: - -declare double @fabs(double) -declare float @fabsf(float) - -define i1 @fcmp_eq(float %arg1, float %arg2) { -; CHECK: fceq -; CHECK: bi $lr - %A = fcmp oeq float %arg1, %arg2 - ret i1 %A -} - -define i1 @fcmp_mag_eq(float %arg1, float %arg2) { -; CHECK: fcmeq -; CHECK: bi $lr - %1 = call float @fabsf(float %arg1) readnone - %2 = call float @fabsf(float %arg2) readnone - %3 = fcmp oeq float %1, %2 - ret i1 %3 -} - -define i1 @test_ogt(float %a, float %b) { -; CHECK: fcgt -; CHECK: bi $lr - %cmp = fcmp ogt float %a, %b - ret i1 %cmp -} - -define i1 @test_ugt(float %a, float %b) { -; CHECK: fcgt -; CHECK: bi $lr - %cmp = fcmp ugt float %a, %b - ret i1 %cmp -} diff --git a/test/CodeGen/CellSPU/fcmp64.ll b/test/CodeGen/CellSPU/fcmp64.ll deleted file mode 100644 index 2b61fa6d2dc2..000000000000 --- a/test/CodeGen/CellSPU/fcmp64.ll +++ /dev/null @@ -1,7 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s - -define i1 @fcmp_eq_setcc_f64(double %arg1, double %arg2) nounwind { -entry: - %A = fcmp oeq double %arg1, %arg2 - ret i1 %A -} diff --git a/test/CodeGen/CellSPU/fdiv.ll b/test/CodeGen/CellSPU/fdiv.ll deleted file mode 100644 index 9921626b79cb..000000000000 --- a/test/CodeGen/CellSPU/fdiv.ll +++ /dev/null @@ -1,22 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep frest %t1.s | count 2 -; RUN: grep -w fi %t1.s | count 2 -; RUN: grep -w fm %t1.s | count 2 -; RUN: grep fma %t1.s | count 2 -; RUN: grep fnms %t1.s | count 4 -; RUN: grep cgti %t1.s | count 2 -; RUN: grep selb %t1.s | count 2 -; -; This file includes standard floating point arithmetic instructions -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -define float @fdiv32(float %arg1, float %arg2) { - %A = fdiv float %arg1, %arg2 - ret float %A -} - -define <4 x float> @fdiv_v4f32(<4 x float> %arg1, <4 x float> %arg2) { - %A = fdiv <4 x float> %arg1, %arg2 - ret <4 x float> %A -} diff --git a/test/CodeGen/CellSPU/fneg-fabs.ll b/test/CodeGen/CellSPU/fneg-fabs.ll deleted file mode 100644 index 6e01906dae69..000000000000 --- a/test/CodeGen/CellSPU/fneg-fabs.ll +++ /dev/null @@ -1,42 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep 32768 %t1.s | count 2 -; RUN: grep xor %t1.s | count 4 -; RUN: grep and %t1.s | count 2 - -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - 
-define double @fneg_dp(double %X) { - %Y = fsub double -0.000000e+00, %X - ret double %Y -} - -define <2 x double> @fneg_dp_vec(<2 x double> %X) { - %Y = fsub <2 x double> < double -0.0000e+00, double -0.0000e+00 >, %X - ret <2 x double> %Y -} - -define float @fneg_sp(float %X) { - %Y = fsub float -0.000000e+00, %X - ret float %Y -} - -define <4 x float> @fneg_sp_vec(<4 x float> %X) { - %Y = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, - float -0.000000e+00, float -0.000000e+00>, %X - ret <4 x float> %Y -} - -declare double @fabs(double) - -declare float @fabsf(float) - -define double @fabs_dp(double %X) { - %Y = call double @fabs( double %X ) readnone - ret double %Y -} - -define float @fabs_sp(float %X) { - %Y = call float @fabsf( float %X ) readnone - ret float %Y -} diff --git a/test/CodeGen/CellSPU/i64ops.ll b/test/CodeGen/CellSPU/i64ops.ll deleted file mode 100644 index 3553cbbf7b5c..000000000000 --- a/test/CodeGen/CellSPU/i64ops.ll +++ /dev/null @@ -1,57 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep xswd %t1.s | count 3 -; RUN: grep xsbh %t1.s | count 1 -; RUN: grep xshw %t1.s | count 2 -; RUN: grep shufb %t1.s | count 7 -; RUN: grep cg %t1.s | count 4 -; RUN: grep addx %t1.s | count 4 -; RUN: grep fsmbi %t1.s | count 3 -; RUN: grep il %t1.s | count 2 -; RUN: grep mpy %t1.s | count 10 -; RUN: grep mpyh %t1.s | count 6 -; RUN: grep mpyhhu %t1.s | count 2 -; RUN: grep mpyu %t1.s | count 4 - -; ModuleID = 'stores.bc' -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -define i64 @sext_i64_i8(i8 %a) nounwind { - %1 = sext i8 %a to i64 - ret i64 %1 -} - -define i64 @sext_i64_i16(i16 %a) nounwind { - %1 = sext i16 %a to i64 - ret i64 %1 -} - -define i64 @sext_i64_i32(i32 %a) nounwind { - %1 = sext i32 %a to i64 - ret i64 %1 -} - -define i64 @zext_i64_i8(i8 %a) nounwind { - %1 = zext i8 %a to i64 - ret i64 %1 -} - -define i64 @zext_i64_i16(i16 %a) nounwind { - %1 = zext i16 %a to i64 - ret i64 %1 -} - -define i64 @zext_i64_i32(i32 %a) nounwind { - %1 = zext i32 %a to i64 - ret i64 %1 -} - -define i64 @add_i64(i64 %a, i64 %b) nounwind { - %1 = add i64 %a, %b - ret i64 %1 -} - -define i64 @mul_i64(i64 %a, i64 %b) nounwind { - %1 = mul i64 %a, %b - ret i64 %1 -} diff --git a/test/CodeGen/CellSPU/i8ops.ll b/test/CodeGen/CellSPU/i8ops.ll deleted file mode 100644 index 57a2aa894725..000000000000 --- a/test/CodeGen/CellSPU/i8ops.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s - -; ModuleID = 'i8ops.bc' -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -define i8 @add_i8(i8 %a, i8 %b) nounwind { - %1 = add i8 %a, %b - ret i8 %1 -} - -define i8 @add_i8_imm(i8 %a, i8 %b) nounwind { - %1 = add i8 %a, 15 - ret i8 %1 -} - -define i8 @sub_i8(i8 %a, i8 %b) nounwind { - %1 = sub i8 %a, %b - ret i8 %1 -} - -define i8 @sub_i8_imm(i8 %a, i8 %b) nounwind { - %1 = sub i8 %a, 15 - ret i8 %1 -} diff --git a/test/CodeGen/CellSPU/icmp16.ll b/test/CodeGen/CellSPU/icmp16.ll deleted file mode 100644 index 853ae1db160f..000000000000 --- a/test/CodeGen/CellSPU/icmp16.ll +++ /dev/null @@ -1,574 +0,0 @@ -; RUN: llc < %s -march=cellspu | FileCheck %s - -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -; $3 = %arg1, $4 
= %arg2, $5 = %val1, $6 = %val2 -; $3 = %arg1, $4 = %val1, $5 = %val2 -; -; For "positive" comparisons: -; selb $3, $6, $5, <i1> -; selb $3, $5, $4, <i1> -; -; For "negative" comparisons, i.e., those where the result of the comparison -; must be inverted (setne, for example): -; selb $3, $5, $6, <i1> -; selb $3, $4, $5, <i1> - -; i16 integer comparisons: -define i16 @icmp_eq_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_eq_select_i16: -; CHECK: ceqh -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp eq i16 %arg1, %arg2 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i1 @icmp_eq_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_eq_setcc_i16: -; CHECK: ilhu -; CHECK: ceqh -; CHECK: iohl -; CHECK: shufb - -entry: - %A = icmp eq i16 %arg1, %arg2 - ret i1 %A -} - -define i16 @icmp_eq_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_eq_immed01_i16: -; CHECK: ceqhi -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp eq i16 %arg1, 511 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_eq_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_eq_immed02_i16: -; CHECK: ceqhi -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp eq i16 %arg1, -512 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_eq_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_eq_immed03_i16: -; CHECK: ceqhi -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp eq i16 %arg1, -1 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_eq_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_eq_immed04_i16: -; CHECK: ilh -; CHECK: ceqh -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp eq i16 %arg1, 32768 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_ne_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_ne_select_i16: -; CHECK: ceqh -; CHECK: selb $3, $5, $6, $3 - -entry: - %A = icmp ne i16 %arg1, %arg2 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i1 @icmp_ne_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_ne_setcc_i16: -; CHECK: ceqh -; CHECK: ilhu -; CHECK: xorhi -; CHECK: iohl -; CHECK: shufb - -entry: - %A = icmp ne i16 %arg1, %arg2 - ret i1 %A -} - -define i16 @icmp_ne_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_ne_immed01_i16: -; CHECK: ceqhi -; CHECK: selb $3, $4, $5, $3 - -entry: - %A = icmp ne i16 %arg1, 511 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_ne_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_ne_immed02_i16: -; CHECK: ceqhi -; CHECK: selb $3, $4, $5, $3 - -entry: - %A = icmp ne i16 %arg1, -512 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_ne_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_ne_immed03_i16: -; CHECK: ceqhi -; CHECK: selb $3, $4, $5, $3 - -entry: - %A = icmp ne i16 %arg1, -1 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_ne_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_ne_immed04_i16: -; CHECK: ilh -; CHECK: ceqh -; CHECK: selb $3, $4, $5, $3 - -entry: - %A = icmp ne i16 %arg1, 32768 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_ugt_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_ugt_select_i16: -; 
CHECK: clgth -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp ugt i16 %arg1, %arg2 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i1 @icmp_ugt_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_ugt_setcc_i16: -; CHECK: ilhu -; CHECK: clgth -; CHECK: iohl -; CHECK: shufb - -entry: - %A = icmp ugt i16 %arg1, %arg2 - ret i1 %A -} - -define i16 @icmp_ugt_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_ugt_immed01_i16: -; CHECK: clgthi -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp ugt i16 %arg1, 500 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_ugt_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_ugt_immed02_i16: -; CHECK: ceqhi -; CHECK: selb $3, $4, $5, $3 - -entry: - %A = icmp ugt i16 %arg1, 0 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_ugt_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_ugt_immed03_i16: -; CHECK: clgthi -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp ugt i16 %arg1, 65024 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_ugt_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_ugt_immed04_i16: -; CHECK: ilh -; CHECK: clgth -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp ugt i16 %arg1, 32768 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_uge_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_uge_select_i16: -; CHECK: ceqh -; CHECK: clgth -; CHECK: or -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp uge i16 %arg1, %arg2 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i1 @icmp_uge_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_uge_setcc_i16: -; CHECK: ceqh -; CHECK: clgth -; CHECK: ilhu -; CHECK: or -; CHECK: iohl -; CHECK: shufb - -entry: - %A = icmp uge i16 %arg1, %arg2 - ret i1 %A -} - -;; Note: icmp uge i16 %arg1, <immed> can always be transformed into -;; icmp ugt i16 %arg1, <immed>-1 -;; -;; Consequently, even though the patterns exist to match, it's unlikely -;; they'll ever be generated. 
- -define i16 @icmp_ult_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_ult_select_i16: -; CHECK: ceqh -; CHECK: clgth -; CHECK: nor -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp ult i16 %arg1, %arg2 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i1 @icmp_ult_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_ult_setcc_i16: -; CHECK: ceqh -; CHECK: clgth -; CHECK: ilhu -; CHECK: nor -; CHECK: iohl -; CHECK: shufb - -entry: - %A = icmp ult i16 %arg1, %arg2 - ret i1 %A -} - -define i16 @icmp_ult_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_ult_immed01_i16: -; CHECK: ceqhi -; CHECK: clgthi -; CHECK: nor -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp ult i16 %arg1, 511 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_ult_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_ult_immed02_i16: -; CHECK: ceqhi -; CHECK: clgthi -; CHECK: nor -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp ult i16 %arg1, 65534 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_ult_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_ult_immed03_i16: -; CHECK: ceqhi -; CHECK: clgthi -; CHECK: nor -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp ult i16 %arg1, 65024 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_ult_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_ult_immed04_i16: -; CHECK: ilh -; CHECK: ceqh -; CHECK: clgth -; CHECK: nor -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp ult i16 %arg1, 32769 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_ule_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_ule_select_i16: -; CHECK: clgth -; CHECK: selb $3, $5, $6, $3 - -entry: - %A = icmp ule i16 %arg1, %arg2 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i1 @icmp_ule_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_ule_setcc_i16: -; CHECK: clgth -; CHECK: ilhu -; CHECK: xorhi -; CHECK: iohl -; CHECK: shufb - -entry: - %A = icmp ule i16 %arg1, %arg2 - ret i1 %A -} - -;; Note: icmp ule i16 %arg1, <immed> can always be transformed into -;; icmp ult i16 %arg1, <immed>+1 -;; -;; Consequently, even though the patterns exist to match, it's unlikely -;; they'll ever be generated. 
- -define i16 @icmp_sgt_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_sgt_select_i16: -; CHECK: cgth -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp sgt i16 %arg1, %arg2 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i1 @icmp_sgt_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_sgt_setcc_i16: -; CHECK: ilhu -; CHECK: cgth -; CHECK: iohl -; CHECK: shufb - -entry: - %A = icmp sgt i16 %arg1, %arg2 - ret i1 %A -} - -define i16 @icmp_sgt_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_sgt_immed01_i16: -; CHECK: cgthi -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp sgt i16 %arg1, 511 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_sgt_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_sgt_immed02_i16: -; CHECK: cgthi -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp sgt i16 %arg1, -1 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_sgt_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_sgt_immed03_i16: -; CHECK: cgthi -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp sgt i16 %arg1, -512 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_sgt_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_sgt_immed04_i16: -; CHECK: ilh -; CHECK: ceqh -; CHECK: selb $3, $4, $5, $3 - -entry: - %A = icmp sgt i16 %arg1, 32768 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_sge_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_sge_select_i16: -; CHECK: ceqh -; CHECK: cgth -; CHECK: or -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp sge i16 %arg1, %arg2 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i1 @icmp_sge_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_sge_setcc_i16: -; CHECK: ceqh -; CHECK: cgth -; CHECK: ilhu -; CHECK: or -; CHECK: iohl -; CHECK: shufb - -entry: - %A = icmp sge i16 %arg1, %arg2 - ret i1 %A -} - -;; Note: icmp sge i16 %arg1, <immed> can always be transformed into -;; icmp sgt i16 %arg1, <immed>-1 -;; -;; Consequently, even though the patterns exist to match, it's unlikely -;; they'll ever be generated. 
- -define i16 @icmp_slt_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_slt_select_i16: -; CHECK: ceqh -; CHECK: cgth -; CHECK: nor -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp slt i16 %arg1, %arg2 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i1 @icmp_slt_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_slt_setcc_i16: -; CHECK: ceqh -; CHECK: cgth -; CHECK: ilhu -; CHECK: nor -; CHECK: iohl -; CHECK: shufb - -entry: - %A = icmp slt i16 %arg1, %arg2 - ret i1 %A -} - -define i16 @icmp_slt_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_slt_immed01_i16: -; CHECK: ceqhi -; CHECK: cgthi -; CHECK: nor -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp slt i16 %arg1, 511 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_slt_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_slt_immed02_i16: -; CHECK: ceqhi -; CHECK: cgthi -; CHECK: nor -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp slt i16 %arg1, -512 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_slt_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_slt_immed03_i16: -; CHECK: ceqhi -; CHECK: cgthi -; CHECK: nor -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp slt i16 %arg1, -1 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_slt_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_slt_immed04_i16: -; CHECK: lr -; CHECK-NEXT: bi - -entry: - %A = icmp slt i16 %arg1, 32768 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i16 @icmp_sle_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_sle_select_i16: -; CHECK: cgth -; CHECK: selb $3, $5, $6, $3 - -entry: - %A = icmp sle i16 %arg1, %arg2 - %B = select i1 %A, i16 %val1, i16 %val2 - ret i16 %B -} - -define i1 @icmp_sle_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { -; CHECK: icmp_sle_setcc_i16: -; CHECK: cgth -; CHECK: ilhu -; CHECK: xorhi -; CHECK: iohl -; CHECK: bi - -entry: - %A = icmp sle i16 %arg1, %arg2 - ret i1 %A -} - -;; Note: icmp sle i16 %arg1, <immed> can always be transformed into -;; icmp slt i16 %arg1, <immed>+1 -;; -;; Consequently, even though the patterns exist to match, it's unlikely -;; they'll ever be generated. 
- diff --git a/test/CodeGen/CellSPU/icmp32.ll b/test/CodeGen/CellSPU/icmp32.ll deleted file mode 100644 index 1794f4cd7b66..000000000000 --- a/test/CodeGen/CellSPU/icmp32.ll +++ /dev/null @@ -1,575 +0,0 @@ -; RUN: llc < %s -march=cellspu | FileCheck %s - -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -; $3 = %arg1, $4 = %arg2, $5 = %val1, $6 = %val2 -; $3 = %arg1, $4 = %val1, $5 = %val2 -; -; For "positive" comparisons: -; selb $3, $6, $5, <i1> -; selb $3, $5, $4, <i1> -; -; For "negative" comparisons, i.e., those where the result of the comparison -; must be inverted (setne, for example): -; selb $3, $5, $6, <i1> -; selb $3, $4, $5, <i1> - -; i32 integer comparisons: -define i32 @icmp_eq_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_eq_select_i32: -; CHECK: ceq -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp eq i32 %arg1, %arg2 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i1 @icmp_eq_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_eq_setcc_i32: -; CHECK: ilhu -; CHECK: ceq -; CHECK: iohl -; CHECK: shufb - -entry: - %A = icmp eq i32 %arg1, %arg2 - ret i1 %A -} - -define i32 @icmp_eq_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_eq_immed01_i32: -; CHECK: ceqi -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp eq i32 %arg1, 511 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_eq_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_eq_immed02_i32: -; CHECK: ceqi -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp eq i32 %arg1, -512 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_eq_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_eq_immed03_i32: -; CHECK: ceqi -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp eq i32 %arg1, -1 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_eq_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_eq_immed04_i32: -; CHECK: ila -; CHECK: ceq -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp eq i32 %arg1, 32768 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_ne_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_ne_select_i32: -; CHECK: ceq -; CHECK: selb $3, $5, $6, $3 - -entry: - %A = icmp ne i32 %arg1, %arg2 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i1 @icmp_ne_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_ne_setcc_i32: -; CHECK: ceq -; CHECK: ilhu -; CHECK: xori -; CHECK: iohl -; CHECK: shufb - -entry: - %A = icmp ne i32 %arg1, %arg2 - ret i1 %A -} - -define i32 @icmp_ne_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_ne_immed01_i32: -; CHECK: ceqi -; CHECK: selb $3, $4, $5, $3 - -entry: - %A = icmp ne i32 %arg1, 511 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_ne_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_ne_immed02_i32: -; CHECK: ceqi -; CHECK: selb $3, $4, $5, $3 - -entry: - %A = icmp ne i32 %arg1, -512 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_ne_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_ne_immed03_i32: -; CHECK: ceqi -; CHECK: selb $3, $4, $5, $3 - -entry: - %A = icmp ne i32 %arg1, -1 - %B = select i1 %A, 
i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_ne_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_ne_immed04_i32: -; CHECK: ila -; CHECK: ceq -; CHECK: selb $3, $4, $5, $3 - -entry: - %A = icmp ne i32 %arg1, 32768 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_ugt_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_ugt_select_i32: -; CHECK: clgt -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp ugt i32 %arg1, %arg2 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i1 @icmp_ugt_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_ugt_setcc_i32: -; CHECK: ilhu -; CHECK: clgt -; CHECK: iohl -; CHECK: shufb - -entry: - %A = icmp ugt i32 %arg1, %arg2 - ret i1 %A -} - -define i32 @icmp_ugt_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_ugt_immed01_i32: -; CHECK: clgti -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp ugt i32 %arg1, 511 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_ugt_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_ugt_immed02_i32: -; CHECK: clgti -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp ugt i32 %arg1, 4294966784 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_ugt_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_ugt_immed03_i32: -; CHECK: clgti -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp ugt i32 %arg1, 4294967293 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_ugt_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_ugt_immed04_i32: -; CHECK: ila -; CHECK: clgt -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp ugt i32 %arg1, 32768 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_uge_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_uge_select_i32: -; CHECK: ceq -; CHECK: clgt -; CHECK: or -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp uge i32 %arg1, %arg2 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i1 @icmp_uge_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_uge_setcc_i32: -; CHECK: ceq -; CHECK: clgt -; CHECK: ilhu -; CHECK: or -; CHECK: iohl -; CHECK: shufb - -entry: - %A = icmp uge i32 %arg1, %arg2 - ret i1 %A -} - -;; Note: icmp uge i32 %arg1, <immed> can always be transformed into -;; icmp ugt i32 %arg1, <immed>-1 -;; -;; Consequently, even though the patterns exist to match, it's unlikely -;; they'll ever be generated. 
- -define i32 @icmp_ult_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_ult_select_i32: -; CHECK: ceq -; CHECK: clgt -; CHECK: nor -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp ult i32 %arg1, %arg2 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i1 @icmp_ult_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_ult_setcc_i32: -; CHECK: ceq -; CHECK: clgt -; CHECK: ilhu -; CHECK: nor -; CHECK: iohl -; CHECK: shufb - -entry: - %A = icmp ult i32 %arg1, %arg2 - ret i1 %A -} - -define i32 @icmp_ult_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_ult_immed01_i32: -; CHECK: ceqi -; CHECK: clgti -; CHECK: nor -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp ult i32 %arg1, 511 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_ult_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_ult_immed02_i32: -; CHECK: ceqi -; CHECK: clgti -; CHECK: nor -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp ult i32 %arg1, 4294966784 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_ult_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_ult_immed03_i32: -; CHECK: ceqi -; CHECK: clgti -; CHECK: nor -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp ult i32 %arg1, 4294967293 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_ult_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_ult_immed04_i32: -; CHECK: rotmi -; CHECK: ceqi -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp ult i32 %arg1, 32768 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_ule_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_ule_select_i32: -; CHECK: clgt -; CHECK: selb $3, $5, $6, $3 - -entry: - %A = icmp ule i32 %arg1, %arg2 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i1 @icmp_ule_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_ule_setcc_i32: -; CHECK: clgt -; CHECK: ilhu -; CHECK: xori -; CHECK: iohl -; CHECK: shufb - -entry: - %A = icmp ule i32 %arg1, %arg2 - ret i1 %A -} - -;; Note: icmp ule i32 %arg1, <immed> can always be transformed into -;; icmp ult i32 %arg1, <immed>+1 -;; -;; Consequently, even though the patterns exist to match, it's unlikely -;; they'll ever be generated. 
- -define i32 @icmp_sgt_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_sgt_select_i32: -; CHECK: cgt -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp sgt i32 %arg1, %arg2 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i1 @icmp_sgt_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_sgt_setcc_i32: -; CHECK: ilhu -; CHECK: cgt -; CHECK: iohl -; CHECK: shufb - -entry: - %A = icmp sgt i32 %arg1, %arg2 - ret i1 %A -} - -define i32 @icmp_sgt_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_sgt_immed01_i32: -; CHECK: cgti -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp sgt i32 %arg1, 511 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_sgt_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_sgt_immed02_i32: -; CHECK: cgti -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp sgt i32 %arg1, 4294966784 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_sgt_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_sgt_immed03_i32: -; CHECK: cgti -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp sgt i32 %arg1, 4294967293 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_sgt_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_sgt_immed04_i32: -; CHECK: ila -; CHECK: cgt -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp sgt i32 %arg1, 32768 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_sge_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_sge_select_i32: -; CHECK: ceq -; CHECK: cgt -; CHECK: or -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp sge i32 %arg1, %arg2 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i1 @icmp_sge_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_sge_setcc_i32: -; CHECK: ceq -; CHECK: cgt -; CHECK: ilhu -; CHECK: or -; CHECK: iohl -; CHECK: shufb - -entry: - %A = icmp sge i32 %arg1, %arg2 - ret i1 %A -} - -;; Note: icmp sge i32 %arg1, <immed> can always be transformed into -;; icmp sgt i32 %arg1, <immed>-1 -;; -;; Consequently, even though the patterns exist to match, it's unlikely -;; they'll ever be generated. 
- -define i32 @icmp_slt_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_slt_select_i32: -; CHECK: ceq -; CHECK: cgt -; CHECK: nor -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp slt i32 %arg1, %arg2 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i1 @icmp_slt_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_slt_setcc_i32: -; CHECK: ceq -; CHECK: cgt -; CHECK: ilhu -; CHECK: nor -; CHECK: iohl -; CHECK: shufb - -entry: - %A = icmp slt i32 %arg1, %arg2 - ret i1 %A -} - -define i32 @icmp_slt_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_slt_immed01_i32: -; CHECK: ceqi -; CHECK: cgti -; CHECK: nor -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp slt i32 %arg1, 511 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_slt_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_slt_immed02_i32: -; CHECK: ceqi -; CHECK: cgti -; CHECK: nor -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp slt i32 %arg1, -512 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_slt_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_slt_immed03_i32: -; CHECK: ceqi -; CHECK: cgti -; CHECK: nor -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp slt i32 %arg1, -1 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_slt_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_slt_immed04_i32: -; CHECK: ila -; CHECK: ceq -; CHECK: cgt -; CHECK: nor -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp slt i32 %arg1, 32768 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i32 @icmp_sle_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_sle_select_i32: -; CHECK: cgt -; CHECK: selb $3, $5, $6, $3 - -entry: - %A = icmp sle i32 %arg1, %arg2 - %B = select i1 %A, i32 %val1, i32 %val2 - ret i32 %B -} - -define i1 @icmp_sle_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { -; CHECK: icmp_sle_setcc_i32: -; CHECK: cgt -; CHECK: ilhu -; CHECK: xori -; CHECK: iohl -; CHECK: shufb - -entry: - %A = icmp sle i32 %arg1, %arg2 - ret i1 %A -} - -;; Note: icmp sle i32 %arg1, <immed> can always be transformed into -;; icmp slt i32 %arg1, <immed>+1 -;; -;; Consequently, even though the patterns exist to match, it's unlikely -;; they'll ever be generated. 
- diff --git a/test/CodeGen/CellSPU/icmp64.ll b/test/CodeGen/CellSPU/icmp64.ll deleted file mode 100644 index 9dd2cdc0dea9..000000000000 --- a/test/CodeGen/CellSPU/icmp64.ll +++ /dev/null @@ -1,146 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep ceq %t1.s | count 20 -; RUN: grep cgti %t1.s | count 12 -; RUN: grep cgt %t1.s | count 16 -; RUN: grep clgt %t1.s | count 12 -; RUN: grep gb %t1.s | count 12 -; RUN: grep fsm %t1.s | count 10 -; RUN: grep xori %t1.s | count 5 -; RUN: grep selb %t1.s | count 18 - -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -; $3 = %arg1, $4 = %arg2, $5 = %val1, $6 = %val2 -; $3 = %arg1, $4 = %val1, $5 = %val2 -; -; i64 integer comparisons: -define i64 @icmp_eq_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { -entry: - %A = icmp eq i64 %arg1, %arg2 - %B = select i1 %A, i64 %val1, i64 %val2 - ret i64 %B -} - -define i1 @icmp_eq_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { -entry: - %A = icmp eq i64 %arg1, %arg2 - ret i1 %A -} - -define i64 @icmp_ne_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { -entry: - %A = icmp ne i64 %arg1, %arg2 - %B = select i1 %A, i64 %val1, i64 %val2 - ret i64 %B -} - -define i1 @icmp_ne_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { -entry: - %A = icmp ne i64 %arg1, %arg2 - ret i1 %A -} - -define i64 @icmp_ugt_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { -entry: - %A = icmp ugt i64 %arg1, %arg2 - %B = select i1 %A, i64 %val1, i64 %val2 - ret i64 %B -} - -define i1 @icmp_ugt_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { -entry: - %A = icmp ugt i64 %arg1, %arg2 - ret i1 %A -} - -define i64 @icmp_uge_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { -entry: - %A = icmp uge i64 %arg1, %arg2 - %B = select i1 %A, i64 %val1, i64 %val2 - ret i64 %B -} - -define i1 @icmp_uge_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { -entry: - %A = icmp uge i64 %arg1, %arg2 - ret i1 %A -} - -define i64 @icmp_ult_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { -entry: - %A = icmp ult i64 %arg1, %arg2 - %B = select i1 %A, i64 %val1, i64 %val2 - ret i64 %B -} - -define i1 @icmp_ult_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { -entry: - %A = icmp ult i64 %arg1, %arg2 - ret i1 %A -} - -define i64 @icmp_ule_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { -entry: - %A = icmp ule i64 %arg1, %arg2 - %B = select i1 %A, i64 %val1, i64 %val2 - ret i64 %B -} - -define i1 @icmp_ule_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { -entry: - %A = icmp ule i64 %arg1, %arg2 - ret i1 %A -} - -define i64 @icmp_sgt_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { -entry: - %A = icmp sgt i64 %arg1, %arg2 - %B = select i1 %A, i64 %val1, i64 %val2 - ret i64 %B -} - -define i1 @icmp_sgt_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { -entry: - %A = icmp sgt i64 %arg1, %arg2 - ret i1 %A -} - -define i64 @icmp_sge_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { -entry: - %A = icmp sge i64 %arg1, %arg2 - %B = select i1 %A, i64 %val1, i64 %val2 - ret i64 %B -} - -define i1 @icmp_sge_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { -entry: - %A = icmp sge i64 %arg1, %arg2 - ret i1 %A -} - -define i64 @icmp_slt_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { 
-entry: - %A = icmp slt i64 %arg1, %arg2 - %B = select i1 %A, i64 %val1, i64 %val2 - ret i64 %B -} - -define i1 @icmp_slt_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { -entry: - %A = icmp slt i64 %arg1, %arg2 - ret i1 %A -} - -define i64 @icmp_sle_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { -entry: - %A = icmp sle i64 %arg1, %arg2 - %B = select i1 %A, i64 %val1, i64 %val2 - ret i64 %B -} - -define i1 @icmp_sle_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { -entry: - %A = icmp sle i64 %arg1, %arg2 - ret i1 %A -} diff --git a/test/CodeGen/CellSPU/icmp8.ll b/test/CodeGen/CellSPU/icmp8.ll deleted file mode 100644 index 1db641e5a853..000000000000 --- a/test/CodeGen/CellSPU/icmp8.ll +++ /dev/null @@ -1,446 +0,0 @@ -; RUN: llc < %s -march=cellspu | FileCheck %s - -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -; $3 = %arg1, $4 = %arg2, $5 = %val1, $6 = %val2 -; $3 = %arg1, $4 = %val1, $5 = %val2 -; -; For "positive" comparisons: -; selb $3, $6, $5, <i1> -; selb $3, $5, $4, <i1> -; -; For "negative" comparisons, i.e., those where the result of the comparison -; must be inverted (setne, for example): -; selb $3, $5, $6, <i1> -; selb $3, $4, $5, <i1> - -; i8 integer comparisons: -define i8 @icmp_eq_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_eq_select_i8: -; CHECK: ceqb -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp eq i8 %arg1, %arg2 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i1 @icmp_eq_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_eq_setcc_i8: -; CHECK: ceqb -; CHECK-NEXT: bi - -entry: - %A = icmp eq i8 %arg1, %arg2 - ret i1 %A -} - -define i8 @icmp_eq_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_eq_immed01_i8: -; CHECK: ceqbi -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp eq i8 %arg1, 127 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i8 @icmp_eq_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_eq_immed02_i8: -; CHECK: ceqbi -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp eq i8 %arg1, -128 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i8 @icmp_eq_immed03_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_eq_immed03_i8: -; CHECK: ceqbi -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp eq i8 %arg1, -1 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i8 @icmp_ne_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_ne_select_i8: -; CHECK: ceqb -; CHECK: selb $3, $5, $6, $3 - -entry: - %A = icmp ne i8 %arg1, %arg2 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i1 @icmp_ne_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_ne_setcc_i8: -; CHECK: ceqb -; CHECK: xorbi -; CHECK-NEXT: bi - -entry: - %A = icmp ne i8 %arg1, %arg2 - ret i1 %A -} - -define i8 @icmp_ne_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_ne_immed01_i8: -; CHECK: ceqbi -; CHECK: selb $3, $4, $5, $3 - -entry: - %A = icmp ne i8 %arg1, 127 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i8 @icmp_ne_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_ne_immed02_i8: -; CHECK: ceqbi -; CHECK: selb $3, $4, $5, $3 - -entry: - %A = icmp ne i8 %arg1, -128 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i8 @icmp_ne_immed03_i8(i8 %arg1, i8 
%val1, i8 %val2) nounwind { -; CHECK: icmp_ne_immed03_i8: -; CHECK: ceqbi -; CHECK: selb $3, $4, $5, $3 - -entry: - %A = icmp ne i8 %arg1, -1 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i8 @icmp_ugt_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_ugt_select_i8: -; CHECK: clgtb -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp ugt i8 %arg1, %arg2 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i1 @icmp_ugt_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_ugt_setcc_i8: -; CHECK: clgtb -; CHECK-NEXT: bi - -entry: - %A = icmp ugt i8 %arg1, %arg2 - ret i1 %A -} - -define i8 @icmp_ugt_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_ugt_immed01_i8: -; CHECK: clgtbi -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp ugt i8 %arg1, 126 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i8 @icmp_uge_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_uge_select_i8: -; CHECK: ceqb -; CHECK: clgtb -; CHECK: or -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp uge i8 %arg1, %arg2 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i1 @icmp_uge_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_uge_setcc_i8: -; CHECK: ceqb -; CHECK: clgtb -; CHECK: or -; CHECK-NEXT: bi - -entry: - %A = icmp uge i8 %arg1, %arg2 - ret i1 %A -} - -;; Note: icmp uge i8 %arg1, <immed> can always be transformed into -;; icmp ugt i8 %arg1, <immed>-1 -;; -;; Consequently, even though the patterns exist to match, it's unlikely -;; they'll ever be generated. - -define i8 @icmp_ult_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_ult_select_i8: -; CHECK: ceqb -; CHECK: clgtb -; CHECK: nor -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp ult i8 %arg1, %arg2 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i1 @icmp_ult_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_ult_setcc_i8: -; CHECK: ceqb -; CHECK: clgtb -; CHECK: nor -; CHECK-NEXT: bi - -entry: - %A = icmp ult i8 %arg1, %arg2 - ret i1 %A -} - -define i8 @icmp_ult_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_ult_immed01_i8: -; CHECK: ceqbi -; CHECK: clgtbi -; CHECK: nor -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp ult i8 %arg1, 253 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i8 @icmp_ult_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_ult_immed02_i8: -; CHECK: ceqbi -; CHECK: clgtbi -; CHECK: nor -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp ult i8 %arg1, 129 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i8 @icmp_ule_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_ule_select_i8: -; CHECK: clgtb -; CHECK: selb $3, $5, $6, $3 - -entry: - %A = icmp ule i8 %arg1, %arg2 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i1 @icmp_ule_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_ule_setcc_i8: -; CHECK: clgtb -; CHECK: xorbi -; CHECK-NEXT: bi - -entry: - %A = icmp ule i8 %arg1, %arg2 - ret i1 %A -} - -;; Note: icmp ule i8 %arg1, <immed> can always be transformed into -;; icmp ult i8 %arg1, <immed>+1 -;; -;; Consequently, even though the patterns exist to match, it's unlikely -;; they'll ever be generated. 
- -define i8 @icmp_sgt_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_sgt_select_i8: -; CHECK: cgtb -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp sgt i8 %arg1, %arg2 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i1 @icmp_sgt_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_sgt_setcc_i8: -; CHECK: cgtb -; CHECK-NEXT: bi - -entry: - %A = icmp sgt i8 %arg1, %arg2 - ret i1 %A -} - -define i8 @icmp_sgt_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_sgt_immed01_i8: -; CHECK: cgtbi -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp sgt i8 %arg1, 96 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i8 @icmp_sgt_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_sgt_immed02_i8: -; CHECK: cgtbi -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp sgt i8 %arg1, -1 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i8 @icmp_sgt_immed03_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_sgt_immed03_i8: -; CHECK: ceqbi -; CHECK: selb $3, $4, $5, $3 - -entry: - %A = icmp sgt i8 %arg1, -128 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i8 @icmp_sge_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_sge_select_i8: -; CHECK: ceqb -; CHECK: cgtb -; CHECK: or -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp sge i8 %arg1, %arg2 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i1 @icmp_sge_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_sge_setcc_i8: -; CHECK: ceqb -; CHECK: cgtb -; CHECK: or -; CHECK-NEXT: bi - -entry: - %A = icmp sge i8 %arg1, %arg2 - ret i1 %A -} - -;; Note: icmp sge i8 %arg1, <immed> can always be transformed into -;; icmp sgt i8 %arg1, <immed>-1 -;; -;; Consequently, even though the patterns exist to match, it's unlikely -;; they'll ever be generated. 
- -define i8 @icmp_slt_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_slt_select_i8: -; CHECK: ceqb -; CHECK: cgtb -; CHECK: nor -; CHECK: selb $3, $6, $5, $3 - -entry: - %A = icmp slt i8 %arg1, %arg2 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i1 @icmp_slt_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_slt_setcc_i8: -; CHECK: ceqb -; CHECK: cgtb -; CHECK: nor -; CHECK-NEXT: bi - -entry: - %A = icmp slt i8 %arg1, %arg2 - ret i1 %A -} - -define i8 @icmp_slt_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_slt_immed01_i8: -; CHECK: ceqbi -; CHECK: cgtbi -; CHECK: nor -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp slt i8 %arg1, 96 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i8 @icmp_slt_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_slt_immed02_i8: -; CHECK: ceqbi -; CHECK: cgtbi -; CHECK: nor -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp slt i8 %arg1, -120 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i8 @icmp_slt_immed03_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_slt_immed03_i8: -; CHECK: ceqbi -; CHECK: cgtbi -; CHECK: nor -; CHECK: selb $3, $5, $4, $3 - -entry: - %A = icmp slt i8 %arg1, -1 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i8 @icmp_sle_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_sle_select_i8: -; CHECK: cgtb -; CHECK: selb $3, $5, $6, $3 - -entry: - %A = icmp sle i8 %arg1, %arg2 - %B = select i1 %A, i8 %val1, i8 %val2 - ret i8 %B -} - -define i1 @icmp_sle_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { -; CHECK: icmp_sle_setcc_i8: -; CHECK: cgtb -; CHECK: xorbi -; CHECK-NEXT: bi - -entry: - %A = icmp sle i8 %arg1, %arg2 - ret i1 %A -} - -;; Note: icmp sle i8 %arg1, <immed> can always be transformed into -;; icmp slt i8 %arg1, <immed>+1 -;; -;; Consequently, even though the patterns exist to match, it's unlikely -;; they'll ever be generated. 
- diff --git a/test/CodeGen/CellSPU/immed16.ll b/test/CodeGen/CellSPU/immed16.ll deleted file mode 100644 index 077d07169e45..000000000000 --- a/test/CodeGen/CellSPU/immed16.ll +++ /dev/null @@ -1,40 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep "ilh" %t1.s | count 11 -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -define i16 @test_1() { - %x = alloca i16, align 16 - store i16 419, i16* %x ;; ILH via pattern - ret i16 0 -} - -define i16 @test_2() { - %x = alloca i16, align 16 - store i16 1023, i16* %x ;; ILH via pattern - ret i16 0 -} - -define i16 @test_3() { - %x = alloca i16, align 16 - store i16 -1023, i16* %x ;; ILH via pattern - ret i16 0 -} - -define i16 @test_4() { - %x = alloca i16, align 16 - store i16 32767, i16* %x ;; ILH via pattern - ret i16 0 -} - -define i16 @test_5() { - %x = alloca i16, align 16 - store i16 -32768, i16* %x ;; ILH via pattern - ret i16 0 -} - -define i16 @test_6() { - ret i16 0 -} - - diff --git a/test/CodeGen/CellSPU/immed32.ll b/test/CodeGen/CellSPU/immed32.ll deleted file mode 100644 index 8e48f0b52c17..000000000000 --- a/test/CodeGen/CellSPU/immed32.ll +++ /dev/null @@ -1,83 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep ilhu %t1.s | count 9 -; RUN: grep iohl %t1.s | count 7 -; RUN: grep -w il %t1.s | count 3 -; RUN: grep 16429 %t1.s | count 1 -; RUN: grep 63572 %t1.s | count 1 -; RUN: grep 128 %t1.s | count 1 -; RUN: grep 32639 %t1.s | count 1 -; RUN: grep 65535 %t1.s | count 1 -; RUN: grep 16457 %t1.s | count 1 -; RUN: grep 4059 %t1.s | count 1 -; RUN: grep 49077 %t1.s | count 1 -; RUN: grep 1267 %t1.s | count 2 -; RUN: grep 16309 %t1.s | count 1 -; RUN: cat %t1.s | FileCheck %s -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -define i32 @test_1() { - ret i32 4784128 ;; ILHU via pattern (0x49000) -} - -define i32 @test_2() { - ret i32 5308431 ;; ILHU/IOHL via pattern (0x5100f) -} - -define i32 @test_3() { - ret i32 511 ;; IL via pattern -} - -define i32 @test_4() { - ret i32 -512 ;; IL via pattern -} - -define i32 @test_5() -{ -;CHECK: test_5: -;CHECK-NOT: ila $3, 40000 -;CHECK: ilhu -;CHECK: iohl -;CHECK: bi $lr - ret i32 400000 -} - -;; double float floatval -;; 0x4005bf0a80000000 0x402d|f854 2.718282 -define float @float_const_1() { - ret float 0x4005BF0A80000000 ;; ILHU/IOHL -} - -;; double float floatval -;; 0x3810000000000000 0x0080|0000 0.000000 -define float @float_const_2() { - ret float 0x3810000000000000 ;; IL 128 -} - -;; double float floatval -;; 0x47efffffe0000000 0x7f7f|ffff NaN -define float @float_const_3() { - ret float 0x47EFFFFFE0000000 ;; ILHU/IOHL via pattern -} - -;; double float floatval -;; 0x400921fb60000000 0x4049|0fdb 3.141593 -define float @float_const_4() { - ret float 0x400921FB60000000 ;; ILHU/IOHL via pattern -} - -;; double float floatval -;; 0xbff6a09e60000000 0xbfb5|04f3 -1.414214 -define float @float_const_5() { - ret float 0xBFF6A09E60000000 ;; ILHU/IOHL via pattern -} - -;; double float floatval -;; 0x3ff6a09e60000000 0x3fb5|04f3 1.414214 -define float @float_const_6() { - ret float 0x3FF6A09E60000000 ;; ILHU/IOHL via pattern -} - -define float @float_const_7() { - ret float 0.000000e+00 ;; IL 0 via pattern -} diff --git a/test/CodeGen/CellSPU/immed64.ll b/test/CodeGen/CellSPU/immed64.ll deleted file mode 100644 index fd483651756e..000000000000 
--- a/test/CodeGen/CellSPU/immed64.ll +++ /dev/null @@ -1,95 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep lqa %t1.s | count 13 -; RUN: grep ilhu %t1.s | count 15 -; RUN: grep ila %t1.s | count 1 -; RUN: grep -w il %t1.s | count 6 -; RUN: grep shufb %t1.s | count 13 -; RUN: grep 65520 %t1.s | count 1 -; RUN: grep 43981 %t1.s | count 1 -; RUN: grep 13702 %t1.s | count 1 -; RUN: grep 28225 %t1.s | count 1 -; RUN: grep 30720 %t1.s | count 1 -; RUN: grep 3233857728 %t1.s | count 8 -; RUN: grep 2155905152 %t1.s | count 6 -; RUN: grep 66051 %t1.s | count 7 -; RUN: grep 471670303 %t1.s | count 11 - -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -; 1311768467750121234 => 0x 12345678 abcdef12 (4660,22136/43981,61202) -; 18446744073709551591 => 0x ffffffff ffffffe7 (-25) -; 18446744073708516742 => 0x ffffffff fff03586 (-1034874) -; 5308431 => 0x 00000000 0051000F -; 9223372038704560128 => 0x 80000000 6e417800 - -define i64 @i64_const_1() { - ret i64 1311768467750121234 ;; Constant pool spill -} - -define i64 @i64_const_2() { - ret i64 18446744073709551591 ;; IL/SHUFB -} - -define i64 @i64_const_3() { - ret i64 18446744073708516742 ;; IHLU/IOHL/SHUFB -} - -define i64 @i64_const_4() { - ret i64 5308431 ;; ILHU/IOHL/SHUFB -} - -define i64 @i64_const_5() { - ret i64 511 ;; IL/SHUFB -} - -define i64 @i64_const_6() { - ret i64 -512 ;; IL/SHUFB -} - -define i64 @i64_const_7() { - ret i64 9223372038704560128 ;; IHLU/IOHL/SHUFB -} - -define i64 @i64_const_8() { - ret i64 0 ;; IL -} - -define i64 @i64_const_9() { - ret i64 -1 ;; IL -} - -define i64 @i64_const_10() { - ret i64 281470681808895 ;; IL 65535 -} - -; 0x4005bf0a8b145769 -> -; (ILHU 0x4005 [16389]/IOHL 0xbf0a [48906]) -; (ILHU 0x8b14 [35604]/IOHL 0x5769 [22377]) -define double @f64_const_1() { - ret double 0x4005bf0a8b145769 ;; ILHU/IOHL via pattern -} - -define double @f64_const_2() { - ret double 0x0010000000000000 -} - -define double @f64_const_3() { - ret double 0x7fefffffffffffff -} - -define double @f64_const_4() { - ret double 0x400921fb54442d18 -} - -define double @f64_const_5() { - ret double 0xbff6a09e667f3bcd ;; ILHU/IOHL via pattern -} - -define double @f64_const_6() { - ret double 0x3ff6a09e667f3bcd -} - -define double @f64_const_7() { - ret double 0.000000e+00 -} diff --git a/test/CodeGen/CellSPU/int2fp.ll b/test/CodeGen/CellSPU/int2fp.ll deleted file mode 100644 index 984c017c96d1..000000000000 --- a/test/CodeGen/CellSPU/int2fp.ll +++ /dev/null @@ -1,41 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep csflt %t1.s | count 5 -; RUN: grep cuflt %t1.s | count 1 -; RUN: grep xshw %t1.s | count 2 -; RUN: grep xsbh %t1.s | count 1 -; RUN: grep and %t1.s | count 2 -; RUN: grep andi %t1.s | count 1 -; RUN: grep ila %t1.s | count 1 - -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -define float @sitofp_i32(i32 %arg1) { - %A = sitofp i32 %arg1 to float ; <float> [#uses=1] - ret float %A -} - -define float @uitofp_u32(i32 %arg1) { - %A = uitofp i32 %arg1 to float ; <float> [#uses=1] - ret float %A -} - -define float @sitofp_i16(i16 %arg1) { - %A = sitofp i16 %arg1 to float ; <float> [#uses=1] - ret float %A -} - -define float @uitofp_i16(i16 %arg1) { - %A = uitofp i16 %arg1 to float ; <float> [#uses=1] - ret float %A -} - -define float @sitofp_i8(i8 %arg1) { - %A = sitofp i8 %arg1 to 
float ; <float> [#uses=1] - ret float %A -} - -define float @uitofp_i8(i8 %arg1) { - %A = uitofp i8 %arg1 to float ; <float> [#uses=1] - ret float %A -} diff --git a/test/CodeGen/CellSPU/intrinsics_branch.ll b/test/CodeGen/CellSPU/intrinsics_branch.ll deleted file mode 100644 index b0f6a6247e41..000000000000 --- a/test/CodeGen/CellSPU/intrinsics_branch.ll +++ /dev/null @@ -1,150 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep ceq %t1.s | count 30 -; RUN: grep ceqb %t1.s | count 10 -; RUN: grep ceqhi %t1.s | count 5 -; RUN: grep ceqi %t1.s | count 5 -; RUN: grep cgt %t1.s | count 30 -; RUN: grep cgtb %t1.s | count 10 -; RUN: grep cgthi %t1.s | count 5 -; RUN: grep cgti %t1.s | count 5 -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -declare <4 x i32> @llvm.spu.si.shli(<4 x i32>, i8) - -declare <4 x i32> @llvm.spu.si.ceq(<4 x i32>, <4 x i32>) -declare <16 x i8> @llvm.spu.si.ceqb(<16 x i8>, <16 x i8>) -declare <8 x i16> @llvm.spu.si.ceqh(<8 x i16>, <8 x i16>) -declare <4 x i32> @llvm.spu.si.ceqi(<4 x i32>, i16) -declare <8 x i16> @llvm.spu.si.ceqhi(<8 x i16>, i16) -declare <16 x i8> @llvm.spu.si.ceqbi(<16 x i8>, i8) - -declare <4 x i32> @llvm.spu.si.cgt(<4 x i32>, <4 x i32>) -declare <16 x i8> @llvm.spu.si.cgtb(<16 x i8>, <16 x i8>) -declare <8 x i16> @llvm.spu.si.cgth(<8 x i16>, <8 x i16>) -declare <4 x i32> @llvm.spu.si.cgti(<4 x i32>, i16) -declare <8 x i16> @llvm.spu.si.cgthi(<8 x i16>, i16) -declare <16 x i8> @llvm.spu.si.cgtbi(<16 x i8>, i8) - -declare <4 x i32> @llvm.spu.si.clgt(<4 x i32>, <4 x i32>) -declare <16 x i8> @llvm.spu.si.clgtb(<16 x i8>, <16 x i8>) -declare <8 x i16> @llvm.spu.si.clgth(<8 x i16>, <8 x i16>) -declare <4 x i32> @llvm.spu.si.clgti(<4 x i32>, i16) -declare <8 x i16> @llvm.spu.si.clgthi(<8 x i16>, i16) -declare <16 x i8> @llvm.spu.si.clgtbi(<16 x i8>, i8) - - - -define <4 x i32> @test(<4 x i32> %A) { - call <4 x i32> @llvm.spu.si.shli(<4 x i32> %A, i8 3) - %Y = bitcast <4 x i32> %1 to <4 x i32> - ret <4 x i32> %Y -} - -define <4 x i32> @ceqtest(<4 x i32> %A, <4 x i32> %B) { - call <4 x i32> @llvm.spu.si.ceq(<4 x i32> %A, <4 x i32> %B) - %Y = bitcast <4 x i32> %1 to <4 x i32> - ret <4 x i32> %Y -} - -define <8 x i16> @ceqhtest(<8 x i16> %A, <8 x i16> %B) { - call <8 x i16> @llvm.spu.si.ceqh(<8 x i16> %A, <8 x i16> %B) - %Y = bitcast <8 x i16> %1 to <8 x i16> - ret <8 x i16> %Y -} - -define <16 x i8> @ceqbtest(<16 x i8> %A, <16 x i8> %B) { - call <16 x i8> @llvm.spu.si.ceqb(<16 x i8> %A, <16 x i8> %B) - %Y = bitcast <16 x i8> %1 to <16 x i8> - ret <16 x i8> %Y -} - -define <4 x i32> @ceqitest(<4 x i32> %A) { - call <4 x i32> @llvm.spu.si.ceqi(<4 x i32> %A, i16 65) - %Y = bitcast <4 x i32> %1 to <4 x i32> - ret <4 x i32> %Y -} - -define <8 x i16> @ceqhitest(<8 x i16> %A) { - call <8 x i16> @llvm.spu.si.ceqhi(<8 x i16> %A, i16 65) - %Y = bitcast <8 x i16> %1 to <8 x i16> - ret <8 x i16> %Y -} - -define <16 x i8> @ceqbitest(<16 x i8> %A) { - call <16 x i8> @llvm.spu.si.ceqbi(<16 x i8> %A, i8 65) - %Y = bitcast <16 x i8> %1 to <16 x i8> - ret <16 x i8> %Y -} - -define <4 x i32> @cgttest(<4 x i32> %A, <4 x i32> %B) { - call <4 x i32> @llvm.spu.si.cgt(<4 x i32> %A, <4 x i32> %B) - %Y = bitcast <4 x i32> %1 to <4 x i32> - ret <4 x i32> %Y -} - -define <8 x i16> @cgthtest(<8 x i16> %A, <8 x i16> %B) { - call <8 x i16> @llvm.spu.si.cgth(<8 x i16> %A, <8 x i16> %B) - %Y = bitcast <8 x i16> %1 to <8 x i16> - ret <8 x i16> %Y -} - -define <16 x 
i8> @cgtbtest(<16 x i8> %A, <16 x i8> %B) { - call <16 x i8> @llvm.spu.si.cgtb(<16 x i8> %A, <16 x i8> %B) - %Y = bitcast <16 x i8> %1 to <16 x i8> - ret <16 x i8> %Y -} - -define <4 x i32> @cgtitest(<4 x i32> %A) { - call <4 x i32> @llvm.spu.si.cgti(<4 x i32> %A, i16 65) - %Y = bitcast <4 x i32> %1 to <4 x i32> - ret <4 x i32> %Y -} - -define <8 x i16> @cgthitest(<8 x i16> %A) { - call <8 x i16> @llvm.spu.si.cgthi(<8 x i16> %A, i16 65) - %Y = bitcast <8 x i16> %1 to <8 x i16> - ret <8 x i16> %Y -} - -define <16 x i8> @cgtbitest(<16 x i8> %A) { - call <16 x i8> @llvm.spu.si.cgtbi(<16 x i8> %A, i8 65) - %Y = bitcast <16 x i8> %1 to <16 x i8> - ret <16 x i8> %Y -} - -define <4 x i32> @clgttest(<4 x i32> %A, <4 x i32> %B) { - call <4 x i32> @llvm.spu.si.clgt(<4 x i32> %A, <4 x i32> %B) - %Y = bitcast <4 x i32> %1 to <4 x i32> - ret <4 x i32> %Y -} - -define <8 x i16> @clgthtest(<8 x i16> %A, <8 x i16> %B) { - call <8 x i16> @llvm.spu.si.clgth(<8 x i16> %A, <8 x i16> %B) - %Y = bitcast <8 x i16> %1 to <8 x i16> - ret <8 x i16> %Y -} - -define <16 x i8> @clgtbtest(<16 x i8> %A, <16 x i8> %B) { - call <16 x i8> @llvm.spu.si.clgtb(<16 x i8> %A, <16 x i8> %B) - %Y = bitcast <16 x i8> %1 to <16 x i8> - ret <16 x i8> %Y -} - -define <4 x i32> @clgtitest(<4 x i32> %A) { - call <4 x i32> @llvm.spu.si.clgti(<4 x i32> %A, i16 65) - %Y = bitcast <4 x i32> %1 to <4 x i32> - ret <4 x i32> %Y -} - -define <8 x i16> @clgthitest(<8 x i16> %A) { - call <8 x i16> @llvm.spu.si.clgthi(<8 x i16> %A, i16 65) - %Y = bitcast <8 x i16> %1 to <8 x i16> - ret <8 x i16> %Y -} - -define <16 x i8> @clgtbitest(<16 x i8> %A) { - call <16 x i8> @llvm.spu.si.clgtbi(<16 x i8> %A, i8 65) - %Y = bitcast <16 x i8> %1 to <16 x i8> - ret <16 x i8> %Y -} diff --git a/test/CodeGen/CellSPU/intrinsics_float.ll b/test/CodeGen/CellSPU/intrinsics_float.ll deleted file mode 100644 index 81373470d069..000000000000 --- a/test/CodeGen/CellSPU/intrinsics_float.ll +++ /dev/null @@ -1,94 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep fa %t1.s | count 5 -; RUN: grep fs %t1.s | count 5 -; RUN: grep fm %t1.s | count 15 -; RUN: grep fceq %t1.s | count 5 -; RUN: grep fcmeq %t1.s | count 5 -; RUN: grep fcgt %t1.s | count 5 -; RUN: grep fcmgt %t1.s | count 5 -; RUN: grep fma %t1.s | count 5 -; RUN: grep fnms %t1.s | count 5 -; RUN: grep fms %t1.s | count 5 -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -declare <4 x i32> @llvm.spu.si.shli(<4 x i32>, i8) - -declare <4 x float> @llvm.spu.si.fa(<4 x float>, <4 x float>) -declare <4 x float> @llvm.spu.si.fs(<4 x float>, <4 x float>) -declare <4 x float> @llvm.spu.si.fm(<4 x float>, <4 x float>) - -declare <4 x float> @llvm.spu.si.fceq(<4 x float>, <4 x float>) -declare <4 x float> @llvm.spu.si.fcmeq(<4 x float>, <4 x float>) -declare <4 x float> @llvm.spu.si.fcgt(<4 x float>, <4 x float>) -declare <4 x float> @llvm.spu.si.fcmgt(<4 x float>, <4 x float>) - -declare <4 x float> @llvm.spu.si.fma(<4 x float>, <4 x float>, <4 x float>) -declare <4 x float> @llvm.spu.si.fnms(<4 x float>, <4 x float>, <4 x float>) -declare <4 x float> @llvm.spu.si.fms(<4 x float>, <4 x float>, <4 x float>) - -define <4 x i32> @test(<4 x i32> %A) { - call <4 x i32> @llvm.spu.si.shli(<4 x i32> %A, i8 3) - %Y = bitcast <4 x i32> %1 to <4 x i32> - ret <4 x i32> %Y -} - -define <4 x float> @fatest(<4 x float> %A, <4 x float> %B) { - call <4 x float> @llvm.spu.si.fa(<4 x float> %A, <4 x float> %B) - %Y = 
bitcast <4 x float> %1 to <4 x float> - ret <4 x float> %Y -} - -define <4 x float> @fstest(<4 x float> %A, <4 x float> %B) { - call <4 x float> @llvm.spu.si.fs(<4 x float> %A, <4 x float> %B) - %Y = bitcast <4 x float> %1 to <4 x float> - ret <4 x float> %Y -} - -define <4 x float> @fmtest(<4 x float> %A, <4 x float> %B) { - call <4 x float> @llvm.spu.si.fm(<4 x float> %A, <4 x float> %B) - %Y = bitcast <4 x float> %1 to <4 x float> - ret <4 x float> %Y -} - -define <4 x float> @fceqtest(<4 x float> %A, <4 x float> %B) { - call <4 x float> @llvm.spu.si.fceq(<4 x float> %A, <4 x float> %B) - %Y = bitcast <4 x float> %1 to <4 x float> - ret <4 x float> %Y -} - -define <4 x float> @fcmeqtest(<4 x float> %A, <4 x float> %B) { - call <4 x float> @llvm.spu.si.fcmeq(<4 x float> %A, <4 x float> %B) - %Y = bitcast <4 x float> %1 to <4 x float> - ret <4 x float> %Y -} - -define <4 x float> @fcgttest(<4 x float> %A, <4 x float> %B) { - call <4 x float> @llvm.spu.si.fcgt(<4 x float> %A, <4 x float> %B) - %Y = bitcast <4 x float> %1 to <4 x float> - ret <4 x float> %Y -} - -define <4 x float> @fcmgttest(<4 x float> %A, <4 x float> %B) { - call <4 x float> @llvm.spu.si.fcmgt(<4 x float> %A, <4 x float> %B) - %Y = bitcast <4 x float> %1 to <4 x float> - ret <4 x float> %Y -} - -define <4 x float> @fmatest(<4 x float> %A, <4 x float> %B, <4 x float> %C) { - call <4 x float> @llvm.spu.si.fma(<4 x float> %A, <4 x float> %B, <4 x float> %C) - %Y = bitcast <4 x float> %1 to <4 x float> - ret <4 x float> %Y -} - -define <4 x float> @fnmstest(<4 x float> %A, <4 x float> %B, <4 x float> %C) { - call <4 x float> @llvm.spu.si.fnms(<4 x float> %A, <4 x float> %B, <4 x float> %C) - %Y = bitcast <4 x float> %1 to <4 x float> - ret <4 x float> %Y -} - -define <4 x float> @fmstest(<4 x float> %A, <4 x float> %B, <4 x float> %C) { - call <4 x float> @llvm.spu.si.fms(<4 x float> %A, <4 x float> %B, <4 x float> %C) - %Y = bitcast <4 x float> %1 to <4 x float> - ret <4 x float> %Y -} diff --git a/test/CodeGen/CellSPU/intrinsics_logical.ll b/test/CodeGen/CellSPU/intrinsics_logical.ll deleted file mode 100644 index a29ee4c2405d..000000000000 --- a/test/CodeGen/CellSPU/intrinsics_logical.ll +++ /dev/null @@ -1,49 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep and %t1.s | count 20 -; RUN: grep andc %t1.s | count 5 -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -declare <4 x i32> @llvm.spu.si.and(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.spu.si.andc(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.spu.si.andi(<4 x i32>, i16) -declare <8 x i16> @llvm.spu.si.andhi(<8 x i16>, i16) -declare <16 x i8> @llvm.spu.si.andbi(<16 x i8>, i8) - -declare <4 x i32> @llvm.spu.si.or(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.spu.si.orc(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.spu.si.ori(<4 x i32>, i16) -declare <8 x i16> @llvm.spu.si.orhi(<8 x i16>, i16) -declare <16 x i8> @llvm.spu.si.orbi(<16 x i8>, i8) - -declare <4 x i32> @llvm.spu.si.xor(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.spu.si.xori(<4 x i32>, i16) -declare <8 x i16> @llvm.spu.si.xorhi(<8 x i16>, i16) -declare <16 x i8> @llvm.spu.si.xorbi(<16 x i8>, i8) - -declare <4 x i32> @llvm.spu.si.nand(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.spu.si.nor(<4 x i32>, <4 x i32>) - -define <4 x i32> @andtest(<4 x i32> %A, <4 x i32> %B) { - call <4 x i32> @llvm.spu.si.and(<4 x i32> %A, <4 x i32> %B) - %Y = bitcast <4 x i32> %1 to <4 x i32> - 
ret <4 x i32> %Y -} - -define <4 x i32> @andctest(<4 x i32> %A, <4 x i32> %B) { - call <4 x i32> @llvm.spu.si.andc(<4 x i32> %A, <4 x i32> %B) - %Y = bitcast <4 x i32> %1 to <4 x i32> - ret <4 x i32> %Y -} - -define <4 x i32> @anditest(<4 x i32> %A) { - call <4 x i32> @llvm.spu.si.andi(<4 x i32> %A, i16 65) - %Y = bitcast <4 x i32> %1 to <4 x i32> - ret <4 x i32> %Y -} - -define <8 x i16> @andhitest(<8 x i16> %A) { - call <8 x i16> @llvm.spu.si.andhi(<8 x i16> %A, i16 65) - %Y = bitcast <8 x i16> %1 to <8 x i16> - ret <8 x i16> %Y -} diff --git a/test/CodeGen/CellSPU/jumptable.ll b/test/CodeGen/CellSPU/jumptable.ll deleted file mode 100644 index 66c2fdeb51fd..000000000000 --- a/test/CodeGen/CellSPU/jumptable.ll +++ /dev/null @@ -1,21 +0,0 @@ -;RUN: llc --march=cellspu -disable-cgp-branch-opts %s -o - | FileCheck %s -; This is to check that emitting jumptables doesn't crash llc -define i32 @test(i32 %param) { -entry: -;CHECK: ai {{\$.}}, $3, -1 -;CHECK: clgti {{\$., \$.}}, 3 -;CHECK: brnz {{\$.}},.LBB0_ - switch i32 %param, label %bb2 [ - i32 1, label %bb1 - i32 2, label %bb2 - i32 3, label %bb3 - i32 4, label %bb2 - ] -;CHECK-NOT: # BB#2 -bb1: - ret i32 1 -bb2: - ret i32 2 -bb3: - ret i32 %param -} diff --git a/test/CodeGen/CellSPU/loads.ll b/test/CodeGen/CellSPU/loads.ll deleted file mode 100644 index 4771752f5f4c..000000000000 --- a/test/CodeGen/CellSPU/loads.ll +++ /dev/null @@ -1,59 +0,0 @@ -; RUN: llc < %s -march=cellspu | FileCheck %s - -; ModuleID = 'loads.bc' -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -define <4 x float> @load_v4f32_1(<4 x float>* %a) nounwind readonly { -entry: - %tmp1 = load <4 x float>* %a - ret <4 x float> %tmp1 -; CHECK: lqd $3, 0($3) -} - -define <4 x float> @load_v4f32_2(<4 x float>* %a) nounwind readonly { -entry: - %arrayidx = getelementptr <4 x float>* %a, i32 1 - %tmp1 = load <4 x float>* %arrayidx - ret <4 x float> %tmp1 -; CHECK: lqd $3, 16($3) -} - - -declare <4 x i32>* @getv4f32ptr() -define <4 x i32> @func() { - ;CHECK: brasl - ; we need to have some instruction to move the result to safety. - ; which instruction (lr, stqd...) depends on the regalloc - ;CHECK: {{.*}} - ;CHECK: brasl - %rv1 = call <4 x i32>* @getv4f32ptr() - %rv2 = call <4 x i32>* @getv4f32ptr() - %rv3 = load <4 x i32>* %rv1 - ret <4 x i32> %rv3 -} - -define <4 x float> @load_undef(){ - ; CHECK: lqd $3, 0($3) - %val = load <4 x float>* undef - ret <4 x float> %val -} - -;check that 'misaligned' loads that may span two memory chunks -;have two loads. 
Don't check for the bitmanipulation, as that -;might change with improved algorithms or scheduling -define i32 @load_misaligned( i32* %ptr ){ -;CHECK: load_misaligned -;CHECK: lqd -;CHECK: lqd -;CHECK: bi $lr - %rv = load i32* %ptr, align 2 - ret i32 %rv -} - -define <4 x i32> @load_null_vec( ) { -;CHECK: lqa -;CHECK: bi $lr - %rv = load <4 x i32>* null - ret <4 x i32> %rv -} diff --git a/test/CodeGen/CellSPU/mul-with-overflow.ll b/test/CodeGen/CellSPU/mul-with-overflow.ll deleted file mode 100644 index c04e69e3e193..000000000000 --- a/test/CodeGen/CellSPU/mul-with-overflow.ll +++ /dev/null @@ -1,15 +0,0 @@ -; RUN: llc < %s -march=cellspu - -declare {i16, i1} @llvm.smul.with.overflow.i16(i16 %a, i16 %b) -define zeroext i1 @a(i16 %x) nounwind { - %res = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %x, i16 3) - %obil = extractvalue {i16, i1} %res, 1 - ret i1 %obil -} - -declare {i16, i1} @llvm.umul.with.overflow.i16(i16 %a, i16 %b) -define zeroext i1 @b(i16 %x) nounwind { - %res = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %x, i16 3) - %obil = extractvalue {i16, i1} %res, 1 - ret i1 %obil -} diff --git a/test/CodeGen/CellSPU/mul_ops.ll b/test/CodeGen/CellSPU/mul_ops.ll deleted file mode 100644 index 1e28fc7a918d..000000000000 --- a/test/CodeGen/CellSPU/mul_ops.ll +++ /dev/null @@ -1,88 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep mpy %t1.s | count 44 -; RUN: grep mpyu %t1.s | count 4 -; RUN: grep mpyh %t1.s | count 10 -; RUN: grep mpyhh %t1.s | count 2 -; RUN: grep rotma %t1.s | count 12 -; RUN: grep rotmahi %t1.s | count 4 -; RUN: grep and %t1.s | count 2 -; RUN: grep selb %t1.s | count 6 -; RUN: grep fsmbi %t1.s | count 4 -; RUN: grep shli %t1.s | count 4 -; RUN: grep shlhi %t1.s | count 4 -; RUN: grep ila %t1.s | count 2 -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -; 32-bit multiply instruction generation: -define <4 x i32> @mpy_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { -entry: - %A = mul <4 x i32> %arg1, %arg2 - ret <4 x i32> %A -} - -define <4 x i32> @mpy_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) { -entry: - %A = mul <4 x i32> %arg2, %arg1 - ret <4 x i32> %A -} - -define <8 x i16> @mpy_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) { -entry: - %A = mul <8 x i16> %arg1, %arg2 - ret <8 x i16> %A -} - -define <8 x i16> @mpy_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) { -entry: - %A = mul <8 x i16> %arg2, %arg1 - ret <8 x i16> %A -} - -define <16 x i8> @mul_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) { -entry: - %A = mul <16 x i8> %arg2, %arg1 - ret <16 x i8> %A -} - -define <16 x i8> @mul_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) { -entry: - %A = mul <16 x i8> %arg1, %arg2 - ret <16 x i8> %A -} - -define i32 @mul_i32_1(i32 %arg1, i32 %arg2) { -entry: - %A = mul i32 %arg2, %arg1 - ret i32 %A -} - -define i32 @mul_i32_2(i32 %arg1, i32 %arg2) { -entry: - %A = mul i32 %arg1, %arg2 - ret i32 %A -} - -define i16 @mul_i16_1(i16 %arg1, i16 %arg2) { -entry: - %A = mul i16 %arg2, %arg1 - ret i16 %A -} - -define i16 @mul_i16_2(i16 %arg1, i16 %arg2) { -entry: - %A = mul i16 %arg1, %arg2 - ret i16 %A -} - -define i8 @mul_i8_1(i8 %arg1, i8 %arg2) { -entry: - %A = mul i8 %arg2, %arg1 - ret i8 %A -} - -define i8 @mul_i8_2(i8 %arg1, i8 %arg2) { -entry: - %A = mul i8 %arg1, %arg2 - ret i8 %A -} diff --git a/test/CodeGen/CellSPU/nand.ll b/test/CodeGen/CellSPU/nand.ll deleted file mode 100644 index 57ac709c5414..000000000000 --- a/test/CodeGen/CellSPU/nand.ll +++ 
/dev/null @@ -1,125 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep nand %t1.s | count 90 -; RUN: grep and %t1.s | count 94 -; RUN: grep xsbh %t1.s | count 2 -; RUN: grep xshw %t1.s | count 4 - -; CellSPU legalization is over-sensitive to Legalize's traversal order. -; XFAIL: * - -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -define <4 x i32> @nand_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { - %A = and <4 x i32> %arg2, %arg1 ; <<4 x i32>> [#uses=1] - %B = xor <4 x i32> %A, < i32 -1, i32 -1, i32 -1, i32 -1 > - ret <4 x i32> %B -} - -define <4 x i32> @nand_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) { - %A = and <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1] - %B = xor <4 x i32> %A, < i32 -1, i32 -1, i32 -1, i32 -1 > - ret <4 x i32> %B -} - -define <8 x i16> @nand_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) { - %A = and <8 x i16> %arg2, %arg1 ; <<8 x i16>> [#uses=1] - %B = xor <8 x i16> %A, < i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1, i16 -1, i16 -1 > - ret <8 x i16> %B -} - -define <8 x i16> @nand_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) { - %A = and <8 x i16> %arg1, %arg2 ; <<8 x i16>> [#uses=1] - %B = xor <8 x i16> %A, < i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1, i16 -1, i16 -1 > - ret <8 x i16> %B -} - -define <16 x i8> @nand_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) { - %A = and <16 x i8> %arg2, %arg1 ; <<16 x i8>> [#uses=1] - %B = xor <16 x i8> %A, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - ret <16 x i8> %B -} - -define <16 x i8> @nand_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) { - %A = and <16 x i8> %arg1, %arg2 ; <<16 x i8>> [#uses=1] - %B = xor <16 x i8> %A, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - ret <16 x i8> %B -} - -define i32 @nand_i32_1(i32 %arg1, i32 %arg2) { - %A = and i32 %arg2, %arg1 ; <i32> [#uses=1] - %B = xor i32 %A, -1 ; <i32> [#uses=1] - ret i32 %B -} - -define i32 @nand_i32_2(i32 %arg1, i32 %arg2) { - %A = and i32 %arg1, %arg2 ; <i32> [#uses=1] - %B = xor i32 %A, -1 ; <i32> [#uses=1] - ret i32 %B -} - -define signext i16 @nand_i16_1(i16 signext %arg1, i16 signext %arg2) { - %A = and i16 %arg2, %arg1 ; <i16> [#uses=1] - %B = xor i16 %A, -1 ; <i16> [#uses=1] - ret i16 %B -} - -define signext i16 @nand_i16_2(i16 signext %arg1, i16 signext %arg2) { - %A = and i16 %arg1, %arg2 ; <i16> [#uses=1] - %B = xor i16 %A, -1 ; <i16> [#uses=1] - ret i16 %B -} - -define zeroext i16 @nand_i16u_1(i16 zeroext %arg1, i16 zeroext %arg2) { - %A = and i16 %arg2, %arg1 ; <i16> [#uses=1] - %B = xor i16 %A, -1 ; <i16> [#uses=1] - ret i16 %B -} - -define zeroext i16 @nand_i16u_2(i16 zeroext %arg1, i16 zeroext %arg2) { - %A = and i16 %arg1, %arg2 ; <i16> [#uses=1] - %B = xor i16 %A, -1 ; <i16> [#uses=1] - ret i16 %B -} - -define zeroext i8 @nand_i8u_1(i8 zeroext %arg1, i8 zeroext %arg2) { - %A = and i8 %arg2, %arg1 ; <i8> [#uses=1] - %B = xor i8 %A, -1 ; <i8> [#uses=1] - ret i8 %B -} - -define zeroext i8 @nand_i8u_2(i8 zeroext %arg1, i8 zeroext %arg2) { - %A = and i8 %arg1, %arg2 ; <i8> [#uses=1] - %B = xor i8 %A, -1 ; <i8> [#uses=1] - ret i8 %B -} - -define signext i8 @nand_i8_1(i8 signext %arg1, i8 signext %arg2) { - %A = and i8 %arg2, %arg1 ; <i8> [#uses=1] - %B = xor i8 %A, -1 ; <i8> [#uses=1] - ret i8 %B -} - -define signext i8 @nand_i8_2(i8 signext %arg1, i8 signext %arg2) { - %A = and i8 
%arg1, %arg2 ; <i8> [#uses=1] - %B = xor i8 %A, -1 ; <i8> [#uses=1] - ret i8 %B -} - -define i8 @nand_i8_3(i8 %arg1, i8 %arg2) { - %A = and i8 %arg2, %arg1 ; <i8> [#uses=1] - %B = xor i8 %A, -1 ; <i8> [#uses=1] - ret i8 %B -} - -define i8 @nand_i8_4(i8 %arg1, i8 %arg2) { - %A = and i8 %arg1, %arg2 ; <i8> [#uses=1] - %B = xor i8 %A, -1 ; <i8> [#uses=1] - ret i8 %B -} diff --git a/test/CodeGen/CellSPU/or_ops.ll b/test/CodeGen/CellSPU/or_ops.ll deleted file mode 100644 index f329266a3c23..000000000000 --- a/test/CodeGen/CellSPU/or_ops.ll +++ /dev/null @@ -1,278 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep and %t1.s | count 2 -; RUN: grep orc %t1.s | count 85 -; RUN: grep ori %t1.s | count 34 -; RUN: grep orhi %t1.s | count 30 -; RUN: grep orbi %t1.s | count 15 -; RUN: FileCheck %s < %t1.s - -; CellSPU legalization is over-sensitive to Legalize's traversal order. -; XFAIL: * - -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -; OR instruction generation: -define <4 x i32> @or_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { - %A = or <4 x i32> %arg1, %arg2 - ret <4 x i32> %A -} - -define <4 x i32> @or_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) { - %A = or <4 x i32> %arg2, %arg1 - ret <4 x i32> %A -} - -define <8 x i16> @or_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) { - %A = or <8 x i16> %arg1, %arg2 - ret <8 x i16> %A -} - -define <8 x i16> @or_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) { - %A = or <8 x i16> %arg2, %arg1 - ret <8 x i16> %A -} - -define <16 x i8> @or_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) { - %A = or <16 x i8> %arg2, %arg1 - ret <16 x i8> %A -} - -define <16 x i8> @or_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) { - %A = or <16 x i8> %arg1, %arg2 - ret <16 x i8> %A -} - -define i32 @or_i32_1(i32 %arg1, i32 %arg2) { - %A = or i32 %arg2, %arg1 - ret i32 %A -} - -define i32 @or_i32_2(i32 %arg1, i32 %arg2) { - %A = or i32 %arg1, %arg2 - ret i32 %A -} - -define i16 @or_i16_1(i16 %arg1, i16 %arg2) { - %A = or i16 %arg2, %arg1 - ret i16 %A -} - -define i16 @or_i16_2(i16 %arg1, i16 %arg2) { - %A = or i16 %arg1, %arg2 - ret i16 %A -} - -define i8 @or_i8_1(i8 %arg1, i8 %arg2) { - %A = or i8 %arg2, %arg1 - ret i8 %A -} - -define i8 @or_i8_2(i8 %arg1, i8 %arg2) { - %A = or i8 %arg1, %arg2 - ret i8 %A -} - -; ORC instruction generation: -define <4 x i32> @orc_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { - %A = xor <4 x i32> %arg2, < i32 -1, i32 -1, i32 -1, i32 -1 > - %B = or <4 x i32> %arg1, %A - ret <4 x i32> %B -} - -define <4 x i32> @orc_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) { - %A = xor <4 x i32> %arg1, < i32 -1, i32 -1, i32 -1, i32 -1 > - %B = or <4 x i32> %arg2, %A - ret <4 x i32> %B -} - -define <4 x i32> @orc_v4i32_3(<4 x i32> %arg1, <4 x i32> %arg2) { - %A = xor <4 x i32> %arg1, < i32 -1, i32 -1, i32 -1, i32 -1 > - %B = or <4 x i32> %A, %arg2 - ret <4 x i32> %B -} - -define <8 x i16> @orc_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) { - %A = xor <8 x i16> %arg2, < i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1, i16 -1, i16 -1 > - %B = or <8 x i16> %arg1, %A - ret <8 x i16> %B -} - -define <8 x i16> @orc_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) { - %A = xor <8 x i16> %arg1, < i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1, i16 -1, i16 -1 > - %B = or <8 x i16> %arg2, %A - ret <8 x i16> %B -} - -define <16 x i8> @orc_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) { - %A = xor <16 x i8> %arg1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, 
i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - %B = or <16 x i8> %arg2, %A - ret <16 x i8> %B -} - -define <16 x i8> @orc_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) { - %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - %B = or <16 x i8> %arg1, %A - ret <16 x i8> %B -} - -define <16 x i8> @orc_v16i8_3(<16 x i8> %arg1, <16 x i8> %arg2) { - %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - %B = or <16 x i8> %A, %arg1 - ret <16 x i8> %B -} - -define i32 @orc_i32_1(i32 %arg1, i32 %arg2) { - %A = xor i32 %arg2, -1 - %B = or i32 %A, %arg1 - ret i32 %B -} - -define i32 @orc_i32_2(i32 %arg1, i32 %arg2) { - %A = xor i32 %arg1, -1 - %B = or i32 %A, %arg2 - ret i32 %B -} - -define i32 @orc_i32_3(i32 %arg1, i32 %arg2) { - %A = xor i32 %arg2, -1 - %B = or i32 %arg1, %A - ret i32 %B -} - -define i16 @orc_i16_1(i16 %arg1, i16 %arg2) { - %A = xor i16 %arg2, -1 - %B = or i16 %A, %arg1 - ret i16 %B -} - -define i16 @orc_i16_2(i16 %arg1, i16 %arg2) { - %A = xor i16 %arg1, -1 - %B = or i16 %A, %arg2 - ret i16 %B -} - -define i16 @orc_i16_3(i16 %arg1, i16 %arg2) { - %A = xor i16 %arg2, -1 - %B = or i16 %arg1, %A - ret i16 %B -} - -define i8 @orc_i8_1(i8 %arg1, i8 %arg2) { - %A = xor i8 %arg2, -1 - %B = or i8 %A, %arg1 - ret i8 %B -} - -define i8 @orc_i8_2(i8 %arg1, i8 %arg2) { - %A = xor i8 %arg1, -1 - %B = or i8 %A, %arg2 - ret i8 %B -} - -define i8 @orc_i8_3(i8 %arg1, i8 %arg2) { - %A = xor i8 %arg2, -1 - %B = or i8 %arg1, %A - ret i8 %B -} - -; ORI instruction generation (i32 data type): -define <4 x i32> @ori_v4i32_1(<4 x i32> %in) { - %tmp2 = or <4 x i32> %in, < i32 511, i32 511, i32 511, i32 511 > - ret <4 x i32> %tmp2 -} - -define <4 x i32> @ori_v4i32_2(<4 x i32> %in) { - %tmp2 = or <4 x i32> %in, < i32 510, i32 510, i32 510, i32 510 > - ret <4 x i32> %tmp2 -} - -define <4 x i32> @ori_v4i32_3(<4 x i32> %in) { - %tmp2 = or <4 x i32> %in, < i32 -1, i32 -1, i32 -1, i32 -1 > - ret <4 x i32> %tmp2 -} - -define <4 x i32> @ori_v4i32_4(<4 x i32> %in) { - %tmp2 = or <4 x i32> %in, < i32 -512, i32 -512, i32 -512, i32 -512 > - ret <4 x i32> %tmp2 -} - -define zeroext i32 @ori_u32(i32 zeroext %in) { - %tmp37 = or i32 %in, 37 ; <i32> [#uses=1] - ret i32 %tmp37 -} - -define signext i32 @ori_i32(i32 signext %in) { - %tmp38 = or i32 %in, 37 ; <i32> [#uses=1] - ret i32 %tmp38 -} - -define i32 @ori_i32_600(i32 %in) { - ;600 does not fit into 'ori' immediate field - ;CHECK: ori_i32_600 - ;CHECK: il - ;CHECK: ori - %tmp = or i32 %in, 600 - ret i32 %tmp -} - -; ORHI instruction generation (i16 data type): -define <8 x i16> @orhi_v8i16_1(<8 x i16> %in) { - %tmp2 = or <8 x i16> %in, < i16 511, i16 511, i16 511, i16 511, - i16 511, i16 511, i16 511, i16 511 > - ret <8 x i16> %tmp2 -} - -define <8 x i16> @orhi_v8i16_2(<8 x i16> %in) { - %tmp2 = or <8 x i16> %in, < i16 510, i16 510, i16 510, i16 510, - i16 510, i16 510, i16 510, i16 510 > - ret <8 x i16> %tmp2 -} - -define <8 x i16> @orhi_v8i16_3(<8 x i16> %in) { - %tmp2 = or <8 x i16> %in, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1, i16 -1 > - ret <8 x i16> %tmp2 -} - -define <8 x i16> @orhi_v8i16_4(<8 x i16> %in) { - %tmp2 = or <8 x i16> %in, < i16 -512, i16 -512, i16 -512, i16 -512, - i16 -512, i16 -512, i16 -512, i16 -512 > - ret <8 x i16> %tmp2 -} - -define zeroext i16 @orhi_u16(i16 zeroext %in) { - %tmp37 = or i16 %in, 37 ; <i16> [#uses=1] - ret i16 %tmp37 -} - 
-define signext i16 @orhi_i16(i16 signext %in) { - %tmp38 = or i16 %in, 37 ; <i16> [#uses=1] - ret i16 %tmp38 -} - -; ORBI instruction generation (i8 data type): -define <16 x i8> @orbi_v16i8(<16 x i8> %in) { - %tmp2 = or <16 x i8> %in, < i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, - i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, - i8 42, i8 42, i8 42, i8 42 > - ret <16 x i8> %tmp2 -} - -define zeroext i8 @orbi_u8(i8 zeroext %in) { - %tmp37 = or i8 %in, 37 ; <i8> [#uses=1] - ret i8 %tmp37 -} - -define signext i8 @orbi_i8(i8 signext %in) { - %tmp38 = or i8 %in, 37 ; <i8> [#uses=1] - ret i8 %tmp38 -} diff --git a/test/CodeGen/CellSPU/private.ll b/test/CodeGen/CellSPU/private.ll deleted file mode 100644 index 1d933adac939..000000000000 --- a/test/CodeGen/CellSPU/private.ll +++ /dev/null @@ -1,19 +0,0 @@ -; Test to make sure that the 'private' is used correctly. -; -; RUN: llc < %s -march=cellspu > %t -; RUN: grep .Lfoo: %t -; RUN: grep brsl.*\.Lfoo %t -; RUN: grep .Lbaz: %t -; RUN: grep ila.*\.Lbaz %t - -define private void @foo() { - ret void -} - -@baz = private global i32 4 - -define i32 @bar() { - call void @foo() - %1 = load i32* @baz, align 4 - ret i32 %1 -} diff --git a/test/CodeGen/CellSPU/rotate_ops.ll b/test/CodeGen/CellSPU/rotate_ops.ll deleted file mode 100644 index 977093527609..000000000000 --- a/test/CodeGen/CellSPU/rotate_ops.ll +++ /dev/null @@ -1,172 +0,0 @@ -; RUN: llc < %s -march=cellspu -o %t1.s -; RUN: grep rot %t1.s | count 86 -; RUN: grep roth %t1.s | count 8 -; RUN: grep roti.*5 %t1.s | count 1 -; RUN: grep roti.*27 %t1.s | count 1 -; RUN: grep rothi.*5 %t1.s | count 2 -; RUN: grep rothi.*11 %t1.s | count 1 -; RUN: grep rothi.*,.3 %t1.s | count 1 -; RUN: grep andhi %t1.s | count 4 -; RUN: grep shlhi %t1.s | count 4 -; RUN: cat %t1.s | FileCheck %s - -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -; Vector rotates are not currently supported in gcc or llvm assembly. These are -; not tested. 
- -; 32-bit rotates: -define i32 @rotl32_1a(i32 %arg1, i8 %arg2) { - %tmp1 = zext i8 %arg2 to i32 ; <i32> [#uses=1] - %B = shl i32 %arg1, %tmp1 ; <i32> [#uses=1] - %arg22 = sub i8 32, %arg2 ; <i8> [#uses=1] - %tmp2 = zext i8 %arg22 to i32 ; <i32> [#uses=1] - %C = lshr i32 %arg1, %tmp2 ; <i32> [#uses=1] - %D = or i32 %B, %C ; <i32> [#uses=1] - ret i32 %D -} - -define i32 @rotl32_1b(i32 %arg1, i16 %arg2) { - %tmp1 = zext i16 %arg2 to i32 ; <i32> [#uses=1] - %B = shl i32 %arg1, %tmp1 ; <i32> [#uses=1] - %arg22 = sub i16 32, %arg2 ; <i8> [#uses=1] - %tmp2 = zext i16 %arg22 to i32 ; <i32> [#uses=1] - %C = lshr i32 %arg1, %tmp2 ; <i32> [#uses=1] - %D = or i32 %B, %C ; <i32> [#uses=1] - ret i32 %D -} - -define i32 @rotl32_2(i32 %arg1, i32 %arg2) { - %B = shl i32 %arg1, %arg2 ; <i32> [#uses=1] - %tmp1 = sub i32 32, %arg2 ; <i32> [#uses=1] - %C = lshr i32 %arg1, %tmp1 ; <i32> [#uses=1] - %D = or i32 %B, %C ; <i32> [#uses=1] - ret i32 %D -} - -define i32 @rotl32_3(i32 %arg1, i32 %arg2) { - %tmp1 = sub i32 32, %arg2 ; <i32> [#uses=1] - %B = shl i32 %arg1, %arg2 ; <i32> [#uses=1] - %C = lshr i32 %arg1, %tmp1 ; <i32> [#uses=1] - %D = or i32 %B, %C ; <i32> [#uses=1] - ret i32 %D -} - -define i32 @rotl32_4(i32 %arg1, i32 %arg2) { - %tmp1 = sub i32 32, %arg2 ; <i32> [#uses=1] - %C = lshr i32 %arg1, %tmp1 ; <i32> [#uses=1] - %B = shl i32 %arg1, %arg2 ; <i32> [#uses=1] - %D = or i32 %B, %C ; <i32> [#uses=1] - ret i32 %D -} - -define i32 @rotr32_1(i32 %A, i8 %Amt) { - %tmp1 = zext i8 %Amt to i32 ; <i32> [#uses=1] - %B = lshr i32 %A, %tmp1 ; <i32> [#uses=1] - %Amt2 = sub i8 32, %Amt ; <i8> [#uses=1] - %tmp2 = zext i8 %Amt2 to i32 ; <i32> [#uses=1] - %C = shl i32 %A, %tmp2 ; <i32> [#uses=1] - %D = or i32 %B, %C ; <i32> [#uses=1] - ret i32 %D -} - -define i32 @rotr32_2(i32 %A, i8 %Amt) { - %Amt2 = sub i8 32, %Amt ; <i8> [#uses=1] - %tmp1 = zext i8 %Amt to i32 ; <i32> [#uses=1] - %B = lshr i32 %A, %tmp1 ; <i32> [#uses=1] - %tmp2 = zext i8 %Amt2 to i32 ; <i32> [#uses=1] - %C = shl i32 %A, %tmp2 ; <i32> [#uses=1] - %D = or i32 %B, %C ; <i32> [#uses=1] - ret i32 %D -} - -; Rotate left with immediate -define i32 @rotli32(i32 %A) { - %B = shl i32 %A, 5 ; <i32> [#uses=1] - %C = lshr i32 %A, 27 ; <i32> [#uses=1] - %D = or i32 %B, %C ; <i32> [#uses=1] - ret i32 %D -} - -; Rotate right with immediate -define i32 @rotri32(i32 %A) { - %B = lshr i32 %A, 5 ; <i32> [#uses=1] - %C = shl i32 %A, 27 ; <i32> [#uses=1] - %D = or i32 %B, %C ; <i32> [#uses=1] - ret i32 %D -} - -; 16-bit rotates: -define i16 @rotr16_1(i16 %arg1, i8 %arg) { - %tmp1 = zext i8 %arg to i16 ; <i16> [#uses=1] - %B = lshr i16 %arg1, %tmp1 ; <i16> [#uses=1] - %arg2 = sub i8 16, %arg ; <i8> [#uses=1] - %tmp2 = zext i8 %arg2 to i16 ; <i16> [#uses=1] - %C = shl i16 %arg1, %tmp2 ; <i16> [#uses=1] - %D = or i16 %B, %C ; <i16> [#uses=1] - ret i16 %D -} - -define i16 @rotr16_2(i16 %arg1, i16 %arg) { - %B = lshr i16 %arg1, %arg ; <i16> [#uses=1] - %tmp1 = sub i16 16, %arg ; <i16> [#uses=1] - %C = shl i16 %arg1, %tmp1 ; <i16> [#uses=1] - %D = or i16 %B, %C ; <i16> [#uses=1] - ret i16 %D -} - -define i16 @rotli16(i16 %A) { - %B = shl i16 %A, 5 ; <i16> [#uses=1] - %C = lshr i16 %A, 11 ; <i16> [#uses=1] - %D = or i16 %B, %C ; <i16> [#uses=1] - ret i16 %D -} - -define i16 @rotri16(i16 %A) { - %B = lshr i16 %A, 5 ; <i16> [#uses=1] - %C = shl i16 %A, 11 ; <i16> [#uses=1] - %D = or i16 %B, %C ; <i16> [#uses=1] - ret i16 %D -} - -define i8 @rotl8(i8 %A, i8 %Amt) { - %B = shl i8 %A, %Amt ; <i8> [#uses=1] - %Amt2 = sub i8 8, %Amt ; <i8> [#uses=1] - %C = lshr i8 %A, %Amt2 ; 
<i8> [#uses=1] - %D = or i8 %B, %C ; <i8> [#uses=1] - ret i8 %D -} - -define i8 @rotr8(i8 %A, i8 %Amt) { - %B = lshr i8 %A, %Amt ; <i8> [#uses=1] - %Amt2 = sub i8 8, %Amt ; <i8> [#uses=1] - %C = shl i8 %A, %Amt2 ; <i8> [#uses=1] - %D = or i8 %B, %C ; <i8> [#uses=1] - ret i8 %D -} - -define i8 @rotli8(i8 %A) { - %B = shl i8 %A, 5 ; <i8> [#uses=1] - %C = lshr i8 %A, 3 ; <i8> [#uses=1] - %D = or i8 %B, %C ; <i8> [#uses=1] - ret i8 %D -} - -define i8 @rotri8(i8 %A) { - %B = lshr i8 %A, 5 ; <i8> [#uses=1] - %C = shl i8 %A, 3 ; <i8> [#uses=1] - %D = or i8 %B, %C ; <i8> [#uses=1] - ret i8 %D -} - -define <2 x float> @test1(<4 x float> %param ) -{ -; CHECK: test1 -; CHECK: shufb - %el = extractelement <4 x float> %param, i32 1 - %vec1 = insertelement <1 x float> undef, float %el, i32 0 - %rv = shufflevector <1 x float> %vec1, <1 x float> undef, <2 x i32><i32 0,i32 0> -; CHECK: bi $lr - ret <2 x float> %rv -} diff --git a/test/CodeGen/CellSPU/select_bits.ll b/test/CodeGen/CellSPU/select_bits.ll deleted file mode 100644 index 65e0aa6fa0b0..000000000000 --- a/test/CodeGen/CellSPU/select_bits.ll +++ /dev/null @@ -1,572 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep selb %t1.s | count 56 - -; CellSPU legalization is over-sensitive to Legalize's traversal order. -; XFAIL: * - -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -; v2i64 -;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -; (or (and rC, rB), (and (not rC), rA)) -define <2 x i64> @selectbits_v2i64_01(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { - %C = and <2 x i64> %rC, %rB - %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > - %B = and <2 x i64> %A, %rA - %D = or <2 x i64> %C, %B - ret <2 x i64> %D -} - -; (or (and rB, rC), (and (not rC), rA)) -define <2 x i64> @selectbits_v2i64_02(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { - %C = and <2 x i64> %rB, %rC - %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > - %B = and <2 x i64> %A, %rA - %D = or <2 x i64> %C, %B - ret <2 x i64> %D -} - -; (or (and (not rC), rA), (and rB, rC)) -define <2 x i64> @selectbits_v2i64_03(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { - %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > - %B = and <2 x i64> %A, %rA - %C = and <2 x i64> %rB, %rC - %D = or <2 x i64> %C, %B - ret <2 x i64> %D -} - -; (or (and (not rC), rA), (and rC, rB)) -define <2 x i64> @selectbits_v2i64_04(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { - %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > - %B = and <2 x i64> %A, %rA - %C = and <2 x i64> %rC, %rB - %D = or <2 x i64> %C, %B - ret <2 x i64> %D -} - -; (or (and rC, rB), (and rA, (not rC))) -define <2 x i64> @selectbits_v2i64_05(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { - %C = and <2 x i64> %rC, %rB - %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > - %B = and <2 x i64> %rA, %A - %D = or <2 x i64> %C, %B - ret <2 x i64> %D -} - -; (or (and rB, rC), (and rA, (not rC))) -define <2 x i64> @selectbits_v2i64_06(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { - %C = and <2 x i64> %rB, %rC - %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > - %B = and <2 x i64> %rA, %A - %D = or <2 x i64> %C, %B - ret <2 x i64> %D -} - -; (or (and rA, (not rC)), (and rB, rC)) -define <2 x i64> @selectbits_v2i64_07(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { - %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > - %B = and <2 x i64> %rA, %A - %C = and <2 x i64> %rB, 
%rC - %D = or <2 x i64> %C, %B - ret <2 x i64> %D -} - -; (or (and rA, (not rC)), (and rC, rB)) -define <2 x i64> @selectbits_v2i64_08(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { - %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > - %B = and <2 x i64> %rA, %A - %C = and <2 x i64> %rC, %rB - %D = or <2 x i64> %C, %B - ret <2 x i64> %D -} - -;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -; v4i32 -;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -; (or (and rC, rB), (and (not rC), rA)) -define <4 x i32> @selectbits_v4i32_01(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { - %C = and <4 x i32> %rC, %rB - %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1 > - %B = and <4 x i32> %A, %rA - %D = or <4 x i32> %C, %B - ret <4 x i32> %D -} - -; (or (and rB, rC), (and (not rC), rA)) -define <4 x i32> @selectbits_v4i32_02(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { - %C = and <4 x i32> %rB, %rC - %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1 > - %B = and <4 x i32> %A, %rA - %D = or <4 x i32> %C, %B - ret <4 x i32> %D -} - -; (or (and (not rC), rA), (and rB, rC)) -define <4 x i32> @selectbits_v4i32_03(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { - %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1 > - %B = and <4 x i32> %A, %rA - %C = and <4 x i32> %rB, %rC - %D = or <4 x i32> %C, %B - ret <4 x i32> %D -} - -; (or (and (not rC), rA), (and rC, rB)) -define <4 x i32> @selectbits_v4i32_04(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { - %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1> - %B = and <4 x i32> %A, %rA - %C = and <4 x i32> %rC, %rB - %D = or <4 x i32> %C, %B - ret <4 x i32> %D -} - -; (or (and rC, rB), (and rA, (not rC))) -define <4 x i32> @selectbits_v4i32_05(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { - %C = and <4 x i32> %rC, %rB - %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1> - %B = and <4 x i32> %rA, %A - %D = or <4 x i32> %C, %B - ret <4 x i32> %D -} - -; (or (and rB, rC), (and rA, (not rC))) -define <4 x i32> @selectbits_v4i32_06(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { - %C = and <4 x i32> %rB, %rC - %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1> - %B = and <4 x i32> %rA, %A - %D = or <4 x i32> %C, %B - ret <4 x i32> %D -} - -; (or (and rA, (not rC)), (and rB, rC)) -define <4 x i32> @selectbits_v4i32_07(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { - %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1> - %B = and <4 x i32> %rA, %A - %C = and <4 x i32> %rB, %rC - %D = or <4 x i32> %C, %B - ret <4 x i32> %D -} - -; (or (and rA, (not rC)), (and rC, rB)) -define <4 x i32> @selectbits_v4i32_08(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { - %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1> - %B = and <4 x i32> %rA, %A - %C = and <4 x i32> %rC, %rB - %D = or <4 x i32> %C, %B - ret <4 x i32> %D -} - -;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -; v8i16 -;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -; (or (and rC, rB), (and (not rC), rA)) -define <8 x i16> @selectbits_v8i16_01(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { - %C = and <8 x i16> %rC, %rB - %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1, i16 -1, i16 -1 > - %B = and <8 x i16> %A, %rA - %D = or <8 x i16> %C, %B - ret <8 x i16> %D -} - -; (or (and rB, rC), (and (not rC), rA)) -define <8 x i16> @selectbits_v8i16_02(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { - %C = and <8 x i16> %rB, %rC - %A = xor 
<8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1, i16 -1, i16 -1 > - %B = and <8 x i16> %A, %rA - %D = or <8 x i16> %C, %B - ret <8 x i16> %D -} - -; (or (and (not rC), rA), (and rB, rC)) -define <8 x i16> @selectbits_v8i16_03(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { - %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1, i16 -1, i16 -1 > - %B = and <8 x i16> %A, %rA - %C = and <8 x i16> %rB, %rC - %D = or <8 x i16> %C, %B - ret <8 x i16> %D -} - -; (or (and (not rC), rA), (and rC, rB)) -define <8 x i16> @selectbits_v8i16_04(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { - %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1, i16 -1, i16 -1 > - %B = and <8 x i16> %A, %rA - %C = and <8 x i16> %rC, %rB - %D = or <8 x i16> %C, %B - ret <8 x i16> %D -} - -; (or (and rC, rB), (and rA, (not rC))) -define <8 x i16> @selectbits_v8i16_05(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { - %C = and <8 x i16> %rC, %rB - %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1, i16 -1, i16 -1 > - %B = and <8 x i16> %rA, %A - %D = or <8 x i16> %C, %B - ret <8 x i16> %D -} - -; (or (and rB, rC), (and rA, (not rC))) -define <8 x i16> @selectbits_v8i16_06(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { - %C = and <8 x i16> %rB, %rC - %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1, i16 -1, i16 -1 > - %B = and <8 x i16> %rA, %A - %D = or <8 x i16> %C, %B - ret <8 x i16> %D -} - -; (or (and rA, (not rC)), (and rB, rC)) -define <8 x i16> @selectbits_v8i16_07(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { - %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1, i16 -1, i16 -1 > - %B = and <8 x i16> %rA, %A - %C = and <8 x i16> %rB, %rC - %D = or <8 x i16> %C, %B - ret <8 x i16> %D -} - -; (or (and rA, (not rC)), (and rC, rB)) -define <8 x i16> @selectbits_v8i16_08(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { - %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1, i16 -1, i16 -1 > - %B = and <8 x i16> %rA, %A - %C = and <8 x i16> %rC, %rB - %D = or <8 x i16> %C, %B - ret <8 x i16> %D -} - -;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -; v16i8 -;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -; (or (and rC, rB), (and (not rC), rA)) -define <16 x i8> @selectbits_v16i8_01(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { - %C = and <16 x i8> %rC, %rB - %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - %B = and <16 x i8> %A, %rA - %D = or <16 x i8> %C, %B - ret <16 x i8> %D -} - -; (or (and rB, rC), (and (not rC), rA)) -define <16 x i8> @selectbits_v16i8_02(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { - %C = and <16 x i8> %rB, %rC - %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - %B = and <16 x i8> %A, %rA - %D = or <16 x i8> %C, %B - ret <16 x i8> %D -} - -; (or (and (not rC), rA), (and rB, rC)) -define <16 x i8> @selectbits_v16i8_03(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { - %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - %B = and <16 x i8> %A, %rA - %C = and <16 x i8> %rB, %rC - %D = or <16 x i8> %C, %B - ret <16 x i8> %D -} - -; (or (and (not rC), rA), (and rC, rB)) -define <16 x i8> 
@selectbits_v16i8_04(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { - %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - %B = and <16 x i8> %A, %rA - %C = and <16 x i8> %rC, %rB - %D = or <16 x i8> %C, %B - ret <16 x i8> %D -} - -; (or (and rC, rB), (and rA, (not rC))) -define <16 x i8> @selectbits_v16i8_05(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { - %C = and <16 x i8> %rC, %rB - %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - %B = and <16 x i8> %rA, %A - %D = or <16 x i8> %C, %B - ret <16 x i8> %D -} - -; (or (and rB, rC), (and rA, (not rC))) -define <16 x i8> @selectbits_v16i8_06(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { - %C = and <16 x i8> %rB, %rC - %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - %B = and <16 x i8> %rA, %A - %D = or <16 x i8> %C, %B - ret <16 x i8> %D -} - -; (or (and rA, (not rC)), (and rB, rC)) -define <16 x i8> @selectbits_v16i8_07(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { - %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - %B = and <16 x i8> %rA, %A - %C = and <16 x i8> %rB, %rC - %D = or <16 x i8> %C, %B - ret <16 x i8> %D -} - -; (or (and rA, (not rC)), (and rC, rB)) -define <16 x i8> @selectbits_v16i8_08(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { - %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - %B = and <16 x i8> %rA, %A - %C = and <16 x i8> %rC, %rB - %D = or <16 x i8> %C, %B - ret <16 x i8> %D -} - -;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -; i32 -;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -; (or (and rC, rB), (and (not rC), rA)) -define i32 @selectbits_i32_01(i32 %rA, i32 %rB, i32 %rC) { - %C = and i32 %rC, %rB - %A = xor i32 %rC, -1 - %B = and i32 %A, %rA - %D = or i32 %C, %B - ret i32 %D -} - -; (or (and rB, rC), (and (not rC), rA)) -define i32 @selectbits_i32_02(i32 %rA, i32 %rB, i32 %rC) { - %C = and i32 %rB, %rC - %A = xor i32 %rC, -1 - %B = and i32 %A, %rA - %D = or i32 %C, %B - ret i32 %D -} - -; (or (and (not rC), rA), (and rB, rC)) -define i32 @selectbits_i32_03(i32 %rA, i32 %rB, i32 %rC) { - %A = xor i32 %rC, -1 - %B = and i32 %A, %rA - %C = and i32 %rB, %rC - %D = or i32 %C, %B - ret i32 %D -} - -; (or (and (not rC), rA), (and rC, rB)) -define i32 @selectbits_i32_04(i32 %rA, i32 %rB, i32 %rC) { - %A = xor i32 %rC, -1 - %B = and i32 %A, %rA - %C = and i32 %rC, %rB - %D = or i32 %C, %B - ret i32 %D -} - -; (or (and rC, rB), (and rA, (not rC))) -define i32 @selectbits_i32_05(i32 %rA, i32 %rB, i32 %rC) { - %C = and i32 %rC, %rB - %A = xor i32 %rC, -1 - %B = and i32 %rA, %A - %D = or i32 %C, %B - ret i32 %D -} - -; (or (and rB, rC), (and rA, (not rC))) -define i32 @selectbits_i32_06(i32 %rA, i32 %rB, i32 %rC) { - %C = and i32 %rB, %rC - %A = xor i32 %rC, -1 - %B = and i32 %rA, %A - %D = or i32 %C, %B - ret i32 %D -} - -; (or (and rA, (not rC)), (and rB, rC)) -define i32 @selectbits_i32_07(i32 %rA, i32 %rB, i32 %rC) { - %A = xor i32 %rC, -1 - %B = and i32 %rA, %A - %C = and i32 %rB, %rC - %D = or i32 %C, %B - ret i32 %D -} - -; (or (and rA, (not rC)), (and rC, rB)) -define i32 
@selectbits_i32_08(i32 %rA, i32 %rB, i32 %rC) { - %A = xor i32 %rC, -1 - %B = and i32 %rA, %A - %C = and i32 %rC, %rB - %D = or i32 %C, %B - ret i32 %D -} - -;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -; i16 -;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -; (or (and rC, rB), (and (not rC), rA)) -define i16 @selectbits_i16_01(i16 %rA, i16 %rB, i16 %rC) { - %C = and i16 %rC, %rB - %A = xor i16 %rC, -1 - %B = and i16 %A, %rA - %D = or i16 %C, %B - ret i16 %D -} - -; (or (and rB, rC), (and (not rC), rA)) -define i16 @selectbits_i16_02(i16 %rA, i16 %rB, i16 %rC) { - %C = and i16 %rB, %rC - %A = xor i16 %rC, -1 - %B = and i16 %A, %rA - %D = or i16 %C, %B - ret i16 %D -} - -; (or (and (not rC), rA), (and rB, rC)) -define i16 @selectbits_i16_03(i16 %rA, i16 %rB, i16 %rC) { - %A = xor i16 %rC, -1 - %B = and i16 %A, %rA - %C = and i16 %rB, %rC - %D = or i16 %C, %B - ret i16 %D -} - -; (or (and (not rC), rA), (and rC, rB)) -define i16 @selectbits_i16_04(i16 %rA, i16 %rB, i16 %rC) { - %A = xor i16 %rC, -1 - %B = and i16 %A, %rA - %C = and i16 %rC, %rB - %D = or i16 %C, %B - ret i16 %D -} - -; (or (and rC, rB), (and rA, (not rC))) -define i16 @selectbits_i16_05(i16 %rA, i16 %rB, i16 %rC) { - %C = and i16 %rC, %rB - %A = xor i16 %rC, -1 - %B = and i16 %rA, %A - %D = or i16 %C, %B - ret i16 %D -} - -; (or (and rB, rC), (and rA, (not rC))) -define i16 @selectbits_i16_06(i16 %rA, i16 %rB, i16 %rC) { - %C = and i16 %rB, %rC - %A = xor i16 %rC, -1 - %B = and i16 %rA, %A - %D = or i16 %C, %B - ret i16 %D -} - -; (or (and rA, (not rC)), (and rB, rC)) -define i16 @selectbits_i16_07(i16 %rA, i16 %rB, i16 %rC) { - %A = xor i16 %rC, -1 - %B = and i16 %rA, %A - %C = and i16 %rB, %rC - %D = or i16 %C, %B - ret i16 %D -} - -; (or (and rA, (not rC)), (and rC, rB)) -define i16 @selectbits_i16_08(i16 %rA, i16 %rB, i16 %rC) { - %A = xor i16 %rC, -1 - %B = and i16 %rA, %A - %C = and i16 %rC, %rB - %D = or i16 %C, %B - ret i16 %D -} - -;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -; i8 -;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -; (or (and rC, rB), (and (not rC), rA)) -define i8 @selectbits_i8_01(i8 %rA, i8 %rB, i8 %rC) { - %C = and i8 %rC, %rB - %A = xor i8 %rC, -1 - %B = and i8 %A, %rA - %D = or i8 %C, %B - ret i8 %D -} - -; (or (and rB, rC), (and (not rC), rA)) -define i8 @selectbits_i8_02(i8 %rA, i8 %rB, i8 %rC) { - %C = and i8 %rB, %rC - %A = xor i8 %rC, -1 - %B = and i8 %A, %rA - %D = or i8 %C, %B - ret i8 %D -} - -; (or (and (not rC), rA), (and rB, rC)) -define i8 @selectbits_i8_03(i8 %rA, i8 %rB, i8 %rC) { - %A = xor i8 %rC, -1 - %B = and i8 %A, %rA - %C = and i8 %rB, %rC - %D = or i8 %C, %B - ret i8 %D -} - -; (or (and (not rC), rA), (and rC, rB)) -define i8 @selectbits_i8_04(i8 %rA, i8 %rB, i8 %rC) { - %A = xor i8 %rC, -1 - %B = and i8 %A, %rA - %C = and i8 %rC, %rB - %D = or i8 %C, %B - ret i8 %D -} - -; (or (and rC, rB), (and rA, (not rC))) -define i8 @selectbits_i8_05(i8 %rA, i8 %rB, i8 %rC) { - %C = and i8 %rC, %rB - %A = xor i8 %rC, -1 - %B = and i8 %rA, %A - %D = or i8 %C, %B - ret i8 %D -} - -; (or (and rB, rC), (and rA, (not rC))) -define i8 @selectbits_i8_06(i8 %rA, i8 %rB, i8 %rC) { - %C = and i8 %rB, %rC - %A = xor i8 %rC, -1 - %B = and i8 %rA, %A - %D = or i8 %C, %B - ret i8 %D -} - -; (or (and rA, (not rC)), (and rB, rC)) -define i8 @selectbits_i8_07(i8 %rA, i8 %rB, i8 %rC) { - %A = xor i8 %rC, -1 - %B = and i8 %rA, %A - %C = and i8 %rB, %rC - %D = or i8 %C, %B - 
ret i8 %D -} - -; (or (and rA, (not rC)), (and rC, rB)) -define i8 @selectbits_i8_08(i8 %rA, i8 %rB, i8 %rC) { - %A = xor i8 %rC, -1 - %B = and i8 %rA, %A - %C = and i8 %rC, %rB - %D = or i8 %C, %B - ret i8 %D -} diff --git a/test/CodeGen/CellSPU/sext128.ll b/test/CodeGen/CellSPU/sext128.ll deleted file mode 100644 index 6ae9aa51202f..000000000000 --- a/test/CodeGen/CellSPU/sext128.ll +++ /dev/null @@ -1,71 +0,0 @@ -; RUN: llc < %s -march=cellspu | FileCheck %s - -; ModuleID = 'sext128.bc' -target datalayout = "E-p:32:32:128-i1:8:128-i8:8:128-i16:16:128-i32:32:128-i64:32:128-f32:32:128-f64:64:128-v64:128:128-v128:128:128-a0:0:128-s0:128:128" -target triple = "spu" - -define i128 @sext_i64_i128(i64 %a) { -entry: - %0 = sext i64 %a to i128 - ret i128 %0 -; CHECK: long 269488144 -; CHECK: long 269488144 -; CHECK: long 66051 -; CHECK: long 67438087 -; CHECK-NOT: rotqmbyi -; CHECK: lqa -; CHECK: rotmai -; CHECK: shufb -} - -define i128 @sext_i32_i128(i32 %a) { -entry: - %0 = sext i32 %a to i128 - ret i128 %0 -; CHECK: long 269488144 -; CHECK: long 269488144 -; CHECK: long 269488144 -; CHECK: long 66051 -; CHECK-NOT: rotqmbyi -; CHECK: lqa -; CHECK: rotmai -; CHECK: shufb -} - -define i128 @sext_i32_i128a(float %a) { -entry: - %0 = call i32 @myfunc(float %a) - %1 = sext i32 %0 to i128 - ret i128 %1 -; CHECK: long 269488144 -; CHECK: long 269488144 -; CHECK: long 269488144 -; CHECK: long 66051 -; CHECK-NOT: rotqmbyi -; CHECK: lqa -; CHECK: rotmai -; CHECK: shufb -} - -declare i32 @myfunc(float) - -define i128 @func1(i8 %u) { -entry: -; CHECK: xsbh -; CHECK: xshw -; CHECK: rotmai -; CHECK: shufb -; CHECK: bi $lr - %0 = sext i8 %u to i128 - ret i128 %0 -} - -define i128 @func2(i16 %u) { -entry: -; CHECK: xshw -; CHECK: rotmai -; CHECK: shufb -; CHECK: bi $lr - %0 = sext i16 %u to i128 - ret i128 %0 -} diff --git a/test/CodeGen/CellSPU/shift_ops.ll b/test/CodeGen/CellSPU/shift_ops.ll deleted file mode 100644 index 1ccc356dcf5a..000000000000 --- a/test/CodeGen/CellSPU/shift_ops.ll +++ /dev/null @@ -1,348 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep "shlh " %t1.s | count 10 -; RUN: grep "shlhi " %t1.s | count 3 -; RUN: grep "shl " %t1.s | count 10 -; RUN: grep "shli " %t1.s | count 3 -; RUN: grep "xshw " %t1.s | count 5 -; RUN: grep "and " %t1.s | count 15 -; RUN: grep "andi " %t1.s | count 4 -; RUN: grep "rotmi " %t1.s | count 4 -; RUN: grep "rotqmbyi " %t1.s | count 1 -; RUN: grep "rotqmbii " %t1.s | count 2 -; RUN: grep "rotqmby " %t1.s | count 1 -; RUN: grep "rotqmbi " %t1.s | count 2 -; RUN: grep "rotqbyi " %t1.s | count 1 -; RUN: grep "rotqbii " %t1.s | count 2 -; RUN: grep "rotqbybi " %t1.s | count 1 -; RUN: grep "sfi " %t1.s | count 6 -; RUN: cat %t1.s | FileCheck %s - -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -; Shift left i16 via register, note that the second operand to shl is promoted -; to a 32-bit type: - -define i16 @shlh_i16_1(i16 %arg1, i16 %arg2) { - %A = shl i16 %arg1, %arg2 - ret i16 %A -} - -define i16 @shlh_i16_2(i16 %arg1, i16 %arg2) { - %A = shl i16 %arg2, %arg1 - ret i16 %A -} - -define signext i16 @shlh_i16_3(i16 signext %arg1, i16 signext %arg2) { - %A = shl i16 %arg1, %arg2 - ret i16 %A -} - -define signext i16 @shlh_i16_4(i16 signext %arg1, i16 signext %arg2) { - %A = shl i16 %arg2, %arg1 - ret i16 %A -} - -define zeroext i16 @shlh_i16_5(i16 zeroext %arg1, i16 zeroext %arg2) { - %A = shl i16 %arg1, %arg2 - ret i16 %A -} - -define 
zeroext i16 @shlh_i16_6(i16 zeroext %arg1, i16 zeroext %arg2) { - %A = shl i16 %arg2, %arg1 - ret i16 %A -} - -; Shift left i16 with immediate: -define i16 @shlhi_i16_1(i16 %arg1) { - %A = shl i16 %arg1, 12 - ret i16 %A -} - -; Should not generate anything other than the return, arg1 << 0 = arg1 -define i16 @shlhi_i16_2(i16 %arg1) { - %A = shl i16 %arg1, 0 - ret i16 %A -} - -define i16 @shlhi_i16_3(i16 %arg1) { - %A = shl i16 16383, %arg1 - ret i16 %A -} - -; Should generate 0, 0 << arg1 = 0 -define i16 @shlhi_i16_4(i16 %arg1) { - %A = shl i16 0, %arg1 - ret i16 %A -} - -define signext i16 @shlhi_i16_5(i16 signext %arg1) { - %A = shl i16 %arg1, 12 - ret i16 %A -} - -; Should not generate anything other than the return, arg1 << 0 = arg1 -define signext i16 @shlhi_i16_6(i16 signext %arg1) { - %A = shl i16 %arg1, 0 - ret i16 %A -} - -define signext i16 @shlhi_i16_7(i16 signext %arg1) { - %A = shl i16 16383, %arg1 - ret i16 %A -} - -; Should generate 0, 0 << arg1 = 0 -define signext i16 @shlhi_i16_8(i16 signext %arg1) { - %A = shl i16 0, %arg1 - ret i16 %A -} - -define zeroext i16 @shlhi_i16_9(i16 zeroext %arg1) { - %A = shl i16 %arg1, 12 - ret i16 %A -} - -; Should not generate anything other than the return, arg1 << 0 = arg1 -define zeroext i16 @shlhi_i16_10(i16 zeroext %arg1) { - %A = shl i16 %arg1, 0 - ret i16 %A -} - -define zeroext i16 @shlhi_i16_11(i16 zeroext %arg1) { - %A = shl i16 16383, %arg1 - ret i16 %A -} - -; Should generate 0, 0 << arg1 = 0 -define zeroext i16 @shlhi_i16_12(i16 zeroext %arg1) { - %A = shl i16 0, %arg1 - ret i16 %A -} - -; Shift left i32 via register, note that the second operand to shl is promoted -; to a 32-bit type: - -define i32 @shl_i32_1(i32 %arg1, i32 %arg2) { - %A = shl i32 %arg1, %arg2 - ret i32 %A -} - -define i32 @shl_i32_2(i32 %arg1, i32 %arg2) { - %A = shl i32 %arg2, %arg1 - ret i32 %A -} - -define signext i32 @shl_i32_3(i32 signext %arg1, i32 signext %arg2) { - %A = shl i32 %arg1, %arg2 - ret i32 %A -} - -define signext i32 @shl_i32_4(i32 signext %arg1, i32 signext %arg2) { - %A = shl i32 %arg2, %arg1 - ret i32 %A -} - -define zeroext i32 @shl_i32_5(i32 zeroext %arg1, i32 zeroext %arg2) { - %A = shl i32 %arg1, %arg2 - ret i32 %A -} - -define zeroext i32 @shl_i32_6(i32 zeroext %arg1, i32 zeroext %arg2) { - %A = shl i32 %arg2, %arg1 - ret i32 %A -} - -; Shift left i32 with immediate: -define i32 @shli_i32_1(i32 %arg1) { - %A = shl i32 %arg1, 12 - ret i32 %A -} - -; Should not generate anything other than the return, arg1 << 0 = arg1 -define i32 @shli_i32_2(i32 %arg1) { - %A = shl i32 %arg1, 0 - ret i32 %A -} - -define i32 @shli_i32_3(i32 %arg1) { - %A = shl i32 16383, %arg1 - ret i32 %A -} - -; Should generate 0, 0 << arg1 = 0 -define i32 @shli_i32_4(i32 %arg1) { - %A = shl i32 0, %arg1 - ret i32 %A -} - -define signext i32 @shli_i32_5(i32 signext %arg1) { - %A = shl i32 %arg1, 12 - ret i32 %A -} - -; Should not generate anything other than the return, arg1 << 0 = arg1 -define signext i32 @shli_i32_6(i32 signext %arg1) { - %A = shl i32 %arg1, 0 - ret i32 %A -} - -define signext i32 @shli_i32_7(i32 signext %arg1) { - %A = shl i32 16383, %arg1 - ret i32 %A -} - -; Should generate 0, 0 << arg1 = 0 -define signext i32 @shli_i32_8(i32 signext %arg1) { - %A = shl i32 0, %arg1 - ret i32 %A -} - -define zeroext i32 @shli_i32_9(i32 zeroext %arg1) { - %A = shl i32 %arg1, 12 - ret i32 %A -} - -; Should not generate anything other than the return, arg1 << 0 = arg1 -define zeroext i32 @shli_i32_10(i32 zeroext %arg1) { - %A = shl i32 %arg1, 0 - ret i32 %A -} - 
-define zeroext i32 @shli_i32_11(i32 zeroext %arg1) { - %A = shl i32 16383, %arg1 - ret i32 %A -} - -; Should generate 0, 0 << arg1 = 0 -define zeroext i32 @shli_i32_12(i32 zeroext %arg1) { - %A = shl i32 0, %arg1 - ret i32 %A -} - -;; i64 shift left - -define i64 @shl_i64_1(i64 %arg1) { - %A = shl i64 %arg1, 9 - ret i64 %A -} - -define i64 @shl_i64_2(i64 %arg1) { - %A = shl i64 %arg1, 3 - ret i64 %A -} - -define i64 @shl_i64_3(i64 %arg1, i32 %shift) { - %1 = zext i32 %shift to i64 - %2 = shl i64 %arg1, %1 - ret i64 %2 -} - -;; i64 shift right logical (shift 0s from the right) - -define i64 @lshr_i64_1(i64 %arg1) { - %1 = lshr i64 %arg1, 9 - ret i64 %1 -} - -define i64 @lshr_i64_2(i64 %arg1) { - %1 = lshr i64 %arg1, 3 - ret i64 %1 -} - -define i64 @lshr_i64_3(i64 %arg1, i32 %shift) { - %1 = zext i32 %shift to i64 - %2 = lshr i64 %arg1, %1 - ret i64 %2 -} - -;; i64 shift right arithmetic (shift 1s from the right) - -define i64 @ashr_i64_1(i64 %arg) { - %1 = ashr i64 %arg, 9 - ret i64 %1 -} - -define i64 @ashr_i64_2(i64 %arg) { - %1 = ashr i64 %arg, 3 - ret i64 %1 -} - -define i64 @ashr_i64_3(i64 %arg1, i32 %shift) { - %1 = zext i32 %shift to i64 - %2 = ashr i64 %arg1, %1 - ret i64 %2 -} - -define i32 @hi32_i64(i64 %arg) { - %1 = lshr i64 %arg, 32 - %2 = trunc i64 %1 to i32 - ret i32 %2 -} - -; some random tests -define i128 @test_lshr_i128( i128 %val ) { - ;CHECK: test_lshr_i128 - ;CHECK: sfi - ;CHECK: rotqmbi - ;CHECK: rotqmbybi - ;CHECK: bi $lr - %rv = lshr i128 %val, 64 - ret i128 %rv -} - -;Vector shifts -define <2 x i32> @shl_v2i32(<2 x i32> %val, <2 x i32> %sh) { -;CHECK: shl -;CHECK: bi $lr - %rv = shl <2 x i32> %val, %sh - ret <2 x i32> %rv -} - -define <4 x i32> @shl_v4i32(<4 x i32> %val, <4 x i32> %sh) { -;CHECK: shl -;CHECK: bi $lr - %rv = shl <4 x i32> %val, %sh - ret <4 x i32> %rv -} - -define <8 x i16> @shl_v8i16(<8 x i16> %val, <8 x i16> %sh) { -;CHECK: shlh -;CHECK: bi $lr - %rv = shl <8 x i16> %val, %sh - ret <8 x i16> %rv -} - -define <4 x i32> @lshr_v4i32(<4 x i32> %val, <4 x i32> %sh) { -;CHECK: rotm -;CHECK: bi $lr - %rv = lshr <4 x i32> %val, %sh - ret <4 x i32> %rv -} - -define <8 x i16> @lshr_v8i16(<8 x i16> %val, <8 x i16> %sh) { -;CHECK: sfhi -;CHECK: rothm -;CHECK: bi $lr - %rv = lshr <8 x i16> %val, %sh - ret <8 x i16> %rv -} - -define <4 x i32> @ashr_v4i32(<4 x i32> %val, <4 x i32> %sh) { -;CHECK: rotma -;CHECK: bi $lr - %rv = ashr <4 x i32> %val, %sh - ret <4 x i32> %rv -} - -define <8 x i16> @ashr_v8i16(<8 x i16> %val, <8 x i16> %sh) { -;CHECK: sfhi -;CHECK: rotmah -;CHECK: bi $lr - %rv = ashr <8 x i16> %val, %sh - ret <8 x i16> %rv -} - -define <2 x i64> @special_const() { - ret <2 x i64> <i64 4294967295, i64 4294967295> -} diff --git a/test/CodeGen/CellSPU/shuffles.ll b/test/CodeGen/CellSPU/shuffles.ll deleted file mode 100644 index 973586bf6cf2..000000000000 --- a/test/CodeGen/CellSPU/shuffles.ll +++ /dev/null @@ -1,69 +0,0 @@ -; RUN: llc -O1 --march=cellspu < %s | FileCheck %s - -;CHECK: shuffle -define <4 x float> @shuffle(<4 x float> %param1, <4 x float> %param2) { - ; CHECK: cwd {{\$.}}, 0($sp) - ; CHECK: shufb {{\$., \$4, \$3, \$.}} - %val= shufflevector <4 x float> %param1, <4 x float> %param2, <4 x i32> <i32 4,i32 1,i32 2,i32 3> - ret <4 x float> %val -} - -;CHECK: splat -define <4 x float> @splat(float %param1) { - ; CHECK: lqa - ; CHECK: shufb $3 - ; CHECK: bi - %vec = insertelement <1 x float> undef, float %param1, i32 0 - %val= shufflevector <1 x float> %vec, <1 x float> undef, <4 x i32> <i32 0,i32 0,i32 0,i32 0> - ret <4 x float> %val -} - 
-;CHECK: test_insert -define void @test_insert( <2 x float>* %ptr, float %val1, float %val2 ) { - %sl2_17_tmp1 = insertelement <2 x float> zeroinitializer, float %val1, i32 0 -;CHECK: lqa $6, -;CHECK: shufb $4, $4, $5, $6 - %sl2_17 = insertelement <2 x float> %sl2_17_tmp1, float %val2, i32 1 - -;CHECK: cdd $5, 0($3) -;CHECK: lqd $6, 0($3) -;CHECK: shufb $4, $4, $6, $5 -;CHECK: stqd $4, 0($3) -;CHECK: bi $lr - store <2 x float> %sl2_17, <2 x float>* %ptr - ret void -} - -;CHECK: test_insert_1 -define <4 x float> @test_insert_1(<4 x float> %vparam, float %eltparam) { -;CHECK: cwd $5, 4($sp) -;CHECK: shufb $3, $4, $3, $5 -;CHECK: bi $lr - %rv = insertelement <4 x float> %vparam, float %eltparam, i32 1 - ret <4 x float> %rv -} - -;CHECK: test_v2i32 -define <2 x i32> @test_v2i32(<4 x i32>%vec) -{ -;CHECK: rotqbyi $3, $3, 4 -;CHECK: bi $lr - %rv = shufflevector <4 x i32> %vec, <4 x i32> undef, <2 x i32><i32 1,i32 2> - ret <2 x i32> %rv -} - -define <4 x i32> @test_v4i32_rot8(<4 x i32>%vec) -{ - %rv = shufflevector <4 x i32> %vec, <4 x i32> undef, - <4 x i32> <i32 2,i32 3,i32 0, i32 1> - ret <4 x i32> %rv -} - -;CHECK: test_v4i32_rot4 -define <4 x i32> @test_v4i32_rot4(<4 x i32>%vec) -{ - %rv = shufflevector <4 x i32> %vec, <4 x i32> undef, - <4 x i32> <i32 1,i32 2,i32 3, i32 0> - ret <4 x i32> %rv -} - diff --git a/test/CodeGen/CellSPU/sp_farith.ll b/test/CodeGen/CellSPU/sp_farith.ll deleted file mode 100644 index 80bf47ccf5d9..000000000000 --- a/test/CodeGen/CellSPU/sp_farith.ll +++ /dev/null @@ -1,90 +0,0 @@ -; RUN: llc < %s -march=cellspu -enable-unsafe-fp-math > %t1.s -; RUN: grep fa %t1.s | count 2 -; RUN: grep fs %t1.s | count 2 -; RUN: grep fm %t1.s | count 6 -; RUN: grep fma %t1.s | count 2 -; RUN: grep fms %t1.s | count 2 -; RUN: grep fnms %t1.s | count 3 -; -; This file includes standard floating point arithmetic instructions -; NOTE fdiv is tested separately since it is a compound operation -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -define float @fp_add(float %arg1, float %arg2) { - %A = fadd float %arg1, %arg2 ; <float> [#uses=1] - ret float %A -} - -define <4 x float> @fp_add_vec(<4 x float> %arg1, <4 x float> %arg2) { - %A = fadd <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1] - ret <4 x float> %A -} - -define float @fp_sub(float %arg1, float %arg2) { - %A = fsub float %arg1, %arg2 ; <float> [#uses=1] - ret float %A -} - -define <4 x float> @fp_sub_vec(<4 x float> %arg1, <4 x float> %arg2) { - %A = fsub <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1] - ret <4 x float> %A -} - -define float @fp_mul(float %arg1, float %arg2) { - %A = fmul float %arg1, %arg2 ; <float> [#uses=1] - ret float %A -} - -define <4 x float> @fp_mul_vec(<4 x float> %arg1, <4 x float> %arg2) { - %A = fmul <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1] - ret <4 x float> %A -} - -define float @fp_mul_add(float %arg1, float %arg2, float %arg3) { - %A = fmul float %arg1, %arg2 ; <float> [#uses=1] - %B = fadd float %A, %arg3 ; <float> [#uses=1] - ret float %B -} - -define <4 x float> @fp_mul_add_vec(<4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3) { - %A = fmul <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1] - %B = fadd <4 x float> %A, %arg3 ; <<4 x float>> [#uses=1] - ret <4 x float> %B -} - -define float @fp_mul_sub(float %arg1, float %arg2, float %arg3) { - %A = fmul float %arg1, %arg2 ; <float> [#uses=1] - %B = fsub float %A, %arg3 ; <float> [#uses=1] - ret float %B 
-} - -define <4 x float> @fp_mul_sub_vec(<4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3) { - %A = fmul <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1] - %B = fsub <4 x float> %A, %arg3 ; <<4 x float>> [#uses=1] - ret <4 x float> %B -} - -; Test the straightforward way of getting fnms -; c - a * b -define float @fp_neg_mul_sub_1(float %arg1, float %arg2, float %arg3) { - %A = fmul float %arg1, %arg2 - %B = fsub float %arg3, %A - ret float %B -} - -; Test another way of getting fnms -; - ( a *b -c ) = c - a * b -define float @fp_neg_mul_sub_2(float %arg1, float %arg2, float %arg3) { - %A = fmul float %arg1, %arg2 - %B = fsub float %A, %arg3 - %C = fsub float -0.0, %B - ret float %C -} - -define <4 x float> @fp_neg_mul_sub_vec(<4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3) { - %A = fmul <4 x float> %arg1, %arg2 - %B = fsub <4 x float> %A, %arg3 - %D = fsub <4 x float> < float -0.0, float -0.0, float -0.0, float -0.0 >, %B - ret <4 x float> %D -} diff --git a/test/CodeGen/CellSPU/stores.ll b/test/CodeGen/CellSPU/stores.ll deleted file mode 100644 index 43f8776a3d46..000000000000 --- a/test/CodeGen/CellSPU/stores.ll +++ /dev/null @@ -1,181 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep 'stqd.*0($3)' %t1.s | count 4 -; RUN: grep 'stqd.*16($3)' %t1.s | count 4 -; RUN: grep 16256 %t1.s | count 2 -; RUN: grep 16384 %t1.s | count 1 -; RUN: grep 771 %t1.s | count 4 -; RUN: grep 515 %t1.s | count 2 -; RUN: grep 1799 %t1.s | count 2 -; RUN: grep 1543 %t1.s | count 5 -; RUN: grep 1029 %t1.s | count 3 -; RUN: grep 'shli.*, 4' %t1.s | count 4 -; RUN: grep stqx %t1.s | count 4 -; RUN: grep ilhu %t1.s | count 11 -; RUN: grep iohl %t1.s | count 8 -; RUN: grep shufb %t1.s | count 15 -; RUN: grep frds %t1.s | count 1 -; RUN: llc < %s -march=cellspu | FileCheck %s - -; ModuleID = 'stores.bc' -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -define void @store_v16i8_1(<16 x i8>* %a) nounwind { -entry: - store <16 x i8> < i8 1, i8 2, i8 1, i8 1, i8 1, i8 2, i8 1, i8 1, i8 1, i8 2, i8 1, i8 1, i8 1, i8 2, i8 1, i8 1 >, <16 x i8>* %a - ret void -} - -define void @store_v16i8_2(<16 x i8>* %a) nounwind { -entry: - %arrayidx = getelementptr <16 x i8>* %a, i32 1 - store <16 x i8> < i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2 >, <16 x i8>* %arrayidx - ret void -} - -define void @store_v16i8_3(<16 x i8>* %a, i32 %i) nounwind { -entry: - %arrayidx = getelementptr <16 x i8>* %a, i32 %i - store <16 x i8> < i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1 >, <16 x i8>* %arrayidx - ret void -} - -define void @store_v8i16_1(<8 x i16>* %a) nounwind { -entry: - store <8 x i16> < i16 1, i16 2, i16 1, i16 1, i16 1, i16 2, i16 1, i16 1 >, <8 x i16>* %a - ret void -} - -define void @store_v8i16_2(<8 x i16>* %a) nounwind { -entry: - %arrayidx = getelementptr <8 x i16>* %a, i16 1 - store <8 x i16> < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2 >, <8 x i16>* %arrayidx - ret void -} - -define void @store_v8i16_3(<8 x i16>* %a, i32 %i) nounwind { -entry: - %arrayidx = getelementptr <8 x i16>* %a, i32 %i - store <8 x i16> < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1 >, <8 x i16>* %arrayidx - ret void -} - -define void @store_v4i32_1(<4 x i32>* %a) nounwind { -entry: - store <4 x i32> < i32 1, i32 2, i32 1, i32 1 >, <4 x i32>* %a - ret void -} - -define void 
@store_v4i32_2(<4 x i32>* %a) nounwind { -entry: - %arrayidx = getelementptr <4 x i32>* %a, i32 1 - store <4 x i32> < i32 2, i32 2, i32 2, i32 2 >, <4 x i32>* %arrayidx - ret void -} - -define void @store_v4i32_3(<4 x i32>* %a, i32 %i) nounwind { -entry: - %arrayidx = getelementptr <4 x i32>* %a, i32 %i - store <4 x i32> < i32 1, i32 1, i32 1, i32 1 >, <4 x i32>* %arrayidx - ret void -} - -define void @store_v4f32_1(<4 x float>* %a) nounwind { -entry: - store <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x float>* %a - ret void -} - -define void @store_v4f32_2(<4 x float>* %a) nounwind { -entry: - %arrayidx = getelementptr <4 x float>* %a, i32 1 - store <4 x float> < float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00 >, <4 x float>* %arrayidx - ret void -} - -define void @store_v4f32_3(<4 x float>* %a, i32 %i) nounwind { -entry: - %arrayidx = getelementptr <4 x float>* %a, i32 %i - store <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x float>* %arrayidx - ret void -} - -; Test truncating stores: - -define zeroext i8 @tstore_i16_i8(i16 signext %val, i8* %dest) nounwind { -entry: - %conv = trunc i16 %val to i8 - store i8 %conv, i8* %dest - ret i8 %conv -} - -define zeroext i8 @tstore_i32_i8(i32 %val, i8* %dest) nounwind { -entry: - %conv = trunc i32 %val to i8 - store i8 %conv, i8* %dest - ret i8 %conv -} - -define signext i16 @tstore_i32_i16(i32 %val, i16* %dest) nounwind { -entry: - %conv = trunc i32 %val to i16 - store i16 %conv, i16* %dest - ret i16 %conv -} - -define zeroext i8 @tstore_i64_i8(i64 %val, i8* %dest) nounwind { -entry: - %conv = trunc i64 %val to i8 - store i8 %conv, i8* %dest - ret i8 %conv -} - -define signext i16 @tstore_i64_i16(i64 %val, i16* %dest) nounwind { -entry: - %conv = trunc i64 %val to i16 - store i16 %conv, i16* %dest - ret i16 %conv -} - -define i32 @tstore_i64_i32(i64 %val, i32* %dest) nounwind { -entry: - %conv = trunc i64 %val to i32 - store i32 %conv, i32* %dest - ret i32 %conv -} - -define float @tstore_f64_f32(double %val, float* %dest) nounwind { -entry: - %conv = fptrunc double %val to float - store float %conv, float* %dest - ret float %conv -} - -;Check stores that might span two 16 byte memory blocks -define void @store_misaligned( i32 %val, i32* %ptr) { -;CHECK: store_misaligned -;CHECK: lqd -;CHECK: lqd -;CHECK: stqd -;CHECK: stqd -;CHECK: bi $lr - store i32 %val, i32*%ptr, align 2 - ret void -} - -define void @store_v8( <8 x float> %val, <8 x float>* %ptr ) -{ -;CHECK: stq -;CHECK: stq -;CHECK: bi $lr - store <8 x float> %val, <8 x float>* %ptr - ret void -} - -define void @store_null_vec( <4 x i32> %val ) { -; FIXME - this is for some reason compiled into a il+stqd, not a sta. 
-;CHECK: stqd -;CHECK: bi $lr - store <4 x i32> %val, <4 x i32>* null - ret void -} diff --git a/test/CodeGen/CellSPU/storestruct.ll b/test/CodeGen/CellSPU/storestruct.ll deleted file mode 100644 index 47185e829661..000000000000 --- a/test/CodeGen/CellSPU/storestruct.ll +++ /dev/null @@ -1,13 +0,0 @@ -; RUN: llc < %s -march=cellspu | FileCheck %s - -%0 = type {i32, i32} -@buffer = global [ 72 x %0 ] zeroinitializer - -define void@test( ) { -; Check that there is no illegal "a rt, ra, imm" instruction -; CHECK-NOT: a {{\$., \$., 5..}} -; CHECK: a {{\$., \$., \$.}} - store %0 {i32 1, i32 2} , - %0* getelementptr ([72 x %0]* @buffer, i32 0, i32 71) - ret void -} diff --git a/test/CodeGen/CellSPU/struct_1.ll b/test/CodeGen/CellSPU/struct_1.ll deleted file mode 100644 index 8c3275080c69..000000000000 --- a/test/CodeGen/CellSPU/struct_1.ll +++ /dev/null @@ -1,147 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: llc < %s -march=cellspu -mattr=large_mem > %t2.s -; RUN: grep lqa %t1.s | count 5 -; RUN: grep lqd %t1.s | count 11 -; RUN: grep rotqbyi %t1.s | count 7 -; RUN: grep xshw %t1.s | count 1 -; RUN: grep andi %t1.s | count 5 -; RUN: grep cbd %t1.s | count 3 -; RUN: grep chd %t1.s | count 1 -; RUN: grep cwd %t1.s | count 3 -; RUN: grep shufb %t1.s | count 7 -; RUN: grep stqd %t1.s | count 7 -; RUN: grep iohl %t2.s | count 16 -; RUN: grep ilhu %t2.s | count 16 -; RUN: grep lqd %t2.s | count 16 -; RUN: grep rotqbyi %t2.s | count 7 -; RUN: grep xshw %t2.s | count 1 -; RUN: grep andi %t2.s | count 5 -; RUN: grep cbd %t2.s | count 3 -; RUN: grep chd %t2.s | count 1 -; RUN: grep cwd %t2.s | count 3 -; RUN: grep shufb %t2.s | count 7 -; RUN: grep stqd %t2.s | count 7 - -; CellSPU legalization is over-sensitive to Legalize's traversal order. -; XFAIL: * - -; ModuleID = 'struct_1.bc' -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -; struct hackstate { -; unsigned char c1; // offset 0 (rotate left by 13 bytes to byte 3) -; unsigned char c2; // offset 1 (rotate left by 14 bytes to byte 3) -; unsigned char c3; // offset 2 (rotate left by 15 bytes to byte 3) -; int i1; // offset 4 (rotate left by 4 bytes to byte 0) -; short s1; // offset 8 (rotate left by 6 bytes to byte 2) -; int i2; // offset 12 [ignored] -; unsigned char c4; // offset 16 [ignored] -; unsigned char c5; // offset 17 [ignored] -; unsigned char c6; // offset 18 (rotate left by 14 bytes to byte 3) -; unsigned char c7; // offset 19 (no rotate, in preferred slot) -; int i3; // offset 20 [ignored] -; int i4; // offset 24 [ignored] -; int i5; // offset 28 [ignored] -; int i6; // offset 32 (no rotate, in preferred slot) -; } -%struct.hackstate = type { i8, i8, i8, i32, i16, i32, i8, i8, i8, i8, i32, i32, i32, i32 } - -; struct hackstate state = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } -@state = global %struct.hackstate zeroinitializer, align 16 - -define zeroext i8 @get_hackstate_c1() nounwind { -entry: - %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 16 - ret i8 %tmp2 -} - -define zeroext i8 @get_hackstate_c2() nounwind { -entry: - %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 16 - ret i8 %tmp2 -} - -define zeroext i8 @get_hackstate_c3() nounwind { -entry: - %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 16 - ret i8 %tmp2 -} - -define i32 @get_hackstate_i1() nounwind { -entry: - %tmp2 = load i32* getelementptr 
(%struct.hackstate* @state, i32 0, i32 3), align 16 - ret i32 %tmp2 -} - -define signext i16 @get_hackstate_s1() nounwind { -entry: - %tmp2 = load i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 16 - ret i16 %tmp2 -} - -define zeroext i8 @get_hackstate_c6() nounwind { -entry: - %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 8), align 16 - ret i8 %tmp2 -} - -define zeroext i8 @get_hackstate_c7() nounwind { -entry: - %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 9), align 16 - ret i8 %tmp2 -} - -define i32 @get_hackstate_i3() nounwind { -entry: - %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 10), align 16 - ret i32 %tmp2 -} - -define i32 @get_hackstate_i6() nounwind { -entry: - %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 13), align 16 - ret i32 %tmp2 -} - -define void @set_hackstate_c1(i8 zeroext %c) nounwind { -entry: - store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 16 - ret void -} - -define void @set_hackstate_c2(i8 zeroext %c) nounwind { -entry: - store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 16 - ret void -} - -define void @set_hackstate_c3(i8 zeroext %c) nounwind { -entry: - store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 16 - ret void -} - -define void @set_hackstate_i1(i32 %i) nounwind { -entry: - store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 3), align 16 - ret void -} - -define void @set_hackstate_s1(i16 signext %s) nounwind { -entry: - store i16 %s, i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 16 - ret void -} - -define void @set_hackstate_i3(i32 %i) nounwind { -entry: - store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 10), align 16 - ret void -} - -define void @set_hackstate_i6(i32 %i) nounwind { -entry: - store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 13), align 16 - ret void -} diff --git a/test/CodeGen/CellSPU/sub_ops.ll b/test/CodeGen/CellSPU/sub_ops.ll deleted file mode 100644 index f0c40d37ce9d..000000000000 --- a/test/CodeGen/CellSPU/sub_ops.ll +++ /dev/null @@ -1,26 +0,0 @@ -; RUN: llc < %s -march=cellspu | FileCheck %s - -define i32 @subword( i32 %param1, i32 %param2) { -; Check ordering of registers ret=param1-param2 -> rt=rb-ra -; CHECK-NOT: sf $3, $3, $4 -; CHECK: sf $3, $4, $3 - %1 = sub i32 %param1, %param2 - ret i32 %1 -} - -define i16 @subhword( i16 %param1, i16 %param2) { -; Check ordering of registers ret=param1-param2 -> rt=rb-ra -; CHECK-NOT: sfh $3, $3, $4 -; CHECK: sfh $3, $4, $3 - %1 = sub i16 %param1, %param2 - ret i16 %1 -} - -define float @subfloat( float %param1, float %param2) { -; Check ordering of registers ret=param1-param2 -> rt=ra-rb -; (yes this is reverse of i32 instruction) -; CHECK-NOT: fs $3, $4, $3 -; CHECK: fs $3, $3, $4 - %1 = fsub float %param1, %param2 - ret float %1 -} diff --git a/test/CodeGen/CellSPU/trunc.ll b/test/CodeGen/CellSPU/trunc.ll deleted file mode 100644 index e4c8fb49a32c..000000000000 --- a/test/CodeGen/CellSPU/trunc.ll +++ /dev/null @@ -1,94 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep shufb %t1.s | count 19 -; RUN: grep "ilhu.*1799" %t1.s | count 1 -; RUN: grep "ilhu.*771" %t1.s | count 2 -; RUN: grep "ilhu.*1543" %t1.s | count 1 -; RUN: grep "ilhu.*1029" %t1.s | count 1 -; RUN: grep "ilhu.*515" %t1.s | count 1 -; RUN: grep "ilhu.*3855" %t1.s | count 1 -; RUN: grep "ilhu.*3599" %t1.s | count 1 -; 
RUN: grep "ilhu.*3085" %t1.s | count 1 -; RUN: grep "iohl.*3855" %t1.s | count 1 -; RUN: grep "iohl.*3599" %t1.s | count 2 -; RUN: grep "iohl.*1543" %t1.s | count 2 -; RUN: grep "iohl.*771" %t1.s | count 2 -; RUN: grep "iohl.*515" %t1.s | count 1 -; RUN: grep "iohl.*1799" %t1.s | count 1 -; RUN: grep lqa %t1.s | count 1 -; RUN: grep cbd %t1.s | count 4 -; RUN: grep chd %t1.s | count 3 -; RUN: grep cwd %t1.s | count 1 -; RUN: grep cdd %t1.s | count 1 - -; ModuleID = 'trunc.bc' -target datalayout = "E-p:32:32:128-i1:8:128-i8:8:128-i16:16:128-i32:32:128-i64:32:128-f32:32:128-f64:64:128-v64:64:64-v128:128:128-a0:0:128-s0:128:128" -target triple = "spu" - -define <16 x i8> @trunc_i128_i8(i128 %u, <16 x i8> %v) { -entry: - %0 = trunc i128 %u to i8 - %tmp1 = insertelement <16 x i8> %v, i8 %0, i32 15 - ret <16 x i8> %tmp1 -} - -define <8 x i16> @trunc_i128_i16(i128 %u, <8 x i16> %v) { -entry: - %0 = trunc i128 %u to i16 - %tmp1 = insertelement <8 x i16> %v, i16 %0, i32 8 - ret <8 x i16> %tmp1 -} - -define <4 x i32> @trunc_i128_i32(i128 %u, <4 x i32> %v) { -entry: - %0 = trunc i128 %u to i32 - %tmp1 = insertelement <4 x i32> %v, i32 %0, i32 2 - ret <4 x i32> %tmp1 -} - -define <2 x i64> @trunc_i128_i64(i128 %u, <2 x i64> %v) { -entry: - %0 = trunc i128 %u to i64 - %tmp1 = insertelement <2 x i64> %v, i64 %0, i32 1 - ret <2 x i64> %tmp1 -} - -define <16 x i8> @trunc_i64_i8(i64 %u, <16 x i8> %v) { -entry: - %0 = trunc i64 %u to i8 - %tmp1 = insertelement <16 x i8> %v, i8 %0, i32 10 - ret <16 x i8> %tmp1 -} - -define <8 x i16> @trunc_i64_i16(i64 %u, <8 x i16> %v) { -entry: - %0 = trunc i64 %u to i16 - %tmp1 = insertelement <8 x i16> %v, i16 %0, i32 6 - ret <8 x i16> %tmp1 -} - -define i32 @trunc_i64_i32(i64 %u) { -entry: - %0 = trunc i64 %u to i32 - ret i32 %0 -} - -define <16 x i8> @trunc_i32_i8(i32 %u, <16 x i8> %v) { -entry: - %0 = trunc i32 %u to i8 - %tmp1 = insertelement <16 x i8> %v, i8 %0, i32 7 - ret <16 x i8> %tmp1 -} - -define <8 x i16> @trunc_i32_i16(i32 %u, <8 x i16> %v) { -entry: - %0 = trunc i32 %u to i16 - %tmp1 = insertelement <8 x i16> %v, i16 %0, i32 3 - ret <8 x i16> %tmp1 -} - -define <16 x i8> @trunc_i16_i8(i16 %u, <16 x i8> %v) { -entry: - %0 = trunc i16 %u to i8 - %tmp1 = insertelement <16 x i8> %v, i8 %0, i32 5 - ret <16 x i8> %tmp1 -} diff --git a/test/CodeGen/CellSPU/useful-harnesses/README.txt b/test/CodeGen/CellSPU/useful-harnesses/README.txt deleted file mode 100644 index d87b3989e4f7..000000000000 --- a/test/CodeGen/CellSPU/useful-harnesses/README.txt +++ /dev/null @@ -1,5 +0,0 @@ -This directory contains code that's not part of the DejaGNU test suite, -but is generally useful as various test harnesses. - -vecoperations.c: Various vector operation sanity checks, e.g., shuffles, - 8-bit vector add and multiply. diff --git a/test/CodeGen/CellSPU/useful-harnesses/i32operations.c b/test/CodeGen/CellSPU/useful-harnesses/i32operations.c deleted file mode 100644 index 12fc30bf65d7..000000000000 --- a/test/CodeGen/CellSPU/useful-harnesses/i32operations.c +++ /dev/null @@ -1,69 +0,0 @@ -#include <stdio.h> - -typedef unsigned int uint32_t; -typedef int int32_t; - -const char *boolstring(int val) { - return val ? "true" : "false"; -} - -int i32_eq(int32_t a, int32_t b) { - return (a == b); -} - -int i32_neq(int32_t a, int32_t b) { - return (a != b); -} - -int32_t i32_eq_select(int32_t a, int32_t b, int32_t c, int32_t d) { - return ((a == b) ? c : d); -} - -int32_t i32_neq_select(int32_t a, int32_t b, int32_t c, int32_t d) { - return ((a != b) ? 
c : d); -} - -struct pred_s { - const char *name; - int (*predfunc)(int32_t, int32_t); - int (*selfunc)(int32_t, int32_t, int32_t, int32_t); -}; - -struct pred_s preds[] = { - { "eq", i32_eq, i32_eq_select }, - { "neq", i32_neq, i32_neq_select } -}; - -int main(void) { - int i; - int32_t a = 1234567890; - int32_t b = 345678901; - int32_t c = 1234500000; - int32_t d = 10001; - int32_t e = 10000; - - printf("a = %12d (0x%08x)\n", a, a); - printf("b = %12d (0x%08x)\n", b, b); - printf("c = %12d (0x%08x)\n", c, c); - printf("d = %12d (0x%08x)\n", d, d); - printf("e = %12d (0x%08x)\n", e, e); - printf("----------------------------------------\n"); - - for (i = 0; i < sizeof(preds)/sizeof(preds[0]); ++i) { - printf("a %s a = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, a))); - printf("a %s a = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, a))); - printf("a %s b = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, b))); - printf("a %s c = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, c))); - printf("d %s e = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(d, e))); - printf("e %s e = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(e, e))); - - printf("a %s a ? c : d = %d\n", preds[i].name, (*preds[i].selfunc)(a, a, c, d)); - printf("a %s a ? c : d == c (%s)\n", preds[i].name, boolstring((*preds[i].selfunc)(a, a, c, d) == c)); - printf("a %s b ? c : d = %d\n", preds[i].name, (*preds[i].selfunc)(a, b, c, d)); - printf("a %s b ? c : d == d (%s)\n", preds[i].name, boolstring((*preds[i].selfunc)(a, b, c, d) == d)); - - printf("----------------------------------------\n"); - } - - return 0; -} diff --git a/test/CodeGen/CellSPU/useful-harnesses/i64operations.c b/test/CodeGen/CellSPU/useful-harnesses/i64operations.c deleted file mode 100644 index b613bd872e28..000000000000 --- a/test/CodeGen/CellSPU/useful-harnesses/i64operations.c +++ /dev/null @@ -1,673 +0,0 @@ -#include <stdio.h> -#include "i64operations.h" - -int64_t tval_a = 1234567890003LL; -int64_t tval_b = 2345678901235LL; -int64_t tval_c = 1234567890001LL; -int64_t tval_d = 10001LL; -int64_t tval_e = 10000LL; -uint64_t tval_f = 0xffffff0750135eb9; -int64_t tval_g = -1; - -/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */ - -int -i64_eq(int64_t a, int64_t b) -{ - return (a == b); -} - -int -i64_neq(int64_t a, int64_t b) -{ - return (a != b); -} - -int -i64_gt(int64_t a, int64_t b) -{ - return (a > b); -} - -int -i64_le(int64_t a, int64_t b) -{ - return (a <= b); -} - -int -i64_ge(int64_t a, int64_t b) { - return (a >= b); -} - -int -i64_lt(int64_t a, int64_t b) { - return (a < b); -} - -int -i64_uge(uint64_t a, uint64_t b) -{ - return (a >= b); -} - -int -i64_ult(uint64_t a, uint64_t b) -{ - return (a < b); -} - -int -i64_ugt(uint64_t a, uint64_t b) -{ - return (a > b); -} - -int -i64_ule(uint64_t a, uint64_t b) -{ - return (a <= b); -} - -int64_t -i64_eq_select(int64_t a, int64_t b, int64_t c, int64_t d) -{ - return ((a == b) ? c : d); -} - -int64_t -i64_neq_select(int64_t a, int64_t b, int64_t c, int64_t d) -{ - return ((a != b) ? c : d); -} - -int64_t -i64_gt_select(int64_t a, int64_t b, int64_t c, int64_t d) { - return ((a > b) ? c : d); -} - -int64_t -i64_le_select(int64_t a, int64_t b, int64_t c, int64_t d) { - return ((a <= b) ? c : d); -} - -int64_t -i64_ge_select(int64_t a, int64_t b, int64_t c, int64_t d) { - return ((a >= b) ? c : d); -} - -int64_t -i64_lt_select(int64_t a, int64_t b, int64_t c, int64_t d) { - return ((a < b) ? 
c : d); -} - -uint64_t -i64_ugt_select(uint64_t a, uint64_t b, uint64_t c, uint64_t d) -{ - return ((a > b) ? c : d); -} - -uint64_t -i64_ule_select(uint64_t a, uint64_t b, uint64_t c, uint64_t d) -{ - return ((a <= b) ? c : d); -} - -uint64_t -i64_uge_select(uint64_t a, uint64_t b, uint64_t c, uint64_t d) { - return ((a >= b) ? c : d); -} - -uint64_t -i64_ult_select(uint64_t a, uint64_t b, uint64_t c, uint64_t d) { - return ((a < b) ? c : d); -} - -/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */ - -struct harness_int64_pred int64_tests_eq[] = { - {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, TRUE_VAL, &tval_c}, - {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, FALSE_VAL, &tval_d}, - {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, FALSE_VAL, &tval_d}, - {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d}, - {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c} -}; - -struct harness_int64_pred int64_tests_neq[] = { - {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, FALSE_VAL, &tval_d}, - {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, TRUE_VAL, &tval_c}, - {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, TRUE_VAL, &tval_c}, - {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c}, - {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d} -}; - -struct harness_int64_pred int64_tests_sgt[] = { - {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, FALSE_VAL, &tval_d}, - {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, FALSE_VAL, &tval_d}, - {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, TRUE_VAL, &tval_c}, - {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c}, - {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d} -}; - -struct harness_int64_pred int64_tests_sle[] = { - {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, TRUE_VAL, &tval_c}, - {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, TRUE_VAL, &tval_c}, - {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, FALSE_VAL, &tval_d}, - {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d}, - {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c} -}; - -struct harness_int64_pred int64_tests_sge[] = { - {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, TRUE_VAL, &tval_c}, - {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, FALSE_VAL, &tval_d}, - {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, TRUE_VAL, &tval_c}, - {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c}, - {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c} -}; - -struct harness_int64_pred int64_tests_slt[] = { - {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, FALSE_VAL, &tval_d}, - {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, TRUE_VAL, &tval_c}, - {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, FALSE_VAL, &tval_d}, - {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d}, - {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d} -}; - -struct int64_pred_s int64_preds[] = { - {"eq", i64_eq, i64_eq_select, - int64_tests_eq, ARR_SIZE(int64_tests_eq)}, - {"neq", i64_neq, i64_neq_select, - int64_tests_neq, ARR_SIZE(int64_tests_neq)}, - {"gt", i64_gt, i64_gt_select, - int64_tests_sgt, ARR_SIZE(int64_tests_sgt)}, - {"le", i64_le, i64_le_select, - int64_tests_sle, ARR_SIZE(int64_tests_sle)}, - {"ge", i64_ge, i64_ge_select, - int64_tests_sge, ARR_SIZE(int64_tests_sge)}, - {"lt", i64_lt, i64_lt_select, - int64_tests_slt, ARR_SIZE(int64_tests_slt)} -}; - -/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */ - 
-struct harness_uint64_pred uint64_tests_ugt[] = { - {"a %s a", (uint64_t *) &tval_a, (uint64_t *) &tval_a, (uint64_t *) &tval_c, - (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d}, - {"a %s b", (uint64_t *) &tval_a, (uint64_t *) &tval_b, (uint64_t *) &tval_c, - (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d }, - {"a %s c", (uint64_t *) &tval_a, (uint64_t *) &tval_c, (uint64_t *) &tval_c, - (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c }, - {"d %s e", (uint64_t *) &tval_d, (uint64_t *) &tval_e, (uint64_t *) &tval_c, - (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c }, - {"e %s e", (uint64_t *) &tval_e, (uint64_t *) &tval_e, (uint64_t *) &tval_c, - (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d } -}; - -struct harness_uint64_pred uint64_tests_ule[] = { - {"a %s a", (uint64_t *) &tval_a, (uint64_t *) &tval_a, (uint64_t *) &tval_c, - (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c}, - {"a %s b", (uint64_t *) &tval_a, (uint64_t *) &tval_b, (uint64_t *) &tval_c, - (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c}, - {"a %s c", (uint64_t *) &tval_a, (uint64_t *) &tval_c, (uint64_t *) &tval_c, - (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d}, - {"d %s e", (uint64_t *) &tval_d, (uint64_t *) &tval_e, (uint64_t *) &tval_c, - (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d}, - {"e %s e", (uint64_t *) &tval_e, (uint64_t *) &tval_e, (uint64_t *) &tval_c, - (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c} -}; - -struct harness_uint64_pred uint64_tests_uge[] = { - {"a %s a", (uint64_t *) &tval_a, (uint64_t *) &tval_a, (uint64_t *) &tval_c, - (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c}, - {"a %s b", (uint64_t *) &tval_a, (uint64_t *) &tval_b, (uint64_t *) &tval_c, - (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d}, - {"a %s c", (uint64_t *) &tval_a, (uint64_t *) &tval_c, (uint64_t *) &tval_c, - (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c}, - {"d %s e", (uint64_t *) &tval_d, (uint64_t *) &tval_e, (uint64_t *) &tval_c, - (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c}, - {"e %s e", (uint64_t *) &tval_e, (uint64_t *) &tval_e, (uint64_t *) &tval_c, - (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c} -}; - -struct harness_uint64_pred uint64_tests_ult[] = { - {"a %s a", (uint64_t *) &tval_a, (uint64_t *) &tval_a, (uint64_t *) &tval_c, - (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d}, - {"a %s b", (uint64_t *) &tval_a, (uint64_t *) &tval_b, (uint64_t *) &tval_c, - (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c}, - {"a %s c", (uint64_t *) &tval_a, (uint64_t *) &tval_c, (uint64_t *) &tval_c, - (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d}, - {"d %s e", (uint64_t *) &tval_d, (uint64_t *) &tval_e, (uint64_t *) &tval_c, - (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d}, - {"e %s e", (uint64_t *) &tval_e, (uint64_t *) &tval_e, (uint64_t *) &tval_c, - (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d} -}; - -struct uint64_pred_s uint64_preds[] = { - {"ugt", i64_ugt, i64_ugt_select, - uint64_tests_ugt, ARR_SIZE(uint64_tests_ugt)}, - {"ule", i64_ule, i64_ule_select, - uint64_tests_ule, ARR_SIZE(uint64_tests_ule)}, - {"uge", i64_uge, i64_uge_select, - uint64_tests_uge, ARR_SIZE(uint64_tests_uge)}, - {"ult", i64_ult, i64_ult_select, - uint64_tests_ult, ARR_SIZE(uint64_tests_ult)} -}; - -int -compare_expect_int64(const struct int64_pred_s * pred) -{ - int j, failed = 0; - - for (j = 0; j < pred->n_tests; ++j) { - int pred_result; - - pred_result = (*pred->predfunc) (*pred->tests[j].lhs, 
*pred->tests[j].rhs); - - if (pred_result != pred->tests[j].expected) { - char str[64]; - - sprintf(str, pred->tests[j].fmt_string, pred->name); - printf("%s: returned value is %d, expecting %d\n", str, - pred_result, pred->tests[j].expected); - printf(" lhs = %19lld (0x%016llx)\n", *pred->tests[j].lhs, - *pred->tests[j].lhs); - printf(" rhs = %19lld (0x%016llx)\n", *pred->tests[j].rhs, - *pred->tests[j].rhs); - ++failed; - } else { - int64_t selresult; - - selresult = (pred->selfunc) (*pred->tests[j].lhs, *pred->tests[j].rhs, - *pred->tests[j].select_a, - *pred->tests[j].select_b); - - if (selresult != *pred->tests[j].select_expected) { - char str[64]; - - sprintf(str, pred->tests[j].fmt_string, pred->name); - printf("%s select: returned value is %d, expecting %d\n", str, - pred_result, pred->tests[j].expected); - printf(" lhs = %19lld (0x%016llx)\n", *pred->tests[j].lhs, - *pred->tests[j].lhs); - printf(" rhs = %19lld (0x%016llx)\n", *pred->tests[j].rhs, - *pred->tests[j].rhs); - printf(" true = %19lld (0x%016llx)\n", *pred->tests[j].select_a, - *pred->tests[j].select_a); - printf(" false = %19lld (0x%016llx)\n", *pred->tests[j].select_b, - *pred->tests[j].select_b); - ++failed; - } - } - } - - printf(" %d tests performed, should be %d.\n", j, pred->n_tests); - - return failed; -} - -int -compare_expect_uint64(const struct uint64_pred_s * pred) -{ - int j, failed = 0; - - for (j = 0; j < pred->n_tests; ++j) { - int pred_result; - - pred_result = (*pred->predfunc) (*pred->tests[j].lhs, *pred->tests[j].rhs); - if (pred_result != pred->tests[j].expected) { - char str[64]; - - sprintf(str, pred->tests[j].fmt_string, pred->name); - printf("%s: returned value is %d, expecting %d\n", str, - pred_result, pred->tests[j].expected); - printf(" lhs = %19llu (0x%016llx)\n", *pred->tests[j].lhs, - *pred->tests[j].lhs); - printf(" rhs = %19llu (0x%016llx)\n", *pred->tests[j].rhs, - *pred->tests[j].rhs); - ++failed; - } else { - uint64_t selresult; - - selresult = (pred->selfunc) (*pred->tests[j].lhs, *pred->tests[j].rhs, - *pred->tests[j].select_a, - *pred->tests[j].select_b); - if (selresult != *pred->tests[j].select_expected) { - char str[64]; - - sprintf(str, pred->tests[j].fmt_string, pred->name); - printf("%s select: returned value is %d, expecting %d\n", str, - pred_result, pred->tests[j].expected); - printf(" lhs = %19llu (0x%016llx)\n", *pred->tests[j].lhs, - *pred->tests[j].lhs); - printf(" rhs = %19llu (0x%016llx)\n", *pred->tests[j].rhs, - *pred->tests[j].rhs); - printf(" true = %19llu (0x%016llx)\n", *pred->tests[j].select_a, - *pred->tests[j].select_a); - printf(" false = %19llu (0x%016llx)\n", *pred->tests[j].select_b, - *pred->tests[j].select_b); - ++failed; - } - } - } - - printf(" %d tests performed, should be %d.\n", j, pred->n_tests); - - return failed; -} - -/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */ - -int -test_i64_sext_i32(int in, int64_t expected) { - int64_t result = (int64_t) in; - - if (result != expected) { - char str[64]; - sprintf(str, "i64_sext_i32(%d) returns %lld\n", in, result); - return 1; - } - - return 0; -} - -int -test_i64_sext_i16(short in, int64_t expected) { - int64_t result = (int64_t) in; - - if (result != expected) { - char str[64]; - sprintf(str, "i64_sext_i16(%hd) returns %lld\n", in, result); - return 1; - } - - return 0; -} - -int -test_i64_sext_i8(signed char in, int64_t expected) { - int64_t result = (int64_t) in; - - if (result != expected) { - char str[64]; - sprintf(str, "i64_sext_i8(%d) returns %lld\n", in, 
result); - return 1; - } - - return 0; -} - -int -test_i64_zext_i32(unsigned int in, uint64_t expected) { - uint64_t result = (uint64_t) in; - - if (result != expected) { - char str[64]; - sprintf(str, "i64_zext_i32(%u) returns %llu\n", in, result); - return 1; - } - - return 0; -} - -int -test_i64_zext_i16(unsigned short in, uint64_t expected) { - uint64_t result = (uint64_t) in; - - if (result != expected) { - char str[64]; - sprintf(str, "i64_zext_i16(%hu) returns %llu\n", in, result); - return 1; - } - - return 0; -} - -int -test_i64_zext_i8(unsigned char in, uint64_t expected) { - uint64_t result = (uint64_t) in; - - if (result != expected) { - char str[64]; - sprintf(str, "i64_zext_i8(%u) returns %llu\n", in, result); - return 1; - } - - return 0; -} - -/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */ - -int64_t -i64_shl_const(int64_t a) { - return a << 10; -} - -int64_t -i64_shl(int64_t a, int amt) { - return a << amt; -} - -uint64_t -u64_shl_const(uint64_t a) { - return a << 10; -} - -uint64_t -u64_shl(uint64_t a, int amt) { - return a << amt; -} - -int64_t -i64_srl_const(int64_t a) { - return a >> 10; -} - -int64_t -i64_srl(int64_t a, int amt) { - return a >> amt; -} - -uint64_t -u64_srl_const(uint64_t a) { - return a >> 10; -} - -uint64_t -u64_srl(uint64_t a, int amt) { - return a >> amt; -} - -int64_t -i64_sra_const(int64_t a) { - return a >> 10; -} - -int64_t -i64_sra(int64_t a, int amt) { - return a >> amt; -} - -uint64_t -u64_sra_const(uint64_t a) { - return a >> 10; -} - -uint64_t -u64_sra(uint64_t a, int amt) { - return a >> amt; -} - -int -test_u64_constant_shift(const char *func_name, uint64_t (*func)(uint64_t), uint64_t a, uint64_t expected) { - uint64_t result = (*func)(a); - - if (result != expected) { - printf("%s(0x%016llx) returns 0x%016llx, expected 0x%016llx\n", func_name, a, result, expected); - return 1; - } - - return 0; -} - -int -test_i64_constant_shift(const char *func_name, int64_t (*func)(int64_t), int64_t a, int64_t expected) { - int64_t result = (*func)(a); - - if (result != expected) { - printf("%s(0x%016llx) returns 0x%016llx, expected 0x%016llx\n", func_name, a, result, expected); - return 1; - } - - return 0; -} - -int -test_u64_variable_shift(const char *func_name, uint64_t (*func)(uint64_t, int), uint64_t a, unsigned int b, uint64_t expected) { - uint64_t result = (*func)(a, b); - - if (result != expected) { - printf("%s(0x%016llx, %d) returns 0x%016llx, expected 0x%016llx\n", func_name, a, b, result, expected); - return 1; - } - - return 0; -} - -int -test_i64_variable_shift(const char *func_name, int64_t (*func)(int64_t, int), int64_t a, unsigned int b, int64_t expected) { - int64_t result = (*func)(a, b); - - if (result != expected) { - printf("%s(0x%016llx, %d) returns 0x%016llx, expected 0x%016llx\n", func_name, a, b, result, expected); - return 1; - } - - return 0; -} - -/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */ - -int64_t i64_mul(int64_t a, int64_t b) { - return a * b; -} - -/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */ - -int -main(void) -{ - int i, j, failed = 0; - const char *something_failed = " %d tests failed.\n"; - const char *all_tests_passed = " All tests passed.\n"; - - printf("tval_a = %20lld (0x%016llx)\n", tval_a, tval_a); - printf("tval_b = %20lld (0x%016llx)\n", tval_b, tval_b); - printf("tval_c = %20lld (0x%016llx)\n", tval_c, tval_c); - printf("tval_d = %20lld (0x%016llx)\n", tval_d, tval_d); - printf("tval_e = %20lld 
(0x%016llx)\n", tval_e, tval_e); - printf("tval_f = %20llu (0x%016llx)\n", tval_f, tval_f); - printf("tval_g = %20llu (0x%016llx)\n", tval_g, tval_g); - printf("----------------------------------------\n"); - - for (i = 0; i < ARR_SIZE(int64_preds); ++i) { - printf("%s series:\n", int64_preds[i].name); - if ((failed = compare_expect_int64(int64_preds + i)) > 0) { - printf(something_failed, failed); - } else { - printf(all_tests_passed); - } - - printf("----------------------------------------\n"); - } - - for (i = 0; i < ARR_SIZE(uint64_preds); ++i) { - printf("%s series:\n", uint64_preds[i].name); - if ((failed = compare_expect_uint64(uint64_preds + i)) > 0) { - printf(something_failed, failed); - } else { - printf(all_tests_passed); - } - - printf("----------------------------------------\n"); - } - - /*----------------------------------------------------------------------*/ - - puts("signed/zero-extend tests:"); - - failed = 0; - failed += test_i64_sext_i32(-1, -1LL); - failed += test_i64_sext_i32(10, 10LL); - failed += test_i64_sext_i32(0x7fffffff, 0x7fffffffLL); - failed += test_i64_sext_i16(-1, -1LL); - failed += test_i64_sext_i16(10, 10LL); - failed += test_i64_sext_i16(0x7fff, 0x7fffLL); - failed += test_i64_sext_i8(-1, -1LL); - failed += test_i64_sext_i8(10, 10LL); - failed += test_i64_sext_i8(0x7f, 0x7fLL); - - failed += test_i64_zext_i32(0xffffffff, 0x00000000ffffffffLLU); - failed += test_i64_zext_i32(0x01234567, 0x0000000001234567LLU); - failed += test_i64_zext_i16(0xffff, 0x000000000000ffffLLU); - failed += test_i64_zext_i16(0x569a, 0x000000000000569aLLU); - failed += test_i64_zext_i8(0xff, 0x00000000000000ffLLU); - failed += test_i64_zext_i8(0xa0, 0x00000000000000a0LLU); - - if (failed > 0) { - printf(" %d tests failed.\n", failed); - } else { - printf(" All tests passed.\n"); - } - - printf("----------------------------------------\n"); - - failed = 0; - puts("signed left/right shift tests:"); - failed += test_i64_constant_shift("i64_shl_const", i64_shl_const, tval_a, 0x00047dc7ec114c00LL); - failed += test_i64_variable_shift("i64_shl", i64_shl, tval_a, 10, 0x00047dc7ec114c00LL); - failed += test_i64_constant_shift("i64_srl_const", i64_srl_const, tval_a, 0x0000000047dc7ec1LL); - failed += test_i64_variable_shift("i64_srl", i64_srl, tval_a, 10, 0x0000000047dc7ec1LL); - failed += test_i64_constant_shift("i64_sra_const", i64_sra_const, tval_a, 0x0000000047dc7ec1LL); - failed += test_i64_variable_shift("i64_sra", i64_sra, tval_a, 10, 0x0000000047dc7ec1LL); - - if (failed > 0) { - printf(" %d tests ailed.\n", failed); - } else { - printf(" All tests passed.\n"); - } - - printf("----------------------------------------\n"); - - failed = 0; - puts("unsigned left/right shift tests:"); - failed += test_u64_constant_shift("u64_shl_const", u64_shl_const, tval_f, 0xfffc1d404d7ae400LL); - failed += test_u64_variable_shift("u64_shl", u64_shl, tval_f, 10, 0xfffc1d404d7ae400LL); - failed += test_u64_constant_shift("u64_srl_const", u64_srl_const, tval_f, 0x003fffffc1d404d7LL); - failed += test_u64_variable_shift("u64_srl", u64_srl, tval_f, 10, 0x003fffffc1d404d7LL); - failed += test_i64_constant_shift("i64_sra_const", i64_sra_const, tval_f, 0xffffffffc1d404d7LL); - failed += test_i64_variable_shift("i64_sra", i64_sra, tval_f, 10, 0xffffffffc1d404d7LL); - failed += test_u64_constant_shift("u64_sra_const", u64_sra_const, tval_f, 0x003fffffc1d404d7LL); - failed += test_u64_variable_shift("u64_sra", u64_sra, tval_f, 10, 0x003fffffc1d404d7LL); - - if (failed > 0) { - printf(" %d tests ailed.\n", 
failed); - } else { - printf(" All tests passed.\n"); - } - - printf("----------------------------------------\n"); - - int64_t result; - - result = i64_mul(tval_g, tval_g); - printf("%20lld * %20lld = %20lld (0x%016llx)\n", tval_g, tval_g, result, result); - result = i64_mul(tval_d, tval_e); - printf("%20lld * %20lld = %20lld (0x%016llx)\n", tval_d, tval_e, result, result); - /* 0xba7a664f13077c9 */ - result = i64_mul(tval_a, tval_b); - printf("%20lld * %20lld = %20lld (0x%016llx)\n", tval_a, tval_b, result, result); - - printf("----------------------------------------\n"); - - return 0; -} diff --git a/test/CodeGen/CellSPU/useful-harnesses/i64operations.h b/test/CodeGen/CellSPU/useful-harnesses/i64operations.h deleted file mode 100644 index 7a02794cd7e0..000000000000 --- a/test/CodeGen/CellSPU/useful-harnesses/i64operations.h +++ /dev/null @@ -1,43 +0,0 @@ -#define TRUE_VAL (!0) -#define FALSE_VAL 0 -#define ARR_SIZE(arr) (sizeof(arr)/sizeof(arr[0])) - -typedef unsigned long long int uint64_t; -typedef long long int int64_t; - -/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */ -struct harness_int64_pred { - const char *fmt_string; - int64_t *lhs; - int64_t *rhs; - int64_t *select_a; - int64_t *select_b; - int expected; - int64_t *select_expected; -}; - -struct harness_uint64_pred { - const char *fmt_string; - uint64_t *lhs; - uint64_t *rhs; - uint64_t *select_a; - uint64_t *select_b; - int expected; - uint64_t *select_expected; -}; - -struct int64_pred_s { - const char *name; - int (*predfunc) (int64_t, int64_t); - int64_t (*selfunc) (int64_t, int64_t, int64_t, int64_t); - struct harness_int64_pred *tests; - int n_tests; -}; - -struct uint64_pred_s { - const char *name; - int (*predfunc) (uint64_t, uint64_t); - uint64_t (*selfunc) (uint64_t, uint64_t, uint64_t, uint64_t); - struct harness_uint64_pred *tests; - int n_tests; -}; diff --git a/test/CodeGen/CellSPU/useful-harnesses/lit.local.cfg b/test/CodeGen/CellSPU/useful-harnesses/lit.local.cfg deleted file mode 100644 index e6f55eef7af5..000000000000 --- a/test/CodeGen/CellSPU/useful-harnesses/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = [] diff --git a/test/CodeGen/CellSPU/useful-harnesses/vecoperations.c b/test/CodeGen/CellSPU/useful-harnesses/vecoperations.c deleted file mode 100644 index c4c86e37635d..000000000000 --- a/test/CodeGen/CellSPU/useful-harnesses/vecoperations.c +++ /dev/null @@ -1,179 +0,0 @@ -#include <stdio.h> - -typedef unsigned char v16i8 __attribute__((ext_vector_type(16))); -typedef short v8i16 __attribute__((ext_vector_type(16))); -typedef int v4i32 __attribute__((ext_vector_type(4))); -typedef float v4f32 __attribute__((ext_vector_type(4))); -typedef long long v2i64 __attribute__((ext_vector_type(2))); -typedef double v2f64 __attribute__((ext_vector_type(2))); - -void print_v16i8(const char *str, const v16i8 v) { - union { - unsigned char elts[16]; - v16i8 vec; - } tv; - tv.vec = v; - printf("%s = { %hhu, %hhu, %hhu, %hhu, %hhu, %hhu, %hhu, " - "%hhu, %hhu, %hhu, %hhu, %hhu, %hhu, %hhu, " - "%hhu, %hhu }\n", - str, tv.elts[0], tv.elts[1], tv.elts[2], tv.elts[3], tv.elts[4], tv.elts[5], - tv.elts[6], tv.elts[7], tv.elts[8], tv.elts[9], tv.elts[10], tv.elts[11], - tv.elts[12], tv.elts[13], tv.elts[14], tv.elts[15]); -} - -void print_v16i8_hex(const char *str, const v16i8 v) { - union { - unsigned char elts[16]; - v16i8 vec; - } tv; - tv.vec = v; - printf("%s = { 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, " - "0x%02hhx, 0x%02hhx, 0x%02hhx, 
0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, " - "0x%02hhx, 0x%02hhx }\n", - str, tv.elts[0], tv.elts[1], tv.elts[2], tv.elts[3], tv.elts[4], tv.elts[5], - tv.elts[6], tv.elts[7], tv.elts[8], tv.elts[9], tv.elts[10], tv.elts[11], - tv.elts[12], tv.elts[13], tv.elts[14], tv.elts[15]); -} - -void print_v8i16_hex(const char *str, v8i16 v) { - union { - short elts[8]; - v8i16 vec; - } tv; - tv.vec = v; - printf("%s = { 0x%04hx, 0x%04hx, 0x%04hx, 0x%04hx, 0x%04hx, " - "0x%04hx, 0x%04hx, 0x%04hx }\n", - str, tv.elts[0], tv.elts[1], tv.elts[2], tv.elts[3], tv.elts[4], - tv.elts[5], tv.elts[6], tv.elts[7]); -} - -void print_v4i32(const char *str, v4i32 v) { - printf("%s = { %d, %d, %d, %d }\n", str, v.x, v.y, v.z, v.w); -} - -void print_v4f32(const char *str, v4f32 v) { - printf("%s = { %f, %f, %f, %f }\n", str, v.x, v.y, v.z, v.w); -} - -void print_v2i64(const char *str, v2i64 v) { - printf("%s = { %lld, %lld }\n", str, v.x, v.y); -} - -void print_v2f64(const char *str, v2f64 v) { - printf("%s = { %g, %g }\n", str, v.x, v.y); -} - -/*----------------------------------------------------------------------*/ - -v16i8 v16i8_mpy(v16i8 v1, v16i8 v2) { - return v1 * v2; -} - -v16i8 v16i8_add(v16i8 v1, v16i8 v2) { - return v1 + v2; -} - -v4i32 v4i32_shuffle_1(v4i32 a) { - v4i32 c2 = a.yzwx; - return c2; -} - -v4i32 v4i32_shuffle_2(v4i32 a) { - v4i32 c2 = a.zwxy; - return c2; -} - -v4i32 v4i32_shuffle_3(v4i32 a) { - v4i32 c2 = a.wxyz; - return c2; -} - -v4i32 v4i32_shuffle_4(v4i32 a) { - v4i32 c2 = a.xyzw; - return c2; -} - -v4i32 v4i32_shuffle_5(v4i32 a) { - v4i32 c2 = a.xwzy; - return c2; -} - -v4f32 v4f32_shuffle_1(v4f32 a) { - v4f32 c2 = a.yzwx; - return c2; -} - -v4f32 v4f32_shuffle_2(v4f32 a) { - v4f32 c2 = a.zwxy; - return c2; -} - -v4f32 v4f32_shuffle_3(v4f32 a) { - v4f32 c2 = a.wxyz; - return c2; -} - -v4f32 v4f32_shuffle_4(v4f32 a) { - v4f32 c2 = a.xyzw; - return c2; -} - -v4f32 v4f32_shuffle_5(v4f32 a) { - v4f32 c2 = a.xwzy; - return c2; -} - -v2i64 v2i64_shuffle(v2i64 a) { - v2i64 c2 = a.yx; - return c2; -} - -v2f64 v2f64_shuffle(v2f64 a) { - v2f64 c2 = a.yx; - return c2; -} - -int main(void) { - v16i8 v00 = { 0xf4, 0xad, 0x01, 0xe9, 0x51, 0x78, 0xc1, 0x8a, - 0x94, 0x7c, 0x49, 0x6c, 0x21, 0x32, 0xb2, 0x04 }; - v16i8 va0 = { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, - 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10 }; - v16i8 va1 = { 0x11, 0x83, 0x4b, 0x63, 0xff, 0x90, 0x32, 0xe5, - 0x5a, 0xaa, 0x20, 0x01, 0x0d, 0x15, 0x77, 0x05 }; - v8i16 v01 = { 0x1a87, 0x0a14, 0x5014, 0xfff0, - 0xe194, 0x0184, 0x801e, 0x5940 }; - v4i32 v1 = { 1, 2, 3, 4 }; - v4f32 v2 = { 1.0, 2.0, 3.0, 4.0 }; - v2i64 v3 = { 691043ll, 910301513ll }; - v2f64 v4 = { 5.8e56, 9.103e-62 }; - - puts("---- vector tests start ----"); - - print_v16i8_hex("v00 ", v00); - print_v16i8_hex("va0 ", va0); - print_v16i8_hex("va1 ", va1); - print_v16i8_hex("va0 x va1 ", v16i8_mpy(va0, va1)); - print_v16i8_hex("va0 + va1 ", v16i8_add(va0, va1)); - print_v8i16_hex("v01 ", v01); - - print_v4i32("v4i32_shuffle_1(1, 2, 3, 4)", v4i32_shuffle_1(v1)); - print_v4i32("v4i32_shuffle_2(1, 2, 3, 4)", v4i32_shuffle_2(v1)); - print_v4i32("v4i32_shuffle_3(1, 2, 3, 4)", v4i32_shuffle_3(v1)); - print_v4i32("v4i32_shuffle_4(1, 2, 3, 4)", v4i32_shuffle_4(v1)); - print_v4i32("v4i32_shuffle_5(1, 2, 3, 4)", v4i32_shuffle_5(v1)); - - print_v4f32("v4f32_shuffle_1(1, 2, 3, 4)", v4f32_shuffle_1(v2)); - print_v4f32("v4f32_shuffle_2(1, 2, 3, 4)", v4f32_shuffle_2(v2)); - print_v4f32("v4f32_shuffle_3(1, 2, 3, 4)", v4f32_shuffle_3(v2)); - print_v4f32("v4f32_shuffle_4(1, 2, 
3, 4)", v4f32_shuffle_4(v2)); - print_v4f32("v4f32_shuffle_5(1, 2, 3, 4)", v4f32_shuffle_5(v2)); - - print_v2i64("v3 ", v3); - print_v2i64("v2i64_shuffle ", v2i64_shuffle(v3)); - print_v2f64("v4 ", v4); - print_v2f64("v2f64_shuffle ", v2f64_shuffle(v4)); - - puts("---- vector tests end ----"); - - return 0; -} diff --git a/test/CodeGen/CellSPU/v2f32.ll b/test/CodeGen/CellSPU/v2f32.ll deleted file mode 100644 index 09e15ffbc75d..000000000000 --- a/test/CodeGen/CellSPU/v2f32.ll +++ /dev/null @@ -1,78 +0,0 @@ -;RUN: llc --march=cellspu %s -o - | FileCheck %s -%vec = type <2 x float> - -define %vec @test_ret(%vec %param) -{ -;CHECK: bi $lr - ret %vec %param -} - -define %vec @test_add(%vec %param) -{ -;CHECK: fa {{\$.}}, $3, $3 - %1 = fadd %vec %param, %param -;CHECK: bi $lr - ret %vec %1 -} - -define %vec @test_sub(%vec %param) -{ -;CHECK: fs {{\$.}}, $3, $3 - %1 = fsub %vec %param, %param - -;CHECK: bi $lr - ret %vec %1 -} - -define %vec @test_mul(%vec %param) -{ -;CHECK: fm {{\$.}}, $3, $3 - %1 = fmul %vec %param, %param - -;CHECK: bi $lr - ret %vec %1 -} - -; CHECK: test_splat: -define %vec @test_splat(float %param ) { -;CHECK: lqa -;CHECK: shufb - %sv = insertelement <1 x float> undef, float %param, i32 0 - %rv = shufflevector <1 x float> %sv, <1 x float> undef, <2 x i32> zeroinitializer -;CHECK: bi $lr - ret %vec %rv -} - -define void @test_store(%vec %val, %vec* %ptr){ -; CHECK: test_store: -;CHECK: stqd - store %vec zeroinitializer, %vec* null - -;CHECK: stqd $3, 0(${{.*}}) -;CHECK: bi $lr - store %vec %val, %vec* %ptr - ret void -} - -; CHECK: test_insert: -define %vec @test_insert(){ -;CHECK: cwd -;CHECK: shufb $3 - %rv = insertelement %vec undef, float 0.0e+00, i32 undef -;CHECK: bi $lr - ret %vec %rv -} - -; CHECK: test_unaligned_store: - -define void @test_unaligned_store() { -;CHECK: cdd -;CHECK: shufb -;CHECK: stqd - %data = alloca [4 x float], align 16 ; <[4 x float]*> [#uses=1] - %ptr = getelementptr [4 x float]* %data, i32 0, i32 2 ; <float*> [#uses=1] - %vptr = bitcast float* %ptr to <2 x float>* ; <[1 x <2 x float>]*> [#uses=1] - store <2 x float> zeroinitializer, <2 x float>* %vptr - ret void -} - diff --git a/test/CodeGen/CellSPU/v2i32.ll b/test/CodeGen/CellSPU/v2i32.ll deleted file mode 100644 index 9c5b89613df9..000000000000 --- a/test/CodeGen/CellSPU/v2i32.ll +++ /dev/null @@ -1,61 +0,0 @@ -;RUN: llc --march=cellspu %s -o - | FileCheck %s -%vec = type <2 x i32> - -define %vec @test_ret(%vec %param) -{ -;CHECK: bi $lr - ret %vec %param -} - -define %vec @test_add(%vec %param) -{ -;CHECK: shufb -;CHECK: addx - %1 = add %vec %param, %param -;CHECK: bi $lr - ret %vec %1 -} - -define %vec @test_sub(%vec %param) -{ - %1 = sub %vec %param, <i32 1, i32 1> -;CHECK: bi $lr - ret %vec %1 -} - -define %vec @test_mul(%vec %param) -{ - %1 = mul %vec %param, %param -;CHECK: bi $lr - ret %vec %1 -} - -define <2 x i32> @test_splat(i32 %param ) { -;see svn log for why this is here... 
-;CHECK-NOT: or $3, $3, $3 -;CHECK: lqa -;CHECK: shufb - %sv = insertelement <1 x i32> undef, i32 %param, i32 0 - %rv = shufflevector <1 x i32> %sv, <1 x i32> undef, <2 x i32> zeroinitializer -;CHECK: bi $lr - ret <2 x i32> %rv -} - -define i32 @test_extract() { -;CHECK: shufb $3 - %rv = extractelement <2 x i32> zeroinitializer, i32 undef ; <i32> [#uses=1] -;CHECK: bi $lr - ret i32 %rv -} - -define void @test_store( %vec %val, %vec* %ptr) -{ - store %vec %val, %vec* %ptr - ret void -} - -define <2 x i32>* @test_alignment( [2 x <2 x i32>]* %ptr) -{ - %rv = getelementptr [2 x <2 x i32>]* %ptr, i32 0, i32 1 - ret <2 x i32>* %rv -} diff --git a/test/CodeGen/CellSPU/vec_const.ll b/test/CodeGen/CellSPU/vec_const.ll deleted file mode 100644 index 24c05c684084..000000000000 --- a/test/CodeGen/CellSPU/vec_const.ll +++ /dev/null @@ -1,154 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: llc < %s -march=cellspu -mattr=large_mem > %t2.s -; RUN: grep -w il %t1.s | count 3 -; RUN: grep ilhu %t1.s | count 8 -; RUN: grep -w ilh %t1.s | count 5 -; RUN: grep iohl %t1.s | count 7 -; RUN: grep lqa %t1.s | count 6 -; RUN: grep 24672 %t1.s | count 2 -; RUN: grep 16429 %t1.s | count 1 -; RUN: grep 63572 %t1.s | count 1 -; RUN: grep 4660 %t1.s | count 1 -; RUN: grep 22136 %t1.s | count 1 -; RUN: grep 43981 %t1.s | count 1 -; RUN: grep 61202 %t1.s | count 1 -; RUN: grep 16393 %t1.s | count 1 -; RUN: grep 8699 %t1.s | count 1 -; RUN: grep 21572 %t1.s | count 1 -; RUN: grep 11544 %t1.s | count 1 -; RUN: grep 1311768467750121234 %t1.s | count 1 -; RUN: grep lqd %t2.s | count 6 - -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128" -target triple = "spu-unknown-elf" - -; Vector constant load tests: - -; IL <reg>, 2 -define <4 x i32> @v4i32_constvec() { - ret <4 x i32> < i32 2, i32 2, i32 2, i32 2 > -} - -; Spill to constant pool -define <4 x i32> @v4i32_constpool() { - ret <4 x i32> < i32 2, i32 1, i32 1, i32 2 > -} - -; Max negative range for IL -define <4 x i32> @v4i32_constvec_2() { - ret <4 x i32> < i32 -32768, i32 -32768, i32 -32768, i32 -32768 > -} - -; ILHU <reg>, 73 (0x49) -; 4784128 = 0x490000 -define <4 x i32> @v4i32_constvec_3() { - ret <4 x i32> < i32 4784128, i32 4784128, - i32 4784128, i32 4784128 > -} - -; ILHU <reg>, 61 (0x3d) -; IOHL <reg>, 15395 (0x3c23) -define <4 x i32> @v4i32_constvec_4() { - ret <4 x i32> < i32 4013091, i32 4013091, - i32 4013091, i32 4013091 > -} - -; ILHU <reg>, 0x5050 (20560) -; IOHL <reg>, 0x5050 (20560) -; Tests for whether we expand the size of the bit pattern properly, because -; this could be interpreted as an i8 pattern (0x50) -define <4 x i32> @v4i32_constvec_5() { - ret <4 x i32> < i32 1347440720, i32 1347440720, - i32 1347440720, i32 1347440720 > -} - -; ILH -define <8 x i16> @v8i16_constvec_1() { - ret <8 x i16> < i16 32767, i16 32767, i16 32767, i16 32767, - i16 32767, i16 32767, i16 32767, i16 32767 > -} - -; ILH -define <8 x i16> @v8i16_constvec_2() { - ret <8 x i16> < i16 511, i16 511, i16 511, i16 511, i16 511, - i16 511, i16 511, i16 511 > -} - -; ILH -define <8 x i16> @v8i16_constvec_3() { - ret <8 x i16> < i16 -512, i16 -512, i16 -512, i16 -512, i16 -512, - i16 -512, i16 -512, i16 -512 > -} - -; ILH <reg>, 24672 (0x6060) -; Tests whether we expand the size of the bit pattern properly, because -; this could be interpreted as an i8 pattern (0x60) -define <8 x i16> @v8i16_constvec_4() { - ret <8 x i16> < i16 24672, i16 24672, i16 24672, i16 24672, i16 24672, - i16 24672, i16 
24672, i16 24672 > -} - -; ILH <reg>, 24672 (0x6060) -; Tests whether we expand the size of the bit pattern properly, because -; this is an i8 pattern but has to be expanded out to i16 to load it -; properly into the vector register. -define <16 x i8> @v16i8_constvec_1() { - ret <16 x i8> < i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, - i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, i8 96 > -} - -define <4 x float> @v4f32_constvec_1() { -entry: - ret <4 x float> < float 0x4005BF0A80000000, - float 0x4005BF0A80000000, - float 0x4005BF0A80000000, - float 0x4005BF0A80000000 > -} - -define <4 x float> @v4f32_constvec_2() { -entry: - ret <4 x float> < float 0.000000e+00, - float 0.000000e+00, - float 0.000000e+00, - float 0.000000e+00 > -} - - -define <4 x float> @v4f32_constvec_3() { -entry: - ret <4 x float> < float 0x4005BF0A80000000, - float 0x3810000000000000, - float 0x47EFFFFFE0000000, - float 0x400921FB60000000 > -} - -; 1311768467750121234 => 0x 12345678 abcdef12 -; HI32_hi: 4660 -; HI32_lo: 22136 -; LO32_hi: 43981 -; LO32_lo: 61202 -define <2 x i64> @i64_constvec_1() { -entry: - ret <2 x i64> < i64 1311768467750121234, - i64 1311768467750121234 > -} - -define <2 x i64> @i64_constvec_2() { -entry: - ret <2 x i64> < i64 1, i64 1311768467750121234 > -} - -define <2 x double> @f64_constvec_1() { -entry: - ret <2 x double> < double 0x400921fb54442d18, - double 0xbff6a09e667f3bcd > -} - -; 0x400921fb 54442d18 -> -; (ILHU 0x4009 [16393]/IOHL 0x21fb [ 8699]) -; (ILHU 0x5444 [21572]/IOHL 0x2d18 [11544]) -define <2 x double> @f64_constvec_2() { -entry: - ret <2 x double> < double 0x400921fb54442d18, - double 0x400921fb54442d18 > -} diff --git a/test/CodeGen/CellSPU/vecinsert.ll b/test/CodeGen/CellSPU/vecinsert.ll deleted file mode 100644 index 8dcab1d84c9c..000000000000 --- a/test/CodeGen/CellSPU/vecinsert.ll +++ /dev/null @@ -1,131 +0,0 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep cbd %t1.s | count 5 -; RUN: grep chd %t1.s | count 5 -; RUN: grep cwd %t1.s | count 11 -; RUN: grep -w il %t1.s | count 5 -; RUN: grep -w ilh %t1.s | count 6 -; RUN: grep iohl %t1.s | count 1 -; RUN: grep ilhu %t1.s | count 4 -; RUN: grep shufb %t1.s | count 27 -; RUN: grep 17219 %t1.s | count 1 -; RUN: grep 22598 %t1.s | count 1 -; RUN: grep -- -39 %t1.s | count 1 -; RUN: grep 24 %t1.s | count 1 -; RUN: grep 1159 %t1.s | count 1 -; RUN: FileCheck %s < %t1.s - -; ModuleID = 'vecinsert.bc' -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128" -target triple = "spu-unknown-elf" - -; 67 -> 0x43, as 8-bit vector constant load = 0x4343 (17219)0x4343 -define <16 x i8> @test_v16i8(<16 x i8> %P, i8 %x) { -entry: - %tmp1 = insertelement <16 x i8> %P, i8 %x, i32 10 - %tmp1.1 = insertelement <16 x i8> %tmp1, i8 67, i32 7 - %tmp1.2 = insertelement <16 x i8> %tmp1.1, i8 %x, i32 15 - ret <16 x i8> %tmp1.2 -} - -; 22598 -> 0x5846 -define <8 x i16> @test_v8i16(<8 x i16> %P, i16 %x) { -entry: - %tmp1 = insertelement <8 x i16> %P, i16 %x, i32 5 - %tmp1.1 = insertelement <8 x i16> %tmp1, i16 22598, i32 7 - %tmp1.2 = insertelement <8 x i16> %tmp1.1, i16 %x, i32 2 - ret <8 x i16> %tmp1.2 -} - -; 1574023 -> 0x180487 (ILHU 24/IOHL 1159) -define <4 x i32> @test_v4i32_1(<4 x i32> %P, i32 %x) { -entry: - %tmp1 = insertelement <4 x i32> %P, i32 %x, i32 2 - %tmp1.1 = insertelement <4 x i32> %tmp1, i32 1574023, i32 1 - %tmp1.2 = insertelement <4 x i32> %tmp1.1, i32 %x, i32 3 - ret <4 x i32> %tmp1.2 -} - -; Should generate IL for the load -define 
<4 x i32> @test_v4i32_2(<4 x i32> %P, i32 %x) { -entry: - %tmp1 = insertelement <4 x i32> %P, i32 %x, i32 2 - %tmp1.1 = insertelement <4 x i32> %tmp1, i32 -39, i32 1 - %tmp1.2 = insertelement <4 x i32> %tmp1.1, i32 %x, i32 3 - ret <4 x i32> %tmp1.2 -} - -define void @variable_v16i8_1(<16 x i8>* %a, i32 %i) nounwind { -entry: - %arrayidx = getelementptr <16 x i8>* %a, i32 %i - %tmp2 = load <16 x i8>* %arrayidx - %tmp3 = insertelement <16 x i8> %tmp2, i8 1, i32 1 - %tmp8 = insertelement <16 x i8> %tmp3, i8 2, i32 11 - store <16 x i8> %tmp8, <16 x i8>* %arrayidx - ret void -} - -define void @variable_v8i16_1(<8 x i16>* %a, i32 %i) nounwind { -entry: - %arrayidx = getelementptr <8 x i16>* %a, i32 %i - %tmp2 = load <8 x i16>* %arrayidx - %tmp3 = insertelement <8 x i16> %tmp2, i16 1, i32 1 - %tmp8 = insertelement <8 x i16> %tmp3, i16 2, i32 6 - store <8 x i16> %tmp8, <8 x i16>* %arrayidx - ret void -} - -define void @variable_v4i32_1(<4 x i32>* %a, i32 %i) nounwind { -entry: - %arrayidx = getelementptr <4 x i32>* %a, i32 %i - %tmp2 = load <4 x i32>* %arrayidx - %tmp3 = insertelement <4 x i32> %tmp2, i32 1, i32 1 - %tmp8 = insertelement <4 x i32> %tmp3, i32 2, i32 2 - store <4 x i32> %tmp8, <4 x i32>* %arrayidx - ret void -} - -define void @variable_v4f32_1(<4 x float>* %a, i32 %i) nounwind { -entry: - %arrayidx = getelementptr <4 x float>* %a, i32 %i - %tmp2 = load <4 x float>* %arrayidx - %tmp3 = insertelement <4 x float> %tmp2, float 1.000000e+00, i32 1 - %tmp8 = insertelement <4 x float> %tmp3, float 2.000000e+00, i32 2 - store <4 x float> %tmp8, <4 x float>* %arrayidx - ret void -} - -define void @variable_v2i64_1(<2 x i64>* %a, i32 %i) nounwind { -entry: - %arrayidx = getelementptr <2 x i64>* %a, i32 %i - %tmp2 = load <2 x i64>* %arrayidx - %tmp3 = insertelement <2 x i64> %tmp2, i64 615, i32 0 - store <2 x i64> %tmp3, <2 x i64>* %arrayidx - ret void -} - -define void @variable_v2i64_2(<2 x i64>* %a, i32 %i) nounwind { -entry: - %arrayidx = getelementptr <2 x i64>* %a, i32 %i - %tmp2 = load <2 x i64>* %arrayidx - %tmp3 = insertelement <2 x i64> %tmp2, i64 615, i32 1 - store <2 x i64> %tmp3, <2 x i64>* %arrayidx - ret void -} - -define void @variable_v2f64_1(<2 x double>* %a, i32 %i) nounwind { -entry: - %arrayidx = getelementptr <2 x double>* %a, i32 %i - %tmp2 = load <2 x double>* %arrayidx - %tmp3 = insertelement <2 x double> %tmp2, double 1.000000e+00, i32 1 - store <2 x double> %tmp3, <2 x double>* %arrayidx - ret void -} - -define <4 x i32> @undef_v4i32( i32 %param ) { - ;CHECK: cwd - ;CHECK: lqa - ;CHECK: shufb - %val = insertelement <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i32 %param, i32 undef - ret <4 x i32> %val -} - diff --git a/test/CodeGen/Generic/2008-02-20-MatchingMem.ll b/test/CodeGen/Generic/2008-02-20-MatchingMem.ll index da1aeb556a39..7ffb734c713a 100644 --- a/test/CodeGen/Generic/2008-02-20-MatchingMem.ll +++ b/test/CodeGen/Generic/2008-02-20-MatchingMem.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s ; PR1133 +; XFAIL: hexagon define void @test(i32* %X) nounwind { entry: %tmp1 = getelementptr i32* %X, i32 10 ; <i32*> [#uses=2] diff --git a/test/CodeGen/Generic/2013-03-20-APFloatCrash.ll b/test/CodeGen/Generic/2013-03-20-APFloatCrash.ll new file mode 100644 index 000000000000..a1aed0e3a4b6 --- /dev/null +++ b/test/CodeGen/Generic/2013-03-20-APFloatCrash.ll @@ -0,0 +1,7 @@ +; RUN: llc < %s + +define internal i1 @f(float %s) { +entry: + %c = fcmp ogt float %s, 0x41EFFFFFE0000000 + ret i1 %c +} diff --git a/test/CodeGen/Generic/dag-combine-crash.ll 
b/test/CodeGen/Generic/dag-combine-crash.ll new file mode 100644 index 000000000000..a7810b5c05e2 --- /dev/null +++ b/test/CodeGen/Generic/dag-combine-crash.ll @@ -0,0 +1,21 @@ +; RUN: llc < %s + +define void @main() { +if.end: + br label %block.i.i + +block.i.i: + %tmpbb = load i8* undef + %tmp54 = zext i8 %tmpbb to i64 + %tmp59 = and i64 %tmp54, 8 + %tmp60 = add i64 %tmp59, 3691045929300498764 + %tmp62 = sub i64 %tmp60, 3456506383779105993 + %tmp63 = xor i64 1050774804270620004, %tmp62 + %tmp65 = xor i64 %tmp62, 234539545521392771 + %tmp67 = or i64 %tmp65, %tmp63 + %tmp71 = xor i64 %tmp67, 6781485823212740913 + %tmp72 = trunc i64 %tmp71 to i32 + %tmp74 = lshr i32 2, %tmp72 + store i32 %tmp74, i32* undef + br label %block.i.i +} diff --git a/test/CodeGen/Generic/inline-asm-mem-clobber.ll b/test/CodeGen/Generic/inline-asm-mem-clobber.ll new file mode 100644 index 000000000000..e523d031dc65 --- /dev/null +++ b/test/CodeGen/Generic/inline-asm-mem-clobber.ll @@ -0,0 +1,21 @@ +; RUN: llc -O2 < %s | FileCheck %s + +@G = common global i32 0, align 4 + +define i32 @foo(i8* %p) nounwind uwtable { +entry: + %p.addr = alloca i8*, align 8 + %rv = alloca i32, align 4 + store i8* %p, i8** %p.addr, align 8 + store i32 0, i32* @G, align 4 + %0 = load i8** %p.addr, align 8 +; CHECK: blah + %1 = call i32 asm "blah", "=r,r,~{memory}"(i8* %0) nounwind +; CHECK: @G + store i32 %1, i32* %rv, align 4 + %2 = load i32* %rv, align 4 + %3 = load i32* @G, align 4 + %add = add nsw i32 %2, %3 + ret i32 %add +} + diff --git a/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll b/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll index e709080bfc5a..a135c625fccc 100644 --- a/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll +++ b/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s - +; XFAIL: hexagon declare { i64, double } @wild() define void @foo(i64* %p, double* %q) nounwind { diff --git a/test/CodeGen/Generic/select-cc.ll b/test/CodeGen/Generic/select-cc.ll index b653e2a46dcf..7510f701b147 100644 --- a/test/CodeGen/Generic/select-cc.ll +++ b/test/CodeGen/Generic/select-cc.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s ; PR2504 - +; XFAIL: hexagon define <2 x double> @vector_select(<2 x double> %x, <2 x double> %y) nounwind { %x.lo = extractelement <2 x double> %x, i32 0 ; <double> [#uses=1] %x.lo.ge = fcmp oge double %x.lo, 0.000000e+00 ; <i1> [#uses=1] diff --git a/test/CodeGen/Generic/vector.ll b/test/CodeGen/Generic/vector.ll index a0f9a02d4cbb..bc7c7d00a11c 100644 --- a/test/CodeGen/Generic/vector.ll +++ b/test/CodeGen/Generic/vector.ll @@ -1,6 +1,6 @@ ; Test that vectors are scalarized/lowered correctly. ; RUN: llc < %s - +; XFAIL: hexagon %d8 = type <8 x double> %f1 = type <1 x float> @@ -152,3 +152,8 @@ define void @splat_i4(%i4* %P, %i4* %Q, i32 %X) { store %i4 %R, %i4* %P ret void } + +define <2 x i32*> @vector_gep(<2 x [3 x {i32, i32}]*> %a) { + %w = getelementptr <2 x [3 x {i32, i32}]*> %a, <2 x i32> <i32 1, i32 2>, <2 x i32> <i32 2, i32 3>, <2 x i32> <i32 1, i32 1> + ret <2 x i32*> %w +} diff --git a/test/CodeGen/Hexagon/absaddr-store.ll b/test/CodeGen/Hexagon/absaddr-store.ll new file mode 100644 index 000000000000..5c2554df8aeb --- /dev/null +++ b/test/CodeGen/Hexagon/absaddr-store.ll @@ -0,0 +1,46 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s +; Check that we generate store instructions with absolute addressing mode.
+ +@a = external global i32 +@b = external global i8 +@c = external global i16 +@d = external global i64 + +define zeroext i8 @absStoreByte() nounwind { +; CHECK: memb(##b){{ *}}={{ *}}r{{[0-9]+}} +entry: + %0 = load i8* @b, align 1 + %conv = zext i8 %0 to i32 + %mul = mul nsw i32 100, %conv + %conv1 = trunc i32 %mul to i8 + store i8 %conv1, i8* @b, align 1 + ret i8 %conv1 +} + +define signext i16 @absStoreHalf() nounwind { +; CHECK: memh(##c){{ *}}={{ *}}r{{[0-9]+}} +entry: + %0 = load i16* @c, align 2 + %conv = sext i16 %0 to i32 + %mul = mul nsw i32 100, %conv + %conv1 = trunc i32 %mul to i16 + store i16 %conv1, i16* @c, align 2 + ret i16 %conv1 +} + +define i32 @absStoreWord() nounwind { +; CHECK: memw(##a){{ *}}={{ *}}r{{[0-9]+}} +entry: + %0 = load i32* @a, align 4 + %mul = mul nsw i32 100, %0 + store i32 %mul, i32* @a, align 4 + ret i32 %mul +} + +define void @absStoreDouble() nounwind { +; CHECK: memd(##d){{ *}}={{ *}}r{{[0-9]+}}:{{[0-9]+}} +entry: + store i64 100, i64* @d, align 8 + ret void +} + diff --git a/test/CodeGen/Hexagon/adde.ll b/test/CodeGen/Hexagon/adde.ll new file mode 100644 index 000000000000..9cee3e215d62 --- /dev/null +++ b/test/CodeGen/Hexagon/adde.ll @@ -0,0 +1,34 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; CHECK: r{{[0-9]+:[0-9]+}} = #0 +; CHECK: r{{[0-9]+:[0-9]+}} = #1 +; CHECK: r{{[0-9]+:[0-9]+}} = add(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}}) +; CHECK: p{{[0-9]+}} = cmp.gtu(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}}) +; CHECK: p{{[0-9]+}} = cmp.gtu(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}}) +; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}) +; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}) +; CHECK: r{{[0-9]+:[0-9]+}} = combine(r{{[0-9]+}}, r{{[0-9]+}}) +; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}) +; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}) +; CHECK: r{{[0-9]+:[0-9]+}} = combine(r{{[0-9]+}}, r{{[0-9]+}}) +; CHECK: r{{[0-9]+:[0-9]+}} = add(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}}) + + +define void @check_adde_addc (i64 %AL, i64 %AH, i64 %BL, i64 %BH, i64* %RL, i64* %RH) { +entry: + %tmp1 = zext i64 %AL to i128 + %tmp23 = zext i64 %AH to i128 + %tmp4 = shl i128 %tmp23, 64 + %tmp5 = or i128 %tmp4, %tmp1 + %tmp67 = zext i64 %BL to i128 + %tmp89 = zext i64 %BH to i128 + %tmp11 = shl i128 %tmp89, 64 + %tmp12 = or i128 %tmp11, %tmp67 + %tmp15 = add i128 %tmp12, %tmp5 + %tmp1617 = trunc i128 %tmp15 to i64 + store i64 %tmp1617, i64* %RL + %tmp21 = lshr i128 %tmp15, 64 + %tmp2122 = trunc i128 %tmp21 to i64 + store i64 %tmp2122, i64* %RH + ret void +}
\ No newline at end of file diff --git a/test/CodeGen/Hexagon/args.ll b/test/CodeGen/Hexagon/args.ll index 8a6efb620ec0..f8c9e44c831d 100644 --- a/test/CodeGen/Hexagon/args.ll +++ b/test/CodeGen/Hexagon/args.ll @@ -1,12 +1,11 @@ -; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-hexagon-misched < %s | FileCheck %s -; CHECK: r[[T0:[0-9]+]] = #7 -; CHECK: memw(r29 + #0) = r[[T0]] -; CHECK: r5 = #6 +; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-dfa-sched -disable-hexagon-misched < %s | FileCheck %s +; CHECK: memw(r29{{ *}}+{{ *}}#0){{ *}}={{ *}}#7 ; CHECK: r0 = #1 ; CHECK: r1 = #2 ; CHECK: r2 = #3 ; CHECK: r3 = #4 ; CHECK: r4 = #5 +; CHECK: r5 = #6 define void @foo() nounwind { diff --git a/test/CodeGen/Hexagon/ashift-left-right.ll b/test/CodeGen/Hexagon/ashift-left-right.ll new file mode 100644 index 000000000000..7c41bc7bbf3b --- /dev/null +++ b/test/CodeGen/Hexagon/ashift-left-right.ll @@ -0,0 +1,21 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s + +define i32 @foo(i32 %a, i32 %b) nounwind readnone { +; CHECK: lsl +; CHECK: aslh +entry: + %shl1 = shl i32 16, %a + %shl2 = shl i32 %b, 16 + %ret = mul i32 %shl1, %shl2 + ret i32 %ret +} + +define i32 @bar(i32 %a, i32 %b) nounwind readnone { +; CHECK: asrh +; CHECK: lsr +entry: + %shl1 = ashr i32 16, %a + %shl2 = ashr i32 %b, 16 + %ret = mul i32 %shl1, %shl2 + ret i32 %ret +} diff --git a/test/CodeGen/Hexagon/block-addr.ll b/test/CodeGen/Hexagon/block-addr.ll new file mode 100644 index 000000000000..54a12bf48448 --- /dev/null +++ b/test/CodeGen/Hexagon/block-addr.ll @@ -0,0 +1,64 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; CHECK: r{{[0-9]+}} = CONST32(#.LJTI{{[0-9]+_[0-9]+}}) +; CHECK: r{{[0-9]+}} = memw(r{{[0-9]+}}+r{{[0-9]+<<#[0-9]+}}) +; CHECK: jumpr r{{[0-9]+}} + +define void @main() #0 { +entry: + %ret = alloca i32, align 4 + br label %while.body + +while.body: + %ret.0.load17 = load volatile i32* %ret, align 4 + switch i32 %ret.0.load17, label %label6 [ + i32 0, label %label0 + i32 1, label %label1 + i32 2, label %label2 + i32 3, label %label3 + i32 4, label %label4 + i32 5, label %label5 + ] + +label0: + %ret.0.load18 = load volatile i32* %ret, align 4 + %inc = add nsw i32 %ret.0.load18, 1 + store volatile i32 %inc, i32* %ret, align 4 + br label %while.body + +label1: + %ret.0.load19 = load volatile i32* %ret, align 4 + %inc2 = add nsw i32 %ret.0.load19, 1 + store volatile i32 %inc2, i32* %ret, align 4 + br label %while.body + +label2: + %ret.0.load20 = load volatile i32* %ret, align 4 + %inc4 = add nsw i32 %ret.0.load20, 1 + store volatile i32 %inc4, i32* %ret, align 4 + br label %while.body + +label3: + %ret.0.load21 = load volatile i32* %ret, align 4 + %inc6 = add nsw i32 %ret.0.load21, 1 + store volatile i32 %inc6, i32* %ret, align 4 + br label %while.body + +label4: + %ret.0.load22 = load volatile i32* %ret, align 4 + %inc8 = add nsw i32 %ret.0.load22, 1 + store volatile i32 %inc8, i32* %ret, align 4 + br label %while.body + +label5: + %ret.0.load23 = load volatile i32* %ret, align 4 + %inc10 = add nsw i32 %ret.0.load23, 1 + store volatile i32 %inc10, i32* %ret, align 4 + br label %while.body + +label6: + store volatile i32 0, i32* %ret, align 4 + br label %while.body +} + +attributes #0 = { noreturn nounwind "target-cpu"="hexagonv4" } diff --git a/test/CodeGen/Hexagon/cext-check.ll b/test/CodeGen/Hexagon/cext-check.ll new file mode 100644 index 000000000000..7c4b19e5a402 --- /dev/null +++ b/test/CodeGen/Hexagon/cext-check.ll @@ -0,0 +1,57 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | 
FileCheck %s +; Check that we constant extended instructions only when necessary. + +define i32 @cext_test1(i32* %a) nounwind { +; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memw(r{{[0-9]+}}+##8000) +; CHECK: r{{[0-9]+}}{{ *}}={{ *}}add(r{{[0-9]+}}{{ *}},{{ *}}##300000) +; CHECK-NOT: r{{[0-9]+}}{{ *}}={{ *}}memw(r{{[0-9]+}}+##4092) +; CHECK-NOT: r{{[0-9]+}}{{ *}}={{ *}}add(r{{[0-9]+}}{{ *}},{{ *}}##300) +entry: + %0 = load i32* %a, align 4 + %tobool = icmp ne i32 %0, 0 + br i1 %tobool, label %if.then, label %if.end + +if.then: + %arrayidx1 = getelementptr inbounds i32* %a, i32 2000 + %1 = load i32* %arrayidx1, align 4 + %add = add nsw i32 %1, 300000 + br label %return + +if.end: + %arrayidx2 = getelementptr inbounds i32* %a, i32 1023 + %2 = load i32* %arrayidx2, align 4 + %add3 = add nsw i32 %2, 300 + br label %return + +return: + %retval.0 = phi i32 [ %add, %if.then ], [ %add3, %if.end ] + ret i32 %retval.0 +} + +define i32 @cext_test2(i8* %a) nounwind { +; CHECK-NOT: r{{[0-9]+}}{{ *}}={{ *}}memub(r{{[0-9]+}}+##1023) +; CHECK: r{{[0-9]+}}{{ *}}={{ *}}add(r{{[0-9]+}}{{ *}},{{ *}}##300000) +; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memub(r{{[0-9]+}}+##1024) +; CHECK-NOT: r{{[0-9]+}}{{ *}}={{ *}}add(r{{[0-9]+}}{{ *}},{{ *}}##6000) +entry: + %tobool = icmp ne i8* %a, null + br i1 %tobool, label %if.then, label %if.end + +if.then: + %arrayidx = getelementptr inbounds i8* %a, i32 1023 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 300000 + br label %return + +if.end: + %arrayidx1 = getelementptr inbounds i8* %a, i32 1024 + %1 = load i8* %arrayidx1, align 1 + %conv2 = zext i8 %1 to i32 + %add3 = add nsw i32 %conv2, 6000 + br label %return + +return: + %retval.0 = phi i32 [ %add, %if.then ], [ %add3, %if.end ] + ret i32 %retval.0 +} diff --git a/test/CodeGen/Hexagon/cext-valid-packet1.ll b/test/CodeGen/Hexagon/cext-valid-packet1.ll new file mode 100644 index 000000000000..a479d37e4ae5 --- /dev/null +++ b/test/CodeGen/Hexagon/cext-valid-packet1.ll @@ -0,0 +1,18 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s + +; Check that the packetizer generates valid packets with constant +; extended instructions. +; CHECK: { +; CHECK-NEXT: r{{[0-9]+}}{{ *}}={{ *}}add(r{{[0-9]+}}, ##{{[0-9]+}}) +; CHECK-NEXT: r{{[0-9]+}}{{ *}}={{ *}}add(r{{[0-9]+}}, ##{{[0-9]+}}) +; CHECK-NEXT: } + +define i32 @check-packet1(i32 %a, i32 %b, i32 %c) nounwind readnone { +entry: + %add = add nsw i32 %a, 200000 + %add1 = add nsw i32 %b, 200001 + %add2 = add nsw i32 %c, 200002 + %cmp = icmp sgt i32 %add, %add1 + %b.addr.0 = select i1 %cmp, i32 %add1, i32 %add2 + ret i32 %b.addr.0 +} diff --git a/test/CodeGen/Hexagon/cext-valid-packet2.ll b/test/CodeGen/Hexagon/cext-valid-packet2.ll new file mode 100644 index 000000000000..2788a6b1c865 --- /dev/null +++ b/test/CodeGen/Hexagon/cext-valid-packet2.ll @@ -0,0 +1,43 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s +; Check that the packetizer generates valid packets with constant +; extended add and base+offset store instructions. 
+ +; CHECK: { +; CHECK-NEXT: r{{[0-9]+}}{{ *}}={{ *}}add(r{{[0-9]+}}, ##{{[0-9]+}}) +; CHECK-NEXT: memw(r{{[0-9]+}}+{{ *}}##{{[0-9]+}}){{ *}}={{ *}}r{{[0-9]+}}.new +; CHECK-NEXT: } + +define i32 @test(i32* nocapture %a, i32* nocapture %b, i32 %c) nounwind { +entry: + %add = add nsw i32 %c, 200002 + %0 = load i32* %a, align 4 + %add1 = add nsw i32 %0, 200000 + %arrayidx2 = getelementptr inbounds i32* %a, i32 3000 + store i32 %add1, i32* %arrayidx2, align 4 + %1 = load i32* %b, align 4 + %add4 = add nsw i32 %1, 200001 + %arrayidx5 = getelementptr inbounds i32* %a, i32 1 + store i32 %add4, i32* %arrayidx5, align 4 + %arrayidx7 = getelementptr inbounds i32* %b, i32 1 + %2 = load i32* %arrayidx7, align 4 + %cmp = icmp sgt i32 %add4, %2 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + %arrayidx8 = getelementptr inbounds i32* %a, i32 2 + %3 = load i32* %arrayidx8, align 4 + %arrayidx9 = getelementptr inbounds i32* %b, i32 2000 + %4 = load i32* %arrayidx9, align 4 + %sub = sub nsw i32 %3, %4 + %arrayidx10 = getelementptr inbounds i32* %a, i32 4000 + store i32 %sub, i32* %arrayidx10, align 4 + br label %if.end + +if.else: ; preds = %entry + %arrayidx11 = getelementptr inbounds i32* %b, i32 3200 + store i32 %add, i32* %arrayidx11, align 4 + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret i32 %add +} diff --git a/test/CodeGen/Hexagon/cmp-to-genreg.ll b/test/CodeGen/Hexagon/cmp-to-genreg.ll new file mode 100644 index 000000000000..97cf51ce1a2b --- /dev/null +++ b/test/CodeGen/Hexagon/cmp-to-genreg.ll @@ -0,0 +1,34 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s +; Check that we generate compare to general register. + +define i32 @compare1(i32 %a) nounwind { +; CHECK: r{{[0-9]+}}{{ *}}={{ *}}cmp.eq(r{{[0-9]+}},{{ *}}#120) +entry: + %cmp = icmp eq i32 %a, 120 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @compare2(i32 %a) nounwind readnone { +; CHECK: r{{[0-9]+}}{{ *}}={{ *}}!cmp.eq(r{{[0-9]+}},{{ *}}#120) +entry: + %cmp = icmp ne i32 %a, 120 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @compare3(i32 %a, i32 %b) nounwind readnone { +; CHECK: r{{[0-9]+}}{{ *}}={{ *}}cmp.eq(r{{[0-9]+}},{{ *}}r{{[0-9]+}}) +entry: + %cmp = icmp eq i32 %a, %b + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @compare4(i32 %a, i32 %b) nounwind readnone { +; CHECK: r{{[0-9]+}}{{ *}}={{ *}}!cmp.eq(r{{[0-9]+}},{{ *}}r{{[0-9]+}}) +entry: + %cmp = icmp ne i32 %a, %b + %conv = zext i1 %cmp to i32 + ret i32 %conv +} diff --git a/test/CodeGen/Hexagon/cmp-to-predreg.ll b/test/CodeGen/Hexagon/cmp-to-predreg.ll new file mode 100644 index 000000000000..d430b901866d --- /dev/null +++ b/test/CodeGen/Hexagon/cmp-to-predreg.ll @@ -0,0 +1,43 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s +; Check that we generate compare to predicate register. 
+ +define i32 @compare1(i32 %a, i32 %b) nounwind { +; CHECK: p{{[0-3]}}{{ *}}={{ *}}!cmp.eq(r{{[0-9]+}},{{ *}}r{{[0-9]+}}) +entry: + %cmp = icmp ne i32 %a, %b + %add = add nsw i32 %a, %b + %sub = sub nsw i32 %a, %b + %add.sub = select i1 %cmp, i32 %add, i32 %sub + ret i32 %add.sub +} + +define i32 @compare2(i32 %a) nounwind { +; CHECK: p{{[0-3]}}{{ *}}={{ *}}!cmp.eq(r{{[0-9]+}},{{ *}}#10) +entry: + %cmp = icmp ne i32 %a, 10 + %add = add nsw i32 %a, 10 + %sub = sub nsw i32 %a, 10 + %add.sub = select i1 %cmp, i32 %add, i32 %sub + ret i32 %add.sub +} + +define i32 @compare3(i32 %a, i32 %b) nounwind { +; CHECK: p{{[0-3]}}{{ *}}={{ *}}cmp.gt(r{{[0-9]+}},{{ *}}r{{[0-9]+}}) +entry: + %cmp = icmp sgt i32 %a, %b + %sub = sub nsw i32 %a, %b + %add = add nsw i32 %a, %b + %sub.add = select i1 %cmp, i32 %sub, i32 %add + ret i32 %sub.add +} + +define i32 @compare4(i32 %a) nounwind { +; CHECK: p{{[0-3]}}{{ *}}={{ *}}cmp.gt(r{{[0-9]+}},{{ *}}#10) +entry: + %cmp = icmp sgt i32 %a, 10 + %sub = sub nsw i32 %a, 10 + %add = add nsw i32 %a, 10 + %sub.add = select i1 %cmp, i32 %sub, i32 %add + ret i32 %sub.add +} + diff --git a/test/CodeGen/Hexagon/cmp_pred.ll b/test/CodeGen/Hexagon/cmp_pred.ll new file mode 100644 index 000000000000..37db3b499f63 --- /dev/null +++ b/test/CodeGen/Hexagon/cmp_pred.ll @@ -0,0 +1,115 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s +; Generate various cmpb instruction followed by if (p0) .. if (!p0)... +target triple = "hexagon" + +define i32 @Func_3Ugt(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone { +entry: +; CHECK-NOT: mux + %cmp = icmp ugt i32 %Enum_Par_Val, %pv2 + %selv = zext i1 %cmp to i32 + ret i32 %selv +} + +define i32 @Func_3Uge(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone { +entry: +; CHECK-NOT: mux + %cmp = icmp uge i32 %Enum_Par_Val, %pv2 + %selv = zext i1 %cmp to i32 + ret i32 %selv +} + +define i32 @Func_3Ult(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone { +entry: +; CHECK-NOT: mux + %cmp = icmp ult i32 %Enum_Par_Val, %pv2 + %selv = zext i1 %cmp to i32 + ret i32 %selv +} + +define i32 @Func_3Ule(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone { +entry: +; CHECK-NOT: mux + %cmp = icmp ule i32 %Enum_Par_Val, %pv2 + %selv = zext i1 %cmp to i32 + ret i32 %selv +} + +define i32 @Func_3Ueq(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone { +entry: +; CHECK-NOT: mux + %cmp = icmp eq i32 %Enum_Par_Val, %pv2 + %selv = zext i1 %cmp to i32 + ret i32 %selv +} + +define i32 @Func_3Une(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone { +entry: +; CHECK-NOT: mux + %cmp = icmp ne i32 %Enum_Par_Val, %pv2 + %selv = zext i1 %cmp to i32 + ret i32 %selv +} + +define i32 @Func_3UneC(i32 %Enum_Par_Val) nounwind readnone { +entry: +; CHECK-NOT: mux + %cmp = icmp ne i32 %Enum_Par_Val, 122 + %selv = zext i1 %cmp to i32 + ret i32 %selv +} + +define i32 @Func_3gt(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone { +entry: +; CHECK: mux + %cmp = icmp sgt i32 %Enum_Par_Val, %pv2 + %selv = zext i1 %cmp to i32 + ret i32 %selv +} + +define i32 @Func_3ge(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone { +entry: +; CHECK-NOT: mux + %cmp = icmp sge i32 %Enum_Par_Val, %pv2 + %selv = zext i1 %cmp to i32 + ret i32 %selv +} + +define i32 @Func_3lt(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone { +entry: +; CHECK-NOT: mux + %cmp = icmp slt i32 %Enum_Par_Val, %pv2 + %selv = zext i1 %cmp to i32 + ret i32 %selv +} + +define i32 @Func_3le(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone { +entry: +; CHECK-NOT: mux + %cmp = icmp sle i32 %Enum_Par_Val, %pv2 + %selv = zext i1 %cmp to i32 + ret i32 %selv 
+} + +define i32 @Func_3eq(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone { +entry: +; CHECK-NOT: mux + %cmp = icmp eq i32 %Enum_Par_Val, %pv2 + %selv = zext i1 %cmp to i32 + ret i32 %selv +} + +define i32 @Func_3ne(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone { +entry: +; CHECK-NOT: mux + %cmp = icmp ne i32 %Enum_Par_Val, %pv2 + %selv = zext i1 %cmp to i32 + ret i32 %selv +} + +define i32 @Func_3neC(i32 %Enum_Par_Val) nounwind readnone { +entry: +; CHECK-NOT: mux + %cmp = icmp ne i32 %Enum_Par_Val, 122 + %selv = zext i1 %cmp to i32 + ret i32 %selv +} diff --git a/test/CodeGen/Hexagon/cmp_pred_reg.ll b/test/CodeGen/Hexagon/cmp_pred_reg.ll new file mode 100644 index 000000000000..37db3b499f63 --- /dev/null +++ b/test/CodeGen/Hexagon/cmp_pred_reg.ll @@ -0,0 +1,115 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s +; Generate various cmpb instruction followed by if (p0) .. if (!p0)... +target triple = "hexagon" + +define i32 @Func_3Ugt(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone { +entry: +; CHECK-NOT: mux + %cmp = icmp ugt i32 %Enum_Par_Val, %pv2 + %selv = zext i1 %cmp to i32 + ret i32 %selv +} + +define i32 @Func_3Uge(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone { +entry: +; CHECK-NOT: mux + %cmp = icmp uge i32 %Enum_Par_Val, %pv2 + %selv = zext i1 %cmp to i32 + ret i32 %selv +} + +define i32 @Func_3Ult(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone { +entry: +; CHECK-NOT: mux + %cmp = icmp ult i32 %Enum_Par_Val, %pv2 + %selv = zext i1 %cmp to i32 + ret i32 %selv +} + +define i32 @Func_3Ule(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone { +entry: +; CHECK-NOT: mux + %cmp = icmp ule i32 %Enum_Par_Val, %pv2 + %selv = zext i1 %cmp to i32 + ret i32 %selv +} + +define i32 @Func_3Ueq(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone { +entry: +; CHECK-NOT: mux + %cmp = icmp eq i32 %Enum_Par_Val, %pv2 + %selv = zext i1 %cmp to i32 + ret i32 %selv +} + +define i32 @Func_3Une(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone { +entry: +; CHECK-NOT: mux + %cmp = icmp ne i32 %Enum_Par_Val, %pv2 + %selv = zext i1 %cmp to i32 + ret i32 %selv +} + +define i32 @Func_3UneC(i32 %Enum_Par_Val) nounwind readnone { +entry: +; CHECK-NOT: mux + %cmp = icmp ne i32 %Enum_Par_Val, 122 + %selv = zext i1 %cmp to i32 + ret i32 %selv +} + +define i32 @Func_3gt(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone { +entry: +; CHECK: mux + %cmp = icmp sgt i32 %Enum_Par_Val, %pv2 + %selv = zext i1 %cmp to i32 + ret i32 %selv +} + +define i32 @Func_3ge(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone { +entry: +; CHECK-NOT: mux + %cmp = icmp sge i32 %Enum_Par_Val, %pv2 + %selv = zext i1 %cmp to i32 + ret i32 %selv +} + +define i32 @Func_3lt(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone { +entry: +; CHECK-NOT: mux + %cmp = icmp slt i32 %Enum_Par_Val, %pv2 + %selv = zext i1 %cmp to i32 + ret i32 %selv +} + +define i32 @Func_3le(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone { +entry: +; CHECK-NOT: mux + %cmp = icmp sle i32 %Enum_Par_Val, %pv2 + %selv = zext i1 %cmp to i32 + ret i32 %selv +} + +define i32 @Func_3eq(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone { +entry: +; CHECK-NOT: mux + %cmp = icmp eq i32 %Enum_Par_Val, %pv2 + %selv = zext i1 %cmp to i32 + ret i32 %selv +} + +define i32 @Func_3ne(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone { +entry: +; CHECK-NOT: mux + %cmp = icmp ne i32 %Enum_Par_Val, %pv2 + %selv = zext i1 %cmp to i32 + ret i32 %selv +} + +define i32 @Func_3neC(i32 %Enum_Par_Val) nounwind readnone { +entry: +; CHECK-NOT: mux + %cmp = icmp ne i32 %Enum_Par_Val, 122 + %selv = zext i1 %cmp to 
i32 + ret i32 %selv +} diff --git a/test/CodeGen/Hexagon/cmpb_pred.ll b/test/CodeGen/Hexagon/cmpb_pred.ll new file mode 100644 index 000000000000..1e6144701fee --- /dev/null +++ b/test/CodeGen/Hexagon/cmpb_pred.ll @@ -0,0 +1,92 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s +; Generate various cmpb instruction followed by if (p0) .. if (!p0)... +target triple = "hexagon" + +@Enum_global = external global i8 + +define i32 @Func_3(i32) nounwind readnone { +entry: +; CHECK-NOT: mux + %conv = and i32 %0, 255 + %cmp = icmp eq i32 %conv, 2 + %selv = zext i1 %cmp to i32 + ret i32 %selv +} + +define i32 @Func_3b(i32) nounwind readonly { +entry: +; CHECK-NOT: mux + %1 = load i8* @Enum_global, align 1, !tbaa !0 + %2 = trunc i32 %0 to i8 + %cmp = icmp ne i8 %1, %2 + %selv = zext i1 %cmp to i32 + ret i32 %selv +} + +define i32 @Func_3c(i32) nounwind readnone { +entry: +; CHECK-NOT: mux + %conv = and i32 %0, 255 + %cmp = icmp eq i32 %conv, 2 + %selv = zext i1 %cmp to i32 + ret i32 %selv +} + +define i32 @Func_3d(i32) nounwind readonly { +entry: +; CHECK-NOT: mux + %1 = load i8* @Enum_global, align 1, !tbaa !0 + %2 = trunc i32 %0 to i8 + %cmp = icmp eq i8 %1, %2 + %selv = zext i1 %cmp to i32 + ret i32 %selv +} + +define i32 @Func_3e(i32) nounwind readonly { +entry: +; CHECK-NOT: mux + %1 = load i8* @Enum_global, align 1, !tbaa !0 + %2 = trunc i32 %0 to i8 + %cmp = icmp eq i8 %1, %2 + %selv = zext i1 %cmp to i32 + ret i32 %selv +} + +define i32 @Func_3f(i32) nounwind readnone { +entry: +; CHECK-NOT: mux + %conv = and i32 %0, 255 + %cmp = icmp ugt i32 %conv, 2 + %selv = zext i1 %cmp to i32 + ret i32 %selv +} + +define i32 @Func_3g(i32) nounwind readnone { +entry: +; CHECK: mux + %conv = and i32 %0, 255 + %cmp = icmp ult i32 %conv, 3 + %selv = zext i1 %cmp to i32 + ret i32 %selv +} + +define i32 @Func_3h(i32) nounwind readnone { +entry: +; CHECK-NOT: mux + %conv = and i32 %0, 254 + %cmp = icmp ult i32 %conv, 2 + %selv = zext i1 %cmp to i32 + ret i32 %selv +} + +define i32 @Func_3i(i32) nounwind readnone { +entry: +; CHECK-NOT: mux + %conv = and i32 %0, 254 + %cmp = icmp ugt i32 %conv, 1 + %selv = zext i1 %cmp to i32 + ret i32 %selv +} + +!0 = metadata !{metadata !"omnipotent char", metadata !1} +!1 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/CodeGen/Hexagon/combine_ir.ll b/test/CodeGen/Hexagon/combine_ir.ll new file mode 100644 index 000000000000..921ce9928e6d --- /dev/null +++ b/test/CodeGen/Hexagon/combine_ir.ll @@ -0,0 +1,55 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s +; CHECK: word +; CHECK: combine(#0 + +define void @word(i32* nocapture %a) nounwind { +entry: + %0 = load i32* %a, align 4, !tbaa !0 + %1 = zext i32 %0 to i64 + %add.ptr = getelementptr inbounds i32* %a, i32 1 + %2 = load i32* %add.ptr, align 4, !tbaa !0 + %3 = zext i32 %2 to i64 + %4 = shl nuw i64 %3, 32 + %ins = or i64 %4, %1 + tail call void @bar(i64 %ins) nounwind + ret void +} + +declare void @bar(i64) + +; CHECK: halfword +; CHECK: combine(#0 + +define void @halfword(i16* nocapture %a) nounwind { +entry: + %0 = load i16* %a, align 2, !tbaa !3 + %1 = zext i16 %0 to i64 + %add.ptr = getelementptr inbounds i16* %a, i32 1 + %2 = load i16* %add.ptr, align 2, !tbaa !3 + %3 = zext i16 %2 to i64 + %4 = shl nuw nsw i64 %3, 16 + %ins = or i64 %4, %1 + tail call void @bar(i64 %ins) nounwind + ret void +} + +; CHECK: byte +; CHECK: combine(#0 + +define void @byte(i8* nocapture %a) nounwind { +entry: + %0 = load i8* %a, align 1, !tbaa !1 + %1 = zext i8 %0 to i64 + %add.ptr = 
getelementptr inbounds i8* %a, i32 1 + %2 = load i8* %add.ptr, align 1, !tbaa !1 + %3 = zext i8 %2 to i64 + %4 = shl nuw nsw i64 %3, 8 + %ins = or i64 %4, %1 + tail call void @bar(i64 %ins) nounwind + ret void +} + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} +!3 = metadata !{metadata !"short", metadata !1} diff --git a/test/CodeGen/Hexagon/ctlz-cttz-ctpop.ll b/test/CodeGen/Hexagon/ctlz-cttz-ctpop.ll new file mode 100644 index 000000000000..e942f8d0c5dd --- /dev/null +++ b/test/CodeGen/Hexagon/ctlz-cttz-ctpop.ll @@ -0,0 +1,34 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s + +; CHECK: r{{[0-9]+}}:{{[0-9]+}} |= lsr(r{{[0-9]+}}:{{[0-9]+}}, #4) +; CHECK: r{{[0-9]+}}:{{[0-9]+}} &= lsr(r{{[0-9]+}}:{{[0-9]+}}, #2) +; CHECK: r{{[0-9]+}} += lsr(r{{[0-9]+}}, #4) + +define i32 @foo(i64 %a, i32 %b) nounwind { +entry: + %tmp0 = tail call i64 @llvm.ctlz.i64( i64 %a, i1 true ) + %tmp1 = tail call i64 @llvm.cttz.i64( i64 %a, i1 true ) + %tmp2 = tail call i32 @llvm.ctlz.i32( i32 %b, i1 true ) + %tmp3 = tail call i32 @llvm.cttz.i32( i32 %b, i1 true ) + %tmp4 = tail call i64 @llvm.ctpop.i64( i64 %a ) + %tmp5 = tail call i32 @llvm.ctpop.i32( i32 %b ) + + + %tmp6 = trunc i64 %tmp0 to i32 + %tmp7 = trunc i64 %tmp1 to i32 + %tmp8 = trunc i64 %tmp4 to i32 + %tmp9 = add i32 %tmp6, %tmp7 + %tmp10 = add i32 %tmp9, %tmp8 + %tmp11 = add i32 %tmp10, %tmp2 + %tmp12 = add i32 %tmp11, %tmp3 + %tmp13 = add i32 %tmp12, %tmp5 + + ret i32 %tmp13 +} + +declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone +declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone +declare i64 @llvm.cttz.i64(i64, i1) nounwind readnone +declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone +declare i64 @llvm.ctpop.i64(i64) nounwind readnone +declare i32 @llvm.ctpop.i32(i32) nounwind readnone diff --git a/test/CodeGen/Hexagon/dualstore.ll b/test/CodeGen/Hexagon/dualstore.ll index 9b27dda52c1d..f7d7e8bbe75d 100644 --- a/test/CodeGen/Hexagon/dualstore.ll +++ b/test/CodeGen/Hexagon/dualstore.ll @@ -1,8 +1,8 @@ -; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s +; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-hexagon-misched < %s | FileCheck %s ; Check that we generate dual stores in one packet in V4 -; CHECK: memw(r{{[0-9]+}} + #{{[0-9]+}}) = r{{[0-9]+}} -; CHECK-NEXT: memw(r{{[0-9]+}} + #{{[0-9]+}}) = r{{[0-9]+}} +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#{{[0-9]+}}){{ *}}={{ *}}##500000 +; CHECK-NEXT: memw(r{{[0-9]+}}{{ *}}+{{ *}}#{{[0-9]+}}){{ *}}={{ *}}##100000 ; CHECK-NEXT: } @Reg = global i32 0, align 4 diff --git a/test/CodeGen/Hexagon/gp-plus-offset-load.ll b/test/CodeGen/Hexagon/gp-plus-offset-load.ll new file mode 100644 index 000000000000..a1b80a65f82a --- /dev/null +++ b/test/CodeGen/Hexagon/gp-plus-offset-load.ll @@ -0,0 +1,51 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s +; Check that we generate load instructions with global + offset + +%struct.struc = type { i8, i8, i16, i32 } + +@foo = common global %struct.struc zeroinitializer, align 4 + +define void @loadWord(i32 %val1, i32 %val2, i32* nocapture %ival) nounwind { +; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memw(##foo{{ *}}+{{ *}}4) +entry: + %cmp = icmp sgt i32 %val1, %val2 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %0 = load i32* getelementptr inbounds (%struct.struc* @foo, i32 0, i32 3), align 4 + store i32 %0, i32* %ival, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + 
+define void @loadByte(i32 %val1, i32 %val2, i8* nocapture %ival) nounwind { +; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memb(##foo{{ *}}+{{ *}}1) +entry: + %cmp = icmp sgt i32 %val1, %val2 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %0 = load i8* getelementptr inbounds (%struct.struc* @foo, i32 0, i32 1), align 1 + store i8 %0, i8* %ival, align 1 + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +define void @loadHWord(i32 %val1, i32 %val2, i16* %ival) nounwind { +; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memh(##foo{{ *}}+{{ *}}2) +entry: + %cmp = icmp sgt i32 %val1, %val2 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %0 = load i16* getelementptr inbounds (%struct.struc* @foo, i32 0, i32 2), align 2 + store i16 %0, i16* %ival, align 2 + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} diff --git a/test/CodeGen/Hexagon/gp-plus-offset-store.ll b/test/CodeGen/Hexagon/gp-plus-offset-store.ll new file mode 100644 index 000000000000..c782b30920ea --- /dev/null +++ b/test/CodeGen/Hexagon/gp-plus-offset-store.ll @@ -0,0 +1,35 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s +; Check that we generate store instructions with global + offset + +%struct.struc = type { i8, i8, i16, i32 } + +@foo = common global %struct.struc zeroinitializer, align 4 + +define void @storeByte(i32 %val1, i32 %val2, i8 zeroext %ival) nounwind { +; CHECK: memb(##foo{{ *}}+{{ *}}1){{ *}}={{ *}}r{{[0-9]+}} +entry: + %cmp = icmp sgt i32 %val1, %val2 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + store i8 %ival, i8* getelementptr inbounds (%struct.struc* @foo, i32 0, i32 1), align 1 + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +define void @storeHW(i32 %val1, i32 %val2, i16 signext %ival) nounwind { +; CHECK: memh(##foo{{ *}}+{{ *}}2){{ *}}={{ *}}r{{[0-9]+}} +entry: + %cmp = icmp sgt i32 %val1, %val2 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + store i16 %ival, i16* getelementptr inbounds (%struct.struc* @foo, i32 0, i32 2), align 2 + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + diff --git a/test/CodeGen/Hexagon/gp-rel.ll b/test/CodeGen/Hexagon/gp-rel.ll new file mode 100644 index 000000000000..561869e8ef35 --- /dev/null +++ b/test/CodeGen/Hexagon/gp-rel.ll @@ -0,0 +1,33 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s +; Check that gp-relative instructions are being generated. 
+ +@a = common global i32 0, align 4 +@b = common global i32 0, align 4 +@c = common global i32 0, align 4 + +define i32 @foo(i32 %p) #0 { +entry: +; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memw(#a) +; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memw(#b) +; CHECK: if{{ *}}(p{{[0-3]}}) memw(##c){{ *}}={{ *}}r{{[0-9]+}} + %0 = load i32* @a, align 4 + %1 = load i32* @b, align 4 + %add = add nsw i32 %1, %0 + %cmp = icmp eq i32 %0, %1 + br i1 %cmp, label %if.then, label %entry.if.end_crit_edge + +entry.if.end_crit_edge: + %.pre = load i32* @c, align 4 + br label %if.end + +if.then: + %add1 = add nsw i32 %add, %0 + store i32 %add1, i32* @c, align 4 + br label %if.end + +if.end: + %2 = phi i32 [ %.pre, %entry.if.end_crit_edge ], [ %add1, %if.then ] + %cmp2 = icmp eq i32 %add, %2 + %sel1 = select i1 %cmp2, i32 %2, i32 %1 + ret i32 %sel1 +} diff --git a/test/CodeGen/Hexagon/hwloop-cleanup.ll b/test/CodeGen/Hexagon/hwloop-cleanup.ll new file mode 100644 index 000000000000..6456ebff16d3 --- /dev/null +++ b/test/CodeGen/Hexagon/hwloop-cleanup.ll @@ -0,0 +1,86 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s +; Check that we remove the compare and induction variable instructions +; after generating hardware loops. +; Bug 6685. + +; CHECK: loop0 +; CHECK-NOT: r{{[0-9]+}}{{.}}={{.}}add(r{{[0-9]+}},{{.}}#-1) +; CHECK-NOT: cmp.eq +; CHECK: endloop0 + +define i32 @test1(i32* nocapture %b, i32 %n) nounwind readonly { +entry: + %cmp1 = icmp sgt i32 %n, 0 + br i1 %cmp1, label %for.body.preheader, label %for.end + +for.body.preheader: + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %sum.03 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] + %arrayidx.phi = phi i32* [ %arrayidx.inc, %for.body ], [ %b, %for.body.preheader ] + %i.02 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] + %0 = load i32* %arrayidx.phi, align 4 + %add = add nsw i32 %0, %sum.03 + %inc = add nsw i32 %i.02, 1 + %exitcond = icmp eq i32 %inc, %n + %arrayidx.inc = getelementptr i32* %arrayidx.phi, i32 1 + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: + br label %for.end + +for.end: + %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.end.loopexit ] + ret i32 %sum.0.lcssa +} + +; This test checks that that initial loop count value is removed. +; CHECK-NOT: ={{.}}#40 +; CHECK: loop0 +; CHECK-NOT: r{{[0-9]+}}{{.}}={{.}}add(r{{[0-9]+}},{{.}}#-1) +; CHECK-NOT: cmp.eq +; CHECK: endloop0 + +define i32 @test2(i32* nocapture %b) nounwind readonly { +entry: + br label %for.body + +for.body: + %sum.02 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %arrayidx.phi = phi i32* [ %b, %entry ], [ %arrayidx.inc, %for.body ] + %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %0 = load i32* %arrayidx.phi, align 4 + %add = add nsw i32 %0, %sum.02 + %inc = add nsw i32 %i.01, 1 + %exitcond = icmp eq i32 %inc, 40 + %arrayidx.inc = getelementptr i32* %arrayidx.phi, i32 1 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret i32 %add +} + +; This test checks that we don't remove the induction variable since it's used. 
+; CHECK: loop0 +; CHECK: r{{[0-9]+}}{{.}}={{.}}add(r{{[0-9]+}},{{.}}#1) +; CHECK-NOT: cmp.eq +; CHECK: endloop0 +define i32 @test3(i32* nocapture %b) nounwind { +entry: + br label %for.body + +for.body: + %arrayidx.phi = phi i32* [ %b, %entry ], [ %arrayidx.inc, %for.body ] + %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + store i32 %i.01, i32* %arrayidx.phi, align 4 + %inc = add nsw i32 %i.01, 1 + %exitcond = icmp eq i32 %inc, 40 + %arrayidx.inc = getelementptr i32* %arrayidx.phi, i32 1 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret i32 0 +} + + diff --git a/test/CodeGen/Hexagon/hwloop-const.ll b/test/CodeGen/Hexagon/hwloop-const.ll new file mode 100644 index 000000000000..a621c58c63ed --- /dev/null +++ b/test/CodeGen/Hexagon/hwloop-const.ll @@ -0,0 +1,31 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv4 -O2 < %s | FileCheck %s +; ModuleID = 'hwloop-const.c' +target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32" +target triple = "hexagon-unknown-linux-gnu" + +@b = common global [25000 x i32] zeroinitializer, align 8 +@a = common global [25000 x i32] zeroinitializer, align 8 +@c = common global [25000 x i32] zeroinitializer, align 8 + +define i32 @hwloop_bug() nounwind { +entry: + br label %for.body + +; CHECK: endloop +for.body: ; preds = %for.body, %entry + %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds [25000 x i32]* @b, i32 0, i32 %i.02 + store i32 %i.02, i32* %arrayidx, align 4, !tbaa !0 + %arrayidx1 = getelementptr inbounds [25000 x i32]* @a, i32 0, i32 %i.02 + store i32 %i.02, i32* %arrayidx1, align 4, !tbaa !0 + %inc = add nsw i32 %i.02, 1 + %exitcond = icmp eq i32 %inc, 25000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret i32 0 +} + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/CodeGen/Hexagon/hwloop-dbg.ll b/test/CodeGen/Hexagon/hwloop-dbg.ll new file mode 100644 index 000000000000..c2e8153b7dff --- /dev/null +++ b/test/CodeGen/Hexagon/hwloop-dbg.ll @@ -0,0 +1,64 @@ +; RUN: llc < %s -march=hexagon -mcpu=hexagonv4 -O2 -disable-lsr | FileCheck %s +; ModuleID = 'hwloop-dbg.o' +target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32" +target triple = "hexagon" + +define void @foo(i32* nocapture %a, i32* nocapture %b) nounwind { +entry: + tail call void @llvm.dbg.value(metadata !{i32* %a}, i64 0, metadata !13), !dbg !17 + tail call void @llvm.dbg.value(metadata !{i32* %b}, i64 0, metadata !14), !dbg !18 + tail call void @llvm.dbg.value(metadata !2, i64 0, metadata !15), !dbg !19 + br label %for.body, !dbg !19 + +for.body: ; preds = %for.body, %entry +; CHECK: loop0( +; CHECK-NOT: add({{r[0-9]*}}, # +; CHECK: endloop0 + %arrayidx.phi = phi i32* [ %a, %entry ], [ %arrayidx.inc, %for.body ] + %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %b.addr.01 = phi i32* [ %b, %entry ], [ %incdec.ptr, %for.body ] + %incdec.ptr = getelementptr inbounds i32* %b.addr.01, i32 1, !dbg !21 + tail call void @llvm.dbg.value(metadata !{i32* %incdec.ptr}, i64 0, metadata !14), !dbg !21 + %0 = load i32* %b.addr.01, align 4, !dbg !21, !tbaa !23 + store i32 %0, i32* %arrayidx.phi, align 4, !dbg !21, !tbaa !23 + %inc = add nsw i32 %i.02, 1, !dbg !26 + tail call void @llvm.dbg.value(metadata !{i32 %inc}, i64 0, metadata !15), !dbg !26 + 
%exitcond = icmp eq i32 %inc, 10, !dbg !19 + %arrayidx.inc = getelementptr i32* %arrayidx.phi, i32 1 + br i1 %exitcond, label %for.end, label %for.body, !dbg !19 + +for.end: ; preds = %for.body + ret void, !dbg !27 +} + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + + +!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"hwloop-dbg.c", metadata !"/usr2/kparzysz/s.hex/t", metadata !"QuIC LLVM Hexagon Clang version 6.1-pre-unknown, (git://git-hexagon-aus.quicinc.com/llvm/clang-mainline.git e9382867661454cdf44addb39430741578e9765c) (llvm/llvm-mainline.git 36412bb1fcf03ed426d4437b41198bae066675ac)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c] [DW_LANG_C99] +!1 = metadata !{metadata !2} +!2 = metadata !{i32 0} +!3 = metadata !{metadata !4} +!4 = metadata !{metadata !5} +!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32*)* @foo, null, null, metadata !11, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo] +!6 = metadata !{i32 786473, metadata !"hwloop-dbg.c", metadata !"/usr2/kparzysz/s.hex/t", null} ; [ DW_TAG_file_type ] +!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!8 = metadata !{null, metadata !9, metadata !9} +!9 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 32, align 32, offset 0] [from int] +!10 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!11 = metadata !{metadata !12} +!12 = metadata !{metadata !13, metadata !14, metadata !15} +!13 = metadata !{i32 786689, metadata !5, metadata !"a", metadata !6, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 1] +!14 = metadata !{i32 786689, metadata !5, metadata !"b", metadata !6, i32 33554433, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [b] [line 1] +!15 = metadata !{i32 786688, metadata !16, metadata !"i", metadata !6, i32 2, metadata !10, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 2] +!16 = metadata !{i32 786443, metadata !5, i32 1, i32 26, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c] +!17 = metadata !{i32 1, i32 15, metadata !5, null} +!18 = metadata !{i32 1, i32 23, metadata !5, null} +!19 = metadata !{i32 3, i32 8, metadata !20, null} +!20 = metadata !{i32 786443, metadata !16, i32 3, i32 3, metadata !6, i32 1} ; [ DW_TAG_lexical_block ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c] +!21 = metadata !{i32 4, i32 5, metadata !22, null} +!22 = metadata !{i32 786443, metadata !20, i32 3, i32 28, metadata !6, i32 2} ; [ DW_TAG_lexical_block ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c] +!23 = metadata !{metadata !"int", metadata !24} +!24 = metadata !{metadata !"omnipotent char", metadata !25} +!25 = metadata !{metadata !"Simple C/C++ TBAA"} +!26 = metadata !{i32 3, i32 23, metadata !20, null} +!27 = metadata !{i32 6, i32 1, metadata !16, null} diff --git a/test/CodeGen/Hexagon/hwloop-le.ll b/test/CodeGen/Hexagon/hwloop-le.ll new file mode 100644 index 000000000000..9c8cec7c2a1b --- /dev/null +++ 
b/test/CodeGen/Hexagon/hwloop-le.ll @@ -0,0 +1,438 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv4 -O3 < %s | FileCheck %s + + +; CHECK: test_pos1_ir_sle +; CHECK: loop0 +; a < b +define void @test_pos1_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp sle i32 28395, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ 28395, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 1 + %cmp = icmp sle i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos2_ir_sle +; CHECK: loop0 +; a < b +define void @test_pos2_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp sle i32 9073, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ 9073, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 2 + %cmp = icmp sle i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos4_ir_sle +; CHECK: loop0 +; a < b +define void @test_pos4_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp sle i32 21956, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ 21956, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 4 + %cmp = icmp sle i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos8_ir_sle +; CHECK: loop0 +; a < b +define void @test_pos8_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp sle i32 16782, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ 16782, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 8 + %cmp = icmp sle i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos16_ir_sle +; CHECK: loop0 +; a < b +define void @test_pos16_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp sle i32 19097, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; 
preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ 19097, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 16 + %cmp = icmp sle i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos1_ri_sle +; CHECK: loop0 +; a < b +define void @test_pos1_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp sle i32 %a, 14040 + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 1 + %cmp = icmp sle i32 %inc, 14040 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos2_ri_sle +; CHECK: loop0 +; a < b +define void @test_pos2_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp sle i32 %a, 13710 + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 2 + %cmp = icmp sle i32 %inc, 13710 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos4_ri_sle +; CHECK: loop0 +; a < b +define void @test_pos4_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp sle i32 %a, 9920 + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 4 + %cmp = icmp sle i32 %inc, 9920 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos8_ri_sle +; CHECK: loop0 +; a < b +define void @test_pos8_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp sle i32 %a, 18924 + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 8 + %cmp = icmp sle i32 %inc, 18924 + br i1 %cmp, label %for.body, 
label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos16_ri_sle +; CHECK: loop0 +; a < b +define void @test_pos16_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp sle i32 %a, 11812 + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 16 + %cmp = icmp sle i32 %inc, 11812 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos1_rr_sle +; CHECK: loop0 +; a < b +define void @test_pos1_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp sle i32 %a, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 1 + %cmp = icmp sle i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos2_rr_sle +; CHECK: loop0 +; a < b +define void @test_pos2_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp sle i32 %a, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 2 + %cmp = icmp sle i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos4_rr_sle +; CHECK: loop0 +; a < b +define void @test_pos4_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp sle i32 %a, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 4 + %cmp = icmp sle i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos8_rr_sle +; CHECK: loop0 +; a < b +define void @test_pos8_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp sle i32 %a, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, 
%for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 8 + %cmp = icmp sle i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos16_rr_sle +; CHECK: loop0 +; a < b +define void @test_pos16_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp sle i32 %a, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 16 + %cmp = icmp sle i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + diff --git a/test/CodeGen/Hexagon/hwloop-lt.ll b/test/CodeGen/Hexagon/hwloop-lt.ll new file mode 100644 index 000000000000..7e43733da2a6 --- /dev/null +++ b/test/CodeGen/Hexagon/hwloop-lt.ll @@ -0,0 +1,438 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv4 -O3 < %s | FileCheck %s + + +; CHECK: test_pos1_ir_slt +; CHECK: loop0 +; a < b +define void @test_pos1_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 8531, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ 8531, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 1 + %cmp = icmp slt i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos2_ir_slt +; CHECK: loop0 +; a < b +define void @test_pos2_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 9152, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ 9152, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 2 + %cmp = icmp slt i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos4_ir_slt +; CHECK: loop0 +; a < b +define void @test_pos4_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 18851, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ 18851, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext 
i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 4 + %cmp = icmp slt i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos8_ir_slt +; CHECK: loop0 +; a < b +define void @test_pos8_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 25466, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ 25466, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 8 + %cmp = icmp slt i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos16_ir_slt +; CHECK: loop0 +; a < b +define void @test_pos16_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 9295, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ 9295, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 16 + %cmp = icmp slt i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos1_ri_slt +; CHECK: loop0 +; a < b +define void @test_pos1_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 %a, 31236 + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 1 + %cmp = icmp slt i32 %inc, 31236 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos2_ri_slt +; CHECK: loop0 +; a < b +define void @test_pos2_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 %a, 22653 + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 2 + %cmp = icmp slt i32 %inc, 22653 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos4_ri_slt +; CHECK: loop0 +; a < b +define void @test_pos4_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind { 
+entry: + %cmp3 = icmp slt i32 %a, 1431 + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 4 + %cmp = icmp slt i32 %inc, 1431 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos8_ri_slt +; CHECK: loop0 +; a < b +define void @test_pos8_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 %a, 22403 + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 8 + %cmp = icmp slt i32 %inc, 22403 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos16_ri_slt +; CHECK: loop0 +; a < b +define void @test_pos16_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 %a, 21715 + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 16 + %cmp = icmp slt i32 %inc, 21715 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos1_rr_slt +; CHECK: loop0 +; a < b +define void @test_pos1_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 %a, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 1 + %cmp = icmp slt i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos2_rr_slt +; CHECK: loop0 +; a < b +define void @test_pos2_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 %a, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = 
trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 2 + %cmp = icmp slt i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos4_rr_slt +; CHECK: loop0 +; a < b +define void @test_pos4_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 %a, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 4 + %cmp = icmp slt i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos8_rr_slt +; CHECK: loop0 +; a < b +define void @test_pos8_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 %a, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 8 + %cmp = icmp slt i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos16_rr_slt +; CHECK: loop0 +; a < b +define void @test_pos16_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 %a, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 16 + %cmp = icmp slt i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + diff --git a/test/CodeGen/Hexagon/hwloop-lt1.ll b/test/CodeGen/Hexagon/hwloop-lt1.ll new file mode 100644 index 000000000000..cf5874011ee0 --- /dev/null +++ b/test/CodeGen/Hexagon/hwloop-lt1.ll @@ -0,0 +1,32 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s +; Check that we generate a hardware loop instruction. 
+; CHECK: endloop0 + +@A = common global [400 x i8] zeroinitializer, align 8 +@B = common global [400 x i8] zeroinitializer, align 8 +@C = common global [400 x i8] zeroinitializer, align 8 + +define void @run() nounwind { +entry: + br label %polly.loop_body + +polly.loop_after: ; preds = %polly.loop_body + ret void + +polly.loop_body: ; preds = %entry, %polly.loop_body + %polly.loopiv16 = phi i32 [ 0, %entry ], [ %polly.next_loopiv, %polly.loop_body ] + %polly.next_loopiv = add i32 %polly.loopiv16, 4 + %p_vector_iv14 = or i32 %polly.loopiv16, 1 + %p_vector_iv3 = add i32 %p_vector_iv14, 1 + %p_vector_iv415 = or i32 %polly.loopiv16, 3 + %p_arrayidx = getelementptr [400 x i8]* @A, i32 0, i32 %polly.loopiv16 + %p_arrayidx5 = getelementptr [400 x i8]* @A, i32 0, i32 %p_vector_iv14 + %p_arrayidx6 = getelementptr [400 x i8]* @A, i32 0, i32 %p_vector_iv3 + %p_arrayidx7 = getelementptr [400 x i8]* @A, i32 0, i32 %p_vector_iv415 + store i8 123, i8* %p_arrayidx, align 1 + store i8 123, i8* %p_arrayidx5, align 1 + store i8 123, i8* %p_arrayidx6, align 1 + store i8 123, i8* %p_arrayidx7, align 1 + %0 = icmp slt i32 %polly.next_loopiv, 400 + br i1 %0, label %polly.loop_body, label %polly.loop_after +} diff --git a/test/CodeGen/Hexagon/hwloop-ne.ll b/test/CodeGen/Hexagon/hwloop-ne.ll new file mode 100644 index 000000000000..bceef2a16955 --- /dev/null +++ b/test/CodeGen/Hexagon/hwloop-ne.ll @@ -0,0 +1,438 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv4 -O3 < %s | FileCheck %s + + +; CHECK: test_pos1_ir_ne +; CHECK: loop0 +; a < b +define void @test_pos1_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 32623, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ 32623, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 1 + %cmp = icmp ne i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos2_ir_ne +; CHECK: loop0 +; a < b +define void @test_pos2_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 29554, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ 29554, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 2 + %cmp = icmp ne i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos4_ir_ne +; CHECK: loop0 +; a < b +define void @test_pos4_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 15692, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ 15692, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 
to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 4 + %cmp = icmp ne i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos8_ir_ne +; CHECK: loop0 +; a < b +define void @test_pos8_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 10449, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ 10449, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 8 + %cmp = icmp ne i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos16_ir_ne +; CHECK: loop0 +; a < b +define void @test_pos16_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 32087, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ 32087, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 16 + %cmp = icmp ne i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos1_ri_ne +; CHECK: loop0 +; a < b +define void @test_pos1_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 %a, 3472 + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 1 + %cmp = icmp ne i32 %inc, 3472 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos2_ri_ne +; CHECK: loop0 +; a < b +define void @test_pos2_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 %a, 8730 + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 2 + %cmp = icmp ne i32 %inc, 8730 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos4_ri_ne +; CHECK: loop0 +; a < b +define void @test_pos4_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt 
i32 %a, 1493 + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 4 + %cmp = icmp ne i32 %inc, 1493 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos8_ri_ne +; CHECK: loop0 +; a < b +define void @test_pos8_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 %a, 1706 + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 8 + %cmp = icmp ne i32 %inc, 1706 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos16_ri_ne +; CHECK: loop0 +; a < b +define void @test_pos16_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 %a, 1886 + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 16 + %cmp = icmp ne i32 %inc, 1886 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos1_rr_ne +; CHECK: loop0 +; a < b +define void @test_pos1_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 %a, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 1 + %cmp = icmp ne i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos2_rr_ne +; CHECK: loop0 +; a < b +define void @test_pos2_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 %a, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* 
%arrayidx, align 1 + %inc = add nsw i32 %i.04, 2 + %cmp = icmp ne i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos4_rr_ne +; CHECK: loop0 +; a < b +define void @test_pos4_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 %a, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 4 + %cmp = icmp ne i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos8_rr_ne +; CHECK: loop0 +; a < b +define void @test_pos8_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 %a, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 8 + %cmp = icmp ne i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + +; CHECK: test_pos16_rr_ne +; CHECK: loop0 +; a < b +define void @test_pos16_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { +entry: + %cmp3 = icmp slt i32 %a, %b + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i8* %p, i32 %i.04 + %0 = load i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 1 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %arrayidx, align 1 + %inc = add nsw i32 %i.04, 16 + %cmp = icmp ne i32 %inc, %b + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + diff --git a/test/CodeGen/Hexagon/i16_VarArg.ll b/test/CodeGen/Hexagon/i16_VarArg.ll new file mode 100644 index 000000000000..eb44c2905c9d --- /dev/null +++ b/test/CodeGen/Hexagon/i16_VarArg.ll @@ -0,0 +1,40 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +; CHECK: call __hexagon_{{[A-Z_a-z0-9]+}} + +@a_str = internal constant [8 x i8] c"a = %f\0A\00" +@b_str = internal constant [8 x i8] c"b = %f\0A\00" +@add_str = internal constant [12 x i8] c"a + b = %f\0A\00" +@sub_str = internal constant [12 x i8] c"a - b = %f\0A\00" +@mul_str = internal constant [12 x i8] c"a * b = %f\0A\00" +@div_str = internal constant [12 x i8] c"b / a = %f\0A\00" +@rem_str = internal constant [13 x i8] c"b %% a = %f\0A\00" +@lt_str = internal constant [12 x i8] c"a < b = %d\0A\00" +@le_str = internal constant [13 x i8] c"a <= b = %d\0A\00" +@gt_str = internal constant [12 x i8] c"a > b = %d\0A\00" +@ge_str = internal constant [13 x i8] c"a >= b = %d\0A\00" +@eq_str = internal constant [13 x i8] c"a == b = %d\0A\00" +@ne_str = internal constant [13 
x i8] c"a != b = %d\0A\00" +@A = global double 2.000000e+00 +@B = global double 5.000000e+00 + +declare i32 @printf(i8*, ...) + +define i32 @main() { + %a = load double* @A + %b = load double* @B + %lt_r = fcmp olt double %a, %b + %le_r = fcmp ole double %a, %b + %gt_r = fcmp ogt double %a, %b + %ge_r = fcmp oge double %a, %b + %eq_r = fcmp oeq double %a, %b + %ne_r = fcmp une double %a, %b + %val1 = zext i1 %lt_r to i16 + %lt_s = getelementptr [12 x i8]* @lt_str, i64 0, i64 0 + %le_s = getelementptr [13 x i8]* @le_str, i64 0, i64 0 + %gt_s = getelementptr [12 x i8]* @gt_str, i64 0, i64 0 + %ge_s = getelementptr [13 x i8]* @ge_str, i64 0, i64 0 + %eq_s = getelementptr [13 x i8]* @eq_str, i64 0, i64 0 + %ne_s = getelementptr [13 x i8]* @ne_str, i64 0, i64 0 + call i32 (i8*, ...)* @printf( i8* %lt_s, i16 %val1 ) + ret i32 0 +}
\ No newline at end of file diff --git a/test/CodeGen/Hexagon/i1_VarArg.ll b/test/CodeGen/Hexagon/i1_VarArg.ll new file mode 100644 index 000000000000..7dbfb25cd2b7 --- /dev/null +++ b/test/CodeGen/Hexagon/i1_VarArg.ll @@ -0,0 +1,44 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +; CHECK: call __hexagon_{{[_A-Za-z0-9]+}} + +@a_str = internal constant [8 x i8] c"a = %f\0A\00" +@b_str = internal constant [8 x i8] c"b = %f\0A\00" +@add_str = internal constant [12 x i8] c"a + b = %f\0A\00" +@sub_str = internal constant [12 x i8] c"a - b = %f\0A\00" +@mul_str = internal constant [12 x i8] c"a * b = %f\0A\00" +@div_str = internal constant [12 x i8] c"b / a = %f\0A\00" +@rem_str = internal constant [13 x i8] c"b %% a = %f\0A\00" +@lt_str = internal constant [12 x i8] c"a < b = %d\0A\00" +@le_str = internal constant [13 x i8] c"a <= b = %d\0A\00" +@gt_str = internal constant [12 x i8] c"a > b = %d\0A\00" +@ge_str = internal constant [13 x i8] c"a >= b = %d\0A\00" +@eq_str = internal constant [13 x i8] c"a == b = %d\0A\00" +@ne_str = internal constant [13 x i8] c"a != b = %d\0A\00" +@A = global double 2.000000e+00 +@B = global double 5.000000e+00 + +declare i32 @printf(i8*, ...) + +define i32 @main() { + %a = load double* @A + %b = load double* @B + %lt_r = fcmp olt double %a, %b + %le_r = fcmp ole double %a, %b + %gt_r = fcmp ogt double %a, %b + %ge_r = fcmp oge double %a, %b + %eq_r = fcmp oeq double %a, %b + %ne_r = fcmp une double %a, %b + %lt_s = getelementptr [12 x i8]* @lt_str, i64 0, i64 0 + %le_s = getelementptr [13 x i8]* @le_str, i64 0, i64 0 + %gt_s = getelementptr [12 x i8]* @gt_str, i64 0, i64 0 + %ge_s = getelementptr [13 x i8]* @ge_str, i64 0, i64 0 + %eq_s = getelementptr [13 x i8]* @eq_str, i64 0, i64 0 + %ne_s = getelementptr [13 x i8]* @ne_str, i64 0, i64 0 + call i32 (i8*, ...)* @printf( i8* %lt_s, i1 %lt_r ) + call i32 (i8*, ...)* @printf( i8* %le_s, i1 %le_r ) + call i32 (i8*, ...)* @printf( i8* %gt_s, i1 %gt_r ) + call i32 (i8*, ...)* @printf( i8* %ge_s, i1 %ge_r ) + call i32 (i8*, ...)* @printf( i8* %eq_s, i1 %eq_r ) + call i32 (i8*, ...)* @printf( i8* %ne_s, i1 %ne_r ) + ret i32 0 +}
\ No newline at end of file diff --git a/test/CodeGen/Hexagon/i8_VarArg.ll b/test/CodeGen/Hexagon/i8_VarArg.ll new file mode 100644 index 000000000000..687b178824ce --- /dev/null +++ b/test/CodeGen/Hexagon/i8_VarArg.ll @@ -0,0 +1,40 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +; CHECK: call __hexagon_{{[A-Z_a-z0-9]+}} + +@a_str = internal constant [8 x i8] c"a = %f\0A\00" +@b_str = internal constant [8 x i8] c"b = %f\0A\00" +@add_str = internal constant [12 x i8] c"a + b = %f\0A\00" +@sub_str = internal constant [12 x i8] c"a - b = %f\0A\00" +@mul_str = internal constant [12 x i8] c"a * b = %f\0A\00" +@div_str = internal constant [12 x i8] c"b / a = %f\0A\00" +@rem_str = internal constant [13 x i8] c"b %% a = %f\0A\00" +@lt_str = internal constant [12 x i8] c"a < b = %d\0A\00" +@le_str = internal constant [13 x i8] c"a <= b = %d\0A\00" +@gt_str = internal constant [12 x i8] c"a > b = %d\0A\00" +@ge_str = internal constant [13 x i8] c"a >= b = %d\0A\00" +@eq_str = internal constant [13 x i8] c"a == b = %d\0A\00" +@ne_str = internal constant [13 x i8] c"a != b = %d\0A\00" +@A = global double 2.000000e+00 +@B = global double 5.000000e+00 + +declare i32 @printf(i8*, ...) + +define i32 @main() { + %a = load double* @A + %b = load double* @B + %lt_r = fcmp olt double %a, %b + %le_r = fcmp ole double %a, %b + %gt_r = fcmp ogt double %a, %b + %ge_r = fcmp oge double %a, %b + %eq_r = fcmp oeq double %a, %b + %ne_r = fcmp une double %a, %b + %val1 = zext i1 %lt_r to i8 + %lt_s = getelementptr [12 x i8]* @lt_str, i64 0, i64 0 + %le_s = getelementptr [13 x i8]* @le_str, i64 0, i64 0 + %gt_s = getelementptr [12 x i8]* @gt_str, i64 0, i64 0 + %ge_s = getelementptr [13 x i8]* @ge_str, i64 0, i64 0 + %eq_s = getelementptr [13 x i8]* @eq_str, i64 0, i64 0 + %ne_s = getelementptr [13 x i8]* @ne_str, i64 0, i64 0 + call i32 (i8*, ...)* @printf( i8* %lt_s, i8 %val1 ) + ret i32 0 +}
\ No newline at end of file diff --git a/test/CodeGen/Hexagon/idxload-with-zero-offset.ll b/test/CodeGen/Hexagon/idxload-with-zero-offset.ll new file mode 100644 index 000000000000..ca6df88a5529 --- /dev/null +++ b/test/CodeGen/Hexagon/idxload-with-zero-offset.ll @@ -0,0 +1,70 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s +; Check that we generate load instruction with (base + register offset << 0) + +; load word + +define i32 @load_w(i32* nocapture %a, i32 %n) nounwind { +; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memw(r{{[0-9]+}}+r{{[0-9]+}}<<#0) +entry: + %tmp = shl i32 %n, 4 + %scevgep9 = getelementptr i32* %a, i32 %tmp + %val = load i32* %scevgep9, align 4 + ret i32 %val +} + +; load unsigned half word + +define i16 @load_uh(i16* nocapture %a, i32 %n) nounwind { +; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memuh(r{{[0-9]+}}+r{{[0-9]+}}<<#0) +entry: + %tmp = shl i32 %n, 4 + %scevgep9 = getelementptr i16* %a, i32 %tmp + %val = load i16* %scevgep9, align 2 + ret i16 %val +} + +; load signed half word + +define i32 @load_h(i16* nocapture %a, i32 %n) nounwind { +; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memh(r{{[0-9]+}}+r{{[0-9]+}}<<#0) +entry: + %tmp = shl i32 %n, 4 + %scevgep9 = getelementptr i16* %a, i32 %tmp + %val = load i16* %scevgep9, align 2 + %conv = sext i16 %val to i32 + ret i32 %conv +} + +; load unsigned byte + +define i8 @load_ub(i8* nocapture %a, i32 %n) nounwind { +; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memub(r{{[0-9]+}}+r{{[0-9]+}}<<#0) +entry: + %tmp = shl i32 %n, 4 + %scevgep9 = getelementptr i8* %a, i32 %tmp + %val = load i8* %scevgep9, align 1 + ret i8 %val +} + +; load signed byte + +define i32 @foo_2(i8* nocapture %a, i32 %n) nounwind { +; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memb(r{{[0-9]+}}+r{{[0-9]+}}<<#0) +entry: + %tmp = shl i32 %n, 4 + %scevgep9 = getelementptr i8* %a, i32 %tmp + %val = load i8* %scevgep9, align 1 + %conv = sext i8 %val to i32 + ret i32 %conv +} + +; load doubleword + +define i64 @load_d(i64* nocapture %a, i32 %n) nounwind { +; CHECK: r{{[0-9]+}}:{{[0-9]+}}{{ *}}={{ *}}memd(r{{[0-9]+}}+r{{[0-9]+}}<<#0) +entry: + %tmp = shl i32 %n, 4 + %scevgep9 = getelementptr i64* %a, i32 %tmp + %val = load i64* %scevgep9, align 8 + ret i64 %val +} diff --git a/test/CodeGen/Hexagon/indirect-br.ll b/test/CodeGen/Hexagon/indirect-br.ll new file mode 100644 index 000000000000..919e50189160 --- /dev/null +++ b/test/CodeGen/Hexagon/indirect-br.ll @@ -0,0 +1,14 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +;CHECK: jumpr r{{[0-9]+}} + +define i32 @check_indirect_br(i8* %target) nounwind { +entry: + indirectbr i8* %target, [label %test_label] + +test_label: + br label %ret + +ret: + ret i32 -1 +}
\ No newline at end of file diff --git a/test/CodeGen/Hexagon/memops.ll b/test/CodeGen/Hexagon/memops.ll new file mode 100644 index 000000000000..5498848d8560 --- /dev/null +++ b/test/CodeGen/Hexagon/memops.ll @@ -0,0 +1,1369 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s +; Generate MemOps for V4 and above. + +define void @memop_unsigned_char_add5(i8* nocapture %p) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5 + %0 = load i8* %p, align 1, !tbaa !0 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 5 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %p, align 1, !tbaa !0 + ret void +} + +define void @memop_unsigned_char_add(i8* nocapture %p, i8 zeroext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}} + %conv = zext i8 %x to i32 + %0 = load i8* %p, align 1, !tbaa !0 + %conv1 = zext i8 %0 to i32 + %add = add nsw i32 %conv1, %conv + %conv2 = trunc i32 %add to i8 + store i8 %conv2, i8* %p, align 1, !tbaa !0 + ret void +} + +define void @memop_unsigned_char_sub(i8* nocapture %p, i8 zeroext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}} + %conv = zext i8 %x to i32 + %0 = load i8* %p, align 1, !tbaa !0 + %conv1 = zext i8 %0 to i32 + %sub = sub nsw i32 %conv1, %conv + %conv2 = trunc i32 %sub to i8 + store i8 %conv2, i8* %p, align 1, !tbaa !0 + ret void +} + +define void @memop_unsigned_char_or(i8* nocapture %p, i8 zeroext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}} + %0 = load i8* %p, align 1, !tbaa !0 + %or3 = or i8 %0, %x + store i8 %or3, i8* %p, align 1, !tbaa !0 + ret void +} + +define void @memop_unsigned_char_and(i8* nocapture %p, i8 zeroext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}} + %0 = load i8* %p, align 1, !tbaa !0 + %and3 = and i8 %0, %x + store i8 %and3, i8* %p, align 1, !tbaa !0 + ret void +} + +define void @memop_unsigned_char_clrbit(i8* nocapture %p) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) + %0 = load i8* %p, align 1, !tbaa !0 + %conv = zext i8 %0 to i32 + %and = and i32 %conv, 223 + %conv1 = trunc i32 %and to i8 + store i8 %conv1, i8* %p, align 1, !tbaa !0 + ret void +} + +define void @memop_unsigned_char_setbit(i8* nocapture %p) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) + %0 = load i8* %p, align 1, !tbaa !0 + %conv = zext i8 %0 to i32 + %or = or i32 %conv, 128 + %conv1 = trunc i32 %or to i8 + store i8 %conv1, i8* %p, align 1, !tbaa !0 + ret void +} + +define void @memop_unsigned_char_add5_index(i8* nocapture %p, i32 %i) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5 + %add.ptr = getelementptr inbounds i8* %p, i32 %i + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 5 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_unsigned_char_add_index(i8* nocapture %p, i32 %i, i8 zeroext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}} + %conv = zext i8 %x to i32 + %add.ptr = getelementptr inbounds i8* %p, i32 %i + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %conv1 = zext i8 %0 to i32 + %add = add nsw i32 %conv1, %conv + %conv2 = trunc i32 %add to i8 + store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void 
@memop_unsigned_char_sub_index(i8* nocapture %p, i32 %i, i8 zeroext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}} + %conv = zext i8 %x to i32 + %add.ptr = getelementptr inbounds i8* %p, i32 %i + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %conv1 = zext i8 %0 to i32 + %sub = sub nsw i32 %conv1, %conv + %conv2 = trunc i32 %sub to i8 + store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_unsigned_char_or_index(i8* nocapture %p, i32 %i, i8 zeroext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i8* %p, i32 %i + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %or3 = or i8 %0, %x + store i8 %or3, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_unsigned_char_and_index(i8* nocapture %p, i32 %i, i8 zeroext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i8* %p, i32 %i + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %and3 = and i8 %0, %x + store i8 %and3, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_unsigned_char_clrbit_index(i8* nocapture %p, i32 %i) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) + %add.ptr = getelementptr inbounds i8* %p, i32 %i + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %conv = zext i8 %0 to i32 + %and = and i32 %conv, 223 + %conv1 = trunc i32 %and to i8 + store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_unsigned_char_setbit_index(i8* nocapture %p, i32 %i) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) + %add.ptr = getelementptr inbounds i8* %p, i32 %i + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %conv = zext i8 %0 to i32 + %or = or i32 %conv, 128 + %conv1 = trunc i32 %or to i8 + store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_unsigned_char_add5_index5(i8* nocapture %p) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}+={{ *}}#5 + %add.ptr = getelementptr inbounds i8* %p, i32 5 + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 5 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_unsigned_char_add_index5(i8* nocapture %p, i8 zeroext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}+={{ *}}r{{[0-9]+}} + %conv = zext i8 %x to i32 + %add.ptr = getelementptr inbounds i8* %p, i32 5 + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %conv1 = zext i8 %0 to i32 + %add = add nsw i32 %conv1, %conv + %conv2 = trunc i32 %add to i8 + store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_unsigned_char_sub_index5(i8* nocapture %p, i8 zeroext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}-={{ *}}r{{[0-9]+}} + %conv = zext i8 %x to i32 + %add.ptr = getelementptr inbounds i8* %p, i32 5 + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %conv1 = zext i8 %0 to i32 + %sub = sub nsw i32 %conv1, %conv + %conv2 = trunc i32 %sub to i8 + store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_unsigned_char_or_index5(i8* nocapture %p, i8 zeroext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}|={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i8* %p, i32 5 + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %or3 = or 
i8 %0, %x + store i8 %or3, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_unsigned_char_and_index5(i8* nocapture %p, i8 zeroext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}&={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i8* %p, i32 5 + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %and3 = and i8 %0, %x + store i8 %and3, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_unsigned_char_clrbit_index5(i8* nocapture %p) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) + %add.ptr = getelementptr inbounds i8* %p, i32 5 + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %conv = zext i8 %0 to i32 + %and = and i32 %conv, 223 + %conv1 = trunc i32 %and to i8 + store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_unsigned_char_setbit_index5(i8* nocapture %p) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) + %add.ptr = getelementptr inbounds i8* %p, i32 5 + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %conv = zext i8 %0 to i32 + %or = or i32 %conv, 128 + %conv1 = trunc i32 %or to i8 + store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_signed_char_add5(i8* nocapture %p) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5 + %0 = load i8* %p, align 1, !tbaa !0 + %conv2 = zext i8 %0 to i32 + %add = add nsw i32 %conv2, 5 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %p, align 1, !tbaa !0 + ret void +} + +define void @memop_signed_char_add(i8* nocapture %p, i8 signext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}} + %conv4 = zext i8 %x to i32 + %0 = load i8* %p, align 1, !tbaa !0 + %conv13 = zext i8 %0 to i32 + %add = add nsw i32 %conv13, %conv4 + %conv2 = trunc i32 %add to i8 + store i8 %conv2, i8* %p, align 1, !tbaa !0 + ret void +} + +define void @memop_signed_char_sub(i8* nocapture %p, i8 signext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}} + %conv4 = zext i8 %x to i32 + %0 = load i8* %p, align 1, !tbaa !0 + %conv13 = zext i8 %0 to i32 + %sub = sub nsw i32 %conv13, %conv4 + %conv2 = trunc i32 %sub to i8 + store i8 %conv2, i8* %p, align 1, !tbaa !0 + ret void +} + +define void @memop_signed_char_or(i8* nocapture %p, i8 signext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}} + %0 = load i8* %p, align 1, !tbaa !0 + %or3 = or i8 %0, %x + store i8 %or3, i8* %p, align 1, !tbaa !0 + ret void +} + +define void @memop_signed_char_and(i8* nocapture %p, i8 signext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}} + %0 = load i8* %p, align 1, !tbaa !0 + %and3 = and i8 %0, %x + store i8 %and3, i8* %p, align 1, !tbaa !0 + ret void +} + +define void @memop_signed_char_clrbit(i8* nocapture %p) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) + %0 = load i8* %p, align 1, !tbaa !0 + %conv2 = zext i8 %0 to i32 + %and = and i32 %conv2, 223 + %conv1 = trunc i32 %and to i8 + store i8 %conv1, i8* %p, align 1, !tbaa !0 + ret void +} + +define void @memop_signed_char_setbit(i8* nocapture %p) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) + %0 = load i8* %p, align 1, !tbaa !0 + %conv2 = zext i8 %0 to i32 + %or = or i32 %conv2, 128 + %conv1 = trunc i32 %or to i8 + store i8 %conv1, i8* %p, align 1, !tbaa !0 
+ ret void +} + +define void @memop_signed_char_add5_index(i8* nocapture %p, i32 %i) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5 + %add.ptr = getelementptr inbounds i8* %p, i32 %i + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %conv2 = zext i8 %0 to i32 + %add = add nsw i32 %conv2, 5 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_signed_char_add_index(i8* nocapture %p, i32 %i, i8 signext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}} + %conv4 = zext i8 %x to i32 + %add.ptr = getelementptr inbounds i8* %p, i32 %i + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %conv13 = zext i8 %0 to i32 + %add = add nsw i32 %conv13, %conv4 + %conv2 = trunc i32 %add to i8 + store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_signed_char_sub_index(i8* nocapture %p, i32 %i, i8 signext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}} + %conv4 = zext i8 %x to i32 + %add.ptr = getelementptr inbounds i8* %p, i32 %i + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %conv13 = zext i8 %0 to i32 + %sub = sub nsw i32 %conv13, %conv4 + %conv2 = trunc i32 %sub to i8 + store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_signed_char_or_index(i8* nocapture %p, i32 %i, i8 signext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i8* %p, i32 %i + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %or3 = or i8 %0, %x + store i8 %or3, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_signed_char_and_index(i8* nocapture %p, i32 %i, i8 signext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i8* %p, i32 %i + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %and3 = and i8 %0, %x + store i8 %and3, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_signed_char_clrbit_index(i8* nocapture %p, i32 %i) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) + %add.ptr = getelementptr inbounds i8* %p, i32 %i + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %conv2 = zext i8 %0 to i32 + %and = and i32 %conv2, 223 + %conv1 = trunc i32 %and to i8 + store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_signed_char_setbit_index(i8* nocapture %p, i32 %i) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) + %add.ptr = getelementptr inbounds i8* %p, i32 %i + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %conv2 = zext i8 %0 to i32 + %or = or i32 %conv2, 128 + %conv1 = trunc i32 %or to i8 + store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_signed_char_add5_index5(i8* nocapture %p) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}+={{ *}}#5 + %add.ptr = getelementptr inbounds i8* %p, i32 5 + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %conv2 = zext i8 %0 to i32 + %add = add nsw i32 %conv2, 5 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_signed_char_add_index5(i8* nocapture %p, i8 signext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}+={{ *}}r{{[0-9]+}} + %conv4 = zext i8 %x to i32 + %add.ptr = getelementptr inbounds i8* %p, i32 5 + %0 = load i8* %add.ptr, align 1, 
!tbaa !0 + %conv13 = zext i8 %0 to i32 + %add = add nsw i32 %conv13, %conv4 + %conv2 = trunc i32 %add to i8 + store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_signed_char_sub_index5(i8* nocapture %p, i8 signext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}-={{ *}}r{{[0-9]+}} + %conv4 = zext i8 %x to i32 + %add.ptr = getelementptr inbounds i8* %p, i32 5 + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %conv13 = zext i8 %0 to i32 + %sub = sub nsw i32 %conv13, %conv4 + %conv2 = trunc i32 %sub to i8 + store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_signed_char_or_index5(i8* nocapture %p, i8 signext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}|={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i8* %p, i32 5 + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %or3 = or i8 %0, %x + store i8 %or3, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_signed_char_and_index5(i8* nocapture %p, i8 signext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}&={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i8* %p, i32 5 + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %and3 = and i8 %0, %x + store i8 %and3, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_signed_char_clrbit_index5(i8* nocapture %p) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) + %add.ptr = getelementptr inbounds i8* %p, i32 5 + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %conv2 = zext i8 %0 to i32 + %and = and i32 %conv2, 223 + %conv1 = trunc i32 %and to i8 + store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_signed_char_setbit_index5(i8* nocapture %p) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) + %add.ptr = getelementptr inbounds i8* %p, i32 5 + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %conv2 = zext i8 %0 to i32 + %or = or i32 %conv2, 128 + %conv1 = trunc i32 %or to i8 + store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_unsigned_short_add5(i16* nocapture %p) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5 + %0 = load i16* %p, align 2, !tbaa !2 + %conv = zext i16 %0 to i32 + %add = add nsw i32 %conv, 5 + %conv1 = trunc i32 %add to i16 + store i16 %conv1, i16* %p, align 2, !tbaa !2 + ret void +} + +define void @memop_unsigned_short_add(i16* nocapture %p, i16 zeroext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}} + %conv = zext i16 %x to i32 + %0 = load i16* %p, align 2, !tbaa !2 + %conv1 = zext i16 %0 to i32 + %add = add nsw i32 %conv1, %conv + %conv2 = trunc i32 %add to i16 + store i16 %conv2, i16* %p, align 2, !tbaa !2 + ret void +} + +define void @memop_unsigned_short_sub(i16* nocapture %p, i16 zeroext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}} + %conv = zext i16 %x to i32 + %0 = load i16* %p, align 2, !tbaa !2 + %conv1 = zext i16 %0 to i32 + %sub = sub nsw i32 %conv1, %conv + %conv2 = trunc i32 %sub to i16 + store i16 %conv2, i16* %p, align 2, !tbaa !2 + ret void +} + +define void @memop_unsigned_short_or(i16* nocapture %p, i16 zeroext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}} + %0 = load i16* %p, align 2, !tbaa !2 + %or3 = or i16 %0, %x + store i16 %or3, i16* %p, align 2, !tbaa !2 + ret void +} + +define void 
@memop_unsigned_short_and(i16* nocapture %p, i16 zeroext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}} + %0 = load i16* %p, align 2, !tbaa !2 + %and3 = and i16 %0, %x + store i16 %and3, i16* %p, align 2, !tbaa !2 + ret void +} + +define void @memop_unsigned_short_clrbit(i16* nocapture %p) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) + %0 = load i16* %p, align 2, !tbaa !2 + %conv = zext i16 %0 to i32 + %and = and i32 %conv, 65503 + %conv1 = trunc i32 %and to i16 + store i16 %conv1, i16* %p, align 2, !tbaa !2 + ret void +} + +define void @memop_unsigned_short_setbit(i16* nocapture %p) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) + %0 = load i16* %p, align 2, !tbaa !2 + %conv = zext i16 %0 to i32 + %or = or i32 %conv, 128 + %conv1 = trunc i32 %or to i16 + store i16 %conv1, i16* %p, align 2, !tbaa !2 + ret void +} + +define void @memop_unsigned_short_add5_index(i16* nocapture %p, i32 %i) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5 + %add.ptr = getelementptr inbounds i16* %p, i32 %i + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %conv = zext i16 %0 to i32 + %add = add nsw i32 %conv, 5 + %conv1 = trunc i32 %add to i16 + store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_unsigned_short_add_index(i16* nocapture %p, i32 %i, i16 zeroext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}} + %conv = zext i16 %x to i32 + %add.ptr = getelementptr inbounds i16* %p, i32 %i + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %conv1 = zext i16 %0 to i32 + %add = add nsw i32 %conv1, %conv + %conv2 = trunc i32 %add to i16 + store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_unsigned_short_sub_index(i16* nocapture %p, i32 %i, i16 zeroext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}} + %conv = zext i16 %x to i32 + %add.ptr = getelementptr inbounds i16* %p, i32 %i + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %conv1 = zext i16 %0 to i32 + %sub = sub nsw i32 %conv1, %conv + %conv2 = trunc i32 %sub to i16 + store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_unsigned_short_or_index(i16* nocapture %p, i32 %i, i16 zeroext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i16* %p, i32 %i + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %or3 = or i16 %0, %x + store i16 %or3, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_unsigned_short_and_index(i16* nocapture %p, i32 %i, i16 zeroext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i16* %p, i32 %i + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %and3 = and i16 %0, %x + store i16 %and3, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_unsigned_short_clrbit_index(i16* nocapture %p, i32 %i) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) + %add.ptr = getelementptr inbounds i16* %p, i32 %i + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %conv = zext i16 %0 to i32 + %and = and i32 %conv, 65503 + %conv1 = trunc i32 %and to i16 + store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_unsigned_short_setbit_index(i16* nocapture %p, i32 %i) 
nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) + %add.ptr = getelementptr inbounds i16* %p, i32 %i + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %conv = zext i16 %0 to i32 + %or = or i32 %conv, 128 + %conv1 = trunc i32 %or to i16 + store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_unsigned_short_add5_index5(i16* nocapture %p) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}+={{ *}}#5 + %add.ptr = getelementptr inbounds i16* %p, i32 5 + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %conv = zext i16 %0 to i32 + %add = add nsw i32 %conv, 5 + %conv1 = trunc i32 %add to i16 + store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_unsigned_short_add_index5(i16* nocapture %p, i16 zeroext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}+={{ *}}r{{[0-9]+}} + %conv = zext i16 %x to i32 + %add.ptr = getelementptr inbounds i16* %p, i32 5 + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %conv1 = zext i16 %0 to i32 + %add = add nsw i32 %conv1, %conv + %conv2 = trunc i32 %add to i16 + store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_unsigned_short_sub_index5(i16* nocapture %p, i16 zeroext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}-={{ *}}r{{[0-9]+}} + %conv = zext i16 %x to i32 + %add.ptr = getelementptr inbounds i16* %p, i32 5 + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %conv1 = zext i16 %0 to i32 + %sub = sub nsw i32 %conv1, %conv + %conv2 = trunc i32 %sub to i16 + store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_unsigned_short_or_index5(i16* nocapture %p, i16 zeroext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}|={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i16* %p, i32 5 + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %or3 = or i16 %0, %x + store i16 %or3, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_unsigned_short_and_index5(i16* nocapture %p, i16 zeroext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}&={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i16* %p, i32 5 + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %and3 = and i16 %0, %x + store i16 %and3, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_unsigned_short_clrbit_index5(i16* nocapture %p) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) + %add.ptr = getelementptr inbounds i16* %p, i32 5 + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %conv = zext i16 %0 to i32 + %and = and i32 %conv, 65503 + %conv1 = trunc i32 %and to i16 + store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_unsigned_short_setbit_index5(i16* nocapture %p) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) + %add.ptr = getelementptr inbounds i16* %p, i32 5 + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %conv = zext i16 %0 to i32 + %or = or i32 %conv, 128 + %conv1 = trunc i32 %or to i16 + store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_short_add5(i16* nocapture %p) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5 + %0 = load i16* %p, align 2, !tbaa !2 + %conv2 = zext i16 %0 to i32 + %add = add nsw i32 %conv2, 5 + %conv1 = trunc i32 %add to i16 + store i16 %conv1, i16* %p, align 2, !tbaa !2 + ret void 
+} + +define void @memop_signed_short_add(i16* nocapture %p, i16 signext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}} + %conv4 = zext i16 %x to i32 + %0 = load i16* %p, align 2, !tbaa !2 + %conv13 = zext i16 %0 to i32 + %add = add nsw i32 %conv13, %conv4 + %conv2 = trunc i32 %add to i16 + store i16 %conv2, i16* %p, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_short_sub(i16* nocapture %p, i16 signext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}} + %conv4 = zext i16 %x to i32 + %0 = load i16* %p, align 2, !tbaa !2 + %conv13 = zext i16 %0 to i32 + %sub = sub nsw i32 %conv13, %conv4 + %conv2 = trunc i32 %sub to i16 + store i16 %conv2, i16* %p, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_short_or(i16* nocapture %p, i16 signext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}} + %0 = load i16* %p, align 2, !tbaa !2 + %or3 = or i16 %0, %x + store i16 %or3, i16* %p, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_short_and(i16* nocapture %p, i16 signext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}} + %0 = load i16* %p, align 2, !tbaa !2 + %and3 = and i16 %0, %x + store i16 %and3, i16* %p, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_short_clrbit(i16* nocapture %p) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) + %0 = load i16* %p, align 2, !tbaa !2 + %conv2 = zext i16 %0 to i32 + %and = and i32 %conv2, 65503 + %conv1 = trunc i32 %and to i16 + store i16 %conv1, i16* %p, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_short_setbit(i16* nocapture %p) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) + %0 = load i16* %p, align 2, !tbaa !2 + %conv2 = zext i16 %0 to i32 + %or = or i32 %conv2, 128 + %conv1 = trunc i32 %or to i16 + store i16 %conv1, i16* %p, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_short_add5_index(i16* nocapture %p, i32 %i) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5 + %add.ptr = getelementptr inbounds i16* %p, i32 %i + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %conv2 = zext i16 %0 to i32 + %add = add nsw i32 %conv2, 5 + %conv1 = trunc i32 %add to i16 + store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_short_add_index(i16* nocapture %p, i32 %i, i16 signext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}} + %conv4 = zext i16 %x to i32 + %add.ptr = getelementptr inbounds i16* %p, i32 %i + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %conv13 = zext i16 %0 to i32 + %add = add nsw i32 %conv13, %conv4 + %conv2 = trunc i32 %add to i16 + store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_short_sub_index(i16* nocapture %p, i32 %i, i16 signext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}} + %conv4 = zext i16 %x to i32 + %add.ptr = getelementptr inbounds i16* %p, i32 %i + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %conv13 = zext i16 %0 to i32 + %sub = sub nsw i32 %conv13, %conv4 + %conv2 = trunc i32 %sub to i16 + store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_short_or_index(i16* nocapture %p, i32 %i, i16 signext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ 
*}}|={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i16* %p, i32 %i + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %or3 = or i16 %0, %x + store i16 %or3, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_short_and_index(i16* nocapture %p, i32 %i, i16 signext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i16* %p, i32 %i + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %and3 = and i16 %0, %x + store i16 %and3, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_short_clrbit_index(i16* nocapture %p, i32 %i) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) + %add.ptr = getelementptr inbounds i16* %p, i32 %i + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %conv2 = zext i16 %0 to i32 + %and = and i32 %conv2, 65503 + %conv1 = trunc i32 %and to i16 + store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_short_setbit_index(i16* nocapture %p, i32 %i) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) + %add.ptr = getelementptr inbounds i16* %p, i32 %i + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %conv2 = zext i16 %0 to i32 + %or = or i32 %conv2, 128 + %conv1 = trunc i32 %or to i16 + store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_short_add5_index5(i16* nocapture %p) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}+={{ *}}#5 + %add.ptr = getelementptr inbounds i16* %p, i32 5 + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %conv2 = zext i16 %0 to i32 + %add = add nsw i32 %conv2, 5 + %conv1 = trunc i32 %add to i16 + store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_short_add_index5(i16* nocapture %p, i16 signext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}+={{ *}}r{{[0-9]+}} + %conv4 = zext i16 %x to i32 + %add.ptr = getelementptr inbounds i16* %p, i32 5 + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %conv13 = zext i16 %0 to i32 + %add = add nsw i32 %conv13, %conv4 + %conv2 = trunc i32 %add to i16 + store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_short_sub_index5(i16* nocapture %p, i16 signext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}-={{ *}}r{{[0-9]+}} + %conv4 = zext i16 %x to i32 + %add.ptr = getelementptr inbounds i16* %p, i32 5 + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %conv13 = zext i16 %0 to i32 + %sub = sub nsw i32 %conv13, %conv4 + %conv2 = trunc i32 %sub to i16 + store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_short_or_index5(i16* nocapture %p, i16 signext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}|={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i16* %p, i32 5 + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %or3 = or i16 %0, %x + store i16 %or3, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_short_and_index5(i16* nocapture %p, i16 signext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}&={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i16* %p, i32 5 + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %and3 = and i16 %0, %x + store i16 %and3, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_short_clrbit_index5(i16* nocapture %p) nounwind { +entry: +; 
CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) + %add.ptr = getelementptr inbounds i16* %p, i32 5 + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %conv2 = zext i16 %0 to i32 + %and = and i32 %conv2, 65503 + %conv1 = trunc i32 %and to i16 + store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_short_setbit_index5(i16* nocapture %p) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) + %add.ptr = getelementptr inbounds i16* %p, i32 5 + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %conv2 = zext i16 %0 to i32 + %or = or i32 %conv2, 128 + %conv1 = trunc i32 %or to i16 + store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_int_add5(i32* nocapture %p) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5 + %0 = load i32* %p, align 4, !tbaa !3 + %add = add i32 %0, 5 + store i32 %add, i32* %p, align 4, !tbaa !3 + ret void +} + +define void @memop_signed_int_add(i32* nocapture %p, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}} + %0 = load i32* %p, align 4, !tbaa !3 + %add = add i32 %0, %x + store i32 %add, i32* %p, align 4, !tbaa !3 + ret void +} + +define void @memop_signed_int_sub(i32* nocapture %p, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}} + %0 = load i32* %p, align 4, !tbaa !3 + %sub = sub i32 %0, %x + store i32 %sub, i32* %p, align 4, !tbaa !3 + ret void +} + +define void @memop_signed_int_or(i32* nocapture %p, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}} + %0 = load i32* %p, align 4, !tbaa !3 + %or = or i32 %0, %x + store i32 %or, i32* %p, align 4, !tbaa !3 + ret void +} + +define void @memop_signed_int_and(i32* nocapture %p, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}} + %0 = load i32* %p, align 4, !tbaa !3 + %and = and i32 %0, %x + store i32 %and, i32* %p, align 4, !tbaa !3 + ret void +} + +define void @memop_signed_int_clrbit(i32* nocapture %p) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) + %0 = load i32* %p, align 4, !tbaa !3 + %and = and i32 %0, -33 + store i32 %and, i32* %p, align 4, !tbaa !3 + ret void +} + +define void @memop_signed_int_setbit(i32* nocapture %p) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) + %0 = load i32* %p, align 4, !tbaa !3 + %or = or i32 %0, 128 + store i32 %or, i32* %p, align 4, !tbaa !3 + ret void +} + +define void @memop_signed_int_add5_index(i32* nocapture %p, i32 %i) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5 + %add.ptr = getelementptr inbounds i32* %p, i32 %i + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %add = add i32 %0, 5 + store i32 %add, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_signed_int_add_index(i32* nocapture %p, i32 %i, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i32* %p, i32 %i + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %add = add i32 %0, %x + store i32 %add, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_signed_int_sub_index(i32* nocapture %p, i32 %i, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i32* %p, i32 %i + 
%0 = load i32* %add.ptr, align 4, !tbaa !3 + %sub = sub i32 %0, %x + store i32 %sub, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_signed_int_or_index(i32* nocapture %p, i32 %i, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i32* %p, i32 %i + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %or = or i32 %0, %x + store i32 %or, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_signed_int_and_index(i32* nocapture %p, i32 %i, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i32* %p, i32 %i + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %and = and i32 %0, %x + store i32 %and, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_signed_int_clrbit_index(i32* nocapture %p, i32 %i) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) + %add.ptr = getelementptr inbounds i32* %p, i32 %i + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %and = and i32 %0, -33 + store i32 %and, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_signed_int_setbit_index(i32* nocapture %p, i32 %i) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) + %add.ptr = getelementptr inbounds i32* %p, i32 %i + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %or = or i32 %0, 128 + store i32 %or, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_signed_int_add5_index5(i32* nocapture %p) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}+={{ *}}#5 + %add.ptr = getelementptr inbounds i32* %p, i32 5 + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %add = add i32 %0, 5 + store i32 %add, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_signed_int_add_index5(i32* nocapture %p, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}+={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i32* %p, i32 5 + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %add = add i32 %0, %x + store i32 %add, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_signed_int_sub_index5(i32* nocapture %p, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}-={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i32* %p, i32 5 + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %sub = sub i32 %0, %x + store i32 %sub, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_signed_int_or_index5(i32* nocapture %p, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}|={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i32* %p, i32 5 + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %or = or i32 %0, %x + store i32 %or, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_signed_int_and_index5(i32* nocapture %p, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}&={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i32* %p, i32 5 + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %and = and i32 %0, %x + store i32 %and, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_signed_int_clrbit_index5(i32* nocapture %p) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) + %add.ptr = getelementptr inbounds i32* %p, i32 5 + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %and = and i32 %0, -33 + store i32 %and, i32* 
%add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_signed_int_setbit_index5(i32* nocapture %p) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) + %add.ptr = getelementptr inbounds i32* %p, i32 5 + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %or = or i32 %0, 128 + store i32 %or, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_add5(i32* nocapture %p) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5 + %0 = load i32* %p, align 4, !tbaa !3 + %add = add nsw i32 %0, 5 + store i32 %add, i32* %p, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_add(i32* nocapture %p, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}} + %0 = load i32* %p, align 4, !tbaa !3 + %add = add nsw i32 %0, %x + store i32 %add, i32* %p, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_sub(i32* nocapture %p, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}} + %0 = load i32* %p, align 4, !tbaa !3 + %sub = sub nsw i32 %0, %x + store i32 %sub, i32* %p, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_or(i32* nocapture %p, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}} + %0 = load i32* %p, align 4, !tbaa !3 + %or = or i32 %0, %x + store i32 %or, i32* %p, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_and(i32* nocapture %p, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}} + %0 = load i32* %p, align 4, !tbaa !3 + %and = and i32 %0, %x + store i32 %and, i32* %p, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_clrbit(i32* nocapture %p) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) + %0 = load i32* %p, align 4, !tbaa !3 + %and = and i32 %0, -33 + store i32 %and, i32* %p, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_setbit(i32* nocapture %p) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) + %0 = load i32* %p, align 4, !tbaa !3 + %or = or i32 %0, 128 + store i32 %or, i32* %p, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_add5_index(i32* nocapture %p, i32 %i) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5 + %add.ptr = getelementptr inbounds i32* %p, i32 %i + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %add = add nsw i32 %0, 5 + store i32 %add, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_add_index(i32* nocapture %p, i32 %i, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i32* %p, i32 %i + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %add = add nsw i32 %0, %x + store i32 %add, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_sub_index(i32* nocapture %p, i32 %i, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i32* %p, i32 %i + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %sub = sub nsw i32 %0, %x + store i32 %sub, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_or_index(i32* nocapture %p, i32 %i, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}} + %add.ptr = 
getelementptr inbounds i32* %p, i32 %i + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %or = or i32 %0, %x + store i32 %or, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_and_index(i32* nocapture %p, i32 %i, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i32* %p, i32 %i + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %and = and i32 %0, %x + store i32 %and, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_clrbit_index(i32* nocapture %p, i32 %i) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) + %add.ptr = getelementptr inbounds i32* %p, i32 %i + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %and = and i32 %0, -33 + store i32 %and, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_setbit_index(i32* nocapture %p, i32 %i) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) + %add.ptr = getelementptr inbounds i32* %p, i32 %i + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %or = or i32 %0, 128 + store i32 %or, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_add5_index5(i32* nocapture %p) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}+={{ *}}#5 + %add.ptr = getelementptr inbounds i32* %p, i32 5 + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %add = add nsw i32 %0, 5 + store i32 %add, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_add_index5(i32* nocapture %p, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}+={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i32* %p, i32 5 + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %add = add nsw i32 %0, %x + store i32 %add, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_sub_index5(i32* nocapture %p, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}-={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i32* %p, i32 5 + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %sub = sub nsw i32 %0, %x + store i32 %sub, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_or_index5(i32* nocapture %p, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}|={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i32* %p, i32 5 + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %or = or i32 %0, %x + store i32 %or, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_and_index5(i32* nocapture %p, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}&={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i32* %p, i32 5 + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %and = and i32 %0, %x + store i32 %and, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_clrbit_index5(i32* nocapture %p) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) + %add.ptr = getelementptr inbounds i32* %p, i32 5 + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %and = and i32 %0, -33 + store i32 %and, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_setbit_index5(i32* nocapture %p) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) + %add.ptr = getelementptr inbounds i32* %p, i32 5 + %0 = load i32* %add.ptr, 
align 4, !tbaa !3 + %or = or i32 %0, 128 + store i32 %or, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +!0 = metadata !{metadata !"omnipotent char", metadata !1} +!1 = metadata !{metadata !"Simple C/C++ TBAA"} +!2 = metadata !{metadata !"short", metadata !0} +!3 = metadata !{metadata !"int", metadata !0} diff --git a/test/CodeGen/Hexagon/memops1.ll b/test/CodeGen/Hexagon/memops1.ll new file mode 100644 index 000000000000..2babdc848ddc --- /dev/null +++ b/test/CodeGen/Hexagon/memops1.ll @@ -0,0 +1,33 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s +; Generate MemOps for V4 and above. + + +define void @f(i32* %p) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#40){{ *}}-={{ *}}#1 + %p.addr = alloca i32*, align 4 + store i32* %p, i32** %p.addr, align 4 + %0 = load i32** %p.addr, align 4 + %add.ptr = getelementptr inbounds i32* %0, i32 10 + %1 = load i32* %add.ptr, align 4 + %sub = sub nsw i32 %1, 1 + store i32 %sub, i32* %add.ptr, align 4 + ret void +} + +define void @g(i32* %p, i32 %i) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#40){{ *}}-={{ *}}#1 + %p.addr = alloca i32*, align 4 + %i.addr = alloca i32, align 4 + store i32* %p, i32** %p.addr, align 4 + store i32 %i, i32* %i.addr, align 4 + %0 = load i32** %p.addr, align 4 + %1 = load i32* %i.addr, align 4 + %add.ptr = getelementptr inbounds i32* %0, i32 %1 + %add.ptr1 = getelementptr inbounds i32* %add.ptr, i32 10 + %2 = load i32* %add.ptr1, align 4 + %sub = sub nsw i32 %2, 1 + store i32 %sub, i32* %add.ptr1, align 4 + ret void +} diff --git a/test/CodeGen/Hexagon/memops2.ll b/test/CodeGen/Hexagon/memops2.ll new file mode 100644 index 000000000000..b1b25445c029 --- /dev/null +++ b/test/CodeGen/Hexagon/memops2.ll @@ -0,0 +1,32 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s +; Generate MemOps for V4 and above. + + +define void @f(i16* nocapture %p) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}-={{ *}}#1 + %add.ptr = getelementptr inbounds i16* %p, i32 10 + %0 = load i16* %add.ptr, align 2, !tbaa !0 + %conv2 = zext i16 %0 to i32 + %sub = add nsw i32 %conv2, 65535 + %conv1 = trunc i32 %sub to i16 + store i16 %conv1, i16* %add.ptr, align 2, !tbaa !0 + ret void +} + +define void @g(i16* nocapture %p, i32 %i) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}-={{ *}}#1 + %add.ptr.sum = add i32 %i, 10 + %add.ptr1 = getelementptr inbounds i16* %p, i32 %add.ptr.sum + %0 = load i16* %add.ptr1, align 2, !tbaa !0 + %conv3 = zext i16 %0 to i32 + %sub = add nsw i32 %conv3, 65535 + %conv2 = trunc i32 %sub to i16 + store i16 %conv2, i16* %add.ptr1, align 2, !tbaa !0 + ret void +} + +!0 = metadata !{metadata !"short", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/CodeGen/Hexagon/memops3.ll b/test/CodeGen/Hexagon/memops3.ll new file mode 100644 index 000000000000..5b8bd6c87bfb --- /dev/null +++ b/test/CodeGen/Hexagon/memops3.ll @@ -0,0 +1,31 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s +; Generate MemOps for V4 and above. 
+ + +define void @f(i8* nocapture %p) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}-={{ *}}#1 + %add.ptr = getelementptr inbounds i8* %p, i32 10 + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %conv = zext i8 %0 to i32 + %sub = add nsw i32 %conv, 255 + %conv1 = trunc i32 %sub to i8 + store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @g(i8* nocapture %p, i32 %i) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}-={{ *}}#1 + %add.ptr.sum = add i32 %i, 10 + %add.ptr1 = getelementptr inbounds i8* %p, i32 %add.ptr.sum + %0 = load i8* %add.ptr1, align 1, !tbaa !0 + %conv = zext i8 %0 to i32 + %sub = add nsw i32 %conv, 255 + %conv2 = trunc i32 %sub to i8 + store i8 %conv2, i8* %add.ptr1, align 1, !tbaa !0 + ret void +} + +!0 = metadata !{metadata !"omnipotent char", metadata !1} +!1 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/CodeGen/Hexagon/misaligned-access.ll b/test/CodeGen/Hexagon/misaligned-access.ll new file mode 100644 index 000000000000..4dafb44cc3ef --- /dev/null +++ b/test/CodeGen/Hexagon/misaligned-access.ll @@ -0,0 +1,16 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s +; Check that the mis-aligned load doesn't cause compiler to assert. + +declare i32 @_hi(i64) #1 +@temp1 = common global i32 0, align 4 + +define i32 @CSDRSEARCH_executeSearchManager() #0 { +entry: + %temp = alloca i32, align 4 + %0 = load i32* @temp1, align 4 + store i32 %0, i32* %temp, align 4 + %1 = bitcast i32* %temp to i64* + %2 = load i64* %1, align 8 + %call = call i32 @_hi(i64 %2) + ret i32 %call +} diff --git a/test/CodeGen/Hexagon/postinc-load.ll b/test/CodeGen/Hexagon/postinc-load.ll new file mode 100644 index 000000000000..855a347d74f5 --- /dev/null +++ b/test/CodeGen/Hexagon/postinc-load.ll @@ -0,0 +1,29 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s + +; Check that post-increment load instructions are being generated. +; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memw(r{{[0-9]+}}{{ *}}++{{ *}}#4{{ *}}) + +define i32 @sum(i32* nocapture %a, i16* nocapture %b, i32 %n) nounwind { +entry: + br label %for.body + +for.body: + %lsr.iv = phi i32 [ %lsr.iv.next, %for.body ], [ 10, %entry ] + %arrayidx.phi = phi i32* [ %a, %entry ], [ %arrayidx.inc, %for.body ] + %arrayidx1.phi = phi i16* [ %b, %entry ], [ %arrayidx1.inc, %for.body ] + %sum.03 = phi i32 [ 0, %entry ], [ %add2, %for.body ] + %0 = load i32* %arrayidx.phi, align 4 + %1 = load i16* %arrayidx1.phi, align 2 + %conv = sext i16 %1 to i32 + %add = add i32 %0, %sum.03 + %add2 = add i32 %add, %conv + %arrayidx.inc = getelementptr i32* %arrayidx.phi, i32 1 + %arrayidx1.inc = getelementptr i16* %arrayidx1.phi, i32 1 + %lsr.iv.next = add i32 %lsr.iv, -1 + %exitcond = icmp eq i32 %lsr.iv.next, 0 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret i32 %add2 +} + diff --git a/test/CodeGen/Hexagon/postinc-store.ll b/test/CodeGen/Hexagon/postinc-store.ll new file mode 100644 index 000000000000..99a3a58ad39c --- /dev/null +++ b/test/CodeGen/Hexagon/postinc-store.ll @@ -0,0 +1,29 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s + +; Check that post-increment store instructions are being generated. 
+; CHECK: memw(r{{[0-9]+}}{{ *}}++{{ *}}#4{{ *}}){{ *}}={{ *}}r{{[0-9]+}} + +define i32 @sum(i32* nocapture %a, i16* nocapture %b, i32 %n) nounwind { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %lsr.iv = phi i32 [ %lsr.iv.next, %for.body ], [ 10, %entry ] + %arrayidx.phi = phi i32* [ %a, %entry ], [ %arrayidx.inc, %for.body ] + %arrayidx1.phi = phi i16* [ %b, %entry ], [ %arrayidx1.inc, %for.body ] + %0 = load i32* %arrayidx.phi, align 4 + %1 = load i16* %arrayidx1.phi, align 2 + %conv = sext i16 %1 to i32 + %factor = mul i32 %0, 2 + %add3 = add i32 %factor, %conv + store i32 %add3, i32* %arrayidx.phi, align 4 + + %arrayidx.inc = getelementptr i32* %arrayidx.phi, i32 1 + %arrayidx1.inc = getelementptr i16* %arrayidx1.phi, i32 1 + %lsr.iv.next = add i32 %lsr.iv, -1 + %exitcond = icmp eq i32 %lsr.iv.next, 0 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret i32 0 +} diff --git a/test/CodeGen/Hexagon/pred-absolute-store.ll b/test/CodeGen/Hexagon/pred-absolute-store.ll new file mode 100644 index 000000000000..b1b09f414a54 --- /dev/null +++ b/test/CodeGen/Hexagon/pred-absolute-store.ll @@ -0,0 +1,19 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s +; Check that we are able to predicate instructions with absolute +; addressing mode. + +; CHECK: if{{ *}}(p{{[0-3]+}}){{ *}}memw(##gvar){{ *}}={{ *}}r{{[0-9]+}} + +@gvar = external global i32 +define i32 @test2(i32 %a, i32 %b) nounwind { +entry: + %cmp = icmp eq i32 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + store i32 %a, i32* @gvar, align 4 + br label %if.end + +if.end: + ret i32 %b +} diff --git a/test/CodeGen/Hexagon/predicate-copy.ll b/test/CodeGen/Hexagon/predicate-copy.ll new file mode 100644 index 000000000000..552b68794195 --- /dev/null +++ b/test/CodeGen/Hexagon/predicate-copy.ll @@ -0,0 +1,8 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv4 -O3 < %s | FileCheck %s + +; CHECK: r{{[0-9]+}} = p{{[0-9]+}} +define i1 @foo() { +entry: + ret i1 false +} + diff --git a/test/CodeGen/Hexagon/struct_args.ll b/test/CodeGen/Hexagon/struct_args.ll index e488f33c3d16..f91300b5067e 100644 --- a/test/CodeGen/Hexagon/struct_args.ll +++ b/test/CodeGen/Hexagon/struct_args.ll @@ -1,5 +1,5 @@ ; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s -; CHECK: r{{[0-9]}}:{{[0-9]}} = combine(r{{[0-9]}}, r{{[0-9]}}) +; CHECK: r{{[0-9]}}:{{[0-9]}} = combine({{r[0-9]|#0}}, r{{[0-9]}}) ; CHECK: r{{[0-9]}}:{{[0-9]}} |= asl(r{{[0-9]}}:{{[0-9]}}, #32) %struct.small = type { i32, i32 } diff --git a/test/CodeGen/Hexagon/sube.ll b/test/CodeGen/Hexagon/sube.ll new file mode 100644 index 000000000000..84172e957d04 --- /dev/null +++ b/test/CodeGen/Hexagon/sube.ll @@ -0,0 +1,29 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; CHECK: r{{[0-9]+:[0-9]+}} = #0 +; CHECK: r{{[0-9]+:[0-9]+}} = #1 +; CHECK: p{{[0-9]+}} = cmp.gtu(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}}) +; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}) +; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}) +; CHECK: r{{[0-9]+:[0-9]+}} = sub(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}}) +; CHECK: r{{[0-9]+:[0-9]+}} = sub(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}}) +; CHECK: r{{[0-9]+:[0-9]+}} = combine(r{{[0-9]+}}, r{{[0-9]+}}) + +define void @check_sube_subc(i64 %AL, i64 %AH, i64 %BL, i64 %BH, i64* %RL, i64* %RH) { +entry: + %tmp1 = zext i64 %AL to i128 + %tmp23 = zext i64 %AH to i128 + %tmp4 = shl i128 %tmp23, 64 + %tmp5 = or i128 %tmp4, %tmp1 + %tmp67 = zext i64 %BL to i128 + %tmp89 = zext
i64 %BH to i128 + %tmp11 = shl i128 %tmp89, 64 + %tmp12 = or i128 %tmp11, %tmp67 + %tmp15 = sub i128 %tmp5, %tmp12 + %tmp1617 = trunc i128 %tmp15 to i64 + store i64 %tmp1617, i64* %RL + %tmp21 = lshr i128 %tmp15, 64 + %tmp2122 = trunc i128 %tmp21 to i64 + store i64 %tmp2122, i64* %RH + ret void +}
\ No newline at end of file diff --git a/test/CodeGen/Hexagon/validate-offset.ll b/test/CodeGen/Hexagon/validate-offset.ll new file mode 100644 index 000000000000..9e7d0aa07832 --- /dev/null +++ b/test/CodeGen/Hexagon/validate-offset.ll @@ -0,0 +1,36 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s -O0 + +; This is a regression test which makes sure that the offset check +; is available for STRiw_indexed instruction. This is required +; by 'Hexagon Expand Predicate Spill Code' pass. + +define i32 @f(i32 %a, i32 %b) nounwind { +entry: + %retval = alloca i32, align 4 + %a.addr = alloca i32, align 4 + %b.addr = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + store i32 %b, i32* %b.addr, align 4 + %0 = load i32* %a.addr, align 4 + %1 = load i32* %b.addr, align 4 + %cmp = icmp sgt i32 %0, %1 + br i1 %cmp, label %if.then, label %if.else + +if.then: + %2 = load i32* %a.addr, align 4 + %3 = load i32* %b.addr, align 4 + %add = add nsw i32 %2, %3 + store i32 %add, i32* %retval + br label %return + +if.else: + %4 = load i32* %a.addr, align 4 + %5 = load i32* %b.addr, align 4 + %sub = sub nsw i32 %4, %5 + store i32 %sub, i32* %retval + br label %return + +return: + %6 = load i32* %retval + ret i32 %6 +} diff --git a/test/CodeGen/Hexagon/zextloadi1.ll b/test/CodeGen/Hexagon/zextloadi1.ll new file mode 100644 index 000000000000..cb6e6fdf84a5 --- /dev/null +++ b/test/CodeGen/Hexagon/zextloadi1.ll @@ -0,0 +1,25 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s + +; CHECK: r{{[0-9]+}} = ##i129_l+16 +; CHECK: r{{[0-9]+}} = ##i129_s+16 +; CHECK: memd(##i129_s) = r{{[0-9]+:[0-9]+}} +; CHECK: r{{[0-9]+}} = ##i65_l+8 +; CHECK: r{{[0-9]+}} = ##i65_s+8 +; CHECK: memd(##i65_s) = r{{[0-9]+:[0-9]+}} + +@i65_l = external global i65 +@i65_s = external global i65 +@i129_l = external global i129 +@i129_s = external global i129 + +define void @i129_ls() nounwind { + %tmp = load i129* @i129_l + store i129 %tmp, i129* @i129_s + ret void +} + +define void @i65_ls() nounwind { + %tmp = load i65* @i65_l + store i65 %tmp, i65* @i65_s + ret void +}
\ No newline at end of file diff --git a/test/CodeGen/Inputs/DbgValueOtherTargets.ll b/test/CodeGen/Inputs/DbgValueOtherTargets.ll new file mode 100644 index 000000000000..d5162b964a08 --- /dev/null +++ b/test/CodeGen/Inputs/DbgValueOtherTargets.ll @@ -0,0 +1,28 @@ +; Check that DEBUG_VALUE comments come through on a variety of targets. + +define i32 @main() nounwind ssp { +entry: +; CHECK: DEBUG_VALUE + call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9 + ret i32 0, !dbg !10 +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.cu = !{!2} + +!0 = metadata !{i32 786478, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786473, metadata !12} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 2.9 (trunk 120996)", i1 false, metadata !"", i32 0, null, null, metadata !11, null, null} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5} +!5 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 0} +!7 = metadata !{i32 786688, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ] +!8 = metadata !{i32 786443, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!9 = metadata !{i32 3, i32 11, metadata !8, null} +!10 = metadata !{i32 4, i32 2, metadata !8, null} +!11 = metadata !{metadata !0} +!12 = metadata !{metadata !"/tmp/x.c", metadata !"/Users/manav"} diff --git a/test/CodeGen/MBlaze/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/MBlaze/2010-04-07-DbgValueOtherTargets.ll deleted file mode 100644 index d8970eac9007..000000000000 --- a/test/CodeGen/MBlaze/2010-04-07-DbgValueOtherTargets.ll +++ /dev/null @@ -1,28 +0,0 @@ -; RUN: llc -O0 -march=mblaze -asm-verbose < %s | FileCheck %s -; Check that DEBUG_VALUE comments come through on a variety of targets. 
- -define i32 @main() nounwind ssp { -entry: -; CHECK: DEBUG_VALUE - call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9 - ret i32 0, !dbg !10 -} - -declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone - -declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone - -!llvm.dbg.sp = !{!0} - -!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] -!4 = metadata !{metadata !5} -!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 0} -!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] -!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] -!9 = metadata !{i32 3, i32 11, metadata !8, null} -!10 = metadata !{i32 4, i32 2, metadata !8, null} - diff --git a/test/CodeGen/MBlaze/DbgValueOtherTargets.test b/test/CodeGen/MBlaze/DbgValueOtherTargets.test new file mode 100644 index 000000000000..8b850f51105b --- /dev/null +++ b/test/CodeGen/MBlaze/DbgValueOtherTargets.test @@ -0,0 +1 @@ +RUN: llc -O0 -march=mblaze -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll diff --git a/test/CodeGen/MBlaze/lit.local.cfg b/test/CodeGen/MBlaze/lit.local.cfg index e236200d7572..ff4928de4b9c 100644 --- a/test/CodeGen/MBlaze/lit.local.cfg +++ b/test/CodeGen/MBlaze/lit.local.cfg @@ -1,4 +1,4 @@ -config.suffixes = ['.ll', '.c', '.cpp'] +config.suffixes = ['.ll', '.c', '.cpp', '.test'] targets = set(config.root.targets_to_build.split()) if not 'MBlaze' in targets: diff --git a/test/CodeGen/MSP430/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/MSP430/2010-04-07-DbgValueOtherTargets.ll deleted file mode 100644 index 9d549da8a93a..000000000000 --- a/test/CodeGen/MSP430/2010-04-07-DbgValueOtherTargets.ll +++ /dev/null @@ -1,28 +0,0 @@ -; RUN: llc -O0 -march=msp430 -asm-verbose < %s | FileCheck %s -; Check that DEBUG_VALUE comments come through on a variety of targets. 
- -define i32 @main() nounwind ssp { -entry: -; CHECK: DEBUG_VALUE - call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9 - ret i32 0, !dbg !10 -} - -declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone - -declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone - -!llvm.dbg.sp = !{!0} - -!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] -!4 = metadata !{metadata !5} -!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 0} -!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] -!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] -!9 = metadata !{i32 3, i32 11, metadata !8, null} -!10 = metadata !{i32 4, i32 2, metadata !8, null} - diff --git a/test/CodeGen/MSP430/DbgValueOtherTargets.test b/test/CodeGen/MSP430/DbgValueOtherTargets.test new file mode 100644 index 000000000000..7adfbcafa35b --- /dev/null +++ b/test/CodeGen/MSP430/DbgValueOtherTargets.test @@ -0,0 +1 @@ +RUN: llc -O0 -march=msp430 -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll diff --git a/test/CodeGen/MSP430/byval.ll b/test/CodeGen/MSP430/byval.ll new file mode 100644 index 000000000000..9dda0a097b56 --- /dev/null +++ b/test/CodeGen/MSP430/byval.ll @@ -0,0 +1,26 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16" +target triple = "msp430---elf" + +%struct.Foo = type { i16, i16, i16 } +@foo = global %struct.Foo { i16 1, i16 2, i16 3 }, align 2 + +define i16 @callee(%struct.Foo* byval %f) nounwind { +entry: +; CHECK: callee: +; CHECK: mov.w 2(r1), r15 + %0 = getelementptr inbounds %struct.Foo* %f, i32 0, i32 0 + %1 = load i16* %0, align 2 + ret i16 %1 +} + +define void @caller() nounwind { +entry: +; CHECK: caller: +; CHECK: mov.w &foo+4, 4(r1) +; CHECK-NEXT: mov.w &foo+2, 2(r1) +; CHECK-NEXT: mov.w &foo, 0(r1) + %call = call i16 @callee(%struct.Foo* byval @foo) + ret void +} diff --git a/test/CodeGen/MSP430/lit.local.cfg b/test/CodeGen/MSP430/lit.local.cfg index 972732ebad30..0ca9fc9c6912 100644 --- a/test/CodeGen/MSP430/lit.local.cfg +++ b/test/CodeGen/MSP430/lit.local.cfg @@ -1,4 +1,4 @@ -config.suffixes = ['.ll', '.c', '.cpp'] +config.suffixes = ['.ll', '.c', '.cpp', '.test'] targets = set(config.root.targets_to_build.split()) if not 'MSP430' in targets: diff --git a/test/CodeGen/MSP430/vararg.ll b/test/CodeGen/MSP430/vararg.ll new file mode 100644 index 000000000000..603d3ec6b686 --- /dev/null +++ b/test/CodeGen/MSP430/vararg.ll @@ -0,0 +1,50 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16" +target triple = "msp430---elf" + +declare void 
@llvm.va_start(i8*) nounwind +declare void @llvm.va_end(i8*) nounwind +declare void @llvm.va_copy(i8*, i8*) nounwind + +define void @va_start(i16 %a, ...) nounwind { +entry: +; CHECK: va_start: +; CHECK: sub.w #2, r1 + %vl = alloca i8*, align 2 + %vl1 = bitcast i8** %vl to i8* +; CHECK-NEXT: mov.w r1, [[REG:r[0-9]+]] +; CHECK-NEXT: add.w #6, [[REG]] +; CHECK-NEXT: mov.w [[REG]], 0(r1) + call void @llvm.va_start(i8* %vl1) + call void @llvm.va_end(i8* %vl1) + ret void +} + +define i16 @va_arg(i8* %vl) nounwind { +entry: +; CHECK: va_arg: + %vl.addr = alloca i8*, align 2 +; CHECK: mov.w r15, 0(r1) + store i8* %vl, i8** %vl.addr, align 2 +; CHECK: mov.w r15, [[REG:r[0-9]+]] +; CHECK-NEXT: add.w #2, [[REG]] +; CHECK-NEXT: mov.w [[REG]], 0(r1) + %0 = va_arg i8** %vl.addr, i16 +; CHECK-NEXT: mov.w 0(r15), r15 + ret i16 %0 +} + +define void @va_copy(i8* %vl) nounwind { +entry: +; CHECK: va_copy: + %vl.addr = alloca i8*, align 2 + %vl2 = alloca i8*, align 2 +; CHECK: mov.w r15, 2(r1) + store i8* %vl, i8** %vl.addr, align 2 + %0 = bitcast i8** %vl2 to i8* + %1 = bitcast i8** %vl.addr to i8* +; CHECK-NEXT: mov.w r15, 0(r1) + call void @llvm.va_copy(i8* %0, i8* %1) + ret void +} diff --git a/test/CodeGen/Mips/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/Mips/2010-04-07-DbgValueOtherTargets.ll deleted file mode 100644 index 994e19af4f87..000000000000 --- a/test/CodeGen/Mips/2010-04-07-DbgValueOtherTargets.ll +++ /dev/null @@ -1,28 +0,0 @@ -; RUN: llc -O0 -march=mips -asm-verbose < %s | FileCheck %s -; Check that DEBUG_VALUE comments come through on a variety of targets. - -define i32 @main() nounwind ssp { -entry: -; CHECK: DEBUG_VALUE - call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9 - ret i32 0, !dbg !10 -} - -declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone - -declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone - -!llvm.dbg.sp = !{!0} - -!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] -!4 = metadata !{metadata !5} -!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 0} -!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] -!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] -!9 = metadata !{i32 3, i32 11, metadata !8, null} -!10 = metadata !{i32 4, i32 2, metadata !8, null} - diff --git a/test/CodeGen/Mips/2010-07-20-Switch.ll b/test/CodeGen/Mips/2010-07-20-Switch.ll index 261fe9db1732..38d7b7e25592 100644 --- a/test/CodeGen/Mips/2010-07-20-Switch.ll +++ b/test/CodeGen/Mips/2010-07-20-Switch.ll @@ -1,6 +1,11 @@ -; RUN: llc < %s -march=mips -relocation-model=static | FileCheck %s -check-prefix=STATIC-O32 -; RUN: llc < %s -march=mips -relocation-model=pic | FileCheck 
%s -check-prefix=PIC-O32 -; RUN: llc < %s -march=mips64 -relocation-model=pic -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=PIC-N64 +; RUN: llc < %s -march=mips -relocation-model=static | \ +; RUN: FileCheck %s -check-prefix=STATIC-O32 +; RUN: llc < %s -march=mips -relocation-model=pic | \ +; RUN: FileCheck %s -check-prefix=PIC-O32 +; RUN: llc < %s -march=mips64 -relocation-model=pic -mcpu=mips64 | \ +; RUN: FileCheck %s -check-prefix=N64 +; RUN: llc < %s -march=mips64 -relocation-model=static -mcpu=mips64 | \ +; RUN: FileCheck %s -check-prefix=N64 define i32 @main() nounwind readnone { entry: @@ -17,12 +22,12 @@ entry: ; PIC-O32: lw $[[R4:[0-9]+]], %lo($JTI0_0)($[[R2]]) ; PIC-O32: addu $[[R5:[0-9]+]], $[[R4:[0-9]+]] ; PIC-O32: jr $[[R5]] -; PIC-N64: dsll $[[R0:[0-9]+]], ${{[0-9]+}}, 3 -; PIC-N64: ld $[[R1:[0-9]+]], %got_page($JTI0_0) -; PIC-N64: daddu $[[R2:[0-9]+]], $[[R0:[0-9]+]], $[[R1]] -; PIC-N64: ld $[[R4:[0-9]+]], %got_ofst($JTI0_0)($[[R2]]) -; PIC-N64: daddu $[[R5:[0-9]+]], $[[R4:[0-9]+]] -; PIC-N64: jr $[[R5]] +; N64: dsll $[[R0:[0-9]+]], ${{[0-9]+}}, 3 +; N64: ld $[[R1:[0-9]+]], %got_page($JTI0_0) +; N64: daddu $[[R2:[0-9]+]], $[[R0:[0-9]+]], $[[R1]] +; N64: ld $[[R4:[0-9]+]], %got_ofst($JTI0_0)($[[R2]]) +; N64: daddu $[[R5:[0-9]+]], $[[R4:[0-9]+]] +; N64: jr $[[R5]] switch i32 %0, label %bb4 [ i32 0, label %bb5 i32 1, label %bb1 @@ -58,10 +63,10 @@ bb5: ; preds = %entry ; PIC-O32: .gpword ; PIC-O32: .gpword ; PIC-O32: .gpword -; PIC-N64: .align 3 -; PIC-N64: $JTI0_0: -; PIC-N64: .gpdword -; PIC-N64: .gpdword -; PIC-N64: .gpdword -; PIC-N64: .gpdword +; N64: .align 3 +; N64: $JTI0_0: +; N64: .gpdword +; N64: .gpdword +; N64: .gpdword +; N64: .gpdword diff --git a/test/CodeGen/Mips/2012-12-12-ExpandMemcpy.ll b/test/CodeGen/Mips/2012-12-12-ExpandMemcpy.ll new file mode 100644 index 000000000000..9d4daee696db --- /dev/null +++ b/test/CodeGen/Mips/2012-12-12-ExpandMemcpy.ll @@ -0,0 +1,11 @@ +; RUN: llc -march=mips64el -mcpu=mips64r2 < %s + +@.str = private unnamed_addr constant [7 x i8] c"hello\0A\00", align 1 + +define void @t(i8* %ptr) { +entry: + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %ptr, i8* getelementptr inbounds ([7 x i8]* @.str, i64 0, i64 0), i64 7, i32 1, i1 false) + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind diff --git a/test/CodeGen/Mips/DbgValueOtherTargets.test b/test/CodeGen/Mips/DbgValueOtherTargets.test new file mode 100644 index 000000000000..da20e7ef5224 --- /dev/null +++ b/test/CodeGen/Mips/DbgValueOtherTargets.test @@ -0,0 +1 @@ +RUN: llc -O0 -march=mips -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll diff --git a/test/CodeGen/Mips/addi.ll b/test/CodeGen/Mips/addi.ll new file mode 100644 index 000000000000..8f70a469c44f --- /dev/null +++ b/test/CodeGen/Mips/addi.ll @@ -0,0 +1,30 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -mips16-hard-float -soft-float -relocation-model=static < %s | FileCheck %s -check-prefix=16 + +@i = global i32 6, align 4 +@j = global i32 12, align 4 +@k = global i32 15, align 4 +@l = global i32 20, align 4 +@.str = private unnamed_addr constant [13 x i8] c"%i %i %i %i\0A\00", align 1 + +define void @foo() nounwind { +entry: + %0 = load i32* @i, align 4 + %add = add nsw i32 %0, 5 + store i32 %add, i32* @i, align 4 + %1 = load i32* @j, align 4 + %sub = sub nsw i32 %1, 5 + store i32 %sub, i32* @j, align 4 + %2 = load i32* @k, align 4 + %add1 = add nsw i32 %2, 10000 + store i32 %add1, i32* @k, align 4 + %3 = load 
i32* @l, align 4 + %sub2 = sub nsw i32 %3, 10000 + store i32 %sub2, i32* @l, align 4 +; 16: addiu ${{[0-9]+}}, 5 # 16 bit inst +; 16: addiu ${{[0-9]+}}, -5 # 16 bit inst +; 16: addiu ${{[0-9]+}}, 10000 +; 16: addiu ${{[0-9]+}}, -10000 + ret void +} + + diff --git a/test/CodeGen/Mips/addressing-mode.ll b/test/CodeGen/Mips/addressing-mode.ll new file mode 100644 index 000000000000..ea76dde82dc3 --- /dev/null +++ b/test/CodeGen/Mips/addressing-mode.ll @@ -0,0 +1,41 @@ +; RUN: llc -march=mipsel < %s | FileCheck %s + +@g0 = common global i32 0, align 4 +@g1 = common global i32 0, align 4 + +; Check that LSR doesn't choose a solution with a formula "reg + 4*reg". +; +; CHECK: $BB0_2: +; CHECK-NOT: sll ${{[0-9]+}}, ${{[0-9]+}}, 2 + +define i32 @f0(i32 %n, i32 %m, [256 x i32]* nocapture %a, [256 x i32]* nocapture %b) nounwind readonly { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: + %s.022 = phi i32 [ 0, %entry ], [ %add7, %for.inc9 ] + %i.021 = phi i32 [ 0, %entry ], [ %add10, %for.inc9 ] + br label %for.body3 + +for.body3: + %s.120 = phi i32 [ %s.022, %for.cond1.preheader ], [ %add7, %for.body3 ] + %j.019 = phi i32 [ 0, %for.cond1.preheader ], [ %add8, %for.body3 ] + %arrayidx4 = getelementptr inbounds [256 x i32]* %a, i32 %i.021, i32 %j.019 + %0 = load i32* %arrayidx4, align 4 + %arrayidx6 = getelementptr inbounds [256 x i32]* %b, i32 %i.021, i32 %j.019 + %1 = load i32* %arrayidx6, align 4 + %add = add i32 %0, %s.120 + %add7 = add i32 %add, %1 + %add8 = add nsw i32 %j.019, %m + %cmp2 = icmp slt i32 %add8, 64 + br i1 %cmp2, label %for.body3, label %for.inc9 + +for.inc9: + %add10 = add nsw i32 %i.021, %n + %cmp = icmp slt i32 %add10, 64 + br i1 %cmp, label %for.cond1.preheader, label %for.end11 + +for.end11: + ret i32 %add7 +} + diff --git a/test/CodeGen/Mips/align16.ll b/test/CodeGen/Mips/align16.ll new file mode 100644 index 000000000000..99139abbe848 --- /dev/null +++ b/test/CodeGen/Mips/align16.ll @@ -0,0 +1,31 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=16 + +@i = global i32 25, align 4 +@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1 + +define void @p(i32* %i) nounwind { +entry: + ret void +} + + +define void @foo() nounwind { +entry: + %y = alloca [512 x i32], align 4 + %x = alloca i32, align 8 + %zz = alloca i32, align 4 + %z = alloca i32, align 4 + %0 = load i32* @i, align 4 + %arrayidx = getelementptr inbounds [512 x i32]* %y, i32 0, i32 10 + store i32 %0, i32* %arrayidx, align 4 + %1 = load i32* @i, align 4 + store i32 %1, i32* %x, align 8 + call void @p(i32* %x) + %arrayidx1 = getelementptr inbounds [512 x i32]* %y, i32 0, i32 10 + call void @p(i32* %arrayidx1) + ret void +} +; 16: save $ra, $s0, $s1, 2040 +; 16: addiu $sp, -48 # 16 bit inst +; 16: addiu $sp, 48 # 16 bit inst +; 16: restore $ra, $s0, $s1, 2040
\ No newline at end of file diff --git a/test/CodeGen/Mips/alloca.ll b/test/CodeGen/Mips/alloca.ll index 29f43c8afa18..d79ea9193d28 100644 --- a/test/CodeGen/Mips/alloca.ll +++ b/test/CodeGen/Mips/alloca.ll @@ -3,11 +3,11 @@ define i32 @twoalloca(i32 %size) nounwind { entry: ; CHECK: subu $[[T0:[0-9]+]], $sp, $[[SZ:[0-9]+]] -; CHECK: addu $sp, $zero, $[[T0]] +; CHECK: move $sp, $[[T0]] ; CHECK: subu $[[T2:[0-9]+]], $sp, $[[SZ]] -; CHECK: addu $sp, $zero, $[[T2]] -; CHECK: addu $4, $zero, $[[T0]] -; CHECK: addu $4, $zero, $[[T2]] +; CHECK: move $sp, $[[T2]] +; CHECK: move $4, $[[T0]] +; CHECK: move $4, $[[T2]] %tmp1 = alloca i8, i32 %size, align 4 %add.ptr = getelementptr inbounds i8* %tmp1, i32 5 store i8 97, i8* %add.ptr, align 1 @@ -29,7 +29,7 @@ define i32 @alloca2(i32 %size) nounwind { entry: ; CHECK: alloca2 ; CHECK: subu $[[T0:[0-9]+]], $sp -; CHECK: addu $sp, $zero, $[[T0]] +; CHECK: move $sp, $[[T0]] %tmp1 = alloca i8, i32 %size, align 4 %0 = bitcast i8* %tmp1 to i32* diff --git a/test/CodeGen/Mips/alloca16.ll b/test/CodeGen/Mips/alloca16.ll index 731edae43cbb..5ae9a847917b 100644 --- a/test/CodeGen/Mips/alloca16.ll +++ b/test/CodeGen/Mips/alloca16.ll @@ -68,8 +68,8 @@ entry: %21 = load i32** %ip, align 4 %arrayidx6 = getelementptr inbounds i32* %21, i32 %20 %22 = load i32* %arrayidx6, align 4 -; 16: save 16 +; 16: addiu $sp, -16 call void @temp(i32 %22) -; 16: restore 16 +; 16: addiu $sp, 16 ret void } diff --git a/test/CodeGen/Mips/br-jmp.ll b/test/CodeGen/Mips/br-jmp.ll index 1b5513ab394d..9ca8d159614f 100644 --- a/test/CodeGen/Mips/br-jmp.ll +++ b/test/CodeGen/Mips/br-jmp.ll @@ -1,5 +1,7 @@ ; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s -check-prefix=CHECK-PIC ; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck %s -check-prefix=CHECK-STATIC +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=CHECK-PIC16 +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=CHECK-STATIC16 define void @count(i32 %x, i32 %y, i32 %z) noreturn nounwind readnone { entry: @@ -11,3 +13,6 @@ bosco: ; preds = %bosco, %entry ; CHECK-PIC: b $BB0_1 ; CHECK-STATIC: j $BB0_1 +; CHECK-PIC16: b $BB0_1 +; CHECK-STATIC16: b $BB0_1 + diff --git a/test/CodeGen/Mips/brdelayslot.ll b/test/CodeGen/Mips/brdelayslot.ll index 2fdb736dc886..2deb037c9c39 100644 --- a/test/CodeGen/Mips/brdelayslot.ll +++ b/test/CodeGen/Mips/brdelayslot.ll @@ -1,5 +1,12 @@ ; RUN: llc -march=mipsel -O0 < %s | FileCheck %s -check-prefix=None ; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=Default +; RUN: llc -march=mipsel -O1 -relocation-model=static < %s | \ +; RUN: FileCheck %s -check-prefix=STATICO1 +; RUN: llc -march=mipsel -disable-mips-df-forward-search=false \ +; RUN: -relocation-model=static < %s | FileCheck %s -check-prefix=FORWARD +; RUN: llc -march=mipsel -disable-mips-df-backward-search \ +; RUN: -disable-mips-df-succbb-search=false < %s | \ +; RUN: FileCheck %s -check-prefix=SUCCBB define void @foo1() nounwind { entry: @@ -35,3 +42,137 @@ entry: declare void @foo4(double) +@g2 = external global i32 +@g1 = external global i32 +@g3 = external global i32 + +; Check that branch delay slot can be filled with an instruction with operand +; $1. 
+; +; Default: foo5: +; Default-NOT: nop + +define void @foo5(i32 %a) nounwind { +entry: + %0 = load i32* @g2, align 4 + %tobool = icmp eq i32 %a, 0 + br i1 %tobool, label %if.else, label %if.then + +if.then: + %1 = load i32* @g1, align 4 + %add = add nsw i32 %1, %0 + store i32 %add, i32* @g1, align 4 + br label %if.end + +if.else: + %2 = load i32* @g3, align 4 + %sub = sub nsw i32 %2, %0 + store i32 %sub, i32* @g3, align 4 + br label %if.end + +if.end: + ret void +} + +; Check that delay slot filler can place mov.s or mov.d in delay slot. +; +; Default: foo6: +; Default-NOT: nop +; Default: .end foo6 + +define void @foo6(float %a0, double %a1) nounwind { +entry: + tail call void @foo7(double %a1, float %a0) nounwind + ret void +} + +declare void @foo7(double, float) + +; Check that a store can move past other memory instructions. +; +; STATICO1: foo8: +; STATICO1: jalr ${{[0-9]+}} +; STATICO1-NEXT: sw ${{[0-9]+}}, %lo(g1) + +@foo9 = common global void ()* null, align 4 + +define i32 @foo8(i32 %a) nounwind { +entry: + store i32 %a, i32* @g1, align 4 + %0 = load void ()** @foo9, align 4 + tail call void %0() nounwind + %1 = load i32* @g1, align 4 + %add = add nsw i32 %1, %a + ret i32 %add +} + +; Test searchForward. Check that the second jal's slot is filled with another +; instruction in the same block. +; +; FORWARD: foo10: +; FORWARD: jal foo11 +; FORWARD: jal foo11 +; FORWARD-NOT: nop +; FORWARD: end foo10 + +define void @foo10() nounwind { +entry: + tail call void @foo11() nounwind + tail call void @foo11() nounwind + store i32 0, i32* @g1, align 4 + tail call void @foo11() nounwind + store i32 0, i32* @g1, align 4 + ret void +} + +declare void @foo11() + +; Check that delay slots of branches in both the entry block and loop body are +; filled. +; +; SUCCBB: succbbs_loop1: +; SUCCBB: bne ${{[0-9]+}}, $zero, $BB +; SUCCBB-NEXT: addiu +; SUCCBB: bne ${{[0-9]+}}, $zero, $BB +; SUCCBB-NEXT: addiu + +define i32 @succbbs_loop1(i32* nocapture %a, i32 %n) { +entry: + %cmp4 = icmp sgt i32 %n, 0 + br i1 %cmp4, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %s.06 = phi i32 [ %add, %for.body ], [ 0, %entry ] + %i.05 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32* %a, i32 %i.05 + %0 = load i32* %arrayidx, align 4 + %add = add nsw i32 %0, %s.06 + %inc = add nsw i32 %i.05, 1 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %s.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] + ret i32 %s.0.lcssa +} + +; Check that the first branch has its slot filled. +; +; SUCCBB: succbbs_br1: +; SUCCBB: beq ${{[0-9]+}}, $zero, $BB +; SUCCBB-NEXT: lw $25, %call16(foo100) + +define void @succbbs_br1(i32 %a) { +entry: + %tobool = icmp eq i32 %a, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + tail call void @foo100() #1 + br label %if.end + +if.end: ; preds = %entry, %if.then + ret void +} + +declare void @foo100() diff --git a/test/CodeGen/Mips/cmov.ll b/test/CodeGen/Mips/cmov.ll index 3af899a4e258..81925a4953ce 100755 --- a/test/CodeGen/Mips/cmov.ll +++ b/test/CodeGen/Mips/cmov.ll @@ -59,3 +59,140 @@ entry: ret i64 %cond } +; slti and conditional move. +; +; Check that, pattern +; (select (setgt a, N), t, f) +; turns into +; (movz t, (setlt a, N + 1), f) +; if N + 1 fits in 16-bit. 
+ +; O32: slti0: +; O32: slti $[[R0:[0-9]+]], ${{[0-9]+}}, 32767 +; O32: movz ${{[0-9]+}}, ${{[0-9]+}}, $[[R0]] + +define i32 @slti0(i32 %a) { +entry: + %cmp = icmp sgt i32 %a, 32766 + %cond = select i1 %cmp, i32 3, i32 4 + ret i32 %cond +} + +; O32: slti1: +; O32: slt ${{[0-9]+}} + +define i32 @slti1(i32 %a) { +entry: + %cmp = icmp sgt i32 %a, 32767 + %cond = select i1 %cmp, i32 3, i32 4 + ret i32 %cond +} + +; O32: slti2: +; O32: slti $[[R0:[0-9]+]], ${{[0-9]+}}, -32768 +; O32: movz ${{[0-9]+}}, ${{[0-9]+}}, $[[R0]] + +define i32 @slti2(i32 %a) { +entry: + %cmp = icmp sgt i32 %a, -32769 + %cond = select i1 %cmp, i32 3, i32 4 + ret i32 %cond +} + +; O32: slti3: +; O32: slt ${{[0-9]+}} + +define i32 @slti3(i32 %a) { +entry: + %cmp = icmp sgt i32 %a, -32770 + %cond = select i1 %cmp, i32 3, i32 4 + ret i32 %cond +} + +; 64-bit patterns. + +; N64: slti64_0: +; N64: slti $[[R0:[0-9]+]], ${{[0-9]+}}, 32767 +; N64: movz ${{[0-9]+}}, ${{[0-9]+}}, $[[R0]] + +define i64 @slti64_0(i64 %a) { +entry: + %cmp = icmp sgt i64 %a, 32766 + %conv = select i1 %cmp, i64 3, i64 4 + ret i64 %conv +} + +; N64: slti64_1: +; N64: slt ${{[0-9]+}} + +define i64 @slti64_1(i64 %a) { +entry: + %cmp = icmp sgt i64 %a, 32767 + %conv = select i1 %cmp, i64 3, i64 4 + ret i64 %conv +} + +; N64: slti64_2: +; N64: slti $[[R0:[0-9]+]], ${{[0-9]+}}, -32768 +; N64: movz ${{[0-9]+}}, ${{[0-9]+}}, $[[R0]] + +define i64 @slti64_2(i64 %a) { +entry: + %cmp = icmp sgt i64 %a, -32769 + %conv = select i1 %cmp, i64 3, i64 4 + ret i64 %conv +} + +; N64: slti64_3: +; N64: slt ${{[0-9]+}} + +define i64 @slti64_3(i64 %a) { +entry: + %cmp = icmp sgt i64 %a, -32770 + %conv = select i1 %cmp, i64 3, i64 4 + ret i64 %conv +} + +; sltiu instructions. + +; O32: sltiu0: +; O32: sltiu $[[R0:[0-9]+]], ${{[0-9]+}}, 32767 +; O32: movz ${{[0-9]+}}, ${{[0-9]+}}, $[[R0]] + +define i32 @sltiu0(i32 %a) { +entry: + %cmp = icmp ugt i32 %a, 32766 + %cond = select i1 %cmp, i32 3, i32 4 + ret i32 %cond +} + +; O32: sltiu1: +; O32: sltu ${{[0-9]+}} + +define i32 @sltiu1(i32 %a) { +entry: + %cmp = icmp ugt i32 %a, 32767 + %cond = select i1 %cmp, i32 3, i32 4 + ret i32 %cond +} + +; O32: sltiu2: +; O32: sltiu $[[R0:[0-9]+]], ${{[0-9]+}}, -32768 +; O32: movz ${{[0-9]+}}, ${{[0-9]+}}, $[[R0]] + +define i32 @sltiu2(i32 %a) { +entry: + %cmp = icmp ugt i32 %a, -32769 + %cond = select i1 %cmp, i32 3, i32 4 + ret i32 %cond +} + +; O32: sltiu3: +; O32: sltu ${{[0-9]+}} + +define i32 @sltiu3(i32 %a) { +entry: + %cmp = icmp ugt i32 %a, -32770 + %cond = select i1 %cmp, i32 3, i32 4 + ret i32 %cond +} diff --git a/test/CodeGen/Mips/dsp-patterns.ll b/test/CodeGen/Mips/dsp-patterns.ll new file mode 100644 index 000000000000..0752f69c3e9e --- /dev/null +++ b/test/CodeGen/Mips/dsp-patterns.ll @@ -0,0 +1,31 @@ +; RUN: llc -march=mips -mattr=dsp < %s | FileCheck %s + +; CHECK: test_lbux: +; CHECK: lbux ${{[0-9]+}} + +define zeroext i8 @test_lbux(i8* nocapture %b, i32 %i) { +entry: + %add.ptr = getelementptr inbounds i8* %b, i32 %i + %0 = load i8* %add.ptr, align 1 + ret i8 %0 +} + +; CHECK: test_lhx: +; CHECK: lhx ${{[0-9]+}} + +define signext i16 @test_lhx(i16* nocapture %b, i32 %i) { +entry: + %add.ptr = getelementptr inbounds i16* %b, i32 %i + %0 = load i16* %add.ptr, align 2 + ret i16 %0 +} + +; CHECK: test_lwx: +; CHECK: lwx ${{[0-9]+}} + +define i32 @test_lwx(i32* nocapture %b, i32 %i) { +entry: + %add.ptr = getelementptr inbounds i32* %b, i32 %i + %0 = load i32* %add.ptr, align 4 + ret i32 %0 +} diff --git a/test/CodeGen/Mips/eh-return32.ll b/test/CodeGen/Mips/eh-return32.ll 
new file mode 100644 index 000000000000..c3003b34b162 --- /dev/null +++ b/test/CodeGen/Mips/eh-return32.ll @@ -0,0 +1,85 @@ +; RUN: llc -march=mipsel -mcpu=mips32 < %s | FileCheck %s + +declare void @llvm.eh.return.i32(i32, i8*) +declare void @foo(...) + +define i8* @f1(i32 %offset, i8* %handler) { +entry: + call void (...)* @foo() + call void @llvm.eh.return.i32(i32 %offset, i8* %handler) + unreachable + +; CHECK: f1 +; CHECK: addiu $sp, $sp, -[[spoffset:[0-9]+]] + +; check that $a0-$a3 are saved on stack. +; CHECK: sw $4, [[offset0:[0-9]+]]($sp) +; CHECK: sw $5, [[offset1:[0-9]+]]($sp) +; CHECK: sw $6, [[offset2:[0-9]+]]($sp) +; CHECK: sw $7, [[offset3:[0-9]+]]($sp) + +; check that .cfi_offset directives are emitted for $a0-$a3. +; CHECK: .cfi_offset 4, +; CHECK: .cfi_offset 5, +; CHECK: .cfi_offset 6, +; CHECK: .cfi_offset 7, + +; check that stack adjustment and handler are put in $v1 and $v0. +; CHECK: move $[[R0:[a-z0-9]+]], $5 +; CHECK: move $[[R1:[a-z0-9]+]], $4 +; CHECK: move $3, $[[R1]] +; CHECK: move $2, $[[R0]] + +; check that $a0-$a3 are restored from stack. +; CHECK: lw $4, [[offset0]]($sp) +; CHECK: lw $5, [[offset1]]($sp) +; CHECK: lw $6, [[offset2]]($sp) +; CHECK: lw $7, [[offset3]]($sp) + +; check that stack is adjusted by $v1 and that code returns to address in $v0 +; also check that $25 contains handler value +; CHECK: addiu $sp, $sp, [[spoffset]] +; CHECK: move $25, $2 +; CHECK: move $ra, $2 +; CHECK: jr $ra +; CHECK: addu $sp, $sp, $3 +} + +define i8* @f2(i32 %offset, i8* %handler) { +entry: + call void @llvm.eh.return.i32(i32 %offset, i8* %handler) + unreachable + +; CHECK: f2 +; CHECK: addiu $sp, $sp, -[[spoffset:[0-9]+]] + +; check that $a0-$a3 are saved on stack. +; CHECK: sw $4, [[offset0:[0-9]+]]($sp) +; CHECK: sw $5, [[offset1:[0-9]+]]($sp) +; CHECK: sw $6, [[offset2:[0-9]+]]($sp) +; CHECK: sw $7, [[offset3:[0-9]+]]($sp) + +; check that .cfi_offset directives are emitted for $a0-$a3. +; CHECK: .cfi_offset 4, +; CHECK: .cfi_offset 5, +; CHECK: .cfi_offset 6, +; CHECK: .cfi_offset 7, + +; check that stack adjustment and handler are put in $v1 and $v0. +; CHECK: move $3, $4 +; CHECK: move $2, $5 + +; check that $a0-$a3 are restored from stack. +; CHECK: lw $4, [[offset0]]($sp) +; CHECK: lw $5, [[offset1]]($sp) +; CHECK: lw $6, [[offset2]]($sp) +; CHECK: lw $7, [[offset3]]($sp) + +; check that stack is adjusted by $v1 and that code returns to address in $v0 +; also check that $25 contains handler value +; CHECK: addiu $sp, $sp, [[spoffset]] +; CHECK: move $25, $2 +; CHECK: move $ra, $2 +; CHECK: jr $ra +; CHECK: addu $sp, $sp, $3 +} diff --git a/test/CodeGen/Mips/eh-return64.ll b/test/CodeGen/Mips/eh-return64.ll new file mode 100644 index 000000000000..373a9a114453 --- /dev/null +++ b/test/CodeGen/Mips/eh-return64.ll @@ -0,0 +1,87 @@ +; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s + +declare void @llvm.eh.return.i64(i64, i8*) +declare void @foo(...) + +define void @f1(i64 %offset, i8* %handler) { +entry: + call void (...)* @foo() + call void @llvm.eh.return.i64(i64 %offset, i8* %handler) + unreachable + +; CHECK: f1 +; CHECK: daddiu $sp, $sp, -[[spoffset:[0-9]+]] + +; check that $a0-$a3 are saved on stack. +; CHECK: sd $4, [[offset0:[0-9]+]]($sp) +; CHECK: sd $5, [[offset1:[0-9]+]]($sp) +; CHECK: sd $6, [[offset2:[0-9]+]]($sp) +; CHECK: sd $7, [[offset3:[0-9]+]]($sp) + +; check that .cfi_offset directives are emitted for $a0-$a3. 
+; CHECK: .cfi_offset 4, +; CHECK: .cfi_offset 5, +; CHECK: .cfi_offset 6, +; CHECK: .cfi_offset 7, + +; check that stack adjustment and handler are put in $v1 and $v0. +; CHECK: move $[[R0:[a-z0-9]+]], $5 +; CHECK: move $[[R1:[a-z0-9]+]], $4 +; CHECK: move $3, $[[R1]] +; CHECK: move $2, $[[R0]] + +; check that $a0-$a3 are restored from stack. +; CHECK: ld $4, [[offset0]]($sp) +; CHECK: ld $5, [[offset1]]($sp) +; CHECK: ld $6, [[offset2]]($sp) +; CHECK: ld $7, [[offset3]]($sp) + +; check that stack is adjusted by $v1 and that code returns to address in $v0 +; also check that $25 contains handler value +; CHECK: daddiu $sp, $sp, [[spoffset]] +; CHECK: move $25, $2 +; CHECK: move $ra, $2 +; CHECK: jr $ra +; CHECK: daddu $sp, $sp, $3 + +} + +define void @f2(i64 %offset, i8* %handler) { +entry: + call void @llvm.eh.return.i64(i64 %offset, i8* %handler) + unreachable + +; CHECK: f2 +; CHECK: daddiu $sp, $sp, -[[spoffset:[0-9]+]] + +; check that $a0-$a3 are saved on stack. +; CHECK: sd $4, [[offset0:[0-9]+]]($sp) +; CHECK: sd $5, [[offset1:[0-9]+]]($sp) +; CHECK: sd $6, [[offset2:[0-9]+]]($sp) +; CHECK: sd $7, [[offset3:[0-9]+]]($sp) + +; check that .cfi_offset directives are emitted for $a0-$a3. +; CHECK: .cfi_offset 4, +; CHECK: .cfi_offset 5, +; CHECK: .cfi_offset 6, +; CHECK: .cfi_offset 7, + +; check that stack adjustment and handler are put in $v1 and $v0. +; CHECK: move $3, $4 +; CHECK: move $2, $5 + +; check that $a0-$a3 are restored from stack. +; CHECK: ld $4, [[offset0]]($sp) +; CHECK: ld $5, [[offset1]]($sp) +; CHECK: ld $6, [[offset2]]($sp) +; CHECK: ld $7, [[offset3]]($sp) + +; check that stack is adjusted by $v1 and that code returns to address in $v0 +; also check that $25 contains handler value +; CHECK: daddiu $sp, $sp, [[spoffset]] +; CHECK: move $25, $2 +; CHECK: move $ra, $2 +; CHECK: jr $ra +; CHECK: daddu $sp, $sp, $3 + +} diff --git a/test/CodeGen/Mips/ex2.ll b/test/CodeGen/Mips/ex2.ll new file mode 100644 index 000000000000..67d19e4b84ca --- /dev/null +++ b/test/CodeGen/Mips/ex2.ll @@ -0,0 +1,29 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1 +@_ZTIPKc = external constant i8* + +define i32 @main() { +; 16: main: +; 16: .cfi_startproc +; 16: save $ra, $s0, $s1, 32 +; 16: .cfi_offset 17, -8 +; 16: .cfi_offset 16, -12 +; 16: .cfi_offset 31, -4 +entry: + %retval = alloca i32, align 4 + store i32 0, i32* %retval + %exception = call i8* @__cxa_allocate_exception(i32 4) nounwind + %0 = bitcast i8* %exception to i8** + store i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 0), i8** %0 + call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIPKc to i8*), i8* null) noreturn + unreachable + +return: ; No predecessors! 
+ %1 = load i32* %retval + ret i32 %1 +} + +declare i8* @__cxa_allocate_exception(i32) + +declare void @__cxa_throw(i8*, i8*, i8*) diff --git a/test/CodeGen/Mips/fp16static.ll b/test/CodeGen/Mips/fp16static.ll new file mode 100644 index 000000000000..240ec75a36b6 --- /dev/null +++ b/test/CodeGen/Mips/fp16static.ll @@ -0,0 +1,13 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -mips16-hard-float -soft-float -relocation-model=static < %s | FileCheck %s -check-prefix=CHECK-STATIC16 + +@x = common global float 0.000000e+00, align 4 + +define void @foo() nounwind { +entry: + %0 = load float* @x, align 4 + %1 = load float* @x, align 4 + %mul = fmul float %0, %1 + store float %mul, float* @x, align 4 +; CHECK-STATIC16: jal __mips16_mulsf3 + ret void +} diff --git a/test/CodeGen/Mips/frame-address.ll b/test/CodeGen/Mips/frame-address.ll index 9df1808fde53..92946d9ffd68 100644 --- a/test/CodeGen/Mips/frame-address.ll +++ b/test/CodeGen/Mips/frame-address.ll @@ -7,6 +7,6 @@ entry: %0 = call i8* @llvm.frameaddress(i32 0) ret i8* %0 -; CHECK: addu $fp, $sp, $zero -; CHECK: addu $2, $zero, $fp +; CHECK: move $fp, $sp +; CHECK: move $2, $fp } diff --git a/test/CodeGen/Mips/gpreg-lazy-binding.ll b/test/CodeGen/Mips/gpreg-lazy-binding.ll new file mode 100644 index 000000000000..88e596b3bb0d --- /dev/null +++ b/test/CodeGen/Mips/gpreg-lazy-binding.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=mipsel -disable-mips-delay-filler < %s | FileCheck %s + +@g = external global i32 + +; CHECK: move $gp +; CHECK: jalr $25 +; CHECK: nop +; CHECK-NOT: move $gp +; CHECK: jalr $25 + +define void @f0() nounwind { +entry: + tail call void @externalFunc() nounwind + tail call fastcc void @internalFunc() + ret void +} + +declare void @externalFunc() + +define internal fastcc void @internalFunc() nounwind noinline { +entry: + %0 = load i32* @g, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* @g, align 4 + ret void +} + diff --git a/test/CodeGen/Mips/helloworld.ll b/test/CodeGen/Mips/helloworld.ll index aee58b650e7a..56ee60785f46 100644 --- a/test/CodeGen/Mips/helloworld.ll +++ b/test/CodeGen/Mips/helloworld.ll @@ -1,9 +1,11 @@ ; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=C1 ; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=C2 ; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=PE +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=static -O3 < %s | FileCheck %s -check-prefix=ST1 +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=static -O3 < %s | FileCheck %s -check-prefix=ST2 ; -; re-enable this when mips16's jalr is fixed. 
-; DISABLED: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=SR +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=SR +; RUN: llc -march=mipsel -mcpu=mips32 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=SR32 @.str = private unnamed_addr constant [13 x i8] c"hello world\0A\00", align 1 @@ -15,7 +17,15 @@ entry: ; SR: .set mips16 # @main -; SR: save $ra, [[FS:[0-9]+]] +; SR32: .set nomips16 +; SR32: .ent main +; SR-NOT: .set noreorder +; SR-NOT: .set nomacro +; SR-NOT: .set noat +; SR32: .set noreorder +; SR32: .set nomacro +; SR32: .set noat +; SR: save $ra, $s0, $s1, [[FS:[0-9]+]] ; PE: li $[[T1:[0-9]+]], %hi(_gp_disp) ; PE: addiu $[[T2:[0-9]+]], $pc, %lo(_gp_disp) ; PE: sll $[[T3:[0-9]+]], $[[T1]], 16 @@ -25,10 +35,23 @@ entry: ; C2: move $25, ${{[0-9]+}} ; C1: move $gp, ${{[0-9]+}} ; C1: jalrc ${{[0-9]+}} -; SR: restore $ra, [[FS]] +; SR: restore $ra, $s0, $s1, [[FS]] ; PE: li $2, 0 ; PE: jrc $ra +; ST1: li ${{[0-9]+}}, %hi($.str) +; ST1: sll ${{[0-9]+}}, ${{[0-9]+}}, 16 +; ST1: addiu ${{[0-9]+}}, %lo($.str) +; ST2: li ${{[0-9]+}}, %hi($.str) +; ST2: jal printf } +; SR-NOT: .set at +; SR-NOT: .set macro +; SR-NOT: .set reorder +; SR32: .set at +; SR32: .set macro +; SR32: .set reorder +; SR: .end main +; SR32: .end main declare i32 @printf(i8*, ...) diff --git a/test/CodeGen/Mips/hf16_1.ll b/test/CodeGen/Mips/hf16_1.ll new file mode 100644 index 000000000000..c7454ee0a8dd --- /dev/null +++ b/test/CodeGen/Mips/hf16_1.ll @@ -0,0 +1,256 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -soft-float -mips16-hard-float -O3 < %s | FileCheck %s -check-prefix=1 +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -soft-float -mips16-hard-float -O3 < %s | FileCheck %s -check-prefix=2 + + +@x = common global float 0.000000e+00, align 4 +@xd = common global double 0.000000e+00, align 8 +@y = common global float 0.000000e+00, align 4 +@yd = common global double 0.000000e+00, align 8 +@xy = common global { float, float } zeroinitializer, align 4 +@xyd = common global { double, double } zeroinitializer, align 8 + +define void @foo() nounwind { +entry: + %0 = load float* @x, align 4 + call void @v_sf(float %0) + %1 = load double* @xd, align 8 + call void @v_df(double %1) + %2 = load float* @x, align 4 + %3 = load float* @y, align 4 + call void @v_sf_sf(float %2, float %3) + %4 = load double* @xd, align 8 + %5 = load float* @x, align 4 + call void @v_df_sf(double %4, float %5) + %6 = load double* @xd, align 8 + %7 = load double* @yd, align 8 + call void @v_df_df(double %6, double %7) + %call = call float @sf_v() + %8 = load float* @x, align 4 + %call1 = call float @sf_sf(float %8) + %9 = load double* @xd, align 8 + %call2 = call float @sf_df(double %9) + %10 = load float* @x, align 4 + %11 = load float* @y, align 4 + %call3 = call float @sf_sf_sf(float %10, float %11) + %12 = load double* @xd, align 8 + %13 = load float* @x, align 4 + %call4 = call float @sf_df_sf(double %12, float %13) + %14 = load double* @xd, align 8 + %15 = load double* @yd, align 8 + %call5 = call float @sf_df_df(double %14, double %15) + %call6 = call double @df_v() + %16 = load float* @x, align 4 + %call7 = call double @df_sf(float %16) + %17 = load double* @xd, align 8 + %call8 = call double @df_df(double %17) + %18 = load float* @x, align 4 + %19 = load float* @y, align 4 + %call9 = call double @df_sf_sf(float %18, float %19) + %20 = load double* @xd, align 8 + %21 = load float* @x, align 4 + %call10 = 
call double @df_df_sf(double %20, float %21) + %22 = load double* @xd, align 8 + %23 = load double* @yd, align 8 + %call11 = call double @df_df_df(double %22, double %23) + %call12 = call { float, float } @sc_v() + %24 = extractvalue { float, float } %call12, 0 + %25 = extractvalue { float, float } %call12, 1 + %26 = load float* @x, align 4 + %call13 = call { float, float } @sc_sf(float %26) + %27 = extractvalue { float, float } %call13, 0 + %28 = extractvalue { float, float } %call13, 1 + %29 = load double* @xd, align 8 + %call14 = call { float, float } @sc_df(double %29) + %30 = extractvalue { float, float } %call14, 0 + %31 = extractvalue { float, float } %call14, 1 + %32 = load float* @x, align 4 + %33 = load float* @y, align 4 + %call15 = call { float, float } @sc_sf_sf(float %32, float %33) + %34 = extractvalue { float, float } %call15, 0 + %35 = extractvalue { float, float } %call15, 1 + %36 = load double* @xd, align 8 + %37 = load float* @x, align 4 + %call16 = call { float, float } @sc_df_sf(double %36, float %37) + %38 = extractvalue { float, float } %call16, 0 + %39 = extractvalue { float, float } %call16, 1 + %40 = load double* @xd, align 8 + %41 = load double* @yd, align 8 + %call17 = call { float, float } @sc_df_df(double %40, double %41) + %42 = extractvalue { float, float } %call17, 0 + %43 = extractvalue { float, float } %call17, 1 + %call18 = call { double, double } @dc_v() + %44 = extractvalue { double, double } %call18, 0 + %45 = extractvalue { double, double } %call18, 1 + %46 = load float* @x, align 4 + %call19 = call { double, double } @dc_sf(float %46) + %47 = extractvalue { double, double } %call19, 0 + %48 = extractvalue { double, double } %call19, 1 + %49 = load double* @xd, align 8 + %call20 = call { double, double } @dc_df(double %49) + %50 = extractvalue { double, double } %call20, 0 + %51 = extractvalue { double, double } %call20, 1 + %52 = load float* @x, align 4 + %53 = load float* @y, align 4 + %call21 = call { double, double } @dc_sf_sf(float %52, float %53) + %54 = extractvalue { double, double } %call21, 0 + %55 = extractvalue { double, double } %call21, 1 + %56 = load double* @xd, align 8 + %57 = load float* @x, align 4 + %call22 = call { double, double } @dc_df_sf(double %56, float %57) + %58 = extractvalue { double, double } %call22, 0 + %59 = extractvalue { double, double } %call22, 1 + %60 = load double* @xd, align 8 + %61 = load double* @yd, align 8 + %call23 = call { double, double } @dc_df_df(double %60, double %61) + %62 = extractvalue { double, double } %call23, 0 + %63 = extractvalue { double, double } %call23, 1 + ret void +} + +declare void @v_sf(float) + +declare void @v_df(double) + +declare void @v_sf_sf(float, float) + +declare void @v_df_sf(double, float) + +declare void @v_df_df(double, double) + +declare float @sf_v() + +declare float @sf_sf(float) + +declare float @sf_df(double) + +declare float @sf_sf_sf(float, float) + +declare float @sf_df_sf(double, float) + +declare float @sf_df_df(double, double) + +declare double @df_v() + +declare double @df_sf(float) + +declare double @df_df(double) + +declare double @df_sf_sf(float, float) + +declare double @df_df_sf(double, float) + +declare double @df_df_df(double, double) + +declare { float, float } @sc_v() + +declare { float, float } @sc_sf(float) + +declare { float, float } @sc_df(double) + +declare { float, float } @sc_sf_sf(float, float) + +declare { float, float } @sc_df_sf(double, float) + +declare { float, float } @sc_df_df(double, double) + +declare { double, double } @dc_v() + 
+declare { double, double } @dc_sf(float) + +declare { double, double } @dc_df(double) + +declare { double, double } @dc_sf_sf(float, float) + +declare { double, double } @dc_df_sf(double, float) + +declare { double, double } @dc_df_df(double, double) + +; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_1)(${{[0-9]+}}) +; 2: lw ${{[0-9]+}}, %call16(v_sf)(${{[0-9]+}}) + +; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_2)(${{[0-9]+}}) +; 2: lw ${{[0-9]+}}, %call16(v_df)(${{[0-9]+}}) + +; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_5)(${{[0-9]+}}) +; 2: lw ${{[0-9]+}}, %call16(v_sf_sf)(${{[0-9]+}}) + +; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_6)(${{[0-9]+}}) +; 2: lw ${{[0-9]+}}, %call16(v_df_sf)(${{[0-9]+}}) + +; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_10)(${{[0-9]+}}) +; 2: lw ${{[0-9]+}}, %call16(v_df_df)(${{[0-9]+}}) + +; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_sf_0)(${{[0-9]+}}) +; 2: lw ${{[0-9]+}}, %call16(sf_v)(${{[0-9]+}}) + +; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_sf_1)(${{[0-9]+}}) +; 2: lw ${{[0-9]+}}, %call16(sf_sf)(${{[0-9]+}}) + +; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_sf_2)(${{[0-9]+}}) +; 2: lw ${{[0-9]+}}, %call16(sf_df)(${{[0-9]+}}) + +; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_sf_5)(${{[0-9]+}}) +; 2: lw ${{[0-9]+}}, %call16(sf_sf_sf)(${{[0-9]+}}) + +; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_sf_6)(${{[0-9]+}}) +; 2: lw ${{[0-9]+}}, %call16(sf_df_sf)(${{[0-9]+}}) + +; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_sf_10)(${{[0-9]+}}) +; 2: lw ${{[0-9]+}}, %call16(sf_df_df)(${{[0-9]+}}) + +; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_df_0)(${{[0-9]+}}) +; 2: lw ${{[0-9]+}}, %call16(df_v)(${{[0-9]+}}) + +; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_df_1)(${{[0-9]+}}) +; 2: lw ${{[0-9]+}}, %call16(df_sf)(${{[0-9]+}}) + +; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_df_2)(${{[0-9]+}}) +; 2: lw ${{[0-9]+}}, %call16(df_df)(${{[0-9]+}}) + +; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_df_5)(${{[0-9]+}}) +; 2: lw ${{[0-9]+}}, %call16(df_sf_sf)(${{[0-9]+}}) + +; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_df_6)(${{[0-9]+}}) +; 2: lw ${{[0-9]+}}, %call16(df_df_sf)(${{[0-9]+}}) + +; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_df_10)(${{[0-9]+}}) +; 2: lw ${{[0-9]+}}, %call16(df_df_df)(${{[0-9]+}}) + +; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_sc_0)(${{[0-9]+}}) +; 2: lw ${{[0-9]+}}, %call16(sc_v)(${{[0-9]+}}) + +; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_sc_1)(${{[0-9]+}}) +; 2: lw ${{[0-9]+}}, %call16(sc_sf)(${{[0-9]+}}) + +; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_sc_2)(${{[0-9]+}}) +; 2: lw ${{[0-9]+}}, %call16(sc_df)(${{[0-9]+}}) + +; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_sc_5)(${{[0-9]+}}) +; 2: lw ${{[0-9]+}}, %call16(sc_sf_sf)(${{[0-9]+}}) + +; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_sc_6)(${{[0-9]+}}) +; 2: lw ${{[0-9]+}}, %call16(sc_df_sf)(${{[0-9]+}}) + +; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_sc_10)(${{[0-9]+}}) +; 2: lw ${{[0-9]+}}, %call16(sc_df_df)(${{[0-9]+}}) + +; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_dc_0)(${{[0-9]+}}) +; 2: lw ${{[0-9]+}}, %call16(dc_v)(${{[0-9]+}}) + +; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_dc_1)(${{[0-9]+}}) +; 2: lw ${{[0-9]+}}, %call16(dc_sf)(${{[0-9]+}}) + +; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_dc_2)(${{[0-9]+}}) +; 2: lw ${{[0-9]+}}, %call16(dc_df)(${{[0-9]+}}) + +; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_dc_5)(${{[0-9]+}}) +; 2: lw ${{[0-9]+}}, %call16(dc_sf_sf)(${{[0-9]+}}) + +; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_dc_6)(${{[0-9]+}}) +; 2: lw ${{[0-9]+}}, %call16(dc_df_sf)(${{[0-9]+}}) + +; 
1: lw ${{[0-9]+}}, %got(__mips16_call_stub_dc_10)(${{[0-9]+}}) +; 2: lw ${{[0-9]+}}, %call16(dc_df_df)(${{[0-9]+}}) + + + diff --git a/test/CodeGen/Mips/i64arg.ll b/test/CodeGen/Mips/i64arg.ll index 8b1f71b69f19..704014cba010 100644 --- a/test/CodeGen/Mips/i64arg.ll +++ b/test/CodeGen/Mips/i64arg.ll @@ -2,8 +2,8 @@ define void @f1(i64 %ll1, float %f, i64 %ll, i32 %i, float %f2) nounwind { entry: -; CHECK: addu $[[R1:[0-9]+]], $zero, $5 -; CHECK: addu $[[R0:[0-9]+]], $zero, $4 +; CHECK: move $[[R1:[0-9]+]], $5 +; CHECK: move $[[R0:[0-9]+]], $4 ; CHECK: ori $6, ${{[0-9]+}}, 3855 ; CHECK: ori $7, ${{[0-9]+}}, 22136 ; CHECK: lw $25, %call16(ff1) @@ -12,16 +12,16 @@ entry: ; CHECK: lw $25, %call16(ff2) ; CHECK: lw $[[R2:[0-9]+]], 80($sp) ; CHECK: lw $[[R3:[0-9]+]], 84($sp) -; CHECK: addu $4, $zero, $[[R2]] -; CHECK: addu $5, $zero, $[[R3]] +; CHECK: move $4, $[[R2]] +; CHECK: move $5, $[[R3]] ; CHECK: jalr $25 tail call void @ff2(i64 %ll, double 3.000000e+00) nounwind %sub = add nsw i32 %i, -1 +; CHECK: lw $25, %call16(ff3) ; CHECK: sw $[[R1]], 28($sp) ; CHECK: sw $[[R0]], 24($sp) -; CHECK: lw $25, %call16(ff3) -; CHECK: addu $6, $zero, $[[R2]] -; CHECK: addu $7, $zero, $[[R3]] +; CHECK: move $6, $[[R2]] +; CHECK: move $7, $[[R3]] ; CHECK: jalr $25 tail call void @ff3(i32 %i, i64 %ll, i32 %sub, i64 %ll1) nounwind ret void diff --git a/test/CodeGen/Mips/inlineasm_constraint.ll b/test/CodeGen/Mips/inlineasm_constraint.ll index 5adec3bb29ea..8d30f45d84e3 100644 --- a/test/CodeGen/Mips/inlineasm_constraint.ll +++ b/test/CodeGen/Mips/inlineasm_constraint.ll @@ -51,5 +51,14 @@ entry: ; CHECK: #NO_APP tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,P"(i32 7, i32 65535) nounwind +; Now R Which takes the address of c + %c = alloca i32, align 4 + store i32 -4469539, i32* %c, align 4 + %8 = call i32 asm sideeffect "lwl $0, 1 + $1\0A\09lwr $0, 2 + $1\0A\09", "=r,*R"(i32* %c) #1 +; CHECK: #APP +; CHECK: lwl ${{[0-9]+}}, 1 + 0(${{[0-9]+}}) +; CHECK: lwr ${{[0-9]+}}, 2 + 0(${{[0-9]+}}) +; CHECK: #NO_APP + ret i32 0 } diff --git a/test/CodeGen/Mips/jtstat.ll b/test/CodeGen/Mips/jtstat.ll new file mode 100644 index 000000000000..01afc080c2ed --- /dev/null +++ b/test/CodeGen/Mips/jtstat.ll @@ -0,0 +1,71 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=CHECK-STATIC16 + +@s = global i8 115, align 1 +@c = common global i8 0, align 1 +@.str = private unnamed_addr constant [5 x i8] c"%c \0A\00", align 1 + +define void @test(i32 %i) nounwind { +entry: + %i.addr = alloca i32, align 4 + store i32 %i, i32* %i.addr, align 4 + %0 = load i32* %i.addr, align 4 + switch i32 %0, label %sw.epilog [ + i32 115, label %sw.bb + i32 105, label %sw.bb1 + i32 100, label %sw.bb2 + i32 108, label %sw.bb3 + i32 99, label %sw.bb4 + i32 68, label %sw.bb5 + i32 81, label %sw.bb6 + i32 76, label %sw.bb7 + ] + +sw.bb: ; preds = %entry + store i8 115, i8* @c, align 1 + br label %sw.epilog + +sw.bb1: ; preds = %entry + store i8 105, i8* @c, align 1 + br label %sw.epilog + +sw.bb2: ; preds = %entry + store i8 100, i8* @c, align 1 + br label %sw.epilog + +sw.bb3: ; preds = %entry + store i8 108, i8* @c, align 1 + br label %sw.epilog + +sw.bb4: ; preds = %entry + store i8 99, i8* @c, align 1 + br label %sw.epilog + +sw.bb5: ; preds = %entry + store i8 68, i8* @c, align 1 + br label %sw.epilog + +sw.bb6: ; preds = %entry + store i8 81, i8* @c, align 1 + br label %sw.epilog + +sw.bb7: ; preds = %entry + store i8 76, i8* @c, align 1 + br label %sw.epilog + +sw.epilog: ; preds = %entry, %sw.bb7, 
%sw.bb6, %sw.bb5, %sw.bb4, %sw.bb3, %sw.bb2, %sw.bb1, %sw.bb + ret void +} + +; CHECK-STATIC16: li ${{[0-9]+}}, %hi($JTI{{[0-9]+}}_{{[0-9]+}}) +; CHECK-STATIC16: lw ${{[0-9]+}}, %lo($JTI{{[0-9]+}}_{{[0-9]+}})(${{[0-9]+}}) +; CHECK-STATIC16: $JTI{{[0-9]+}}_{{[0-9]+}}: +; CHECK-STATIC16: .4byte ($BB0_{{[0-9]+}}) +; CHECK-STATIC16: .4byte ($BB0_{{[0-9]+}}) +; CHECK-STATIC16: .4byte ($BB0_{{[0-9]+}}) +; CHECK-STATIC16: .4byte ($BB0_{{[0-9]+}}) +; CHECK-STATIC16: .4byte ($BB0_{{[0-9]+}}) +; CHECK-STATIC16: .4byte ($BB0_{{[0-9]+}}) +; CHECK-STATIC16: .4byte ($BB0_{{[0-9]+}}) +; CHECK-STATIC16: .4byte ($BB0_{{[0-9]+}}) +; CHECK-STATIC16: .4byte ($BB0_{{[0-9]+}}) +; CHECK-STATIC16: .4byte ($BB0_{{[0-9]+}}) diff --git a/test/CodeGen/Mips/largefr1.ll b/test/CodeGen/Mips/largefr1.ll new file mode 100644 index 000000000000..0fe89f71d9f3 --- /dev/null +++ b/test/CodeGen/Mips/largefr1.ll @@ -0,0 +1,61 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -mips16-hard-float -soft-float -relocation-model=static < %s | FileCheck %s -check-prefix=1 + +@i = common global i32 0, align 4 +@j = common global i32 0, align 4 +@.str = private unnamed_addr constant [8 x i8] c"%i %i \0A\00", align 1 + +define void @foo(i32* %p, i32 %i, i32 %j) nounwind { +entry: + %p.addr = alloca i32*, align 4 + %i.addr = alloca i32, align 4 + %j.addr = alloca i32, align 4 + store i32* %p, i32** %p.addr, align 4 + store i32 %i, i32* %i.addr, align 4 + store i32 %j, i32* %j.addr, align 4 + %0 = load i32* %j.addr, align 4 + %1 = load i32** %p.addr, align 4 + %2 = load i32* %i.addr, align 4 + %add.ptr = getelementptr inbounds i32* %1, i32 %2 + store i32 %0, i32* %add.ptr, align 4 + ret void +} + +define i32 @main() nounwind { +entry: +; 1: main: +; 1: 1: .word -797992 +; 1: li ${{[0-9]+}}, 12 +; 1: sll ${{[0-9]+}}, ${{[0-9]+}}, 16 +; 1: addu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}} +; 2: move $sp, ${{[0-9]+}} +; 2: addu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}} +; 1: li ${{[0-9]+}}, 6 +; 1: sll ${{[0-9]+}}, ${{[0-9]+}}, 16 +; 1: addu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}} +; 2: move $sp, ${{[0-9]+}} +; 2: addu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}} +; 1: addiu ${{[0-9]+}}, ${{[0-9]+}}, 6800 +; 1: li ${{[0-9]+}}, 1 +; 1: sll ${{[0-9]+}}, ${{[0-9]+}}, 16 +; 2: li ${{[0-9]+}}, 34463 + %retval = alloca i32, align 4 + %one = alloca [100000 x i32], align 4 + %two = alloca [100000 x i32], align 4 + store i32 0, i32* %retval + %arrayidx = getelementptr inbounds [100000 x i32]* %one, i32 0, i32 0 + call void @foo(i32* %arrayidx, i32 50, i32 9999) + %arrayidx1 = getelementptr inbounds [100000 x i32]* %two, i32 0, i32 0 + call void @foo(i32* %arrayidx1, i32 99999, i32 5555) + %arrayidx2 = getelementptr inbounds [100000 x i32]* %one, i32 0, i32 50 + %0 = load i32* %arrayidx2, align 4 + store i32 %0, i32* @i, align 4 + %arrayidx3 = getelementptr inbounds [100000 x i32]* %two, i32 0, i32 99999 + %1 = load i32* %arrayidx3, align 4 + store i32 %1, i32* @j, align 4 + %2 = load i32* @i, align 4 + %3 = load i32* @j, align 4 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str, i32 0, i32 0), i32 %2, i32 %3) + ret i32 0 +} + +declare i32 @printf(i8*, ...) 
diff --git a/test/CodeGen/Mips/lit.local.cfg b/test/CodeGen/Mips/lit.local.cfg index 0587d3243e6b..e157c540b538 100644 --- a/test/CodeGen/Mips/lit.local.cfg +++ b/test/CodeGen/Mips/lit.local.cfg @@ -1,4 +1,4 @@ -config.suffixes = ['.ll', '.c', '.cpp'] +config.suffixes = ['.ll', '.c', '.cpp', '.test'] targets = set(config.root.targets_to_build.split()) if not 'Mips' in targets: diff --git a/test/CodeGen/Mips/madd-msub.ll b/test/CodeGen/Mips/madd-msub.ll index 0aeabb30e289..0dbb2c27b8f9 100644 --- a/test/CodeGen/Mips/madd-msub.ll +++ b/test/CodeGen/Mips/madd-msub.ll @@ -1,6 +1,9 @@ -; RUN: llc -march=mips < %s | FileCheck %s +; RUN: llc -march=mips < %s | FileCheck %s -check-prefix=32 +; RUN: llc -march=mips -mattr=dsp < %s | FileCheck %s -check-prefix=DSP +; RUN: llc -march=mips -mcpu=mips16 < %s -; CHECK: madd +; 32: madd ${{[0-9]+}} +; DSP: madd $ac define i64 @madd1(i32 %a, i32 %b, i32 %c) nounwind readnone { entry: %conv = sext i32 %a to i64 @@ -11,7 +14,8 @@ entry: ret i64 %add } -; CHECK: maddu +; 32: maddu ${{[0-9]+}} +; DSP: maddu $ac define i64 @madd2(i32 %a, i32 %b, i32 %c) nounwind readnone { entry: %conv = zext i32 %a to i64 @@ -22,7 +26,8 @@ entry: ret i64 %add } -; CHECK: madd +; 32: madd ${{[0-9]+}} +; DSP: madd $ac define i64 @madd3(i32 %a, i32 %b, i64 %c) nounwind readnone { entry: %conv = sext i32 %a to i64 @@ -32,7 +37,8 @@ entry: ret i64 %add } -; CHECK: msub +; 32: msub ${{[0-9]+}} +; DSP: msub $ac define i64 @msub1(i32 %a, i32 %b, i32 %c) nounwind readnone { entry: %conv = sext i32 %c to i64 @@ -43,7 +49,8 @@ entry: ret i64 %sub } -; CHECK: msubu +; 32: msubu ${{[0-9]+}} +; DSP: msubu $ac define i64 @msub2(i32 %a, i32 %b, i32 %c) nounwind readnone { entry: %conv = zext i32 %c to i64 @@ -54,7 +61,8 @@ entry: ret i64 %sub } -; CHECK: msub +; 32: msub ${{[0-9]+}} +; DSP: msub $ac define i64 @msub3(i32 %a, i32 %b, i64 %c) nounwind readnone { entry: %conv = sext i32 %a to i64 diff --git a/test/CodeGen/Mips/mips16ex.ll b/test/CodeGen/Mips/mips16ex.ll new file mode 100644 index 000000000000..ecb30b5c63b8 --- /dev/null +++ b/test/CodeGen/Mips/mips16ex.ll @@ -0,0 +1,87 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +;16: $eh_func_begin0=. 
+@.str = private unnamed_addr constant [7 x i8] c"hello\0A\00", align 1 +@_ZTIi = external constant i8* +@.str1 = private unnamed_addr constant [15 x i8] c"exception %i \0A\00", align 1 + +define i32 @main() { +entry: + %retval = alloca i32, align 4 + %exn.slot = alloca i8* + %ehselector.slot = alloca i32 + %e = alloca i32, align 4 + store i32 0, i32* %retval + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0)) + %exception = call i8* @__cxa_allocate_exception(i32 4) nounwind + %0 = bitcast i8* %exception to i32* + store i32 20, i32* %0 + invoke void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) noreturn + to label %unreachable unwind label %lpad + +lpad: ; preds = %entry + %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + catch i8* bitcast (i8** @_ZTIi to i8*) + %2 = extractvalue { i8*, i32 } %1, 0 + store i8* %2, i8** %exn.slot + %3 = extractvalue { i8*, i32 } %1, 1 + store i32 %3, i32* %ehselector.slot + br label %catch.dispatch + +catch.dispatch: ; preds = %lpad + %sel = load i32* %ehselector.slot + %4 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) nounwind + %matches = icmp eq i32 %sel, %4 + br i1 %matches, label %catch, label %eh.resume + +catch: ; preds = %catch.dispatch + %exn = load i8** %exn.slot + %5 = call i8* @__cxa_begin_catch(i8* %exn) nounwind + %6 = bitcast i8* %5 to i32* + %exn.scalar = load i32* %6 + store i32 %exn.scalar, i32* %e, align 4 + %7 = load i32* %e, align 4 + %call2 = invoke i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([15 x i8]* @.str1, i32 0, i32 0), i32 %7) + to label %invoke.cont unwind label %lpad1 + +invoke.cont: ; preds = %catch + call void @__cxa_end_catch() nounwind + br label %try.cont + +try.cont: ; preds = %invoke.cont + ret i32 0 + +lpad1: ; preds = %catch + %8 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + cleanup + %9 = extractvalue { i8*, i32 } %8, 0 + store i8* %9, i8** %exn.slot + %10 = extractvalue { i8*, i32 } %8, 1 + store i32 %10, i32* %ehselector.slot + call void @__cxa_end_catch() nounwind + br label %eh.resume + +eh.resume: ; preds = %lpad1, %catch.dispatch + %exn3 = load i8** %exn.slot + %sel4 = load i32* %ehselector.slot + %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn3, 0 + %lpad.val5 = insertvalue { i8*, i32 } %lpad.val, i32 %sel4, 1 + resume { i8*, i32 } %lpad.val5 + +unreachable: ; preds = %entry + unreachable +} + +declare i32 @printf(i8*, ...) + +declare i8* @__cxa_allocate_exception(i32) + +declare i32 @__gxx_personality_v0(...) 
+ +declare void @__cxa_throw(i8*, i8*, i8*) + +declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone + +declare i8* @__cxa_begin_catch(i8*) + +declare void @__cxa_end_catch() diff --git a/test/CodeGen/Mips/mips16fpe.ll b/test/CodeGen/Mips/mips16fpe.ll new file mode 100644 index 000000000000..433543607967 --- /dev/null +++ b/test/CodeGen/Mips/mips16fpe.ll @@ -0,0 +1,381 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 -soft-float -mips16-hard-float < %s | FileCheck %s -check-prefix=16hf + +@x = global float 5.000000e+00, align 4 +@y = global float 1.500000e+01, align 4 +@xd = global double 6.000000e+00, align 8 +@yd = global double 1.800000e+01, align 8 +@two = global i32 2, align 4 +@addsf3_result = common global float 0.000000e+00, align 4 +@adddf3_result = common global double 0.000000e+00, align 8 +@subsf3_result = common global float 0.000000e+00, align 4 +@subdf3_result = common global double 0.000000e+00, align 8 +@mulsf3_result = common global float 0.000000e+00, align 4 +@muldf3_result = common global double 0.000000e+00, align 8 +@divsf3_result = common global float 0.000000e+00, align 4 +@divdf3_result = common global double 0.000000e+00, align 8 +@extendsfdf2_result = common global double 0.000000e+00, align 8 +@xd2 = global double 0x40147E6B74B4CF6A, align 8 +@truncdfsf2_result = common global float 0.000000e+00, align 4 +@fix_truncsfsi_result = common global i32 0, align 4 +@fix_truncdfsi_result = common global i32 0, align 4 +@si = global i32 -9, align 4 +@ui = global i32 9, align 4 +@floatsisf_result = common global float 0.000000e+00, align 4 +@floatsidf_result = common global double 0.000000e+00, align 8 +@floatunsisf_result = common global float 0.000000e+00, align 4 +@floatunsidf_result = common global double 0.000000e+00, align 8 +@xx = global float 5.000000e+00, align 4 +@eqsf2_result = common global i32 0, align 4 +@xxd = global double 6.000000e+00, align 8 +@eqdf2_result = common global i32 0, align 4 +@nesf2_result = common global i32 0, align 4 +@nedf2_result = common global i32 0, align 4 +@gesf2_result = common global i32 0, align 4 +@gedf2_result = common global i32 0, align 4 +@ltsf2_result = common global i32 0, align 4 +@ltdf2_result = common global i32 0, align 4 +@lesf2_result = common global i32 0, align 4 +@ledf2_result = common global i32 0, align 4 +@gtsf2_result = common global i32 0, align 4 +@gtdf2_result = common global i32 0, align 4 + +define void @test_addsf3() nounwind { +entry: +;16hf: test_addsf3: + %0 = load float* @x, align 4 + %1 = load float* @y, align 4 + %add = fadd float %0, %1 + store float %add, float* @addsf3_result, align 4 +;16hf: lw ${{[0-9]+}}, %call16(__mips16_addsf3)(${{[0-9]+}}) + ret void +} + +define void @test_adddf3() nounwind { +entry: +;16hf: test_adddf3: + %0 = load double* @xd, align 8 + %1 = load double* @yd, align 8 + %add = fadd double %0, %1 + store double %add, double* @adddf3_result, align 8 +;16hf: lw ${{[0-9]+}}, %call16(__mips16_adddf3)(${{[0-9]+}}) + ret void +} + +define void @test_subsf3() nounwind { +entry: +;16hf: test_subsf3: + %0 = load float* @x, align 4 + %1 = load float* @y, align 4 + %sub = fsub float %0, %1 + store float %sub, float* @subsf3_result, align 4 +;16hf: lw ${{[0-9]+}}, %call16(__mips16_subsf3)(${{[0-9]+}}) + ret void +} + +define void @test_subdf3() nounwind { +entry: +;16hf: test_subdf3: + %0 = load double* @xd, align 8 + %1 = load double* @yd, align 8 + %sub = fsub double %0, %1 + store double %sub, double* @subdf3_result, align 8 +;16hf: lw ${{[0-9]+}}, 
%call16(__mips16_subdf3)(${{[0-9]+}}) + ret void +} + +define void @test_mulsf3() nounwind { +entry: +;16hf: test_mulsf3: + %0 = load float* @x, align 4 + %1 = load float* @y, align 4 + %mul = fmul float %0, %1 + store float %mul, float* @mulsf3_result, align 4 +;16hf: lw ${{[0-9]+}}, %call16(__mips16_mulsf3)(${{[0-9]+}}) + ret void +} + +define void @test_muldf3() nounwind { +entry: +;16hf: test_muldf3: + %0 = load double* @xd, align 8 + %1 = load double* @yd, align 8 + %mul = fmul double %0, %1 + store double %mul, double* @muldf3_result, align 8 +;16hf: lw ${{[0-9]+}}, %call16(__mips16_muldf3)(${{[0-9]+}}) + ret void +} + +define void @test_divsf3() nounwind { +entry: +;16hf: test_divsf3: + %0 = load float* @y, align 4 + %1 = load float* @x, align 4 + %div = fdiv float %0, %1 + store float %div, float* @divsf3_result, align 4 +;16hf: lw ${{[0-9]+}}, %call16(__mips16_divsf3)(${{[0-9]+}}) + ret void +} + +define void @test_divdf3() nounwind { +entry: +;16hf: test_divdf3: + %0 = load double* @yd, align 8 + %mul = fmul double %0, 2.000000e+00 + %1 = load double* @xd, align 8 + %div = fdiv double %mul, %1 + store double %div, double* @divdf3_result, align 8 +;16hf: lw ${{[0-9]+}}, %call16(__mips16_divdf3)(${{[0-9]+}}) + ret void +} + +define void @test_extendsfdf2() nounwind { +entry: +;16hf: test_extendsfdf2: + %0 = load float* @x, align 4 + %conv = fpext float %0 to double + store double %conv, double* @extendsfdf2_result, align 8 +;16hf: lw ${{[0-9]+}}, %call16(__mips16_extendsfdf2)(${{[0-9]+}}) + ret void +} + +define void @test_truncdfsf2() nounwind { +entry: +;16hf: test_truncdfsf2: + %0 = load double* @xd2, align 8 + %conv = fptrunc double %0 to float + store float %conv, float* @truncdfsf2_result, align 4 +;16hf: lw ${{[0-9]+}}, %call16(__mips16_truncdfsf2)(${{[0-9]+}}) + ret void +} + +define void @test_fix_truncsfsi() nounwind { +entry: +;16hf: test_fix_truncsfsi: + %0 = load float* @x, align 4 + %conv = fptosi float %0 to i32 + store i32 %conv, i32* @fix_truncsfsi_result, align 4 +;16hf: lw ${{[0-9]+}}, %call16(__mips16_fix_truncsfsi)(${{[0-9]+}}) + ret void +} + +define void @test_fix_truncdfsi() nounwind { +entry: +;16hf: test_fix_truncdfsi: + %0 = load double* @xd, align 8 + %conv = fptosi double %0 to i32 + store i32 %conv, i32* @fix_truncdfsi_result, align 4 +;16hf: lw ${{[0-9]+}}, %call16(__mips16_fix_truncdfsi)(${{[0-9]+}}) + ret void +} + +define void @test_floatsisf() nounwind { +entry: +;16hf: test_floatsisf: + %0 = load i32* @si, align 4 + %conv = sitofp i32 %0 to float + store float %conv, float* @floatsisf_result, align 4 +;16hf: lw ${{[0-9]+}}, %call16(__mips16_floatsisf)(${{[0-9]+}}) + ret void +} + +define void @test_floatsidf() nounwind { +entry: +;16hf: test_floatsidf: + %0 = load i32* @si, align 4 + %conv = sitofp i32 %0 to double + store double %conv, double* @floatsidf_result, align 8 +;16hf: lw ${{[0-9]+}}, %call16(__mips16_floatsidf)(${{[0-9]+}}) + ret void +} + +define void @test_floatunsisf() nounwind { +entry: +;16hf: test_floatunsisf: + %0 = load i32* @ui, align 4 + %conv = uitofp i32 %0 to float + store float %conv, float* @floatunsisf_result, align 4 +;16hf: lw ${{[0-9]+}}, %call16(__mips16_floatunsisf)(${{[0-9]+}}) + ret void +} + +define void @test_floatunsidf() nounwind { +entry: +;16hf: test_floatunsidf: + %0 = load i32* @ui, align 4 + %conv = uitofp i32 %0 to double + store double %conv, double* @floatunsidf_result, align 8 +;16hf: lw ${{[0-9]+}}, %call16(__mips16_floatunsidf)(${{[0-9]+}}) + ret void +} + +define void @test_eqsf2() nounwind { 
+entry: +;16hf: test_eqsf2: + %0 = load float* @x, align 4 + %1 = load float* @xx, align 4 + %cmp = fcmp oeq float %0, %1 + %conv = zext i1 %cmp to i32 + store i32 %conv, i32* @eqsf2_result, align 4 +;16hf: lw ${{[0-9]+}}, %call16(__mips16_eqsf2)(${{[0-9]+}}) + ret void +} + +define void @test_eqdf2() nounwind { +entry: +;16hf: test_eqdf2: + %0 = load double* @xd, align 8 + %1 = load double* @xxd, align 8 + %cmp = fcmp oeq double %0, %1 + %conv = zext i1 %cmp to i32 + store i32 %conv, i32* @eqdf2_result, align 4 +;16hf: lw ${{[0-9]+}}, %call16(__mips16_eqdf2)(${{[0-9]+}}) + ret void +} + +define void @test_nesf2() nounwind { +entry: +;16hf: test_nesf2: + %0 = load float* @x, align 4 + %1 = load float* @y, align 4 + %cmp = fcmp une float %0, %1 + %conv = zext i1 %cmp to i32 + store i32 %conv, i32* @nesf2_result, align 4 +;16hf: lw ${{[0-9]+}}, %call16(__mips16_nesf2)(${{[0-9]+}}) + ret void +} + +define void @test_nedf2() nounwind { +entry: +;16hf: test_nedf2: + %0 = load double* @xd, align 8 + %1 = load double* @yd, align 8 + %cmp = fcmp une double %0, %1 + %conv = zext i1 %cmp to i32 + store i32 %conv, i32* @nedf2_result, align 4 +;16hf: lw ${{[0-9]+}}, %call16(__mips16_nedf2)(${{[0-9]+}}) + ret void +} + +define void @test_gesf2() nounwind { +entry: +;16hf: test_gesf2: + %0 = load float* @x, align 4 + %1 = load float* @xx, align 4 + %cmp = fcmp oge float %0, %1 + %2 = load float* @y, align 4 + %cmp1 = fcmp oge float %2, %0 + %and3 = and i1 %cmp, %cmp1 + %and = zext i1 %and3 to i32 + store i32 %and, i32* @gesf2_result, align 4 +;16hf: lw ${{[0-9]+}}, %call16(__mips16_gesf2)(${{[0-9]+}}) + ret void +} + +define void @test_gedf2() nounwind { +entry: +;16hf: test_gedf2: + %0 = load double* @xd, align 8 + %1 = load double* @xxd, align 8 + %cmp = fcmp oge double %0, %1 + %2 = load double* @yd, align 8 + %cmp1 = fcmp oge double %2, %0 + %and3 = and i1 %cmp, %cmp1 + %and = zext i1 %and3 to i32 + store i32 %and, i32* @gedf2_result, align 4 +;16hf: lw ${{[0-9]+}}, %call16(__mips16_gedf2)(${{[0-9]+}}) + ret void +} + +define void @test_ltsf2() nounwind { +entry: +;16hf: test_ltsf2: + %0 = load float* @x, align 4 + %1 = load float* @xx, align 4 + %lnot = fcmp uge float %0, %1 + %2 = load float* @y, align 4 + %cmp1 = fcmp olt float %0, %2 + %and2 = and i1 %lnot, %cmp1 + %and = zext i1 %and2 to i32 + store i32 %and, i32* @ltsf2_result, align 4 +;16hf: lw ${{[0-9]+}}, %call16(__mips16_unordsf2)(${{[0-9]+}}) +;16hf: lw ${{[0-9]+}}, %call16(__mips16_ltsf2)(${{[0-9]+}}) + ret void +} + +define void @test_ltdf2() nounwind { +entry: +;16hf: test_ltdf2: + %0 = load double* @xd, align 8 + %1 = load double* @xxd, align 8 + %lnot = fcmp uge double %0, %1 + %2 = load double* @yd, align 8 + %cmp1 = fcmp olt double %0, %2 + %and2 = and i1 %lnot, %cmp1 + %and = zext i1 %and2 to i32 + store i32 %and, i32* @ltdf2_result, align 4 +;16hf: lw ${{[0-9]+}}, %call16(__mips16_unorddf2)(${{[0-9]+}}) +;16hf: lw ${{[0-9]+}}, %call16(__mips16_ltdf2)(${{[0-9]+}}) + ret void +} + +define void @test_lesf2() nounwind { +entry: +;16hf: test_lesf2: + %0 = load float* @x, align 4 + %1 = load float* @xx, align 4 + %cmp = fcmp ole float %0, %1 + %2 = load float* @y, align 4 + %cmp1 = fcmp ole float %0, %2 + %and3 = and i1 %cmp, %cmp1 + %and = zext i1 %and3 to i32 + store i32 %and, i32* @lesf2_result, align 4 +;16hf: lw ${{[0-9]+}}, %call16(__mips16_lesf2)(${{[0-9]+}}) + ret void +} + +define void @test_ledf2() nounwind { +entry: +;16hf: test_ledf2: + %0 = load double* @xd, align 8 + %1 = load double* @xxd, align 8 + %cmp = fcmp ole 
double %0, %1 + %2 = load double* @yd, align 8 + %cmp1 = fcmp ole double %0, %2 + %and3 = and i1 %cmp, %cmp1 + %and = zext i1 %and3 to i32 + store i32 %and, i32* @ledf2_result, align 4 +;16hf: lw ${{[0-9]+}}, %call16(__mips16_ledf2)(${{[0-9]+}}) + ret void +} + +define void @test_gtsf2() nounwind { +entry: +;16hf: test_gtsf2: + %0 = load float* @x, align 4 + %1 = load float* @xx, align 4 + %lnot = fcmp ule float %0, %1 + %2 = load float* @y, align 4 + %cmp1 = fcmp ogt float %2, %0 + %and2 = and i1 %lnot, %cmp1 + %and = zext i1 %and2 to i32 + store i32 %and, i32* @gtsf2_result, align 4 +;16hf: lw ${{[0-9]+}}, %call16(__mips16_gtsf2)(${{[0-9]+}}) + ret void +} + +define void @test_gtdf2() nounwind { +entry: +;16hf: test_gtdf2: + %0 = load double* @xd, align 8 + %1 = load double* @xxd, align 8 + %lnot = fcmp ule double %0, %1 + %2 = load double* @yd, align 8 + %cmp1 = fcmp ogt double %2, %0 + %and2 = and i1 %lnot, %cmp1 + %and = zext i1 %and2 to i32 + store i32 %and, i32* @gtdf2_result, align 4 +;16hf: lw ${{[0-9]+}}, %call16(__mips16_gtdf2)(${{[0-9]+}}) + ret void +} + + diff --git a/test/CodeGen/Mips/mips64-f128-call.ll b/test/CodeGen/Mips/mips64-f128-call.ll new file mode 100644 index 000000000000..455e540e5df1 --- /dev/null +++ b/test/CodeGen/Mips/mips64-f128-call.ll @@ -0,0 +1,45 @@ +; RUN: llc -march=mips64el -mcpu=mips64r2 < %s | FileCheck %s + +@gld0 = external global fp128 +@gld1 = external global fp128 + +; CHECK: foo0 +; CHECK: sdc1 $f13, 8(${{[0-9]+}}) +; CHECK: sdc1 $f12, 0(${{[0-9]+}}) + +define void @foo0(fp128 %a0) { +entry: + store fp128 %a0, fp128* @gld0, align 16 + ret void +} + +; CHECK: foo1 +; CHECK: ldc1 $f13, 8(${{[0-9]+}}) +; CHECK: ldc1 $f12, 0(${{[0-9]+}}) + +define void @foo1() { +entry: + %0 = load fp128* @gld0, align 16 + tail call void @foo2(fp128 %0) + ret void +} + +declare void @foo2(fp128) + +; CHECK: foo3 +; CHECK: ld $[[R0:[0-9]+]], %got_disp(gld0) +; CHECK: sdc1 $f2, 8($[[R0]]) +; CHECK: sdc1 $f0, 0($[[R0]]) +; CHECK: ld $[[R1:[0-9]+]], %got_disp(gld1) +; CHECK: ldc1 $f0, 0($[[R1]]) +; CHECK: ldc1 $f2, 8($[[R1]]) + +define fp128 @foo3() { +entry: + %call = tail call fp128 @foo4() + store fp128 %call, fp128* @gld0, align 16 + %0 = load fp128* @gld1, align 16 + ret fp128 %0 +} + +declare fp128 @foo4() diff --git a/test/CodeGen/Mips/mips64-f128.ll b/test/CodeGen/Mips/mips64-f128.ll new file mode 100644 index 000000000000..5892cab4f8ea --- /dev/null +++ b/test/CodeGen/Mips/mips64-f128.ll @@ -0,0 +1,646 @@ +; RUN: llc -mtriple=mips64el-unknown-unknown -mcpu=mips64 -soft-float -O1 \ +; RUN: -disable-mips-delay-filler < %s | FileCheck %s + +@gld0 = external global fp128 +@gld1 = external global fp128 +@gld2 = external global fp128 +@gf1 = external global float +@gd1 = external global double + +; CHECK: addLD: +; CHECK: ld $25, %call16(__addtf3) + +define fp128 @addLD() { +entry: + %0 = load fp128* @gld0, align 16 + %1 = load fp128* @gld1, align 16 + %add = fadd fp128 %0, %1 + ret fp128 %add +} + +; CHECK: subLD: +; CHECK: ld $25, %call16(__subtf3) + +define fp128 @subLD() { +entry: + %0 = load fp128* @gld0, align 16 + %1 = load fp128* @gld1, align 16 + %sub = fsub fp128 %0, %1 + ret fp128 %sub +} + +; CHECK: mulLD: +; CHECK: ld $25, %call16(__multf3) + +define fp128 @mulLD() { +entry: + %0 = load fp128* @gld0, align 16 + %1 = load fp128* @gld1, align 16 + %mul = fmul fp128 %0, %1 + ret fp128 %mul +} + +; CHECK: divLD: +; CHECK: ld $25, %call16(__divtf3) + +define fp128 @divLD() { +entry: + %0 = load fp128* @gld0, align 16 + %1 = load fp128* @gld1, align 16 + 
%div = fdiv fp128 %0, %1 + ret fp128 %div +} + +; CHECK: conv_LD_char: +; CHECK: ld $25, %call16(__floatsitf) + +define fp128 @conv_LD_char(i8 signext %a) { +entry: + %conv = sitofp i8 %a to fp128 + ret fp128 %conv +} + +; CHECK: conv_LD_short: +; CHECK: ld $25, %call16(__floatsitf) + +define fp128 @conv_LD_short(i16 signext %a) { +entry: + %conv = sitofp i16 %a to fp128 + ret fp128 %conv +} + +; CHECK: conv_LD_int: +; CHECK: ld $25, %call16(__floatsitf) + +define fp128 @conv_LD_int(i32 %a) { +entry: + %conv = sitofp i32 %a to fp128 + ret fp128 %conv +} + +; CHECK: conv_LD_LL: +; CHECK: ld $25, %call16(__floatditf) + +define fp128 @conv_LD_LL(i64 %a) { +entry: + %conv = sitofp i64 %a to fp128 + ret fp128 %conv +} + +; CHECK: conv_LD_UChar: +; CHECK: ld $25, %call16(__floatunsitf) + +define fp128 @conv_LD_UChar(i8 zeroext %a) { +entry: + %conv = uitofp i8 %a to fp128 + ret fp128 %conv +} + +; CHECK: conv_LD_UShort: +; CHECK: ld $25, %call16(__floatunsitf) + +define fp128 @conv_LD_UShort(i16 zeroext %a) { +entry: + %conv = uitofp i16 %a to fp128 + ret fp128 %conv +} + +; CHECK: conv_LD_UInt: +; CHECK: ld $25, %call16(__floatunsitf) + +define fp128 @conv_LD_UInt(i32 %a) { +entry: + %conv = uitofp i32 %a to fp128 + ret fp128 %conv +} + +; CHECK: conv_LD_ULL: +; CHECK: ld $25, %call16(__floatunditf) + +define fp128 @conv_LD_ULL(i64 %a) { +entry: + %conv = uitofp i64 %a to fp128 + ret fp128 %conv +} + +; CHECK: conv_char_LD: +; CHECK: ld $25, %call16(__fixtfsi) + +define signext i8 @conv_char_LD(fp128 %a) { +entry: + %conv = fptosi fp128 %a to i8 + ret i8 %conv +} + +; CHECK: conv_short_LD: +; CHECK: ld $25, %call16(__fixtfsi) + +define signext i16 @conv_short_LD(fp128 %a) { +entry: + %conv = fptosi fp128 %a to i16 + ret i16 %conv +} + +; CHECK: conv_int_LD: +; CHECK: ld $25, %call16(__fixtfsi) + +define i32 @conv_int_LD(fp128 %a) { +entry: + %conv = fptosi fp128 %a to i32 + ret i32 %conv +} + +; CHECK: conv_LL_LD: +; CHECK: ld $25, %call16(__fixtfdi) + +define i64 @conv_LL_LD(fp128 %a) { +entry: + %conv = fptosi fp128 %a to i64 + ret i64 %conv +} + +; CHECK: conv_UChar_LD: +; CHECK: ld $25, %call16(__fixtfsi) + +define zeroext i8 @conv_UChar_LD(fp128 %a) { +entry: + %conv = fptoui fp128 %a to i8 + ret i8 %conv +} + +; CHECK: conv_UShort_LD: +; CHECK: ld $25, %call16(__fixtfsi) + +define zeroext i16 @conv_UShort_LD(fp128 %a) { +entry: + %conv = fptoui fp128 %a to i16 + ret i16 %conv +} + +; CHECK: conv_UInt_LD: +; CHECK: ld $25, %call16(__fixunstfsi) + +define i32 @conv_UInt_LD(fp128 %a) { +entry: + %conv = fptoui fp128 %a to i32 + ret i32 %conv +} + +; CHECK: conv_ULL_LD: +; CHECK: ld $25, %call16(__fixunstfdi) + +define i64 @conv_ULL_LD(fp128 %a) { +entry: + %conv = fptoui fp128 %a to i64 + ret i64 %conv +} + +; CHECK: conv_LD_float: +; CHECK: ld $25, %call16(__extendsftf2) + +define fp128 @conv_LD_float(float %a) { +entry: + %conv = fpext float %a to fp128 + ret fp128 %conv +} + +; CHECK: conv_LD_double: +; CHECK: ld $25, %call16(__extenddftf2) + +define fp128 @conv_LD_double(double %a) { +entry: + %conv = fpext double %a to fp128 + ret fp128 %conv +} + +; CHECK: conv_float_LD: +; CHECK: ld $25, %call16(__trunctfsf2) + +define float @conv_float_LD(fp128 %a) { +entry: + %conv = fptrunc fp128 %a to float + ret float %conv +} + +; CHECK: conv_double_LD: +; CHECK: ld $25, %call16(__trunctfdf2) + +define double @conv_double_LD(fp128 %a) { +entry: + %conv = fptrunc fp128 %a to double + ret double %conv +} + +; CHECK: libcall1_fabsl: +; CHECK: ld $[[R0:[0-9]+]], 8($[[R4:[0-9]+]]) +; CHECK: daddiu 
$[[R1:[0-9]+]], $zero, 1 +; CHECK: dsll $[[R2:[0-9]+]], $[[R1]], 63 +; CHECK: daddiu $[[R3:[0-9]+]], $[[R2]], -1 +; CHECK: and $4, $[[R0]], $[[R3]] +; CHECK: ld $2, 0($[[R4]]) + +define fp128 @libcall1_fabsl() { +entry: + %0 = load fp128* @gld0, align 16 + %call = tail call fp128 @fabsl(fp128 %0) nounwind readnone + ret fp128 %call +} + +declare fp128 @fabsl(fp128) #1 + +; CHECK: libcall1_ceill: +; CHECK: ld $25, %call16(ceill) + +define fp128 @libcall1_ceill() { +entry: + %0 = load fp128* @gld0, align 16 + %call = tail call fp128 @ceill(fp128 %0) nounwind readnone + ret fp128 %call +} + +declare fp128 @ceill(fp128) #1 + +; CHECK: libcall1_sinl: +; CHECK: ld $25, %call16(sinl) + +define fp128 @libcall1_sinl() { +entry: + %0 = load fp128* @gld0, align 16 + %call = tail call fp128 @sinl(fp128 %0) nounwind + ret fp128 %call +} + +declare fp128 @sinl(fp128) #2 + +; CHECK: libcall1_cosl: +; CHECK: ld $25, %call16(cosl) + +define fp128 @libcall1_cosl() { +entry: + %0 = load fp128* @gld0, align 16 + %call = tail call fp128 @cosl(fp128 %0) nounwind + ret fp128 %call +} + +declare fp128 @cosl(fp128) #2 + +; CHECK: libcall1_expl: +; CHECK: ld $25, %call16(expl) + +define fp128 @libcall1_expl() { +entry: + %0 = load fp128* @gld0, align 16 + %call = tail call fp128 @expl(fp128 %0) nounwind + ret fp128 %call +} + +declare fp128 @expl(fp128) #2 + +; CHECK: libcall1_exp2l: +; CHECK: ld $25, %call16(exp2l) + +define fp128 @libcall1_exp2l() { +entry: + %0 = load fp128* @gld0, align 16 + %call = tail call fp128 @exp2l(fp128 %0) nounwind + ret fp128 %call +} + +declare fp128 @exp2l(fp128) #2 + +; CHECK: libcall1_logl: +; CHECK: ld $25, %call16(logl) + +define fp128 @libcall1_logl() { +entry: + %0 = load fp128* @gld0, align 16 + %call = tail call fp128 @logl(fp128 %0) nounwind + ret fp128 %call +} + +declare fp128 @logl(fp128) #2 + +; CHECK: libcall1_log2l: +; CHECK: ld $25, %call16(log2l) + +define fp128 @libcall1_log2l() { +entry: + %0 = load fp128* @gld0, align 16 + %call = tail call fp128 @log2l(fp128 %0) nounwind + ret fp128 %call +} + +declare fp128 @log2l(fp128) #2 + +; CHECK: libcall1_log10l: +; CHECK: ld $25, %call16(log10l) + +define fp128 @libcall1_log10l() { +entry: + %0 = load fp128* @gld0, align 16 + %call = tail call fp128 @log10l(fp128 %0) nounwind + ret fp128 %call +} + +declare fp128 @log10l(fp128) #2 + +; CHECK: libcall1_nearbyintl: +; CHECK: ld $25, %call16(nearbyintl) + +define fp128 @libcall1_nearbyintl() { +entry: + %0 = load fp128* @gld0, align 16 + %call = tail call fp128 @nearbyintl(fp128 %0) nounwind readnone + ret fp128 %call +} + +declare fp128 @nearbyintl(fp128) #1 + +; CHECK: libcall1_floorl: +; CHECK: ld $25, %call16(floorl) + +define fp128 @libcall1_floorl() { +entry: + %0 = load fp128* @gld0, align 16 + %call = tail call fp128 @floorl(fp128 %0) nounwind readnone + ret fp128 %call +} + +declare fp128 @floorl(fp128) #1 + +; CHECK: libcall1_sqrtl: +; CHECK: ld $25, %call16(sqrtl) + +define fp128 @libcall1_sqrtl() { +entry: + %0 = load fp128* @gld0, align 16 + %call = tail call fp128 @sqrtl(fp128 %0) nounwind + ret fp128 %call +} + +declare fp128 @sqrtl(fp128) #2 + +; CHECK: libcall1_rintl: +; CHECK: ld $25, %call16(rintl) + +define fp128 @libcall1_rintl() { +entry: + %0 = load fp128* @gld0, align 16 + %call = tail call fp128 @rintl(fp128 %0) nounwind readnone + ret fp128 %call +} + +declare fp128 @rintl(fp128) #1 + +; CHECK: libcall_powil: +; CHECK: ld $25, %call16(__powitf2) + +define fp128 @libcall_powil(fp128 %a, i32 %b) { +entry: + %0 = tail call fp128 @llvm.powi.f128(fp128 
%a, i32 %b) + ret fp128 %0 +} + +declare fp128 @llvm.powi.f128(fp128, i32) #3 + +; CHECK: libcall2_copysignl: +; CHECK: daddiu $[[R2:[0-9]+]], $zero, 1 +; CHECK: dsll $[[R3:[0-9]+]], $[[R2]], 63 +; CHECK: ld $[[R0:[0-9]+]], %got_disp(gld1) +; CHECK: ld $[[R1:[0-9]+]], 8($[[R0]]) +; CHECK: and $[[R4:[0-9]+]], $[[R1]], $[[R3]] +; CHECK: ld $[[R5:[0-9]+]], %got_disp(gld0) +; CHECK: ld $[[R6:[0-9]+]], 8($[[R5]]) +; CHECK: daddiu $[[R7:[0-9]+]], $[[R3]], -1 +; CHECK: and $[[R8:[0-9]+]], $[[R6]], $[[R7]] +; CHECK: or $4, $[[R8]], $[[R4]] +; CHECK: ld $2, 0($[[R5]]) + +define fp128 @libcall2_copysignl() { +entry: + %0 = load fp128* @gld0, align 16 + %1 = load fp128* @gld1, align 16 + %call = tail call fp128 @copysignl(fp128 %0, fp128 %1) nounwind readnone + ret fp128 %call +} + +declare fp128 @copysignl(fp128, fp128) #1 + +; CHECK: libcall2_powl: +; CHECK: ld $25, %call16(powl) + +define fp128 @libcall2_powl() { +entry: + %0 = load fp128* @gld0, align 16 + %1 = load fp128* @gld1, align 16 + %call = tail call fp128 @powl(fp128 %0, fp128 %1) nounwind + ret fp128 %call +} + +declare fp128 @powl(fp128, fp128) #2 + +; CHECK: libcall2_fmodl: +; CHECK: ld $25, %call16(fmodl) + +define fp128 @libcall2_fmodl() { +entry: + %0 = load fp128* @gld0, align 16 + %1 = load fp128* @gld1, align 16 + %call = tail call fp128 @fmodl(fp128 %0, fp128 %1) nounwind + ret fp128 %call +} + +declare fp128 @fmodl(fp128, fp128) #2 + +; CHECK: libcall3_fmal: +; CHECK: ld $25, %call16(fmal) + +define fp128 @libcall3_fmal() { +entry: + %0 = load fp128* @gld0, align 16 + %1 = load fp128* @gld2, align 16 + %2 = load fp128* @gld1, align 16 + %3 = tail call fp128 @llvm.fma.f128(fp128 %0, fp128 %2, fp128 %1) + ret fp128 %3 +} + +declare fp128 @llvm.fma.f128(fp128, fp128, fp128) #4 + +; CHECK: cmp_lt: +; CHECK: ld $25, %call16(__lttf2) + +define i32 @cmp_lt(fp128 %a, fp128 %b) { +entry: + %cmp = fcmp olt fp128 %a, %b + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +; CHECK: cmp_le: +; CHECK: ld $25, %call16(__letf2) + +define i32 @cmp_le(fp128 %a, fp128 %b) { +entry: + %cmp = fcmp ole fp128 %a, %b + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +; CHECK: cmp_gt: +; CHECK: ld $25, %call16(__gttf2) + +define i32 @cmp_gt(fp128 %a, fp128 %b) { +entry: + %cmp = fcmp ogt fp128 %a, %b + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +; CHECK: cmp_ge: +; CHECK: ld $25, %call16(__getf2) + +define i32 @cmp_ge(fp128 %a, fp128 %b) { +entry: + %cmp = fcmp oge fp128 %a, %b + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +; CHECK: cmp_eq: +; CHECK: ld $25, %call16(__eqtf2) + +define i32 @cmp_eq(fp128 %a, fp128 %b) { +entry: + %cmp = fcmp oeq fp128 %a, %b + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +; CHECK: cmp_ne: +; CHECK: ld $25, %call16(__netf2) + +define i32 @cmp_ne(fp128 %a, fp128 %b) { +entry: + %cmp = fcmp une fp128 %a, %b + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +; CHECK: load_LD_LD: +; CHECK: ld $[[R0:[0-9]+]], %got_disp(gld1) +; CHECK: ld $2, 0($[[R0]]) +; CHECK: ld $4, 8($[[R0]]) + +define fp128 @load_LD_LD() { +entry: + %0 = load fp128* @gld1, align 16 + ret fp128 %0 +} + +; CHECK: load_LD_float: +; CHECK: ld $[[R0:[0-9]+]], %got_disp(gf1) +; CHECK: lw $4, 0($[[R0]]) +; CHECK: ld $25, %call16(__extendsftf2) +; CHECK: jalr $25 + +define fp128 @load_LD_float() { +entry: + %0 = load float* @gf1, align 4 + %conv = fpext float %0 to fp128 + ret fp128 %conv +} + +; CHECK: load_LD_double: +; CHECK: ld $[[R0:[0-9]+]], %got_disp(gd1) +; CHECK: ld $4, 0($[[R0]]) +; CHECK: ld $25, %call16(__extenddftf2) +; CHECK: jalr 
$25 + +define fp128 @load_LD_double() { +entry: + %0 = load double* @gd1, align 8 + %conv = fpext double %0 to fp128 + ret fp128 %conv +} + +; CHECK: store_LD_LD: +; CHECK: ld $[[R0:[0-9]+]], %got_disp(gld1) +; CHECK: ld $[[R1:[0-9]+]], 0($[[R0]]) +; CHECK: ld $[[R2:[0-9]+]], 8($[[R0]]) +; CHECK: ld $[[R3:[0-9]+]], %got_disp(gld0) +; CHECK: sd $[[R2]], 8($[[R3]]) +; CHECK: sd $[[R1]], 0($[[R3]]) + +define void @store_LD_LD() { +entry: + %0 = load fp128* @gld1, align 16 + store fp128 %0, fp128* @gld0, align 16 + ret void +} + +; CHECK: store_LD_float: +; CHECK: ld $[[R0:[0-9]+]], %got_disp(gld1) +; CHECK: ld $4, 0($[[R0]]) +; CHECK: ld $5, 8($[[R0]]) +; CHECK: ld $25, %call16(__trunctfsf2) +; CHECK: jalr $25 +; CHECK: ld $[[R1:[0-9]+]], %got_disp(gf1) +; CHECK: sw $2, 0($[[R1]]) + +define void @store_LD_float() { +entry: + %0 = load fp128* @gld1, align 16 + %conv = fptrunc fp128 %0 to float + store float %conv, float* @gf1, align 4 + ret void +} + +; CHECK: store_LD_double: +; CHECK: ld $[[R0:[0-9]+]], %got_disp(gld1) +; CHECK: ld $4, 0($[[R0]]) +; CHECK: ld $5, 8($[[R0]]) +; CHECK: ld $25, %call16(__trunctfdf2) +; CHECK: jalr $25 +; CHECK: ld $[[R1:[0-9]+]], %got_disp(gd1) +; CHECK: sd $2, 0($[[R1]]) + +define void @store_LD_double() { +entry: + %0 = load fp128* @gld1, align 16 + %conv = fptrunc fp128 %0 to double + store double %conv, double* @gd1, align 8 + ret void +} + +; CHECK: select_LD: +; CHECK: movn $8, $6, $4 +; CHECK: movn $9, $7, $4 +; CHECK: move $2, $8 +; CHECK: move $4, $9 + +define fp128 @select_LD(i32 %a, i64, fp128 %b, fp128 %c) { +entry: + %tobool = icmp ne i32 %a, 0 + %cond = select i1 %tobool, fp128 %b, fp128 %c + ret fp128 %cond +} + +; CHECK: selectCC_LD: +; CHECK: move $[[R0:[0-9]+]], $11 +; CHECK: move $[[R1:[0-9]+]], $10 +; CHECK: move $[[R2:[0-9]+]], $9 +; CHECK: move $[[R3:[0-9]+]], $8 +; CHECK: ld $25, %call16(__gttf2)($gp) +; CHECK: jalr $25 +; CHECK: slti $1, $2, 1 +; CHECK: movz $[[R1]], $[[R3]], $1 +; CHECK: movz $[[R0]], $[[R2]], $1 +; CHECK: move $2, $[[R1]] +; CHECK: move $4, $[[R0]] + +define fp128 @selectCC_LD(fp128 %a, fp128 %b, fp128 %c, fp128 %d) { +entry: + %cmp = fcmp ogt fp128 %a, %b + %cond = select i1 %cmp, fp128 %c, fp128 %d + ret fp128 %cond +} diff --git a/test/CodeGen/Mips/mips64-libcall.ll b/test/CodeGen/Mips/mips64-libcall.ll new file mode 100644 index 000000000000..d54598be70d8 --- /dev/null +++ b/test/CodeGen/Mips/mips64-libcall.ll @@ -0,0 +1,29 @@ +; RUN: llc -march=mips64el -mcpu=mips64r2 -O3 < %s |\ +; RUN: FileCheck %s -check-prefix=HARD +; RUN: llc -march=mips64el -mcpu=mips64r2 -soft-float < %s |\ +; RUN: FileCheck %s -check-prefix=SOFT + +; Check that %add is not passed in an integer register. +; +; HARD: callfloor: +; HARD-NOT: dmfc1 $4 + +define double @callfloor(double %d) nounwind readnone { +entry: + %add = fadd double %d, 1.000000e+00 + %call = tail call double @floor(double %add) nounwind readnone + ret double %call +} + +declare double @floor(double) nounwind readnone + +; Check call16. 
+; +; SOFT: f64add: +; SOFT: ld $25, %call16(__adddf3) + +define double @f64add(double %a, double %b) { +entry: + %add = fadd double %a, %b + ret double %add +} diff --git a/test/CodeGen/Mips/mips64-sret.ll b/test/CodeGen/Mips/mips64-sret.ll index e26b0223b447..e01609f3b1e4 100644 --- a/test/CodeGen/Mips/mips64-sret.ll +++ b/test/CodeGen/Mips/mips64-sret.ll @@ -6,7 +6,7 @@ define void @f(%struct.S* noalias sret %agg.result) nounwind { entry: -; CHECK: daddu $2, $zero, $4 +; CHECK: move $2, $4 %0 = bitcast %struct.S* %agg.result to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.S* @g to i8*), i64 32, i32 4, i1 false) diff --git a/test/CodeGen/Mips/o32_cc_byval.ll b/test/CodeGen/Mips/o32_cc_byval.ll index 5558ba6e10f4..0a8f85f4825d 100644 --- a/test/CodeGen/Mips/o32_cc_byval.ll +++ b/test/CodeGen/Mips/o32_cc_byval.ll @@ -12,20 +12,20 @@ define void @f1() nounwind { entry: ; CHECK: lw $[[R1:[0-9]+]], %got(f1.s1) ; CHECK: addiu $[[R0:[0-9]+]], $[[R1]], %lo(f1.s1) +; CHECK: lw $[[R7:[0-9]+]], 12($[[R0]]) +; CHECK: lw $[[R3:[0-9]+]], 16($[[R0]]) +; CHECK: lw $[[R4:[0-9]+]], 20($[[R0]]) +; CHECK: lw $[[R5:[0-9]+]], 24($[[R0]]) ; CHECK: lw $[[R6:[0-9]+]], 28($[[R0]]) ; CHECK: sw $[[R6]], 36($sp) -; CHECK: lw $[[R5:[0-9]+]], 24($[[R0]]) ; CHECK: sw $[[R5]], 32($sp) -; CHECK: lw $[[R4:[0-9]+]], 20($[[R0]]) ; CHECK: sw $[[R4]], 28($sp) -; CHECK: lw $[[R3:[0-9]+]], 16($[[R0]]) ; CHECK: sw $[[R3]], 24($sp) -; CHECK: lw $[[R7:[0-9]+]], 12($[[R0]]) ; CHECK: sw $[[R7]], 20($sp) ; CHECK: lw $[[R2:[0-9]+]], 8($[[R0]]) ; CHECK: sw $[[R2]], 16($sp) -; CHECK: lw $7, 4($[[R0]]) ; CHECK: lw $6, %lo(f1.s1)($[[R1]]) +; CHECK: lw $7, 4($[[R0]]) %agg.tmp10 = alloca %struct.S3, align 4 call void @callee1(float 2.000000e+01, %struct.S1* byval bitcast (%0* @f1.s1 to %struct.S1*)) nounwind call void @callee2(%struct.S2* byval @f1.s2) nounwind diff --git a/test/CodeGen/Mips/return-vector-float4.ll b/test/CodeGen/Mips/return-vector-float4.ll deleted file mode 100644 index ae10f123e4d2..000000000000 --- a/test/CodeGen/Mips/return-vector-float4.ll +++ /dev/null @@ -1,12 +0,0 @@ -; RUN: llc -march=mipsel -mattr=+android < %s | FileCheck %s - -define <4 x float> @retvec4() nounwind readnone { -entry: -; CHECK: lwc1 $f0 -; CHECK: lwc1 $f2 -; CHECK: lwc1 $f1 -; CHECK: lwc1 $f3 - - ret <4 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00> -} - diff --git a/test/CodeGen/Mips/return_address.ll b/test/CodeGen/Mips/return_address.ll index e1c9241984ca..34b72baa6d25 100644 --- a/test/CodeGen/Mips/return_address.ll +++ b/test/CodeGen/Mips/return_address.ll @@ -5,7 +5,7 @@ entry: %0 = call i8* @llvm.returnaddress(i32 0) ret i8* %0 -; CHECK: addu $2, $zero, $ra +; CHECK: move $2, $ra } define i8* @f2() nounwind { @@ -14,9 +14,9 @@ entry: %0 = call i8* @llvm.returnaddress(i32 0) ret i8* %0 -; CHECK: addu $[[R0:[0-9]+]], $zero, $ra +; CHECK: move $[[R0:[0-9]+]], $ra ; CHECK: jal -; CHECK: addu $2, $zero, $[[R0]] +; CHECK: move $2, $[[R0]] } declare i8* @llvm.returnaddress(i32) nounwind readnone diff --git a/test/CodeGen/Mips/selTBteqzCmpi.ll b/test/CodeGen/Mips/selTBteqzCmpi.ll new file mode 100644 index 000000000000..9cb8227f9d2b --- /dev/null +++ b/test/CodeGen/Mips/selTBteqzCmpi.ll @@ -0,0 +1,26 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16 + +@i = global i32 1, align 4 +@j = global i32 2, align 4 +@a = global i32 5, align 4 +@.str = private unnamed_addr constant [8 x i8] c"%i = 2\0A\00", align 1 +@k = common global i32 
0, align 4 + +define void @t() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" { +entry: + %0 = load i32* @a, align 4 + %cmp = icmp eq i32 %0, 10 + %1 = load i32* @i, align 4 + %2 = load i32* @j, align 4 + %cond = select i1 %cmp, i32 %1, i32 %2 + store i32 %cond, i32* @i, align 4 + ret void +} + +attributes #0 = { nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" } + + +; 16: cmpi ${{[0-9]+}}, 10 +; 16: bteqz $BB{{[0-9]+}}_{{[0-9]}} + + diff --git a/test/CodeGen/Mips/selTBtnezCmpi.ll b/test/CodeGen/Mips/selTBtnezCmpi.ll new file mode 100644 index 000000000000..bd334f59d33b --- /dev/null +++ b/test/CodeGen/Mips/selTBtnezCmpi.ll @@ -0,0 +1,26 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16 + +@i = global i32 1, align 4 +@j = global i32 2, align 4 +@a = global i32 5, align 4 +@.str = private unnamed_addr constant [8 x i8] c"%i = 1\0A\00", align 1 +@k = common global i32 0, align 4 + +define void @t() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" { +entry: + %0 = load i32* @a, align 4 + %cmp = icmp ne i32 %0, 10 + %1 = load i32* @i, align 4 + %2 = load i32* @j, align 4 + %cond = select i1 %cmp, i32 %1, i32 %2 + store i32 %cond, i32* @i, align 4 + ret void +} + +; 16: cmpi ${{[0-9]+}}, 10 +; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} + + +attributes #0 = { nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" } + + diff --git a/test/CodeGen/Mips/selTBtnezSlti.ll b/test/CodeGen/Mips/selTBtnezSlti.ll new file mode 100644 index 000000000000..593f6f274eb3 --- /dev/null +++ b/test/CodeGen/Mips/selTBtnezSlti.ll @@ -0,0 +1,25 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16 + +@i = global i32 1, align 4 +@j = global i32 2, align 4 +@a = global i32 5, align 4 +@.str = private unnamed_addr constant [9 x i8] c"%i = 2 \0A\00", align 1 +@k = common global i32 0, align 4 + +define void @t() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" { +entry: + %0 = load i32* @a, align 4 + %cmp = icmp slt i32 %0, 10 + %1 = load i32* @j, align 4 + %2 = load i32* @i, align 4 + %cond = select i1 %cmp, i32 %1, i32 %2 + store i32 %cond, i32* @i, align 4 + ret void +} + +attributes #0 = { nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" } + +; 16: slti ${{[0-9]+}}, 10 +; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} + + diff --git a/test/CodeGen/Mips/seleq.ll b/test/CodeGen/Mips/seleq.ll new file mode 100644 index 000000000000..190baad0b1db --- /dev/null +++ b/test/CodeGen/Mips/seleq.ll @@ -0,0 +1,95 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16 + +@t = global i32 10, align 4 +@f = global i32 199, align 4 +@a = global i32 1, align 4 +@b = global i32 10, align 4 +@c = global i32 1, align 4 +@z1 = common global i32 0, align 4 +@z2 = common global i32 0, align 4 +@z3 = common global i32 0, align 4 +@z4 = common global i32 0, align 4 + +define void @calc_seleq() nounwind "target-cpu"="mips32" "target-features"="+o32,+mips32" { +entry: + %0 = load i32* @a, align 4 + %1 = load i32* @b, align 4 + %cmp = icmp eq i32 %0, %1 + br i1 %cmp, label %cond.true, label %cond.false + +cond.true: ; preds = %entry + %2 = load i32* @f, align 4 + br label %cond.end + +cond.false: ; preds = %entry + %3 = load i32* @t, align 4 + br label %cond.end + +cond.end: ; preds = %cond.false, %cond.true + %cond = phi i32 [ %2, %cond.true ], [ %3, %cond.false ] + store i32 %cond, i32* @z1, align 4 + %4 = load i32* @b, align 4 + %5 = 
load i32* @a, align 4 + %cmp1 = icmp eq i32 %4, %5 + br i1 %cmp1, label %cond.true2, label %cond.false3 + +cond.true2: ; preds = %cond.end + %6 = load i32* @f, align 4 + br label %cond.end4 + +cond.false3: ; preds = %cond.end + %7 = load i32* @t, align 4 + br label %cond.end4 + +cond.end4: ; preds = %cond.false3, %cond.true2 + %cond5 = phi i32 [ %6, %cond.true2 ], [ %7, %cond.false3 ] + store i32 %cond5, i32* @z2, align 4 + %8 = load i32* @c, align 4 + %9 = load i32* @a, align 4 + %cmp6 = icmp eq i32 %8, %9 + br i1 %cmp6, label %cond.true7, label %cond.false8 + +cond.true7: ; preds = %cond.end4 + %10 = load i32* @t, align 4 + br label %cond.end9 + +cond.false8: ; preds = %cond.end4 + %11 = load i32* @f, align 4 + br label %cond.end9 + +cond.end9: ; preds = %cond.false8, %cond.true7 + %cond10 = phi i32 [ %10, %cond.true7 ], [ %11, %cond.false8 ] + store i32 %cond10, i32* @z3, align 4 + %12 = load i32* @a, align 4 + %13 = load i32* @c, align 4 + %cmp11 = icmp eq i32 %12, %13 + br i1 %cmp11, label %cond.true12, label %cond.false13 + +cond.true12: ; preds = %cond.end9 + %14 = load i32* @t, align 4 + br label %cond.end14 + +cond.false13: ; preds = %cond.end9 + %15 = load i32* @f, align 4 + br label %cond.end14 + +cond.end14: ; preds = %cond.false13, %cond.true12 + %cond15 = phi i32 [ %14, %cond.true12 ], [ %15, %cond.false13 ] + store i32 %cond15, i32* @z4, align 4 + ret void +} + +attributes #0 = { nounwind "target-cpu"="mips32" "target-features"="+o32,+mips32" } + +; 16: cmp ${{[0-9]+}}, ${{[0-9]+}} +; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} + +; 16: cmp ${{[0-9]+}}, ${{[0-9]+}} +; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} + +; 16: cmp ${{[0-9]+}}, ${{[0-9]+}} +; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} + +; 16: cmp ${{[0-9]+}}, ${{[0-9]+}} +; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} + diff --git a/test/CodeGen/Mips/seleqk.ll b/test/CodeGen/Mips/seleqk.ll new file mode 100644 index 000000000000..3ca622d5d8fe --- /dev/null +++ b/test/CodeGen/Mips/seleqk.ll @@ -0,0 +1,91 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16 + +@t = global i32 10, align 4 +@f = global i32 199, align 4 +@a = global i32 1, align 4 +@b = global i32 1000, align 4 +@z1 = common global i32 0, align 4 +@z2 = common global i32 0, align 4 +@z3 = common global i32 0, align 4 +@z4 = common global i32 0, align 4 +@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1 + +define void @calc_seleqk() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" { +entry: + %0 = load i32* @a, align 4 + %cmp = icmp eq i32 %0, 1 + br i1 %cmp, label %cond.true, label %cond.false + +cond.true: ; preds = %entry + %1 = load i32* @t, align 4 + br label %cond.end + +cond.false: ; preds = %entry + %2 = load i32* @f, align 4 + br label %cond.end + +cond.end: ; preds = %cond.false, %cond.true + %cond = phi i32 [ %1, %cond.true ], [ %2, %cond.false ] + store i32 %cond, i32* @z1, align 4 + %3 = load i32* @a, align 4 + %cmp1 = icmp eq i32 %3, 1000 + br i1 %cmp1, label %cond.true2, label %cond.false3 + +cond.true2: ; preds = %cond.end + %4 = load i32* @f, align 4 + br label %cond.end4 + +cond.false3: ; preds = %cond.end + %5 = load i32* @t, align 4 + br label %cond.end4 + +cond.end4: ; preds = %cond.false3, %cond.true2 + %cond5 = phi i32 [ %4, %cond.true2 ], [ %5, %cond.false3 ] + store i32 %cond5, i32* @z2, align 4 + %6 = load i32* @b, align 4 + %cmp6 = icmp eq i32 %6, 3 + br i1 %cmp6, label %cond.true7, label %cond.false8 + +cond.true7: ; preds = %cond.end4 + %7 = load i32* @f, align 4 + br label %cond.end9 
+ +cond.false8: ; preds = %cond.end4 + %8 = load i32* @t, align 4 + br label %cond.end9 + +cond.end9: ; preds = %cond.false8, %cond.true7 + %cond10 = phi i32 [ %7, %cond.true7 ], [ %8, %cond.false8 ] + store i32 %cond10, i32* @z3, align 4 + %9 = load i32* @b, align 4 + %cmp11 = icmp eq i32 %9, 1000 + br i1 %cmp11, label %cond.true12, label %cond.false13 + +cond.true12: ; preds = %cond.end9 + %10 = load i32* @t, align 4 + br label %cond.end14 + +cond.false13: ; preds = %cond.end9 + %11 = load i32* @f, align 4 + br label %cond.end14 + +cond.end14: ; preds = %cond.false13, %cond.true12 + %cond15 = phi i32 [ %10, %cond.true12 ], [ %11, %cond.false13 ] + store i32 %cond15, i32* @z4, align 4 + ret void +} + +attributes #0 = { nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" } +attributes #1 = { "target-cpu"="mips16" "target-features"="+mips16,+o32" } + +; 16: cmpi ${{[0-9]+}}, 1 # 16 bit inst +; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} + +; 16: cmpi ${{[0-9]+}}, 1000 +; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} + +; 16: cmpi ${{[0-9]+}}, 3 # 16 bit inst +; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} + +; 16: cmpi ${{[0-9]+}}, 1000 +; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} diff --git a/test/CodeGen/Mips/selgek.ll b/test/CodeGen/Mips/selgek.ll new file mode 100644 index 000000000000..8ab4046e92cb --- /dev/null +++ b/test/CodeGen/Mips/selgek.ll @@ -0,0 +1,94 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16 + +@t = global i32 10, align 4 +@f = global i32 199, align 4 +@a = global i32 2, align 4 +@b = global i32 1000, align 4 +@c = global i32 2, align 4 +@z1 = common global i32 0, align 4 +@z2 = common global i32 0, align 4 +@z3 = common global i32 0, align 4 +@z4 = common global i32 0, align 4 +@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1 + +define void @calc_z() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" { +entry: + %0 = load i32* @a, align 4 + %cmp = icmp sge i32 %0, 1000 + br i1 %cmp, label %cond.true, label %cond.false + +cond.true: ; preds = %entry + %1 = load i32* @f, align 4 + br label %cond.end + +cond.false: ; preds = %entry + %2 = load i32* @t, align 4 + br label %cond.end + +cond.end: ; preds = %cond.false, %cond.true + %cond = phi i32 [ %1, %cond.true ], [ %2, %cond.false ] + store i32 %cond, i32* @z1, align 4 + %3 = load i32* @b, align 4 + %cmp1 = icmp sge i32 %3, 1 + br i1 %cmp1, label %cond.true2, label %cond.false3 + +cond.true2: ; preds = %cond.end + %4 = load i32* @t, align 4 + br label %cond.end4 + +cond.false3: ; preds = %cond.end + %5 = load i32* @f, align 4 + br label %cond.end4 + +cond.end4: ; preds = %cond.false3, %cond.true2 + %cond5 = phi i32 [ %4, %cond.true2 ], [ %5, %cond.false3 ] + store i32 %cond5, i32* @z2, align 4 + %6 = load i32* @c, align 4 + %cmp6 = icmp sge i32 %6, 2 + br i1 %cmp6, label %cond.true7, label %cond.false8 + +cond.true7: ; preds = %cond.end4 + %7 = load i32* @t, align 4 + br label %cond.end9 + +cond.false8: ; preds = %cond.end4 + %8 = load i32* @f, align 4 + br label %cond.end9 + +cond.end9: ; preds = %cond.false8, %cond.true7 + %cond10 = phi i32 [ %7, %cond.true7 ], [ %8, %cond.false8 ] + store i32 %cond10, i32* @z3, align 4 + %9 = load i32* @a, align 4 + %cmp11 = icmp sge i32 %9, 2 + br i1 %cmp11, label %cond.true12, label %cond.false13 + +cond.true12: ; preds = %cond.end9 + %10 = load i32* @t, align 4 + br label %cond.end14 + +cond.false13: ; preds = %cond.end9 + %11 = load i32* @f, align 4 + br label %cond.end14 + +cond.end14: ; preds = %cond.false13, %cond.true12 + 
%cond15 = phi i32 [ %10, %cond.true12 ], [ %11, %cond.false13 ] + store i32 %cond15, i32* @z4, align 4 + ret void +} + +attributes #0 = { nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" } +attributes #1 = { "target-cpu"="mips16" "target-features"="+mips16,+o32" } + +; 16: slti ${{[0-9]+}}, 1000 +; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} + +; 16: slti ${{[0-9]+}}, 1 # 16 bit inst +; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} + +; 16: slti ${{[0-9]+}}, 2 # 16 bit inst +; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} + +; 16: slti ${{[0-9]+}}, 2 # 16 bit inst +; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} + + diff --git a/test/CodeGen/Mips/selgt.ll b/test/CodeGen/Mips/selgt.ll new file mode 100644 index 000000000000..67b9b498709b --- /dev/null +++ b/test/CodeGen/Mips/selgt.ll @@ -0,0 +1,98 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16 + +@t = global i32 10, align 4 +@f = global i32 199, align 4 +@a = global i32 1, align 4 +@b = global i32 10, align 4 +@c = global i32 1, align 4 +@z1 = common global i32 0, align 4 +@z2 = common global i32 0, align 4 +@z3 = common global i32 0, align 4 +@z4 = common global i32 0, align 4 +@.str = private unnamed_addr constant [9 x i8] c"%i = %i\0A\00", align 1 + +define i32 @calc_z() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" { +entry: + %retval = alloca i32, align 4 + %0 = load i32* @a, align 4 + %1 = load i32* @b, align 4 + %cmp = icmp sgt i32 %0, %1 + br i1 %cmp, label %cond.true, label %cond.false + +cond.true: ; preds = %entry + %2 = load i32* @f, align 4 + br label %cond.end + +cond.false: ; preds = %entry + %3 = load i32* @t, align 4 + br label %cond.end + +cond.end: ; preds = %cond.false, %cond.true + %cond = phi i32 [ %2, %cond.true ], [ %3, %cond.false ] + store i32 %cond, i32* @z1, align 4 + %4 = load i32* @b, align 4 + %5 = load i32* @a, align 4 + %cmp1 = icmp sgt i32 %4, %5 + br i1 %cmp1, label %cond.true2, label %cond.false3 + +cond.true2: ; preds = %cond.end + %6 = load i32* @t, align 4 + br label %cond.end4 + +cond.false3: ; preds = %cond.end + %7 = load i32* @f, align 4 + br label %cond.end4 + +cond.end4: ; preds = %cond.false3, %cond.true2 + %cond5 = phi i32 [ %6, %cond.true2 ], [ %7, %cond.false3 ] + store i32 %cond5, i32* @z2, align 4 + %8 = load i32* @c, align 4 + %9 = load i32* @a, align 4 + %cmp6 = icmp sgt i32 %8, %9 + br i1 %cmp6, label %cond.true7, label %cond.false8 + +cond.true7: ; preds = %cond.end4 + %10 = load i32* @f, align 4 + br label %cond.end9 + +cond.false8: ; preds = %cond.end4 + %11 = load i32* @t, align 4 + br label %cond.end9 + +cond.end9: ; preds = %cond.false8, %cond.true7 + %cond10 = phi i32 [ %10, %cond.true7 ], [ %11, %cond.false8 ] + store i32 %cond10, i32* @z3, align 4 + %12 = load i32* @a, align 4 + %13 = load i32* @c, align 4 + %cmp11 = icmp sgt i32 %12, %13 + br i1 %cmp11, label %cond.true12, label %cond.false13 + +cond.true12: ; preds = %cond.end9 + %14 = load i32* @f, align 4 + br label %cond.end14 + +cond.false13: ; preds = %cond.end9 + %15 = load i32* @t, align 4 + br label %cond.end14 + +cond.end14: ; preds = %cond.false13, %cond.true12 + %cond15 = phi i32 [ %14, %cond.true12 ], [ %15, %cond.false13 ] + store i32 %cond15, i32* @z4, align 4 + %16 = load i32* %retval + ret i32 %16 +} + +; 16: slt ${{[0-9]+}}, ${{[0-9]+}} +; 16: bteqz $BB{{[0-9]+}}_{{[0-9]}} + +; 16: slt ${{[0-9]+}}, ${{[0-9]+}} +; 16: bteqz $BB{{[0-9]+}}_{{[0-9]}} + +; 16: slt ${{[0-9]+}}, ${{[0-9]+}} +; 16: bteqz $BB{{[0-9]+}}_{{[0-9]}} + +; 16: slt ${{[0-9]+}}, ${{[0-9]+}} +; 16: bteqz 
$BB{{[0-9]+}}_{{[0-9]}} + +attributes #0 = { nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" } +attributes #1 = { "target-cpu"="mips16" "target-features"="+mips16,+o32" } diff --git a/test/CodeGen/Mips/selle.ll b/test/CodeGen/Mips/selle.ll new file mode 100644 index 000000000000..b27df45e6739 --- /dev/null +++ b/test/CodeGen/Mips/selle.ll @@ -0,0 +1,96 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16 + +@t = global i32 10, align 4 +@f = global i32 199, align 4 +@a = global i32 1, align 4 +@b = global i32 10, align 4 +@c = global i32 1, align 4 +@z1 = common global i32 0, align 4 +@z2 = common global i32 0, align 4 +@z3 = common global i32 0, align 4 +@z4 = common global i32 0, align 4 +@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1 + +define void @calc_z() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" { +entry: + %0 = load i32* @a, align 4 + %1 = load i32* @b, align 4 + %cmp = icmp sle i32 %0, %1 + br i1 %cmp, label %cond.true, label %cond.false + +cond.true: ; preds = %entry + %2 = load i32* @t, align 4 + br label %cond.end + +cond.false: ; preds = %entry + %3 = load i32* @f, align 4 + br label %cond.end + +cond.end: ; preds = %cond.false, %cond.true + %cond = phi i32 [ %2, %cond.true ], [ %3, %cond.false ] + store i32 %cond, i32* @z1, align 4 + %4 = load i32* @b, align 4 + %5 = load i32* @a, align 4 + %cmp1 = icmp sle i32 %4, %5 + br i1 %cmp1, label %cond.true2, label %cond.false3 + +cond.true2: ; preds = %cond.end + %6 = load i32* @f, align 4 + br label %cond.end4 + +cond.false3: ; preds = %cond.end + %7 = load i32* @t, align 4 + br label %cond.end4 + +cond.end4: ; preds = %cond.false3, %cond.true2 + %cond5 = phi i32 [ %6, %cond.true2 ], [ %7, %cond.false3 ] + store i32 %cond5, i32* @z2, align 4 + %8 = load i32* @c, align 4 + %9 = load i32* @a, align 4 + %cmp6 = icmp sle i32 %8, %9 + br i1 %cmp6, label %cond.true7, label %cond.false8 + +cond.true7: ; preds = %cond.end4 + %10 = load i32* @t, align 4 + br label %cond.end9 + +cond.false8: ; preds = %cond.end4 + %11 = load i32* @f, align 4 + br label %cond.end9 + +cond.end9: ; preds = %cond.false8, %cond.true7 + %cond10 = phi i32 [ %10, %cond.true7 ], [ %11, %cond.false8 ] + store i32 %cond10, i32* @z3, align 4 + %12 = load i32* @a, align 4 + %13 = load i32* @c, align 4 + %cmp11 = icmp sle i32 %12, %13 + br i1 %cmp11, label %cond.true12, label %cond.false13 + +cond.true12: ; preds = %cond.end9 + %14 = load i32* @t, align 4 + br label %cond.end14 + +cond.false13: ; preds = %cond.end9 + %15 = load i32* @f, align 4 + br label %cond.end14 + +cond.end14: ; preds = %cond.false13, %cond.true12 + %cond15 = phi i32 [ %14, %cond.true12 ], [ %15, %cond.false13 ] + store i32 %cond15, i32* @z4, align 4 + ret void +} + +; 16: slt ${{[0-9]+}}, ${{[0-9]+}} +; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} + +; 16: slt ${{[0-9]+}}, ${{[0-9]+}} +; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} + +; 16: slt ${{[0-9]+}}, ${{[0-9]+}} +; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} + +; 16: slt ${{[0-9]+}}, ${{[0-9]+}} +; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} + +attributes #0 = { nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" } +attributes #1 = { "target-cpu"="mips16" "target-features"="+mips16,+o32" } diff --git a/test/CodeGen/Mips/selltk.ll b/test/CodeGen/Mips/selltk.ll new file mode 100644 index 000000000000..1471b892c92a --- /dev/null +++ b/test/CodeGen/Mips/selltk.ll @@ -0,0 +1,93 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s 
-check-prefix=16 + +@t = global i32 10, align 4 +@f = global i32 199, align 4 +@a = global i32 2, align 4 +@b = global i32 1000, align 4 +@c = global i32 2, align 4 +@z1 = common global i32 0, align 4 +@z2 = common global i32 0, align 4 +@z3 = common global i32 0, align 4 +@z4 = common global i32 0, align 4 +@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1 + +define void @calc_selltk() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" { +entry: + %0 = load i32* @a, align 4 + %cmp = icmp slt i32 %0, 1000 + br i1 %cmp, label %cond.true, label %cond.false + +cond.true: ; preds = %entry + %1 = load i32* @t, align 4 + br label %cond.end + +cond.false: ; preds = %entry + %2 = load i32* @f, align 4 + br label %cond.end + +cond.end: ; preds = %cond.false, %cond.true + %cond = phi i32 [ %1, %cond.true ], [ %2, %cond.false ] + store i32 %cond, i32* @z1, align 4 + %3 = load i32* @b, align 4 + %cmp1 = icmp slt i32 %3, 2 + br i1 %cmp1, label %cond.true2, label %cond.false3 + +cond.true2: ; preds = %cond.end + %4 = load i32* @f, align 4 + br label %cond.end4 + +cond.false3: ; preds = %cond.end + %5 = load i32* @t, align 4 + br label %cond.end4 + +cond.end4: ; preds = %cond.false3, %cond.true2 + %cond5 = phi i32 [ %4, %cond.true2 ], [ %5, %cond.false3 ] + store i32 %cond5, i32* @z2, align 4 + %6 = load i32* @c, align 4 + %cmp6 = icmp sgt i32 %6, 2 + br i1 %cmp6, label %cond.true7, label %cond.false8 + +cond.true7: ; preds = %cond.end4 + %7 = load i32* @f, align 4 + br label %cond.end9 + +cond.false8: ; preds = %cond.end4 + %8 = load i32* @t, align 4 + br label %cond.end9 + +cond.end9: ; preds = %cond.false8, %cond.true7 + %cond10 = phi i32 [ %7, %cond.true7 ], [ %8, %cond.false8 ] + store i32 %cond10, i32* @z3, align 4 + %9 = load i32* @a, align 4 + %cmp11 = icmp sgt i32 %9, 2 + br i1 %cmp11, label %cond.true12, label %cond.false13 + +cond.true12: ; preds = %cond.end9 + %10 = load i32* @f, align 4 + br label %cond.end14 + +cond.false13: ; preds = %cond.end9 + %11 = load i32* @t, align 4 + br label %cond.end14 + +cond.end14: ; preds = %cond.false13, %cond.true12 + %cond15 = phi i32 [ %10, %cond.true12 ], [ %11, %cond.false13 ] + store i32 %cond15, i32* @z4, align 4 + ret void +} + +attributes #0 = { nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" } +attributes #1 = { "target-cpu"="mips16" "target-features"="+mips16,+o32" } + +; 16: slt ${{[0-9]+}}, ${{[0-9]+}} +; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} + +; 16: slt ${{[0-9]+}}, ${{[0-9]+}} +; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} + +; 16: slti ${{[0-9]+}}, 3 # 16 bit inst +; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} + +; 16: slti ${{[0-9]+}}, 3 # 16 bit inst +; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} + diff --git a/test/CodeGen/Mips/selne.ll b/test/CodeGen/Mips/selne.ll new file mode 100644 index 000000000000..e3d82b8cf5d0 --- /dev/null +++ b/test/CodeGen/Mips/selne.ll @@ -0,0 +1,97 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16 + +@t = global i32 10, align 4 +@f = global i32 199, align 4 +@a = global i32 1, align 4 +@b = global i32 10, align 4 +@c = global i32 1, align 4 +@z1 = common global i32 0, align 4 +@z2 = common global i32 0, align 4 +@z3 = common global i32 0, align 4 +@z4 = common global i32 0, align 4 +@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1 + +define void @calc_seleq() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" { +entry: + %0 = load i32* @a, align 4 + %1 = load i32* @b, align 4 + %cmp = icmp ne i32 %0, %1 + br i1 
%cmp, label %cond.true, label %cond.false + +cond.true: ; preds = %entry + %2 = load i32* @f, align 4 + br label %cond.end + +cond.false: ; preds = %entry + %3 = load i32* @t, align 4 + br label %cond.end + +cond.end: ; preds = %cond.false, %cond.true + %cond = phi i32 [ %2, %cond.true ], [ %3, %cond.false ] + store i32 %cond, i32* @z1, align 4 + %4 = load i32* @b, align 4 + %5 = load i32* @a, align 4 + %cmp1 = icmp ne i32 %4, %5 + br i1 %cmp1, label %cond.true2, label %cond.false3 + +cond.true2: ; preds = %cond.end + %6 = load i32* @f, align 4 + br label %cond.end4 + +cond.false3: ; preds = %cond.end + %7 = load i32* @t, align 4 + br label %cond.end4 + +cond.end4: ; preds = %cond.false3, %cond.true2 + %cond5 = phi i32 [ %6, %cond.true2 ], [ %7, %cond.false3 ] + store i32 %cond5, i32* @z2, align 4 + %8 = load i32* @c, align 4 + %9 = load i32* @a, align 4 + %cmp6 = icmp ne i32 %8, %9 + br i1 %cmp6, label %cond.true7, label %cond.false8 + +cond.true7: ; preds = %cond.end4 + %10 = load i32* @t, align 4 + br label %cond.end9 + +cond.false8: ; preds = %cond.end4 + %11 = load i32* @f, align 4 + br label %cond.end9 + +cond.end9: ; preds = %cond.false8, %cond.true7 + %cond10 = phi i32 [ %10, %cond.true7 ], [ %11, %cond.false8 ] + store i32 %cond10, i32* @z3, align 4 + %12 = load i32* @a, align 4 + %13 = load i32* @c, align 4 + %cmp11 = icmp ne i32 %12, %13 + br i1 %cmp11, label %cond.true12, label %cond.false13 + +cond.true12: ; preds = %cond.end9 + %14 = load i32* @t, align 4 + br label %cond.end14 + +cond.false13: ; preds = %cond.end9 + %15 = load i32* @f, align 4 + br label %cond.end14 + +cond.end14: ; preds = %cond.false13, %cond.true12 + %cond15 = phi i32 [ %14, %cond.true12 ], [ %15, %cond.false13 ] + store i32 %cond15, i32* @z4, align 4 + ret void +} + +attributes #0 = { nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" } +attributes #1 = { "target-cpu"="mips16" "target-features"="+mips16,+o32" } + +; 16: cmp ${{[0-9]+}}, ${{[0-9]+}} +; 16: bteqz $BB{{[0-9]+}}_{{[0-9]}} + +; 16: cmp ${{[0-9]+}}, ${{[0-9]+}} +; 16: bteqz $BB{{[0-9]+}}_{{[0-9]}} + +; 16: cmp ${{[0-9]+}}, ${{[0-9]+}} +; 16: bteqz $BB{{[0-9]+}}_{{[0-9]}} + +; 16: cmp ${{[0-9]+}}, ${{[0-9]+}} +; 16: bteqz $BB{{[0-9]+}}_{{[0-9]}} + diff --git a/test/CodeGen/Mips/selnek.ll b/test/CodeGen/Mips/selnek.ll new file mode 100644 index 000000000000..26015523106d --- /dev/null +++ b/test/CodeGen/Mips/selnek.ll @@ -0,0 +1,107 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16 + +@t = global i32 10, align 4 +@f = global i32 199, align 4 +@a = global i32 1, align 4 +@b = global i32 1000, align 4 +@z1 = common global i32 0, align 4 +@z2 = common global i32 0, align 4 +@z3 = common global i32 0, align 4 +@z4 = common global i32 0, align 4 +@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1 + +define void @calc_z() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" { +entry: + %0 = load i32* @a, align 4 + %cmp = icmp ne i32 %0, 1 + br i1 %cmp, label %cond.true, label %cond.false + +cond.true: ; preds = %entry + %1 = load i32* @f, align 4 + br label %cond.end + +cond.false: ; preds = %entry + %2 = load i32* @t, align 4 + br label %cond.end + +cond.end: ; preds = %cond.false, %cond.true + %cond = phi i32 [ %1, %cond.true ], [ %2, %cond.false ] + store i32 %cond, i32* @z1, align 4 + %3 = load i32* @a, align 4 + %cmp1 = icmp ne i32 %3, 1000 + br i1 %cmp1, label %cond.true2, label %cond.false3 + +cond.true2: ; preds = %cond.end + %4 = load i32* @t, align 4 
+ br label %cond.end4 + +cond.false3: ; preds = %cond.end + %5 = load i32* @f, align 4 + br label %cond.end4 + +cond.end4: ; preds = %cond.false3, %cond.true2 + %cond5 = phi i32 [ %4, %cond.true2 ], [ %5, %cond.false3 ] + store i32 %cond5, i32* @z2, align 4 + %6 = load i32* @b, align 4 + %cmp6 = icmp ne i32 %6, 3 + br i1 %cmp6, label %cond.true7, label %cond.false8 + +cond.true7: ; preds = %cond.end4 + %7 = load i32* @t, align 4 + br label %cond.end9 + +cond.false8: ; preds = %cond.end4 + %8 = load i32* @f, align 4 + br label %cond.end9 + +cond.end9: ; preds = %cond.false8, %cond.true7 + %cond10 = phi i32 [ %7, %cond.true7 ], [ %8, %cond.false8 ] + store i32 %cond10, i32* @z3, align 4 + %9 = load i32* @b, align 4 + %cmp11 = icmp ne i32 %9, 1000 + br i1 %cmp11, label %cond.true12, label %cond.false13 + +cond.true12: ; preds = %cond.end9 + %10 = load i32* @f, align 4 + br label %cond.end14 + +cond.false13: ; preds = %cond.end9 + %11 = load i32* @t, align 4 + br label %cond.end14 + +cond.end14: ; preds = %cond.false13, %cond.true12 + %cond15 = phi i32 [ %10, %cond.true12 ], [ %11, %cond.false13 ] + store i32 %cond15, i32* @z4, align 4 + ret void +} + +define i32 @main() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" { +entry: + call void @calc_z() "target-cpu"="mips16" "target-features"="+mips16,+o32" + %0 = load i32* @z1, align 4 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %0) "target-cpu"="mips16" "target-features"="+mips16,+o32" + %1 = load i32* @z2, align 4 + %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %1) "target-cpu"="mips16" "target-features"="+mips16,+o32" + %2 = load i32* @z3, align 4 + %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %2) "target-cpu"="mips16" "target-features"="+mips16,+o32" + %3 = load i32* @z4, align 4 + %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %3) "target-cpu"="mips16" "target-features"="+mips16,+o32" + ret i32 0 +} + +declare i32 @printf(i8*, ...) "target-cpu"="mips16" "target-features"="+mips16,+o32" + +attributes #0 = { nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" } +attributes #1 = { "target-cpu"="mips16" "target-features"="+mips16,+o32" } + +; 16: cmpi ${{[0-9]+}}, 1 # 16 bit inst +; 16: bteqz $BB{{[0-9]+}}_{{[0-9]}} + +; 16: cmpi ${{[0-9]+}}, 1000 +; 16: bteqz $BB{{[0-9]+}}_{{[0-9]}} + +; 16: cmpi ${{[0-9]+}}, 3 # 16 bit inst +; 16: bteqz $BB{{[0-9]+}}_{{[0-9]}} + +; 16: cmpi ${{[0-9]+}}, 1000 +; 16: bteqz $BB{{[0-9]+}}_{{[0-9]}}
\ No newline at end of file diff --git a/test/CodeGen/Mips/selpat.ll b/test/CodeGen/Mips/selpat.ll index cda0c96ef4be..8eda8de45e08 100644 --- a/test/CodeGen/Mips/selpat.ll +++ b/test/CodeGen/Mips/selpat.ll @@ -20,7 +20,7 @@ entry: %cond = select i1 %cmp, i32 %2, i32 %3 store i32 %cond, i32* @z1, align 4 ; 16: cmp ${{[0-9]+}}, ${{[0-9]+}} -; 16: bteqz .+4 +; 16: bteqz $BB{{[0-9]+}}_{{[0-9]}} ; 16: move ${{[0-9]+}}, ${{[0-9]+}} store i32 %cond, i32* @z2, align 4 %4 = load i32* @c, align 4 @@ -41,7 +41,7 @@ entry: %cond = select i1 %cmp, i32 %1, i32 %2 store i32 %cond, i32* @z1, align 4 ; 16: cmpi ${{[0-9]+}}, 1 -; 16: bteqz .+4 +; 16: bteqz $BB{{[0-9]+}}_{{[0-9]}} ; 16: move ${{[0-9]+}}, ${{[0-9]+}} %cmp1 = icmp eq i32 %0, 10 %cond5 = select i1 %cmp1, i32 %2, i32 %1 @@ -51,7 +51,7 @@ entry: %cond10 = select i1 %cmp6, i32 %2, i32 %1 store i32 %cond10, i32* @z3, align 4 ; 16: cmpi ${{[0-9]+}}, 10 -; 16: bteqz .+4 +; 16: bteqz $BB{{[0-9]+}}_{{[0-9]}} ; 16: move ${{[0-9]+}}, ${{[0-9]+}} %cmp11 = icmp eq i32 %3, 10 %cond15 = select i1 %cmp11, i32 %1, i32 %2 @@ -67,7 +67,7 @@ entry: %2 = load i32* @f, align 4 %cond = select i1 %cmp, i32 %1, i32 %2 store i32 %cond, i32* @z1, align 4 -; 16: beqz ${{[0-9]+}}, .+4 +; 16: beqz ${{[0-9]+}}, $BB{{[0-9]+}}_{{[0-9]}} ; 16: move ${{[0-9]+}}, ${{[0-9]+}} %3 = load i32* @b, align 4 %cmp1 = icmp eq i32 %3, 0 @@ -91,7 +91,7 @@ entry: %cond = select i1 %cmp, i32 %2, i32 %3 store i32 %cond, i32* @z1, align 4 ; 16: slt ${{[0-9]+}}, ${{[0-9]+}} -; 16: bteqz .+4 +; 16: bteqz $BB{{[0-9]+}}_{{[0-9]}} ; 16: move ${{[0-9]+}}, ${{[0-9]+}} %cmp1 = icmp sge i32 %1, %0 %cond5 = select i1 %cmp1, i32 %3, i32 %2 @@ -112,7 +112,7 @@ entry: %1 = load i32* @b, align 4 %cmp = icmp sgt i32 %0, %1 ; 16: slt ${{[0-9]+}}, ${{[0-9]+}} -; 16: btnez .+4 +; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} ; 16: move ${{[0-9]+}}, ${{[0-9]+}} %2 = load i32* @f, align 4 %3 = load i32* @t, align 4 @@ -141,7 +141,7 @@ entry: %cond = select i1 %cmp, i32 %2, i32 %3 store i32 %cond, i32* @z1, align 4 ; 16: slt ${{[0-9]+}}, ${{[0-9]+}} -; 16: bteqz .+4 +; 16: bteqz $BB{{[0-9]+}}_{{[0-9]}} ; 16: move ${{[0-9]+}}, ${{[0-9]+}} %cmp1 = icmp sle i32 %1, %0 %cond5 = select i1 %cmp1, i32 %3, i32 %2 @@ -165,7 +165,7 @@ entry: %cond = select i1 %cmp, i32 %1, i32 %2 store i32 %cond, i32* @z1, align 4 ; 16: slti ${{[0-9]+}}, {{[0-9]+}} -; 16: btnez .+4 +; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} ; 16: move ${{[0-9]+}}, ${{[0-9]+}} %3 = load i32* @b, align 4 %cmp1 = icmp slt i32 %3, 2 @@ -192,7 +192,7 @@ entry: %cond = select i1 %cmp, i32 %2, i32 %3 store i32 %cond, i32* @z1, align 4 ; 16: cmp ${{[0-9]+}}, ${{[0-9]+}} -; 16: btnez .+4 +; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} ; 16: move ${{[0-9]+}}, ${{[0-9]+}} store i32 %cond, i32* @z2, align 4 %4 = load i32* @c, align 4 @@ -212,7 +212,7 @@ entry: %cond = select i1 %cmp, i32 %1, i32 %2 store i32 %cond, i32* @z1, align 4 ; 16: cmpi ${{[0-9]+}}, 1 -; 16: btnez .+4 +; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} ; 16: move ${{[0-9]+}}, ${{[0-9]+}} %cmp1 = icmp ne i32 %0, 10 %cond5 = select i1 %cmp1, i32 %2, i32 %1 @@ -222,7 +222,7 @@ entry: %cond10 = select i1 %cmp6, i32 %2, i32 %1 store i32 %cond10, i32* @z3, align 4 ; 16: cmpi ${{[0-9]+}}, 10 -; 16: btnez .+4 +; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} ; 16: move ${{[0-9]+}}, ${{[0-9]+}} %cmp11 = icmp ne i32 %3, 10 %cond15 = select i1 %cmp11, i32 %1, i32 %2 @@ -238,7 +238,7 @@ entry: %2 = load i32* @t, align 4 %cond = select i1 %cmp, i32 %1, i32 %2 store i32 %cond, i32* @z1, align 4 -; 16: bnez ${{[0-9]+}}, .+4 +; 16: bnez ${{[0-9]+}}, 
$BB{{[0-9]+}}_{{[0-9]}} ; 16: move ${{[0-9]+}}, ${{[0-9]+}} %3 = load i32* @b, align 4 %cmp1 = icmp ne i32 %3, 0 @@ -260,7 +260,7 @@ entry: %2 = load i32* @t, align 4 %cond = select i1 %tobool, i32 %1, i32 %2 store i32 %cond, i32* @z1, align 4 -; 16: bnez ${{[0-9]+}}, .+4 +; 16: bnez ${{[0-9]+}}, $BB{{[0-9]+}}_{{[0-9]}} ; 16: move ${{[0-9]+}}, ${{[0-9]+}} %3 = load i32* @b, align 4 %tobool1 = icmp ne i32 %3, 0 @@ -284,7 +284,7 @@ entry: %cond = select i1 %cmp, i32 %2, i32 %3 store i32 %cond, i32* @z1, align 4 ; 16: sltu ${{[0-9]+}}, ${{[0-9]+}} -; 16: bteqz .+4 +; 16: bteqz $BB{{[0-9]+}}_{{[0-9]}} ; 16: move ${{[0-9]+}}, ${{[0-9]+}} %cmp1 = icmp uge i32 %1, %0 %cond5 = select i1 %cmp1, i32 %3, i32 %2 @@ -309,7 +309,7 @@ entry: %cond = select i1 %cmp, i32 %2, i32 %3 store i32 %cond, i32* @z1, align 4 ; 16: sltu ${{[0-9]+}}, ${{[0-9]+}} -; 16: btnez .+4 +; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} ; 16: move ${{[0-9]+}}, ${{[0-9]+}} %cmp1 = icmp ugt i32 %1, %0 %cond5 = select i1 %cmp1, i32 %3, i32 %2 @@ -334,7 +334,7 @@ entry: %cond = select i1 %cmp, i32 %2, i32 %3 store i32 %cond, i32* @z1, align 4 ; 16: sltu ${{[0-9]+}}, ${{[0-9]+}} -; 16: bteqz .+4 +; 16: bteqz $BB{{[0-9]+}}_{{[0-9]}} ; 16: move ${{[0-9]+}}, ${{[0-9]+}} %cmp1 = icmp ule i32 %1, %0 %cond5 = select i1 %cmp1, i32 %3, i32 %2 diff --git a/test/CodeGen/Mips/seteq.ll b/test/CodeGen/Mips/seteq.ll index da840c83a2b4..5fadf78d57a0 100644 --- a/test/CodeGen/Mips/seteq.ll +++ b/test/CodeGen/Mips/seteq.ll @@ -15,7 +15,7 @@ entry: store i32 %conv, i32* @r1, align 4 ; 16: xor $[[REGISTER:[0-9A-Ba-b_]+]], ${{[0-9]+}} ; 16: sltiu $[[REGISTER:[0-9A-Ba-b_]+]], 1 -; 16: move ${{[0-9]+}}, $t8 +; 16: move ${{[0-9]+}}, $24 ret void } diff --git a/test/CodeGen/Mips/seteqz.ll b/test/CodeGen/Mips/seteqz.ll index d445be6aedb0..80dc3120a6a1 100644 --- a/test/CodeGen/Mips/seteqz.ll +++ b/test/CodeGen/Mips/seteqz.ll @@ -12,13 +12,13 @@ entry: %conv = zext i1 %cmp to i32 store i32 %conv, i32* @r1, align 4 ; 16: sltiu ${{[0-9]+}}, 1 -; 16: move ${{[0-9]+}}, $t8 +; 16: move ${{[0-9]+}}, $24 %1 = load i32* @j, align 4 %cmp1 = icmp eq i32 %1, 99 %conv2 = zext i1 %cmp1 to i32 store i32 %conv2, i32* @r2, align 4 ; 16: xor $[[REGISTER:[0-9A-Ba-b_]+]], ${{[0-9]+}} ; 16: sltiu $[[REGISTER:[0-9A-Ba-b_]+]], 1 -; 16: move ${{[0-9]+}}, $t8 +; 16: move ${{[0-9]+}}, $24 ret void } diff --git a/test/CodeGen/Mips/setge.ll b/test/CodeGen/Mips/setge.ll index 94b499bc31e9..8869eb8fc547 100644 --- a/test/CodeGen/Mips/setge.ll +++ b/test/CodeGen/Mips/setge.ll @@ -17,7 +17,7 @@ entry: %conv = zext i1 %cmp to i32 store i32 %conv, i32* @r1, align 4 ; 16: slt ${{[0-9]+}}, ${{[0-9]+}} -; 16: move $[[REGISTER:[0-9]+]], $t8 +; 16: move $[[REGISTER:[0-9]+]], $24 ; 16: xor $[[REGISTER]], ${{[0-9]+}} %2 = load i32* @m, align 4 %cmp1 = icmp sge i32 %0, %2 diff --git a/test/CodeGen/Mips/setgek.ll b/test/CodeGen/Mips/setgek.ll index b6bae09bcb5b..18a0fcf62130 100644 --- a/test/CodeGen/Mips/setgek.ll +++ b/test/CodeGen/Mips/setgek.ll @@ -12,7 +12,7 @@ entry: %conv = zext i1 %cmp to i32 store i32 %conv, i32* @r1, align 4 ; 16: slti ${{[0-9]+}}, -32768 -; 16: move ${{[0-9]+}}, $t8 +; 16: move ${{[0-9]+}}, $24 ; 16: xor ${{[0-9]+}}, ${{[0-9]+}} ret void } diff --git a/test/CodeGen/Mips/setle.ll b/test/CodeGen/Mips/setle.ll index f36fb4392d76..2df6774c1fad 100644 --- a/test/CodeGen/Mips/setle.ll +++ b/test/CodeGen/Mips/setle.ll @@ -16,7 +16,7 @@ entry: %conv = zext i1 %cmp to i32 store i32 %conv, i32* @r1, align 4 ; 16: slt ${{[0-9]+}}, ${{[0-9]+}} -; 16: move $[[REGISTER:[0-9]+]], $t8 +; 16: move 
$[[REGISTER:[0-9]+]], $24 ; 16: xor $[[REGISTER]], ${{[0-9]+}} %2 = load i32* @m, align 4 %cmp1 = icmp sle i32 %2, %1 diff --git a/test/CodeGen/Mips/setlt.ll b/test/CodeGen/Mips/setlt.ll index 435be8e2334a..3dac74bf2e01 100644 --- a/test/CodeGen/Mips/setlt.ll +++ b/test/CodeGen/Mips/setlt.ll @@ -16,6 +16,6 @@ entry: %conv = zext i1 %cmp to i32 store i32 %conv, i32* @r1, align 4 ; 16: slt ${{[0-9]+}}, ${{[0-9]+}} -; 16: move ${{[0-9]+}}, $t8 +; 16: move ${{[0-9]+}}, $24 ret void } diff --git a/test/CodeGen/Mips/setltk.ll b/test/CodeGen/Mips/setltk.ll index c0b610e37784..ecebc7e578e1 100644 --- a/test/CodeGen/Mips/setltk.ll +++ b/test/CodeGen/Mips/setltk.ll @@ -15,6 +15,6 @@ entry: %conv = zext i1 %cmp to i32 store i32 %conv, i32* @r1, align 4 ; 16: slti $[[REGISTER:[0-9]+]], 10 -; 16: move $[[REGISTER]], $t8 +; 16: move $[[REGISTER]], $24 ret void } diff --git a/test/CodeGen/Mips/setne.ll b/test/CodeGen/Mips/setne.ll index 6460c83c7b0b..9e66901e32b5 100644 --- a/test/CodeGen/Mips/setne.ll +++ b/test/CodeGen/Mips/setne.ll @@ -15,6 +15,6 @@ entry: store i32 %conv, i32* @r1, align 4 ; 16: xor $[[REGISTER:[0-9]+]], ${{[0-9]+}} ; 16: sltu ${{[0-9]+}}, $[[REGISTER]] -; 16: move ${{[0-9]+}}, $t8 +; 16: move ${{[0-9]+}}, $24 ret void } diff --git a/test/CodeGen/Mips/setuge.ll b/test/CodeGen/Mips/setuge.ll index ac72b66e9fb0..1c9b5bbe8114 100644 --- a/test/CodeGen/Mips/setuge.ll +++ b/test/CodeGen/Mips/setuge.ll @@ -16,7 +16,7 @@ entry: %conv = zext i1 %cmp to i32 store i32 %conv, i32* @r1, align 4 ; 16: sltu ${{[0-9]+}}, ${{[0-9]+}} -; 16: move $[[REGISTER:[0-9]+]], $t8 +; 16: move $[[REGISTER:[0-9]+]], $24 ; 16: xor $[[REGISTER]], ${{[0-9]+}} %2 = load i32* @m, align 4 %cmp1 = icmp uge i32 %0, %2 diff --git a/test/CodeGen/Mips/setugt.ll b/test/CodeGen/Mips/setugt.ll index 328f0e3be34a..f10b47ae7178 100644 --- a/test/CodeGen/Mips/setugt.ll +++ b/test/CodeGen/Mips/setugt.ll @@ -16,6 +16,6 @@ entry: %conv = zext i1 %cmp to i32 store i32 %conv, i32* @r1, align 4 ; 16: sltu ${{[0-9]+}}, ${{[0-9]+}} -; 16: move ${{[0-9]+}}, $t8 +; 16: move ${{[0-9]+}}, $24 ret void } diff --git a/test/CodeGen/Mips/setule.ll b/test/CodeGen/Mips/setule.ll index 792f2ae0fa29..a6d6bf064052 100644 --- a/test/CodeGen/Mips/setule.ll +++ b/test/CodeGen/Mips/setule.ll @@ -16,7 +16,7 @@ entry: %conv = zext i1 %cmp to i32 store i32 %conv, i32* @r1, align 4 ; 16: sltu ${{[0-9]+}}, ${{[0-9]+}} -; 16: move $[[REGISTER:[0-9]+]], $t8 +; 16: move $[[REGISTER:[0-9]+]], $24 ; 16: xor $[[REGISTER]], ${{[0-9]+}} %2 = load i32* @m, align 4 %cmp1 = icmp ule i32 %2, %1 diff --git a/test/CodeGen/Mips/setult.ll b/test/CodeGen/Mips/setult.ll index 56d2e8daa3e0..00ee437a2ffe 100644 --- a/test/CodeGen/Mips/setult.ll +++ b/test/CodeGen/Mips/setult.ll @@ -16,6 +16,6 @@ entry: %conv = zext i1 %cmp to i32 store i32 %conv, i32* @r1, align 4 ; 16: sltu ${{[0-9]+}}, ${{[0-9]+}} -; 16: move ${{[0-9]+}}, $t8 +; 16: move ${{[0-9]+}}, $24 ret void } diff --git a/test/CodeGen/Mips/setultk.ll b/test/CodeGen/Mips/setultk.ll index 75b270ed8428..eb9edbaad7f8 100644 --- a/test/CodeGen/Mips/setultk.ll +++ b/test/CodeGen/Mips/setultk.ll @@ -14,7 +14,7 @@ entry: %cmp = icmp ult i32 %0, 10 %conv = zext i1 %cmp to i32 store i32 %conv, i32* @r1, align 4 -; 16: sltiu $[[REGISTER:[0-9]+]], 10 -; 16: move $[[REGISTER]], $t8 +; 16: sltiu ${{[0-9]+}}, 10 # 16 bit inst +; 16: move ${{[0-9]+}}, $24 ret void } diff --git a/test/CodeGen/Mips/tls.ll b/test/CodeGen/Mips/tls.ll index 72d30dc36912..b86d25e5e5e8 100644 --- a/test/CodeGen/Mips/tls.ll +++ b/test/CodeGen/Mips/tls.ll @@ 
-21,9 +21,9 @@ entry: ; PIC: jalr $25 ; PIC: lw $2, 0($2) -; STATIC: rdhwr $3, $29 ; STATIC: lui $[[R0:[0-9]+]], %tprel_hi(t1) ; STATIC: addiu $[[R1:[0-9]+]], $[[R0]], %tprel_lo(t1) +; STATIC: rdhwr $3, $29 ; STATIC: addu $[[R2:[0-9]+]], $3, $[[R1]] ; STATIC: lw $2, 0($[[R2]]) } diff --git a/test/CodeGen/Mips/vector-setcc.ll b/test/CodeGen/Mips/vector-setcc.ll new file mode 100644 index 000000000000..aeff4918c8bb --- /dev/null +++ b/test/CodeGen/Mips/vector-setcc.ll @@ -0,0 +1,16 @@ +; RUN: llc -march=mipsel < %s + +@a = common global <4 x i32> zeroinitializer, align 16 +@b = common global <4 x i32> zeroinitializer, align 16 +@g0 = common global <4 x i32> zeroinitializer, align 16 + +define void @foo0() nounwind { +entry: + %0 = load <4 x i32>* @a, align 16 + %1 = load <4 x i32>* @b, align 16 + %cmp = icmp slt <4 x i32> %0, %1 + %sext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %sext, <4 x i32>* @g0, align 16 + ret void +} + diff --git a/test/CodeGen/NVPTX/annotations.ll b/test/CodeGen/NVPTX/annotations.ll index d93f688ef1fd..39d52d382663 100644 --- a/test/CodeGen/NVPTX/annotations.ll +++ b/test/CodeGen/NVPTX/annotations.ll @@ -1,5 +1,3 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s diff --git a/test/CodeGen/NVPTX/arithmetic-fp-sm10.ll b/test/CodeGen/NVPTX/arithmetic-fp-sm10.ll deleted file mode 100644 index 73c77f56bc9c..000000000000 --- a/test/CodeGen/NVPTX/arithmetic-fp-sm10.ll +++ /dev/null @@ -1,72 +0,0 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s - -;; These tests should run for all targets - -;;===-- Basic instruction selection tests ---------------------------------===;; - - -;;; f64 - -define double @fadd_f64(double %a, double %b) { -; CHECK: add.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}} -; CHECK: ret - %ret = fadd double %a, %b - ret double %ret -} - -define double @fsub_f64(double %a, double %b) { -; CHECK: sub.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}} -; CHECK: ret - %ret = fsub double %a, %b - ret double %ret -} - -define double @fmul_f64(double %a, double %b) { -; CHECK: mul.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}} -; CHECK: ret - %ret = fmul double %a, %b - ret double %ret -} - -define double @fdiv_f64(double %a, double %b) { -; CHECK: div.rn.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}} -; CHECK: ret - %ret = fdiv double %a, %b - ret double %ret -} - -;; PTX does not have a floating-point rem instruction - - -;;; f32 - -define float @fadd_f32(float %a, float %b) { -; CHECK: add.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}} -; CHECK: ret - %ret = fadd float %a, %b - ret float %ret -} - -define float @fsub_f32(float %a, float %b) { -; CHECK: sub.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}} -; CHECK: ret - %ret = fsub float %a, %b - ret float %ret -} - -define float @fmul_f32(float %a, float %b) { -; CHECK: mul.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}} -; CHECK: ret - %ret = fmul float %a, %b - ret float %ret -} - -define float @fdiv_f32(float %a, float %b) { -; CHECK: div.full.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}} -; CHECK: ret - %ret = fdiv float %a, %b - ret float %ret -} - -;; PTX does not have a floating-point rem instruction diff --git a/test/CodeGen/NVPTX/arithmetic-int.ll b/test/CodeGen/NVPTX/arithmetic-int.ll index 529f84900afd..8d73b7e6c4c6 100644 --- 
a/test/CodeGen/NVPTX/arithmetic-int.ll +++ b/test/CodeGen/NVPTX/arithmetic-int.ll @@ -1,5 +1,3 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s diff --git a/test/CodeGen/NVPTX/calling-conv.ll b/test/CodeGen/NVPTX/calling-conv.ll index 968203e5f70e..190a1462adbc 100644 --- a/test/CodeGen/NVPTX/calling-conv.ll +++ b/test/CodeGen/NVPTX/calling-conv.ll @@ -1,5 +1,3 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s diff --git a/test/CodeGen/NVPTX/compare-int.ll b/test/CodeGen/NVPTX/compare-int.ll index 12fc7548212c..16af0a336ddc 100644 --- a/test/CodeGen/NVPTX/compare-int.ll +++ b/test/CodeGen/NVPTX/compare-int.ll @@ -1,5 +1,3 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s diff --git a/test/CodeGen/NVPTX/convert-fp.ll b/test/CodeGen/NVPTX/convert-fp.ll index 21c84379b062..1882121fa724 100644 --- a/test/CodeGen/NVPTX/convert-fp.ll +++ b/test/CodeGen/NVPTX/convert-fp.ll @@ -1,5 +1,3 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s diff --git a/test/CodeGen/NVPTX/convert-int-sm10.ll b/test/CodeGen/NVPTX/convert-int-sm10.ll deleted file mode 100644 index 20716f982e3b..000000000000 --- a/test/CodeGen/NVPTX/convert-int-sm10.ll +++ /dev/null @@ -1,55 +0,0 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s - - -; i16 - -define i16 @cvt_i16_i32(i32 %x) { -; CHECK: cvt.u16.u32 %rs{{[0-9]+}}, %r{{[0-9]+}} -; CHECK: ret - %a = trunc i32 %x to i16 - ret i16 %a -} - -define i16 @cvt_i16_i64(i64 %x) { -; CHECK: cvt.u16.u64 %rs{{[0-9]+}}, %rl{{[0-9]+}} -; CHECK: ret - %a = trunc i64 %x to i16 - ret i16 %a -} - - - -; i32 - -define i32 @cvt_i32_i16(i16 %x) { -; CHECK: cvt.u32.u16 %r{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: ret - %a = zext i16 %x to i32 - ret i32 %a -} - -define i32 @cvt_i32_i64(i64 %x) { -; CHECK: cvt.u32.u64 %r{{[0-9]+}}, %rl{{[0-9]+}} -; CHECK: ret - %a = trunc i64 %x to i32 - ret i32 %a -} - - - -; i64 - -define i64 @cvt_i64_i16(i16 %x) { -; CHECK: cvt.u64.u16 %rl{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: ret - %a = zext i16 %x to i64 - ret i64 %a -} - -define i64 @cvt_i64_i32(i32 %x) { -; CHECK: cvt.u64.u32 %rl{{[0-9]+}}, %r{{[0-9]+}} -; CHECK: ret - %a = zext i32 %x to i64 - ret i64 %a -} diff --git a/test/CodeGen/NVPTX/intrin-nocapture.ll b/test/CodeGen/NVPTX/intrin-nocapture.ll new file mode 100644 index 000000000000..55781bb15a0b --- /dev/null +++ b/test/CodeGen/NVPTX/intrin-nocapture.ll @@ -0,0 +1,21 @@ +; RUN: opt < %s -O3 -S | FileCheck %s + +; Address space intrinsics were erroneously marked NoCapture, leading to bad +; optimizations (such as the store below being eliminated as dead code). This +; test makes sure we don't regress. 
+ +declare void @foo(i32 addrspace(1)*) + +declare i32 addrspace(1)* @llvm.nvvm.ptr.gen.to.global.p1i32.p0i32(i32*) + +; CHECK: @bar +define void @bar() { + %t1 = alloca i32 +; CHECK: call i32 addrspace(1)* @llvm.nvvm.ptr.gen.to.global.p1i32.p0i32(i32* %t1) +; CHECK-NEXT: store i32 10, i32* %t1 + %t2 = call i32 addrspace(1)* @llvm.nvvm.ptr.gen.to.global.p1i32.p0i32(i32* %t1) + store i32 10, i32* %t1 + call void @foo(i32 addrspace(1)* %t2) + ret void +} + diff --git a/test/CodeGen/NVPTX/intrinsic-old.ll b/test/CodeGen/NVPTX/intrinsic-old.ll index 1c9879c4178b..53a28f333798 100644 --- a/test/CodeGen/NVPTX/intrinsic-old.ll +++ b/test/CodeGen/NVPTX/intrinsic-old.ll @@ -1,5 +1,3 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s diff --git a/test/CodeGen/NVPTX/intrinsics.ll b/test/CodeGen/NVPTX/intrinsics.ll index afab60ca96a8..8b0357be87cb 100644 --- a/test/CodeGen/NVPTX/intrinsics.ll +++ b/test/CodeGen/NVPTX/intrinsics.ll @@ -1,5 +1,3 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s diff --git a/test/CodeGen/NVPTX/ld-addrspace.ll b/test/CodeGen/NVPTX/ld-addrspace.ll index d1f5093df223..3265868d3c52 100644 --- a/test/CodeGen/NVPTX/ld-addrspace.ll +++ b/test/CodeGen/NVPTX/ld-addrspace.ll @@ -1,6 +1,4 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s --check-prefix=PTX32 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32 -; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s --check-prefix=PTX64 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64 diff --git a/test/CodeGen/NVPTX/nvvm-reflect.ll b/test/CodeGen/NVPTX/nvvm-reflect.ll new file mode 100644 index 000000000000..0d02194651e3 --- /dev/null +++ b/test/CodeGen/NVPTX/nvvm-reflect.ll @@ -0,0 +1,34 @@ +; RUN: opt < %s -S -nvvm-reflect -nvvm-reflect-list USE_MUL=0 -O2 | FileCheck %s --check-prefix=USE_MUL_0 +; RUN: opt < %s -S -nvvm-reflect -nvvm-reflect-list USE_MUL=1 -O2 | FileCheck %s --check-prefix=USE_MUL_1 + +@str = private addrspace(4) unnamed_addr constant [8 x i8] c"USE_MUL\00" + +declare i32 @__nvvm_reflect(i8*) +declare i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)*) + +define float @foo(float %a, float %b) { +; USE_MUL_0: define float @foo +; USE_MUL_0-NOT: call i32 @__nvvm_reflect +; USE_MUL_1: define float @foo +; USE_MUL_1-NOT: call i32 @__nvvm_reflect + %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([8 x i8] addrspace(4)* @str, i32 0, i32 0)) + %reflect = tail call i32 @__nvvm_reflect(i8* %ptr) + %cmp = icmp ugt i32 %reflect, 0 + br i1 %cmp, label %use_mul, label %use_add + +use_mul: +; USE_MUL_1: fmul float %a, %b +; USE_MUL_0-NOT: fadd float %a, %b + %ret1 = fmul float %a, %b + br label %exit + +use_add: +; USE_MUL_0: fadd float %a, %b +; USE_MUL_1-NOT: fmul float %a, %b + %ret2 = fadd float %a, %b + br label %exit + +exit: + %ret = phi float [%ret1, %use_mul], [%ret2, %use_add] + ret float %ret +} diff --git a/test/CodeGen/NVPTX/sched1.ll b/test/CodeGen/NVPTX/sched1.ll new file mode 100644 index 000000000000..03ab635e73b9 --- /dev/null +++ b/test/CodeGen/NVPTX/sched1.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | 
FileCheck %s + +; Ensure source scheduling is working + +define void @foo(i32* %a) { +; CHECK: .func foo +; CHECK: ld.u32 +; CHECK-NEXT: ld.u32 +; CHECK-NEXT: ld.u32 +; CHECK-NEXT: ld.u32 +; CHECK-NEXT: add.s32 +; CHECK-NEXT: add.s32 +; CHECK-NEXT: add.s32 + %ptr0 = getelementptr i32* %a, i32 0 + %val0 = load i32* %ptr0 + %ptr1 = getelementptr i32* %a, i32 1 + %val1 = load i32* %ptr1 + %ptr2 = getelementptr i32* %a, i32 2 + %val2 = load i32* %ptr2 + %ptr3 = getelementptr i32* %a, i32 3 + %val3 = load i32* %ptr3 + + %t0 = add i32 %val0, %val1 + %t1 = add i32 %t0, %val2 + %t2 = add i32 %t1, %val3 + + store i32 %t2, i32* %a + + ret void +} + diff --git a/test/CodeGen/NVPTX/sched2.ll b/test/CodeGen/NVPTX/sched2.ll new file mode 100644 index 000000000000..71a9a4963faf --- /dev/null +++ b/test/CodeGen/NVPTX/sched2.ll @@ -0,0 +1,32 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + +define void @foo(<2 x i32>* %a) { +; CHECK: .func foo +; CHECK: ld.v2.u32 +; CHECK-NEXT: ld.v2.u32 +; CHECK-NEXT: ld.v2.u32 +; CHECK-NEXT: ld.v2.u32 +; CHECK-NEXT: add.s32 +; CHECK-NEXT: add.s32 +; CHECK-NEXT: add.s32 +; CHECK-NEXT: add.s32 +; CHECK-NEXT: add.s32 +; CHECK-NEXT: add.s32 + %ptr0 = getelementptr <2 x i32>* %a, i32 0 + %val0 = load <2 x i32>* %ptr0 + %ptr1 = getelementptr <2 x i32>* %a, i32 1 + %val1 = load <2 x i32>* %ptr1 + %ptr2 = getelementptr <2 x i32>* %a, i32 2 + %val2 = load <2 x i32>* %ptr2 + %ptr3 = getelementptr <2 x i32>* %a, i32 3 + %val3 = load <2 x i32>* %ptr3 + + %t0 = add <2 x i32> %val0, %val1 + %t1 = add <2 x i32> %t0, %val2 + %t2 = add <2 x i32> %t1, %val3 + + store <2 x i32> %t2, <2 x i32>* %a + + ret void +} + diff --git a/test/CodeGen/NVPTX/sm-version-10.ll b/test/CodeGen/NVPTX/sm-version-10.ll deleted file mode 100644 index 9324a3780986..000000000000 --- a/test/CodeGen/NVPTX/sm-version-10.ll +++ /dev/null @@ -1,6 +0,0 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s - - -; CHECK: .target sm_10 - diff --git a/test/CodeGen/NVPTX/sm-version-11.ll b/test/CodeGen/NVPTX/sm-version-11.ll deleted file mode 100644 index 9033a4eba5e4..000000000000 --- a/test/CodeGen/NVPTX/sm-version-11.ll +++ /dev/null @@ -1,6 +0,0 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_11 | FileCheck %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_11 | FileCheck %s - - -; CHECK: .target sm_11 - diff --git a/test/CodeGen/NVPTX/sm-version-12.ll b/test/CodeGen/NVPTX/sm-version-12.ll deleted file mode 100644 index d8ee85c9010e..000000000000 --- a/test/CodeGen/NVPTX/sm-version-12.ll +++ /dev/null @@ -1,6 +0,0 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_12 | FileCheck %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_12 | FileCheck %s - - -; CHECK: .target sm_12 - diff --git a/test/CodeGen/NVPTX/sm-version-13.ll b/test/CodeGen/NVPTX/sm-version-13.ll deleted file mode 100644 index ad67d642ce30..000000000000 --- a/test/CodeGen/NVPTX/sm-version-13.ll +++ /dev/null @@ -1,6 +0,0 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_13 | FileCheck %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_13 | FileCheck %s - - -; CHECK: .target sm_13 - diff --git a/test/CodeGen/NVPTX/st-addrspace.ll b/test/CodeGen/NVPTX/st-addrspace.ll index 54e04ae6106d..0b26d802df84 100644 --- a/test/CodeGen/NVPTX/st-addrspace.ll +++ b/test/CodeGen/NVPTX/st-addrspace.ll @@ -1,6 +1,4 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s --check-prefix=PTX32 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32 -; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s 
--check-prefix=PTX64 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64 diff --git a/test/CodeGen/NVPTX/tuple-literal.ll b/test/CodeGen/NVPTX/tuple-literal.ll new file mode 100644 index 000000000000..2b1f2c4b6680 --- /dev/null +++ b/test/CodeGen/NVPTX/tuple-literal.ll @@ -0,0 +1,5 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 + +define ptx_device void @test_function({i8, i8}*) { + ret void +} diff --git a/test/CodeGen/NVPTX/vector-args.ll b/test/CodeGen/NVPTX/vector-args.ll new file mode 100644 index 000000000000..80deae46935a --- /dev/null +++ b/test/CodeGen/NVPTX/vector-args.ll @@ -0,0 +1,27 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + + +define float @foo(<2 x float> %a) { +; CHECK: .func (.param .b32 func_retval0) foo +; CHECK: .param .align 8 .b8 foo_param_0[8] +; CHECK: ld.param.f32 %f{{[0-9]+}} +; CHECK: ld.param.f32 %f{{[0-9]+}} + %t1 = fmul <2 x float> %a, %a + %t2 = extractelement <2 x float> %t1, i32 0 + %t3 = extractelement <2 x float> %t1, i32 1 + %t4 = fadd float %t2, %t3 + ret float %t4 +} + + +define float @bar(<4 x float> %a) { +; CHECK: .func (.param .b32 func_retval0) bar +; CHECK: .param .align 16 .b8 bar_param_0[16] +; CHECK: ld.param.f32 %f{{[0-9]+}} +; CHECK: ld.param.f32 %f{{[0-9]+}} + %t1 = fmul <4 x float> %a, %a + %t2 = extractelement <4 x float> %t1, i32 0 + %t3 = extractelement <4 x float> %t1, i32 1 + %t4 = fadd float %t2, %t3 + ret float %t4 +} diff --git a/test/CodeGen/NVPTX/vector-compare.ll b/test/CodeGen/NVPTX/vector-compare.ll new file mode 100644 index 000000000000..218049995233 --- /dev/null +++ b/test/CodeGen/NVPTX/vector-compare.ll @@ -0,0 +1,19 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 + +; This test makes sure that the result of vector compares are properly +; scalarized. If codegen fails, then the type legalizer incorrectly +; tried to promote <2 x i1> to <2 x i8> and instruction selection failed. + +define void @foo(<2 x i32>* %a, <2 x i32>* %b, i32* %r1, i32* %r2) { + %aval = load <2 x i32>* %a + %bval = load <2 x i32>* %b + %res = icmp slt <2 x i32> %aval, %bval + %t1 = extractelement <2 x i1> %res, i32 0 + %t2 = extractelement <2 x i1> %res, i32 1 + %t1a = zext i1 %t1 to i32 + %t2a = zext i1 %t2 to i32 + store i32 %t1a, i32* %r1 + store i32 %t2a, i32* %r2 + ret void +} diff --git a/test/CodeGen/NVPTX/vector-loads.ll b/test/CodeGen/NVPTX/vector-loads.ll new file mode 100644 index 000000000000..58882bf16668 --- /dev/null +++ b/test/CodeGen/NVPTX/vector-loads.ll @@ -0,0 +1,66 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + +; Even though general vector types are not supported in PTX, we can still +; optimize loads/stores with pseudo-vector instructions of the form: +; +; ld.v2.f32 {%f0, %f1}, [%r0] +; +; which will load two floats at once into scalar registers. 
+ +define void @foo(<2 x float>* %a) { +; CHECK: .func foo +; CHECK: ld.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}} + %t1 = load <2 x float>* %a + %t2 = fmul <2 x float> %t1, %t1 + store <2 x float> %t2, <2 x float>* %a + ret void +} + +define void @foo2(<4 x float>* %a) { +; CHECK: .func foo2 +; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}} + %t1 = load <4 x float>* %a + %t2 = fmul <4 x float> %t1, %t1 + store <4 x float> %t2, <4 x float>* %a + ret void +} + +define void @foo3(<8 x float>* %a) { +; CHECK: .func foo3 +; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}} +; CHECK-NEXT: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}} + %t1 = load <8 x float>* %a + %t2 = fmul <8 x float> %t1, %t1 + store <8 x float> %t2, <8 x float>* %a + ret void +} + + + +define void @foo4(<2 x i32>* %a) { +; CHECK: .func foo4 +; CHECK: ld.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}} + %t1 = load <2 x i32>* %a + %t2 = mul <2 x i32> %t1, %t1 + store <2 x i32> %t2, <2 x i32>* %a + ret void +} + +define void @foo5(<4 x i32>* %a) { +; CHECK: .func foo5 +; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}} + %t1 = load <4 x i32>* %a + %t2 = mul <4 x i32> %t1, %t1 + store <4 x i32> %t2, <4 x i32>* %a + ret void +} + +define void @foo6(<8 x i32>* %a) { +; CHECK: .func foo6 +; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}} +; CHECK-NEXT: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}} + %t1 = load <8 x i32>* %a + %t2 = mul <8 x i32> %t1, %t1 + store <8 x i32> %t2, <8 x i32>* %a + ret void +} diff --git a/test/CodeGen/NVPTX/vector-select.ll b/test/CodeGen/NVPTX/vector-select.ll new file mode 100644 index 000000000000..11893df10329 --- /dev/null +++ b/test/CodeGen/NVPTX/vector-select.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 + +; This test makes sure that vector selects are scalarized by the type legalizer. +; If not, type legalization will fail. 
+ +define void @foo(<2 x i32> addrspace(1)* %def_a, <2 x i32> addrspace(1)* %def_b, <2 x i32> addrspace(1)* %def_c) { +entry: + %tmp4 = load <2 x i32> addrspace(1)* %def_a + %tmp6 = load <2 x i32> addrspace(1)* %def_c + %tmp8 = load <2 x i32> addrspace(1)* %def_b + %0 = icmp sge <2 x i32> %tmp4, zeroinitializer + %cond = select <2 x i1> %0, <2 x i32> %tmp6, <2 x i32> %tmp8 + store <2 x i32> %cond, <2 x i32> addrspace(1)* %def_c + ret void +} diff --git a/test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll b/test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll index 82ef2b82cbe6..b6feb5abbc3f 100644 --- a/test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll +++ b/test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=ppc64 | grep dst | count 4 +; RUN: llc < %s -march=ppc64 -mattr=+altivec | grep dst | count 4 define hidden void @_Z4borkPc(i8* %image) { entry: diff --git a/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll b/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll index 8802b97d2a6a..00a402e0e487 100644 --- a/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll +++ b/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vadduhm ; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vsubuhm +; XFAIL: * define <4 x i32> @test() nounwind { ret <4 x i32> < i32 4293066722, i32 4293066722, i32 4293066722, i32 4293066722> diff --git a/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll b/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll index 84aa40c4b52a..91253daae396 100644 --- a/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll +++ b/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll @@ -10,8 +10,8 @@ target triple = "powerpc-apple-darwin10.0" define void @foo(i32 %y) nounwind ssp { entry: ; CHECK: foo -; CHECK: add r3 -; CHECK: 0(r3) +; CHECK: add r2 +; CHECK: 0(r2) %y_addr = alloca i32 ; <i32*> [#uses=2] %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] store i32 %y, i32* %y_addr diff --git a/test/CodeGen/PowerPC/2010-02-12-saveCR.ll b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll index 974a99a52cb5..097611a7619c 100644 --- a/test/CodeGen/PowerPC/2010-02-12-saveCR.ll +++ b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll @@ -2,21 +2,21 @@ ; ModuleID = 'hh.c' target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32" target triple = "powerpc-apple-darwin9.6" -; This formerly used R0 for both the stack address and CR. define void @foo() nounwind { entry: -;CHECK: mfcr r2 -;CHECK: lis r3, 1 -;CHECK: rlwinm r2, r2, 8, 0, 31 -;CHECK: ori r3, r3, 34524 -;CHECK: stwx r2, r1, r3 -; Make sure that the register scavenger returns the same temporary register. -;CHECK: mfcr r2 -;CHECK: lis r3, 1 -;CHECK: rlwinm r2, r2, 12, 0, 31 -;CHECK: ori r3, r3, 34520 -;CHECK: stwx r2, r1, r3 +; Note that part of what is being checked here is proper register reuse. 
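The reuse is expressed with FileCheck pattern variables: [[T1:r[0-9]+]] captures whichever register the compiler actually picked, and each later [[T1]] must match exactly that text again. A two-line illustration of the capture-and-reuse idiom (hypothetical lines, not taken from this test):

; CHECK: mfcr [[CRREG:r[0-9]+]]
; CHECK: stwx [[CRREG]], r1, {{r[0-9]+}}

If the value stored to the stack came from a different register than the one that received the mfcr result, the second pattern would fail to match.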
+; CHECK: mfcr [[T1:r[0-9]+]] ; cr2 +; CHECK: lis [[T2:r[0-9]+]], 1 +; CHECK: addi r3, r1, 72 +; CHECK: rlwinm [[T1]], [[T1]], 8, 0, 31 +; CHECK: ori [[T2]], [[T2]], 34540 +; CHECK: stwx [[T1]], r1, [[T2]] +; CHECK: lis [[T3:r[0-9]+]], 1 +; CHECK: mfcr [[T4:r[0-9]+]] ; cr3 +; CHECK: ori [[T3]], [[T3]], 34536 +; CHECK: rlwinm [[T4]], [[T4]], 12, 0, 31 +; CHECK: stwx [[T4]], r1, [[T3]] %x = alloca [100000 x i8] ; <[100000 x i8]*> [#uses=1] %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] %x1 = bitcast [100000 x i8]* %x to i8* ; <i8*> [#uses=1] @@ -25,11 +25,16 @@ entry: br label %return return: ; preds = %entry -;CHECK: lis r3, 1 -;CHECK: ori r3, r3, 34524 -;CHECK: lwzx r2, r1, r3 -;CHECK: rlwinm r2, r2, 24, 0, 31 -;CHECK: mtcrf 32, r2 +; CHECK: lis [[T1:r[0-9]+]], 1 +; CHECK: ori [[T1]], [[T1]], 34536 +; CHECK: lwzx [[T1]], r1, [[T1]] +; CHECK: rlwinm [[T1]], [[T1]], 20, 0, 31 +; CHECK: mtcrf 16, [[T1]] +; CHECK: lis [[T1]], 1 +; CHECK: ori [[T1]], [[T1]], 34540 +; CHECK: lwzx [[T1]], r1, [[T1]] +; CHECK: rlwinm [[T1]], [[T1]], 24, 0, 31 +; CHECK: mtcrf 32, [[T1]] ret void } diff --git a/test/CodeGen/PowerPC/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/PowerPC/2010-04-07-DbgValueOtherTargets.ll deleted file mode 100644 index 4a850984a909..000000000000 --- a/test/CodeGen/PowerPC/2010-04-07-DbgValueOtherTargets.ll +++ /dev/null @@ -1,28 +0,0 @@ -; RUN: llc -O0 -march=ppc32 -asm-verbose < %s | FileCheck %s -; Check that DEBUG_VALUE comments come through on a variety of targets. - -define i32 @main() nounwind ssp { -entry: -; CHECK: DEBUG_VALUE - call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9 - ret i32 0, !dbg !10 -} - -declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone - -declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone - -!llvm.dbg.sp = !{!0} - -!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] -!4 = metadata !{metadata !5} -!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 0} -!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] -!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] -!9 = metadata !{i32 3, i32 11, metadata !8, null} -!10 = metadata !{i32 4, i32 2, metadata !8, null} - diff --git a/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll b/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll index 72ae9d6c73b3..0dbc2d0180ff 100644 --- a/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll +++ b/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll @@ -18,8 +18,8 @@ entry: ; CHECK: _g: ; CHECK: mflr r0 ; CHECK: stw r0, 8(r1) -; CHECK: lwz r3, 0(r1) -; CHECK: lwz r3, 8(r3) +; CHECK: lwz r2, 0(r1) +; CHECK: lwz r3, 8(r2) %0 = tail call i8* @llvm.returnaddress(i32 1) ; <i8*> 
[#uses=1] ret i8* %0 } diff --git a/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll b/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll index bf3d577a3677..d1a3c9f46b57 100644 --- a/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll +++ b/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll @@ -7,7 +7,7 @@ define i32 @main() nounwind { entry: ; Make sure we're generating references using the red zone ; CHECK: main: -; CHECK: stw r3, -12(r1) +; CHECK: stw r2, -12(r1) %retval = alloca i32 %0 = alloca i32 %"alloca point" = bitcast i32 0 to i32 diff --git a/test/CodeGen/PowerPC/2012-09-16-TOC-entry-check.ll b/test/CodeGen/PowerPC/2012-09-16-TOC-entry-check.ll index 9d2e390c1c97..5bff58f2bbf5 100644 --- a/test/CodeGen/PowerPC/2012-09-16-TOC-entry-check.ll +++ b/test/CodeGen/PowerPC/2012-09-16-TOC-entry-check.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s | FileCheck %s +; RUN: llc -code-model=small < %s | FileCheck %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" diff --git a/test/CodeGen/PowerPC/2012-11-16-mischedcall.ll b/test/CodeGen/PowerPC/2012-11-16-mischedcall.ll new file mode 100644 index 000000000000..35e3fdd26e72 --- /dev/null +++ b/test/CodeGen/PowerPC/2012-11-16-mischedcall.ll @@ -0,0 +1,33 @@ +; RUN: llc -mtriple=powerpc64-bgq-linux -enable-misched < %s | FileCheck %s +; +; PR14315: misched should not move the physreg copy of %t below the calls. + +@.str89 = external unnamed_addr constant [6 x i8], align 1 + +declare void @init() nounwind + +declare void @clock() nounwind + +; CHECK: %entry +; CHECK: fmr 31, 1 +; CHECK: bl init +define void @s332(double %t) nounwind { +entry: + tail call void @init() + tail call void @clock() nounwind + br label %for.cond2 + +for.cond2: ; preds = %for.body4, %entry + %i.0 = phi i32 [ %inc, %for.body4 ], [ 0, %entry ] + %cmp3 = icmp slt i32 undef, 16000 + br i1 %cmp3, label %for.body4, label %L20 + +for.body4: ; preds = %for.cond2 + %cmp5 = fcmp ogt double undef, %t + %inc = add nsw i32 %i.0, 1 + br i1 %cmp5, label %L20, label %for.cond2 + +L20: ; preds = %for.body4, %for.cond2 + %index.0 = phi i32 [ -2, %for.cond2 ], [ %i.0, %for.body4 ] + unreachable +} diff --git a/test/CodeGen/PowerPC/DbgValueOtherTargets.test b/test/CodeGen/PowerPC/DbgValueOtherTargets.test new file mode 100644 index 000000000000..9702934f7e68 --- /dev/null +++ b/test/CodeGen/PowerPC/DbgValueOtherTargets.test @@ -0,0 +1 @@ +RUN: llc -O0 -march=ppc32 -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll diff --git a/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll b/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll index 6f985c819fb6..e8765deab05d 100644 --- a/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll +++ b/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll @@ -1,9 +1,9 @@ ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin | \ -; RUN: grep "stw r4, 32751" +; RUN: grep "stw r3, 32751" ; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin | \ -; RUN: grep "stw r4, 32751" +; RUN: grep "stw r3, 32751" ; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin | \ -; RUN: grep "std r4, 9024" +; RUN: grep "std r3, 9024" define void @test() nounwind { store i32 0, i32* inttoptr (i64 48725999 to i32*) diff --git a/test/CodeGen/PowerPC/a2q-stackalign.ll b/test/CodeGen/PowerPC/a2q-stackalign.ll new file mode 100644 index 000000000000..00c329119376 --- /dev/null +++ b/test/CodeGen/PowerPC/a2q-stackalign.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s -march=ppc64 
-mcpu=a2 | FileCheck -check-prefix=CHECK-A2 %s +; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck -check-prefix=CHECK-A2Q %s +; RUN: llc < %s -march=ppc64 -mtriple=powerpc64-bgq-linux -mcpu=a2 | FileCheck -check-prefix=CHECK-BGQ %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +declare i32 @bar(i8* %a) nounwind; +define i32 @foo() nounwind { + %p = alloca i8, i8 115 + store i8 0, i8* %p + %r = call i32 @bar(i8* %p) + ret i32 %r +} + +; Without QPX, the allocated stack frame is 240 bytes, but with QPX +; (because we require 32-byte alignment), it is 256 bytes. +; CHECK-A2: @foo +; CHECK-A2: stdu 1, -240(1) +; CHECK-A2Q: @foo +; CHECK-A2Q: stdu 1, -256(1) +; CHECK-BGQ: @foo +; CHECK-BGQ: stdu 1, -256(1) + diff --git a/test/CodeGen/PowerPC/a2q.ll b/test/CodeGen/PowerPC/a2q.ll new file mode 100644 index 000000000000..b26480f08b39 --- /dev/null +++ b/test/CodeGen/PowerPC/a2q.ll @@ -0,0 +1,10 @@ +; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s +; RUN: llc < %s -march=ppc64 -mcpu=a2 -mattr=+qpx | FileCheck %s + +define void @foo() { +entry: + ret void +} + +; CHECK: @foo + diff --git a/test/CodeGen/PowerPC/allocate-r0.ll b/test/CodeGen/PowerPC/allocate-r0.ll new file mode 100644 index 000000000000..1cf4cec07695 --- /dev/null +++ b/test/CodeGen/PowerPC/allocate-r0.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g5 | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define i64 @foo(i64 %a) nounwind { +entry: + call void asm sideeffect "", "~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12}"() nounwind + br label %return + +; CHECK: @foo +; Because r0 is allocatable, we can use it to hold r3 without spilling. +; CHECK: mr 0, 3 +; CHECK: mr 3, 0 + +return: ; preds = %entry + ret i64 %a +} + diff --git a/test/CodeGen/PowerPC/anon_aggr.ll b/test/CodeGen/PowerPC/anon_aggr.ll new file mode 100644 index 000000000000..52587e2c0b87 --- /dev/null +++ b/test/CodeGen/PowerPC/anon_aggr.ll @@ -0,0 +1,99 @@ +; RUN: llc -O0 -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s + +; Test case for PR 14779: anonymous aggregates are not handled correctly. +; The bug is triggered by passing a byval structure after an anonymous +; aggregate. 
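An "anonymous aggregate" here is a literal struct type such as { i64, i8* } passed directly by value, as opposed to a named type like %tarray below, which these tests pass byval (that is, through a pointer to a caller-made copy). A minimal contrast in the same IR dialect (the type and function names are made up for illustration):

%pair = type { i64, i8* }

define i64 @first_of_literal({ i64, i8* } %agg) {
  ; literal (anonymous) aggregate passed by value
  %v = extractvalue { i64, i8* } %agg, 0
  ret i64 %v
}

define i64 @first_of_byval(%pair* byval %agg) {
  ; named aggregate passed byval, i.e. via a hidden copy in memory
  %p = getelementptr inbounds %pair* %agg, i32 0, i32 0
  %v = load i64* %p
  ret i64 %v
}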
+ +%tarray = type { i64, i8* } + +define i8* @func1({ i64, i8* } %array, i8* %ptr) { +entry: + %array_ptr = extractvalue {i64, i8* } %array, 1 + %cond = icmp eq i8* %array_ptr, %ptr + br i1 %cond, label %equal, label %unequal +equal: + ret i8* %array_ptr +unequal: + ret i8* %ptr +} + +; CHECK: func1: +; CHECK: cmpld {{[0-9]+}}, 4, 5 +; CHECK: std 4, -[[OFFSET1:[0-9]+]] +; CHECK: std 5, -[[OFFSET2:[0-9]+]] +; CHECK: ld 3, -[[OFFSET1]](1) +; CHECK: ld 3, -[[OFFSET2]](1) + + +define i8* @func2({ i64, i8* } %array1, %tarray* byval %array2) { +entry: + %array1_ptr = extractvalue {i64, i8* } %array1, 1 + %tmp = getelementptr inbounds %tarray* %array2, i32 0, i32 1 + %array2_ptr = load i8** %tmp + %cond = icmp eq i8* %array1_ptr, %array2_ptr + br i1 %cond, label %equal, label %unequal +equal: + ret i8* %array1_ptr +unequal: + ret i8* %array2_ptr +} + +; CHECK: func2: +; CHECK: addi [[REG1:[0-9]+]], 1, 64 +; CHECK: ld [[REG2:[0-9]+]], 8([[REG1]]) +; CHECK: cmpld {{[0-9]+}}, 4, [[REG2]] +; CHECK: std [[REG2]], -[[OFFSET1:[0-9]+]] +; CHECK: std 4, -[[OFFSET2:[0-9]+]] +; CHECK: ld 3, -[[OFFSET2]](1) +; CHECK: ld 3, -[[OFFSET1]](1) + +define i8* @func3({ i64, i8* }* byval %array1, %tarray* byval %array2) { +entry: + %tmp1 = getelementptr inbounds { i64, i8* }* %array1, i32 0, i32 1 + %array1_ptr = load i8** %tmp1 + %tmp2 = getelementptr inbounds %tarray* %array2, i32 0, i32 1 + %array2_ptr = load i8** %tmp2 + %cond = icmp eq i8* %array1_ptr, %array2_ptr + br i1 %cond, label %equal, label %unequal +equal: + ret i8* %array1_ptr +unequal: + ret i8* %array2_ptr +} + +; CHECK: func3: +; CHECK: addi [[REG1:[0-9]+]], 1, 64 +; CHECK: addi [[REG2:[0-9]+]], 1, 48 +; CHECK: ld [[REG3:[0-9]+]], 8([[REG1]]) +; CHECK: ld [[REG4:[0-9]+]], 8([[REG2]]) +; CHECK: cmpld {{[0-9]+}}, [[REG4]], [[REG3]] +; CHECK: std [[REG3]], -[[OFFSET1:[0-9]+]](1) +; CHECK: std [[REG4]], -[[OFFSET2:[0-9]+]](1) +; CHECK: ld 3, -[[OFFSET2]](1) +; CHECK: ld 3, -[[OFFSET1]](1) + +define i8* @func4(i64 %p1, i64 %p2, i64 %p3, i64 %p4, + i64 %p5, i64 %p6, i64 %p7, i64 %p8, + { i64, i8* } %array1, %tarray* byval %array2) { +entry: + %array1_ptr = extractvalue {i64, i8* } %array1, 1 + %tmp = getelementptr inbounds %tarray* %array2, i32 0, i32 1 + %array2_ptr = load i8** %tmp + %cond = icmp eq i8* %array1_ptr, %array2_ptr + br i1 %cond, label %equal, label %unequal +equal: + ret i8* %array1_ptr +unequal: + ret i8* %array2_ptr +} + +; CHECK: func4: +; CHECK: addi [[REG1:[0-9]+]], 1, 128 +; CHECK: ld [[REG2:[0-9]+]], 120(1) +; CHECK: ld [[REG3:[0-9]+]], 8([[REG1]]) +; CHECK: cmpld {{[0-9]+}}, [[REG2]], [[REG3]] +; CHECK: std [[REG2]], -[[OFFSET1:[0-9]+]](1) +; CHECK: std [[REG3]], -[[OFFSET2:[0-9]+]](1) +; CHECK: ld 3, -[[OFFSET1]](1) +; CHECK: ld 3, -[[OFFSET2]](1) + diff --git a/test/CodeGen/PowerPC/asym-regclass-copy.ll b/test/CodeGen/PowerPC/asym-regclass-copy.ll new file mode 100644 index 000000000000..d04a6c98ee19 --- /dev/null +++ b/test/CodeGen/PowerPC/asym-regclass-copy.ll @@ -0,0 +1,56 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +; This tests that the GPRC/GPRC_NOR0 intersection subclass relationship with +; GPRC is handled correctly. When it was not, this test would assert. 
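For background, the asymmetry being exercised comes from r0's special role in the ISA: in addi and D-form address encodings an RA field of 0 means the constant zero, not the contents of r0, so operands in that position are drawn from GPRC_NOR0 (essentially GPRC without r0) while ordinary operands use the full GPRC class. Two assembly lines make the difference concrete (illustrative only, not checked output):

addi 3, 0, 1      # r3 = 0 + 1; the 0 in the RA slot is the literal zero, not r0
addi 3, 4, 1      # r3 = r4 + 1; any other register is read normally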
+ +@gen_random.last = external unnamed_addr global i64, align 8 +@.str = external unnamed_addr constant [4 x i8], align 1 + +declare double @gen_random(double) #0 + +declare void @benchmark_heapsort(i32 signext, double* nocapture) #0 + +define signext i32 @main(i32 signext %argc, i8** nocapture %argv) #0 { +entry: + br i1 undef, label %cond.true, label %cond.end + +cond.true: ; preds = %entry + br label %cond.end + +cond.end: ; preds = %cond.true, %entry + %cond = phi i32 [ 0, %cond.true ], [ 8000000, %entry ] + %add = add i32 %cond, 1 + %conv = sext i32 %add to i64 + %mul = shl nsw i64 %conv, 3 + %call1 = tail call noalias i8* @malloc(i64 %mul) #1 + br i1 undef, label %for.end, label %for.body.lr.ph + +for.body.lr.ph: ; preds = %cond.end + br label %for.body + +for.body: ; preds = %for.body, %for.body.lr.ph + %indvars.iv = phi i64 [ 1, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %add + br i1 %exitcond, label %for.cond.for.end_crit_edge, label %for.body + +for.cond.for.end_crit_edge: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.cond.for.end_crit_edge, %cond.end + ret i32 0 +} + +declare noalias i8* @malloc(i64) #0 + +declare signext i32 @printf(i8* nocapture, ...) #0 + +declare void @free(i8* nocapture) #0 + +declare i64 @strtol(i8*, i8** nocapture, i32 signext) #0 + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind } diff --git a/test/CodeGen/PowerPC/atomic-1.ll b/test/CodeGen/PowerPC/atomic-1.ll index cbfa4094fb4e..838db20ddd1b 100644 --- a/test/CodeGen/PowerPC/atomic-1.ll +++ b/test/CodeGen/PowerPC/atomic-1.ll @@ -1,10 +1,10 @@ -; RUN: llc < %s -march=ppc32 | FileCheck %s +; RUN: llc < %s -mtriple=powerpc-apple-darwin -march=ppc32 | FileCheck %s define i32 @exchange_and_add(i32* %mem, i32 %val) nounwind { ; CHECK: exchange_and_add: -; CHECK: lwarx +; CHECK: lwarx {{r[0-9]+}}, 0, {{r[0-9]+}} %tmp = atomicrmw add i32* %mem, i32 %val monotonic -; CHECK: stwcx. +; CHECK: stwcx. {{r[0-9]+}}, 0, {{r[0-9]+}} ret i32 %tmp } diff --git a/test/CodeGen/PowerPC/atomic-2.ll b/test/CodeGen/PowerPC/atomic-2.ll index a427379a8b6d..40b4a2eea976 100644 --- a/test/CodeGen/PowerPC/atomic-2.ll +++ b/test/CodeGen/PowerPC/atomic-2.ll @@ -24,3 +24,23 @@ define i64 @exchange(i64* %mem, i64 %val) nounwind { ; CHECK: stdcx. ret i64 %tmp } + +define void @atomic_store(i64* %mem, i64 %val) nounwind { +entry: +; CHECK: @atomic_store + store atomic i64 %val, i64* %mem release, align 64 +; CHECK: ldarx +; CHECK: stdcx. + ret void +} + +define i64 @atomic_load(i64* %mem) nounwind { +entry: +; CHECK: @atomic_load + %tmp = load atomic i64* %mem acquire, align 64 +; CHECK: ldarx +; CHECK: stdcx. +; CHECK: stdcx. 
+ ret i64 %tmp +} + diff --git a/test/CodeGen/PowerPC/available-externally.ll b/test/CodeGen/PowerPC/available-externally.ll index fdead7dd8b34..abed0de80b88 100644 --- a/test/CodeGen/PowerPC/available-externally.ll +++ b/test/CodeGen/PowerPC/available-externally.ll @@ -1,6 +1,8 @@ ; RUN: llc < %s -relocation-model=static | FileCheck %s -check-prefix=STATIC ; RUN: llc < %s -relocation-model=pic | FileCheck %s -check-prefix=PIC +; RUN: llc < %s -relocation-model=pic -mtriple=powerpc64-apple-darwin8 | FileCheck %s -check-prefix=PIC64 ; RUN: llc < %s -relocation-model=dynamic-no-pic | FileCheck %s -check-prefix=DYNAMIC +; RUN: llc < %s -relocation-model=dynamic-no-pic -mtriple=powerpc64-apple-darwin8 | FileCheck %s -check-prefix=DYNAMIC64 ; PR4482 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "powerpc-apple-darwin8" @@ -16,10 +18,18 @@ entry: ; PIC: bl L_exact_log2$stub ; PIC: blr +; PIC64: _foo: +; PIC64: bl L_exact_log2$stub +; PIC64: blr + ; DYNAMIC: _foo: ; DYNAMIC: bl L_exact_log2$stub ; DYNAMIC: blr +; DYNAMIC64: _foo: +; DYNAMIC64: bl L_exact_log2$stub +; DYNAMIC64: blr + %A = call i32 @exact_log2(i64 %x) nounwind ret i32 %A } @@ -34,13 +44,13 @@ entry: ; PIC: L_exact_log2$stub: ; PIC: .indirect_symbol _exact_log2 ; PIC: mflr r0 -; PIC: bcl 20,31,L_exact_log2$stub$tmp +; PIC: bcl 20, 31, L_exact_log2$stub$tmp ; PIC: L_exact_log2$stub$tmp: ; PIC: mflr r11 -; PIC: addis r11,r11,ha16(L_exact_log2$lazy_ptr-L_exact_log2$stub$tmp) +; PIC: addis r11, r11, ha16(L_exact_log2$lazy_ptr-L_exact_log2$stub$tmp) ; PIC: mtlr r0 -; PIC: lwzu r12,lo16(L_exact_log2$lazy_ptr-L_exact_log2$stub$tmp)(r11) +; PIC: lwzu r12, lo16(L_exact_log2$lazy_ptr-L_exact_log2$stub$tmp)(r11) ; PIC: mtctr r12 ; PIC: bctr @@ -51,12 +61,32 @@ entry: ; PIC: .subsections_via_symbols +; PIC64: .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32 +; PIC64: L_exact_log2$stub: +; PIC64: .indirect_symbol _exact_log2 +; PIC64: mflr r0 +; PIC64: bcl 20, 31, L_exact_log2$stub$tmp + +; PIC64: L_exact_log2$stub$tmp: +; PIC64: mflr r11 +; PIC64: addis r11, r11, ha16(L_exact_log2$lazy_ptr-L_exact_log2$stub$tmp) +; PIC64: mtlr r0 +; PIC64: ldu r12, lo16(L_exact_log2$lazy_ptr-L_exact_log2$stub$tmp)(r11) +; PIC64: mtctr r12 +; PIC64: bctr + +; PIC64: .section __DATA,__la_symbol_ptr,lazy_symbol_pointers +; PIC64: L_exact_log2$lazy_ptr: +; PIC64: .indirect_symbol _exact_log2 +; PIC64: .quad dyld_stub_binding_helper + +; PIC64: .subsections_via_symbols ; DYNAMIC: .section __TEXT,__symbol_stub1,symbol_stubs,pure_instructions,16 ; DYNAMIC: L_exact_log2$stub: ; DYNAMIC: .indirect_symbol _exact_log2 -; DYNAMIC: lis r11,ha16(L_exact_log2$lazy_ptr) -; DYNAMIC: lwzu r12,lo16(L_exact_log2$lazy_ptr)(r11) +; DYNAMIC: lis r11, ha16(L_exact_log2$lazy_ptr) +; DYNAMIC: lwzu r12, lo16(L_exact_log2$lazy_ptr)(r11) ; DYNAMIC: mtctr r12 ; DYNAMIC: bctr @@ -65,7 +95,15 @@ entry: ; DYNAMIC: .indirect_symbol _exact_log2 ; DYNAMIC: .long dyld_stub_binding_helper - - - - +; DYNAMIC64: .section __TEXT,__symbol_stub1,symbol_stubs,pure_instructions,16 +; DYNAMIC64: L_exact_log2$stub: +; DYNAMIC64: .indirect_symbol _exact_log2 +; DYNAMIC64: lis r11, ha16(L_exact_log2$lazy_ptr) +; DYNAMIC64: ldu r12, lo16(L_exact_log2$lazy_ptr)(r11) +; DYNAMIC64: mtctr r12 +; DYNAMIC64: bctr + +; DYNAMIC64: .section __DATA,__la_symbol_ptr,lazy_symbol_pointers +; DYNAMIC64: L_exact_log2$lazy_ptr: +; DYNAMIC64: .indirect_symbol _exact_log2 +; DYNAMIC64: .quad 
dyld_stub_binding_helper diff --git a/test/CodeGen/PowerPC/bswap-load-store.ll b/test/CodeGen/PowerPC/bswap-load-store.ll index 4f6bfc729913..53bbc52167c4 100644 --- a/test/CodeGen/PowerPC/bswap-load-store.ll +++ b/test/CodeGen/PowerPC/bswap-load-store.ll @@ -1,5 +1,7 @@ -; RUN: llc < %s -march=ppc32 | FileCheck %s -check-prefix=X32 -; RUN: llc < %s -march=ppc64 | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -march=ppc32 -mcpu=ppc32 | FileCheck %s -check-prefix=X32 +; RUN: llc < %s -march=ppc64 -mcpu=ppc64 | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -march=ppc64 -mcpu=pwr7 | FileCheck %s -check-prefix=PWR7 +; RUN: llc < %s -march=ppc32 -mcpu=pwr7 | FileCheck %s -check-prefix=X32 define void @STWBRX(i32 %i, i8* %ptr, i32 %off) { @@ -34,18 +36,47 @@ define i16 @LHBRX(i8* %ptr, i32 %off) { ret i16 %tmp6 } +define void @STDBRX(i64 %i, i8* %ptr, i64 %off) { + %tmp1 = getelementptr i8* %ptr, i64 %off ; <i8*> [#uses=1] + %tmp1.upgrd.1 = bitcast i8* %tmp1 to i64* ; <i64*> [#uses=1] + %tmp13 = tail call i64 @llvm.bswap.i64( i64 %i ) ; <i64> [#uses=1] + store i64 %tmp13, i64* %tmp1.upgrd.1 + ret void +} + +define i64 @LDBRX(i8* %ptr, i64 %off) { + %tmp1 = getelementptr i8* %ptr, i64 %off ; <i8*> [#uses=1] + %tmp1.upgrd.2 = bitcast i8* %tmp1 to i64* ; <i64*> [#uses=1] + %tmp = load i64* %tmp1.upgrd.2 ; <i64> [#uses=1] + %tmp14 = tail call i64 @llvm.bswap.i64( i64 %tmp ) ; <i64> [#uses=1] + ret i64 %tmp14 +} + declare i32 @llvm.bswap.i32(i32) declare i16 @llvm.bswap.i16(i16) +declare i64 @llvm.bswap.i64(i64) + ; X32: stwbrx ; X32: lwbrx ; X32: sthbrx ; X32: lhbrx +; X32-NOT: ldbrx +; X32-NOT: stdbrx ; X64: stwbrx ; X64: lwbrx ; X64: sthbrx ; X64: lhbrx +; X64-NOT: ldbrx +; X64-NOT: stdbrx + +; PWR7: stwbrx +; PWR7: lwbrx +; PWR7: sthbrx +; PWR7: lhbrx +; PWR7: stdbrx +; PWR7: ldbrx diff --git a/test/CodeGen/PowerPC/buildvec_canonicalize.ll b/test/CodeGen/PowerPC/buildvec_canonicalize.ll index 0454c584bcfe..e155a35c4da0 100644 --- a/test/CodeGen/PowerPC/buildvec_canonicalize.ll +++ b/test/CodeGen/PowerPC/buildvec_canonicalize.ll @@ -1,10 +1,4 @@ -; There should be exactly one vxor here. -; RUN: llc < %s -march=ppc32 -mcpu=g5 --enable-unsafe-fp-math | \ -; RUN: grep vxor | count 1 - -; There should be exactly one vsplti here. -; RUN: llc < %s -march=ppc32 -mcpu=g5 --enable-unsafe-fp-math | \ -; RUN: grep vsplti | count 1 +; RUN: llc < %s -march=ppc32 -mattr=+altivec --enable-unsafe-fp-math | FileCheck %s define void @VXOR(<4 x float>* %P1, <4 x i32>* %P2, <4 x float>* %P3) { %tmp = load <4 x float>* %P3 ; <<4 x float>> [#uses=1] @@ -15,10 +9,16 @@ define void @VXOR(<4 x float>* %P1, <4 x i32>* %P2, <4 x float>* %P3) { store <4 x i32> zeroinitializer, <4 x i32>* %P2 ret void } +; The fmul will spill a vspltisw to create a -0.0 vector used as the addend +; to vmaddfp (so it would IEEE compliant with zero sign propagation). 
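In other words, AltiVec has no plain single-precision vector multiply, so a * b is expected to come out as vmaddfp(a, b, c) where c is a splat of -0.0; the -0.0 addend preserves the sign of a -0.0 product, which a +0.0 addend would turn into +0.0. Stripped down to just that operation, the shape being relied on looks like this (a sketch; the function name is invented and the comment states the expected lowering, not checked output):

define <4 x float> @mul_only(<4 x float> %a, <4 x float> %b) {
  %m = fmul <4 x float> %a, %b     ; expected to become vmaddfp with a -0.0 splat addend
  ret <4 x float> %m
}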
+; CHECK: @VXOR +; CHECK: vsplti +; CHECK: vxor define void @VSPLTI(<4 x i32>* %P2, <8 x i16>* %P3) { store <4 x i32> bitcast (<16 x i8> < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > to <4 x i32>), <4 x i32>* %P2 store <8 x i16> < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >, <8 x i16>* %P3 ret void } - +; CHECK: @VSPLTI +; CHECK: vsplti diff --git a/test/CodeGen/PowerPC/complex-return.ll b/test/CodeGen/PowerPC/complex-return.ll new file mode 100644 index 000000000000..f12152ff0fca --- /dev/null +++ b/test/CodeGen/PowerPC/complex-return.ll @@ -0,0 +1,55 @@ +; RUN: llc -mcpu=pwr7 -O0 < %s | FileCheck %s + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define { ppc_fp128, ppc_fp128 } @foo() nounwind { +entry: + %retval = alloca { ppc_fp128, ppc_fp128 }, align 16 + %x = alloca { ppc_fp128, ppc_fp128 }, align 16 + %real = getelementptr inbounds { ppc_fp128, ppc_fp128 }* %x, i32 0, i32 0 + %imag = getelementptr inbounds { ppc_fp128, ppc_fp128 }* %x, i32 0, i32 1 + store ppc_fp128 0xM400C0000000000000000000000000000, ppc_fp128* %real + store ppc_fp128 0xMC00547AE147AE1483CA47AE147AE147A, ppc_fp128* %imag + %x.realp = getelementptr inbounds { ppc_fp128, ppc_fp128 }* %x, i32 0, i32 0 + %x.real = load ppc_fp128* %x.realp + %x.imagp = getelementptr inbounds { ppc_fp128, ppc_fp128 }* %x, i32 0, i32 1 + %x.imag = load ppc_fp128* %x.imagp + %real1 = getelementptr inbounds { ppc_fp128, ppc_fp128 }* %retval, i32 0, i32 0 + %imag2 = getelementptr inbounds { ppc_fp128, ppc_fp128 }* %retval, i32 0, i32 1 + store ppc_fp128 %x.real, ppc_fp128* %real1 + store ppc_fp128 %x.imag, ppc_fp128* %imag2 + %0 = load { ppc_fp128, ppc_fp128 }* %retval + ret { ppc_fp128, ppc_fp128 } %0 +} + +; CHECK: foo: +; CHECK: lfd 3 +; CHECK: lfd 4 +; CHECK: lfd 2 +; CHECK: lfd 1 + +define { float, float } @oof() nounwind { +entry: + %retval = alloca { float, float }, align 4 + %x = alloca { float, float }, align 4 + %real = getelementptr inbounds { float, float }* %x, i32 0, i32 0 + %imag = getelementptr inbounds { float, float }* %x, i32 0, i32 1 + store float 3.500000e+00, float* %real + store float 0xC00547AE20000000, float* %imag + %x.realp = getelementptr inbounds { float, float }* %x, i32 0, i32 0 + %x.real = load float* %x.realp + %x.imagp = getelementptr inbounds { float, float }* %x, i32 0, i32 1 + %x.imag = load float* %x.imagp + %real1 = getelementptr inbounds { float, float }* %retval, i32 0, i32 0 + %imag2 = getelementptr inbounds { float, float }* %retval, i32 0, i32 1 + store float %x.real, float* %real1 + store float %x.imag, float* %imag2 + %0 = load { float, float }* %retval + ret { float, float } %0 +} + +; CHECK: oof: +; CHECK: lfs 2 +; CHECK: lfs 1 + diff --git a/test/CodeGen/PowerPC/cr-spills.ll b/test/CodeGen/PowerPC/cr-spills.ll new file mode 100644 index 000000000000..d6df7a237668 --- /dev/null +++ b/test/CodeGen/PowerPC/cr-spills.ll @@ -0,0 +1,409 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +; This test case triggers several functions related to cr spilling, both in +; frame lowering and to handle cr register pressure. 
When the register kill +; flags were not being set correctly, this would cause the register scavenger to +; assert. + +@SetupFastFullPelSearch.orig_pels = external unnamed_addr global [768 x i16], align 2 +@weight_luma = external global i32 +@offset_luma = external global i32 +@wp_luma_round = external global i32, align 4 +@luma_log_weight_denom = external global i32, align 4 + +define void @SetupFastFullPelSearch() #0 { +entry: + %mul10 = mul nsw i32 undef, undef + br i1 undef, label %land.end, label %land.lhs.true + +land.lhs.true: ; preds = %entry + switch i32 0, label %land.end [ + i32 0, label %land.rhs + i32 3, label %land.rhs + ] + +land.rhs: ; preds = %land.lhs.true, %land.lhs.true + %tobool21 = icmp ne i32 undef, 0 + br label %land.end + +land.end: ; preds = %land.rhs, %land.lhs.true, %entry + %0 = phi i1 [ %tobool21, %land.rhs ], [ false, %land.lhs.true ], [ false, %entry ] + %cond = load i32** undef, align 8 + br i1 undef, label %if.then95, label %for.body.lr.ph + +if.then95: ; preds = %land.end + %cmp.i4.i1427 = icmp slt i32 undef, undef + br label %for.body.lr.ph + +for.body.lr.ph: ; preds = %if.then95, %land.end + br label %for.body + +for.body: ; preds = %for.body, %for.body.lr.ph + br i1 undef, label %for.body, label %for.body252 + +for.body252: ; preds = %for.inc997, %for.body + %shl263 = add i32 undef, 80 + br i1 %0, label %for.cond286.preheader, label %for.cond713.preheader + +for.cond286.preheader: ; preds = %for.body252 + br label %for.cond290.preheader + +for.cond290.preheader: ; preds = %for.end520, %for.cond286.preheader + %srcptr.31595 = phi i16* [ getelementptr inbounds ([768 x i16]* @SetupFastFullPelSearch.orig_pels, i64 0, i64 0), %for.cond286.preheader ], [ null, %for.end520 ] + %1 = load i32* undef, align 4, !tbaa !0 + %2 = load i32* @weight_luma, align 4, !tbaa !0 + %3 = load i32* @wp_luma_round, align 4, !tbaa !0 + %4 = load i32* @luma_log_weight_denom, align 4, !tbaa !0 + %5 = load i32* @offset_luma, align 4, !tbaa !0 + %incdec.ptr502.sum = add i64 undef, 16 + br label %for.body293 + +for.body293: ; preds = %for.body293, %for.cond290.preheader + %srcptr.41591 = phi i16* [ %srcptr.31595, %for.cond290.preheader ], [ undef, %for.body293 ] + %refptr.11590 = phi i16* [ undef, %for.cond290.preheader ], [ %add.ptr517, %for.body293 ] + %LineSadBlk0.01588 = phi i32 [ 0, %for.cond290.preheader ], [ %add346, %for.body293 ] + %LineSadBlk1.01587 = phi i32 [ 0, %for.cond290.preheader ], [ %add402, %for.body293 ] + %LineSadBlk3.01586 = phi i32 [ 0, %for.cond290.preheader ], [ %add514, %for.body293 ] + %LineSadBlk2.01585 = phi i32 [ 0, %for.cond290.preheader ], [ %add458, %for.body293 ] + %6 = load i16* %refptr.11590, align 2, !tbaa !3 + %conv294 = zext i16 %6 to i32 + %mul295 = mul nsw i32 %conv294, %2 + %add296 = add nsw i32 %mul295, %3 + %shr = ashr i32 %add296, %4 + %add297 = add nsw i32 %shr, %5 + %cmp.i.i1513 = icmp sgt i32 %add297, 0 + %cond.i.i1514 = select i1 %cmp.i.i1513, i32 %add297, i32 0 + %cmp.i4.i1515 = icmp slt i32 %cond.i.i1514, %1 + %cond.i5.i1516 = select i1 %cmp.i4.i1515, i32 %cond.i.i1514, i32 %1 + %7 = load i16* %srcptr.41591, align 2, !tbaa !3 + %conv300 = zext i16 %7 to i32 + %sub301 = sub nsw i32 %cond.i5.i1516, %conv300 + %idxprom302 = sext i32 %sub301 to i64 + %arrayidx303 = getelementptr inbounds i32* %cond, i64 %idxprom302 + %8 = load i32* %arrayidx303, align 4, !tbaa !0 + %add304 = add nsw i32 %8, %LineSadBlk0.01588 + %9 = load i32* undef, align 4, !tbaa !0 + %add318 = add nsw i32 %add304, %9 + %10 = load i16* undef, align 2, !tbaa !3 + 
%conv321 = zext i16 %10 to i32 + %mul322 = mul nsw i32 %conv321, %2 + %add323 = add nsw i32 %mul322, %3 + %shr324 = ashr i32 %add323, %4 + %add325 = add nsw i32 %shr324, %5 + %cmp.i.i1505 = icmp sgt i32 %add325, 0 + %cond.i.i1506 = select i1 %cmp.i.i1505, i32 %add325, i32 0 + %cmp.i4.i1507 = icmp slt i32 %cond.i.i1506, %1 + %cond.i5.i1508 = select i1 %cmp.i4.i1507, i32 %cond.i.i1506, i32 %1 + %sub329 = sub nsw i32 %cond.i5.i1508, 0 + %idxprom330 = sext i32 %sub329 to i64 + %arrayidx331 = getelementptr inbounds i32* %cond, i64 %idxprom330 + %11 = load i32* %arrayidx331, align 4, !tbaa !0 + %add332 = add nsw i32 %add318, %11 + %cmp.i.i1501 = icmp sgt i32 undef, 0 + %cond.i.i1502 = select i1 %cmp.i.i1501, i32 undef, i32 0 + %cmp.i4.i1503 = icmp slt i32 %cond.i.i1502, %1 + %cond.i5.i1504 = select i1 %cmp.i4.i1503, i32 %cond.i.i1502, i32 %1 + %incdec.ptr341 = getelementptr inbounds i16* %srcptr.41591, i64 4 + %12 = load i16* null, align 2, !tbaa !3 + %conv342 = zext i16 %12 to i32 + %sub343 = sub nsw i32 %cond.i5.i1504, %conv342 + %idxprom344 = sext i32 %sub343 to i64 + %arrayidx345 = getelementptr inbounds i32* %cond, i64 %idxprom344 + %13 = load i32* %arrayidx345, align 4, !tbaa !0 + %add346 = add nsw i32 %add332, %13 + %incdec.ptr348 = getelementptr inbounds i16* %refptr.11590, i64 5 + %14 = load i16* null, align 2, !tbaa !3 + %conv349 = zext i16 %14 to i32 + %mul350 = mul nsw i32 %conv349, %2 + %add351 = add nsw i32 %mul350, %3 + %shr352 = ashr i32 %add351, %4 + %add353 = add nsw i32 %shr352, %5 + %cmp.i.i1497 = icmp sgt i32 %add353, 0 + %cond.i.i1498 = select i1 %cmp.i.i1497, i32 %add353, i32 0 + %cmp.i4.i1499 = icmp slt i32 %cond.i.i1498, %1 + %cond.i5.i1500 = select i1 %cmp.i4.i1499, i32 %cond.i.i1498, i32 %1 + %incdec.ptr355 = getelementptr inbounds i16* %srcptr.41591, i64 5 + %15 = load i16* %incdec.ptr341, align 2, !tbaa !3 + %conv356 = zext i16 %15 to i32 + %sub357 = sub nsw i32 %cond.i5.i1500, %conv356 + %idxprom358 = sext i32 %sub357 to i64 + %arrayidx359 = getelementptr inbounds i32* %cond, i64 %idxprom358 + %16 = load i32* %arrayidx359, align 4, !tbaa !0 + %add360 = add nsw i32 %16, %LineSadBlk1.01587 + %incdec.ptr362 = getelementptr inbounds i16* %refptr.11590, i64 6 + %17 = load i16* %incdec.ptr348, align 2, !tbaa !3 + %conv363 = zext i16 %17 to i32 + %mul364 = mul nsw i32 %conv363, %2 + %add365 = add nsw i32 %mul364, %3 + %shr366 = ashr i32 %add365, %4 + %add367 = add nsw i32 %shr366, %5 + %cmp.i.i1493 = icmp sgt i32 %add367, 0 + %cond.i.i1494 = select i1 %cmp.i.i1493, i32 %add367, i32 0 + %cmp.i4.i1495 = icmp slt i32 %cond.i.i1494, %1 + %cond.i5.i1496 = select i1 %cmp.i4.i1495, i32 %cond.i.i1494, i32 %1 + %incdec.ptr369 = getelementptr inbounds i16* %srcptr.41591, i64 6 + %18 = load i16* %incdec.ptr355, align 2, !tbaa !3 + %conv370 = zext i16 %18 to i32 + %sub371 = sub nsw i32 %cond.i5.i1496, %conv370 + %idxprom372 = sext i32 %sub371 to i64 + %arrayidx373 = getelementptr inbounds i32* %cond, i64 %idxprom372 + %19 = load i32* %arrayidx373, align 4, !tbaa !0 + %add374 = add nsw i32 %add360, %19 + %incdec.ptr376 = getelementptr inbounds i16* %refptr.11590, i64 7 + %20 = load i16* %incdec.ptr362, align 2, !tbaa !3 + %conv377 = zext i16 %20 to i32 + %mul378 = mul nsw i32 %conv377, %2 + %add379 = add nsw i32 %mul378, %3 + %shr380 = ashr i32 %add379, %4 + %add381 = add nsw i32 %shr380, %5 + %cmp.i.i1489 = icmp sgt i32 %add381, 0 + %cond.i.i1490 = select i1 %cmp.i.i1489, i32 %add381, i32 0 + %cmp.i4.i1491 = icmp slt i32 %cond.i.i1490, %1 + %cond.i5.i1492 = select i1 %cmp.i4.i1491, 
i32 %cond.i.i1490, i32 %1 + %incdec.ptr383 = getelementptr inbounds i16* %srcptr.41591, i64 7 + %21 = load i16* %incdec.ptr369, align 2, !tbaa !3 + %conv384 = zext i16 %21 to i32 + %sub385 = sub nsw i32 %cond.i5.i1492, %conv384 + %idxprom386 = sext i32 %sub385 to i64 + %arrayidx387 = getelementptr inbounds i32* %cond, i64 %idxprom386 + %22 = load i32* %arrayidx387, align 4, !tbaa !0 + %add388 = add nsw i32 %add374, %22 + %23 = load i16* %incdec.ptr376, align 2, !tbaa !3 + %conv391 = zext i16 %23 to i32 + %mul392 = mul nsw i32 %conv391, %2 + %add395 = add nsw i32 0, %5 + %cmp.i.i1485 = icmp sgt i32 %add395, 0 + %cond.i.i1486 = select i1 %cmp.i.i1485, i32 %add395, i32 0 + %cmp.i4.i1487 = icmp slt i32 %cond.i.i1486, %1 + %cond.i5.i1488 = select i1 %cmp.i4.i1487, i32 %cond.i.i1486, i32 %1 + %incdec.ptr397 = getelementptr inbounds i16* %srcptr.41591, i64 8 + %24 = load i16* %incdec.ptr383, align 2, !tbaa !3 + %conv398 = zext i16 %24 to i32 + %sub399 = sub nsw i32 %cond.i5.i1488, %conv398 + %idxprom400 = sext i32 %sub399 to i64 + %arrayidx401 = getelementptr inbounds i32* %cond, i64 %idxprom400 + %25 = load i32* %arrayidx401, align 4, !tbaa !0 + %add402 = add nsw i32 %add388, %25 + %incdec.ptr404 = getelementptr inbounds i16* %refptr.11590, i64 9 + %cmp.i4.i1483 = icmp slt i32 undef, %1 + %cond.i5.i1484 = select i1 %cmp.i4.i1483, i32 undef, i32 %1 + %26 = load i16* %incdec.ptr397, align 2, !tbaa !3 + %conv412 = zext i16 %26 to i32 + %sub413 = sub nsw i32 %cond.i5.i1484, %conv412 + %idxprom414 = sext i32 %sub413 to i64 + %arrayidx415 = getelementptr inbounds i32* %cond, i64 %idxprom414 + %27 = load i32* %arrayidx415, align 4, !tbaa !0 + %add416 = add nsw i32 %27, %LineSadBlk2.01585 + %incdec.ptr418 = getelementptr inbounds i16* %refptr.11590, i64 10 + %28 = load i16* %incdec.ptr404, align 2, !tbaa !3 + %conv419 = zext i16 %28 to i32 + %mul420 = mul nsw i32 %conv419, %2 + %add421 = add nsw i32 %mul420, %3 + %shr422 = ashr i32 %add421, %4 + %add423 = add nsw i32 %shr422, %5 + %cmp.i.i1477 = icmp sgt i32 %add423, 0 + %cond.i.i1478 = select i1 %cmp.i.i1477, i32 %add423, i32 0 + %cmp.i4.i1479 = icmp slt i32 %cond.i.i1478, %1 + %cond.i5.i1480 = select i1 %cmp.i4.i1479, i32 %cond.i.i1478, i32 %1 + %incdec.ptr425 = getelementptr inbounds i16* %srcptr.41591, i64 10 + %sub427 = sub nsw i32 %cond.i5.i1480, 0 + %idxprom428 = sext i32 %sub427 to i64 + %arrayidx429 = getelementptr inbounds i32* %cond, i64 %idxprom428 + %29 = load i32* %arrayidx429, align 4, !tbaa !0 + %add430 = add nsw i32 %add416, %29 + %incdec.ptr432 = getelementptr inbounds i16* %refptr.11590, i64 11 + %30 = load i16* %incdec.ptr418, align 2, !tbaa !3 + %conv433 = zext i16 %30 to i32 + %mul434 = mul nsw i32 %conv433, %2 + %add435 = add nsw i32 %mul434, %3 + %shr436 = ashr i32 %add435, %4 + %add437 = add nsw i32 %shr436, %5 + %cmp.i.i1473 = icmp sgt i32 %add437, 0 + %cond.i.i1474 = select i1 %cmp.i.i1473, i32 %add437, i32 0 + %cmp.i4.i1475 = icmp slt i32 %cond.i.i1474, %1 + %cond.i5.i1476 = select i1 %cmp.i4.i1475, i32 %cond.i.i1474, i32 %1 + %31 = load i16* %incdec.ptr425, align 2, !tbaa !3 + %conv440 = zext i16 %31 to i32 + %sub441 = sub nsw i32 %cond.i5.i1476, %conv440 + %idxprom442 = sext i32 %sub441 to i64 + %arrayidx443 = getelementptr inbounds i32* %cond, i64 %idxprom442 + %32 = load i32* %arrayidx443, align 4, !tbaa !0 + %add444 = add nsw i32 %add430, %32 + %incdec.ptr446 = getelementptr inbounds i16* %refptr.11590, i64 12 + %33 = load i16* %incdec.ptr432, align 2, !tbaa !3 + %conv447 = zext i16 %33 to i32 + %mul448 = mul nsw i32 
%conv447, %2 + %add449 = add nsw i32 %mul448, %3 + %shr450 = ashr i32 %add449, %4 + %add451 = add nsw i32 %shr450, %5 + %cmp.i.i1469 = icmp sgt i32 %add451, 0 + %cond.i.i1470 = select i1 %cmp.i.i1469, i32 %add451, i32 0 + %cmp.i4.i1471 = icmp slt i32 %cond.i.i1470, %1 + %cond.i5.i1472 = select i1 %cmp.i4.i1471, i32 %cond.i.i1470, i32 %1 + %incdec.ptr453 = getelementptr inbounds i16* %srcptr.41591, i64 12 + %34 = load i16* undef, align 2, !tbaa !3 + %conv454 = zext i16 %34 to i32 + %sub455 = sub nsw i32 %cond.i5.i1472, %conv454 + %idxprom456 = sext i32 %sub455 to i64 + %arrayidx457 = getelementptr inbounds i32* %cond, i64 %idxprom456 + %35 = load i32* %arrayidx457, align 4, !tbaa !0 + %add458 = add nsw i32 %add444, %35 + %incdec.ptr460 = getelementptr inbounds i16* %refptr.11590, i64 13 + %36 = load i16* %incdec.ptr446, align 2, !tbaa !3 + %conv461 = zext i16 %36 to i32 + %mul462 = mul nsw i32 %conv461, %2 + %add463 = add nsw i32 %mul462, %3 + %shr464 = ashr i32 %add463, %4 + %add465 = add nsw i32 %shr464, %5 + %cmp.i.i1465 = icmp sgt i32 %add465, 0 + %cond.i.i1466 = select i1 %cmp.i.i1465, i32 %add465, i32 0 + %cmp.i4.i1467 = icmp slt i32 %cond.i.i1466, %1 + %cond.i5.i1468 = select i1 %cmp.i4.i1467, i32 %cond.i.i1466, i32 %1 + %incdec.ptr467 = getelementptr inbounds i16* %srcptr.41591, i64 13 + %37 = load i16* %incdec.ptr453, align 2, !tbaa !3 + %conv468 = zext i16 %37 to i32 + %sub469 = sub nsw i32 %cond.i5.i1468, %conv468 + %idxprom470 = sext i32 %sub469 to i64 + %arrayidx471 = getelementptr inbounds i32* %cond, i64 %idxprom470 + %38 = load i32* %arrayidx471, align 4, !tbaa !0 + %add472 = add nsw i32 %38, %LineSadBlk3.01586 + %incdec.ptr474 = getelementptr inbounds i16* %refptr.11590, i64 14 + %add477 = add nsw i32 0, %3 + %shr478 = ashr i32 %add477, %4 + %add479 = add nsw i32 %shr478, %5 + %cmp.i.i1461 = icmp sgt i32 %add479, 0 + %cond.i.i1462 = select i1 %cmp.i.i1461, i32 %add479, i32 0 + %cmp.i4.i1463 = icmp slt i32 %cond.i.i1462, %1 + %cond.i5.i1464 = select i1 %cmp.i4.i1463, i32 %cond.i.i1462, i32 %1 + %incdec.ptr481 = getelementptr inbounds i16* %srcptr.41591, i64 14 + %39 = load i16* %incdec.ptr467, align 2, !tbaa !3 + %conv482 = zext i16 %39 to i32 + %sub483 = sub nsw i32 %cond.i5.i1464, %conv482 + %idxprom484 = sext i32 %sub483 to i64 + %arrayidx485 = getelementptr inbounds i32* %cond, i64 %idxprom484 + %40 = load i32* %arrayidx485, align 4, !tbaa !0 + %add486 = add nsw i32 %add472, %40 + %incdec.ptr488 = getelementptr inbounds i16* %refptr.11590, i64 15 + %41 = load i16* %incdec.ptr474, align 2, !tbaa !3 + %conv489 = zext i16 %41 to i32 + %mul490 = mul nsw i32 %conv489, %2 + %add491 = add nsw i32 %mul490, %3 + %shr492 = ashr i32 %add491, %4 + %add493 = add nsw i32 %shr492, %5 + %cmp.i.i1457 = icmp sgt i32 %add493, 0 + %cond.i.i1458 = select i1 %cmp.i.i1457, i32 %add493, i32 0 + %cmp.i4.i1459 = icmp slt i32 %cond.i.i1458, %1 + %cond.i5.i1460 = select i1 %cmp.i4.i1459, i32 %cond.i.i1458, i32 %1 + %incdec.ptr495 = getelementptr inbounds i16* %srcptr.41591, i64 15 + %42 = load i16* %incdec.ptr481, align 2, !tbaa !3 + %conv496 = zext i16 %42 to i32 + %sub497 = sub nsw i32 %cond.i5.i1460, %conv496 + %idxprom498 = sext i32 %sub497 to i64 + %arrayidx499 = getelementptr inbounds i32* %cond, i64 %idxprom498 + %43 = load i32* %arrayidx499, align 4, !tbaa !0 + %add500 = add nsw i32 %add486, %43 + %44 = load i16* %incdec.ptr488, align 2, !tbaa !3 + %conv503 = zext i16 %44 to i32 + %mul504 = mul nsw i32 %conv503, %2 + %add505 = add nsw i32 %mul504, %3 + %shr506 = ashr i32 %add505, %4 + 
%add507 = add nsw i32 %shr506, %5 + %cmp.i.i1453 = icmp sgt i32 %add507, 0 + %cond.i.i1454 = select i1 %cmp.i.i1453, i32 %add507, i32 0 + %cmp.i4.i1455 = icmp slt i32 %cond.i.i1454, %1 + %cond.i5.i1456 = select i1 %cmp.i4.i1455, i32 %cond.i.i1454, i32 %1 + %45 = load i16* %incdec.ptr495, align 2, !tbaa !3 + %conv510 = zext i16 %45 to i32 + %sub511 = sub nsw i32 %cond.i5.i1456, %conv510 + %idxprom512 = sext i32 %sub511 to i64 + %arrayidx513 = getelementptr inbounds i32* %cond, i64 %idxprom512 + %46 = load i32* %arrayidx513, align 4, !tbaa !0 + %add514 = add nsw i32 %add500, %46 + %add.ptr517 = getelementptr inbounds i16* %refptr.11590, i64 %incdec.ptr502.sum + %exitcond1692 = icmp eq i32 undef, 4 + br i1 %exitcond1692, label %for.end520, label %for.body293 + +for.end520: ; preds = %for.body293 + store i32 %add346, i32* undef, align 4, !tbaa !0 + store i32 %add402, i32* undef, align 4, !tbaa !0 + store i32 %add458, i32* undef, align 4, !tbaa !0 + store i32 %add514, i32* null, align 4, !tbaa !0 + br i1 undef, label %for.end543, label %for.cond290.preheader + +for.end543: ; preds = %for.end520 + br i1 undef, label %for.inc997, label %for.body549 + +for.body549: ; preds = %for.inc701, %for.end543 + %call554 = call i16* null(i16**** null, i32 signext undef, i32 signext %shl263) #1 + br label %for.cond559.preheader + +for.cond559.preheader: ; preds = %for.cond559.preheader, %for.body549 + br i1 undef, label %for.inc701, label %for.cond559.preheader + +for.inc701: ; preds = %for.cond559.preheader + br i1 undef, label %for.inc997, label %for.body549 + +for.cond713.preheader: ; preds = %for.end850, %for.body252 + br label %for.body716 + +for.body716: ; preds = %for.body716, %for.cond713.preheader + br i1 undef, label %for.end850, label %for.body716 + +for.end850: ; preds = %for.body716 + br i1 undef, label %for.end873, label %for.cond713.preheader + +for.end873: ; preds = %for.end850 + br i1 undef, label %for.inc997, label %for.body879 + +for.body879: ; preds = %for.inc992, %for.end873 + br label %for.cond889.preheader + +for.cond889.preheader: ; preds = %for.end964, %for.body879 + br i1 undef, label %for.cond894.preheader.lr.ph, label %for.end964 + +for.cond894.preheader.lr.ph: ; preds = %for.cond889.preheader + br label %for.body898.lr.ph.us + +for.end957.us: ; preds = %for.body946.us + br i1 undef, label %for.body898.lr.ph.us, label %for.end964 + +for.body946.us: ; preds = %for.body930.us, %for.body946.us + br i1 false, label %for.body946.us, label %for.end957.us + +for.body930.us: ; preds = %for.body914.us, %for.body930.us + br i1 undef, label %for.body930.us, label %for.body946.us + +for.body914.us: ; preds = %for.body898.us, %for.body914.us + br i1 undef, label %for.body914.us, label %for.body930.us + +for.body898.us: ; preds = %for.body898.lr.ph.us, %for.body898.us + br i1 undef, label %for.body898.us, label %for.body914.us + +for.body898.lr.ph.us: ; preds = %for.end957.us, %for.cond894.preheader.lr.ph + br label %for.body898.us + +for.end964: ; preds = %for.end957.us, %for.cond889.preheader + %inc990 = add nsw i32 undef, 1 + br i1 false, label %for.inc992, label %for.cond889.preheader + +for.inc992: ; preds = %for.end964 + br i1 false, label %for.inc997, label %for.body879 + +for.inc997: ; preds = %for.inc992, %for.end873, %for.inc701, %for.end543 + %cmp250 = icmp slt i32 undef, %mul10 + br i1 %cmp250, label %for.body252, label %for.end999 + +for.end999: ; preds = %for.inc997 + ret void +} + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" 
"no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind } + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} +!3 = metadata !{metadata !"short", metadata !1} diff --git a/test/CodeGen/PowerPC/ctr-cleanup.ll b/test/CodeGen/PowerPC/ctr-cleanup.ll new file mode 100644 index 000000000000..04e4ffb0d48d --- /dev/null +++ b/test/CodeGen/PowerPC/ctr-cleanup.ll @@ -0,0 +1,25 @@ +; RUN: llc < %s -mcpu=a2 | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define void @main() #0 { +entry: + br i1 undef, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %entry ] + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 5 + br i1 %exitcond, label %for.end, label %for.body + +; CHECK: @main +; CHECK: li {{[0-9]+}}, 4 +; CHECK-NOT: li {{[0-9]+}}, 4 +; CHECK: bdnz + +for.end: ; preds = %for.body, %entry + ret void +} + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/PowerPC/cttz.ll b/test/CodeGen/PowerPC/cttz.ll index 1d365d47a877..3757fa3e2f29 100644 --- a/test/CodeGen/PowerPC/cttz.ll +++ b/test/CodeGen/PowerPC/cttz.ll @@ -1,10 +1,12 @@ ; Make sure this testcase does not use ctpop -; RUN: llc < %s -march=ppc32 | grep -i cntlzw +; RUN: llc < %s -march=ppc32 -mcpu=g5 | FileCheck %s declare i32 @llvm.cttz.i32(i32, i1) define i32 @bar(i32 %x) { entry: +; CHECK: @bar +; CHECK: cntlzw %tmp.1 = call i32 @llvm.cttz.i32( i32 %x, i1 true ) ; <i32> [#uses=1] ret i32 %tmp.1 } diff --git a/test/CodeGen/PowerPC/dbg.ll b/test/CodeGen/PowerPC/dbg.ll index e161cb05686f..21e36618c5c1 100644 --- a/test/CodeGen/PowerPC/dbg.ll +++ b/test/CodeGen/PowerPC/dbg.ll @@ -16,12 +16,10 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"dbg.c", metadata !"/src", metadata !"clang version 3.1", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] -!1 = metadata !{metadata !2} -!2 = metadata !{i32 0} -!3 = metadata !{metadata !4} -!4 = metadata !{metadata !5} -!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !13} ; [ DW_TAG_subprogram ] +!0 = metadata !{i32 720913, i32 12, metadata !6, metadata !"clang version 3.1", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] +!1 = metadata !{i32 0} +!3 = metadata !{metadata !5} +!5 = metadata !{i32 720942, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !13} ; [ DW_TAG_subprogram ] !6 = metadata !{i32 720937, 
metadata !"dbg.c", metadata !"/src", null} ; [ DW_TAG_file_type ] !7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !8 = metadata !{metadata !9, metadata !9, metadata !10} diff --git a/test/CodeGen/PowerPC/dcbt-sched.ll b/test/CodeGen/PowerPC/dcbt-sched.ll new file mode 100644 index 000000000000..dfa1b75bd7db --- /dev/null +++ b/test/CodeGen/PowerPC/dcbt-sched.ll @@ -0,0 +1,22 @@ +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" +; RUN: llc -mcpu=a2 -enable-misched -enable-aa-sched-mi < %s | FileCheck %s + +define i8 @test1(i8* noalias %a, i8* noalias %b, i8* noalias %c) nounwind { +entry: + %q = load i8* %b + call void @llvm.prefetch(i8* %a, i32 0, i32 3, i32 1) + %r = load i8* %c + %s = add i8 %q, %r + ret i8 %s +} + +declare void @llvm.prefetch(i8*, i32, i32, i32) + +; Test that we've moved the second load to before the dcbt to better +; hide its latency. +; CHECK: @test1 +; CHECK: lbz +; CHECK: lbz +; CHECK: dcbt + diff --git a/test/CodeGen/PowerPC/float-asmprint.ll b/test/CodeGen/PowerPC/float-asmprint.ll new file mode 100644 index 000000000000..c9dc02862aac --- /dev/null +++ b/test/CodeGen/PowerPC/float-asmprint.ll @@ -0,0 +1,34 @@ +; RUN: llc -mtriple=powerpc64-none-linux < %s | FileCheck %s + +; Check that all current floating-point types are correctly emitted to assembly +; on a big-endian target. x86_fp80 can't actually print for unrelated reasons, +; but that's not really a problem. + +@var128 = global fp128 0xL00000000000000008000000000000000, align 16 +@varppc128 = global ppc_fp128 0xM80000000000000000000000000000000, align 16 +@var64 = global double -0.0, align 8 +@var32 = global float -0.0, align 4 +@var16 = global half -0.0, align 2 + +; CHECK: var128: +; CHECK-NEXT: .quad -9223372036854775808 # fp128 -0 +; CHECK-NEXT: .quad 0 +; CHECK-NEXT: .size + +; CHECK: varppc128: +; CHECK-NEXT: .quad -9223372036854775808 # ppc_fp128 -0 +; CHECK-NEXT: .quad 0 +; CHECK-NEXT: .size + +; CHECK: var64: +; CHECK-NEXT: .quad -9223372036854775808 # double -0 +; CHECK-NEXT: .size + +; CHECK: var32: +; CHECK-NEXT: .long 2147483648 # float -0 +; CHECK-NEXT: .size + +; CHECK: var16: +; CHECK-NEXT: .short 32768 # half -0 +; CHECK-NEXT: .size + diff --git a/test/CodeGen/PowerPC/float-to-int.ll b/test/CodeGen/PowerPC/float-to-int.ll new file mode 100644 index 000000000000..39cd4f929f8d --- /dev/null +++ b/test/CodeGen/PowerPC/float-to-int.ll @@ -0,0 +1,93 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g5 +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define i64 @foo(float %a) nounwind { + %x = fptosi float %a to i64 + ret i64 %x + +; CHECK: @foo +; CHECK: fctidz [[REG:[0-9]+]], 1 +; CHECK: stfd [[REG]], +; CHECK: ld 3, +; CHECK: blr +} + +define i64 @foo2(double %a) nounwind { + %x = fptosi double %a to i64 + ret i64 %x + +; CHECK: @foo2 +; CHECK: fctidz [[REG:[0-9]+]], 1 +; CHECK: stfd [[REG]], +; CHECK: ld 3, +; CHECK: blr +} + +define i64 @foo3(float %a) nounwind { + %x = fptoui float %a to i64 + ret i64 %x + +; CHECK: @foo3 +; CHECK: fctiduz [[REG:[0-9]+]], 1 +; CHECK: stfd [[REG]], +; CHECK: ld 3, +; CHECK: blr +} + +define i64 @foo4(double %a) 
nounwind { + %x = fptoui double %a to i64 + ret i64 %x + +; CHECK: @foo4 +; CHECK: fctiduz [[REG:[0-9]+]], 1 +; CHECK: stfd [[REG]], +; CHECK: ld 3, +; CHECK: blr +} + +define i32 @goo(float %a) nounwind { + %x = fptosi float %a to i32 + ret i32 %x + +; CHECK: @goo +; CHECK: fctiwz [[REG:[0-9]+]], 1 +; CHECK: stfiwx [[REG]], +; CHECK: lwz 3, +; CHECK: blr +} + +define i32 @goo2(double %a) nounwind { + %x = fptosi double %a to i32 + ret i32 %x + +; CHECK: @goo2 +; CHECK: fctiwz [[REG:[0-9]+]], 1 +; CHECK: stfiwx [[REG]], +; CHECK: lwz 3, +; CHECK: blr +} + +define i32 @goo3(float %a) nounwind { + %x = fptoui float %a to i32 + ret i32 %x + +; CHECK: @goo3 +; CHECK: fctiwuz [[REG:[0-9]+]], 1 +; CHECK: stfiwx [[REG]], +; CHECK: lwz 3, +; CHECK: blr +} + +define i32 @goo4(double %a) nounwind { + %x = fptoui double %a to i32 + ret i32 %x + +; CHECK: @goo4 +; CHECK: fctiwuz [[REG:[0-9]+]], 1 +; CHECK: stfiwx [[REG]], +; CHECK: lwz 3, +; CHECK: blr +} + diff --git a/test/CodeGen/PowerPC/frame-size.ll b/test/CodeGen/PowerPC/frame-size.ll new file mode 100644 index 000000000000..0e569a4602c3 --- /dev/null +++ b/test/CodeGen/PowerPC/frame-size.ll @@ -0,0 +1,32 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck %s +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32" + +define i64 @foo() nounwind { +entry: + %x = alloca [32568 x i8] + %"alloca point" = bitcast i32 0 to i32 + %x1 = bitcast [32568 x i8]* %x to i8* + +; Check that the RS spill slot has been allocated (because the estimate +; will fail the small-frame-size check and the function has spills). +; CHECK: @foo +; CHECK: stdu 1, -32768(1) + + %s1 = call i64 @bar(i8* %x1) nounwind + %s2 = call i64 @bar(i8* %x1) nounwind + %s3 = call i64 @bar(i8* %x1) nounwind + %s4 = call i64 @bar(i8* %x1) nounwind + %s5 = call i64 @bar(i8* %x1) nounwind + %s6 = call i64 @bar(i8* %x1) nounwind + %s7 = call i64 @bar(i8* %x1) nounwind + %s8 = call i64 @bar(i8* %x1) nounwind + %r = call i64 @can(i64 %s1, i64 %s2, i64 %s3, i64 %s4, i64 %s5, i64 %s6, i64 %s7, i64 %s8) nounwind + br label %return + +return: + ret i64 %r +} + +declare i64 @bar(i8*) +declare i64 @can(i64, i64, i64, i64, i64, i64, i64, i64) + diff --git a/test/CodeGen/PowerPC/frameaddr.ll b/test/CodeGen/PowerPC/frameaddr.ll new file mode 100644 index 000000000000..eabd4a68aa83 --- /dev/null +++ b/test/CodeGen/PowerPC/frameaddr.ll @@ -0,0 +1,47 @@ +; RUN: llc < %s -mcpu=pwr7 | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +declare void @llvm.eh.sjlj.longjmp(i8*) #1 + +define i8* @main() #0 { +entry: + %0 = call i8* @llvm.frameaddress(i32 0) + ret i8* %0 + +; CHECK: @main +; CHECK: mr 3, 1 +} + +define i8* @foo() #3 { ; naked +entry: + %0 = call i8* @llvm.frameaddress(i32 0) + ret i8* %0 + +; CHECK: @foo +; CHECK: mr 3, 1 +} + +define i8* @bar() #0 { +entry: + %x = alloca [100000 x i8] ; <[100000 x i8]*> [#uses=1] + %x1 = bitcast [100000 x i8]* %x to i8* ; <i8*> [#uses=1] + call void @use(i8* %x1) nounwind + %0 = call i8* @llvm.frameaddress(i32 0) + ret i8* %0 + +; Note that if we start eliminating non-leaf frame pointers by default, this +; will need to be updated. 
+; CHECK: @bar +; CHECK: mr 3, 31 +} + +declare void @use(i8*) + +declare i8* @llvm.frameaddress(i32) #2 + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { noreturn nounwind } +attributes #2 = { nounwind readnone } +attributes #3 = { nounwind naked "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } + diff --git a/test/CodeGen/PowerPC/i32-to-float.ll b/test/CodeGen/PowerPC/i32-to-float.ll new file mode 100644 index 000000000000..2707d0352de1 --- /dev/null +++ b/test/CodeGen/PowerPC/i32-to-float.ll @@ -0,0 +1,82 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g5 | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr6 | FileCheck -check-prefix=CHECK-PWR6 %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck -check-prefix=CHECK-A2 %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define float @foo(i32 %a) nounwind { +entry: + %x = sitofp i32 %a to float + ret float %x + +; CHECK: @foo +; CHECK: extsw [[REG:[0-9]+]], 3 +; CHECK: std [[REG]], +; CHECK: lfd [[REG2:[0-9]+]], +; CHECK: fcfid [[REG3:[0-9]+]], [[REG2]] +; CHECK: frsp 1, [[REG3]] +; CHECK: blr + +; CHECK-PWR6: @foo +; CHECK-PWR6: stw 3, +; CHECK-PWR6: lfiwax [[REG:[0-9]+]], +; CHECK-PWR6: fcfid [[REG2:[0-9]+]], [[REG]] +; CHECK-PWR6: frsp 1, [[REG2]] +; CHECK-PWR6: blr + +; CHECK-A2: @foo +; CHECK-A2: stw 3, +; CHECK-A2: lfiwax [[REG:[0-9]+]], +; CHECK-A2: fcfids 1, [[REG]] +; CHECK-A2: blr +} + +define double @goo(i32 %a) nounwind { +entry: + %x = sitofp i32 %a to double + ret double %x + +; CHECK: @goo +; CHECK: extsw [[REG:[0-9]+]], 3 +; CHECK: std [[REG]], +; CHECK: lfd [[REG2:[0-9]+]], +; CHECK: fcfid 1, [[REG2]] +; CHECK: blr + +; CHECK-PWR6: @goo +; CHECK-PWR6: stw 3, +; CHECK-PWR6: lfiwax [[REG:[0-9]+]], +; CHECK-PWR6: fcfid 1, [[REG]] +; CHECK-PWR6: blr + +; CHECK-A2: @goo +; CHECK-A2: stw 3, +; CHECK-A2: lfiwax [[REG:[0-9]+]], +; CHECK-A2: fcfid 1, [[REG]] +; CHECK-A2: blr +} + +define float @foou(i32 %a) nounwind { +entry: + %x = uitofp i32 %a to float + ret float %x + +; CHECK-A2: @foou +; CHECK-A2: stw 3, +; CHECK-A2: lfiwzx [[REG:[0-9]+]], +; CHECK-A2: fcfidus 1, [[REG]] +; CHECK-A2: blr +} + +define double @goou(i32 %a) nounwind { +entry: + %x = uitofp i32 %a to double + ret double %x + +; CHECK-A2: @goou +; CHECK-A2: stw 3, +; CHECK-A2: lfiwzx [[REG:[0-9]+]], +; CHECK-A2: fcfidu 1, [[REG]] +; CHECK-A2: blr +} + diff --git a/test/CodeGen/PowerPC/i64-to-float.ll b/test/CodeGen/PowerPC/i64-to-float.ll new file mode 100644 index 000000000000..b81d109e7f45 --- /dev/null +++ b/test/CodeGen/PowerPC/i64-to-float.ll @@ -0,0 +1,52 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define float @foo(i64 %a) nounwind { +entry: + %x = sitofp i64 %a to float + ret float %x + +; CHECK: @foo +; CHECK: std 3, +; CHECK: lfd [[REG:[0-9]+]], +; CHECK: fcfids 1, [[REG]] +; CHECK: blr 
+} + +define double @goo(i64 %a) nounwind { +entry: + %x = sitofp i64 %a to double + ret double %x + +; CHECK: @goo +; CHECK: std 3, +; CHECK: lfd [[REG:[0-9]+]], +; CHECK: fcfid 1, [[REG]] +; CHECK: blr +} + +define float @foou(i64 %a) nounwind { +entry: + %x = uitofp i64 %a to float + ret float %x + +; CHECK: @foou +; CHECK: std 3, +; CHECK: lfd [[REG:[0-9]+]], +; CHECK: fcfidus 1, [[REG]] +; CHECK: blr +} + +define double @goou(i64 %a) nounwind { +entry: + %x = uitofp i64 %a to double + ret double %x + +; CHECK: @goou +; CHECK: std 3, +; CHECK: lfd [[REG:[0-9]+]], +; CHECK: fcfidu 1, [[REG]] +; CHECK: blr +} + diff --git a/test/CodeGen/PowerPC/i64_fp_round.ll b/test/CodeGen/PowerPC/i64_fp_round.ll index 5a0c072c9c52..d2a3239ab865 100644 --- a/test/CodeGen/PowerPC/i64_fp_round.ll +++ b/test/CodeGen/PowerPC/i64_fp_round.ll @@ -1,4 +1,4 @@ -; RUN: llc -mcpu=pwr7 < %s | FileCheck %s +; RUN: llc -mcpu=pwr7 -mattr=-fpcvt < %s | FileCheck %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" @@ -12,16 +12,16 @@ entry: ; Note that only parts of the sequence are checked for here, to allow ; for minor code generation differences. -; CHECK: sradi [[REGISTER:[0-9]+]], 3, 53 -; CHECK: addi [[REGISTER:[0-9]+]], [[REGISTER]], 1 -; CHECK: cmpldi 0, [[REGISTER]], 1 -; CHECK: isel [[REGISTER:[0-9]+]], {{[0-9]+}}, 3, 1 -; CHECK: std [[REGISTER]], -{{[0-9]+}}(1) +; CHECK: sradi [[REG1:[0-9]+]], 3, 53 +; CHECK: addi [[REG2:[0-9]+]], [[REG1]], 1 +; CHECK: cmpldi 0, [[REG2]], 1 +; CHECK: isel [[REG3:[0-9]+]], {{[0-9]+}}, 3, 1 +; CHECK: std [[REG3]], -{{[0-9]+}}(1) ; Also check that with -enable-unsafe-fp-math we do not get that extra ; code sequence. Simply verify that there is no "isel" present. 
-; RUN: llc -mcpu=pwr7 -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=UNSAFE +; RUN: llc -mcpu=pwr7 -mattr=-fpcvt -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=UNSAFE ; CHECK-UNSAFE-NOT: isel diff --git a/test/CodeGen/PowerPC/iabs.ll b/test/CodeGen/PowerPC/iabs.ll index 7d089bbd653c..f683238de268 100644 --- a/test/CodeGen/PowerPC/iabs.ll +++ b/test/CodeGen/PowerPC/iabs.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -march=ppc32 -stats 2>&1 | \ ; RUN: grep "4 .*Number of machine instrs printed" diff --git a/test/CodeGen/PowerPC/in-asm-f64-reg.ll b/test/CodeGen/PowerPC/in-asm-f64-reg.ll new file mode 100644 index 000000000000..1321dfce2027 --- /dev/null +++ b/test/CodeGen/PowerPC/in-asm-f64-reg.ll @@ -0,0 +1,11 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s + +define void @f() { +; CHECK: @f + +entry: + %0 = tail call double* asm sideeffect "qvstfdux $2,$0,$1", "=b,{r7},{f11},0,~{memory}"(i32 64, double undef, double* undef) + ret void + +; CHECK: qvstfdux 11,{{[0-9]+}},7 +} diff --git a/test/CodeGen/PowerPC/jaggedstructs.ll b/test/CodeGen/PowerPC/jaggedstructs.ll index 62aa7cf929f8..a10c5ddb36fb 100644 --- a/test/CodeGen/PowerPC/jaggedstructs.ll +++ b/test/CodeGen/PowerPC/jaggedstructs.ll @@ -23,22 +23,22 @@ entry: ; CHECK: std 4, 200(1) ; CHECK: std 3, 192(1) ; CHECK: lbz {{[0-9]+}}, 199(1) -; CHECK: stb {{[0-9]+}}, 55(1) ; CHECK: lhz {{[0-9]+}}, 197(1) +; CHECK: stb {{[0-9]+}}, 55(1) ; CHECK: sth {{[0-9]+}}, 53(1) ; CHECK: lbz {{[0-9]+}}, 207(1) -; CHECK: stb {{[0-9]+}}, 63(1) ; CHECK: lwz {{[0-9]+}}, 203(1) +; CHECK: stb {{[0-9]+}}, 63(1) ; CHECK: stw {{[0-9]+}}, 59(1) ; CHECK: lhz {{[0-9]+}}, 214(1) -; CHECK: sth {{[0-9]+}}, 70(1) ; CHECK: lwz {{[0-9]+}}, 210(1) +; CHECK: sth {{[0-9]+}}, 70(1) ; CHECK: stw {{[0-9]+}}, 66(1) ; CHECK: lbz {{[0-9]+}}, 223(1) -; CHECK: stb {{[0-9]+}}, 79(1) ; CHECK: lhz {{[0-9]+}}, 221(1) -; CHECK: sth {{[0-9]+}}, 77(1) ; CHECK: lwz {{[0-9]+}}, 217(1) +; CHECK: stb {{[0-9]+}}, 79(1) +; CHECK: sth {{[0-9]+}}, 77(1) ; CHECK: stw {{[0-9]+}}, 73(1) ; CHECK: ld 6, 72(1) ; CHECK: ld 5, 64(1) diff --git a/test/CodeGen/PowerPC/lbzux.ll b/test/CodeGen/PowerPC/lbzux.ll index 12f1d1f130d8..98951306fd8e 100644 --- a/test/CodeGen/PowerPC/lbzux.ll +++ b/test/CodeGen/PowerPC/lbzux.ll @@ -1,6 +1,6 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" -; RUN: llc < %s | FileCheck %s +; RUN: llc -disable-ppc-unaligned < %s | FileCheck %s define fastcc void @allocateSpace(i1 %cond1, i1 %cond2) nounwind { entry: diff --git a/test/CodeGen/PowerPC/lit.local.cfg b/test/CodeGen/PowerPC/lit.local.cfg index 4019eca0bb88..aaa31d93d5f2 100644 --- a/test/CodeGen/PowerPC/lit.local.cfg +++ b/test/CodeGen/PowerPC/lit.local.cfg @@ -1,4 +1,4 @@ -config.suffixes = ['.ll', '.c', '.cpp'] +config.suffixes = ['.ll', '.c', '.cpp', '.test'] targets = set(config.root.targets_to_build.split()) if not 'PowerPC' in targets: diff --git a/test/CodeGen/PowerPC/load-shift-combine.ll b/test/CodeGen/PowerPC/load-shift-combine.ll new file mode 100644 index 000000000000..a5d1224864a6 --- /dev/null +++ b/test/CodeGen/PowerPC/load-shift-combine.ll @@ -0,0 +1,34 @@ +; RUN: llc < %s + +; This used to cause a crash. A standard load is converted to a pre-increment +; load. Later the pre-increment load is combined with a subsequent SRL to +; produce a smaller load. 
This transform invalidly created a standard load +; and propagated the produced value into uses of both produced values of the +; pre-increment load. The result was a crash when attempting to process an +; add with a token-chain operand. + +%struct.Info = type { i32, i32, i8*, i8*, i8*, [32 x i8*], i64, [32 x i64], i64, i64, i64, [32 x i64] } +%struct.S1847 = type { [12 x i8], [4 x i8], [8 x i8], [4 x i8], [8 x i8], [2 x i8], i8, [4 x i64], i8, [3 x i8], [4 x i8], i8, i16, [4 x %struct.anon.76], i16, i8, i8* } +%struct.anon.76 = type { i32 } +@info = common global %struct.Info zeroinitializer, align 8 +@fails = common global i32 0, align 4 +@a1847 = external global [5 x %struct.S1847] +define void @test1847() nounwind { +entry: + %j = alloca i32, align 4 + %0 = load i64* getelementptr inbounds (%struct.Info* @info, i32 0, i32 8), align 8 + %1 = load i32* @fails, align 4 + %bf.load1 = load i96* bitcast (%struct.S1847* getelementptr inbounds ([5 x %struct.S1847]* @a1847, i32 0, i64 2) to i96*), align 8 + %bf.clear2 = and i96 %bf.load1, 302231454903657293676543 + %bf.set3 = or i96 %bf.clear2, -38383394772764476296921088 + store i96 %bf.set3, i96* bitcast (%struct.S1847* getelementptr inbounds ([5 x %struct.S1847]* @a1847, i32 0, i64 2) to i96*), align 8 + %2 = load i32* %j, align 4 + %3 = load i32* %j, align 4 + %inc11 = add nsw i32 %3, 1 + store i32 %inc11, i32* %j, align 4 + %bf.load15 = load i96* bitcast (%struct.S1847* getelementptr inbounds ([5 x %struct.S1847]* @a1847, i32 0, i64 2) to i96*), align 8 + %bf.clear16 = and i96 %bf.load15, -18446744069414584321 + %bf.set17 = or i96 %bf.clear16, 18446743532543672320 + store i96 %bf.set17, i96* bitcast (%struct.S1847* getelementptr inbounds ([5 x %struct.S1847]* @a1847, i32 0, i64 2) to i96*), align 8 + ret void +} diff --git a/test/CodeGen/PowerPC/mcm-1.ll b/test/CodeGen/PowerPC/mcm-1.ll new file mode 100644 index 000000000000..a57fb9dd98d0 --- /dev/null +++ b/test/CodeGen/PowerPC/mcm-1.ll @@ -0,0 +1,27 @@ +; RUN: llc -mcpu=pwr7 -O0 -code-model=medium <%s | FileCheck %s +; RUN: llc -mcpu=pwr7 -O0 -code-model=large <%s | FileCheck %s + +; Test correct code generation for medium and large code model +; for loading and storing an external variable. + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@ei = external global i32 + +define signext i32 @test_external() nounwind { +entry: + %0 = load i32* @ei, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* @ei, align 4 + ret i32 %0 +} + +; CHECK: test_external: +; CHECK: addis [[REG1:[0-9]+]], 2, .LC[[TOCNUM:[0-9]+]]@toc@ha +; CHECK: ld [[REG2:[0-9]+]], .LC[[TOCNUM]]@toc@l([[REG1]]) +; CHECK: lwz {{[0-9]+}}, 0([[REG2]]) +; CHECK: stw {{[0-9]+}}, 0([[REG2]]) +; CHECK: .section .toc +; CHECK: .LC[[TOCNUM]]: +; CHECK: .tc {{[a-z0-9A-Z_.]+}}[TC],{{[a-z0-9A-Z_.]+}} diff --git a/test/CodeGen/PowerPC/mcm-10.ll b/test/CodeGen/PowerPC/mcm-10.ll new file mode 100644 index 000000000000..4bec3e16fa04 --- /dev/null +++ b/test/CodeGen/PowerPC/mcm-10.ll @@ -0,0 +1,25 @@ +; RUN: llc -mcpu=pwr7 -O1 -code-model=medium <%s | FileCheck %s + +; Test peephole optimization for medium code model (32-bit TOC offsets) +; for loading and storing a static variable scoped to a function. 
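+;
+; With the peephole, the @toc@l addition is expected to fold into the
+; displacement of the memory accesses themselves, giving roughly
+; (RA and RT below are placeholders, not specific registers):
+;   addis RA, 2, test_fn_static.si@toc@ha
+;   lwz   RT, test_fn_static.si@toc@l(RA)
+;   stw   RT, test_fn_static.si@toc@l(RA)
+; rather than materializing the full address with a separate addi first.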
+ +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@test_fn_static.si = internal global i32 0, align 4 + +define signext i32 @test_fn_static() nounwind { +entry: + %0 = load i32* @test_fn_static.si, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* @test_fn_static.si, align 4 + ret i32 %0 +} + +; CHECK: test_fn_static: +; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha +; CHECK: lwz {{[0-9]+}}, [[VAR]]@toc@l([[REG1]]) +; CHECK: stw {{[0-9]+}}, [[VAR]]@toc@l([[REG1]]) +; CHECK: .type [[VAR]],@object +; CHECK: .local [[VAR]] +; CHECK: .comm [[VAR]],4,4 diff --git a/test/CodeGen/PowerPC/mcm-11.ll b/test/CodeGen/PowerPC/mcm-11.ll new file mode 100644 index 000000000000..f2bc4c9cb72c --- /dev/null +++ b/test/CodeGen/PowerPC/mcm-11.ll @@ -0,0 +1,27 @@ +; RUN: llc -mcpu=pwr7 -O1 -code-model=medium <%s | FileCheck %s + +; Test peephole optimization for medium code model (32-bit TOC offsets) +; for loading and storing a file-scope static variable. + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@gi = global i32 5, align 4 + +define signext i32 @test_file_static() nounwind { +entry: + %0 = load i32* @gi, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* @gi, align 4 + ret i32 %0 +} + +; CHECK: test_file_static: +; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha +; CHECK: lwz {{[0-9]+}}, [[VAR]]@toc@l([[REG1]]) +; CHECK: stw {{[0-9]+}}, [[VAR]]@toc@l([[REG1]]) +; CHECK: .type [[VAR]],@object +; CHECK: .data +; CHECK: .globl [[VAR]] +; CHECK: [[VAR]]: +; CHECK: .long 5 diff --git a/test/CodeGen/PowerPC/mcm-12.ll b/test/CodeGen/PowerPC/mcm-12.ll new file mode 100644 index 000000000000..911305d4355f --- /dev/null +++ b/test/CodeGen/PowerPC/mcm-12.ll @@ -0,0 +1,18 @@ +; RUN: llc -mcpu=pwr7 -O1 -code-model=medium <%s | FileCheck %s + +; Test peephole optimization for medium code model (32-bit TOC offsets) +; for loading a value from the constant pool (TOC-relative). + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define double @test_double_const() nounwind { +entry: + ret double 0x3F4FD4920B498CF0 +} + +; CHECK: [[VAR:[a-z0-9A-Z_.]+]]: +; CHECK: .quad 4562098671269285104 +; CHECK: test_double_const: +; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha +; CHECK: lfd {{[0-9]+}}, [[VAR]]@toc@l([[REG1]]) diff --git a/test/CodeGen/PowerPC/mcm-2.ll b/test/CodeGen/PowerPC/mcm-2.ll new file mode 100644 index 000000000000..f0dff4c5a39c --- /dev/null +++ b/test/CodeGen/PowerPC/mcm-2.ll @@ -0,0 +1,37 @@ +; RUN: llc -mcpu=pwr7 -O0 -code-model=medium <%s | FileCheck -check-prefix=MEDIUM %s +; RUN: llc -mcpu=pwr7 -O0 -code-model=large <%s | FileCheck -check-prefix=LARGE %s + +; Test correct code generation for medium and large code model +; for loading and storing a static variable scoped to a function. 
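+;
+; In outline (RA, RT and sym below are placeholders):
+;   medium:  addis RA, 2, sym@toc@ha
+;            addi  RA, RA, sym@toc@l
+;            lwz/stw RT, 0(RA)
+;   large:   addis RA, 2, sym@toc@ha
+;            ld    RA, sym@toc@l(RA)
+;            lwz/stw RT, 0(RA)
+; i.e. the medium model computes the address directly from the TOC pointer
+; (r2), while the large model loads the address from a TOC entry.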
+ +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@test_fn_static.si = internal global i32 0, align 4 + +define signext i32 @test_fn_static() nounwind { +entry: + %0 = load i32* @test_fn_static.si, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* @test_fn_static.si, align 4 + ret i32 %0 +} + +; MEDIUM: test_fn_static: +; MEDIUM: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha +; MEDIUM: addi [[REG2:[0-9]+]], [[REG1]], [[VAR]]@toc@l +; MEDIUM: lwz {{[0-9]+}}, 0([[REG2]]) +; MEDIUM: stw {{[0-9]+}}, 0([[REG2]]) +; MEDIUM: .type [[VAR]],@object +; MEDIUM: .local [[VAR]] +; MEDIUM: .comm [[VAR]],4,4 + +; LARGE: test_fn_static: +; LARGE: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha +; LARGE: ld [[REG2:[0-9]+]], [[VAR]]@toc@l([[REG1]]) +; LARGE: lwz {{[0-9]+}}, 0([[REG2]]) +; LARGE: stw {{[0-9]+}}, 0([[REG2]]) +; LARGE: .type [[VAR]],@object +; LARGE: .local [[VAR]] +; LARGE: .comm [[VAR]],4,4 + diff --git a/test/CodeGen/PowerPC/mcm-3.ll b/test/CodeGen/PowerPC/mcm-3.ll new file mode 100644 index 000000000000..b7905503f458 --- /dev/null +++ b/test/CodeGen/PowerPC/mcm-3.ll @@ -0,0 +1,41 @@ +; RUN: llc -mcpu=pwr7 -O0 -code-model=medium <%s | FileCheck -check-prefix=MEDIUM %s +; RUN: llc -mcpu=pwr7 -O0 -code-model=large <%s | FileCheck -check-prefix=LARGE %s + +; Test correct code generation for medium and large code model +; for loading and storing a file-scope static variable. + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@gi = global i32 5, align 4 + +define signext i32 @test_file_static() nounwind { +entry: + %0 = load i32* @gi, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* @gi, align 4 + ret i32 %0 +} + +; MEDIUM: test_file_static: +; MEDIUM: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha +; MEDIUM: addi [[REG2:[0-9]+]], [[REG1]], [[VAR]]@toc@l +; MEDIUM: lwz {{[0-9]+}}, 0([[REG2]]) +; MEDIUM: stw {{[0-9]+}}, 0([[REG2]]) +; MEDIUM: .type [[VAR]],@object +; MEDIUM: .data +; MEDIUM: .globl [[VAR]] +; MEDIUM: [[VAR]]: +; MEDIUM: .long 5 + +; LARGE: test_file_static: +; LARGE: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha +; LARGE: ld [[REG2:[0-9]+]], [[VAR]]@toc@l([[REG1]]) +; LARGE: lwz {{[0-9]+}}, 0([[REG2]]) +; LARGE: stw {{[0-9]+}}, 0([[REG2]]) +; LARGE: .type [[VAR]],@object +; LARGE: .data +; LARGE: .globl [[VAR]] +; LARGE: [[VAR]]: +; LARGE: .long 5 + diff --git a/test/CodeGen/PowerPC/mcm-4.ll b/test/CodeGen/PowerPC/mcm-4.ll new file mode 100644 index 000000000000..47c60c936038 --- /dev/null +++ b/test/CodeGen/PowerPC/mcm-4.ll @@ -0,0 +1,27 @@ +; RUN: llc -mcpu=pwr7 -O0 -code-model=medium <%s | FileCheck -check-prefix=MEDIUM %s +; RUN: llc -mcpu=pwr7 -O0 -code-model=large <%s | FileCheck -check-prefix=LARGE %s + +; Test correct code generation for medium and large code model +; for loading a value from the constant pool (TOC-relative). 
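+;
+; The returned double should be emitted as its raw 64-bit bit pattern in a
+; TOC-anchored constant-pool entry; address formation then follows the same
+; medium/large patterns as for ordinary globals, with the value loaded by lfd.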
+ +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define double @test_double_const() nounwind { +entry: + ret double 0x3F4FD4920B498CF0 +} + +; MEDIUM: [[VAR:[a-z0-9A-Z_.]+]]: +; MEDIUM: .quad 4562098671269285104 +; MEDIUM: test_double_const: +; MEDIUM: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha +; MEDIUM: addi [[REG2:[0-9]+]], [[REG1]], [[VAR]]@toc@l +; MEDIUM: lfd {{[0-9]+}}, 0([[REG2]]) + +; LARGE: [[VAR:[a-z0-9A-Z_.]+]]: +; LARGE: .quad 4562098671269285104 +; LARGE: test_double_const: +; LARGE: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha +; LARGE: ld [[REG2:[0-9]+]], [[VAR]]@toc@l([[REG1]]) +; LARGE: lfd {{[0-9]+}}, 0([[REG2]]) diff --git a/test/CodeGen/PowerPC/mcm-5.ll b/test/CodeGen/PowerPC/mcm-5.ll new file mode 100644 index 000000000000..1be27b7e8cc0 --- /dev/null +++ b/test/CodeGen/PowerPC/mcm-5.ll @@ -0,0 +1,60 @@ +; RUN: llc -mcpu=pwr7 -O0 -code-model=medium <%s | FileCheck %s +; RUN: llc -mcpu=pwr7 -O0 -code-model=large <%s | FileCheck %s + +; Test correct code generation for medium and large code model +; for loading the address of a jump table from the TOC. + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define signext i32 @test_jump_table(i32 signext %i) nounwind { +entry: + %i.addr = alloca i32, align 4 + store i32 %i, i32* %i.addr, align 4 + %0 = load i32* %i.addr, align 4 + switch i32 %0, label %sw.default [ + i32 3, label %sw.bb + i32 4, label %sw.bb1 + i32 5, label %sw.bb2 + i32 6, label %sw.bb3 + ] + +sw.default: ; preds = %entry + br label %sw.epilog + +sw.bb: ; preds = %entry + %1 = load i32* %i.addr, align 4 + %mul = mul nsw i32 %1, 7 + store i32 %mul, i32* %i.addr, align 4 + br label %sw.bb1 + +sw.bb1: ; preds = %entry, %sw.bb + %2 = load i32* %i.addr, align 4 + %dec = add nsw i32 %2, -1 + store i32 %dec, i32* %i.addr, align 4 + br label %sw.bb2 + +sw.bb2: ; preds = %entry, %sw.bb1 + %3 = load i32* %i.addr, align 4 + %add = add nsw i32 %3, 3 + store i32 %add, i32* %i.addr, align 4 + br label %sw.bb3 + +sw.bb3: ; preds = %entry, %sw.bb2 + %4 = load i32* %i.addr, align 4 + %shl = shl i32 %4, 1 + store i32 %shl, i32* %i.addr, align 4 + br label %sw.epilog + +sw.epilog: ; preds = %sw.bb3, %sw.default + %5 = load i32* %i.addr, align 4 + ret i32 %5 +} + +; CHECK: test_jump_table: +; CHECK: addis [[REG1:[0-9]+]], 2, .LC[[TOCNUM:[0-9]+]]@toc@ha +; CHECK: ld [[REG2:[0-9]+]], .LC[[TOCNUM]]@toc@l([[REG1]]) +; CHECK: ldx {{[0-9]+}}, {{[0-9]+}}, [[REG2]] +; CHECK: .section .toc +; CHECK: .LC[[TOCNUM]]: +; CHECK: .tc {{[a-z0-9A-Z_.]+}}[TC],{{[a-z0-9A-Z_.]+}} diff --git a/test/CodeGen/PowerPC/mcm-6.ll b/test/CodeGen/PowerPC/mcm-6.ll new file mode 100644 index 000000000000..35efaaa5628f --- /dev/null +++ b/test/CodeGen/PowerPC/mcm-6.ll @@ -0,0 +1,28 @@ +; RUN: llc -mcpu=pwr7 -O0 -code-model=medium < %s | FileCheck %s +; RUN: llc -mcpu=pwr7 -O0 -code-model=large < %s | FileCheck %s + +; Test correct code generation for medium and large code model +; for loading and storing a tentatively defined variable. 
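+;
+; A tentative (common) definition is still reached through a TOC entry; the
+; expected assembly tail looks roughly like this, where .LCn stands for
+; whichever TOC label is chosen:
+;   .section .toc
+; .LCn:
+;   .tc ti[TC],ti
+;   .comm ti,4,4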
+ +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@ti = common global i32 0, align 4 + +define signext i32 @test_tentative() nounwind { +entry: + %0 = load i32* @ti, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* @ti, align 4 + ret i32 %0 +} + +; CHECK: test_tentative: +; CHECK: addis [[REG1:[0-9]+]], 2, .LC[[TOCNUM:[0-9]+]]@toc@ha +; CHECK: ld [[REG2:[0-9]+]], .LC[[TOCNUM]]@toc@l([[REG1]]) +; CHECK: lwz {{[0-9]+}}, 0([[REG2]]) +; CHECK: stw {{[0-9]+}}, 0([[REG2]]) +; CHECK: .section .toc +; CHECK: .LC[[TOCNUM]]: +; CHECK: .tc [[VAR:[a-z0-9A-Z_.]+]][TC],{{[a-z0-9A-Z_.]+}} +; CHECK: .comm [[VAR]],4,4 diff --git a/test/CodeGen/PowerPC/mcm-7.ll b/test/CodeGen/PowerPC/mcm-7.ll new file mode 100644 index 000000000000..0dd39ee4109d --- /dev/null +++ b/test/CodeGen/PowerPC/mcm-7.ll @@ -0,0 +1,26 @@ +; RUN: llc -mcpu=pwr7 -O0 -code-model=medium < %s | FileCheck %s +; RUN: llc -mcpu=pwr7 -O0 -code-model=large < %s | FileCheck %s + +; Test correct code generation for medium and large code model +; for loading a function address. + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define i8* @test_fnaddr() nounwind { +entry: + %func = alloca i32 (i32)*, align 8 + store i32 (i32)* @foo, i32 (i32)** %func, align 8 + %0 = load i32 (i32)** %func, align 8 + %1 = bitcast i32 (i32)* %0 to i8* + ret i8* %1 +} + +declare signext i32 @foo(i32 signext) + +; CHECK: test_fnaddr: +; CHECK: addis [[REG1:[0-9]+]], 2, .LC[[TOCNUM:[0-9]+]]@toc@ha +; CHECK: ld [[REG2:[0-9]+]], .LC[[TOCNUM]]@toc@l([[REG1]]) +; CHECK: .section .toc +; CHECK: .LC[[TOCNUM]]: +; CHECK: .tc {{[a-z0-9A-Z_.]+}}[TC],{{[a-z0-9A-Z_.]+}} diff --git a/test/CodeGen/PowerPC/mcm-8.ll b/test/CodeGen/PowerPC/mcm-8.ll new file mode 100644 index 000000000000..3ece786d6447 --- /dev/null +++ b/test/CodeGen/PowerPC/mcm-8.ll @@ -0,0 +1,25 @@ +; RUN: llc -mcpu=pwr7 -O0 -code-model=medium < %s | FileCheck %s +; RUN: llc -mcpu=pwr7 -O0 -code-model=large < %s | FileCheck %s + +; Test correct code generation for medium and large code model +; for loading a variable with available-externally linkage. + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@x = available_externally constant [13 x i8] c"St9bad_alloc\00" + +define signext i8 @test_avext() nounwind { +entry: + %0 = getelementptr inbounds [13 x i8]* @x, i32 0, i32 0 + %1 = load i8* %0, align 1 + ret i8 %1 +} + +; CHECK: test_avext: +; CHECK: addis [[REG1:[0-9]+]], 2, .LC[[TOCNUM:[0-9]+]]@toc@ha +; CHECK: ld [[REG2:[0-9]+]], .LC[[TOCNUM]]@toc@l([[REG1]]) +; CHECK: lbz {{[0-9]+}}, 0([[REG2]]) +; CHECK: .section .toc +; CHECK: .LC[[TOCNUM]]: +; CHECK: .tc {{[a-z0-9A-Z_.]+}}[TC],{{[a-z0-9A-Z_.]+}} diff --git a/test/CodeGen/PowerPC/mcm-9.ll b/test/CodeGen/PowerPC/mcm-9.ll new file mode 100644 index 000000000000..f366f45cc863 --- /dev/null +++ b/test/CodeGen/PowerPC/mcm-9.ll @@ -0,0 +1,28 @@ +; RUN: llc -mcpu=pwr7 -O0 -code-model=medium <%s | FileCheck %s +; RUN: llc -mcpu=pwr7 -O0 -code-model=large <%s | FileCheck %s + +; Test correct code generation for medium and large code model +; for loading and storing an aliased external variable. 
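+;
+; The access is written against the alias @a rather than @ei directly, but
+; the generated sequence should be the same TOC-indirect pattern as for a
+; plain external: addis/ld of the TOC entry, then lwz/stw through it.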
+ +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@ei = external global i32 +@a = alias i32* @ei + +define signext i32 @test_external() nounwind { +entry: + %0 = load i32* @a, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* @a, align 4 + ret i32 %0 +} + +; CHECK: test_external: +; CHECK: addis [[REG1:[0-9]+]], 2, .LC[[TOCNUM:[0-9]+]]@toc@ha +; CHECK: ld [[REG2:[0-9]+]], .LC[[TOCNUM]]@toc@l([[REG1]]) +; CHECK: lwz {{[0-9]+}}, 0([[REG2]]) +; CHECK: stw {{[0-9]+}}, 0([[REG2]]) +; CHECK: .section .toc +; CHECK: .LC[[TOCNUM]]: +; CHECK: .tc {{[a-z0-9A-Z_.]+}}[TC],{{[a-z0-9A-Z_.]+}} diff --git a/test/CodeGen/PowerPC/mcm-default.ll b/test/CodeGen/PowerPC/mcm-default.ll new file mode 100644 index 000000000000..19de2536aec3 --- /dev/null +++ b/test/CodeGen/PowerPC/mcm-default.ll @@ -0,0 +1,26 @@ +; RUN: llc -mcpu=pwr7 -O0 <%s | FileCheck %s + +; Test that we generate code for the medium model as the default. +; Use an external variable reference as an example. + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@ei = external global i32 + +define signext i32 @test_external() nounwind { +entry: + %0 = load i32* @ei, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* @ei, align 4 + ret i32 %0 +} + +; CHECK: test_external: +; CHECK: addis [[REG1:[0-9]+]], 2, .LC[[TOCNUM:[0-9]+]]@toc@ha +; CHECK: ld [[REG2:[0-9]+]], .LC[[TOCNUM]]@toc@l([[REG1]]) +; CHECK: lwz {{[0-9]+}}, 0([[REG2]]) +; CHECK: stw {{[0-9]+}}, 0([[REG2]]) +; CHECK: .section .toc +; CHECK: .LC[[TOCNUM]]: +; CHECK: .tc {{[a-z0-9A-Z_.]+}}[TC],{{[a-z0-9A-Z_.]+}} diff --git a/test/CodeGen/PowerPC/mcm-obj-2.ll b/test/CodeGen/PowerPC/mcm-obj-2.ll new file mode 100644 index 000000000000..2dd1718ba75a --- /dev/null +++ b/test/CodeGen/PowerPC/mcm-obj-2.ll @@ -0,0 +1,77 @@ +; RUN: llc -O1 -mcpu=pwr7 -code-model=medium -filetype=obj %s -o - | \ +; RUN: elf-dump --dump-section-data | FileCheck %s + +; FIXME: When asm-parse is available, could make this an assembly test. + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@test_fn_static.si = internal global i32 0, align 4 + +define signext i32 @test_fn_static() nounwind { +entry: + %0 = load i32* @test_fn_static.si, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* @test_fn_static.si, align 4 + ret i32 %0 +} + +; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for +; accessing function-scoped variable si. +; +; CHECK: Relocation 0 +; CHECK-NEXT: 'r_offset' +; CHECK-NEXT: 'r_sym', 0x[[SYM2:[0-9]+]] +; CHECK-NEXT: 'r_type', 0x00000032 +; CHECK: Relocation 1 +; CHECK-NEXT: 'r_offset' +; CHECK-NEXT: 'r_sym', 0x[[SYM2]] +; CHECK-NEXT: 'r_type', 0x00000030 +; CHECK: Relocation 2 +; CHECK-NEXT: 'r_offset' +; CHECK-NEXT: 'r_sym', 0x[[SYM2]] +; CHECK-NEXT: 'r_type', 0x00000030 + +@gi = global i32 5, align 4 + +define signext i32 @test_file_static() nounwind { +entry: + %0 = load i32* @gi, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* @gi, align 4 + ret i32 %0 +} + +; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for +; accessing file-scope variable gi. 
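+; (In this dump 0x00000032 is R_PPC64_TOC16_HA and 0x00000030 is
+; R_PPC64_TOC16_LO.)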
+; +; CHECK: Relocation 3 +; CHECK-NEXT: 'r_offset' +; CHECK-NEXT: 'r_sym', 0x[[SYM3:[0-9]+]] +; CHECK-NEXT: 'r_type', 0x00000032 +; CHECK: Relocation 4 +; CHECK-NEXT: 'r_offset' +; CHECK-NEXT: 'r_sym', 0x[[SYM3]] +; CHECK-NEXT: 'r_type', 0x00000030 +; CHECK: Relocation 5 +; CHECK-NEXT: 'r_offset' +; CHECK-NEXT: 'r_sym', 0x[[SYM3]] +; CHECK-NEXT: 'r_type', 0x00000030 + +define double @test_double_const() nounwind { +entry: + ret double 0x3F4FD4920B498CF0 +} + +; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for +; accessing a constant. +; +; CHECK: Relocation 6 +; CHECK-NEXT: 'r_offset' +; CHECK-NEXT: 'r_sym', 0x[[SYM4:[0-9]+]] +; CHECK-NEXT: 'r_type', 0x00000032 +; CHECK: Relocation 7 +; CHECK-NEXT: 'r_offset' +; CHECK-NEXT: 'r_sym', 0x[[SYM4]] +; CHECK-NEXT: 'r_type', 0x00000030 + diff --git a/test/CodeGen/PowerPC/mcm-obj.ll b/test/CodeGen/PowerPC/mcm-obj.ll new file mode 100644 index 000000000000..117c3b334346 --- /dev/null +++ b/test/CodeGen/PowerPC/mcm-obj.ll @@ -0,0 +1,268 @@ +; RUN: llc -O0 -mcpu=pwr7 -code-model=medium -filetype=obj %s -o - | \ +; RUN: elf-dump --dump-section-data | FileCheck -check-prefix=MEDIUM %s +; RUN: llc -O0 -mcpu=pwr7 -code-model=large -filetype=obj %s -o - | \ +; RUN: elf-dump --dump-section-data | FileCheck -check-prefix=LARGE %s + +; FIXME: When asm-parse is available, could make this an assembly test. + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@ei = external global i32 + +define signext i32 @test_external() nounwind { +entry: + %0 = load i32* @ei, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* @ei, align 4 + ret i32 %0 +} + +; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for +; accessing external variable ei. +; +; MEDIUM: '.rela.text' +; MEDIUM: Relocation 0 +; MEDIUM-NEXT: 'r_offset' +; MEDIUM-NEXT: 'r_sym', 0x[[SYM1:[0-9]+]] +; MEDIUM-NEXT: 'r_type', 0x00000032 +; MEDIUM: Relocation 1 +; MEDIUM-NEXT: 'r_offset' +; MEDIUM-NEXT: 'r_sym', 0x[[SYM1]] +; MEDIUM-NEXT: 'r_type', 0x00000040 +; +; LARGE: '.rela.text' +; LARGE: Relocation 0 +; LARGE-NEXT: 'r_offset' +; LARGE-NEXT: 'r_sym', 0x[[SYM1:[0-9]+]] +; LARGE-NEXT: 'r_type', 0x00000032 +; LARGE: Relocation 1 +; LARGE-NEXT: 'r_offset' +; LARGE-NEXT: 'r_sym', 0x[[SYM1]] +; LARGE-NEXT: 'r_type', 0x00000040 + +@test_fn_static.si = internal global i32 0, align 4 + +define signext i32 @test_fn_static() nounwind { +entry: + %0 = load i32* @test_fn_static.si, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* @test_fn_static.si, align 4 + ret i32 %0 +} + +; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for +; accessing function-scoped variable si. +; +; MEDIUM: Relocation 2 +; MEDIUM-NEXT: 'r_offset' +; MEDIUM-NEXT: 'r_sym', 0x[[SYM2:[0-9]+]] +; MEDIUM-NEXT: 'r_type', 0x00000032 +; MEDIUM: Relocation 3 +; MEDIUM-NEXT: 'r_offset' +; MEDIUM-NEXT: 'r_sym', 0x[[SYM2]] +; MEDIUM-NEXT: 'r_type', 0x00000030 +; +; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for +; accessing function-scoped variable si. 
+; +; LARGE: Relocation 2 +; LARGE-NEXT: 'r_offset' +; LARGE-NEXT: 'r_sym', 0x[[SYM2:[0-9]+]] +; LARGE-NEXT: 'r_type', 0x00000032 +; LARGE: Relocation 3 +; LARGE-NEXT: 'r_offset' +; LARGE-NEXT: 'r_sym', 0x[[SYM2]] +; LARGE-NEXT: 'r_type', 0x00000040 + +@gi = global i32 5, align 4 + +define signext i32 @test_file_static() nounwind { +entry: + %0 = load i32* @gi, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* @gi, align 4 + ret i32 %0 +} + +; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for +; accessing file-scope variable gi. +; +; MEDIUM: Relocation 4 +; MEDIUM-NEXT: 'r_offset' +; MEDIUM-NEXT: 'r_sym', 0x[[SYM3:[0-9]+]] +; MEDIUM-NEXT: 'r_type', 0x00000032 +; MEDIUM: Relocation 5 +; MEDIUM-NEXT: 'r_offset' +; MEDIUM-NEXT: 'r_sym', 0x[[SYM3]] +; MEDIUM-NEXT: 'r_type', 0x00000030 +; +; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for +; accessing file-scope variable gi. +; +; LARGE: Relocation 4 +; LARGE-NEXT: 'r_offset' +; LARGE-NEXT: 'r_sym', 0x[[SYM3:[0-9]+]] +; LARGE-NEXT: 'r_type', 0x00000032 +; LARGE: Relocation 5 +; LARGE-NEXT: 'r_offset' +; LARGE-NEXT: 'r_sym', 0x[[SYM3]] +; LARGE-NEXT: 'r_type', 0x00000040 + +define double @test_double_const() nounwind { +entry: + ret double 0x3F4FD4920B498CF0 +} + +; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for +; accessing a constant. +; +; MEDIUM: Relocation 6 +; MEDIUM-NEXT: 'r_offset' +; MEDIUM-NEXT: 'r_sym', 0x[[SYM4:[0-9]+]] +; MEDIUM-NEXT: 'r_type', 0x00000032 +; MEDIUM: Relocation 7 +; MEDIUM-NEXT: 'r_offset' +; MEDIUM-NEXT: 'r_sym', 0x[[SYM4]] +; MEDIUM-NEXT: 'r_type', 0x00000030 +; +; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for +; accessing a constant. +; +; LARGE: Relocation 6 +; LARGE-NEXT: 'r_offset' +; LARGE-NEXT: 'r_sym', 0x[[SYM4:[0-9]+]] +; LARGE-NEXT: 'r_type', 0x00000032 +; LARGE: Relocation 7 +; LARGE-NEXT: 'r_offset' +; LARGE-NEXT: 'r_sym', 0x[[SYM4]] +; LARGE-NEXT: 'r_type', 0x00000040 + +define signext i32 @test_jump_table(i32 signext %i) nounwind { +entry: + %i.addr = alloca i32, align 4 + store i32 %i, i32* %i.addr, align 4 + %0 = load i32* %i.addr, align 4 + switch i32 %0, label %sw.default [ + i32 3, label %sw.bb + i32 4, label %sw.bb1 + i32 5, label %sw.bb2 + i32 6, label %sw.bb3 + ] + +sw.default: ; preds = %entry + br label %sw.epilog + +sw.bb: ; preds = %entry + %1 = load i32* %i.addr, align 4 + %mul = mul nsw i32 %1, 7 + store i32 %mul, i32* %i.addr, align 4 + br label %sw.bb1 + +sw.bb1: ; preds = %entry, %sw.bb + %2 = load i32* %i.addr, align 4 + %dec = add nsw i32 %2, -1 + store i32 %dec, i32* %i.addr, align 4 + br label %sw.bb2 + +sw.bb2: ; preds = %entry, %sw.bb1 + %3 = load i32* %i.addr, align 4 + %add = add nsw i32 %3, 3 + store i32 %add, i32* %i.addr, align 4 + br label %sw.bb3 + +sw.bb3: ; preds = %entry, %sw.bb2 + %4 = load i32* %i.addr, align 4 + %shl = shl i32 %4, 1 + store i32 %shl, i32* %i.addr, align 4 + br label %sw.epilog + +sw.epilog: ; preds = %sw.bb3, %sw.default + %5 = load i32* %i.addr, align 4 + ret i32 %5 +} + +; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for +; accessing a jump table address. 
+; +; MEDIUM: Relocation 8 +; MEDIUM-NEXT: 'r_offset' +; MEDIUM-NEXT: 'r_sym', 0x[[SYM5:[0-9]+]] +; MEDIUM-NEXT: 'r_type', 0x00000032 +; MEDIUM: Relocation 9 +; MEDIUM-NEXT: 'r_offset' +; MEDIUM-NEXT: 'r_sym', 0x[[SYM5]] +; MEDIUM-NEXT: 'r_type', 0x00000040 +; +; LARGE: Relocation 8 +; LARGE-NEXT: 'r_offset' +; LARGE-NEXT: 'r_sym', 0x[[SYM5:[0-9]+]] +; LARGE-NEXT: 'r_type', 0x00000032 +; LARGE: Relocation 9 +; LARGE-NEXT: 'r_offset' +; LARGE-NEXT: 'r_sym', 0x[[SYM5]] +; LARGE-NEXT: 'r_type', 0x00000040 + +@ti = common global i32 0, align 4 + +define signext i32 @test_tentative() nounwind { +entry: + %0 = load i32* @ti, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* @ti, align 4 + ret i32 %0 +} + +; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for +; accessing tentatively declared variable ti. +; +; MEDIUM: Relocation 10 +; MEDIUM-NEXT: 'r_offset' +; MEDIUM-NEXT: 'r_sym', 0x[[SYM6:[0-9]+]] +; MEDIUM-NEXT: 'r_type', 0x00000032 +; MEDIUM: Relocation 11 +; MEDIUM-NEXT: 'r_offset' +; MEDIUM-NEXT: 'r_sym', 0x[[SYM6]] +; MEDIUM-NEXT: 'r_type', 0x00000040 +; +; LARGE: Relocation 10 +; LARGE-NEXT: 'r_offset' +; LARGE-NEXT: 'r_sym', 0x[[SYM6:[0-9]+]] +; LARGE-NEXT: 'r_type', 0x00000032 +; LARGE: Relocation 11 +; LARGE-NEXT: 'r_offset' +; LARGE-NEXT: 'r_sym', 0x[[SYM6]] +; LARGE-NEXT: 'r_type', 0x00000040 + +define i8* @test_fnaddr() nounwind { +entry: + %func = alloca i32 (i32)*, align 8 + store i32 (i32)* @foo, i32 (i32)** %func, align 8 + %0 = load i32 (i32)** %func, align 8 + %1 = bitcast i32 (i32)* %0 to i8* + ret i8* %1 +} + +declare signext i32 @foo(i32 signext) + +; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for +; accessing function address foo. +; +; MEDIUM: Relocation 12 +; MEDIUM-NEXT: 'r_offset' +; MEDIUM-NEXT: 'r_sym', 0x[[SYM7:[0-9]+]] +; MEDIUM-NEXT: 'r_type', 0x00000032 +; MEDIUM: Relocation 13 +; MEDIUM-NEXT: 'r_offset' +; MEDIUM-NEXT: 'r_sym', 0x[[SYM7]] +; MEDIUM-NEXT: 'r_type', 0x00000040 +; +; LARGE: Relocation 12 +; LARGE-NEXT: 'r_offset' +; LARGE-NEXT: 'r_sym', 0x[[SYM7:[0-9]+]] +; LARGE-NEXT: 'r_type', 0x00000032 +; LARGE: Relocation 13 +; LARGE-NEXT: 'r_offset' +; LARGE-NEXT: 'r_sym', 0x[[SYM7]] +; LARGE-NEXT: 'r_type', 0x00000040 + diff --git a/test/CodeGen/PowerPC/mem_update.ll b/test/CodeGen/PowerPC/mem_update.ll index 39af11a3d54c..fcf53da67fc2 100644 --- a/test/CodeGen/PowerPC/mem_update.ll +++ b/test/CodeGen/PowerPC/mem_update.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=ppc32 | \ ; RUN: not grep addi -; RUN: llc < %s -march=ppc64 | \ +; RUN: llc -code-model=small < %s -march=ppc64 | \ ; RUN: not grep addi @Glob = global i64 4 diff --git a/test/CodeGen/PowerPC/misched-inorder-latency.ll b/test/CodeGen/PowerPC/misched-inorder-latency.ll new file mode 100644 index 000000000000..8fae7ad4d1df --- /dev/null +++ b/test/CodeGen/PowerPC/misched-inorder-latency.ll @@ -0,0 +1,55 @@ +; RUN: llc < %s -enable-misched -pre-RA-sched=source -scheditins=false \ +; RUN: -disable-ifcvt-triangle-false -disable-post-ra | FileCheck %s +; +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-bgq-linux" + +; %val1 is a load live out of %entry. It should be hoisted +; above the add. 
+; CHECK: testload: +; CHECK: %entry +; CHECK: lwz +; CHECK: addi +; CHECK: bne +; CHECK: %true +define i32 @testload(i32 *%ptr, i32 %sumin) { +entry: + %sum1 = add i32 %sumin, 1 + %val1 = load i32* %ptr + %p = icmp eq i32 %sumin, 0 + br i1 %p, label %true, label %end +true: + %sum2 = add i32 %sum1, 1 + %ptr2 = getelementptr i32* %ptr, i32 1 + %val = load i32* %ptr2 + %val2 = add i32 %val1, %val + br label %end +end: + %valmerge = phi i32 [ %val1, %entry], [ %val2, %true ] + %summerge = phi i32 [ %sum1, %entry], [ %sum2, %true ] + %sumout = add i32 %valmerge, %summerge + ret i32 %sumout +} + +; The prefetch gets a default latency of 3 cycles and should be hoisted +; above the add. +; +; CHECK: testprefetch: +; CHECK: %entry +; CHECK: dcbt +; CHECK: addi +; CHECK: blr +define i32 @testprefetch(i8 *%ptr, i32 %i) { +entry: + %val1 = add i32 %i, 1 + tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3, i32 1 ) + %p = icmp eq i32 %i, 0 + br i1 %p, label %true, label %end +true: + %val2 = add i32 %val1, 1 + br label %end +end: + %valmerge = phi i32 [ %val1, %entry], [ %val2, %true ] + ret i32 %valmerge +} +declare void @llvm.prefetch(i8*, i32, i32, i32) nounwind diff --git a/test/CodeGen/PowerPC/negctr.ll b/test/CodeGen/PowerPC/negctr.ll new file mode 100644 index 000000000000..2f6995c65dd8 --- /dev/null +++ b/test/CodeGen/PowerPC/negctr.ll @@ -0,0 +1,83 @@ +; RUN: llc < %s -mcpu=a2 | FileCheck %s +; RUN: llc < %s -mcpu=a2 -disable-lsr | FileCheck -check-prefix=NOLSR %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define void @main() #0 { +entry: + br i1 undef, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %entry ] + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 0 + br i1 %exitcond, label %for.end, label %for.body + +; FIXME: We currently can't form the 32-bit unsigned trip count necessary here! 
+; CHECK: @main +; CHECK-NOT: bdnz + +for.end: ; preds = %for.body, %entry + ret void +} + +define void @main1() #0 { +entry: + br i1 undef, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %entry ] + %indvars.iv.next = add i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 0 + br i1 %exitcond, label %for.end, label %for.body + +; CHECK: @main1 +; CHECK: li [[REG:[0-9]+]], -1 +; CHECK: mtctr [[REG]] +; CHECK: bdnz + +for.end: ; preds = %for.body, %entry + ret void +} + +define void @main2() #0 { +entry: + br i1 undef, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %entry ] + %indvars.iv.next = add i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, -100000 + br i1 %exitcond, label %for.end, label %for.body + +; CHECK: @main2 +; CHECK: lis [[REG:[0-9]+]], -2 +; CHECK: ori [[REG2:[0-9]+]], [[REG]], 31071 +; CHECK: mtctr [[REG2]] +; CHECK: bdnz + +for.end: ; preds = %for.body, %entry + ret void +} + +define void @main3() #0 { +entry: + br i1 undef, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 127984, %entry ] + %indvars.iv.next = add i64 %indvars.iv, -16 + %exitcond = icmp eq i64 %indvars.iv.next, -16 + br i1 %exitcond, label %for.end, label %for.body + +; NOLSR: @main3 +; NOLSR: li [[REG:[0-9]+]], 8000 +; NOLSR: mtctr [[REG]] +; NOLSR: bdnz + +for.end: ; preds = %for.body, %entry + ret void +} + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/PowerPC/popcnt.ll b/test/CodeGen/PowerPC/popcnt.ll new file mode 100644 index 000000000000..b304d72aede2 --- /dev/null +++ b/test/CodeGen/PowerPC/popcnt.ll @@ -0,0 +1,40 @@ +; RUN: llc -march=ppc64 -mattr=+popcntd < %s | FileCheck %s + +define i8 @cnt8(i8 %x) nounwind readnone { + %cnt = tail call i8 @llvm.ctpop.i8(i8 %x) + ret i8 %cnt +; CHECK: @cnt8 +; CHECK: rlwinm +; CHECK: popcntw +; CHECK: blr +} + +define i16 @cnt16(i16 %x) nounwind readnone { + %cnt = tail call i16 @llvm.ctpop.i16(i16 %x) + ret i16 %cnt +; CHECK: @cnt16 +; CHECK: rlwinm +; CHECK: popcntw +; CHECK: blr +} + +define i32 @cnt32(i32 %x) nounwind readnone { + %cnt = tail call i32 @llvm.ctpop.i32(i32 %x) + ret i32 %cnt +; CHECK: @cnt32 +; CHECK: popcntw +; CHECK: blr +} + +define i64 @cnt64(i64 %x) nounwind readnone { + %cnt = tail call i64 @llvm.ctpop.i64(i64 %x) + ret i64 %cnt +; CHECK: @cnt64 +; CHECK: popcntd +; CHECK: blr +} + +declare i8 @llvm.ctpop.i8(i8) nounwind readnone +declare i16 @llvm.ctpop.i16(i16) nounwind readnone +declare i32 @llvm.ctpop.i32(i32) nounwind readnone +declare i64 @llvm.ctpop.i64(i64) nounwind readnone diff --git a/test/CodeGen/PowerPC/ppc64-toc.ll b/test/CodeGen/PowerPC/ppc64-toc.ll index a29bdcb25031..7f30ef883e9a 100644 --- a/test/CodeGen/PowerPC/ppc64-toc.ll +++ b/test/CodeGen/PowerPC/ppc64-toc.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s | FileCheck %s +; RUN: llc -code-model=small < %s | FileCheck %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" diff --git a/test/CodeGen/PowerPC/pr15031.ll b/test/CodeGen/PowerPC/pr15031.ll new file mode 100644 index 
000000000000..5ccf941a1f16 --- /dev/null +++ b/test/CodeGen/PowerPC/pr15031.ll @@ -0,0 +1,370 @@ +; RUN: llc -mcpu=pwr7 -O3 < %s | FileCheck %s + +; Test case derived from bug report 15031. The code in the post-RA +; scheduler to break critical anti-dependencies was failing to check +; whether an instruction had more than one definition, and ensuring +; that any additional definitions interfered with the choice of a new +; register. As a result, this test originally caused this to be +; generated: +; +; lbzu 3, 1(3) +; +; which is illegal, since it requires register 3 to both receive the +; loaded value and receive the updated address. With the fix to bug +; 15031, a different register is chosen to receive the loaded value. + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +%"class.llvm::MachineMemOperand" = type { %"struct.llvm::MachinePointerInfo", i64, i32, %"class.llvm::MDNode"*, %"class.llvm::MDNode"* } +%"struct.llvm::MachinePointerInfo" = type { %"class.llvm::Value"*, i64 } +%"class.llvm::Value" = type { i32 (...)**, i8, i8, i16, %"class.llvm::Type"*, %"class.llvm::Use"*, %"class.llvm::StringMapEntry"* } +%"class.llvm::Type" = type { %"class.llvm::LLVMContext"*, i32, i32, %"class.llvm::Type"** } +%"class.llvm::LLVMContext" = type { %"class.llvm::LLVMContextImpl"* } +%"class.llvm::LLVMContextImpl" = type opaque +%"class.llvm::Use" = type { %"class.llvm::Value"*, %"class.llvm::Use"*, %"class.llvm::PointerIntPair" } +%"class.llvm::PointerIntPair" = type { i64 } +%"class.llvm::StringMapEntry" = type opaque +%"class.llvm::MDNode" = type { %"class.llvm::Value", %"class.llvm::FoldingSetImpl::Node", i32, i32 } +%"class.llvm::FoldingSetImpl::Node" = type { i8* } +%"class.llvm::MachineInstr" = type { %"class.llvm::ilist_node", %"class.llvm::MCInstrDesc"*, %"class.llvm::MachineBasicBlock"*, %"class.llvm::MachineOperand"*, i32, %"class.llvm::ArrayRecycler<llvm::MachineOperand, 8>::Capacity", i8, i8, i8, %"class.llvm::MachineMemOperand"**, %"class.llvm::DebugLoc" } +%"class.llvm::ilist_node" = type { %"class.llvm::ilist_half_node", %"class.llvm::MachineInstr"* } +%"class.llvm::ilist_half_node" = type { %"class.llvm::MachineInstr"* } +%"class.llvm::MCInstrDesc" = type { i16, i16, i16, i16, i16, i32, i64, i16*, i16*, %"class.llvm::MCOperandInfo"* } +%"class.llvm::MCOperandInfo" = type { i16, i8, i8, i32 } +%"class.llvm::MachineBasicBlock" = type { %"class.llvm::ilist_node.0", %"struct.llvm::ilist", %"class.llvm::BasicBlock"*, i32, %"class.llvm::MachineFunction"*, %"class.std::vector.163", %"class.std::vector.163", %"class.std::vector.123", %"class.std::vector.123", i32, i8, i8 } +%"class.llvm::ilist_node.0" = type { %"class.llvm::ilist_half_node.1", %"class.llvm::MachineBasicBlock"* } +%"class.llvm::ilist_half_node.1" = type { %"class.llvm::MachineBasicBlock"* } +%"struct.llvm::ilist" = type { %"class.llvm::iplist" } +%"class.llvm::iplist" = type { %"struct.llvm::ilist_traits", %"class.llvm::MachineInstr"* } +%"struct.llvm::ilist_traits" = type { %"class.llvm::ilist_half_node", %"class.llvm::MachineBasicBlock"* } +%"class.llvm::BasicBlock" = type { %"class.llvm::Value", %"class.llvm::ilist_node.2", %"class.llvm::iplist.4", %"class.llvm::Function"* } +%"class.llvm::ilist_node.2" = type { %"class.llvm::ilist_half_node.3", %"class.llvm::BasicBlock"* } +%"class.llvm::ilist_half_node.3" = type { %"class.llvm::BasicBlock"* } +%"class.llvm::iplist.4" = type { 
%"struct.llvm::ilist_traits.5", %"class.llvm::Instruction"* } +%"struct.llvm::ilist_traits.5" = type { %"class.llvm::ilist_half_node.10" } +%"class.llvm::ilist_half_node.10" = type { %"class.llvm::Instruction"* } +%"class.llvm::Instruction" = type { %"class.llvm::User", %"class.llvm::ilist_node.193", %"class.llvm::BasicBlock"*, %"class.llvm::DebugLoc" } +%"class.llvm::User" = type { %"class.llvm::Value", %"class.llvm::Use"*, i32 } +%"class.llvm::ilist_node.193" = type { %"class.llvm::ilist_half_node.10", %"class.llvm::Instruction"* } +%"class.llvm::DebugLoc" = type { i32, i32 } +%"class.llvm::Function" = type { %"class.llvm::GlobalValue", %"class.llvm::ilist_node.27", %"class.llvm::iplist.47", %"class.llvm::iplist.54", %"class.llvm::ValueSymbolTable"*, %"class.llvm::AttributeSet" } +%"class.llvm::GlobalValue" = type { [52 x i8], [4 x i8], %"class.llvm::Module"*, %"class.std::basic_string" } +%"class.llvm::Module" = type { %"class.llvm::LLVMContext"*, %"class.llvm::iplist.11", %"class.llvm::iplist.20", %"class.llvm::iplist.29", %"struct.llvm::ilist.38", %"class.std::basic_string", %"class.llvm::ValueSymbolTable"*, %"class.llvm::OwningPtr", %"class.std::basic_string", %"class.std::basic_string", %"class.std::basic_string", i8* } +%"class.llvm::iplist.11" = type { %"struct.llvm::ilist_traits.12", %"class.llvm::GlobalVariable"* } +%"struct.llvm::ilist_traits.12" = type { %"class.llvm::ilist_node.18" } +%"class.llvm::ilist_node.18" = type { %"class.llvm::ilist_half_node.19", %"class.llvm::GlobalVariable"* } +%"class.llvm::ilist_half_node.19" = type { %"class.llvm::GlobalVariable"* } +%"class.llvm::GlobalVariable" = type { %"class.llvm::GlobalValue", %"class.llvm::ilist_node.18", i8 } +%"class.llvm::iplist.20" = type { %"struct.llvm::ilist_traits.21", %"class.llvm::Function"* } +%"struct.llvm::ilist_traits.21" = type { %"class.llvm::ilist_node.27" } +%"class.llvm::ilist_node.27" = type { %"class.llvm::ilist_half_node.28", %"class.llvm::Function"* } +%"class.llvm::ilist_half_node.28" = type { %"class.llvm::Function"* } +%"class.llvm::iplist.29" = type { %"struct.llvm::ilist_traits.30", %"class.llvm::GlobalAlias"* } +%"struct.llvm::ilist_traits.30" = type { %"class.llvm::ilist_node.36" } +%"class.llvm::ilist_node.36" = type { %"class.llvm::ilist_half_node.37", %"class.llvm::GlobalAlias"* } +%"class.llvm::ilist_half_node.37" = type { %"class.llvm::GlobalAlias"* } +%"class.llvm::GlobalAlias" = type { %"class.llvm::GlobalValue", %"class.llvm::ilist_node.36" } +%"struct.llvm::ilist.38" = type { %"class.llvm::iplist.39" } +%"class.llvm::iplist.39" = type { %"struct.llvm::ilist_traits.40", %"class.llvm::NamedMDNode"* } +%"struct.llvm::ilist_traits.40" = type { %"class.llvm::ilist_node.45" } +%"class.llvm::ilist_node.45" = type { %"class.llvm::ilist_half_node.46", %"class.llvm::NamedMDNode"* } +%"class.llvm::ilist_half_node.46" = type { %"class.llvm::NamedMDNode"* } +%"class.llvm::NamedMDNode" = type { %"class.llvm::ilist_node.45", %"class.std::basic_string", %"class.llvm::Module"*, i8* } +%"class.std::basic_string" = type { %"struct.std::basic_string<char, std::char_traits<char>, std::allocator<char> >::_Alloc_hider" } +%"struct.std::basic_string<char, std::char_traits<char>, std::allocator<char> >::_Alloc_hider" = type { i8* } +%"class.llvm::ValueSymbolTable" = type opaque +%"class.llvm::OwningPtr" = type { %"class.llvm::GVMaterializer"* } +%"class.llvm::GVMaterializer" = type opaque +%"class.llvm::iplist.47" = type { %"struct.llvm::ilist_traits.48", %"class.llvm::BasicBlock"* } 
+%"struct.llvm::ilist_traits.48" = type { %"class.llvm::ilist_half_node.3" } +%"class.llvm::iplist.54" = type { %"struct.llvm::ilist_traits.55", %"class.llvm::Argument"* } +%"struct.llvm::ilist_traits.55" = type { %"class.llvm::ilist_half_node.61" } +%"class.llvm::ilist_half_node.61" = type { %"class.llvm::Argument"* } +%"class.llvm::Argument" = type { %"class.llvm::Value", %"class.llvm::ilist_node.192", %"class.llvm::Function"* } +%"class.llvm::ilist_node.192" = type { %"class.llvm::ilist_half_node.61", %"class.llvm::Argument"* } +%"class.llvm::AttributeSet" = type { %"class.llvm::AttributeSetImpl"* } +%"class.llvm::AttributeSetImpl" = type opaque +%"class.llvm::MachineFunction" = type { %"class.llvm::Function"*, %"class.llvm::TargetMachine"*, %"class.llvm::MCContext"*, %"class.llvm::MachineModuleInfo"*, %"class.llvm::GCModuleInfo"*, %"class.llvm::MachineRegisterInfo"*, %"struct.llvm::MachineFunctionInfo"*, %"class.llvm::MachineFrameInfo"*, %"class.llvm::MachineConstantPool"*, %"class.llvm::MachineJumpTableInfo"*, %"class.std::vector.163", %"class.llvm::BumpPtrAllocator", %"class.llvm::Recycler", %"class.llvm::ArrayRecycler", %"class.llvm::Recycler.180", %"struct.llvm::ilist.181", i32, i32, i8 } +%"class.llvm::TargetMachine" = type { i32 (...)**, %"class.llvm::Target"*, %"class.std::basic_string", %"class.std::basic_string", %"class.std::basic_string", %"class.llvm::MCCodeGenInfo"*, %"class.llvm::MCAsmInfo"*, i8, %"class.llvm::TargetOptions" } +%"class.llvm::Target" = type opaque +%"class.llvm::MCCodeGenInfo" = type opaque +%"class.llvm::MCAsmInfo" = type opaque +%"class.llvm::TargetOptions" = type { [2 x i8], i32, i8, i32, i8, %"class.std::basic_string", i32, i32 } +%"class.llvm::MCContext" = type { %"class.llvm::SourceMgr"*, %"class.llvm::MCAsmInfo"*, %"class.llvm::MCRegisterInfo"*, %"class.llvm::MCObjectFileInfo"*, %"class.llvm::BumpPtrAllocator", %"class.llvm::StringMap", %"class.llvm::StringMap.62", i32, %"class.llvm::DenseMap.63", i8*, %"class.llvm::raw_ostream"*, i8, %"class.std::basic_string", %"class.std::basic_string", %"class.std::vector", %"class.std::vector.70", %"class.llvm::MCDwarfLoc", i8, i8, i32, %"class.llvm::MCSection"*, %"class.llvm::MCSymbol"*, %"class.llvm::MCSymbol"*, %"class.std::vector.75", %"class.llvm::StringRef", %"class.llvm::StringRef", i8, %"class.llvm::DenseMap.80", %"class.std::vector.84", i8*, i8*, i8*, i8 } +%"class.llvm::SourceMgr" = type opaque +%"class.llvm::MCRegisterInfo" = type { %"struct.llvm::MCRegisterDesc"*, i32, i32, i32, %"class.llvm::MCRegisterClass"*, i32, i32, [2 x i16]*, i16*, i8*, i16*, i32, i16*, i32, i32, i32, i32, %"struct.llvm::MCRegisterInfo::DwarfLLVMRegPair"*, %"struct.llvm::MCRegisterInfo::DwarfLLVMRegPair"*, %"struct.llvm::MCRegisterInfo::DwarfLLVMRegPair"*, %"struct.llvm::MCRegisterInfo::DwarfLLVMRegPair"*, %"class.llvm::DenseMap" } +%"struct.llvm::MCRegisterDesc" = type { i32, i32, i32, i32, i32, i32 } +%"class.llvm::MCRegisterClass" = type { i8*, i16*, i8*, i16, i16, i16, i16, i16, i8, i8 } +%"struct.llvm::MCRegisterInfo::DwarfLLVMRegPair" = type { i32, i32 } +%"class.llvm::DenseMap" = type { %"struct.std::pair"*, i32, i32, i32 } +%"struct.std::pair" = type { i32, i32 } +%"class.llvm::MCObjectFileInfo" = type opaque +%"class.llvm::BumpPtrAllocator" = type { i64, i64, %"class.llvm::SlabAllocator"*, %"class.llvm::MemSlab"*, i8*, i8*, i64 } +%"class.llvm::SlabAllocator" = type { i32 (...)** } +%"class.llvm::MemSlab" = type { i64, %"class.llvm::MemSlab"* } +%"class.llvm::StringMap" = type { %"class.llvm::StringMapImpl", 
%"class.llvm::BumpPtrAllocator"* } +%"class.llvm::StringMapImpl" = type { %"class.llvm::StringMapEntryBase"**, i32, i32, i32, i32 } +%"class.llvm::StringMapEntryBase" = type { i32 } +%"class.llvm::StringMap.62" = type { %"class.llvm::StringMapImpl", %"class.llvm::BumpPtrAllocator"* } +%"class.llvm::DenseMap.63" = type { %"struct.std::pair.66"*, i32, i32, i32 } +%"struct.std::pair.66" = type opaque +%"class.llvm::raw_ostream" = type { i32 (...)**, i8*, i8*, i8*, i32 } +%"class.std::vector" = type { %"struct.std::_Vector_base" } +%"struct.std::_Vector_base" = type { %"struct.std::_Vector_base<llvm::MCDwarfFile *, std::allocator<llvm::MCDwarfFile *> >::_Vector_impl" } +%"struct.std::_Vector_base<llvm::MCDwarfFile *, std::allocator<llvm::MCDwarfFile *> >::_Vector_impl" = type { %"class.llvm::MCDwarfFile"**, %"class.llvm::MCDwarfFile"**, %"class.llvm::MCDwarfFile"** } +%"class.llvm::MCDwarfFile" = type { %"class.llvm::StringRef", i32 } +%"class.llvm::StringRef" = type { i8*, i64 } +%"class.std::vector.70" = type { %"struct.std::_Vector_base.71" } +%"struct.std::_Vector_base.71" = type { %"struct.std::_Vector_base<llvm::StringRef, std::allocator<llvm::StringRef> >::_Vector_impl" } +%"struct.std::_Vector_base<llvm::StringRef, std::allocator<llvm::StringRef> >::_Vector_impl" = type { %"class.llvm::StringRef"*, %"class.llvm::StringRef"*, %"class.llvm::StringRef"* } +%"class.llvm::MCDwarfLoc" = type { i32, i32, i32, i32, i32, i32 } +%"class.llvm::MCSection" = type opaque +%"class.llvm::MCSymbol" = type { %"class.llvm::StringRef", %"class.llvm::MCSection"*, %"class.llvm::MCExpr"*, i8 } +%"class.llvm::MCExpr" = type opaque +%"class.std::vector.75" = type { %"struct.std::_Vector_base.76" } +%"struct.std::_Vector_base.76" = type { %"struct.std::_Vector_base<const llvm::MCGenDwarfLabelEntry *, std::allocator<const llvm::MCGenDwarfLabelEntry *> >::_Vector_impl" } +%"struct.std::_Vector_base<const llvm::MCGenDwarfLabelEntry *, std::allocator<const llvm::MCGenDwarfLabelEntry *> >::_Vector_impl" = type { %"class.llvm::MCGenDwarfLabelEntry"**, %"class.llvm::MCGenDwarfLabelEntry"**, %"class.llvm::MCGenDwarfLabelEntry"** } +%"class.llvm::MCGenDwarfLabelEntry" = type { %"class.llvm::StringRef", i32, i32, %"class.llvm::MCSymbol"* } +%"class.llvm::DenseMap.80" = type { %"struct.std::pair.83"*, i32, i32, i32 } +%"struct.std::pair.83" = type { %"class.llvm::MCSection"*, %"class.llvm::MCLineSection"* } +%"class.llvm::MCLineSection" = type { %"class.std::vector.215" } +%"class.std::vector.215" = type { %"struct.std::_Vector_base.216" } +%"struct.std::_Vector_base.216" = type { %"struct.std::_Vector_base<llvm::MCLineEntry, std::allocator<llvm::MCLineEntry> >::_Vector_impl" } +%"struct.std::_Vector_base<llvm::MCLineEntry, std::allocator<llvm::MCLineEntry> >::_Vector_impl" = type { %"class.llvm::MCLineEntry"*, %"class.llvm::MCLineEntry"*, %"class.llvm::MCLineEntry"* } +%"class.llvm::MCLineEntry" = type { %"class.llvm::MCDwarfLoc", %"class.llvm::MCSymbol"* } +%"class.std::vector.84" = type { %"struct.std::_Vector_base.85" } +%"struct.std::_Vector_base.85" = type { %"struct.std::_Vector_base<const llvm::MCSection *, std::allocator<const llvm::MCSection *> >::_Vector_impl" } +%"struct.std::_Vector_base<const llvm::MCSection *, std::allocator<const llvm::MCSection *> >::_Vector_impl" = type { %"class.llvm::MCSection"**, %"class.llvm::MCSection"**, %"class.llvm::MCSection"** } +%"class.llvm::MachineModuleInfo" = type { %"class.llvm::ImmutablePass", %"class.llvm::MCContext", %"class.llvm::Module"*, 
%"class.llvm::MachineModuleInfoImpl"*, %"class.std::vector.95", i32, %"class.std::vector.100", %"class.llvm::DenseMap.110", %"class.llvm::DenseMap.114", i32, %"class.std::vector.118", %"class.std::vector.123", %"class.std::vector.123", %"class.std::vector.128", %"class.llvm::SmallPtrSet", %"class.llvm::MMIAddrLabelMap"*, i8, i8, i8, i8, %"class.llvm::SmallVector.133" } +%"class.llvm::ImmutablePass" = type { %"class.llvm::ModulePass" } +%"class.llvm::ModulePass" = type { %"class.llvm::Pass" } +%"class.llvm::Pass" = type { i32 (...)**, %"class.llvm::AnalysisResolver"*, i8*, i32 } +%"class.llvm::AnalysisResolver" = type { %"class.std::vector.89", %"class.llvm::PMDataManager"* } +%"class.std::vector.89" = type { %"struct.std::_Vector_base.90" } +%"struct.std::_Vector_base.90" = type { %"struct.std::_Vector_base<std::pair<const void *, llvm::Pass *>, std::allocator<std::pair<const void *, llvm::Pass *> > >::_Vector_impl" } +%"struct.std::_Vector_base<std::pair<const void *, llvm::Pass *>, std::allocator<std::pair<const void *, llvm::Pass *> > >::_Vector_impl" = type { %"struct.std::pair.94"*, %"struct.std::pair.94"*, %"struct.std::pair.94"* } +%"struct.std::pair.94" = type { i8*, %"class.llvm::Pass"* } +%"class.llvm::PMDataManager" = type opaque +%"class.llvm::MachineModuleInfoImpl" = type { i32 (...)** } +%"class.std::vector.95" = type { %"struct.std::_Vector_base.96" } +%"struct.std::_Vector_base.96" = type { %"struct.std::_Vector_base<llvm::MachineMove, std::allocator<llvm::MachineMove> >::_Vector_impl" } +%"struct.std::_Vector_base<llvm::MachineMove, std::allocator<llvm::MachineMove> >::_Vector_impl" = type { %"class.llvm::MachineMove"*, %"class.llvm::MachineMove"*, %"class.llvm::MachineMove"* } +%"class.llvm::MachineMove" = type { %"class.llvm::MCSymbol"*, %"class.llvm::MachineLocation", %"class.llvm::MachineLocation" } +%"class.llvm::MachineLocation" = type { i8, i32, i32 } +%"class.std::vector.100" = type { %"struct.std::_Vector_base.101" } +%"struct.std::_Vector_base.101" = type { %"struct.std::_Vector_base<llvm::LandingPadInfo, std::allocator<llvm::LandingPadInfo> >::_Vector_impl" } +%"struct.std::_Vector_base<llvm::LandingPadInfo, std::allocator<llvm::LandingPadInfo> >::_Vector_impl" = type { %"struct.llvm::LandingPadInfo"*, %"struct.llvm::LandingPadInfo"*, %"struct.llvm::LandingPadInfo"* } +%"struct.llvm::LandingPadInfo" = type { %"class.llvm::MachineBasicBlock"*, %"class.llvm::SmallVector", %"class.llvm::SmallVector", %"class.llvm::MCSymbol"*, %"class.llvm::Function"*, %"class.std::vector.105" } +%"class.llvm::SmallVector" = type { %"class.llvm::SmallVectorImpl", %"struct.llvm::SmallVectorStorage" } +%"class.llvm::SmallVectorImpl" = type { %"class.llvm::SmallVectorTemplateBase" } +%"class.llvm::SmallVectorTemplateBase" = type { %"class.llvm::SmallVectorTemplateCommon" } +%"class.llvm::SmallVectorTemplateCommon" = type { %"class.llvm::SmallVectorBase", %"struct.llvm::AlignedCharArrayUnion" } +%"class.llvm::SmallVectorBase" = type { i8*, i8*, i8* } +%"struct.llvm::AlignedCharArrayUnion" = type { %"struct.llvm::AlignedCharArray" } +%"struct.llvm::AlignedCharArray" = type { [8 x i8] } +%"struct.llvm::SmallVectorStorage" = type { i8 } +%"class.std::vector.105" = type { %"struct.std::_Vector_base.106" } +%"struct.std::_Vector_base.106" = type { %"struct.std::_Vector_base<int, std::allocator<int> >::_Vector_impl" } +%"struct.std::_Vector_base<int, std::allocator<int> >::_Vector_impl" = type { i32*, i32*, i32* } +%"class.llvm::DenseMap.110" = type { %"struct.std::pair.113"*, i32, i32, i32 } 
+%"struct.std::pair.113" = type { %"class.llvm::MCSymbol"*, %"class.llvm::SmallVector.206" } +%"class.llvm::SmallVector.206" = type { [28 x i8], %"struct.llvm::SmallVectorStorage.207" } +%"struct.llvm::SmallVectorStorage.207" = type { [3 x %"struct.llvm::AlignedCharArrayUnion.198"] } +%"struct.llvm::AlignedCharArrayUnion.198" = type { %"struct.llvm::AlignedCharArray.199" } +%"struct.llvm::AlignedCharArray.199" = type { [4 x i8] } +%"class.llvm::DenseMap.114" = type { %"struct.std::pair.117"*, i32, i32, i32 } +%"struct.std::pair.117" = type { %"class.llvm::MCSymbol"*, i32 } +%"class.std::vector.118" = type { %"struct.std::_Vector_base.119" } +%"struct.std::_Vector_base.119" = type { %"struct.std::_Vector_base<const llvm::GlobalVariable *, std::allocator<const llvm::GlobalVariable *> >::_Vector_impl" } +%"struct.std::_Vector_base<const llvm::GlobalVariable *, std::allocator<const llvm::GlobalVariable *> >::_Vector_impl" = type { %"class.llvm::GlobalVariable"**, %"class.llvm::GlobalVariable"**, %"class.llvm::GlobalVariable"** } +%"class.std::vector.123" = type { %"struct.std::_Vector_base.124" } +%"struct.std::_Vector_base.124" = type { %"struct.std::_Vector_base<unsigned int, std::allocator<unsigned int> >::_Vector_impl" } +%"struct.std::_Vector_base<unsigned int, std::allocator<unsigned int> >::_Vector_impl" = type { i32*, i32*, i32* } +%"class.std::vector.128" = type { %"struct.std::_Vector_base.129" } +%"struct.std::_Vector_base.129" = type { %"struct.std::_Vector_base<const llvm::Function *, std::allocator<const llvm::Function *> >::_Vector_impl" } +%"struct.std::_Vector_base<const llvm::Function *, std::allocator<const llvm::Function *> >::_Vector_impl" = type { %"class.llvm::Function"**, %"class.llvm::Function"**, %"class.llvm::Function"** } +%"class.llvm::SmallPtrSet" = type { %"class.llvm::SmallPtrSetImpl", [33 x i8*] } +%"class.llvm::SmallPtrSetImpl" = type { i8**, i8**, i32, i32, i32 } +%"class.llvm::MMIAddrLabelMap" = type opaque +%"class.llvm::SmallVector.133" = type { %"class.llvm::SmallVectorImpl.134", %"struct.llvm::SmallVectorStorage.139" } +%"class.llvm::SmallVectorImpl.134" = type { %"class.llvm::SmallVectorTemplateBase.135" } +%"class.llvm::SmallVectorTemplateBase.135" = type { %"class.llvm::SmallVectorTemplateCommon.136" } +%"class.llvm::SmallVectorTemplateCommon.136" = type { %"class.llvm::SmallVectorBase", %"struct.llvm::AlignedCharArrayUnion.137" } +%"struct.llvm::AlignedCharArrayUnion.137" = type { %"struct.llvm::AlignedCharArray.138" } +%"struct.llvm::AlignedCharArray.138" = type { [40 x i8] } +%"struct.llvm::SmallVectorStorage.139" = type { [3 x %"struct.llvm::AlignedCharArrayUnion.137"] } +%"class.llvm::GCModuleInfo" = type opaque +%"class.llvm::MachineRegisterInfo" = type { %"class.llvm::TargetRegisterInfo"*, i8, i8, %"class.llvm::IndexedMap", %"class.llvm::IndexedMap.146", %"class.llvm::MachineOperand"**, %"class.llvm::BitVector", %"class.llvm::BitVector", %"class.llvm::BitVector", %"class.std::vector.147", %"class.std::vector.123" } +%"class.llvm::TargetRegisterInfo" = type { i32 (...)**, %"class.llvm::MCRegisterInfo", %"struct.llvm::TargetRegisterInfoDesc"*, i8**, i32*, %"class.llvm::TargetRegisterClass"**, %"class.llvm::TargetRegisterClass"** } +%"struct.llvm::TargetRegisterInfoDesc" = type { i32, i8 } +%"class.llvm::TargetRegisterClass" = type { %"class.llvm::MCRegisterClass"*, i32*, i32*, i16*, %"class.llvm::TargetRegisterClass"**, void (%"class.llvm::ArrayRef"*, %"class.llvm::MachineFunction"*)* } +%"class.llvm::ArrayRef" = type { i16*, i64 } 
+%"class.llvm::IndexedMap" = type { %"class.std::vector.140", %"struct.std::pair.145", %"struct.llvm::VirtReg2IndexFunctor" } +%"class.std::vector.140" = type { %"struct.std::_Vector_base.141" } +%"struct.std::_Vector_base.141" = type { %"struct.std::_Vector_base<std::pair<const llvm::TargetRegisterClass *, llvm::MachineOperand *>, std::allocator<std::pair<const llvm::TargetRegisterClass *, llvm::MachineOperand *> > >::_Vector_impl" } +%"struct.std::_Vector_base<std::pair<const llvm::TargetRegisterClass *, llvm::MachineOperand *>, std::allocator<std::pair<const llvm::TargetRegisterClass *, llvm::MachineOperand *> > >::_Vector_impl" = type { %"struct.std::pair.145"*, %"struct.std::pair.145"*, %"struct.std::pair.145"* } +%"struct.std::pair.145" = type { %"class.llvm::TargetRegisterClass"*, %"class.llvm::MachineOperand"* } +%"class.llvm::MachineOperand" = type { i8, [3 x i8], %union.anon, %"class.llvm::MachineInstr"*, %union.anon.188 } +%union.anon = type { i32 } +%union.anon.188 = type { %struct.anon } +%struct.anon = type { %"class.llvm::MachineOperand"*, %"class.llvm::MachineOperand"* } +%"struct.llvm::VirtReg2IndexFunctor" = type { i8 } +%"class.llvm::IndexedMap.146" = type { %"class.std::vector.147", %"struct.std::pair.152", %"struct.llvm::VirtReg2IndexFunctor" } +%"class.std::vector.147" = type { %"struct.std::_Vector_base.148" } +%"struct.std::_Vector_base.148" = type { %"struct.std::_Vector_base<std::pair<unsigned int, unsigned int>, std::allocator<std::pair<unsigned int, unsigned int> > >::_Vector_impl" } +%"struct.std::_Vector_base<std::pair<unsigned int, unsigned int>, std::allocator<std::pair<unsigned int, unsigned int> > >::_Vector_impl" = type { %"struct.std::pair.152"*, %"struct.std::pair.152"*, %"struct.std::pair.152"* } +%"struct.std::pair.152" = type { i32, i32 } +%"class.llvm::BitVector" = type { i64*, i32, i32 } +%"struct.llvm::MachineFunctionInfo" = type { i32 (...)** } +%"class.llvm::MachineFrameInfo" = type opaque +%"class.llvm::MachineConstantPool" = type { %"class.llvm::DataLayout"*, i32, %"class.std::vector.153", %"class.llvm::DenseSet" } +%"class.llvm::DataLayout" = type opaque +%"class.std::vector.153" = type { %"struct.std::_Vector_base.154" } +%"struct.std::_Vector_base.154" = type { %"struct.std::_Vector_base<llvm::MachineConstantPoolEntry, std::allocator<llvm::MachineConstantPoolEntry> >::_Vector_impl" } +%"struct.std::_Vector_base<llvm::MachineConstantPoolEntry, std::allocator<llvm::MachineConstantPoolEntry> >::_Vector_impl" = type { %"class.llvm::MachineConstantPoolEntry"*, %"class.llvm::MachineConstantPoolEntry"*, %"class.llvm::MachineConstantPoolEntry"* } +%"class.llvm::MachineConstantPoolEntry" = type { %union.anon.158, i32 } +%union.anon.158 = type { %"class.llvm::Constant"* } +%"class.llvm::Constant" = type { %"class.llvm::User" } +%"class.llvm::DenseSet" = type { %"class.llvm::DenseMap.159" } +%"class.llvm::DenseMap.159" = type { %"struct.std::pair.162"*, i32, i32, i32 } +%"struct.std::pair.162" = type { %"class.llvm::MachineConstantPoolValue"*, i8 } +%"class.llvm::MachineConstantPoolValue" = type { i32 (...)**, %"class.llvm::Type"* } +%"class.llvm::MachineJumpTableInfo" = type opaque +%"class.std::vector.163" = type { %"struct.std::_Vector_base.164" } +%"struct.std::_Vector_base.164" = type { %"struct.std::_Vector_base<llvm::MachineBasicBlock *, std::allocator<llvm::MachineBasicBlock *> >::_Vector_impl" } +%"struct.std::_Vector_base<llvm::MachineBasicBlock *, std::allocator<llvm::MachineBasicBlock *> >::_Vector_impl" = type { 
%"class.llvm::MachineBasicBlock"**, %"class.llvm::MachineBasicBlock"**, %"class.llvm::MachineBasicBlock"** } +%"class.llvm::Recycler" = type { %"class.llvm::iplist.168" } +%"class.llvm::iplist.168" = type { %"struct.llvm::ilist_traits.169", %"struct.llvm::RecyclerStruct"* } +%"struct.llvm::ilist_traits.169" = type { %"struct.llvm::RecyclerStruct" } +%"struct.llvm::RecyclerStruct" = type { %"struct.llvm::RecyclerStruct"*, %"struct.llvm::RecyclerStruct"* } +%"class.llvm::ArrayRecycler" = type { %"class.llvm::SmallVector.174" } +%"class.llvm::SmallVector.174" = type { %"class.llvm::SmallVectorImpl.175", %"struct.llvm::SmallVectorStorage.179" } +%"class.llvm::SmallVectorImpl.175" = type { %"class.llvm::SmallVectorTemplateBase.176" } +%"class.llvm::SmallVectorTemplateBase.176" = type { %"class.llvm::SmallVectorTemplateCommon.177" } +%"class.llvm::SmallVectorTemplateCommon.177" = type { %"class.llvm::SmallVectorBase", %"struct.llvm::AlignedCharArrayUnion.178" } +%"struct.llvm::AlignedCharArrayUnion.178" = type { %"struct.llvm::AlignedCharArray" } +%"struct.llvm::SmallVectorStorage.179" = type { [7 x %"struct.llvm::AlignedCharArrayUnion.178"] } +%"class.llvm::Recycler.180" = type { %"class.llvm::iplist.168" } +%"struct.llvm::ilist.181" = type { %"class.llvm::iplist.182" } +%"class.llvm::iplist.182" = type { %"struct.llvm::ilist_traits.183", %"class.llvm::MachineBasicBlock"* } +%"struct.llvm::ilist_traits.183" = type { %"class.llvm::ilist_half_node.1" } +%"class.llvm::ArrayRecycler<llvm::MachineOperand, 8>::Capacity" = type { i8 } +%"class.llvm::ConstantInt" = type { %"class.llvm::Constant", %"class.llvm::APInt" } +%"class.llvm::APInt" = type { i32, %union.anon.189 } +%union.anon.189 = type { i64 } +%"class.llvm::ConstantFP" = type { %"class.llvm::Constant", %"class.llvm::APFloat" } +%"class.llvm::APFloat" = type { %"struct.llvm::fltSemantics"*, %"union.llvm::APFloat::Significand", i16, i8 } +%"struct.llvm::fltSemantics" = type opaque +%"union.llvm::APFloat::Significand" = type { i64 } +%"class.llvm::BlockAddress" = type { %"class.llvm::Constant" } +%"class.llvm::hash_code" = type { i64 } +%"struct.llvm::hashing::detail::hash_combine_recursive_helper" = type { [64 x i8], %"struct.llvm::hashing::detail::hash_state", i64 } +%"struct.llvm::hashing::detail::hash_state" = type { i64, i64, i64, i64, i64, i64, i64, i64 } +%"class.llvm::PrintReg" = type { %"class.llvm::TargetRegisterInfo"*, i32, i32 } +%"class.llvm::PseudoSourceValue" = type { %"class.llvm::Value" } +%"class.llvm::FoldingSetNodeID" = type { %"class.llvm::SmallVector.194" } +%"class.llvm::SmallVector.194" = type { [28 x i8], %"struct.llvm::SmallVectorStorage.200" } +%"struct.llvm::SmallVectorStorage.200" = type { [31 x %"struct.llvm::AlignedCharArrayUnion.198"] } +%"struct.llvm::ArrayRecycler<llvm::MachineOperand, 8>::FreeList" = type { %"struct.llvm::ArrayRecycler<llvm::MachineOperand, 8>::FreeList"* } +%"class.llvm::ilist_iterator.202" = type { %"class.llvm::MachineInstr"* } +%"class.llvm::TargetInstrInfo" = type { i32 (...)**, [28 x i8], i32, i32 } +%"struct.std::pair.203" = type { i8, i8 } +%"class.llvm::SmallVectorImpl.195" = type { %"class.llvm::SmallVectorTemplateBase.196" } +%"class.llvm::SmallVectorTemplateBase.196" = type { %"class.llvm::SmallVectorTemplateCommon.197" } +%"class.llvm::SmallVectorTemplateCommon.197" = type { %"class.llvm::SmallVectorBase", %"struct.llvm::AlignedCharArrayUnion.198" } +%"class.llvm::AliasAnalysis" = type { i32 (...)**, %"class.llvm::DataLayout"*, %"class.llvm::TargetLibraryInfo"*, 
%"class.llvm::AliasAnalysis"* } +%"class.llvm::TargetLibraryInfo" = type opaque +%"struct.llvm::AliasAnalysis::Location" = type { %"class.llvm::Value"*, i64, %"class.llvm::MDNode"* } +%"class.llvm::DIVariable" = type { %"class.llvm::DIDescriptor" } +%"class.llvm::DIDescriptor" = type { %"class.llvm::MDNode"* } +%"class.llvm::DIScope" = type { %"class.llvm::DIDescriptor" } +%"class.llvm::ArrayRef.208" = type { i32*, i64 } +%"class.llvm::SmallVector.209" = type { %"class.llvm::SmallVectorImpl.210", %"struct.llvm::SmallVectorStorage.214" } +%"class.llvm::SmallVectorImpl.210" = type { %"class.llvm::SmallVectorTemplateBase.211" } +%"class.llvm::SmallVectorTemplateBase.211" = type { %"class.llvm::SmallVectorTemplateCommon.212" } +%"class.llvm::SmallVectorTemplateCommon.212" = type { %"class.llvm::SmallVectorBase", %"struct.llvm::AlignedCharArrayUnion.213" } +%"struct.llvm::AlignedCharArrayUnion.213" = type { %"struct.llvm::AlignedCharArray" } +%"struct.llvm::SmallVectorStorage.214" = type { [7 x %"struct.llvm::AlignedCharArrayUnion.213"] } +%"class.llvm::Twine" = type { %"union.llvm::Twine::Child", %"union.llvm::Twine::Child", i8, i8 } +%"union.llvm::Twine::Child" = type { %"class.llvm::Twine"* } +%"struct.std::random_access_iterator_tag" = type { i8 } + +declare void @_ZN4llvm19MachineRegisterInfo27removeRegOperandFromUseListEPNS_14MachineOperandE(%"class.llvm::MachineRegisterInfo"*, %"class.llvm::MachineOperand"*) + +declare void @_ZN4llvm19MachineRegisterInfo22addRegOperandToUseListEPNS_14MachineOperandE(%"class.llvm::MachineRegisterInfo"*, %"class.llvm::MachineOperand"*) + +declare zeroext i32 @_ZNK4llvm14MCRegisterInfo9getSubRegEjj(%"class.llvm::MCRegisterInfo"*, i32 zeroext, i32 zeroext) + +define void @_ZN4llvm14MachineOperand12substPhysRegEjRKNS_18TargetRegisterInfoE(%"class.llvm::MachineOperand"* %this, i32 zeroext %Reg, %"class.llvm::TargetRegisterInfo"* %TRI) align 2 { +entry: + %SubReg_TargetFlags.i = getelementptr inbounds %"class.llvm::MachineOperand"* %this, i64 0, i32 1 + %0 = bitcast [3 x i8]* %SubReg_TargetFlags.i to i24* + %bf.load.i = load i24* %0, align 1 + %bf.lshr.i = lshr i24 %bf.load.i, 12 + %tobool = icmp eq i24 %bf.lshr.i, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + %bf.cast.i = zext i24 %bf.lshr.i to i32 + %add.ptr = getelementptr inbounds %"class.llvm::TargetRegisterInfo"* %TRI, i64 0, i32 1 + %call3 = tail call zeroext i32 @_ZNK4llvm14MCRegisterInfo9getSubRegEjj(%"class.llvm::MCRegisterInfo"* %add.ptr, i32 zeroext %Reg, i32 zeroext %bf.cast.i) + %bf.load.i10 = load i24* %0, align 1 + %bf.clear.i = and i24 %bf.load.i10, 4095 + store i24 %bf.clear.i, i24* %0, align 1 + br label %if.end + +if.end: ; preds = %entry, %if.then + %Reg.addr.0 = phi i32 [ %call3, %if.then ], [ %Reg, %entry ] + %RegNo.i.i = getelementptr inbounds %"class.llvm::MachineOperand"* %this, i64 0, i32 2, i32 0 + %1 = load i32* %RegNo.i.i, align 4, !tbaa !0 + %cmp.i = icmp eq i32 %1, %Reg.addr.0 + br i1 %cmp.i, label %_ZN4llvm14MachineOperand6setRegEj.exit, label %if.end.i + +if.end.i: ; preds = %if.end + %ParentMI.i.i = getelementptr inbounds %"class.llvm::MachineOperand"* %this, i64 0, i32 3 + %2 = load %"class.llvm::MachineInstr"** %ParentMI.i.i, align 8, !tbaa !3 + %tobool.i = icmp eq %"class.llvm::MachineInstr"* %2, null + br i1 %tobool.i, label %if.end13.i, label %if.then3.i + +if.then3.i: ; preds = %if.end.i + %Parent.i.i = getelementptr inbounds %"class.llvm::MachineInstr"* %2, i64 0, i32 2 + %3 = load %"class.llvm::MachineBasicBlock"** %Parent.i.i, align 
8, !tbaa !3 + %tobool5.i = icmp eq %"class.llvm::MachineBasicBlock"* %3, null + br i1 %tobool5.i, label %if.end13.i, label %if.then6.i + +if.then6.i: ; preds = %if.then3.i + %xParent.i.i = getelementptr inbounds %"class.llvm::MachineBasicBlock"* %3, i64 0, i32 4 + %4 = load %"class.llvm::MachineFunction"** %xParent.i.i, align 8, !tbaa !3 + %tobool8.i = icmp eq %"class.llvm::MachineFunction"* %4, null + br i1 %tobool8.i, label %if.end13.i, label %if.then9.i + +if.then9.i: ; preds = %if.then6.i + %RegInfo.i.i = getelementptr inbounds %"class.llvm::MachineFunction"* %4, i64 0, i32 5 + %5 = load %"class.llvm::MachineRegisterInfo"** %RegInfo.i.i, align 8, !tbaa !3 + tail call void @_ZN4llvm19MachineRegisterInfo27removeRegOperandFromUseListEPNS_14MachineOperandE(%"class.llvm::MachineRegisterInfo"* %5, %"class.llvm::MachineOperand"* %this) + store i32 %Reg.addr.0, i32* %RegNo.i.i, align 4, !tbaa !0 + tail call void @_ZN4llvm19MachineRegisterInfo22addRegOperandToUseListEPNS_14MachineOperandE(%"class.llvm::MachineRegisterInfo"* %5, %"class.llvm::MachineOperand"* %this) + br label %_ZN4llvm14MachineOperand6setRegEj.exit + +if.end13.i: ; preds = %if.then6.i, %if.then3.i, %if.end.i + store i32 %Reg.addr.0, i32* %RegNo.i.i, align 4, !tbaa !0 + br label %_ZN4llvm14MachineOperand6setRegEj.exit + +_ZN4llvm14MachineOperand6setRegEj.exit: ; preds = %if.end, %if.then9.i, %if.end13.i + ret void +} + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} +!3 = metadata !{metadata !"any pointer", metadata !1} +!4 = metadata !{metadata !"vtable pointer", metadata !2} +!5 = metadata !{metadata !"long", metadata !1} +!6 = metadata !{i64 0, i64 8, metadata !3, i64 8, i64 8, metadata !5} +!7 = metadata !{metadata !"short", metadata !1} +!8 = metadata !{i64 0, i64 1, metadata !1, i64 1, i64 4, metadata !0, i64 2, i64 1, metadata !1, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 4, i64 4, metadata !0, i64 4, i64 4, metadata !0, i64 8, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !5, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 24, i64 8, metadata !3, i64 16, i64 4, metadata !0, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 24, i64 4, metadata !0} +!9 = metadata !{metadata !"bool", metadata !1} +!10 = metadata !{i8 0, i8 2} + +; CHECK-NOT: lbzu 3, 1(3) diff --git a/test/CodeGen/PowerPC/pr15359.ll b/test/CodeGen/PowerPC/pr15359.ll new file mode 100644 index 000000000000..12fa3e5ffbdd --- /dev/null +++ b/test/CodeGen/PowerPC/pr15359.ll @@ -0,0 +1,20 @@ +; RUN: llc -O0 -mcpu=pwr7 -filetype=obj %s -o - | \ +; RUN: elf-dump --dump-section-data | FileCheck %s + +target datalayout = "E-p:64:64:64-S0-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@nextIdx = external thread_local global i32 + +define fastcc void @func() nounwind { +entry: + store i32 42, i32* @nextIdx + ret void +} + +; Verify that nextIdx has symbol type TLS. 
+; +; CHECK: '.symtab' +; CHECK: 'nextIdx' +; CHECK: 'st_type', 0x6 + diff --git a/test/CodeGen/PowerPC/pr15630.ll b/test/CodeGen/PowerPC/pr15630.ll new file mode 100644 index 000000000000..c5ba8a4d4f04 --- /dev/null +++ b/test/CodeGen/PowerPC/pr15630.ll @@ -0,0 +1,16 @@ +; RUN: llc -mcpu=pwr7 -O0 < %s | FileCheck %s + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define weak_odr void @_D4core6atomic49__T11atomicStoreVE4core6atomic11MemoryOrder3ThThZ11atomicStoreFNaNbKOhhZv(i8* %val_arg, i8 zeroext %newval_arg) { +entry: + %newval = alloca i8 + %ordering = alloca i32, align 4 + store i8 %newval_arg, i8* %newval + %tmp = load i8* %newval + store atomic volatile i8 %tmp, i8* %val_arg seq_cst, align 1 + ret void +} + +; CHECK: stwcx. diff --git a/test/CodeGen/PowerPC/pr15632.ll b/test/CodeGen/PowerPC/pr15632.ll new file mode 100644 index 000000000000..3ea83468b6d7 --- /dev/null +++ b/test/CodeGen/PowerPC/pr15632.ll @@ -0,0 +1,15 @@ +; RUN: llc -mcpu=pwr7 -O0 < %s | FileCheck %s + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +declare void @other(ppc_fp128 %tmp70) + +define void @bug() { +entry: + %tmp70 = frem ppc_fp128 0xM00000000000000000000000000000000, undef + call void @other(ppc_fp128 %tmp70) + unreachable +} + +; CHECK: bl fmodl diff --git a/test/CodeGen/PowerPC/pwr3-6x.ll b/test/CodeGen/PowerPC/pwr3-6x.ll new file mode 100644 index 000000000000..a9cfe412fd84 --- /dev/null +++ b/test/CodeGen/PowerPC/pwr3-6x.ll @@ -0,0 +1,14 @@ +; Test basic support for some older processors. + +;RUN: llc < %s -march=ppc64 -mcpu=pwr3 | FileCheck %s +;RUN: llc < %s -march=ppc64 -mcpu=pwr4 | FileCheck %s +;RUN: llc < %s -march=ppc64 -mcpu=pwr5 | FileCheck %s +;RUN: llc < %s -march=ppc64 -mcpu=pwr5x | FileCheck %s +;RUN: llc < %s -march=ppc64 -mcpu=pwr6x | FileCheck %s + +define void @foo() { +entry: + ret void +} + +; CHECK: @foo diff --git a/test/CodeGen/PowerPC/quadint-return.ll b/test/CodeGen/PowerPC/quadint-return.ll new file mode 100644 index 000000000000..03499915e78e --- /dev/null +++ b/test/CodeGen/PowerPC/quadint-return.ll @@ -0,0 +1,19 @@ +; REQUIRES: asserts +; RUN: llc -O0 -debug -o - < %s 2>&1 | FileCheck %s + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define i128 @foo() nounwind { +entry: + %x = alloca i128, align 16 + store i128 27, i128* %x, align 16 + %0 = load i128* %x, align 16 + ret i128 %0 +} + +; CHECK: ********** Function: foo +; CHECK: ********** FAST REGISTER ALLOCATION ********** +; CHECK: %X3<def> = COPY %vreg +; CHECK-NEXT: %X4<def> = COPY %vreg +; CHECK-NEXT: BLR diff --git a/test/CodeGen/PowerPC/r31.ll b/test/CodeGen/PowerPC/r31.ll new file mode 100644 index 000000000000..7ce12f600b41 --- /dev/null +++ b/test/CodeGen/PowerPC/r31.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g4 | FileCheck %s +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32" + +define i64 @foo(i64 %a) nounwind { +entry: + call void asm sideeffect "", 
"~{r0},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r14},~{r15},~{r16},~{r17},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29},~{r30}"() nounwind + br label %return + +; CHECK: @foo +; CHECK: mr 31, 3 + +return: ; preds = %entry + ret i64 %a +} + diff --git a/test/CodeGen/PowerPC/recipest.ll b/test/CodeGen/PowerPC/recipest.ll new file mode 100644 index 000000000000..89705faa46e9 --- /dev/null +++ b/test/CodeGen/PowerPC/recipest.ll @@ -0,0 +1,226 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-unsafe-fp-math | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck -check-prefix=CHECK-SAFE %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +declare double @llvm.sqrt.f64(double) +declare float @llvm.sqrt.f32(float) +declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) + +define double @foo(double %a, double %b) nounwind { +entry: + %x = call double @llvm.sqrt.f64(double %b) + %r = fdiv double %a, %x + ret double %r + +; CHECK: @foo +; CHECK: frsqrte +; CHECK: fnmsub +; CHECK: fmul +; CHECK: fmadd +; CHECK: fmul +; CHECK: fmul +; CHECK: fmadd +; CHECK: fmul +; CHECK: fmul +; CHECK: blr + +; CHECK-SAFE: @foo +; CHECK-SAFE: fsqrt +; CHECK-SAFE: fdiv +; CHECK-SAFE: blr +} + +define double @foof(double %a, float %b) nounwind { +entry: + %x = call float @llvm.sqrt.f32(float %b) + %y = fpext float %x to double + %r = fdiv double %a, %y + ret double %r + +; CHECK: @foof +; CHECK: frsqrtes +; CHECK: fnmsubs +; CHECK: fmuls +; CHECK: fmadds +; CHECK: fmuls +; CHECK: fmul +; CHECK: blr + +; CHECK-SAFE: @foof +; CHECK-SAFE: fsqrts +; CHECK-SAFE: fdiv +; CHECK-SAFE: blr +} + +define float @food(float %a, double %b) nounwind { +entry: + %x = call double @llvm.sqrt.f64(double %b) + %y = fptrunc double %x to float + %r = fdiv float %a, %y + ret float %r + +; CHECK: @foo +; CHECK: frsqrte +; CHECK: fnmsub +; CHECK: fmul +; CHECK: fmadd +; CHECK: fmul +; CHECK: fmul +; CHECK: fmadd +; CHECK: fmul +; CHECK: frsp +; CHECK: fmuls +; CHECK: blr + +; CHECK-SAFE: @foo +; CHECK-SAFE: fsqrt +; CHECK-SAFE: fdivs +; CHECK-SAFE: blr +} + +define float @goo(float %a, float %b) nounwind { +entry: + %x = call float @llvm.sqrt.f32(float %b) + %r = fdiv float %a, %x + ret float %r + +; CHECK: @goo +; CHECK: frsqrtes +; CHECK: fnmsubs +; CHECK: fmuls +; CHECK: fmadds +; CHECK: fmuls +; CHECK: fmuls +; CHECK: blr + +; CHECK-SAFE: @goo +; CHECK-SAFE: fsqrts +; CHECK-SAFE: fdivs +; CHECK-SAFE: blr +} + +define <4 x float> @hoo(<4 x float> %a, <4 x float> %b) nounwind { +entry: + %x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b) + %r = fdiv <4 x float> %a, %x + ret <4 x float> %r + +; CHECK: @hoo +; CHECK: vrsqrtefp + +; CHECK-SAFE: @hoo +; CHECK-SAFE-NOT: vrsqrtefp +; CHECK-SAFE: blr +} + +define double @foo2(double %a, double %b) nounwind { +entry: + %r = fdiv double %a, %b + ret double %r + +; CHECK: @foo2 +; CHECK: fre +; CHECK: fnmsub +; CHECK: fmadd +; CHECK: fnmsub +; CHECK: fmadd +; CHECK: fmul +; CHECK: blr + +; CHECK-SAFE: @foo2 +; CHECK-SAFE: fdiv +; CHECK-SAFE: blr +} + +define float @goo2(float %a, float %b) nounwind { +entry: + %r = fdiv float %a, %b + ret float %r + +; CHECK: @goo2 +; CHECK: fres +; CHECK: fnmsubs +; CHECK: fmadds +; CHECK: fmuls +; CHECK: blr + +; CHECK-SAFE: @goo2 +; CHECK-SAFE: fdivs +; CHECK-SAFE: blr +} + +define <4 x float> @hoo2(<4 x float> 
%a, <4 x float> %b) nounwind { +entry: + %r = fdiv <4 x float> %a, %b + ret <4 x float> %r + +; CHECK: @hoo2 +; CHECK: vrefp + +; CHECK-SAFE: @hoo2 +; CHECK-SAFE-NOT: vrefp +; CHECK-SAFE: blr +} + +define double @foo3(double %a) nounwind { +entry: + %r = call double @llvm.sqrt.f64(double %a) + ret double %r + +; CHECK: @foo3 +; CHECK: frsqrte +; CHECK: fnmsub +; CHECK: fmul +; CHECK: fmadd +; CHECK: fmul +; CHECK: fmul +; CHECK: fmadd +; CHECK: fmul +; CHECK: fre +; CHECK: fnmsub +; CHECK: fmadd +; CHECK: fnmsub +; CHECK: fmadd +; CHECK: blr + +; CHECK-SAFE: @foo3 +; CHECK-SAFE: fsqrt +; CHECK-SAFE: blr +} + +define float @goo3(float %a) nounwind { +entry: + %r = call float @llvm.sqrt.f32(float %a) + ret float %r + +; CHECK: @goo3 +; CHECK: frsqrtes +; CHECK: fnmsubs +; CHECK: fmuls +; CHECK: fmadds +; CHECK: fmuls +; CHECK: fres +; CHECK: fnmsubs +; CHECK: fmadds +; CHECK: blr + +; CHECK-SAFE: @goo3 +; CHECK-SAFE: fsqrts +; CHECK-SAFE: blr +} + +define <4 x float> @hoo3(<4 x float> %a) nounwind { +entry: + %r = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) + ret <4 x float> %r + +; CHECK: @hoo3 +; CHECK: vrsqrtefp +; CHECK: vrefp + +; CHECK-SAFE: @hoo3 +; CHECK-SAFE-NOT: vrsqrtefp +; CHECK-SAFE: blr +} + diff --git a/test/CodeGen/PowerPC/rlwimi3.ll b/test/CodeGen/PowerPC/rlwimi3.ll index 7efdbe9634fe..31b6d4aa03bc 100644 --- a/test/CodeGen/PowerPC/rlwimi3.ll +++ b/test/CodeGen/PowerPC/rlwimi3.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -march=ppc32 -stats 2>&1 | \ ; RUN: grep "Number of machine instrs printed" | grep 12 diff --git a/test/CodeGen/PowerPC/rounding-ops.ll b/test/CodeGen/PowerPC/rounding-ops.ll new file mode 100644 index 000000000000..b210a6bda8bf --- /dev/null +++ b/test/CodeGen/PowerPC/rounding-ops.ll @@ -0,0 +1,145 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-unsafe-fp-math | FileCheck -check-prefix=CHECK-FM %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define float @test1(float %x) nounwind { + %call = tail call float @floorf(float %x) nounwind readnone + ret float %call + +; CHECK: test1: +; CHECK: frim 1, 1 + +; CHECK-FM: test1: +; CHECK-FM: frim 1, 1 +} + +declare float @floorf(float) nounwind readnone + +define double @test2(double %x) nounwind { + %call = tail call double @floor(double %x) nounwind readnone + ret double %call + +; CHECK: test2: +; CHECK: frim 1, 1 + +; CHECK-FM: test2: +; CHECK-FM: frim 1, 1 +} + +declare double @floor(double) nounwind readnone + +define float @test3(float %x) nounwind { + %call = tail call float @nearbyintf(float %x) nounwind readnone + ret float %call + +; CHECK: test3: +; CHECK-NOT: frin + +; CHECK-FM: test3: +; CHECK-FM: frin 1, 1 +} + +declare float @nearbyintf(float) nounwind readnone + +define double @test4(double %x) nounwind { + %call = tail call double @nearbyint(double %x) nounwind readnone + ret double %call + +; CHECK: test4: +; CHECK-NOT: frin + +; CHECK-FM: test4: +; CHECK-FM: frin 1, 1 +} + +declare double @nearbyint(double) nounwind readnone + +define float @test5(float %x) nounwind { + %call = tail call float @ceilf(float %x) nounwind readnone + ret float %call + +; CHECK: test5: +; CHECK: frip 1, 1 + +; CHECK-FM: test5: +; CHECK-FM: frip 1, 1 +} + +declare float @ceilf(float) nounwind readnone + +define double @test6(double %x) nounwind { + %call = tail 
call double @ceil(double %x) nounwind readnone + ret double %call + +; CHECK: test6: +; CHECK: frip 1, 1 + +; CHECK-FM: test6: +; CHECK-FM: frip 1, 1 +} + +declare double @ceil(double) nounwind readnone + +define float @test9(float %x) nounwind { + %call = tail call float @truncf(float %x) nounwind readnone + ret float %call + +; CHECK: test9: +; CHECK: friz 1, 1 + +; CHECK-FM: test9: +; CHECK-FM: friz 1, 1 +} + +declare float @truncf(float) nounwind readnone + +define double @test10(double %x) nounwind { + %call = tail call double @trunc(double %x) nounwind readnone + ret double %call + +; CHECK: test10: +; CHECK: friz 1, 1 + +; CHECK-FM: test10: +; CHECK-FM: friz 1, 1 +} + +declare double @trunc(double) nounwind readnone + +define float @test11(float %x) nounwind { + %call = tail call float @rintf(float %x) nounwind readnone + ret float %call + +; CHECK: test11: +; CHECK-NOT: frin + +; CHECK-FM: test11: +; CHECK-FM: frin [[R2:[0-9]+]], [[R1:[0-9]+]] +; CHECK-FM: fcmpu [[CR:[0-9]+]], [[R2]], [[R1]] +; CHECK-FM: beq [[CR]], .LBB[[BB:[0-9]+]]_2 +; CHECK-FM: mtfsb1 6 +; CHECK-FM: .LBB[[BB]]_2: +; CHECK-FM: blr +} + +declare float @rintf(float) nounwind readnone + +define double @test12(double %x) nounwind { + %call = tail call double @rint(double %x) nounwind readnone + ret double %call + +; CHECK: test12: +; CHECK-NOT: frin + +; CHECK-FM: test12: +; CHECK-FM: frin [[R2:[0-9]+]], [[R1:[0-9]+]] +; CHECK-FM: fcmpu [[CR:[0-9]+]], [[R2]], [[R1]] +; CHECK-FM: beq [[CR]], .LBB[[BB:[0-9]+]]_2 +; CHECK-FM: mtfsb1 6 +; CHECK-FM: .LBB[[BB]]_2: +; CHECK-FM: blr +} + +declare double @rint(double) nounwind readnone + diff --git a/test/CodeGen/PowerPC/s000-alias-misched.ll b/test/CodeGen/PowerPC/s000-alias-misched.ll new file mode 100644 index 000000000000..d03ee8738eea --- /dev/null +++ b/test/CodeGen/PowerPC/s000-alias-misched.ll @@ -0,0 +1,101 @@ +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-bgq-linux" +; RUN: llc < %s -enable-misched -march=ppc64 -mcpu=a2 | FileCheck %s +; RUN: llc < %s -enable-misched -enable-aa-sched-mi -march=ppc64 -mcpu=a2 | FileCheck %s + +@aa = external global [256 x [256 x double]], align 32 +@bb = external global [256 x [256 x double]], align 32 +@cc = external global [256 x [256 x double]], align 32 +@.str1 = external hidden unnamed_addr constant [6 x i8], align 1 +@X = external global [16000 x double], align 32 +@Y = external global [16000 x double], align 32 +@Z = external global [16000 x double], align 32 +@U = external global [16000 x double], align 32 +@V = external global [16000 x double], align 32 +@.str137 = external hidden unnamed_addr constant [14 x i8], align 1 + +declare void @check(i32 signext) nounwind + +declare signext i32 @printf(i8* nocapture, ...) 
nounwind + +declare signext i32 @init(i8*) nounwind + +define signext i32 @s000() nounwind { +entry: + %call = tail call signext i32 @init(i8* getelementptr inbounds ([6 x i8]* @.str1, i64 0, i64 0)) + %call1 = tail call i64 @clock() nounwind + br label %for.cond2.preheader + +; CHECK: @s000 + +for.cond2.preheader: ; preds = %for.end, %entry + %nl.018 = phi i32 [ 0, %entry ], [ %inc9, %for.end ] + br label %for.body4 + +for.body4: ; preds = %for.body4, %for.cond2.preheader + %indvars.iv = phi i64 [ 0, %for.cond2.preheader ], [ %indvars.iv.next.15, %for.body4 ] + %arrayidx = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv + %arrayidx6 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv + %0 = bitcast double* %arrayidx to <1 x double>* + %1 = load <1 x double>* %0, align 32, !tbaa !0 + %add = fadd <1 x double> %1, <double 1.000000e+00> + %2 = bitcast double* %arrayidx6 to <1 x double>* + store <1 x double> %add, <1 x double>* %2, align 32, !tbaa !0 + %indvars.iv.next.322 = or i64 %indvars.iv, 4 + %arrayidx.4 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.322 + %arrayidx6.4 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.322 + %3 = bitcast double* %arrayidx.4 to <1 x double>* + %4 = load <1 x double>* %3, align 32, !tbaa !0 + %add.4 = fadd <1 x double> %4, <double 1.000000e+00> + %5 = bitcast double* %arrayidx6.4 to <1 x double>* + store <1 x double> %add.4, <1 x double>* %5, align 32, !tbaa !0 + %indvars.iv.next.726 = or i64 %indvars.iv, 8 + %arrayidx.8 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.726 + %arrayidx6.8 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.726 + %6 = bitcast double* %arrayidx.8 to <1 x double>* + %7 = load <1 x double>* %6, align 32, !tbaa !0 + %add.8 = fadd <1 x double> %7, <double 1.000000e+00> + %8 = bitcast double* %arrayidx6.8 to <1 x double>* + store <1 x double> %add.8, <1 x double>* %8, align 32, !tbaa !0 + %indvars.iv.next.1130 = or i64 %indvars.iv, 12 + %arrayidx.12 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1130 + %arrayidx6.12 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1130 + %9 = bitcast double* %arrayidx.12 to <1 x double>* + %10 = load <1 x double>* %9, align 32, !tbaa !0 + %add.12 = fadd <1 x double> %10, <double 1.000000e+00> + %11 = bitcast double* %arrayidx6.12 to <1 x double>* + store <1 x double> %add.12, <1 x double>* %11, align 32, !tbaa !0 + %indvars.iv.next.15 = add i64 %indvars.iv, 16 + %lftr.wideiv.15 = trunc i64 %indvars.iv.next.15 to i32 + %exitcond.15 = icmp eq i32 %lftr.wideiv.15, 16000 + br i1 %exitcond.15, label %for.end, label %for.body4 + +; All of the loads should come before all of the stores. 
+; CHECK: mtctr +; CHECK: stfd +; CHECK-NOT: lfd +; CHECK: bdnz + +for.end: ; preds = %for.body4 + %call7 = tail call signext i32 @dummy(double* getelementptr inbounds ([16000 x double]* @X, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @Y, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @Z, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @U, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @V, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]]* @aa, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]]* @bb, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]]* @cc, i64 0, i64 0), double 0.000000e+00) nounwind + %inc9 = add nsw i32 %nl.018, 1 + %exitcond = icmp eq i32 %inc9, 400000 + br i1 %exitcond, label %for.end10, label %for.cond2.preheader + +for.end10: ; preds = %for.end + %call11 = tail call i64 @clock() nounwind + %sub = sub nsw i64 %call11, %call1 + %conv = sitofp i64 %sub to double + %div = fdiv double %conv, 1.000000e+06 + %call12 = tail call signext i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([14 x i8]* @.str137, i64 0, i64 0), double %div) nounwind + tail call void @check(i32 signext 1) + ret i32 0 +} + +declare i64 @clock() nounwind + +declare signext i32 @dummy(double*, double*, double*, double*, double*, [256 x double]*, [256 x double]*, [256 x double]*, double) + +!0 = metadata !{metadata !"double", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/CodeGen/PowerPC/sdag-ppcf128.ll b/test/CodeGen/PowerPC/sdag-ppcf128.ll new file mode 100644 index 000000000000..535ece6d3dfe --- /dev/null +++ b/test/CodeGen/PowerPC/sdag-ppcf128.ll @@ -0,0 +1,15 @@ +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s +; +; PR14751: Unsupported type in SelectionDAG::getConstantFP() + +define fastcc void @_D3std4math4sqrtFNaNbNfcZc() { +entry: + br i1 undef, label %if, label %else +; CHECK: cmplwi 0, 3, 0 +if: ; preds = %entry + store { ppc_fp128, ppc_fp128 } zeroinitializer, { ppc_fp128, ppc_fp128 }* undef + ret void + +else: ; preds = %entry + unreachable +} diff --git a/test/CodeGen/PowerPC/sjlj.ll b/test/CodeGen/PowerPC/sjlj.ll new file mode 100644 index 000000000000..7ea35dafc3fa --- /dev/null +++ b/test/CodeGen/PowerPC/sjlj.ll @@ -0,0 +1,112 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck -check-prefix=CHECK-NOAV %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +%struct.__jmp_buf_tag = type { [64 x i64], i32, %struct.__sigset_t, [8 x i8] } +%struct.__sigset_t = type { [16 x i64] } + +@env_sigill = internal global [1 x %struct.__jmp_buf_tag] zeroinitializer, align 16 + +define void @foo() #0 { +entry: + call void @llvm.eh.sjlj.longjmp(i8* bitcast ([1 x %struct.__jmp_buf_tag]* @env_sigill to i8*)) + unreachable + +; CHECK: @foo +; CHECK: addis [[REG:[0-9]+]], 2, env_sigill@toc@ha +; CHECK: addi [[REG]], [[REG]], env_sigill@toc@l +; CHECK: ld 31, 0([[REG]]) +; CHECK: ld [[REG2:[0-9]+]], 8([[REG]]) +; CHECK: ld 1, 16([[REG]]) +; CHECK: mtctr [[REG2]] +; CHECK: ld 2, 24([[REG]]) +; CHECK: bctr + +return: ; No predecessors! 
+ ret void +} + +declare void @llvm.eh.sjlj.longjmp(i8*) #1 + +define signext i32 @main() #0 { +entry: + %retval = alloca i32, align 4 + store i32 0, i32* %retval + %0 = call i8* @llvm.frameaddress(i32 0) + store i8* %0, i8** bitcast ([1 x %struct.__jmp_buf_tag]* @env_sigill to i8**) + %1 = call i8* @llvm.stacksave() + store i8* %1, i8** getelementptr (i8** bitcast ([1 x %struct.__jmp_buf_tag]* @env_sigill to i8**), i32 2) + %2 = call i32 @llvm.eh.sjlj.setjmp(i8* bitcast ([1 x %struct.__jmp_buf_tag]* @env_sigill to i8*)) + %tobool = icmp ne i32 %2, 0 + br i1 %tobool, label %if.then, label %if.else + +if.then: ; preds = %entry + store i32 1, i32* %retval + br label %return + +if.else: ; preds = %entry + call void @foo() + br label %if.end + +if.end: ; preds = %if.else + store i32 0, i32* %retval + br label %return + +return: ; preds = %if.end, %if.then + %3 = load i32* %retval + ret i32 %3 + +; FIXME: We should be saving VRSAVE on Darwin, but we're not! + +; CHECK: @main +; CHECK: std +; Make sure that we're not saving VRSAVE on non-Darwin: +; CHECK-NOT: mfspr +; CHECK: stfd +; CHECK: stvx + +; CHECK: addis [[REG:[0-9]+]], 2, env_sigill@toc@ha +; CHECK: std 31, env_sigill@toc@l([[REG]]) +; CHECK: addi [[REG]], [[REG]], env_sigill@toc@l +; CHECK: std [[REG]], [[OFF:[0-9]+]](31) # 8-byte Folded Spill +; CHECK: std 1, 16([[REG]]) +; CHECK: std 2, 24([[REG]]) +; CHECK: bcl 20, 31, .LBB1_1 +; CHECK: li 3, 1 +; CHECK: #EH_SjLj_Setup .LBB1_1 +; CHECK: b .LBB1_2 + +; CHECK: .LBB1_1: +; CHECK: mflr [[REGL:[0-9]+]] +; CHECK: ld [[REG2:[0-9]+]], [[OFF]](31) # 8-byte Folded Reload +; CHECK: std [[REGL]], 8([[REG2]]) +; CHECK: li 3, 0 + +; CHECK: .LBB1_2: + +; CHECK: lfd +; CHECK: lvx +; CHECK: ld +; CHECK: blr + +; CHECK-NOAV: @main +; CHECK-NOAV-NOT: stvx +; CHECK-NOAV: bcl +; CHECK-NOAV: mflr +; CHECK-NOAV: bl foo +; CHECK-NOAV-NOT: lvx +; CHECK-NOAV: blr +} + +declare i8* @llvm.frameaddress(i32) #2 + +declare i8* @llvm.stacksave() #3 + +declare i32 @llvm.eh.sjlj.setjmp(i8*) #3 + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { noreturn nounwind } +attributes #2 = { nounwind readnone } +attributes #3 = { nounwind } + diff --git a/test/CodeGen/PowerPC/stdux-constuse.ll b/test/CodeGen/PowerPC/stdux-constuse.ll new file mode 100644 index 000000000000..e62d438014ee --- /dev/null +++ b/test/CodeGen/PowerPC/stdux-constuse.ll @@ -0,0 +1,47 @@ +; RUN: llc -mcpu=a2 -disable-lsr < %s | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define i32 @test1(i64 %add, i64* %ptr) nounwind { +entry: + %p1 = getelementptr i64* %ptr, i64 144115188075855 + br label %for.cond2.preheader + +for.cond2.preheader: + %nl.018 = phi i32 [ 0, %entry ], [ %inc9, %for.end ] + br label %for.body4 + +for.body4: + %lsr.iv = phi i32 [ %lsr.iv.next, %for.body4 ], [ 16000, %for.cond2.preheader ] + %i0 = phi i64* [ %p1, %for.cond2.preheader ], [ %i6, %for.body4 ] + %i6 = getelementptr i64* %i0, i64 400000 + %i7 = getelementptr i64* %i6, i64 300000 + %i8 = getelementptr i64* %i6, i64 200000 + %i9 = getelementptr i64* %i6, i64 100000 + store i64 %add, i64* %i6, align 32 + store i64 %add, i64* %i7, align 32 + store i64 %add, i64* %i8, align 32 + store i64 %add, i64* %i9, align 32 + %lsr.iv.next = add i32 
%lsr.iv, -16 + %exitcond.15 = icmp eq i32 %lsr.iv.next, 0 + br i1 %exitcond.15, label %for.end, label %for.body4 + +; Make sure that we generate the most compact form of this loop with no +; unnecessary moves +; CHECK: @test1 +; CHECK: mtctr +; CHECK: stdux +; CHECK-NEXT: stdx +; CHECK-NEXT: stdx +; CHECK-NEXT: stdx +; CHECK-NEXT: bdnz + +for.end: + %inc9 = add nsw i32 %nl.018, 1 + %exitcond = icmp eq i32 %inc9, 400000 + br i1 %exitcond, label %for.end10, label %for.cond2.preheader + +for.end10: + ret i32 0 +} + diff --git a/test/CodeGen/PowerPC/stfiwx-2.ll b/test/CodeGen/PowerPC/stfiwx-2.ll index c49b25cc2303..7786fc17eacb 100644 --- a/test/CodeGen/PowerPC/stfiwx-2.ll +++ b/test/CodeGen/PowerPC/stfiwx-2.ll @@ -1,11 +1,14 @@ -; This cannot be a stfiwx -; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep stb -; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep stfiwx +; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu -mcpu=g5 | FileCheck %s define void @test(float %F, i8* %P) { %I = fptosi float %F to i32 %X = trunc i32 %I to i8 store i8 %X, i8* %P ret void +; CHECK: fctiwz 0, 1 +; CHECK: stfiwx 0, 0, 4 +; CHECK: lwz 4, 12(1) +; CHECK: stb 4, 0(3) +; CHECK: blr } diff --git a/test/CodeGen/PowerPC/store-update.ll b/test/CodeGen/PowerPC/store-update.ll new file mode 100644 index 000000000000..538ed24fbc46 --- /dev/null +++ b/test/CodeGen/PowerPC/store-update.ll @@ -0,0 +1,170 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define i8* @stbu(i8* %base, i8 zeroext %val) nounwind { +entry: + %arrayidx = getelementptr inbounds i8* %base, i64 16 + store i8 %val, i8* %arrayidx, align 1 + ret i8* %arrayidx +} +; CHECK: @stbu +; CHECK: %entry +; CHECK-NEXT: stbu +; CHECK-NEXT: blr + +define i8* @stbux(i8* %base, i8 zeroext %val, i64 %offset) nounwind { +entry: + %arrayidx = getelementptr inbounds i8* %base, i64 %offset + store i8 %val, i8* %arrayidx, align 1 + ret i8* %arrayidx +} +; CHECK: @stbux +; CHECK: %entry +; CHECK-NEXT: stbux +; CHECK-NEXT: blr + +define i16* @sthu(i16* %base, i16 zeroext %val) nounwind { +entry: + %arrayidx = getelementptr inbounds i16* %base, i64 16 + store i16 %val, i16* %arrayidx, align 2 + ret i16* %arrayidx +} +; CHECK: @sthu +; CHECK: %entry +; CHECK-NEXT: sthu +; CHECK-NEXT: blr + +define i16* @sthux(i16* %base, i16 zeroext %val, i64 %offset) nounwind { +entry: + %arrayidx = getelementptr inbounds i16* %base, i64 %offset + store i16 %val, i16* %arrayidx, align 2 + ret i16* %arrayidx +} +; CHECK: @sthux +; CHECK: %entry +; CHECK-NEXT: sldi +; CHECK-NEXT: sthux +; CHECK-NEXT: blr + +define i32* @stwu(i32* %base, i32 zeroext %val) nounwind { +entry: + %arrayidx = getelementptr inbounds i32* %base, i64 16 + store i32 %val, i32* %arrayidx, align 4 + ret i32* %arrayidx +} +; CHECK: @stwu +; CHECK: %entry +; CHECK-NEXT: stwu +; CHECK-NEXT: blr + +define i32* @stwux(i32* %base, i32 zeroext %val, i64 %offset) nounwind { +entry: + %arrayidx = getelementptr inbounds i32* %base, i64 %offset + store i32 %val, i32* %arrayidx, align 4 + ret i32* %arrayidx +} +; CHECK: @stwux +; CHECK: %entry +; CHECK-NEXT: sldi +; CHECK-NEXT: stwux +; CHECK-NEXT: blr + +define i8* @stbu8(i8* %base, i64 %val) nounwind { +entry: + %conv = trunc i64 %val to i8 + %arrayidx = getelementptr inbounds i8* %base, i64 16 + store i8 %conv, i8* %arrayidx, align 1 + ret i8* %arrayidx +} +; CHECK: @stbu +; CHECK: %entry +; CHECK-NEXT: stbu +; 
CHECK-NEXT: blr + +define i8* @stbux8(i8* %base, i64 %val, i64 %offset) nounwind { +entry: + %conv = trunc i64 %val to i8 + %arrayidx = getelementptr inbounds i8* %base, i64 %offset + store i8 %conv, i8* %arrayidx, align 1 + ret i8* %arrayidx +} +; CHECK: @stbux +; CHECK: %entry +; CHECK-NEXT: stbux +; CHECK-NEXT: blr + +define i16* @sthu8(i16* %base, i64 %val) nounwind { +entry: + %conv = trunc i64 %val to i16 + %arrayidx = getelementptr inbounds i16* %base, i64 16 + store i16 %conv, i16* %arrayidx, align 2 + ret i16* %arrayidx +} +; CHECK: @sthu +; CHECK: %entry +; CHECK-NEXT: sthu +; CHECK-NEXT: blr + +define i16* @sthux8(i16* %base, i64 %val, i64 %offset) nounwind { +entry: + %conv = trunc i64 %val to i16 + %arrayidx = getelementptr inbounds i16* %base, i64 %offset + store i16 %conv, i16* %arrayidx, align 2 + ret i16* %arrayidx +} +; CHECK: @sthux +; CHECK: %entry +; CHECK-NEXT: sldi +; CHECK-NEXT: sthux +; CHECK-NEXT: blr + +define i32* @stwu8(i32* %base, i64 %val) nounwind { +entry: + %conv = trunc i64 %val to i32 + %arrayidx = getelementptr inbounds i32* %base, i64 16 + store i32 %conv, i32* %arrayidx, align 4 + ret i32* %arrayidx +} +; CHECK: @stwu +; CHECK: %entry +; CHECK-NEXT: stwu +; CHECK-NEXT: blr + +define i32* @stwux8(i32* %base, i64 %val, i64 %offset) nounwind { +entry: + %conv = trunc i64 %val to i32 + %arrayidx = getelementptr inbounds i32* %base, i64 %offset + store i32 %conv, i32* %arrayidx, align 4 + ret i32* %arrayidx +} +; CHECK: @stwux +; CHECK: %entry +; CHECK-NEXT: sldi +; CHECK-NEXT: stwux +; CHECK-NEXT: blr + +define i64* @stdu(i64* %base, i64 %val) nounwind { +entry: + %arrayidx = getelementptr inbounds i64* %base, i64 16 + store i64 %val, i64* %arrayidx, align 8 + ret i64* %arrayidx +} +; CHECK: @stdu +; CHECK: %entry +; CHECK-NEXT: stdu +; CHECK-NEXT: blr + +define i64* @stdux(i64* %base, i64 %val, i64 %offset) nounwind { +entry: + %arrayidx = getelementptr inbounds i64* %base, i64 %offset + store i64 %val, i64* %arrayidx, align 8 + ret i64* %arrayidx +} +; CHECK: @stdux +; CHECK: %entry +; CHECK-NEXT: sldi +; CHECK-NEXT: stdux +; CHECK-NEXT: blr + diff --git a/test/CodeGen/PowerPC/structsinmem.ll b/test/CodeGen/PowerPC/structsinmem.ll index 884d3a89d15a..2a17e740ea01 100644 --- a/test/CodeGen/PowerPC/structsinmem.ll +++ b/test/CodeGen/PowerPC/structsinmem.ll @@ -1,9 +1,5 @@ ; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim < %s | FileCheck %s -; FIXME: The code generation for packed structs is very poor because the -; PowerPC target wrongly rejects all unaligned loads. This test case will -; need to be revised when that is fixed. 
- target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" @@ -118,8 +114,8 @@ entry: ret i32 %add13 ; CHECK: lha {{[0-9]+}}, 126(1) -; CHECK: lbz {{[0-9]+}}, 119(1) ; CHECK: lha {{[0-9]+}}, 132(1) +; CHECK: lbz {{[0-9]+}}, 119(1) ; CHECK: lwz {{[0-9]+}}, 140(1) ; CHECK: lwz {{[0-9]+}}, 144(1) ; CHECK: lwz {{[0-9]+}}, 152(1) @@ -209,19 +205,11 @@ entry: %add13 = add nsw i32 %add11, %6 ret i32 %add13 -; CHECK: lbz {{[0-9]+}}, 149(1) -; CHECK: lbz {{[0-9]+}}, 150(1) -; CHECK: lbz {{[0-9]+}}, 147(1) -; CHECK: lbz {{[0-9]+}}, 148(1) -; CHECK: lbz {{[0-9]+}}, 133(1) -; CHECK: lbz {{[0-9]+}}, 134(1) ; CHECK: lha {{[0-9]+}}, 126(1) +; CHECK: lha {{[0-9]+}}, 133(1) ; CHECK: lbz {{[0-9]+}}, 119(1) ; CHECK: lwz {{[0-9]+}}, 140(1) -; CHECK: lhz {{[0-9]+}}, 154(1) -; CHECK: lhz {{[0-9]+}}, 156(1) -; CHECK: lbz {{[0-9]+}}, 163(1) -; CHECK: lbz {{[0-9]+}}, 164(1) -; CHECK: lbz {{[0-9]+}}, 161(1) -; CHECK: lbz {{[0-9]+}}, 162(1) +; CHECK: lwz {{[0-9]+}}, 147(1) +; CHECK: lwz {{[0-9]+}}, 154(1) +; CHECK: lwz {{[0-9]+}}, 161(1) } diff --git a/test/CodeGen/PowerPC/structsinregs.ll b/test/CodeGen/PowerPC/structsinregs.ll index ef706af95d65..54de6060d0f0 100644 --- a/test/CodeGen/PowerPC/structsinregs.ll +++ b/test/CodeGen/PowerPC/structsinregs.ll @@ -1,9 +1,5 @@ ; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim < %s | FileCheck %s -; FIXME: The code generation for packed structs is very poor because the -; PowerPC target wrongly rejects all unaligned loads. This test case will -; need to be revised when that is fixed. - target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" @@ -63,13 +59,13 @@ entry: %call = call i32 @callee1(%struct.s1* byval %p1, %struct.s2* byval %p2, %struct.s3* byval %p3, %struct.s4* byval %p4, %struct.s5* byval %p5, %struct.s6* byval %p6, %struct.s7* byval %p7) ret i32 %call -; CHECK: ld 9, 128(31) -; CHECK: ld 8, 136(31) -; CHECK: ld 7, 144(31) -; CHECK: lwz 6, 152(31) -; CHECK: lwz 5, 160(31) -; CHECK: lhz 4, 168(31) -; CHECK: lbz 3, 176(31) +; CHECK: ld 9, 112(31) +; CHECK: ld 8, 120(31) +; CHECK: ld 7, 128(31) +; CHECK: lwz 6, 136(31) +; CHECK: lwz 5, 144(31) +; CHECK: lhz 4, 152(31) +; CHECK: lbz 3, 160(31) } declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind @@ -109,8 +105,8 @@ entry: ; CHECK: sth 4, 62(1) ; CHECK: stb 3, 55(1) ; CHECK: lha {{[0-9]+}}, 62(1) -; CHECK: lbz {{[0-9]+}}, 55(1) ; CHECK: lha {{[0-9]+}}, 68(1) +; CHECK: lbz {{[0-9]+}}, 55(1) ; CHECK: lwz {{[0-9]+}}, 76(1) ; CHECK: lwz {{[0-9]+}}, 80(1) ; CHECK: lwz {{[0-9]+}}, 88(1) @@ -155,10 +151,10 @@ entry: ; CHECK: ld 9, 96(1) ; CHECK: ld 8, 88(1) ; CHECK: ld 7, 80(1) -; CHECK: lwz 6, 152(31) +; CHECK: lwz 6, 136(31) ; CHECK: ld 5, 64(1) -; CHECK: lhz 4, 168(31) -; CHECK: lbz 3, 176(31) +; CHECK: lhz 4, 152(31) +; CHECK: lbz 3, 160(31) } define internal i32 @callee2(%struct.t1* byval %v1, %struct.t2* byval %v2, %struct.t3* byval %v3, %struct.t4* byval %v4, %struct.t5* byval %v5, %struct.t6* byval %v6, %struct.t7* byval %v7) nounwind { @@ -195,19 +191,11 @@ entry: ; CHECK: std 5, 64(1) ; CHECK: sth 4, 62(1) ; CHECK: stb 3, 55(1) -; CHECK: lbz {{[0-9]+}}, 85(1) -; CHECK: lbz {{[0-9]+}}, 86(1) -; CHECK: lbz {{[0-9]+}}, 83(1) -; CHECK: lbz {{[0-9]+}}, 84(1) -; CHECK: lbz {{[0-9]+}}, 69(1) -; CHECK: lbz {{[0-9]+}}, 70(1) ; CHECK: lha {{[0-9]+}}, 62(1) +; CHECK: lha 
{{[0-9]+}}, 69(1) ; CHECK: lbz {{[0-9]+}}, 55(1) ; CHECK: lwz {{[0-9]+}}, 76(1) -; CHECK: lhz {{[0-9]+}}, 90(1) -; CHECK: lhz {{[0-9]+}}, 92(1) -; CHECK: lbz {{[0-9]+}}, 99(1) -; CHECK: lbz {{[0-9]+}}, 100(1) -; CHECK: lbz {{[0-9]+}}, 97(1) -; CHECK: lbz {{[0-9]+}}, 98(1) +; CHECK: lwz {{[0-9]+}}, 83(1) +; CHECK: lwz {{[0-9]+}}, 90(1) +; CHECK: lwz {{[0-9]+}}, 97(1) } diff --git a/test/CodeGen/PowerPC/stubs.ll b/test/CodeGen/PowerPC/stubs.ll index 4889263b4c4e..cfcc50b7a876 100644 --- a/test/CodeGen/PowerPC/stubs.ll +++ b/test/CodeGen/PowerPC/stubs.ll @@ -10,8 +10,8 @@ entry: ; CHECK: .section __TEXT,__symbol_stub1,symbol_stubs,pure_instructions,16 ; CHECK: ___floatditf$stub: ; CHECK: .indirect_symbol ___floatditf -; CHECK: lis r11,ha16(___floatditf$lazy_ptr) -; CHECK: lwzu r12,lo16(___floatditf$lazy_ptr)(r11) +; CHECK: lis r11, ha16(___floatditf$lazy_ptr) +; CHECK: lwzu r12, lo16(___floatditf$lazy_ptr)(r11) ; CHECK: mtctr r12 ; CHECK: bctr ; CHECK: .section __DATA,__la_symbol_ptr,lazy_symbol_pointers diff --git a/test/CodeGen/PowerPC/stwu8.ll b/test/CodeGen/PowerPC/stwu8.ll index 897bfc6d6caa..e0bd04345439 100644 --- a/test/CodeGen/PowerPC/stwu8.ll +++ b/test/CodeGen/PowerPC/stwu8.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s | FileCheck %s +; RUN: llc -disable-ppc-unaligned < %s | FileCheck %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" diff --git a/test/CodeGen/PowerPC/svr4-redzone.ll b/test/CodeGen/PowerPC/svr4-redzone.ll new file mode 100644 index 000000000000..91ff5797389b --- /dev/null +++ b/test/CodeGen/PowerPC/svr4-redzone.ll @@ -0,0 +1,39 @@ +; RUN: llc -mtriple="powerpc-unknown-linux-gnu" < %s | FileCheck %s --check-prefix=PPC32 +; RUN: llc -mtriple="powerpc64-unknown-linux-gnu" < %s | FileCheck %s --check-prefix=PPC64 +; PR15332 + +define void @regalloc() nounwind { +entry: + %0 = add i32 1, 2 + ret void +} +; PPC32: regalloc: +; PPC32-NOT: stwu 1, -{{[0-9]+}}(1) +; PPC32: blr + +; PPC64: regalloc: +; PPC64-NOT: stdu 1, -{{[0-9]+}}(1) +; PPC64: blr + +define void @smallstack() nounwind { +entry: + %0 = alloca i8, i32 4 + ret void +} +; PPC32: smallstack: +; PPC32: stwu 1, -16(1) + +; PPC64: smallstack: +; PPC64-NOT: stdu 1, -{{[0-9]+}}(1) +; PPC64: blr + +define void @bigstack() nounwind { +entry: + %0 = alloca i8, i32 230 + ret void +} +; PPC32: bigstack: +; PPC32: stwu 1, -240(1) + +; PPC64: bigstack: +; PPC64: stdu 1, -352(1) diff --git a/test/CodeGen/PowerPC/tls-2.ll b/test/CodeGen/PowerPC/tls-2.ll new file mode 100644 index 000000000000..20d8fe46ea17 --- /dev/null +++ b/test/CodeGen/PowerPC/tls-2.ll @@ -0,0 +1,15 @@ +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-freebsd10.0" +; RUN: llc -O1 < %s -march=ppc64 | FileCheck %s + +@a = thread_local global i32 0, align 4 + +;CHECK: localexec: +define i32 @localexec() nounwind { +entry: +;CHECK: addis [[REG1:[0-9]+]], 13, a@tprel@ha +;CHECK-NEXT: li [[REG2:[0-9]+]], 42 +;CHECK-NEXT: stw [[REG2]], a@tprel@l([[REG1]]) + store i32 42, i32* @a, align 4 + ret i32 0 +} diff --git a/test/CodeGen/PowerPC/tls-gd-obj.ll b/test/CodeGen/PowerPC/tls-gd-obj.ll new file mode 100644 index 000000000000..00b537d5325b --- /dev/null +++ b/test/CodeGen/PowerPC/tls-gd-obj.ll @@ -0,0 +1,41 @@ +; RUN: llc -mcpu=pwr7 -O0 -filetype=obj -relocation-model=pic %s -o - | \ +; RUN: elf-dump --dump-section-data | FileCheck %s + +; Test correct 
relocation generation for thread-local storage using +; the general dynamic model and integrated assembly. + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@a = thread_local global i32 0, align 4 + +define signext i32 @main() nounwind { +entry: + %retval = alloca i32, align 4 + store i32 0, i32* %retval + %0 = load i32* @a, align 4 + ret i32 %0 +} + +; Verify generation of R_PPC64_GOT_TLSGD16_HA, R_PPC64_GOT_TLSGD16_LO, +; and R_PPC64_TLSGD for accessing external variable a, and R_PPC64_REL24 +; for the call to __tls_get_addr. +; +; CHECK: '.rela.text' +; CHECK: Relocation 0 +; CHECK-NEXT: 'r_offset' +; CHECK-NEXT: 'r_sym', 0x[[SYM1:[0-9a-f]+]] +; CHECK-NEXT: 'r_type', 0x00000052 +; CHECK: Relocation 1 +; CHECK-NEXT: 'r_offset' +; CHECK-NEXT: 'r_sym', 0x[[SYM1]] +; CHECK-NEXT: 'r_type', 0x00000050 +; CHECK: Relocation 2 +; CHECK-NEXT: 'r_offset' +; CHECK-NEXT: 'r_sym', 0x[[SYM1]] +; CHECK-NEXT: 'r_type', 0x0000006b +; CHECK: Relocation 3 +; CHECK-NEXT: 'r_offset' +; CHECK-NEXT: 'r_sym', 0x{{[0-9a-f]+}} +; CHECK-NEXT: 'r_type', 0x0000000a + diff --git a/test/CodeGen/PowerPC/tls-gd.ll b/test/CodeGen/PowerPC/tls-gd.ll new file mode 100644 index 000000000000..5f0ef9a050da --- /dev/null +++ b/test/CodeGen/PowerPC/tls-gd.ll @@ -0,0 +1,23 @@ +; RUN: llc -mcpu=pwr7 -O0 -relocation-model=pic < %s | FileCheck %s + +; Test correct assembly code generation for thread-local storage using +; the general dynamic model. + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@a = thread_local global i32 0, align 4 + +define signext i32 @main() nounwind { +entry: + %retval = alloca i32, align 4 + store i32 0, i32* %retval + %0 = load i32* @a, align 4 + ret i32 %0 +} + +; CHECK: addis [[REG:[0-9]+]], 2, a@got@tlsgd@ha +; CHECK-NEXT: addi 3, [[REG]], a@got@tlsgd@l +; CHECK: bl __tls_get_addr(a@tlsgd) +; CHECK-NEXT: nop + diff --git a/test/CodeGen/PowerPC/tls-ie-obj.ll b/test/CodeGen/PowerPC/tls-ie-obj.ll new file mode 100644 index 000000000000..3600cc52ba54 --- /dev/null +++ b/test/CodeGen/PowerPC/tls-ie-obj.ll @@ -0,0 +1,36 @@ +; RUN: llc -mcpu=pwr7 -O0 -filetype=obj %s -o - | \ +; RUN: elf-dump --dump-section-data | FileCheck %s + +; Test correct relocation generation for thread-local storage +; using the initial-exec model and integrated assembly. + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@a = external thread_local global i32 + +define signext i32 @main() nounwind { +entry: + %retval = alloca i32, align 4 + store i32 0, i32* %retval + %0 = load i32* @a, align 4 + ret i32 %0 +} + +; Verify generation of R_PPC64_GOT_TPREL16_DS and R_PPC64_TLS for +; accessing external variable a. 
+; +; CHECK: '.rela.text' +; CHECK: Relocation 0 +; CHECK-NEXT: 'r_offset' +; CHECK-NEXT: 'r_sym', 0x[[SYM1:[0-9a-f]+]] +; CHECK-NEXT: 'r_type', 0x0000005a +; CHECK: Relocation 1 +; CHECK-NEXT: 'r_offset' +; CHECK-NEXT: 'r_sym', 0x[[SYM1]] +; CHECK-NEXT: 'r_type', 0x00000058 +; CHECK: Relocation 2 +; CHECK-NEXT: 'r_offset' +; CHECK-NEXT: 'r_sym', 0x[[SYM1]] +; CHECK-NEXT: 'r_type', 0x00000043 + diff --git a/test/CodeGen/PowerPC/tls-ie.ll b/test/CodeGen/PowerPC/tls-ie.ll new file mode 100644 index 000000000000..c5cfba7b3f7a --- /dev/null +++ b/test/CodeGen/PowerPC/tls-ie.ll @@ -0,0 +1,22 @@ +; RUN: llc -mcpu=pwr7 -O0 <%s | FileCheck %s + +; Test correct assembly code generation for thread-local storage +; using the initial-exec model. + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@a = external thread_local global i32 + +define signext i32 @main() nounwind { +entry: + %retval = alloca i32, align 4 + store i32 0, i32* %retval + %0 = load i32* @a, align 4 + ret i32 %0 +} + +; CHECK: addis [[REG1:[0-9]+]], 2, a@got@tprel@ha +; CHECK: ld [[REG2:[0-9]+]], a@got@tprel@l([[REG1]]) +; CHECK: add {{[0-9]+}}, [[REG2]], a@tls + diff --git a/test/CodeGen/PowerPC/tls-ld-2.ll b/test/CodeGen/PowerPC/tls-ld-2.ll new file mode 100644 index 000000000000..4399b330ea47 --- /dev/null +++ b/test/CodeGen/PowerPC/tls-ld-2.ll @@ -0,0 +1,24 @@ +; RUN: llc -mcpu=pwr7 -O1 -relocation-model=pic < %s | FileCheck %s + +; Test peephole optimization for thread-local storage using the +; local dynamic model. + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@a = hidden thread_local global i32 0, align 4 + +define signext i32 @main() nounwind { +entry: + %retval = alloca i32, align 4 + store i32 0, i32* %retval + %0 = load i32* @a, align 4 + ret i32 %0 +} + +; CHECK: addis [[REG:[0-9]+]], 2, a@got@tlsld@ha +; CHECK-NEXT: addi 3, [[REG]], a@got@tlsld@l +; CHECK: bl __tls_get_addr(a@tlsld) +; CHECK-NEXT: nop +; CHECK: addis [[REG2:[0-9]+]], 3, a@dtprel@ha +; CHECK-NEXT: lwa {{[0-9]+}}, a@dtprel@l([[REG2]]) diff --git a/test/CodeGen/PowerPC/tls-ld-obj.ll b/test/CodeGen/PowerPC/tls-ld-obj.ll new file mode 100644 index 000000000000..c521ae405f46 --- /dev/null +++ b/test/CodeGen/PowerPC/tls-ld-obj.ll @@ -0,0 +1,50 @@ +; RUN: llc -mcpu=pwr7 -O0 -filetype=obj -relocation-model=pic %s -o - | \ +; RUN: elf-dump --dump-section-data | FileCheck %s + +; Test correct relocation generation for thread-local storage using +; the local dynamic model. + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@a = hidden thread_local global i32 0, align 4 + +define signext i32 @main() nounwind { +entry: + %retval = alloca i32, align 4 + store i32 0, i32* %retval + %0 = load i32* @a, align 4 + ret i32 %0 +} + +; Verify generation of R_PPC64_GOT_TLSLD16_HA, R_PPC64_GOT_TLSLD16_LO, +; R_PPC64_TLSLD, R_PPC64_DTPREL16_HA, and R_PPC64_DTPREL16_LO for +; accessing external variable a, and R_PPC64_REL24 for the call to +; __tls_get_addr. 
+; +; CHECK: '.rela.text' +; CHECK: Relocation 0 +; CHECK-NEXT: 'r_offset' +; CHECK-NEXT: 'r_sym', 0x[[SYM1:[0-9a-f]+]] +; CHECK-NEXT: 'r_type', 0x00000056 +; CHECK: Relocation 1 +; CHECK-NEXT: 'r_offset' +; CHECK-NEXT: 'r_sym', 0x[[SYM1]] +; CHECK-NEXT: 'r_type', 0x00000054 +; CHECK: Relocation 2 +; CHECK-NEXT: 'r_offset' +; CHECK-NEXT: 'r_sym', 0x[[SYM1]] +; CHECK-NEXT: 'r_type', 0x0000006c +; CHECK: Relocation 3 +; CHECK-NEXT: 'r_offset' +; CHECK-NEXT: 'r_sym', 0x{{[0-9a-f]+}} +; CHECK-NEXT: 'r_type', 0x0000000a +; CHECK: Relocation 4 +; CHECK-NEXT: 'r_offset' +; CHECK-NEXT: 'r_sym', 0x[[SYM1]] +; CHECK-NEXT: 'r_type', 0x0000004d +; CHECK: Relocation 5 +; CHECK-NEXT: 'r_offset' +; CHECK-NEXT: 'r_sym', 0x[[SYM1]] +; CHECK-NEXT: 'r_type', 0x0000004b + diff --git a/test/CodeGen/PowerPC/tls-ld.ll b/test/CodeGen/PowerPC/tls-ld.ll new file mode 100644 index 000000000000..db02a56f6a22 --- /dev/null +++ b/test/CodeGen/PowerPC/tls-ld.ll @@ -0,0 +1,24 @@ +; RUN: llc -mcpu=pwr7 -O0 -relocation-model=pic < %s | FileCheck %s + +; Test correct assembly code generation for thread-local storage using +; the local dynamic model. + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@a = hidden thread_local global i32 0, align 4 + +define signext i32 @main() nounwind { +entry: + %retval = alloca i32, align 4 + store i32 0, i32* %retval + %0 = load i32* @a, align 4 + ret i32 %0 +} + +; CHECK: addis [[REG:[0-9]+]], 2, a@got@tlsld@ha +; CHECK-NEXT: addi 3, [[REG]], a@got@tlsld@l +; CHECK: bl __tls_get_addr(a@tlsld) +; CHECK-NEXT: nop +; CHECK: addis [[REG2:[0-9]+]], 3, a@dtprel@ha +; CHECK-NEXT: addi {{[0-9]+}}, [[REG2]], a@dtprel@l diff --git a/test/CodeGen/PowerPC/tls.ll b/test/CodeGen/PowerPC/tls.ll index 713893bf5862..2daa60ab37f2 100644 --- a/test/CodeGen/PowerPC/tls.ll +++ b/test/CodeGen/PowerPC/tls.ll @@ -1,16 +1,21 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" target triple = "powerpc64-unknown-freebsd10.0" -; RUN: llc < %s -march=ppc64 | FileCheck %s +; RUN: llc -O0 < %s -march=ppc64 | FileCheck -check-prefix=OPT0 %s +; RUN: llc -O1 < %s -march=ppc64 | FileCheck -check-prefix=OPT1 %s @a = thread_local global i32 0, align 4 -;CHECK: localexec: +;OPT0: localexec: +;OPT1: localexec: define i32 @localexec() nounwind { entry: -;CHECK: addis [[REG1:[0-9]+]], 13, a@tprel@ha -;CHECK-NEXT: li [[REG2:[0-9]+]], 42 -;CHECK-NEXT: addi [[REG1]], [[REG1]], a@tprel@l -;CHECK-NEXT: stw [[REG2]], 0([[REG1]]) +;OPT0: addis [[REG1:[0-9]+]], 13, a@tprel@ha +;OPT0-NEXT: li [[REG2:[0-9]+]], 42 +;OPT0-NEXT: addi [[REG1]], [[REG1]], a@tprel@l +;OPT0: stw [[REG2]], 0([[REG1]]) +;OPT1: addis [[REG1:[0-9]+]], 13, a@tprel@ha +;OPT1-NEXT: li [[REG2:[0-9]+]], 42 +;OPT1-NEXT: stw [[REG2]], a@tprel@l([[REG1]]) store i32 42, i32* @a, align 4 ret i32 0 } diff --git a/test/CodeGen/PowerPC/unal4-std.ll b/test/CodeGen/PowerPC/unal4-std.ll new file mode 100644 index 000000000000..169bd787c0c1 --- /dev/null +++ b/test/CodeGen/PowerPC/unal4-std.ll @@ -0,0 +1,27 @@ +; RUN: llc < %s -mcpu=pwr7 | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define fastcc void @copy_to_conceal() #0 { +entry: + br i1 undef, label %if.then, label %if.end210 + +if.then: ; preds = %entry + br label 
%vector.body.i + +vector.body.i: ; preds = %vector.body.i, %if.then + %index.i = phi i64 [ 0, %vector.body.i ], [ 0, %if.then ] + store <8 x i16> zeroinitializer, <8 x i16>* undef, align 2 + br label %vector.body.i + +if.end210: ; preds = %entry + ret void + +; This will generate two align-1 i64 stores. Make sure that they are +; indexed stores and not in r+i form (which require the offset to be +; a multiple of 4). +; CHECK: @copy_to_conceal +; CHECK: stdx {{[0-9]+}}, 0, +} + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/PowerPC/unaligned.ll b/test/CodeGen/PowerPC/unaligned.ll new file mode 100644 index 000000000000..d05080338f33 --- /dev/null +++ b/test/CodeGen/PowerPC/unaligned.ll @@ -0,0 +1,73 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32" + +define void @foo1(i16* %p, i16* %r) nounwind { +entry: + %v = load i16* %p, align 1 + store i16 %v, i16* %r, align 1 + ret void + +; CHECK: @foo1 +; CHECK: lhz +; CHECK: sth +} + +define void @foo2(i32* %p, i32* %r) nounwind { +entry: + %v = load i32* %p, align 1 + store i32 %v, i32* %r, align 1 + ret void + +; CHECK: @foo2 +; CHECK: lwz +; CHECK: stw +} + +define void @foo3(i64* %p, i64* %r) nounwind { +entry: + %v = load i64* %p, align 1 + store i64 %v, i64* %r, align 1 + ret void + +; CHECK: @foo3 +; CHECK: ld +; CHECK: std +} + +define void @foo4(float* %p, float* %r) nounwind { +entry: + %v = load float* %p, align 1 + store float %v, float* %r, align 1 + ret void + +; CHECK: @foo4 +; CHECK: lfs +; CHECK: stfs +} + +define void @foo5(double* %p, double* %r) nounwind { +entry: + %v = load double* %p, align 1 + store double %v, double* %r, align 1 + ret void + +; CHECK: @foo5 +; CHECK: lfd +; CHECK: stfd +} + +define void @foo6(<4 x float>* %p, <4 x float>* %r) nounwind { +entry: + %v = load <4 x float>* %p, align 1 + store <4 x float> %v, <4 x float>* %r, align 1 + ret void + +; These loads and stores are legalized into aligned loads and stores +; using aligned stack slots. +; CHECK: @foo6 +; CHECK: ld +; CHECK: ld +; CHECK: std +; CHECK: std +} + diff --git a/test/CodeGen/PowerPC/vaddsplat.ll b/test/CodeGen/PowerPC/vaddsplat.ll new file mode 100644 index 000000000000..e65148aff03a --- /dev/null +++ b/test/CodeGen/PowerPC/vaddsplat.ll @@ -0,0 +1,149 @@ +; RUN: llc -O0 -mcpu=pwr7 <%s | FileCheck %s + +; Test optimizations of build_vector for 6-bit immediates. 
+ +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +%v4i32 = type <4 x i32> +%v8i16 = type <8 x i16> +%v16i8 = type <16 x i8> + +define void @test_v4i32_pos_even(%v4i32* %P, %v4i32* %S) { + %p = load %v4i32* %P + %r = add %v4i32 %p, < i32 18, i32 18, i32 18, i32 18 > + store %v4i32 %r, %v4i32* %S + ret void +} + +; CHECK: test_v4i32_pos_even: +; CHECK: vspltisw [[REG1:[0-9]+]], 9 +; CHECK: vadduwm {{[0-9]+}}, [[REG1]], [[REG1]] + +define void @test_v4i32_neg_even(%v4i32* %P, %v4i32* %S) { + %p = load %v4i32* %P + %r = add %v4i32 %p, < i32 -28, i32 -28, i32 -28, i32 -28 > + store %v4i32 %r, %v4i32* %S + ret void +} + +; CHECK: test_v4i32_neg_even: +; CHECK: vspltisw [[REG1:[0-9]+]], -14 +; CHECK: vadduwm {{[0-9]+}}, [[REG1]], [[REG1]] + +define void @test_v8i16_pos_even(%v8i16* %P, %v8i16* %S) { + %p = load %v8i16* %P + %r = add %v8i16 %p, < i16 30, i16 30, i16 30, i16 30, i16 30, i16 30, i16 30, i16 30 > + store %v8i16 %r, %v8i16* %S + ret void +} + +; CHECK: test_v8i16_pos_even: +; CHECK: vspltish [[REG1:[0-9]+]], 15 +; CHECK: vadduhm {{[0-9]+}}, [[REG1]], [[REG1]] + +define void @test_v8i16_neg_even(%v8i16* %P, %v8i16* %S) { + %p = load %v8i16* %P + %r = add %v8i16 %p, < i16 -32, i16 -32, i16 -32, i16 -32, i16 -32, i16 -32, i16 -32, i16 -32 > + store %v8i16 %r, %v8i16* %S + ret void +} + +; CHECK: test_v8i16_neg_even: +; CHECK: vspltish [[REG1:[0-9]+]], -16 +; CHECK: vadduhm {{[0-9]+}}, [[REG1]], [[REG1]] + +define void @test_v16i8_pos_even(%v16i8* %P, %v16i8* %S) { + %p = load %v16i8* %P + %r = add %v16i8 %p, < i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16 > + store %v16i8 %r, %v16i8* %S + ret void +} + +; CHECK: test_v16i8_pos_even: +; CHECK: vspltisb [[REG1:[0-9]+]], 8 +; CHECK: vaddubm {{[0-9]+}}, [[REG1]], [[REG1]] + +define void @test_v16i8_neg_even(%v16i8* %P, %v16i8* %S) { + %p = load %v16i8* %P + %r = add %v16i8 %p, < i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18 > + store %v16i8 %r, %v16i8* %S + ret void +} + +; CHECK: test_v16i8_neg_even: +; CHECK: vspltisb [[REG1:[0-9]+]], -9 +; CHECK: vaddubm {{[0-9]+}}, [[REG1]], [[REG1]] + +define void @test_v4i32_pos_odd(%v4i32* %P, %v4i32* %S) { + %p = load %v4i32* %P + %r = add %v4i32 %p, < i32 27, i32 27, i32 27, i32 27 > + store %v4i32 %r, %v4i32* %S + ret void +} + +; CHECK: test_v4i32_pos_odd: +; CHECK: vspltisw [[REG2:[0-9]+]], -16 +; CHECK: vspltisw [[REG1:[0-9]+]], 11 +; CHECK: vsubuwm {{[0-9]+}}, [[REG1]], [[REG2]] + +define void @test_v4i32_neg_odd(%v4i32* %P, %v4i32* %S) { + %p = load %v4i32* %P + %r = add %v4i32 %p, < i32 -27, i32 -27, i32 -27, i32 -27 > + store %v4i32 %r, %v4i32* %S + ret void +} + +; CHECK: test_v4i32_neg_odd: +; CHECK: vspltisw [[REG2:[0-9]+]], -16 +; CHECK: vspltisw [[REG1:[0-9]+]], -11 +; CHECK: vadduwm {{[0-9]+}}, [[REG1]], [[REG2]] + +define void @test_v8i16_pos_odd(%v8i16* %P, %v8i16* %S) { + %p = load %v8i16* %P + %r = add %v8i16 %p, < i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31 > + store %v8i16 %r, %v8i16* %S + ret void +} + +; CHECK: test_v8i16_pos_odd: +; CHECK: vspltish [[REG2:[0-9]+]], -16 +; CHECK: vspltish [[REG1:[0-9]+]], 15 +; CHECK: vsubuhm {{[0-9]+}}, [[REG1]], [[REG2]] + +define void @test_v8i16_neg_odd(%v8i16* %P, %v8i16* %S) { + %p = load %v8i16* %P + %r = add %v8i16 %p, < i16 -31, i16 -31, i16 
-31, i16 -31, i16 -31, i16 -31, i16 -31, i16 -31 > + store %v8i16 %r, %v8i16* %S + ret void +} + +; CHECK: test_v8i16_neg_odd: +; CHECK: vspltish [[REG2:[0-9]+]], -16 +; CHECK: vspltish [[REG1:[0-9]+]], -15 +; CHECK: vadduhm {{[0-9]+}}, [[REG1]], [[REG2]] + +define void @test_v16i8_pos_odd(%v16i8* %P, %v16i8* %S) { + %p = load %v16i8* %P + %r = add %v16i8 %p, < i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17 > + store %v16i8 %r, %v16i8* %S + ret void +} + +; CHECK: test_v16i8_pos_odd: +; CHECK: vspltisb [[REG2:[0-9]+]], -16 +; CHECK: vspltisb [[REG1:[0-9]+]], 1 +; CHECK: vsububm {{[0-9]+}}, [[REG1]], [[REG2]] + +define void @test_v16i8_neg_odd(%v16i8* %P, %v16i8* %S) { + %p = load %v16i8* %P + %r = add %v16i8 %p, < i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17 > + store %v16i8 %r, %v16i8* %S + ret void +} + +; CHECK: test_v16i8_neg_odd: +; CHECK: vspltisb [[REG2:[0-9]+]], -16 +; CHECK: vspltisb [[REG1:[0-9]+]], -1 +; CHECK: vaddubm {{[0-9]+}}, [[REG1]], [[REG2]] + diff --git a/test/CodeGen/PowerPC/varargs.ll b/test/CodeGen/PowerPC/varargs.ll index 1769be957ac4..90f0480d6ad2 100644 --- a/test/CodeGen/PowerPC/varargs.ll +++ b/test/CodeGen/PowerPC/varargs.ll @@ -8,15 +8,16 @@ define i8* @test1(i8** %foo) nounwind { } ; P32: test1: -; P32: lwz r4, 0(r3) -; P32: addi r5, r4, 4 -; P32: stw r5, 0(r3) -; P32: lwz r3, 0(r4) -; P32: blr +; P32: lwz r2, 0(r3) +; P32: addi r4, r2, 4 +; P32: stw r4, 0(r3) +; P32: lwz r3, 0(r2) +; P32: blr ; P64: test1: -; P64: ld r4, 0(r3) -; P64: addi r5, r4, 8 -; P64: std r5, 0(r3) -; P64: ld r3, 0(r4) -; P64: blr +; P64: ld r2, 0(r3) +; P64: addi r4, r2, 8 +; P64: std r4, 0(r3) +; P64: ld r3, 0(r2) +; P64: blr + diff --git a/test/CodeGen/PowerPC/vec_cmp.ll b/test/CodeGen/PowerPC/vec_cmp.ll index 3180f464d125..eb41667610cd 100644 --- a/test/CodeGen/PowerPC/vec_cmp.ll +++ b/test/CodeGen/PowerPC/vec_cmp.ll @@ -54,7 +54,7 @@ entry: } ; CHECK: v16si8_cmp_ne: ; CHECK: vcmpequb [[RET:[0-9]+]], 2, 3 -; CHECK-NOR: vnor 2, [[RET]], [[RET]] +; CHECK-NEXT: vnor 2, [[RET]], [[RET]] define <16 x i8> @v16si8_cmp_le(<16 x i8> %x, <16 x i8> %y) nounwind readnone { entry: diff --git a/test/CodeGen/PowerPC/vec_constants.ll b/test/CodeGen/PowerPC/vec_constants.ll index 399f19f8d2e2..e4799e50e6ad 100644 --- a/test/CodeGen/PowerPC/vec_constants.ll +++ b/test/CodeGen/PowerPC/vec_constants.ll @@ -1,4 +1,7 @@ -; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep CPI +; RUN: llc -O0 -mcpu=pwr7 < %s | FileCheck %s + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" define void @test1(<4 x i32>* %P1, <4 x i32>* %P2, <4 x float>* %P3) nounwind { %tmp = load <4 x i32>* %P1 ; <<4 x i32>> [#uses=1] @@ -13,32 +16,71 @@ define void @test1(<4 x i32>* %P1, <4 x i32>* %P2, <4 x float>* %P3) nounwind { %tmp13 = bitcast <4 x i32> %tmp12 to <4 x float> ; <<4 x float>> [#uses=1] store <4 x float> %tmp13, <4 x float>* %P3 ret void + +; CHECK: test1: +; CHECK-NOT: CPI } define <4 x i32> @test_30() nounwind { ret <4 x i32> < i32 30, i32 30, i32 30, i32 30 > + +; CHECK: test_30: +; CHECK: vspltisw +; CHECK-NEXT: vadduwm +; CHECK-NEXT: blr } define <4 x i32> @test_29() nounwind { ret <4 x i32> < i32 29, i32 29, i32 29, i32 29 > + +; CHECK: test_29: +; CHECK: vspltisw +; CHECK-NEXT: vspltisw +; CHECK-NEXT: vsubuwm +; CHECK-NEXT: blr } define <8 x 
i16> @test_n30() nounwind { ret <8 x i16> < i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30 > + +; CHECK: test_n30: +; CHECK: vspltish +; CHECK-NEXT: vadduhm +; CHECK-NEXT: blr } define <16 x i8> @test_n104() nounwind { ret <16 x i8> < i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104 > + +; CHECK: test_n104: +; CHECK: vspltisb +; CHECK-NEXT: vslb +; CHECK-NEXT: blr } define <4 x i32> @test_vsldoi() nounwind { ret <4 x i32> < i32 512, i32 512, i32 512, i32 512 > + +; CHECK: test_vsldoi: +; CHECK: vspltisw +; CHECK-NEXT: vsldoi +; CHECK-NEXT: blr } define <8 x i16> @test_vsldoi_65023() nounwind { ret <8 x i16> < i16 65023, i16 65023,i16 65023,i16 65023,i16 65023,i16 65023,i16 65023,i16 65023 > + +; CHECK: test_vsldoi_65023: +; CHECK: vspltish +; CHECK-NEXT: vsldoi +; CHECK-NEXT: blr } define <4 x i32> @test_rol() nounwind { ret <4 x i32> < i32 -11534337, i32 -11534337, i32 -11534337, i32 -11534337 > + +; CHECK: test_rol: +; CHECK: vspltisw +; CHECK-NEXT: vrlw +; CHECK-NEXT: blr } diff --git a/test/CodeGen/PowerPC/vec_extload.ll b/test/CodeGen/PowerPC/vec_extload.ll index 201c15b9c735..998645d90da6 100644 --- a/test/CodeGen/PowerPC/vec_extload.ll +++ b/test/CodeGen/PowerPC/vec_extload.ll @@ -1,4 +1,4 @@ -; RUN: llc -mcpu=pwr6 -mattr=+altivec < %s | FileCheck %s +; RUN: llc -mcpu=pwr6 -mattr=+altivec -code-model=small < %s | FileCheck %s ; Check vector extend load expansion with altivec enabled. @@ -15,55 +15,9 @@ define <16 x i8> @v16si8_sext_in_reg(<16 x i8> %a) { ret <16 x i8> %c } ; CHECK: v16si8_sext_in_reg: -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lbz -; CHECK: stb -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lbz -; CHECK: stb -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lbz -; CHECK: stb -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lbz -; CHECK: stb -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lbz -; CHECK: stb -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lbz -; CHECK: stb -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lbz -; CHECK: stb -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lbz -; CHECK: stb -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lbz -; CHECK: stb -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lbz -; CHECK: stb -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lbz -; CHECK: stb -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lbz -; CHECK: stb -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lbz -; CHECK: stb -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lbz -; CHECK: stb -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lbz -; CHECK: stb -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lbz -; CHECK: stb -; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vslb +; CHECK: vsrab +; CHECK: blr ; The zero extend uses a more clever logic: a vector splat ; and a logic and to set higher bits to 0. 
@@ -83,31 +37,9 @@ define <8 x i16> @v8si16_sext_in_reg(<8 x i16> %a) { ret <8 x i16> %c } ; CHECK: v8si16_sext_in_reg: -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lhz -; CHECK: sth -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lhz -; CHECK: sth -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lhz -; CHECK: sth -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lhz -; CHECK: sth -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lhz -; CHECK: sth -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lhz -; CHECK: sth -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lhz -; CHECK: sth -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lhz -; CHECK: sth -; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vslh +; CHECK: vsrah +; CHECK: blr ; Same as v8si16_sext_in_reg, but instead of creating the mask ; with a splat, loads it from memory. @@ -129,19 +61,9 @@ define <4 x i32> @v4si32_sext_in_reg(<4 x i32> %a) { ret <4 x i32> %c } ; CHECK: v4si32_sext_in_reg: -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lha -; CHECK: stw -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lha -; CHECK: stw -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lha -; CHECK: stw -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lha -; CHECK: stw -; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vslw +; CHECK: vsraw +; CHECK: blr ; Same as v8si16_sext_in_reg. define <4 x i32> @v4si32_zext_in_reg(<4 x i32> %a) { diff --git a/test/CodeGen/PowerPC/vec_mul.ll b/test/CodeGen/PowerPC/vec_mul.ll index 80f4de4a1728..53bc75dd1078 100644 --- a/test/CodeGen/PowerPC/vec_mul.ll +++ b/test/CodeGen/PowerPC/vec_mul.ll @@ -1,5 +1,4 @@ -; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep mullw -; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vmsumuhm +; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu -march=ppc32 -mattr=+altivec | FileCheck %s define <4 x i32> @test_v4i32(<4 x i32>* %X, <4 x i32>* %Y) { %tmp = load <4 x i32>* %X ; <<4 x i32>> [#uses=1] @@ -7,6 +6,9 @@ define <4 x i32> @test_v4i32(<4 x i32>* %X, <4 x i32>* %Y) { %tmp3 = mul <4 x i32> %tmp, %tmp2 ; <<4 x i32>> [#uses=1] ret <4 x i32> %tmp3 } +; CHECK: test_v4i32: +; CHECK: vmsumuhm +; CHECK-NOT: mullw define <8 x i16> @test_v8i16(<8 x i16>* %X, <8 x i16>* %Y) { %tmp = load <8 x i16>* %X ; <<8 x i16>> [#uses=1] @@ -14,6 +16,9 @@ define <8 x i16> @test_v8i16(<8 x i16>* %X, <8 x i16>* %Y) { %tmp3 = mul <8 x i16> %tmp, %tmp2 ; <<8 x i16>> [#uses=1] ret <8 x i16> %tmp3 } +; CHECK: test_v8i16: +; CHECK: vmladduhm +; CHECK-NOT: mullw define <16 x i8> @test_v16i8(<16 x i8>* %X, <16 x i8>* %Y) { %tmp = load <16 x i8>* %X ; <<16 x i8>> [#uses=1] @@ -21,3 +26,21 @@ define <16 x i8> @test_v16i8(<16 x i8>* %X, <16 x i8>* %Y) { %tmp3 = mul <16 x i8> %tmp, %tmp2 ; <<16 x i8>> [#uses=1] ret <16 x i8> %tmp3 } +; CHECK: test_v16i8: +; CHECK: vmuloub +; CHECK: vmuleub +; CHECK-NOT: mullw + +define <4 x float> @test_float(<4 x float>* %X, <4 x float>* %Y) { + %tmp = load <4 x float>* %X + %tmp2 = load <4 x float>* %Y + %tmp3 = fmul <4 x float> %tmp, %tmp2 + ret <4 x float> %tmp3 +} +; Check the creation of a negative zero float vector by creating a vector of +; all bits set and shifting it 31 bits to the left, resulting in a vector of +; 4 x 0x80000000 (-0.0 as float).
+; CHECK: test_float: +; CHECK: vspltisw [[ZNEG:[0-9]+]], -1 +; CHECK: vslw {{[0-9]+}}, [[ZNEG]], [[ZNEG]] +; CHECK: vmaddfp diff --git a/test/CodeGen/PowerPC/vec_rounding.ll b/test/CodeGen/PowerPC/vec_rounding.ll new file mode 100644 index 000000000000..7c55638620a9 --- /dev/null +++ b/test/CodeGen/PowerPC/vec_rounding.ll @@ -0,0 +1,172 @@ +; RUN: llc -mcpu=pwr6 -mattr=+altivec < %s | FileCheck %s + +; Check vector round to single-precision toward -infinity (vrfim) +; instruction generation using Altivec. + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +declare <2 x double> @llvm.floor.v2f64(<2 x double> %p) +define <2 x double> @floor_v2f64(<2 x double> %p) +{ + %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p) + ret <2 x double> %t +} +; CHECK: floor_v2f64: +; CHECK: frim +; CHECK: frim + +declare <4 x double> @llvm.floor.v4f64(<4 x double> %p) +define <4 x double> @floor_v4f64(<4 x double> %p) +{ + %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p) + ret <4 x double> %t +} +; CHECK: floor_v4f64: +; CHECK: frim +; CHECK: frim +; CHECK: frim +; CHECK: frim + +declare <2 x double> @llvm.ceil.v2f64(<2 x double> %p) +define <2 x double> @ceil_v2f64(<2 x double> %p) +{ + %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p) + ret <2 x double> %t +} +; CHECK: ceil_v2f64: +; CHECK: frip +; CHECK: frip + +declare <4 x double> @llvm.ceil.v4f64(<4 x double> %p) +define <4 x double> @ceil_v4f64(<4 x double> %p) +{ + %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p) + ret <4 x double> %t +} +; CHECK: ceil_v4f64: +; CHECK: frip +; CHECK: frip +; CHECK: frip +; CHECK: frip + +declare <2 x double> @llvm.trunc.v2f64(<2 x double> %p) +define <2 x double> @trunc_v2f64(<2 x double> %p) +{ + %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p) + ret <2 x double> %t +} +; CHECK: trunc_v2f64: +; CHECK: friz +; CHECK: friz + +declare <4 x double> @llvm.trunc.v4f64(<4 x double> %p) +define <4 x double> @trunc_v4f64(<4 x double> %p) +{ + %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p) + ret <4 x double> %t +} +; CHECK: trunc_v4f64: +; CHECK: friz +; CHECK: friz +; CHECK: friz +; CHECK: friz + +declare <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p) +define <2 x double> @nearbyint_v2f64(<2 x double> %p) +{ + %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p) + ret <2 x double> %t +} +; CHECK: nearbyint_v2f64: +; CHECK: bl nearbyint +; CHECK: bl nearbyint + +declare <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p) +define <4 x double> @nearbyint_v4f64(<4 x double> %p) +{ + %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p) + ret <4 x double> %t +} +; CHECK: nearbyint_v4f64: +; CHECK: bl nearbyint +; CHECK: bl nearbyint +; CHECK: bl nearbyint +; CHECK: bl nearbyint + + +declare <4 x float> @llvm.floor.v4f32(<4 x float> %p) +define <4 x float> @floor_v4f32(<4 x float> %p) +{ + %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p) + ret <4 x float> %t +} +; CHECK: floor_v4f32: +; CHECK: vrfim + +declare <8 x float> @llvm.floor.v8f32(<8 x float> %p) +define <8 x float> @floor_v8f32(<8 x float> %p) +{ + %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p) + ret <8 x float> %t +} +; CHECK: floor_v8f32: +; CHECK: vrfim +; CHECK: vrfim + +declare <4 x float> @llvm.ceil.v4f32(<4 x float> %p) +define <4 x float> @ceil_v4f32(<4 x float> %p) +{ + %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p) + ret <4 x float> %t +} +; 
CHECK: ceil_v4f32: +; CHECK: vrfip + +declare <8 x float> @llvm.ceil.v8f32(<8 x float> %p) +define <8 x float> @ceil_v8f32(<8 x float> %p) +{ + %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p) + ret <8 x float> %t +} +; CHECK: ceil_v8f32: +; CHECK: vrfip +; CHECK: vrfip + +declare <4 x float> @llvm.trunc.v4f32(<4 x float> %p) +define <4 x float> @trunc_v4f32(<4 x float> %p) +{ + %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p) + ret <4 x float> %t +} +; CHECK: trunc_v4f32: +; CHECK: vrfiz + +declare <8 x float> @llvm.trunc.v8f32(<8 x float> %p) +define <8 x float> @trunc_v8f32(<8 x float> %p) +{ + %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p) + ret <8 x float> %t +} +; CHECK: trunc_v8f32: +; CHECK: vrfiz +; CHECK: vrfiz + +declare <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p) +define <4 x float> @nearbyint_v4f32(<4 x float> %p) +{ + %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p) + ret <4 x float> %t +} +; CHECK: nearbyint_v4f32: +; CHECK: vrfin + +declare <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p) +define <8 x float> @nearbyint_v8f32(<8 x float> %p) +{ + %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p) + ret <8 x float> %t +} +; CHECK: nearbyint_v8f32: +; CHECK: vrfin +; CHECK: vrfin diff --git a/test/CodeGen/PowerPC/vec_select.ll b/test/CodeGen/PowerPC/vec_select.ll new file mode 100644 index 000000000000..4ad0acca0067 --- /dev/null +++ b/test/CodeGen/PowerPC/vec_select.ll @@ -0,0 +1,7 @@ +; RUN: llc < %s -mtriple=powerpc64-linux-gnu -mattr=+altivec | FileCheck %s + +; CHECK: vsel_float +define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) { + %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %v1, <4 x float> %v2 + ret <4 x float> %vsel +} diff --git a/test/CodeGen/PowerPC/vrsave-spill.ll b/test/CodeGen/PowerPC/vrsave-spill.ll new file mode 100644 index 000000000000..c73206d8fc86 --- /dev/null +++ b/test/CodeGen/PowerPC/vrsave-spill.ll @@ -0,0 +1,19 @@ +; RUN: llc < %s -mtriple=powerpc64-apple-darwin -mcpu=g5 | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-apple-darwin" + +define <4 x float> @foo(<4 x float> %a, <4 x float> %b) nounwind { +entry: + %c = fadd <4 x float> %a, %b + %d = fmul <4 x float> %c, %a + call void asm sideeffect "", "~{VRsave}"() nounwind + br label %return + +; CHECK: @foo +; CHECK: mfspr r{{[0-9]+}}, 256 +; CHECK: mtspr 256, r{{[0-9]+}} + +return: ; preds = %entry + ret <4 x float> %d +} + diff --git a/test/CodeGen/PowerPC/vrspill.ll b/test/CodeGen/PowerPC/vrspill.ll index 7641017c434e..9fb3d03477c9 100644 --- a/test/CodeGen/PowerPC/vrspill.ll +++ b/test/CodeGen/PowerPC/vrspill.ll @@ -13,7 +13,7 @@ entry: ret void } -; CHECK: stvx 2, 0, 0 -; CHECK: lvx 2, 0, 0 +; CHECK: stvx 2, 1, +; CHECK: lvx 2, 1, declare void @foo(i32*) diff --git a/test/CodeGen/R600/128bit-kernel-args.ll b/test/CodeGen/R600/128bit-kernel-args.ll new file mode 100644 index 000000000000..114f9e74474f --- /dev/null +++ b/test/CodeGen/R600/128bit-kernel-args.ll @@ -0,0 +1,18 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: @v4i32_kernel_arg +; CHECK: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 40 + +define void @v4i32_kernel_arg(<4 x i32> addrspace(1)* %out, <4 x i32> %in) { +entry: + store <4 x i32> %in, <4 x i32> addrspace(1)* %out + ret void +} + +; CHECK: @v4f32_kernel_arg +; CHECK: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 40 +define void 
@v4f32_kernel_args(<4 x float> addrspace(1)* %out, <4 x float> %in) { +entry: + store <4 x float> %in, <4 x float> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/add.v4i32.ll b/test/CodeGen/R600/add.v4i32.ll new file mode 100644 index 000000000000..ac4a87417bde --- /dev/null +++ b/test/CodeGen/R600/add.v4i32.ll @@ -0,0 +1,15 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 + %a = load <4 x i32> addrspace(1) * %in + %b = load <4 x i32> addrspace(1) * %b_ptr + %result = add <4 x i32> %a, %b + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/alu-split.ll b/test/CodeGen/R600/alu-split.ll new file mode 100644 index 000000000000..afefcd9f78b0 --- /dev/null +++ b/test/CodeGen/R600/alu-split.ll @@ -0,0 +1,850 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: ALU +;CHECK: ALU +;CHECK: ALU +;CHECK-NOT: ALU + +define void @main() #0 { +main_body: + %0 = call float @llvm.R600.load.input(i32 4) + %1 = call float @llvm.R600.load.input(i32 5) + %2 = call float @llvm.R600.load.input(i32 6) + %3 = call float @llvm.R600.load.input(i32 7) + %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16) + %5 = extractelement <4 x float> %4, i32 0 + %6 = fcmp une float 0x4016F2B020000000, %5 + %7 = select i1 %6, float 1.000000e+00, float 0.000000e+00 + %8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16) + %9 = extractelement <4 x float> %8, i32 1 + %10 = fcmp une float 0x401FDCC640000000, %9 + %11 = select i1 %10, float 1.000000e+00, float 0.000000e+00 + %12 = fsub float -0.000000e+00, %7 + %13 = fptosi float %12 to i32 + %14 = fsub float -0.000000e+00, %11 + %15 = fptosi float %14 to i32 + %16 = bitcast i32 %13 to float + %17 = bitcast i32 %15 to float + %18 = bitcast float %16 to i32 + %19 = bitcast float %17 to i32 + %20 = or i32 %18, %19 + %21 = bitcast i32 %20 to float + %22 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 17) + %23 = extractelement <4 x float> %22, i32 0 + %24 = fcmp une float 0xC00574BC60000000, %23 + %25 = select i1 %24, float 1.000000e+00, float 0.000000e+00 + %26 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 17) + %27 = extractelement <4 x float> %26, i32 1 + %28 = fcmp une float 0x40210068E0000000, %27 + %29 = select i1 %28, float 1.000000e+00, float 0.000000e+00 + %30 = fsub float -0.000000e+00, %25 + %31 = fptosi float %30 to i32 + %32 = fsub float -0.000000e+00, %29 + %33 = fptosi float %32 to i32 + %34 = bitcast i32 %31 to float + %35 = bitcast i32 %33 to float + %36 = bitcast float %34 to i32 + %37 = bitcast float %35 to i32 + %38 = or i32 %36, %37 + %39 = bitcast i32 %38 to float + %40 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 18) + %41 = extractelement <4 x float> %40, i32 0 + %42 = fcmp une float 0xBFC9A6B500000000, %41 + %43 = select i1 %42, float 1.000000e+00, float 0.000000e+00 + %44 = load <4 x 
float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 18) + %45 = extractelement <4 x float> %44, i32 1 + %46 = fcmp une float 0xC0119BDA60000000, %45 + %47 = select i1 %46, float 1.000000e+00, float 0.000000e+00 + %48 = fsub float -0.000000e+00, %43 + %49 = fptosi float %48 to i32 + %50 = fsub float -0.000000e+00, %47 + %51 = fptosi float %50 to i32 + %52 = bitcast i32 %49 to float + %53 = bitcast i32 %51 to float + %54 = bitcast float %52 to i32 + %55 = bitcast float %53 to i32 + %56 = or i32 %54, %55 + %57 = bitcast i32 %56 to float + %58 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 19) + %59 = extractelement <4 x float> %58, i32 0 + %60 = fcmp une float 0xC02085D640000000, %59 + %61 = select i1 %60, float 1.000000e+00, float 0.000000e+00 + %62 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 19) + %63 = extractelement <4 x float> %62, i32 1 + %64 = fcmp une float 0xBFD7C1BDA0000000, %63 + %65 = select i1 %64, float 1.000000e+00, float 0.000000e+00 + %66 = fsub float -0.000000e+00, %61 + %67 = fptosi float %66 to i32 + %68 = fsub float -0.000000e+00, %65 + %69 = fptosi float %68 to i32 + %70 = bitcast i32 %67 to float + %71 = bitcast i32 %69 to float + %72 = bitcast float %70 to i32 + %73 = bitcast float %71 to i32 + %74 = or i32 %72, %73 + %75 = bitcast i32 %74 to float + %76 = insertelement <4 x float> undef, float %21, i32 0 + %77 = insertelement <4 x float> %76, float %39, i32 1 + %78 = insertelement <4 x float> %77, float %57, i32 2 + %79 = insertelement <4 x float> %78, float %75, i32 3 + %80 = insertelement <4 x float> undef, float %21, i32 0 + %81 = insertelement <4 x float> %80, float %39, i32 1 + %82 = insertelement <4 x float> %81, float %57, i32 2 + %83 = insertelement <4 x float> %82, float %75, i32 3 + %84 = call float @llvm.AMDGPU.dp4(<4 x float> %79, <4 x float> %83) + %85 = bitcast float %84 to i32 + %86 = icmp ne i32 %85, 0 + %87 = sext i1 %86 to i32 + %88 = bitcast i32 %87 to float + %89 = bitcast float %88 to i32 + %90 = xor i32 %89, -1 + %91 = bitcast i32 %90 to float + %92 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 20) + %93 = extractelement <4 x float> %92, i32 0 + %94 = fcmp une float 0x401FDCC640000000, %93 + %95 = select i1 %94, float 1.000000e+00, float 0.000000e+00 + %96 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 20) + %97 = extractelement <4 x float> %96, i32 1 + %98 = fcmp une float 0xC00574BC60000000, %97 + %99 = select i1 %98, float 1.000000e+00, float 0.000000e+00 + %100 = fsub float -0.000000e+00, %95 + %101 = fptosi float %100 to i32 + %102 = fsub float -0.000000e+00, %99 + %103 = fptosi float %102 to i32 + %104 = bitcast i32 %101 to float + %105 = bitcast i32 %103 to float + %106 = bitcast float %104 to i32 + %107 = bitcast float %105 to i32 + %108 = or i32 %106, %107 + %109 = bitcast i32 %108 to float + %110 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 21) + %111 = extractelement <4 x float> %110, i32 0 + %112 = fcmp une float 0x40210068E0000000, %111 + %113 = select i1 %112, float 1.000000e+00, float 0.000000e+00 + %114 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 21) + %115 = extractelement <4 x float> %114, i32 1 + %116 = fcmp une float 0xBFC9A6B500000000, %115 + %117 = select i1 %116, 
float 1.000000e+00, float 0.000000e+00 + %118 = fsub float -0.000000e+00, %113 + %119 = fptosi float %118 to i32 + %120 = fsub float -0.000000e+00, %117 + %121 = fptosi float %120 to i32 + %122 = bitcast i32 %119 to float + %123 = bitcast i32 %121 to float + %124 = bitcast float %122 to i32 + %125 = bitcast float %123 to i32 + %126 = or i32 %124, %125 + %127 = bitcast i32 %126 to float + %128 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 22) + %129 = extractelement <4 x float> %128, i32 0 + %130 = fcmp une float 0xC0119BDA60000000, %129 + %131 = select i1 %130, float 1.000000e+00, float 0.000000e+00 + %132 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 22) + %133 = extractelement <4 x float> %132, i32 1 + %134 = fcmp une float 0xC02085D640000000, %133 + %135 = select i1 %134, float 1.000000e+00, float 0.000000e+00 + %136 = fsub float -0.000000e+00, %131 + %137 = fptosi float %136 to i32 + %138 = fsub float -0.000000e+00, %135 + %139 = fptosi float %138 to i32 + %140 = bitcast i32 %137 to float + %141 = bitcast i32 %139 to float + %142 = bitcast float %140 to i32 + %143 = bitcast float %141 to i32 + %144 = or i32 %142, %143 + %145 = bitcast i32 %144 to float + %146 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 23) + %147 = extractelement <4 x float> %146, i32 0 + %148 = fcmp une float 0xBFD7C1BDA0000000, %147 + %149 = select i1 %148, float 1.000000e+00, float 0.000000e+00 + %150 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 23) + %151 = extractelement <4 x float> %150, i32 1 + %152 = fcmp une float 0x401E1D7DC0000000, %151 + %153 = select i1 %152, float 1.000000e+00, float 0.000000e+00 + %154 = fsub float -0.000000e+00, %149 + %155 = fptosi float %154 to i32 + %156 = fsub float -0.000000e+00, %153 + %157 = fptosi float %156 to i32 + %158 = bitcast i32 %155 to float + %159 = bitcast i32 %157 to float + %160 = bitcast float %158 to i32 + %161 = bitcast float %159 to i32 + %162 = or i32 %160, %161 + %163 = bitcast i32 %162 to float + %164 = insertelement <4 x float> undef, float %109, i32 0 + %165 = insertelement <4 x float> %164, float %127, i32 1 + %166 = insertelement <4 x float> %165, float %145, i32 2 + %167 = insertelement <4 x float> %166, float %163, i32 3 + %168 = insertelement <4 x float> undef, float %109, i32 0 + %169 = insertelement <4 x float> %168, float %127, i32 1 + %170 = insertelement <4 x float> %169, float %145, i32 2 + %171 = insertelement <4 x float> %170, float %163, i32 3 + %172 = call float @llvm.AMDGPU.dp4(<4 x float> %167, <4 x float> %171) + %173 = bitcast float %172 to i32 + %174 = icmp ne i32 %173, 0 + %175 = sext i1 %174 to i32 + %176 = bitcast i32 %175 to float + %177 = bitcast float %176 to i32 + %178 = xor i32 %177, -1 + %179 = bitcast i32 %178 to float + %180 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) + %181 = extractelement <4 x float> %180, i32 0 + %182 = fcmp une float 0x401FDCC640000000, %181 + %183 = select i1 %182, float 1.000000e+00, float 0.000000e+00 + %184 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) + %185 = extractelement <4 x float> %184, i32 1 + %186 = fcmp une float 0xC00574BC60000000, %185 + %187 = select i1 %186, float 1.000000e+00, float 0.000000e+00 + %188 = load <4 x float> addrspace(8)* getelementptr ([1024 x 
<4 x float>] addrspace(8)* null, i64 0, i32 8) + %189 = extractelement <4 x float> %188, i32 2 + %190 = fcmp une float 0x40210068E0000000, %189 + %191 = select i1 %190, float 1.000000e+00, float 0.000000e+00 + %192 = fsub float -0.000000e+00, %183 + %193 = fptosi float %192 to i32 + %194 = fsub float -0.000000e+00, %187 + %195 = fptosi float %194 to i32 + %196 = fsub float -0.000000e+00, %191 + %197 = fptosi float %196 to i32 + %198 = bitcast i32 %193 to float + %199 = bitcast i32 %195 to float + %200 = bitcast i32 %197 to float + %201 = bitcast float %199 to i32 + %202 = bitcast float %200 to i32 + %203 = or i32 %201, %202 + %204 = bitcast i32 %203 to float + %205 = bitcast float %198 to i32 + %206 = bitcast float %204 to i32 + %207 = or i32 %205, %206 + %208 = bitcast i32 %207 to float + %209 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) + %210 = extractelement <4 x float> %209, i32 0 + %211 = fcmp une float 0xBFC9A6B500000000, %210 + %212 = select i1 %211, float 1.000000e+00, float 0.000000e+00 + %213 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) + %214 = extractelement <4 x float> %213, i32 1 + %215 = fcmp une float 0xC0119BDA60000000, %214 + %216 = select i1 %215, float 1.000000e+00, float 0.000000e+00 + %217 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) + %218 = extractelement <4 x float> %217, i32 2 + %219 = fcmp une float 0xC02085D640000000, %218 + %220 = select i1 %219, float 1.000000e+00, float 0.000000e+00 + %221 = fsub float -0.000000e+00, %212 + %222 = fptosi float %221 to i32 + %223 = fsub float -0.000000e+00, %216 + %224 = fptosi float %223 to i32 + %225 = fsub float -0.000000e+00, %220 + %226 = fptosi float %225 to i32 + %227 = bitcast i32 %222 to float + %228 = bitcast i32 %224 to float + %229 = bitcast i32 %226 to float + %230 = bitcast float %228 to i32 + %231 = bitcast float %229 to i32 + %232 = or i32 %230, %231 + %233 = bitcast i32 %232 to float + %234 = bitcast float %227 to i32 + %235 = bitcast float %233 to i32 + %236 = or i32 %234, %235 + %237 = bitcast i32 %236 to float + %238 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10) + %239 = extractelement <4 x float> %238, i32 0 + %240 = fcmp une float 0xBFD7C1BDA0000000, %239 + %241 = select i1 %240, float 1.000000e+00, float 0.000000e+00 + %242 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10) + %243 = extractelement <4 x float> %242, i32 1 + %244 = fcmp une float 0x401E1D7DC0000000, %243 + %245 = select i1 %244, float 1.000000e+00, float 0.000000e+00 + %246 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10) + %247 = extractelement <4 x float> %246, i32 2 + %248 = fcmp une float 0xC019893740000000, %247 + %249 = select i1 %248, float 1.000000e+00, float 0.000000e+00 + %250 = fsub float -0.000000e+00, %241 + %251 = fptosi float %250 to i32 + %252 = fsub float -0.000000e+00, %245 + %253 = fptosi float %252 to i32 + %254 = fsub float -0.000000e+00, %249 + %255 = fptosi float %254 to i32 + %256 = bitcast i32 %251 to float + %257 = bitcast i32 %253 to float + %258 = bitcast i32 %255 to float + %259 = bitcast float %257 to i32 + %260 = bitcast float %258 to i32 + %261 = or i32 %259, %260 + %262 = bitcast i32 %261 to float + %263 = bitcast float %256 to i32 + %264 = bitcast float %262 to i32 + %265 
= or i32 %263, %264 + %266 = bitcast i32 %265 to float + %267 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11) + %268 = extractelement <4 x float> %267, i32 0 + %269 = fcmp une float 0x40220F0D80000000, %268 + %270 = select i1 %269, float 1.000000e+00, float 0.000000e+00 + %271 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11) + %272 = extractelement <4 x float> %271, i32 1 + %273 = fcmp une float 0xC018E2EB20000000, %272 + %274 = select i1 %273, float 1.000000e+00, float 0.000000e+00 + %275 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11) + %276 = extractelement <4 x float> %275, i32 2 + %277 = fcmp une float 0xBFEA8DB8C0000000, %276 + %278 = select i1 %277, float 1.000000e+00, float 0.000000e+00 + %279 = fsub float -0.000000e+00, %270 + %280 = fptosi float %279 to i32 + %281 = fsub float -0.000000e+00, %274 + %282 = fptosi float %281 to i32 + %283 = fsub float -0.000000e+00, %278 + %284 = fptosi float %283 to i32 + %285 = bitcast i32 %280 to float + %286 = bitcast i32 %282 to float + %287 = bitcast i32 %284 to float + %288 = bitcast float %286 to i32 + %289 = bitcast float %287 to i32 + %290 = or i32 %288, %289 + %291 = bitcast i32 %290 to float + %292 = bitcast float %285 to i32 + %293 = bitcast float %291 to i32 + %294 = or i32 %292, %293 + %295 = bitcast i32 %294 to float + %296 = insertelement <4 x float> undef, float %208, i32 0 + %297 = insertelement <4 x float> %296, float %237, i32 1 + %298 = insertelement <4 x float> %297, float %266, i32 2 + %299 = insertelement <4 x float> %298, float %295, i32 3 + %300 = insertelement <4 x float> undef, float %208, i32 0 + %301 = insertelement <4 x float> %300, float %237, i32 1 + %302 = insertelement <4 x float> %301, float %266, i32 2 + %303 = insertelement <4 x float> %302, float %295, i32 3 + %304 = call float @llvm.AMDGPU.dp4(<4 x float> %299, <4 x float> %303) + %305 = bitcast float %304 to i32 + %306 = icmp ne i32 %305, 0 + %307 = sext i1 %306 to i32 + %308 = bitcast i32 %307 to float + %309 = bitcast float %308 to i32 + %310 = xor i32 %309, -1 + %311 = bitcast i32 %310 to float + %312 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12) + %313 = extractelement <4 x float> %312, i32 0 + %314 = fcmp une float 0xC00574BC60000000, %313 + %315 = select i1 %314, float 1.000000e+00, float 0.000000e+00 + %316 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12) + %317 = extractelement <4 x float> %316, i32 1 + %318 = fcmp une float 0x40210068E0000000, %317 + %319 = select i1 %318, float 1.000000e+00, float 0.000000e+00 + %320 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12) + %321 = extractelement <4 x float> %320, i32 2 + %322 = fcmp une float 0xBFC9A6B500000000, %321 + %323 = select i1 %322, float 1.000000e+00, float 0.000000e+00 + %324 = fsub float -0.000000e+00, %315 + %325 = fptosi float %324 to i32 + %326 = fsub float -0.000000e+00, %319 + %327 = fptosi float %326 to i32 + %328 = fsub float -0.000000e+00, %323 + %329 = fptosi float %328 to i32 + %330 = bitcast i32 %325 to float + %331 = bitcast i32 %327 to float + %332 = bitcast i32 %329 to float + %333 = bitcast float %331 to i32 + %334 = bitcast float %332 to i32 + %335 = or i32 %333, %334 + %336 = bitcast i32 %335 to float + %337 = bitcast float %330 to i32 + %338 = 
bitcast float %336 to i32 + %339 = or i32 %337, %338 + %340 = bitcast i32 %339 to float + %341 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 13) + %342 = extractelement <4 x float> %341, i32 0 + %343 = fcmp une float 0xC0119BDA60000000, %342 + %344 = select i1 %343, float 1.000000e+00, float 0.000000e+00 + %345 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 13) + %346 = extractelement <4 x float> %345, i32 1 + %347 = fcmp une float 0xC02085D640000000, %346 + %348 = select i1 %347, float 1.000000e+00, float 0.000000e+00 + %349 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 13) + %350 = extractelement <4 x float> %349, i32 2 + %351 = fcmp une float 0xBFD7C1BDA0000000, %350 + %352 = select i1 %351, float 1.000000e+00, float 0.000000e+00 + %353 = fsub float -0.000000e+00, %344 + %354 = fptosi float %353 to i32 + %355 = fsub float -0.000000e+00, %348 + %356 = fptosi float %355 to i32 + %357 = fsub float -0.000000e+00, %352 + %358 = fptosi float %357 to i32 + %359 = bitcast i32 %354 to float + %360 = bitcast i32 %356 to float + %361 = bitcast i32 %358 to float + %362 = bitcast float %360 to i32 + %363 = bitcast float %361 to i32 + %364 = or i32 %362, %363 + %365 = bitcast i32 %364 to float + %366 = bitcast float %359 to i32 + %367 = bitcast float %365 to i32 + %368 = or i32 %366, %367 + %369 = bitcast i32 %368 to float + %370 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14) + %371 = extractelement <4 x float> %370, i32 0 + %372 = fcmp une float 0x401E1D7DC0000000, %371 + %373 = select i1 %372, float 1.000000e+00, float 0.000000e+00 + %374 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14) + %375 = extractelement <4 x float> %374, i32 1 + %376 = fcmp une float 0xC019893740000000, %375 + %377 = select i1 %376, float 1.000000e+00, float 0.000000e+00 + %378 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14) + %379 = extractelement <4 x float> %378, i32 2 + %380 = fcmp une float 0x40220F0D80000000, %379 + %381 = select i1 %380, float 1.000000e+00, float 0.000000e+00 + %382 = fsub float -0.000000e+00, %373 + %383 = fptosi float %382 to i32 + %384 = fsub float -0.000000e+00, %377 + %385 = fptosi float %384 to i32 + %386 = fsub float -0.000000e+00, %381 + %387 = fptosi float %386 to i32 + %388 = bitcast i32 %383 to float + %389 = bitcast i32 %385 to float + %390 = bitcast i32 %387 to float + %391 = bitcast float %389 to i32 + %392 = bitcast float %390 to i32 + %393 = or i32 %391, %392 + %394 = bitcast i32 %393 to float + %395 = bitcast float %388 to i32 + %396 = bitcast float %394 to i32 + %397 = or i32 %395, %396 + %398 = bitcast i32 %397 to float + %399 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 15) + %400 = extractelement <4 x float> %399, i32 0 + %401 = fcmp une float 0xC018E2EB20000000, %400 + %402 = select i1 %401, float 1.000000e+00, float 0.000000e+00 + %403 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 15) + %404 = extractelement <4 x float> %403, i32 1 + %405 = fcmp une float 0xBFEA8DB8C0000000, %404 + %406 = select i1 %405, float 1.000000e+00, float 0.000000e+00 + %407 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 
15) + %408 = extractelement <4 x float> %407, i32 2 + %409 = fcmp une float 0x4015236E20000000, %408 + %410 = select i1 %409, float 1.000000e+00, float 0.000000e+00 + %411 = fsub float -0.000000e+00, %402 + %412 = fptosi float %411 to i32 + %413 = fsub float -0.000000e+00, %406 + %414 = fptosi float %413 to i32 + %415 = fsub float -0.000000e+00, %410 + %416 = fptosi float %415 to i32 + %417 = bitcast i32 %412 to float + %418 = bitcast i32 %414 to float + %419 = bitcast i32 %416 to float + %420 = bitcast float %418 to i32 + %421 = bitcast float %419 to i32 + %422 = or i32 %420, %421 + %423 = bitcast i32 %422 to float + %424 = bitcast float %417 to i32 + %425 = bitcast float %423 to i32 + %426 = or i32 %424, %425 + %427 = bitcast i32 %426 to float + %428 = insertelement <4 x float> undef, float %340, i32 0 + %429 = insertelement <4 x float> %428, float %369, i32 1 + %430 = insertelement <4 x float> %429, float %398, i32 2 + %431 = insertelement <4 x float> %430, float %427, i32 3 + %432 = insertelement <4 x float> undef, float %340, i32 0 + %433 = insertelement <4 x float> %432, float %369, i32 1 + %434 = insertelement <4 x float> %433, float %398, i32 2 + %435 = insertelement <4 x float> %434, float %427, i32 3 + %436 = call float @llvm.AMDGPU.dp4(<4 x float> %431, <4 x float> %435) + %437 = bitcast float %436 to i32 + %438 = icmp ne i32 %437, 0 + %439 = sext i1 %438 to i32 + %440 = bitcast i32 %439 to float + %441 = bitcast float %440 to i32 + %442 = xor i32 %441, -1 + %443 = bitcast i32 %442 to float + %444 = load <4 x float> addrspace(8)* null + %445 = extractelement <4 x float> %444, i32 0 + %446 = fcmp une float 0xC00574BC60000000, %445 + %447 = select i1 %446, float 1.000000e+00, float 0.000000e+00 + %448 = load <4 x float> addrspace(8)* null + %449 = extractelement <4 x float> %448, i32 1 + %450 = fcmp une float 0x40210068E0000000, %449 + %451 = select i1 %450, float 1.000000e+00, float 0.000000e+00 + %452 = load <4 x float> addrspace(8)* null + %453 = extractelement <4 x float> %452, i32 2 + %454 = fcmp une float 0xBFC9A6B500000000, %453 + %455 = select i1 %454, float 1.000000e+00, float 0.000000e+00 + %456 = load <4 x float> addrspace(8)* null + %457 = extractelement <4 x float> %456, i32 3 + %458 = fcmp une float 0xC0119BDA60000000, %457 + %459 = select i1 %458, float 1.000000e+00, float 0.000000e+00 + %460 = fsub float -0.000000e+00, %447 + %461 = fptosi float %460 to i32 + %462 = fsub float -0.000000e+00, %451 + %463 = fptosi float %462 to i32 + %464 = fsub float -0.000000e+00, %455 + %465 = fptosi float %464 to i32 + %466 = fsub float -0.000000e+00, %459 + %467 = fptosi float %466 to i32 + %468 = bitcast i32 %461 to float + %469 = bitcast i32 %463 to float + %470 = bitcast i32 %465 to float + %471 = bitcast i32 %467 to float + %472 = bitcast float %468 to i32 + %473 = bitcast float %469 to i32 + %474 = or i32 %472, %473 + %475 = bitcast i32 %474 to float + %476 = bitcast float %470 to i32 + %477 = bitcast float %471 to i32 + %478 = or i32 %476, %477 + %479 = bitcast i32 %478 to float + %480 = bitcast float %475 to i32 + %481 = bitcast float %479 to i32 + %482 = or i32 %480, %481 + %483 = bitcast i32 %482 to float + %484 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %485 = extractelement <4 x float> %484, i32 0 + %486 = fcmp une float 0xC02085D640000000, %485 + %487 = select i1 %486, float 1.000000e+00, float 0.000000e+00 + %488 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 
0, i32 1) + %489 = extractelement <4 x float> %488, i32 1 + %490 = fcmp une float 0xBFD7C1BDA0000000, %489 + %491 = select i1 %490, float 1.000000e+00, float 0.000000e+00 + %492 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %493 = extractelement <4 x float> %492, i32 2 + %494 = fcmp une float 0x401E1D7DC0000000, %493 + %495 = select i1 %494, float 1.000000e+00, float 0.000000e+00 + %496 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %497 = extractelement <4 x float> %496, i32 3 + %498 = fcmp une float 0xC019893740000000, %497 + %499 = select i1 %498, float 1.000000e+00, float 0.000000e+00 + %500 = fsub float -0.000000e+00, %487 + %501 = fptosi float %500 to i32 + %502 = fsub float -0.000000e+00, %491 + %503 = fptosi float %502 to i32 + %504 = fsub float -0.000000e+00, %495 + %505 = fptosi float %504 to i32 + %506 = fsub float -0.000000e+00, %499 + %507 = fptosi float %506 to i32 + %508 = bitcast i32 %501 to float + %509 = bitcast i32 %503 to float + %510 = bitcast i32 %505 to float + %511 = bitcast i32 %507 to float + %512 = bitcast float %508 to i32 + %513 = bitcast float %509 to i32 + %514 = or i32 %512, %513 + %515 = bitcast i32 %514 to float + %516 = bitcast float %510 to i32 + %517 = bitcast float %511 to i32 + %518 = or i32 %516, %517 + %519 = bitcast i32 %518 to float + %520 = bitcast float %515 to i32 + %521 = bitcast float %519 to i32 + %522 = or i32 %520, %521 + %523 = bitcast i32 %522 to float + %524 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %525 = extractelement <4 x float> %524, i32 0 + %526 = fcmp une float 0x40220F0D80000000, %525 + %527 = select i1 %526, float 1.000000e+00, float 0.000000e+00 + %528 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %529 = extractelement <4 x float> %528, i32 1 + %530 = fcmp une float 0xC018E2EB20000000, %529 + %531 = select i1 %530, float 1.000000e+00, float 0.000000e+00 + %532 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %533 = extractelement <4 x float> %532, i32 2 + %534 = fcmp une float 0xBFEA8DB8C0000000, %533 + %535 = select i1 %534, float 1.000000e+00, float 0.000000e+00 + %536 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %537 = extractelement <4 x float> %536, i32 3 + %538 = fcmp une float 0x4015236E20000000, %537 + %539 = select i1 %538, float 1.000000e+00, float 0.000000e+00 + %540 = fsub float -0.000000e+00, %527 + %541 = fptosi float %540 to i32 + %542 = fsub float -0.000000e+00, %531 + %543 = fptosi float %542 to i32 + %544 = fsub float -0.000000e+00, %535 + %545 = fptosi float %544 to i32 + %546 = fsub float -0.000000e+00, %539 + %547 = fptosi float %546 to i32 + %548 = bitcast i32 %541 to float + %549 = bitcast i32 %543 to float + %550 = bitcast i32 %545 to float + %551 = bitcast i32 %547 to float + %552 = bitcast float %548 to i32 + %553 = bitcast float %549 to i32 + %554 = or i32 %552, %553 + %555 = bitcast i32 %554 to float + %556 = bitcast float %550 to i32 + %557 = bitcast float %551 to i32 + %558 = or i32 %556, %557 + %559 = bitcast i32 %558 to float + %560 = bitcast float %555 to i32 + %561 = bitcast float %559 to i32 + %562 = or i32 %560, %561 + %563 = bitcast i32 %562 to float + %564 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* 
null, i64 0, i32 3) + %565 = extractelement <4 x float> %564, i32 0 + %566 = fcmp une float 0x4016ED5D00000000, %565 + %567 = select i1 %566, float 1.000000e+00, float 0.000000e+00 + %568 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3) + %569 = extractelement <4 x float> %568, i32 1 + %570 = fcmp une float 0x402332FEC0000000, %569 + %571 = select i1 %570, float 1.000000e+00, float 0.000000e+00 + %572 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3) + %573 = extractelement <4 x float> %572, i32 2 + %574 = fcmp une float 0xC01484B5E0000000, %573 + %575 = select i1 %574, float 1.000000e+00, float 0.000000e+00 + %576 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3) + %577 = extractelement <4 x float> %576, i32 3 + %578 = fcmp une float 0x400179A6C0000000, %577 + %579 = select i1 %578, float 1.000000e+00, float 0.000000e+00 + %580 = fsub float -0.000000e+00, %567 + %581 = fptosi float %580 to i32 + %582 = fsub float -0.000000e+00, %571 + %583 = fptosi float %582 to i32 + %584 = fsub float -0.000000e+00, %575 + %585 = fptosi float %584 to i32 + %586 = fsub float -0.000000e+00, %579 + %587 = fptosi float %586 to i32 + %588 = bitcast i32 %581 to float + %589 = bitcast i32 %583 to float + %590 = bitcast i32 %585 to float + %591 = bitcast i32 %587 to float + %592 = bitcast float %588 to i32 + %593 = bitcast float %589 to i32 + %594 = or i32 %592, %593 + %595 = bitcast i32 %594 to float + %596 = bitcast float %590 to i32 + %597 = bitcast float %591 to i32 + %598 = or i32 %596, %597 + %599 = bitcast i32 %598 to float + %600 = bitcast float %595 to i32 + %601 = bitcast float %599 to i32 + %602 = or i32 %600, %601 + %603 = bitcast i32 %602 to float + %604 = insertelement <4 x float> undef, float %483, i32 0 + %605 = insertelement <4 x float> %604, float %523, i32 1 + %606 = insertelement <4 x float> %605, float %563, i32 2 + %607 = insertelement <4 x float> %606, float %603, i32 3 + %608 = insertelement <4 x float> undef, float %483, i32 0 + %609 = insertelement <4 x float> %608, float %523, i32 1 + %610 = insertelement <4 x float> %609, float %563, i32 2 + %611 = insertelement <4 x float> %610, float %603, i32 3 + %612 = call float @llvm.AMDGPU.dp4(<4 x float> %607, <4 x float> %611) + %613 = bitcast float %612 to i32 + %614 = icmp ne i32 %613, 0 + %615 = sext i1 %614 to i32 + %616 = bitcast i32 %615 to float + %617 = bitcast float %616 to i32 + %618 = xor i32 %617, -1 + %619 = bitcast i32 %618 to float + %620 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) + %621 = extractelement <4 x float> %620, i32 0 + %622 = fcmp une float 0x40210068E0000000, %621 + %623 = select i1 %622, float 1.000000e+00, float 0.000000e+00 + %624 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) + %625 = extractelement <4 x float> %624, i32 1 + %626 = fcmp une float 0xBFC9A6B500000000, %625 + %627 = select i1 %626, float 1.000000e+00, float 0.000000e+00 + %628 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) + %629 = extractelement <4 x float> %628, i32 2 + %630 = fcmp une float 0xC0119BDA60000000, %629 + %631 = select i1 %630, float 1.000000e+00, float 0.000000e+00 + %632 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) + %633 = extractelement <4 x 
float> %632, i32 3 + %634 = fcmp une float 0xC02085D640000000, %633 + %635 = select i1 %634, float 1.000000e+00, float 0.000000e+00 + %636 = fsub float -0.000000e+00, %623 + %637 = fptosi float %636 to i32 + %638 = fsub float -0.000000e+00, %627 + %639 = fptosi float %638 to i32 + %640 = fsub float -0.000000e+00, %631 + %641 = fptosi float %640 to i32 + %642 = fsub float -0.000000e+00, %635 + %643 = fptosi float %642 to i32 + %644 = bitcast i32 %637 to float + %645 = bitcast i32 %639 to float + %646 = bitcast i32 %641 to float + %647 = bitcast i32 %643 to float + %648 = bitcast float %644 to i32 + %649 = bitcast float %645 to i32 + %650 = or i32 %648, %649 + %651 = bitcast i32 %650 to float + %652 = bitcast float %646 to i32 + %653 = bitcast float %647 to i32 + %654 = or i32 %652, %653 + %655 = bitcast i32 %654 to float + %656 = bitcast float %651 to i32 + %657 = bitcast float %655 to i32 + %658 = or i32 %656, %657 + %659 = bitcast i32 %658 to float + %660 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) + %661 = extractelement <4 x float> %660, i32 0 + %662 = fcmp une float 0xBFD7C1BDA0000000, %661 + %663 = select i1 %662, float 1.000000e+00, float 0.000000e+00 + %664 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) + %665 = extractelement <4 x float> %664, i32 1 + %666 = fcmp une float 0x401E1D7DC0000000, %665 + %667 = select i1 %666, float 1.000000e+00, float 0.000000e+00 + %668 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) + %669 = extractelement <4 x float> %668, i32 2 + %670 = fcmp une float 0xC019893740000000, %669 + %671 = select i1 %670, float 1.000000e+00, float 0.000000e+00 + %672 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) + %673 = extractelement <4 x float> %672, i32 3 + %674 = fcmp une float 0x40220F0D80000000, %673 + %675 = select i1 %674, float 1.000000e+00, float 0.000000e+00 + %676 = fsub float -0.000000e+00, %663 + %677 = fptosi float %676 to i32 + %678 = fsub float -0.000000e+00, %667 + %679 = fptosi float %678 to i32 + %680 = fsub float -0.000000e+00, %671 + %681 = fptosi float %680 to i32 + %682 = fsub float -0.000000e+00, %675 + %683 = fptosi float %682 to i32 + %684 = bitcast i32 %677 to float + %685 = bitcast i32 %679 to float + %686 = bitcast i32 %681 to float + %687 = bitcast i32 %683 to float + %688 = bitcast float %684 to i32 + %689 = bitcast float %685 to i32 + %690 = or i32 %688, %689 + %691 = bitcast i32 %690 to float + %692 = bitcast float %686 to i32 + %693 = bitcast float %687 to i32 + %694 = or i32 %692, %693 + %695 = bitcast i32 %694 to float + %696 = bitcast float %691 to i32 + %697 = bitcast float %695 to i32 + %698 = or i32 %696, %697 + %699 = bitcast i32 %698 to float + %700 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6) + %701 = extractelement <4 x float> %700, i32 0 + %702 = fcmp une float 0xC018E2EB20000000, %701 + %703 = select i1 %702, float 1.000000e+00, float 0.000000e+00 + %704 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6) + %705 = extractelement <4 x float> %704, i32 1 + %706 = fcmp une float 0xBFEA8DB8C0000000, %705 + %707 = select i1 %706, float 1.000000e+00, float 0.000000e+00 + %708 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6) + %709 = 
extractelement <4 x float> %708, i32 2 + %710 = fcmp une float 0x4015236E20000000, %709 + %711 = select i1 %710, float 1.000000e+00, float 0.000000e+00 + %712 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6) + %713 = extractelement <4 x float> %712, i32 3 + %714 = fcmp une float 0x4016ED5D00000000, %713 + %715 = select i1 %714, float 1.000000e+00, float 0.000000e+00 + %716 = fsub float -0.000000e+00, %703 + %717 = fptosi float %716 to i32 + %718 = fsub float -0.000000e+00, %707 + %719 = fptosi float %718 to i32 + %720 = fsub float -0.000000e+00, %711 + %721 = fptosi float %720 to i32 + %722 = fsub float -0.000000e+00, %715 + %723 = fptosi float %722 to i32 + %724 = bitcast i32 %717 to float + %725 = bitcast i32 %719 to float + %726 = bitcast i32 %721 to float + %727 = bitcast i32 %723 to float + %728 = bitcast float %724 to i32 + %729 = bitcast float %725 to i32 + %730 = or i32 %728, %729 + %731 = bitcast i32 %730 to float + %732 = bitcast float %726 to i32 + %733 = bitcast float %727 to i32 + %734 = or i32 %732, %733 + %735 = bitcast i32 %734 to float + %736 = bitcast float %731 to i32 + %737 = bitcast float %735 to i32 + %738 = or i32 %736, %737 + %739 = bitcast i32 %738 to float + %740 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7) + %741 = extractelement <4 x float> %740, i32 0 + %742 = fcmp une float 0x402332FEC0000000, %741 + %743 = select i1 %742, float 1.000000e+00, float 0.000000e+00 + %744 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7) + %745 = extractelement <4 x float> %744, i32 1 + %746 = fcmp une float 0xC01484B5E0000000, %745 + %747 = select i1 %746, float 1.000000e+00, float 0.000000e+00 + %748 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7) + %749 = extractelement <4 x float> %748, i32 2 + %750 = fcmp une float 0x400179A6C0000000, %749 + %751 = select i1 %750, float 1.000000e+00, float 0.000000e+00 + %752 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7) + %753 = extractelement <4 x float> %752, i32 3 + %754 = fcmp une float 0xBFEE752540000000, %753 + %755 = select i1 %754, float 1.000000e+00, float 0.000000e+00 + %756 = fsub float -0.000000e+00, %743 + %757 = fptosi float %756 to i32 + %758 = fsub float -0.000000e+00, %747 + %759 = fptosi float %758 to i32 + %760 = fsub float -0.000000e+00, %751 + %761 = fptosi float %760 to i32 + %762 = fsub float -0.000000e+00, %755 + %763 = fptosi float %762 to i32 + %764 = bitcast i32 %757 to float + %765 = bitcast i32 %759 to float + %766 = bitcast i32 %761 to float + %767 = bitcast i32 %763 to float + %768 = bitcast float %764 to i32 + %769 = bitcast float %765 to i32 + %770 = or i32 %768, %769 + %771 = bitcast i32 %770 to float + %772 = bitcast float %766 to i32 + %773 = bitcast float %767 to i32 + %774 = or i32 %772, %773 + %775 = bitcast i32 %774 to float + %776 = bitcast float %771 to i32 + %777 = bitcast float %775 to i32 + %778 = or i32 %776, %777 + %779 = bitcast i32 %778 to float + %780 = insertelement <4 x float> undef, float %659, i32 0 + %781 = insertelement <4 x float> %780, float %699, i32 1 + %782 = insertelement <4 x float> %781, float %739, i32 2 + %783 = insertelement <4 x float> %782, float %779, i32 3 + %784 = insertelement <4 x float> undef, float %659, i32 0 + %785 = insertelement <4 x float> %784, float %699, i32 1 + %786 = insertelement <4 x 
float> %785, float %739, i32 2 + %787 = insertelement <4 x float> %786, float %779, i32 3 + %788 = call float @llvm.AMDGPU.dp4(<4 x float> %783, <4 x float> %787) + %789 = bitcast float %788 to i32 + %790 = icmp ne i32 %789, 0 + %791 = sext i1 %790 to i32 + %792 = bitcast i32 %791 to float + %793 = bitcast float %792 to i32 + %794 = xor i32 %793, -1 + %795 = bitcast i32 %794 to float + %796 = bitcast float %91 to i32 + %797 = bitcast float %179 to i32 + %798 = and i32 %796, %797 + %799 = bitcast i32 %798 to float + %800 = bitcast float %311 to i32 + %801 = bitcast float %443 to i32 + %802 = and i32 %800, %801 + %803 = bitcast i32 %802 to float + %804 = bitcast float %799 to i32 + %805 = bitcast float %803 to i32 + %806 = and i32 %804, %805 + %807 = bitcast i32 %806 to float + %808 = bitcast float %619 to i32 + %809 = bitcast float %795 to i32 + %810 = and i32 %808, %809 + %811 = bitcast i32 %810 to float + %812 = bitcast float %807 to i32 + %813 = bitcast float %811 to i32 + %814 = and i32 %812, %813 + %815 = bitcast i32 %814 to float + %816 = bitcast float %815 to i32 + %817 = icmp ne i32 %816, 0 + %. = select i1 %817, float 1.000000e+00, float 0.000000e+00 + %.32 = select i1 %817, float 0.000000e+00, float 1.000000e+00 + %818 = insertelement <4 x float> undef, float %0, i32 0 + %819 = insertelement <4 x float> %818, float %1, i32 1 + %820 = insertelement <4 x float> %819, float %2, i32 2 + %821 = insertelement <4 x float> %820, float %3, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %821, i32 60, i32 1) + %822 = insertelement <4 x float> undef, float %.32, i32 0 + %823 = insertelement <4 x float> %822, float %., i32 1 + %824 = insertelement <4 x float> %823, float 0.000000e+00, i32 2 + %825 = insertelement <4 x float> %824, float 1.000000e+00, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %825, i32 0, i32 2) + ret void +} + +declare float @llvm.R600.load.input(i32) #1 + +declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 + +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { "ShaderType"="1" } +attributes #1 = { readnone } diff --git a/test/CodeGen/R600/and.v4i32.ll b/test/CodeGen/R600/and.v4i32.ll new file mode 100644 index 000000000000..662085e2d673 --- /dev/null +++ b/test/CodeGen/R600/and.v4i32.ll @@ -0,0 +1,15 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 + %a = load <4 x i32> addrspace(1) * %in + %b = load <4 x i32> addrspace(1) * %b_ptr + %result = and <4 x i32> %a, %b + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll b/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll new file mode 100644 index 000000000000..fd958b365961 --- /dev/null +++ b/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll @@ -0,0 +1,36 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; This test is for a bug in +; DAGCombiner::reduceBuildVecConvertToConvertBuildVec() where +; the wrong type was being passed to +; TargetLowering::getOperationAction() when checking the legality of +; ISD::UINT_TO_FP 
and ISD::SINT_TO_FP opcodes. + + +; CHECK: @sint +; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @sint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) { +entry: + %ptr = getelementptr i32 addrspace(1)* %in, i32 1 + %sint = load i32 addrspace(1) * %in + %conv = sitofp i32 %sint to float + %0 = insertelement <4 x float> undef, float %conv, i32 0 + %splat = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> zeroinitializer + store <4 x float> %splat, <4 x float> addrspace(1)* %out + ret void +} + +;CHECK: @uint +;CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @uint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) { +entry: + %ptr = getelementptr i32 addrspace(1)* %in, i32 1 + %uint = load i32 addrspace(1) * %in + %conv = uitofp i32 %uint to float + %0 = insertelement <4 x float> undef, float %conv, i32 0 + %splat = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> zeroinitializer + store <4 x float> %splat, <4 x float> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/disconnected-predset-break-bug.ll b/test/CodeGen/R600/disconnected-predset-break-bug.ll new file mode 100644 index 000000000000..09baee7a1dcd --- /dev/null +++ b/test/CodeGen/R600/disconnected-predset-break-bug.ll @@ -0,0 +1,29 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; PRED_SET* instructions must be tied to any instruction that uses their +; result. This tests that there are no instructions between the PRED_SET* +; and the PREDICATE_BREAK in this loop. + +; CHECK: @loop_ge +; CHECK: LOOP_START_DX10 +; CHECK: PRED_SET +; CHECK-NEXT: JUMP +; CHECK-NEXT: LOOP_BREAK +define void @loop_ge(i32 addrspace(1)* nocapture %out, i32 %iterations) nounwind { +entry: + %cmp5 = icmp sgt i32 %iterations, 0 + br i1 %cmp5, label %for.body, label %for.end + +for.body: ; preds = %for.body, %entry + %i.07.in = phi i32 [ %i.07, %for.body ], [ %iterations, %entry ] + %ai.06 = phi i32 [ %add, %for.body ], [ 0, %entry ] + %i.07 = add nsw i32 %i.07.in, -1 + %arrayidx = getelementptr inbounds i32 addrspace(1)* %out, i32 %ai.06 + store i32 %i.07, i32 addrspace(1)* %arrayidx, align 4 + %add = add nsw i32 %ai.06, 1 + %exitcond = icmp eq i32 %add, %iterations + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} diff --git a/test/CodeGen/R600/fabs.ll b/test/CodeGen/R600/fabs.ll new file mode 100644 index 000000000000..0407533eaa5f --- /dev/null +++ b/test/CodeGen/R600/fabs.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: MOV T{{[0-9]+\.[XYZW], \|T[0-9]+\.[XYZW]\|}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @fabs( float %r0) + call void @llvm.AMDGPU.store.output(float %r1, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) + +declare float @fabs(float ) readnone diff --git a/test/CodeGen/R600/fadd.ll b/test/CodeGen/R600/fadd.ll new file mode 100644 index 000000000000..d7d1b6572c41 --- /dev/null +++ b/test/CodeGen/R600/fadd.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.R600.load.input(i32 1) + %r2 = fadd float %r0, %r1 + call void @llvm.AMDGPU.store.output(float %r2, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone 
+ +declare void @llvm.AMDGPU.store.output(float, i32) + diff --git a/test/CodeGen/R600/fadd.v4f32.ll b/test/CodeGen/R600/fadd.v4f32.ll new file mode 100644 index 000000000000..85dbfd52cbb3 --- /dev/null +++ b/test/CodeGen/R600/fadd.v4f32.ll @@ -0,0 +1,15 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1 + %a = load <4 x float> addrspace(1) * %in + %b = load <4 x float> addrspace(1) * %b_ptr + %result = fadd <4 x float> %a, %b + store <4 x float> %result, <4 x float> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/fcmp-cnd.ll b/test/CodeGen/R600/fcmp-cnd.ll new file mode 100644 index 000000000000..a94cfb5cf2fe --- /dev/null +++ b/test/CodeGen/R600/fcmp-cnd.ll @@ -0,0 +1,14 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;Not checking arguments 2 and 3 to CNDE, because they may change between +;registers and literal.x depending on what the optimizer does. +;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) { +entry: + %0 = load float addrspace(1)* %in + %cmp = fcmp oeq float %0, 0.000000e+00 + %value = select i1 %cmp, i32 2, i32 3 + store i32 %value, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/fcmp-cnde-int-args.ll b/test/CodeGen/R600/fcmp-cnde-int-args.ll new file mode 100644 index 000000000000..55aba0d72d39 --- /dev/null +++ b/test/CodeGen/R600/fcmp-cnde-int-args.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; This test checks a bug in R600TargetLowering::LowerSELECT_CC where the +; chance to optimize the fcmp + select instructions to SET* was missed +; due to the fact that the operands to fcmp and select had different types + +; CHECK: SET{{[A-Z]+}}_DX10 + +define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) { +entry: + %0 = load float addrspace(1)* %in + %cmp = fcmp oeq float %0, 0.000000e+00 + %value = select i1 %cmp, i32 -1, i32 0 + store i32 %value, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/fcmp.ll b/test/CodeGen/R600/fcmp.ll new file mode 100644 index 000000000000..37f621d23958 --- /dev/null +++ b/test/CodeGen/R600/fcmp.ll @@ -0,0 +1,37 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: @fcmp_sext +; CHECK: SETE_DX10 T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @fcmp_sext(i32 addrspace(1)* %out, float addrspace(1)* %in) { +entry: + %0 = load float addrspace(1)* %in + %arrayidx1 = getelementptr inbounds float addrspace(1)* %in, i32 1 + %1 = load float addrspace(1)* %arrayidx1 + %cmp = fcmp oeq float %0, %1 + %sext = sext i1 %cmp to i32 + store i32 %sext, i32 addrspace(1)* %out + ret void +} + +; This test checks that a setcc node with f32 operands is lowered to a +; SET*_DX10 instruction. 
Previously we were lowering this to: +; SET* + FP_TO_SINT + +; CHECK: @fcmp_br +; CHECK: SET{{[N]*}}E_DX10 T{{[0-9]+\.[XYZW], [a-zA-Z0-9, .]+}}(5.0 + +define void @fcmp_br(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp oeq float %in, 5.0 + br i1 %0, label %IF, label %ENDIF + +IF: + %1 = getelementptr i32 addrspace(1)* %out, i32 1 + store i32 0, i32 addrspace(1)* %1 + br label %ENDIF + +ENDIF: + store i32 0, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/fdiv.v4f32.ll b/test/CodeGen/R600/fdiv.v4f32.ll new file mode 100644 index 000000000000..79e677f541f5 --- /dev/null +++ b/test/CodeGen/R600/fdiv.v4f32.ll @@ -0,0 +1,19 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1 + %a = load <4 x float> addrspace(1) * %in + %b = load <4 x float> addrspace(1) * %b_ptr + %result = fdiv <4 x float> %a, %b + store <4 x float> %result, <4 x float> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/floor.ll b/test/CodeGen/R600/floor.ll new file mode 100644 index 000000000000..845330f28419 --- /dev/null +++ b/test/CodeGen/R600/floor.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: FLOOR T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @floor(float %r0) + call void @llvm.AMDGPU.store.output(float %r1, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) + +declare float @floor(float) readonly diff --git a/test/CodeGen/R600/fmad.ll b/test/CodeGen/R600/fmad.ll new file mode 100644 index 000000000000..a3d4d0ff0db7 --- /dev/null +++ b/test/CodeGen/R600/fmad.ll @@ -0,0 +1,19 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: MULADD_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.R600.load.input(i32 1) + %r2 = call float @llvm.R600.load.input(i32 2) + %r3 = fmul float %r0, %r1 + %r4 = fadd float %r3, %r2 + call void @llvm.AMDGPU.store.output(float %r4, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) + +declare float @fabs(float ) readnone diff --git a/test/CodeGen/R600/fmax.ll b/test/CodeGen/R600/fmax.ll new file mode 100644 index 000000000000..3708f0b9eed2 --- /dev/null +++ b/test/CodeGen/R600/fmax.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: MAX T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.R600.load.input(i32 1) + %r2 = fcmp uge float %r0, %r1 + %r3 = select i1 %r2, float %r0, float %r1 + call void @llvm.AMDGPU.store.output(float %r3, i32 0) + ret 
void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) diff --git a/test/CodeGen/R600/fmin.ll b/test/CodeGen/R600/fmin.ll new file mode 100644 index 000000000000..19d59ab3061e --- /dev/null +++ b/test/CodeGen/R600/fmin.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: MIN T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.R600.load.input(i32 1) + %r2 = fcmp uge float %r0, %r1 + %r3 = select i1 %r2, float %r1, float %r0 + call void @llvm.AMDGPU.store.output(float %r3, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) diff --git a/test/CodeGen/R600/fmul.ll b/test/CodeGen/R600/fmul.ll new file mode 100644 index 000000000000..eb1d523c0bb4 --- /dev/null +++ b/test/CodeGen/R600/fmul.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.R600.load.input(i32 1) + %r2 = fmul float %r0, %r1 + call void @llvm.AMDGPU.store.output(float %r2, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) + diff --git a/test/CodeGen/R600/fmul.v4f32.ll b/test/CodeGen/R600/fmul.v4f32.ll new file mode 100644 index 000000000000..6d44a0c5c782 --- /dev/null +++ b/test/CodeGen/R600/fmul.v4f32.ll @@ -0,0 +1,15 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1 + %a = load <4 x float> addrspace(1) * %in + %b = load <4 x float> addrspace(1) * %b_ptr + %result = fmul <4 x float> %a, %b + store <4 x float> %result, <4 x float> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/fsub.ll b/test/CodeGen/R600/fsub.ll new file mode 100644 index 000000000000..591aa52676a4 --- /dev/null +++ b/test/CodeGen/R600/fsub.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.R600.load.input(i32 1) + %r2 = fsub float %r0, %r1 + call void @llvm.AMDGPU.store.output(float %r2, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) + diff --git a/test/CodeGen/R600/fsub.v4f32.ll b/test/CodeGen/R600/fsub.v4f32.ll new file mode 100644 index 000000000000..612a57e4b609 --- /dev/null +++ b/test/CodeGen/R600/fsub.v4f32.ll @@ -0,0 +1,15 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1 + %a = load <4 x float> 
addrspace(1) * %in + %b = load <4 x float> addrspace(1) * %b_ptr + %result = fsub <4 x float> %a, %b + store <4 x float> %result, <4 x float> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/i8_to_double_to_float.ll b/test/CodeGen/R600/i8_to_double_to_float.ll new file mode 100644 index 000000000000..39f33227fa4b --- /dev/null +++ b/test/CodeGen/R600/i8_to_double_to_float.ll @@ -0,0 +1,11 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test(float addrspace(1)* %out, i8 addrspace(1)* %in) { + %1 = load i8 addrspace(1)* %in + %2 = uitofp i8 %1 to double + %3 = fptrunc double %2 to float + store float %3, float addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/icmp-select-sete-reverse-args.ll b/test/CodeGen/R600/icmp-select-sete-reverse-args.ll new file mode 100644 index 000000000000..71705a64f50e --- /dev/null +++ b/test/CodeGen/R600/icmp-select-sete-reverse-args.ll @@ -0,0 +1,18 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;Test that a select with reversed True/False values is correctly lowered +;to a SETNE_INT. There should only be one SETNE_INT instruction. + +;CHECK: SETNE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK-NOT: SETNE_INT + +define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { +entry: + %0 = load i32 addrspace(1)* %in + %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %in, i32 1 + %1 = load i32 addrspace(1)* %arrayidx1 + %cmp = icmp eq i32 %0, %1 + %value = select i1 %cmp, i32 0, i32 -1 + store i32 %value, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/jump_address.ll b/test/CodeGen/R600/jump_address.ll new file mode 100644 index 000000000000..cd35bffb1304 --- /dev/null +++ b/test/CodeGen/R600/jump_address.ll @@ -0,0 +1,50 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: JUMP @4 + +define void @main() #0 { +main_body: + %0 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %1 = extractelement <4 x float> %0, i32 0 + %2 = bitcast float %1 to i32 + %3 = icmp eq i32 %2, 0 + %4 = sext i1 %3 to i32 + %5 = bitcast i32 %4 to float + %6 = bitcast float %5 to i32 + %7 = icmp ne i32 %6, 0 + br i1 %7, label %ENDIF, label %ELSE + +ELSE: ; preds = %main_body + %8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %9 = extractelement <4 x float> %8, i32 0 + %10 = bitcast float %9 to i32 + %11 = icmp eq i32 %10, 1 + %12 = sext i1 %11 to i32 + %13 = bitcast i32 %12 to float + %14 = bitcast float %13 to i32 + %15 = icmp ne i32 %14, 0 + br i1 %15, label %IF13, label %ENDIF + +ENDIF: ; preds = %IF13, %ELSE, %main_body + %temp.0 = phi float [ 0xFFF8000000000000, %main_body ], [ 0.000000e+00, %ELSE ], [ 0.000000e+00, %IF13 ] + %temp1.0 = phi float [ 0.000000e+00, %main_body ], [ %23, %IF13 ], [ 0.000000e+00, %ELSE ] + %temp2.0 = phi float [ 1.000000e+00, %main_body ], [ 0.000000e+00, %ELSE ], [ 0.000000e+00, %IF13 ] + %temp3.0 = phi float [ 5.000000e-01, %main_body ], [ 0.000000e+00, %ELSE ], [ 0.000000e+00, %IF13 ] + %16 = insertelement <4 x float> undef, float %temp.0, i32 0 + %17 = insertelement <4 x float> %16, float %temp1.0, i32 1 + %18 = insertelement <4 x float> %17, float %temp2.0, i32 2 + %19 = insertelement <4 x float> %18, float %temp3.0, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %19, i32 0, i32 0) + ret void + +IF13: ; preds = %ELSE + %20 = load <4 x 
float> addrspace(8)* null + %21 = extractelement <4 x float> %20, i32 0 + %22 = fsub float -0.000000e+00, %21 + %23 = fadd float 0xFFF8000000000000, %22 + br label %ENDIF +} + +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { "ShaderType"="0" } diff --git a/test/CodeGen/R600/kcache-fold.ll b/test/CodeGen/R600/kcache-fold.ll new file mode 100644 index 000000000000..3d70e4bd54aa --- /dev/null +++ b/test/CodeGen/R600/kcache-fold.ll @@ -0,0 +1,100 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: @main1 +; CHECK: MOV T{{[0-9]+\.[XYZW], KC0}} +define void @main1() { +main_body: + %0 = load <4 x float> addrspace(8)* null + %1 = extractelement <4 x float> %0, i32 0 + %2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %3 = extractelement <4 x float> %2, i32 0 + %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %5 = extractelement <4 x float> %4, i32 0 + %6 = fcmp ult float %1, 0.000000e+00 + %7 = select i1 %6, float %3, float %5 + %8 = load <4 x float> addrspace(8)* null + %9 = extractelement <4 x float> %8, i32 1 + %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %11 = extractelement <4 x float> %10, i32 1 + %12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %13 = extractelement <4 x float> %12, i32 1 + %14 = fcmp ult float %9, 0.000000e+00 + %15 = select i1 %14, float %11, float %13 + %16 = load <4 x float> addrspace(8)* null + %17 = extractelement <4 x float> %16, i32 2 + %18 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %19 = extractelement <4 x float> %18, i32 2 + %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %21 = extractelement <4 x float> %20, i32 2 + %22 = fcmp ult float %17, 0.000000e+00 + %23 = select i1 %22, float %19, float %21 + %24 = load <4 x float> addrspace(8)* null + %25 = extractelement <4 x float> %24, i32 3 + %26 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %27 = extractelement <4 x float> %26, i32 3 + %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %29 = extractelement <4 x float> %28, i32 3 + %30 = fcmp ult float %25, 0.000000e+00 + %31 = select i1 %30, float %27, float %29 + %32 = call float @llvm.AMDIL.clamp.(float %7, float 0.000000e+00, float 1.000000e+00) + %33 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00) + %34 = call float @llvm.AMDIL.clamp.(float %23, float 0.000000e+00, float 1.000000e+00) + %35 = call float @llvm.AMDIL.clamp.(float %31, float 0.000000e+00, float 1.000000e+00) + %36 = insertelement <4 x float> undef, float %32, i32 0 + %37 = insertelement <4 x float> %36, float %33, i32 1 + %38 = insertelement <4 x float> %37, float %34, i32 2 + %39 = insertelement <4 x float> %38, float %35, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %39, i32 0, i32 0) + ret void +} + +; CHECK: @main2 +; CHECK-NOT: MOV +define void @main2() { +main_body: + %0 = load <4 x float> addrspace(8)* null + %1 = extractelement <4 x float> %0, i32 0 + %2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %3 = extractelement <4 x float> %2, i32 0 + %4 = load <4 x float> 
addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %5 = extractelement <4 x float> %4, i32 1 + %6 = fcmp ult float %1, 0.000000e+00 + %7 = select i1 %6, float %3, float %5 + %8 = load <4 x float> addrspace(8)* null + %9 = extractelement <4 x float> %8, i32 1 + %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %11 = extractelement <4 x float> %10, i32 0 + %12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %13 = extractelement <4 x float> %12, i32 1 + %14 = fcmp ult float %9, 0.000000e+00 + %15 = select i1 %14, float %11, float %13 + %16 = load <4 x float> addrspace(8)* null + %17 = extractelement <4 x float> %16, i32 2 + %18 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %19 = extractelement <4 x float> %18, i32 3 + %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %21 = extractelement <4 x float> %20, i32 2 + %22 = fcmp ult float %17, 0.000000e+00 + %23 = select i1 %22, float %19, float %21 + %24 = load <4 x float> addrspace(8)* null + %25 = extractelement <4 x float> %24, i32 3 + %26 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %27 = extractelement <4 x float> %26, i32 3 + %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %29 = extractelement <4 x float> %28, i32 2 + %30 = fcmp ult float %25, 0.000000e+00 + %31 = select i1 %30, float %27, float %29 + %32 = call float @llvm.AMDIL.clamp.(float %7, float 0.000000e+00, float 1.000000e+00) + %33 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00) + %34 = call float @llvm.AMDIL.clamp.(float %23, float 0.000000e+00, float 1.000000e+00) + %35 = call float @llvm.AMDIL.clamp.(float %31, float 0.000000e+00, float 1.000000e+00) + %36 = insertelement <4 x float> undef, float %32, i32 0 + %37 = insertelement <4 x float> %36, float %33, i32 1 + %38 = insertelement <4 x float> %37, float %34, i32 2 + %39 = insertelement <4 x float> %38, float %35, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %39, i32 0, i32 0) + ret void +} + +declare float @llvm.AMDIL.clamp.(float, float, float) readnone +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) diff --git a/test/CodeGen/R600/legalizedag-bug-expand-setcc.ll b/test/CodeGen/R600/legalizedag-bug-expand-setcc.ll new file mode 100644 index 000000000000..1aae7f9f91f4 --- /dev/null +++ b/test/CodeGen/R600/legalizedag-bug-expand-setcc.ll @@ -0,0 +1,26 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; This tests a bug where LegalizeDAG was not checking the target's +; BooleanContents value and always using one for true, when expanding +; setcc to select_cc. +; +; This bug caused the icmp IR instruction to be expanded to two machine +; instructions, when only one is needed. 
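+;
+; A rough sketch of the expansion in question (an illustration for clarity,
+; not code taken from the patch): LegalizeDAG rewrites
+;   setcc %x, 5, seteq
+; as something like
+;   select_cc %x, 5, <true-val>, 0, seteq
+; where <true-val> should be the value implied by the target's
+; getBooleanContents(). For R600, whose SET*_INT results are 0/-1, that
+; value is -1 rather than a hard-coded 1; using 1 is what pulled the
+; second (CND-style) instruction into the output here.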
+; + +; CHECK: @setcc_expand +; CHECK: SET +; CHECK-NOT: CND +define void @setcc_expand(i32 addrspace(1)* %out, i32 %in) { +entry: + %0 = icmp eq i32 %in, 5 + br i1 %0, label %IF, label %ENDIF +IF: + %1 = getelementptr i32 addrspace(1)* %out, i32 1 + store i32 0, i32 addrspace(1)* %1 + br label %ENDIF + +ENDIF: + store i32 0, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/lit.local.cfg b/test/CodeGen/R600/lit.local.cfg new file mode 100644 index 000000000000..36ee493e5945 --- /dev/null +++ b/test/CodeGen/R600/lit.local.cfg @@ -0,0 +1,13 @@ +config.suffixes = ['.ll', '.c', '.cpp'] + +def getRoot(config): + if not config.parent: + return config + return getRoot(config.parent) + +root = getRoot(config) + +targets = set(root.targets_to_build.split()) +if not 'R600' in targets: + config.unsupported = True + diff --git a/test/CodeGen/R600/literals.ll b/test/CodeGen/R600/literals.ll new file mode 100644 index 000000000000..e69f64e0e142 --- /dev/null +++ b/test/CodeGen/R600/literals.ll @@ -0,0 +1,32 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; Test using an integer literal constant. +; Generated ASM should be: +; ADD_INT REG literal.x, 5 +; or +; ADD_INT literal.x REG, 5 + +; CHECK: @i32_literal +; CHECK: ADD_INT {{[A-Z0-9,. ]*}}literal.x,{{[A-Z0-9,. ]*}} 5 +define void @i32_literal(i32 addrspace(1)* %out, i32 %in) { +entry: + %0 = add i32 5, %in + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; Test using a float literal constant. +; Generated ASM should be: +; ADD REG literal.x, 5.0 +; or +; ADD literal.x REG, 5.0 + +; CHECK: @float_literal +; CHECK: ADD {{[A-Z0-9,. ]*}}literal.x,{{[A-Z0-9,. ]*}} {{[0-9]+}}(5.0 +define void @float_literal(float addrspace(1)* %out, float %in) { +entry: + %0 = fadd float 5.0, %in + store float %0, float addrspace(1)* %out + ret void +} + diff --git a/test/CodeGen/R600/llvm.AMDGPU.mul.ll b/test/CodeGen/R600/llvm.AMDGPU.mul.ll new file mode 100644 index 000000000000..693eb27457c2 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.mul.ll @@ -0,0 +1,17 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.R600.load.input(i32 1) + %r2 = call float @llvm.AMDGPU.mul( float %r0, float %r1) + call void @llvm.AMDGPU.store.output(float %r2, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) + +declare float @llvm.AMDGPU.mul(float ,float ) readnone diff --git a/test/CodeGen/R600/llvm.AMDGPU.tex.ll b/test/CodeGen/R600/llvm.AMDGPU.tex.ll new file mode 100644 index 000000000000..74331fa26934 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.tex.ll @@ -0,0 +1,42 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 1 +;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 2 +;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 3 +;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 4 +;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 5 +;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 6 +;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 7 +;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 8 +;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 9 +;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 10 +;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, 
T[0-9]+\.XYZW}}, 0, 0, 11 +;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 12 +;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 13 +;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 14 +;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 15 +;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 16 + +define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { + %addr = load <4 x float> addrspace(1)* %in + %res1 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %addr, i32 0, i32 0, i32 1) + %res2 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res1, i32 0, i32 0, i32 2) + %res3 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res2, i32 0, i32 0, i32 3) + %res4 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res3, i32 0, i32 0, i32 4) + %res5 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res4, i32 0, i32 0, i32 5) + %res6 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res5, i32 0, i32 0, i32 6) + %res7 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res6, i32 0, i32 0, i32 7) + %res8 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res7, i32 0, i32 0, i32 8) + %res9 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res8, i32 0, i32 0, i32 9) + %res10 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res9, i32 0, i32 0, i32 10) + %res11 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res10, i32 0, i32 0, i32 11) + %res12 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res11, i32 0, i32 0, i32 12) + %res13 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res12, i32 0, i32 0, i32 13) + %res14 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res13, i32 0, i32 0, i32 14) + %res15 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res14, i32 0, i32 0, i32 15) + %res16 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res15, i32 0, i32 0, i32 16) + store <4 x float> %res16, <4 x float> addrspace(1)* %out + ret void +} + +declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) readnone diff --git a/test/CodeGen/R600/llvm.AMDGPU.trunc.ll b/test/CodeGen/R600/llvm.AMDGPU.trunc.ll new file mode 100644 index 000000000000..fac957f7eeec --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.trunc.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: TRUNC T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.AMDGPU.trunc( float %r0) + call void @llvm.AMDGPU.store.output(float %r1, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) + +declare float @llvm.AMDGPU.trunc(float ) readnone diff --git a/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll b/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll new file mode 100644 index 000000000000..bf0cdaa2fa3a --- /dev/null +++ b/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll @@ -0,0 +1,21 @@ +;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s + +;CHECK: S_MOV_B32 +;CHECK-NEXT: V_INTERP_MOV_F32 + +define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) "ShaderType"="0" { +main_body: + %4 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %3) + %5 = call i32 @llvm.SI.packf16(float %4, float %4) + %6 = bitcast i32 %5 to float + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %6, float %6, float %6, float %6) + ret void +} + +declare void @llvm.AMDGPU.shader.type(i32) + +declare float @llvm.SI.fs.constant(i32, i32, i32) readnone + 
+declare i32 @llvm.SI.packf16(float, float) readnone + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) diff --git a/test/CodeGen/R600/llvm.SI.sample.ll b/test/CodeGen/R600/llvm.SI.sample.ll new file mode 100644 index 000000000000..c724395b98c2 --- /dev/null +++ b/test/CodeGen/R600/llvm.SI.sample.ll @@ -0,0 +1,106 @@ +;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s + +;CHECK: IMAGE_SAMPLE +;CHECK: IMAGE_SAMPLE +;CHECK: IMAGE_SAMPLE +;CHECK: IMAGE_SAMPLE +;CHECK: IMAGE_SAMPLE +;CHECK: IMAGE_SAMPLE_C +;CHECK: IMAGE_SAMPLE_C +;CHECK: IMAGE_SAMPLE_C +;CHECK: IMAGE_SAMPLE +;CHECK: IMAGE_SAMPLE +;CHECK: IMAGE_SAMPLE_C +;CHECK: IMAGE_SAMPLE_C +;CHECK: IMAGE_SAMPLE_C +;CHECK: IMAGE_SAMPLE +;CHECK: IMAGE_SAMPLE +;CHECK: IMAGE_SAMPLE + +define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { + %v1 = insertelement <4 x i32> undef, i32 %a1, i32 0 + %v2 = insertelement <4 x i32> undef, i32 %a1, i32 1 + %v3 = insertelement <4 x i32> undef, i32 %a1, i32 2 + %v4 = insertelement <4 x i32> undef, i32 %a1, i32 3 + %v5 = insertelement <4 x i32> undef, i32 %a2, i32 0 + %v6 = insertelement <4 x i32> undef, i32 %a2, i32 1 + %v7 = insertelement <4 x i32> undef, i32 %a2, i32 2 + %v8 = insertelement <4 x i32> undef, i32 %a2, i32 3 + %v9 = insertelement <4 x i32> undef, i32 %a3, i32 0 + %v10 = insertelement <4 x i32> undef, i32 %a3, i32 1 + %v11 = insertelement <4 x i32> undef, i32 %a3, i32 2 + %v12 = insertelement <4 x i32> undef, i32 %a3, i32 3 + %v13 = insertelement <4 x i32> undef, i32 %a4, i32 0 + %v14 = insertelement <4 x i32> undef, i32 %a4, i32 1 + %v15 = insertelement <4 x i32> undef, i32 %a4, i32 2 + %v16 = insertelement <4 x i32> undef, i32 %a4, i32 3 + %res1 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v1, + <8 x i32> undef, <4 x i32> undef, i32 1) + %res2 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v2, + <8 x i32> undef, <4 x i32> undef, i32 2) + %res3 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v3, + <8 x i32> undef, <4 x i32> undef, i32 3) + %res4 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v4, + <8 x i32> undef, <4 x i32> undef, i32 4) + %res5 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v5, + <8 x i32> undef, <4 x i32> undef, i32 5) + %res6 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v6, + <8 x i32> undef, <4 x i32> undef, i32 6) + %res7 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v7, + <8 x i32> undef, <4 x i32> undef, i32 7) + %res8 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v8, + <8 x i32> undef, <4 x i32> undef, i32 8) + %res9 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v9, + <8 x i32> undef, <4 x i32> undef, i32 9) + %res10 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v10, + <8 x i32> undef, <4 x i32> undef, i32 10) + %res11 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v11, + <8 x i32> undef, <4 x i32> undef, i32 11) + %res12 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v12, + <8 x i32> undef, <4 x i32> undef, i32 12) + %res13 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v13, + <8 x i32> undef, <4 x i32> undef, i32 13) + %res14 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v14, + <8 x i32> undef, <4 x i32> undef, i32 14) + %res15 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v15, + <8 x i32> undef, <4 x i32> undef, i32 15) + %res16 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v16, + <8 x i32> undef, <4 x i32> undef, i32 16) + %e1 = extractelement <4 x float> %res1, i32 0 + %e2 = extractelement 
<4 x float> %res2, i32 0 + %e3 = extractelement <4 x float> %res3, i32 0 + %e4 = extractelement <4 x float> %res4, i32 0 + %e5 = extractelement <4 x float> %res5, i32 0 + %e6 = extractelement <4 x float> %res6, i32 0 + %e7 = extractelement <4 x float> %res7, i32 0 + %e8 = extractelement <4 x float> %res8, i32 0 + %e9 = extractelement <4 x float> %res9, i32 0 + %e10 = extractelement <4 x float> %res10, i32 0 + %e11 = extractelement <4 x float> %res11, i32 0 + %e12 = extractelement <4 x float> %res12, i32 0 + %e13 = extractelement <4 x float> %res13, i32 0 + %e14 = extractelement <4 x float> %res14, i32 0 + %e15 = extractelement <4 x float> %res15, i32 0 + %e16 = extractelement <4 x float> %res16, i32 0 + %s1 = fadd float %e1, %e2 + %s2 = fadd float %s1, %e3 + %s3 = fadd float %s2, %e4 + %s4 = fadd float %s3, %e5 + %s5 = fadd float %s4, %e6 + %s6 = fadd float %s5, %e7 + %s7 = fadd float %s6, %e8 + %s8 = fadd float %s7, %e9 + %s9 = fadd float %s8, %e10 + %s10 = fadd float %s9, %e11 + %s11 = fadd float %s10, %e12 + %s12 = fadd float %s11, %e13 + %s13 = fadd float %s12, %e14 + %s14 = fadd float %s13, %e15 + %s15 = fadd float %s14, %e16 + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %s15, float %s15, float %s15, float %s15) + ret void +} + +declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) diff --git a/test/CodeGen/R600/llvm.cos.ll b/test/CodeGen/R600/llvm.cos.ll new file mode 100644 index 000000000000..dc120bfb00c2 --- /dev/null +++ b/test/CodeGen/R600/llvm.cos.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: COS T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.cos.f32(float %r0) + call void @llvm.AMDGPU.store.output(float %r1, i32 0) + ret void +} + +declare float @llvm.cos.f32(float) readnone + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) diff --git a/test/CodeGen/R600/llvm.pow.ll b/test/CodeGen/R600/llvm.pow.ll new file mode 100644 index 000000000000..b4ce9f429f16 --- /dev/null +++ b/test/CodeGen/R600/llvm.pow.ll @@ -0,0 +1,19 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: LOG_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK-NEXT: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK-NEXT: EXP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.R600.load.input(i32 1) + %r2 = call float @llvm.pow.f32( float %r0, float %r1) + call void @llvm.AMDGPU.store.output(float %r2, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) + +declare float @llvm.pow.f32(float ,float ) readonly diff --git a/test/CodeGen/R600/llvm.sin.ll b/test/CodeGen/R600/llvm.sin.ll new file mode 100644 index 000000000000..5cd6998c9370 --- /dev/null +++ b/test/CodeGen/R600/llvm.sin.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: SIN T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.sin.f32( float %r0) + call void @llvm.AMDGPU.store.output(float %r1, i32 0) + ret void +} + +declare float @llvm.sin.f32(float) readnone + +declare float @llvm.R600.load.input(i32) readnone + 
+declare void @llvm.AMDGPU.store.output(float, i32) diff --git a/test/CodeGen/R600/load.constant_addrspace.f32.ll b/test/CodeGen/R600/load.constant_addrspace.f32.ll new file mode 100644 index 000000000000..93627283bb94 --- /dev/null +++ b/test/CodeGen/R600/load.constant_addrspace.f32.ll @@ -0,0 +1,9 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: VTX_READ_32 T{{[0-9]+\.X, T[0-9]+\.X}} + +define void @test(float addrspace(1)* %out, float addrspace(2)* %in) { + %1 = load float addrspace(2)* %in + store float %1, float addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/load.i8.ll b/test/CodeGen/R600/load.i8.ll new file mode 100644 index 000000000000..b070dcd52049 --- /dev/null +++ b/test/CodeGen/R600/load.i8.ll @@ -0,0 +1,10 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}} + +define void @test(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { + %1 = load i8 addrspace(1)* %in + %2 = zext i8 %1 to i32 + store i32 %2, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/lshl.ll b/test/CodeGen/R600/lshl.ll new file mode 100644 index 000000000000..423adb9da900 --- /dev/null +++ b/test/CodeGen/R600/lshl.ll @@ -0,0 +1,14 @@ +;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s + +;CHECK: V_LSHLREV_B32_e32 VGPR0, 1, VGPR0 + +define void @test(i32 %p) { + %i = mul i32 %p, 2 + %r = bitcast i32 %i to float + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r) + ret void +} + +declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) diff --git a/test/CodeGen/R600/lshr.ll b/test/CodeGen/R600/lshr.ll new file mode 100644 index 000000000000..551eac1d76bf --- /dev/null +++ b/test/CodeGen/R600/lshr.ll @@ -0,0 +1,14 @@ +;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s + +;CHECK: V_LSHRREV_B32_e32 VGPR0, 1, VGPR0 + +define void @test(i32 %p) { + %i = udiv i32 %p, 2 + %r = bitcast i32 %i to float + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r) + ret void +} + +declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) diff --git a/test/CodeGen/R600/mulhu.ll b/test/CodeGen/R600/mulhu.ll new file mode 100644 index 000000000000..28744e00c3cf --- /dev/null +++ b/test/CodeGen/R600/mulhu.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s + +;CHECK: V_MOV_B32_e32 VGPR1, -1431655765 +;CHECK-NEXT: V_MUL_HI_U32 VGPR0, VGPR0, VGPR1, 0, 0, 0, 0, 0 +;CHECK-NEXT: V_LSHRREV_B32_e32 VGPR0, 1, VGPR0 + +define void @test(i32 %p) { + %i = udiv i32 %p, 3 + %r = bitcast i32 %i to float + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r) + ret void +} + +declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) diff --git a/test/CodeGen/R600/predicates.ll b/test/CodeGen/R600/predicates.ll new file mode 100644 index 000000000000..eb8b052b6f72 --- /dev/null +++ b/test/CodeGen/R600/predicates.ll @@ -0,0 +1,104 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; These tests make sure the compiler is optimizing branches using predicates +; when it is legal to do so. 
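+;
+; Informal sketch of the code shape the CHECK lines below expect (an
+; illustration, not output copied from the compiler; register names are
+; placeholders): instead of branching around a one-instruction body, the
+; body is predicated, e.g. for @simple_if roughly
+;   PRED_SETGT_INT Pred, ...                        ; compare result goes to Pred
+;   LSHL T0.X, T0.X, 1, 0(0.000000e+00) Pred_sel    ; executes only when Pred is set
+; The nested cases still use ALU_PUSH_BEFORE/JUMP/POP for the outer branch,
+; but the innermost body is predicated in the same way.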
+ +; CHECK: @simple_if +; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred, +; CHECK: LSHL T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel +define void @simple_if(i32 addrspace(1)* %out, i32 %in) { +entry: + %0 = icmp sgt i32 %in, 0 + br i1 %0, label %IF, label %ENDIF + +IF: + %1 = shl i32 %in, 1 + br label %ENDIF + +ENDIF: + %2 = phi i32 [ %in, %entry ], [ %1, %IF ] + store i32 %2, i32 addrspace(1)* %out + ret void +} + +; CHECK: @simple_if_else +; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred, +; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel +; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel +define void @simple_if_else(i32 addrspace(1)* %out, i32 %in) { +entry: + %0 = icmp sgt i32 %in, 0 + br i1 %0, label %IF, label %ELSE + +IF: + %1 = shl i32 %in, 1 + br label %ENDIF + +ELSE: + %2 = lshr i32 %in, 1 + br label %ENDIF + +ENDIF: + %3 = phi i32 [ %1, %IF ], [ %2, %ELSE ] + store i32 %3, i32 addrspace(1)* %out + ret void +} + +; CHECK: @nested_if +; CHECK: ALU_PUSH_BEFORE +; CHECK: PRED_SET{{[EGN][ET]*}}_INT Exec +; CHECK: JUMP +; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred, +; CHECK: LSHL T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel +; CHECK: POP +define void @nested_if(i32 addrspace(1)* %out, i32 %in) { +entry: + %0 = icmp sgt i32 %in, 0 + br i1 %0, label %IF0, label %ENDIF + +IF0: + %1 = add i32 %in, 10 + %2 = icmp sgt i32 %1, 0 + br i1 %2, label %IF1, label %ENDIF + +IF1: + %3 = shl i32 %1, 1 + br label %ENDIF + +ENDIF: + %4 = phi i32 [%in, %entry], [%1, %IF0], [%3, %IF1] + store i32 %4, i32 addrspace(1)* %out + ret void +} + +; CHECK: @nested_if_else +; CHECK: ALU_PUSH_BEFORE +; CHECK: PRED_SET{{[EGN][ET]*}}_INT Exec +; CHECK: JUMP +; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred, +; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel +; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel +; CHECK: POP +define void @nested_if_else(i32 addrspace(1)* %out, i32 %in) { +entry: + %0 = icmp sgt i32 %in, 0 + br i1 %0, label %IF0, label %ENDIF + +IF0: + %1 = add i32 %in, 10 + %2 = icmp sgt i32 %1, 0 + br i1 %2, label %IF1, label %ELSE1 + +IF1: + %3 = shl i32 %1, 1 + br label %ENDIF + +ELSE1: + %4 = lshr i32 %in, 1 + br label %ENDIF + +ENDIF: + %5 = phi i32 [%in, %entry], [%3, %IF1], [%4, %ELSE1] + store i32 %5, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/reciprocal.ll b/test/CodeGen/R600/reciprocal.ll new file mode 100644 index 000000000000..6838c1ae3662 --- /dev/null +++ b/test/CodeGen/R600/reciprocal.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = fdiv float 1.0, %r0 + call void @llvm.AMDGPU.store.output(float %r1, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) + +declare float @llvm.AMDGPU.rcp(float ) readnone diff --git a/test/CodeGen/R600/schedule-fs-loop-nested-if.ll b/test/CodeGen/R600/schedule-fs-loop-nested-if.ll new file mode 100644 index 000000000000..ba9620c40a49 --- /dev/null +++ b/test/CodeGen/R600/schedule-fs-loop-nested-if.ll @@ -0,0 +1,83 @@ +;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched +;REQUIRES: asserts + +define void @main() { +main_body: + %0 = call float @llvm.R600.interp.input(i32 0, i32 0) + %1 = call float @llvm.R600.interp.input(i32 1, i32 
0) + %2 = call float @llvm.R600.interp.input(i32 2, i32 0) + %3 = call float @llvm.R600.interp.input(i32 3, i32 0) + %4 = fcmp ult float %1, 0.000000e+00 + %5 = select i1 %4, float 1.000000e+00, float 0.000000e+00 + %6 = fsub float -0.000000e+00, %5 + %7 = fptosi float %6 to i32 + %8 = bitcast i32 %7 to float + %9 = fcmp ult float %0, 5.700000e+01 + %10 = select i1 %9, float 1.000000e+00, float 0.000000e+00 + %11 = fsub float -0.000000e+00, %10 + %12 = fptosi float %11 to i32 + %13 = bitcast i32 %12 to float + %14 = bitcast float %8 to i32 + %15 = bitcast float %13 to i32 + %16 = and i32 %14, %15 + %17 = bitcast i32 %16 to float + %18 = bitcast float %17 to i32 + %19 = icmp ne i32 %18, 0 + %20 = fcmp ult float %0, 0.000000e+00 + %21 = select i1 %20, float 1.000000e+00, float 0.000000e+00 + %22 = fsub float -0.000000e+00, %21 + %23 = fptosi float %22 to i32 + %24 = bitcast i32 %23 to float + %25 = bitcast float %24 to i32 + %26 = icmp ne i32 %25, 0 + br i1 %19, label %IF, label %ELSE + +IF: ; preds = %main_body + %. = select i1 %26, float 0.000000e+00, float 1.000000e+00 + %.18 = select i1 %26, float 1.000000e+00, float 0.000000e+00 + br label %ENDIF + +ELSE: ; preds = %main_body + br i1 %26, label %ENDIF, label %ELSE17 + +ENDIF: ; preds = %ELSE17, %ELSE, %IF + %temp1.0 = phi float [ %., %IF ], [ %48, %ELSE17 ], [ 0.000000e+00, %ELSE ] + %temp2.0 = phi float [ 0.000000e+00, %IF ], [ %49, %ELSE17 ], [ 1.000000e+00, %ELSE ] + %temp.0 = phi float [ %.18, %IF ], [ %47, %ELSE17 ], [ 0.000000e+00, %ELSE ] + %27 = call float @llvm.AMDIL.clamp.(float %temp.0, float 0.000000e+00, float 1.000000e+00) + %28 = call float @llvm.AMDIL.clamp.(float %temp1.0, float 0.000000e+00, float 1.000000e+00) + %29 = call float @llvm.AMDIL.clamp.(float %temp2.0, float 0.000000e+00, float 1.000000e+00) + %30 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00) + %31 = insertelement <4 x float> undef, float %27, i32 0 + %32 = insertelement <4 x float> %31, float %28, i32 1 + %33 = insertelement <4 x float> %32, float %29, i32 2 + %34 = insertelement <4 x float> %33, float %30, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %34, i32 0, i32 0) + ret void + +ELSE17: ; preds = %ELSE + %35 = fadd float 0.000000e+00, 0x3FC99999A0000000 + %36 = fadd float 0.000000e+00, 0x3FC99999A0000000 + %37 = fadd float 0.000000e+00, 0x3FC99999A0000000 + %38 = fadd float %35, 0x3FC99999A0000000 + %39 = fadd float %36, 0x3FC99999A0000000 + %40 = fadd float %37, 0x3FC99999A0000000 + %41 = fadd float %38, 0x3FC99999A0000000 + %42 = fadd float %39, 0x3FC99999A0000000 + %43 = fadd float %40, 0x3FC99999A0000000 + %44 = fadd float %41, 0x3FC99999A0000000 + %45 = fadd float %42, 0x3FC99999A0000000 + %46 = fadd float %43, 0x3FC99999A0000000 + %47 = fadd float %44, 0x3FC99999A0000000 + %48 = fadd float %45, 0x3FC99999A0000000 + %49 = fadd float %46, 0x3FC99999A0000000 + br label %ENDIF +} + +declare float @llvm.R600.interp.input(i32, i32) #0 + +declare float @llvm.AMDIL.clamp.(float, float, float) #0 + +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { readnone } diff --git a/test/CodeGen/R600/schedule-fs-loop-nested.ll b/test/CodeGen/R600/schedule-fs-loop-nested.ll new file mode 100644 index 000000000000..5e875c49ab51 --- /dev/null +++ b/test/CodeGen/R600/schedule-fs-loop-nested.ll @@ -0,0 +1,88 @@ +;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched +;REQUIRES: asserts + +define void @main() { +main_body: + %0 = load <4 x float> addrspace(9)* null + %1 
= extractelement <4 x float> %0, i32 3 + %2 = fptosi float %1 to i32 + %3 = bitcast i32 %2 to float + %4 = bitcast float %3 to i32 + %5 = sdiv i32 %4, 4 + %6 = bitcast i32 %5 to float + %7 = bitcast float %6 to i32 + %8 = mul i32 %7, 4 + %9 = bitcast i32 %8 to float + %10 = bitcast float %9 to i32 + %11 = sub i32 0, %10 + %12 = bitcast i32 %11 to float + %13 = bitcast float %3 to i32 + %14 = bitcast float %12 to i32 + %15 = add i32 %13, %14 + %16 = bitcast i32 %15 to float + %17 = load <4 x float> addrspace(9)* null + %18 = extractelement <4 x float> %17, i32 0 + %19 = load <4 x float> addrspace(9)* null + %20 = extractelement <4 x float> %19, i32 1 + %21 = load <4 x float> addrspace(9)* null + %22 = extractelement <4 x float> %21, i32 2 + br label %LOOP + +LOOP: ; preds = %IF31, %main_body + %temp12.0 = phi float [ 0.000000e+00, %main_body ], [ %47, %IF31 ] + %temp6.0 = phi float [ %22, %main_body ], [ %temp6.1, %IF31 ] + %temp5.0 = phi float [ %20, %main_body ], [ %temp5.1, %IF31 ] + %temp4.0 = phi float [ %18, %main_body ], [ %temp4.1, %IF31 ] + %23 = bitcast float %temp12.0 to i32 + %24 = bitcast float %6 to i32 + %25 = icmp sge i32 %23, %24 + %26 = sext i1 %25 to i32 + %27 = bitcast i32 %26 to float + %28 = bitcast float %27 to i32 + %29 = icmp ne i32 %28, 0 + br i1 %29, label %IF, label %LOOP29 + +IF: ; preds = %LOOP + %30 = call float @llvm.AMDIL.clamp.(float %temp4.0, float 0.000000e+00, float 1.000000e+00) + %31 = call float @llvm.AMDIL.clamp.(float %temp5.0, float 0.000000e+00, float 1.000000e+00) + %32 = call float @llvm.AMDIL.clamp.(float %temp6.0, float 0.000000e+00, float 1.000000e+00) + %33 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00) + %34 = insertelement <4 x float> undef, float %30, i32 0 + %35 = insertelement <4 x float> %34, float %31, i32 1 + %36 = insertelement <4 x float> %35, float %32, i32 2 + %37 = insertelement <4 x float> %36, float %33, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %37, i32 0, i32 0) + ret void + +LOOP29: ; preds = %LOOP, %ENDIF30 + %temp6.1 = phi float [ %temp4.1, %ENDIF30 ], [ %temp6.0, %LOOP ] + %temp5.1 = phi float [ %temp6.1, %ENDIF30 ], [ %temp5.0, %LOOP ] + %temp4.1 = phi float [ %temp5.1, %ENDIF30 ], [ %temp4.0, %LOOP ] + %temp20.0 = phi float [ %50, %ENDIF30 ], [ 0.000000e+00, %LOOP ] + %38 = bitcast float %temp20.0 to i32 + %39 = bitcast float %16 to i32 + %40 = icmp sge i32 %38, %39 + %41 = sext i1 %40 to i32 + %42 = bitcast i32 %41 to float + %43 = bitcast float %42 to i32 + %44 = icmp ne i32 %43, 0 + br i1 %44, label %IF31, label %ENDIF30 + +IF31: ; preds = %LOOP29 + %45 = bitcast float %temp12.0 to i32 + %46 = add i32 %45, 1 + %47 = bitcast i32 %46 to float + br label %LOOP + +ENDIF30: ; preds = %LOOP29 + %48 = bitcast float %temp20.0 to i32 + %49 = add i32 %48, 1 + %50 = bitcast i32 %49 to float + br label %LOOP29 +} + +declare float @llvm.AMDIL.clamp.(float, float, float) #0 + +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { readnone } diff --git a/test/CodeGen/R600/schedule-fs-loop.ll b/test/CodeGen/R600/schedule-fs-loop.ll new file mode 100644 index 000000000000..d142cacd4335 --- /dev/null +++ b/test/CodeGen/R600/schedule-fs-loop.ll @@ -0,0 +1,55 @@ +;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched +;REQUIRES: asserts + +define void @main() { +main_body: + %0 = load <4 x float> addrspace(9)* null + %1 = extractelement <4 x float> %0, i32 3 + %2 = fptosi float %1 to i32 + %3 = bitcast i32 %2 to float + %4 = load <4 x 
float> addrspace(9)* null + %5 = extractelement <4 x float> %4, i32 0 + %6 = load <4 x float> addrspace(9)* null + %7 = extractelement <4 x float> %6, i32 1 + %8 = load <4 x float> addrspace(9)* null + %9 = extractelement <4 x float> %8, i32 2 + br label %LOOP + +LOOP: ; preds = %ENDIF, %main_body + %temp4.0 = phi float [ %5, %main_body ], [ %temp5.0, %ENDIF ] + %temp5.0 = phi float [ %7, %main_body ], [ %temp6.0, %ENDIF ] + %temp6.0 = phi float [ %9, %main_body ], [ %temp4.0, %ENDIF ] + %temp8.0 = phi float [ 0.000000e+00, %main_body ], [ %27, %ENDIF ] + %10 = bitcast float %temp8.0 to i32 + %11 = bitcast float %3 to i32 + %12 = icmp sge i32 %10, %11 + %13 = sext i1 %12 to i32 + %14 = bitcast i32 %13 to float + %15 = bitcast float %14 to i32 + %16 = icmp ne i32 %15, 0 + br i1 %16, label %IF, label %ENDIF + +IF: ; preds = %LOOP + %17 = call float @llvm.AMDIL.clamp.(float %temp4.0, float 0.000000e+00, float 1.000000e+00) + %18 = call float @llvm.AMDIL.clamp.(float %temp5.0, float 0.000000e+00, float 1.000000e+00) + %19 = call float @llvm.AMDIL.clamp.(float %temp6.0, float 0.000000e+00, float 1.000000e+00) + %20 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00) + %21 = insertelement <4 x float> undef, float %17, i32 0 + %22 = insertelement <4 x float> %21, float %18, i32 1 + %23 = insertelement <4 x float> %22, float %19, i32 2 + %24 = insertelement <4 x float> %23, float %20, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %24, i32 0, i32 0) + ret void + +ENDIF: ; preds = %LOOP + %25 = bitcast float %temp8.0 to i32 + %26 = add i32 %25, 1 + %27 = bitcast i32 %26 to float + br label %LOOP +} + +declare float @llvm.AMDIL.clamp.(float, float, float) #0 + +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { readnone } diff --git a/test/CodeGen/R600/schedule-if-2.ll b/test/CodeGen/R600/schedule-if-2.ll new file mode 100644 index 000000000000..6afd6772926b --- /dev/null +++ b/test/CodeGen/R600/schedule-if-2.ll @@ -0,0 +1,94 @@ +;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched +;REQUIRES: asserts + +define void @main() { +main_body: + %0 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %1 = extractelement <4 x float> %0, i32 0 + %2 = fadd float 1.000000e+03, %1 + %3 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %4 = extractelement <4 x float> %3, i32 0 + %5 = bitcast float %4 to i32 + %6 = icmp eq i32 %5, 0 + %7 = sext i1 %6 to i32 + %8 = bitcast i32 %7 to float + %9 = bitcast float %8 to i32 + %10 = icmp ne i32 %9, 0 + br i1 %10, label %IF, label %ELSE + +IF: ; preds = %main_body + %11 = call float @fabs(float %2) + %12 = fcmp ueq float %11, 0x7FF0000000000000 + %13 = select i1 %12, float 1.000000e+00, float 0.000000e+00 + %14 = fsub float -0.000000e+00, %13 + %15 = fptosi float %14 to i32 + %16 = bitcast i32 %15 to float + %17 = bitcast float %16 to i32 + %18 = icmp ne i32 %17, 0 + %. = select i1 %18, float 0x36A0000000000000, float 0.000000e+00 + %19 = fcmp une float %2, %2 + %20 = select i1 %19, float 1.000000e+00, float 0.000000e+00 + %21 = fsub float -0.000000e+00, %20 + %22 = fptosi float %21 to i32 + %23 = bitcast i32 %22 to float + %24 = bitcast float %23 to i32 + %25 = icmp ne i32 %24, 0 + %temp8.0 = select i1 %25, float 0x36A0000000000000, float 0.000000e+00 + %26 = bitcast float %. 
to i32 + %27 = sitofp i32 %26 to float + %28 = bitcast float %temp8.0 to i32 + %29 = sitofp i32 %28 to float + %30 = fcmp ugt float %2, 0.000000e+00 + %31 = select i1 %30, float 1.000000e+00, float %2 + %32 = fcmp uge float %31, 0.000000e+00 + %33 = select i1 %32, float %31, float -1.000000e+00 + %34 = fadd float %33, 1.000000e+00 + %35 = fmul float %34, 5.000000e-01 + br label %ENDIF + +ELSE: ; preds = %main_body + %36 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %37 = extractelement <4 x float> %36, i32 0 + %38 = bitcast float %37 to i32 + %39 = icmp eq i32 %38, 1 + %40 = sext i1 %39 to i32 + %41 = bitcast i32 %40 to float + %42 = bitcast float %41 to i32 + %43 = icmp ne i32 %42, 0 + br i1 %43, label %IF23, label %ENDIF + +ENDIF: ; preds = %IF23, %ELSE, %IF + %temp4.0 = phi float [ %2, %IF ], [ %56, %IF23 ], [ 0.000000e+00, %ELSE ] + %temp5.0 = phi float [ %27, %IF ], [ %60, %IF23 ], [ 0.000000e+00, %ELSE ] + %temp6.0 = phi float [ %29, %IF ], [ 0.000000e+00, %ELSE ], [ 0.000000e+00, %IF23 ] + %temp7.0 = phi float [ %35, %IF ], [ 0.000000e+00, %ELSE ], [ 0.000000e+00, %IF23 ] + %44 = insertelement <4 x float> undef, float %temp4.0, i32 0 + %45 = insertelement <4 x float> %44, float %temp5.0, i32 1 + %46 = insertelement <4 x float> %45, float %temp6.0, i32 2 + %47 = insertelement <4 x float> %46, float %temp7.0, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %47, i32 0, i32 0) + ret void + +IF23: ; preds = %ELSE + %48 = fcmp ult float 0.000000e+00, %2 + %49 = select i1 %48, float 1.000000e+00, float 0.000000e+00 + %50 = fsub float -0.000000e+00, %49 + %51 = fptosi float %50 to i32 + %52 = bitcast i32 %51 to float + %53 = bitcast float %52 to i32 + %54 = icmp ne i32 %53, 0 + %.28 = select i1 %54, float 0x36A0000000000000, float 0.000000e+00 + %55 = bitcast float %.28 to i32 + %56 = sitofp i32 %55 to float + %57 = load <4 x float> addrspace(8)* null + %58 = extractelement <4 x float> %57, i32 0 + %59 = fsub float -0.000000e+00, %58 + %60 = fadd float %2, %59 + br label %ENDIF +} + +declare float @fabs(float) #0 + +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { readonly } diff --git a/test/CodeGen/R600/schedule-if.ll b/test/CodeGen/R600/schedule-if.ll new file mode 100644 index 000000000000..347d92fd6a0e --- /dev/null +++ b/test/CodeGen/R600/schedule-if.ll @@ -0,0 +1,46 @@ +;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched +;REQUIRES: asserts + +define void @main() { +main_body: + %0 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %1 = extractelement <4 x float> %0, i32 0 + %2 = bitcast float %1 to i32 + %3 = icmp eq i32 %2, 0 + %4 = sext i1 %3 to i32 + %5 = bitcast i32 %4 to float + %6 = bitcast float %5 to i32 + %7 = icmp ne i32 %6, 0 + br i1 %7, label %ENDIF, label %ELSE + +ELSE: ; preds = %main_body + %8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %9 = extractelement <4 x float> %8, i32 0 + %10 = bitcast float %9 to i32 + %11 = icmp eq i32 %10, 1 + %12 = sext i1 %11 to i32 + %13 = bitcast i32 %12 to float + %14 = bitcast float %13 to i32 + %15 = icmp ne i32 %14, 0 + br i1 %15, label %IF13, label %ENDIF + +ENDIF: ; preds = %IF13, %ELSE, %main_body + %temp.0 = phi float [ 1.000000e+03, %main_body ], [ 1.000000e+00, %IF13 ], [ 0.000000e+00, %ELSE ] + %temp1.0 = phi float [ 0.000000e+00, %main_body ], [ %23, %IF13 ], [ 0.000000e+00, %ELSE ] + %temp3.0 = 
phi float [ 1.000000e+00, %main_body ], [ 0.000000e+00, %ELSE ], [ 0.000000e+00, %IF13 ] + %16 = insertelement <4 x float> undef, float %temp.0, i32 0 + %17 = insertelement <4 x float> %16, float %temp1.0, i32 1 + %18 = insertelement <4 x float> %17, float 0.000000e+00, i32 2 + %19 = insertelement <4 x float> %18, float %temp3.0, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %19, i32 0, i32 0) + ret void + +IF13: ; preds = %ELSE + %20 = load <4 x float> addrspace(8)* null + %21 = extractelement <4 x float> %20, i32 0 + %22 = fsub float -0.000000e+00, %21 + %23 = fadd float 1.000000e+03, %22 + br label %ENDIF +} + +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) diff --git a/test/CodeGen/R600/schedule-vs-if-nested-loop.ll b/test/CodeGen/R600/schedule-vs-if-nested-loop.ll new file mode 100644 index 000000000000..44b7c2f68002 --- /dev/null +++ b/test/CodeGen/R600/schedule-vs-if-nested-loop.ll @@ -0,0 +1,134 @@ +;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched +;REQUIRES: asserts + +define void @main() { +main_body: + %0 = call float @llvm.R600.load.input(i32 4) + %1 = call float @llvm.R600.load.input(i32 5) + %2 = call float @llvm.R600.load.input(i32 6) + %3 = call float @llvm.R600.load.input(i32 7) + %4 = fcmp ult float %0, 0.000000e+00 + %5 = select i1 %4, float 1.000000e+00, float 0.000000e+00 + %6 = fsub float -0.000000e+00, %5 + %7 = fptosi float %6 to i32 + %8 = bitcast i32 %7 to float + %9 = bitcast float %8 to i32 + %10 = icmp ne i32 %9, 0 + br i1 %10, label %LOOP, label %ENDIF + +ENDIF: ; preds = %ENDIF16, %LOOP, %main_body + %temp.0 = phi float [ 0.000000e+00, %main_body ], [ %temp.1, %LOOP ], [ %temp.1, %ENDIF16 ] + %temp1.0 = phi float [ 1.000000e+00, %main_body ], [ %temp1.1, %LOOP ], [ %temp1.1, %ENDIF16 ] + %temp2.0 = phi float [ 0.000000e+00, %main_body ], [ %temp2.1, %LOOP ], [ %temp2.1, %ENDIF16 ] + %temp3.0 = phi float [ 0.000000e+00, %main_body ], [ %temp3.1, %LOOP ], [ %temp3.1, %ENDIF16 ] + %11 = load <4 x float> addrspace(9)* null + %12 = extractelement <4 x float> %11, i32 0 + %13 = fmul float %12, %0 + %14 = load <4 x float> addrspace(9)* null + %15 = extractelement <4 x float> %14, i32 1 + %16 = fmul float %15, %0 + %17 = load <4 x float> addrspace(9)* null + %18 = extractelement <4 x float> %17, i32 2 + %19 = fmul float %18, %0 + %20 = load <4 x float> addrspace(9)* null + %21 = extractelement <4 x float> %20, i32 3 + %22 = fmul float %21, %0 + %23 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1) + %24 = extractelement <4 x float> %23, i32 0 + %25 = fmul float %24, %1 + %26 = fadd float %25, %13 + %27 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1) + %28 = extractelement <4 x float> %27, i32 1 + %29 = fmul float %28, %1 + %30 = fadd float %29, %16 + %31 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1) + %32 = extractelement <4 x float> %31, i32 2 + %33 = fmul float %32, %1 + %34 = fadd float %33, %19 + %35 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1) + %36 = extractelement <4 x float> %35, i32 3 + %37 = fmul float %36, %1 + %38 = fadd float %37, %22 + %39 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2) + %40 = extractelement <4 x float> %39, i32 0 + %41 = fmul float %40, %2 + %42 = fadd float %41, %26 + %43 = load <4 x float> addrspace(9)* getelementptr 
([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2) + %44 = extractelement <4 x float> %43, i32 1 + %45 = fmul float %44, %2 + %46 = fadd float %45, %30 + %47 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2) + %48 = extractelement <4 x float> %47, i32 2 + %49 = fmul float %48, %2 + %50 = fadd float %49, %34 + %51 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2) + %52 = extractelement <4 x float> %51, i32 3 + %53 = fmul float %52, %2 + %54 = fadd float %53, %38 + %55 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3) + %56 = extractelement <4 x float> %55, i32 0 + %57 = fmul float %56, %3 + %58 = fadd float %57, %42 + %59 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3) + %60 = extractelement <4 x float> %59, i32 1 + %61 = fmul float %60, %3 + %62 = fadd float %61, %46 + %63 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3) + %64 = extractelement <4 x float> %63, i32 2 + %65 = fmul float %64, %3 + %66 = fadd float %65, %50 + %67 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3) + %68 = extractelement <4 x float> %67, i32 3 + %69 = fmul float %68, %3 + %70 = fadd float %69, %54 + %71 = insertelement <4 x float> undef, float %58, i32 0 + %72 = insertelement <4 x float> %71, float %62, i32 1 + %73 = insertelement <4 x float> %72, float %66, i32 2 + %74 = insertelement <4 x float> %73, float %70, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %74, i32 60, i32 1) + %75 = insertelement <4 x float> undef, float %temp.0, i32 0 + %76 = insertelement <4 x float> %75, float %temp1.0, i32 1 + %77 = insertelement <4 x float> %76, float %temp2.0, i32 2 + %78 = insertelement <4 x float> %77, float %temp3.0, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %78, i32 0, i32 2) + ret void + +LOOP: ; preds = %main_body, %ENDIF19 + %temp.1 = phi float [ %93, %ENDIF19 ], [ 0.000000e+00, %main_body ] + %temp1.1 = phi float [ %94, %ENDIF19 ], [ 1.000000e+00, %main_body ] + %temp2.1 = phi float [ %95, %ENDIF19 ], [ 0.000000e+00, %main_body ] + %temp3.1 = phi float [ %96, %ENDIF19 ], [ 0.000000e+00, %main_body ] + %temp4.0 = phi float [ %97, %ENDIF19 ], [ -2.000000e+00, %main_body ] + %79 = fcmp uge float %temp4.0, %0 + %80 = select i1 %79, float 1.000000e+00, float 0.000000e+00 + %81 = fsub float -0.000000e+00, %80 + %82 = fptosi float %81 to i32 + %83 = bitcast i32 %82 to float + %84 = bitcast float %83 to i32 + %85 = icmp ne i32 %84, 0 + br i1 %85, label %ENDIF, label %ENDIF16 + +ENDIF16: ; preds = %LOOP + %86 = fcmp une float %2, %temp4.0 + %87 = select i1 %86, float 1.000000e+00, float 0.000000e+00 + %88 = fsub float -0.000000e+00, %87 + %89 = fptosi float %88 to i32 + %90 = bitcast i32 %89 to float + %91 = bitcast float %90 to i32 + %92 = icmp ne i32 %91, 0 + br i1 %92, label %ENDIF, label %ENDIF19 + +ENDIF19: ; preds = %ENDIF16 + %93 = fadd float %temp.1, 1.000000e+00 + %94 = fadd float %temp1.1, 0.000000e+00 + %95 = fadd float %temp2.1, 0.000000e+00 + %96 = fadd float %temp3.1, 0.000000e+00 + %97 = fadd float %temp4.0, 1.000000e+00 + br label %LOOP +} + +declare float @llvm.R600.load.input(i32) #0 + +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { readnone } diff --git a/test/CodeGen/R600/sdiv.ll b/test/CodeGen/R600/sdiv.ll new file mode 
100644 index 000000000000..3556facfbab3 --- /dev/null +++ b/test/CodeGen/R600/sdiv.ll @@ -0,0 +1,21 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; The code generated by sdiv is long and complex and may frequently change. +; The goal of this test is to make sure the ISel doesn't fail. +; +; This program was previously failing to compile when one of the selectcc +; opcodes generated by the sdiv lowering was being legalized and optimized to: +; selectcc Remainder -1, 0, -1, SETGT +; This was fixed by adding an additional pattern in R600Instructions.td to +; match this pattern with a CNDGE_INT. + +; CHECK: RETURN + +define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1 + %num = load i32 addrspace(1) * %in + %den = load i32 addrspace(1) * %den_ptr + %result = sdiv i32 %num, %den + store i32 %result, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/selectcc-icmp-select-float.ll b/test/CodeGen/R600/selectcc-icmp-select-float.ll new file mode 100644 index 000000000000..359ca1e6f8ce --- /dev/null +++ b/test/CodeGen/R600/selectcc-icmp-select-float.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; Note additional optimizations may cause this SGT to be replaced with a +; CND* instruction. +; CHECK: SETGT_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], literal.x, -1}} +; Test a selectcc with i32 LHS/RHS and float True/False + +define void @test(float addrspace(1)* %out, i32 addrspace(1)* %in) { +entry: + %0 = load i32 addrspace(1)* %in + %1 = icmp sge i32 %0, 0 + %2 = select i1 %1, float 1.0, float 0.0 + store float %2, float addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/selectcc-opt.ll b/test/CodeGen/R600/selectcc-opt.ll new file mode 100644 index 000000000000..02d935390423 --- /dev/null +++ b/test/CodeGen/R600/selectcc-opt.ll @@ -0,0 +1,64 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: @test_a +; CHECK-NOT: CND +; CHECK: SET{{[NEQGTL]+}}_DX10 + +define void @test_a(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp ult float %in, 0.000000e+00 + %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 + %2 = fsub float -0.000000e+00, %1 + %3 = fptosi float %2 to i32 + %4 = bitcast i32 %3 to float + %5 = bitcast float %4 to i32 + %6 = icmp ne i32 %5, 0 + br i1 %6, label %IF, label %ENDIF + +IF: + %7 = getelementptr i32 addrspace(1)* %out, i32 1 + store i32 0, i32 addrspace(1)* %7 + br label %ENDIF + +ENDIF: + store i32 0, i32 addrspace(1)* %out + ret void +} + +; Same as test_a, but the branch labels are swapped to produce the inverse cc +; for the icmp instruction + +; CHECK: @test_b +; CHECK: SET{{[GTEQN]+}}_DX10 +; CHECK-NEXT: PRED_ +define void @test_b(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp ult float %in, 0.0 + %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 + %2 = fsub float -0.000000e+00, %1 + %3 = fptosi float %2 to i32 + %4 = bitcast i32 %3 to float + %5 = bitcast float %4 to i32 + %6 = icmp ne i32 %5, 0 + br i1 %6, label %ENDIF, label %IF + +IF: + %7 = getelementptr i32 addrspace(1)* %out, i32 1 + store i32 0, i32 addrspace(1)* %7 + br label %ENDIF + +ENDIF: + store i32 0, i32 addrspace(1)* %out + ret void +} + +; Test a CND*_INT instruction with float true/false values +; CHECK: @test_c +; CHECK: CND{{[GTE]+}}_INT +define void @test_c(float addrspace(1)* %out, i32 %in) { +entry: + %0 = icmp sgt i32 %in, 0 + %1 = select i1 %0, float 2.0, float 3.0 + store float %1, float addrspace(1)* 
%out + ret void +} diff --git a/test/CodeGen/R600/selectcc_cnde.ll b/test/CodeGen/R600/selectcc_cnde.ll new file mode 100644 index 000000000000..f0a0f512ba15 --- /dev/null +++ b/test/CodeGen/R600/selectcc_cnde.ll @@ -0,0 +1,11 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK-NOT: SETE +;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], 1.0, literal.x, [-0-9]+\(2.0}} +define void @test(float addrspace(1)* %out, float addrspace(1)* %in) { + %1 = load float addrspace(1)* %in + %2 = fcmp oeq float %1, 0.0 + %3 = select i1 %2, float 1.0, float 2.0 + store float %3, float addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/selectcc_cnde_int.ll b/test/CodeGen/R600/selectcc_cnde_int.ll new file mode 100644 index 000000000000..b38078e26db6 --- /dev/null +++ b/test/CodeGen/R600/selectcc_cnde_int.ll @@ -0,0 +1,11 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK-NOT: SETE_INT +;CHECK: CNDE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], 1, literal.x, 2}} +define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %1 = load i32 addrspace(1)* %in + %2 = icmp eq i32 %1, 0 + %3 = select i1 %2, i32 1, i32 2 + store i32 %3, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/set-dx10.ll b/test/CodeGen/R600/set-dx10.ll new file mode 100644 index 000000000000..54febcf0e68e --- /dev/null +++ b/test/CodeGen/R600/set-dx10.ll @@ -0,0 +1,137 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; These tests check that floating point comparisons which are used by select +; to store integer true (-1) and false (0) values are lowered to one of the +; SET*DX10 instructions. + +; CHECK: @fcmp_une_select_fptosi +; CHECK: SETNE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00) +define void @fcmp_une_select_fptosi(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp une float %in, 5.0 + %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 + %2 = fsub float -0.000000e+00, %1 + %3 = fptosi float %2 to i32 + store i32 %3, i32 addrspace(1)* %out + ret void +} + +; CHECK: @fcmp_une_select_i32 +; CHECK: SETNE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00) +define void @fcmp_une_select_i32(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp une float %in, 5.0 + %1 = select i1 %0, i32 -1, i32 0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; CHECK: @fcmp_ueq_select_fptosi +; CHECK: SETE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00) +define void @fcmp_ueq_select_fptosi(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp ueq float %in, 5.0 + %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 + %2 = fsub float -0.000000e+00, %1 + %3 = fptosi float %2 to i32 + store i32 %3, i32 addrspace(1)* %out + ret void +} + +; CHECK: @fcmp_ueq_select_i32 +; CHECK: SETE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00) +define void @fcmp_ueq_select_i32(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp ueq float %in, 5.0 + %1 = select i1 %0, i32 -1, i32 0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; CHECK: @fcmp_ugt_select_fptosi +; CHECK: SETGT_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00) +define void @fcmp_ugt_select_fptosi(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp ugt float %in, 5.0 + %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 + %2 = fsub float -0.000000e+00, %1 + %3 = fptosi float %2 to i32 + store i32 %3, 
i32 addrspace(1)* %out + ret void +} + +; CHECK: @fcmp_ugt_select_i32 +; CHECK: SETGT_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00) +define void @fcmp_ugt_select_i32(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp ugt float %in, 5.0 + %1 = select i1 %0, i32 -1, i32 0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; CHECK: @fcmp_uge_select_fptosi +; CHECK: SETGE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00) +define void @fcmp_uge_select_fptosi(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp uge float %in, 5.0 + %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 + %2 = fsub float -0.000000e+00, %1 + %3 = fptosi float %2 to i32 + store i32 %3, i32 addrspace(1)* %out + ret void +} + +; CHECK: @fcmp_uge_select_i32 +; CHECK: SETGE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00) +define void @fcmp_uge_select_i32(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp uge float %in, 5.0 + %1 = select i1 %0, i32 -1, i32 0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; CHECK: @fcmp_ule_select_fptosi +; CHECK: SETGE_DX10 T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00) +define void @fcmp_ule_select_fptosi(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp ule float %in, 5.0 + %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 + %2 = fsub float -0.000000e+00, %1 + %3 = fptosi float %2 to i32 + store i32 %3, i32 addrspace(1)* %out + ret void +} + +; CHECK: @fcmp_ule_select_i32 +; CHECK: SETGE_DX10 T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00) +define void @fcmp_ule_select_i32(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp ule float %in, 5.0 + %1 = select i1 %0, i32 -1, i32 0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; CHECK: @fcmp_ult_select_fptosi +; CHECK: SETGT_DX10 T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00) +define void @fcmp_ult_select_fptosi(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp ult float %in, 5.0 + %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 + %2 = fsub float -0.000000e+00, %1 + %3 = fptosi float %2 to i32 + store i32 %3, i32 addrspace(1)* %out + ret void +} + +; CHECK: @fcmp_ult_select_i32 +; CHECK: SETGT_DX10 T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00) +define void @fcmp_ult_select_i32(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp ult float %in, 5.0 + %1 = select i1 %0, i32 -1, i32 0 + store i32 %1, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/setcc.v4i32.ll b/test/CodeGen/R600/setcc.v4i32.ll new file mode 100644 index 000000000000..0752f2e63dbf --- /dev/null +++ b/test/CodeGen/R600/setcc.v4i32.ll @@ -0,0 +1,12 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +;CHECK: SETE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 + %a = load <4 x i32> addrspace(1) * %in + %b = load <4 x i32> addrspace(1) * %b_ptr + %result = icmp eq <4 x i32> %a, %b + %sext = sext <4 x i1> %result to <4 x i32> + store <4 x i32> %sext, <4 x i32> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/seto.ll b/test/CodeGen/R600/seto.ll new file mode 100644 index 000000000000..5ab4b87d570c --- /dev/null +++ b/test/CodeGen/R600/seto.ll @@ -0,0 +1,13 @@ +;RUN: llc < %s 
-march=r600 -mcpu=SI | FileCheck %s + +;CHECK: V_CMP_O_F32_e64 SGPR0_SGPR1, VGPR0, VGPR0, 0, 0, 0, 0 + +define void @main(float %p) { +main_body: + %c = fcmp oeq float %p, %p + %r = select i1 %c, float 1.000000e+00, float 0.000000e+00 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %r, float %r, float %r, float %r) + ret void +} + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) diff --git a/test/CodeGen/R600/setuo.ll b/test/CodeGen/R600/setuo.ll new file mode 100644 index 000000000000..320835576d41 --- /dev/null +++ b/test/CodeGen/R600/setuo.ll @@ -0,0 +1,13 @@ +;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s + +;CHECK: V_CMP_U_F32_e64 SGPR0_SGPR1, VGPR0, VGPR0, 0, 0, 0, 0 + +define void @main(float %p) { +main_body: + %c = fcmp une float %p, %p + %r = select i1 %c, float 1.000000e+00, float 0.000000e+00 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %r, float %r, float %r, float %r) + ret void +} + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) diff --git a/test/CodeGen/R600/short-args.ll b/test/CodeGen/R600/short-args.ll new file mode 100644 index 000000000000..b69e327bf6df --- /dev/null +++ b/test/CodeGen/R600/short-args.ll @@ -0,0 +1,41 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: @i8_arg +; CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}} + +define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind { +entry: + %0 = zext i8 %in to i32 + store i32 %0, i32 addrspace(1)* %out, align 4 + ret void +} + +; CHECK: @i8_zext_arg +; CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}} + +define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind { +entry: + %0 = zext i8 %in to i32 + store i32 %0, i32 addrspace(1)* %out, align 4 + ret void +} + +; CHECK: @i16_arg +; CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}} + +define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind { +entry: + %0 = zext i16 %in to i32 + store i32 %0, i32 addrspace(1)* %out, align 4 + ret void +} + +; CHECK: @i16_zext_arg +; CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}} + +define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind { +entry: + %0 = zext i16 %in to i32 + store i32 %0, i32 addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/store.v4f32.ll b/test/CodeGen/R600/store.v4f32.ll new file mode 100644 index 000000000000..8b0d24445971 --- /dev/null +++ b/test/CodeGen/R600/store.v4f32.ll @@ -0,0 +1,9 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: RAT_WRITE_CACHELESS_128 T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1 + +define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { + %1 = load <4 x float> addrspace(1) * %in + store <4 x float> %1, <4 x float> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/store.v4i32.ll b/test/CodeGen/R600/store.v4i32.ll new file mode 100644 index 000000000000..a659815ddeba --- /dev/null +++ b/test/CodeGen/R600/store.v4i32.ll @@ -0,0 +1,9 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: RAT_WRITE_CACHELESS_128 T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1 + +define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %1 = load <4 x i32> addrspace(1) * %in + store <4 x i32> %1, <4 x i32> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/udiv.v4i32.ll b/test/CodeGen/R600/udiv.v4i32.ll new file mode 100644 index 000000000000..47657a6be75e --- /dev/null +++ 
b/test/CodeGen/R600/udiv.v4i32.ll @@ -0,0 +1,15 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;The code generated by udiv is long and complex and may frequently change. +;The goal of this test is to make sure the ISel doesn't fail when it gets +;a v4i32 udiv +;CHECK: RETURN + +define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 + %a = load <4 x i32> addrspace(1) * %in + %b = load <4 x i32> addrspace(1) * %b_ptr + %result = udiv <4 x i32> %a, %b + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/unsupported-cc.ll b/test/CodeGen/R600/unsupported-cc.ll new file mode 100644 index 000000000000..b48c59151831 --- /dev/null +++ b/test/CodeGen/R600/unsupported-cc.ll @@ -0,0 +1,83 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; These tests are for condition codes that are not supported by the hardware + +; CHECK: @slt +; CHECK: SETGT_INT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 5(7.006492e-45) +define void @slt(i32 addrspace(1)* %out, i32 %in) { +entry: + %0 = icmp slt i32 %in, 5 + %1 = select i1 %0, i32 -1, i32 0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; CHECK: @ult_i32 +; CHECK: SETGT_UINT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 5(7.006492e-45) +define void @ult_i32(i32 addrspace(1)* %out, i32 %in) { +entry: + %0 = icmp ult i32 %in, 5 + %1 = select i1 %0, i32 -1, i32 0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; CHECK: @ult_float +; CHECK: SETGT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00) +define void @ult_float(float addrspace(1)* %out, float %in) { +entry: + %0 = fcmp ult float %in, 5.0 + %1 = select i1 %0, float 1.0, float 0.0 + store float %1, float addrspace(1)* %out + ret void +} + +; CHECK: @olt +; CHECK: SETGT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00) +define void @olt(float addrspace(1)* %out, float %in) { +entry: + %0 = fcmp olt float %in, 5.0 + %1 = select i1 %0, float 1.0, float 0.0 + store float %1, float addrspace(1)* %out + ret void +} + +; CHECK: @sle +; CHECK: SETGT_INT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 6(8.407791e-45) +define void @sle(i32 addrspace(1)* %out, i32 %in) { +entry: + %0 = icmp sle i32 %in, 5 + %1 = select i1 %0, i32 -1, i32 0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; CHECK: @ule_i32 +; CHECK: SETGT_UINT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 6(8.407791e-45) +define void @ule_i32(i32 addrspace(1)* %out, i32 %in) { +entry: + %0 = icmp ule i32 %in, 5 + %1 = select i1 %0, i32 -1, i32 0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; CHECK: @ule_float +; CHECK: SETGE T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00) +define void @ule_float(float addrspace(1)* %out, float %in) { +entry: + %0 = fcmp ule float %in, 5.0 + %1 = select i1 %0, float 1.0, float 0.0 + store float %1, float addrspace(1)* %out + ret void +} + +; CHECK: @ole +; CHECK: SETGE T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00) +define void @ole(float addrspace(1)* %out, float %in) { +entry: + %0 = fcmp ole float %in, 5.0 + %1 = select i1 %0, float 1.0, float 0.0 + store float %1, float addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/urem.v4i32.ll b/test/CodeGen/R600/urem.v4i32.ll new file mode 100644 index 000000000000..2e7388caa6ce --- /dev/null +++ b/test/CodeGen/R600/urem.v4i32.ll 
@@ -0,0 +1,15 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;The code generated by urem is long and complex and may frequently change. +;The goal of this test is to make sure the ISel doesn't fail when it gets +;a v4i32 urem +;CHECK: RETURN + +define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 + %a = load <4 x i32> addrspace(1) * %in + %b = load <4 x i32> addrspace(1) * %b_ptr + %result = urem <4 x i32> %a, %b + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/vec4-expand.ll b/test/CodeGen/R600/vec4-expand.ll new file mode 100644 index 000000000000..8f62bc692908 --- /dev/null +++ b/test/CodeGen/R600/vec4-expand.ll @@ -0,0 +1,53 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: @fp_to_sint +; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @fp_to_sint(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { + %value = load <4 x float> addrspace(1) * %in + %result = fptosi <4 x float> %value to <4 x i32> + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} + +; CHECK: @fp_to_uint +; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @fp_to_uint(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { + %value = load <4 x float> addrspace(1) * %in + %result = fptoui <4 x float> %value to <4 x i32> + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} + +; CHECK: @sint_to_fp +; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @sint_to_fp(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %value = load <4 x i32> addrspace(1) * %in + %result = sitofp <4 x i32> %value to <4 x float> + store <4 x float> %result, <4 x float> addrspace(1)* %out + ret void +} + +; CHECK: @uint_to_fp +; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @uint_to_fp(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %value = load <4 x i32> addrspace(1) * %in + %result = uitofp <4 x i32> %value to <4 x float> + store <4 x float> %result, <4 x float> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/SI/sanity.ll b/test/CodeGen/SI/sanity.ll new file mode 100644 index 000000000000..62cdcf5eca28 --- /dev/null +++ b/test/CodeGen/SI/sanity.ll @@ -0,0 +1,37 @@ +;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s + +; CHECK: S_ENDPGM + +define void @main() { +main_body: + call void @llvm.AMDGPU.shader.type(i32 1) + %0 = load <4 x i32> addrspace(2)* addrspace(8)* inttoptr (i32 6 to <4 x i32> addrspace(2)* addrspace(8)*) + %1 = getelementptr <4 x i32> addrspace(2)* %0, i32 0 + %2 = load <4 x i32> addrspace(2)* %1 + %3 = call i32 @llvm.SI.vs.load.buffer.index() + %4 = call <4 x float> 
@llvm.SI.vs.load.input(<4 x i32> %2, i32 0, i32 %3) + %5 = extractelement <4 x float> %4, i32 0 + %6 = extractelement <4 x float> %4, i32 1 + %7 = extractelement <4 x float> %4, i32 2 + %8 = extractelement <4 x float> %4, i32 3 + %9 = load <4 x i32> addrspace(2)* addrspace(8)* inttoptr (i32 6 to <4 x i32> addrspace(2)* addrspace(8)*) + %10 = getelementptr <4 x i32> addrspace(2)* %9, i32 1 + %11 = load <4 x i32> addrspace(2)* %10 + %12 = call i32 @llvm.SI.vs.load.buffer.index() + %13 = call <4 x float> @llvm.SI.vs.load.input(<4 x i32> %11, i32 0, i32 %12) + %14 = extractelement <4 x float> %13, i32 0 + %15 = extractelement <4 x float> %13, i32 1 + %16 = extractelement <4 x float> %13, i32 2 + %17 = extractelement <4 x float> %13, i32 3 + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %14, float %15, float %16, float %17) + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %5, float %6, float %7, float %8) + ret void +} + +declare void @llvm.AMDGPU.shader.type(i32) + +declare i32 @llvm.SI.vs.load.buffer.index() readnone + +declare <4 x float> @llvm.SI.vs.load.input(<4 x i32>, i32, i32) + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) diff --git a/test/CodeGen/SPARC/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/SPARC/2010-04-07-DbgValueOtherTargets.ll deleted file mode 100644 index 3b644986f2e4..000000000000 --- a/test/CodeGen/SPARC/2010-04-07-DbgValueOtherTargets.ll +++ /dev/null @@ -1,28 +0,0 @@ -; RUN: llc -O0 -march=sparc -asm-verbose < %s | FileCheck %s -; Check that DEBUG_VALUE comments come through on a variety of targets. - -define i32 @main() nounwind ssp { -entry: -; CHECK: DEBUG_VALUE - call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9 - ret i32 0, !dbg !10 -} - -declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone - -declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone - -!llvm.dbg.sp = !{!0} - -!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] -!4 = metadata !{metadata !5} -!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 0} -!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] -!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] -!9 = metadata !{i32 3, i32 11, metadata !8, null} -!10 = metadata !{i32 4, i32 2, metadata !8, null} - diff --git a/test/CodeGen/SPARC/64bit.ll b/test/CodeGen/SPARC/64bit.ll new file mode 100644 index 000000000000..0d4e191c9509 --- /dev/null +++ b/test/CodeGen/SPARC/64bit.ll @@ -0,0 +1,146 @@ +; RUN: llc < %s -march=sparcv9 | FileCheck %s + +; CHECK: ret2: +; CHECK: or %g0, %i1, %i0 +define i64 @ret2(i64 %a, i64 %b) { + ret i64 %b +} + +; CHECK: 
shl_imm +; CHECK: sllx %i0, 7, %i0 +define i64 @shl_imm(i64 %a) { + %x = shl i64 %a, 7 + ret i64 %x +} + +; CHECK: sra_reg +; CHECK: srax %i0, %i1, %i0 +define i64 @sra_reg(i64 %a, i64 %b) { + %x = ashr i64 %a, %b + ret i64 %x +} + +; Immediate materialization. Many of these patterns could actually be merged +; into the restore instruction: +; +; restore %g0, %g0, %o0 +; +; CHECK: ret_imm0 +; CHECK: or %g0, %g0, %i0 +define i64 @ret_imm0() { + ret i64 0 +} + +; CHECK: ret_simm13 +; CHECK: or %g0, -4096, %i0 +define i64 @ret_simm13() { + ret i64 -4096 +} + +; CHECK: ret_sethi +; CHECK: sethi 4, %i0 +; CHECK-NOT: or +; CHECK: restore +define i64 @ret_sethi() { + ret i64 4096 +} + +; CHECK: ret_sethi +; CHECK: sethi 4, [[R:%[goli][0-7]]] +; CHECK: or [[R]], 1, %i0 +define i64 @ret_sethi_or() { + ret i64 4097 +} + +; CHECK: ret_nimm33 +; CHECK: sethi 4, [[R:%[goli][0-7]]] +; CHECK: xor [[R]], -4, %i0 +define i64 @ret_nimm33() { + ret i64 -4100 +} + +; CHECK: ret_bigimm +; CHECK: sethi +; CHECK: sethi +define i64 @ret_bigimm() { + ret i64 6800754272627607872 +} + +; CHECK: reg_reg_alu +; CHECK: add %i0, %i1, [[R0:%[goli][0-7]]] +; CHECK: sub [[R0]], %i2, [[R1:%[goli][0-7]]] +; CHECK: andn [[R1]], %i0, %i0 +define i64 @reg_reg_alu(i64 %x, i64 %y, i64 %z) { + %a = add i64 %x, %y + %b = sub i64 %a, %z + %c = xor i64 %x, -1 + %d = and i64 %b, %c + ret i64 %d +} + +; CHECK: reg_imm_alu +; CHECK: add %i0, -5, [[R0:%[goli][0-7]]] +; CHECK: xor [[R0]], 2, %i0 +define i64 @reg_imm_alu(i64 %x, i64 %y, i64 %z) { + %a = add i64 %x, -5 + %b = xor i64 %a, 2 + ret i64 %b +} + +; CHECK: loads +; CHECK: ldx [%i0] +; CHECK: stx % +; CHECK: ld [%i1] +; CHECK: st % +; CHECK: ldsw [%i2] +; CHECK: stx % +; CHECK: ldsh [%i3] +; CHECK: sth % +define i64 @loads(i64* %p, i32* %q, i32* %r, i16* %s) { + %a = load i64* %p + %ai = add i64 1, %a + store i64 %ai, i64* %p + %b = load i32* %q + %b2 = zext i32 %b to i64 + %bi = trunc i64 %ai to i32 + store i32 %bi, i32* %q + %c = load i32* %r + %c2 = sext i32 %c to i64 + store i64 %ai, i64* %p + %d = load i16* %s + %d2 = sext i16 %d to i64 + %di = trunc i64 %ai to i16 + store i16 %di, i16* %s + + %x1 = add i64 %a, %b2 + %x2 = add i64 %c2, %d2 + %x3 = add i64 %x1, %x2 + ret i64 %x3 +} + +; CHECK: stores +; CHECK: ldx [%i0+8], [[R:%[goli][0-7]]] +; CHECK: stx [[R]], [%i0+16] +; CHECK: st [[R]], [%i1+-8] +; CHECK: sth [[R]], [%i2+40] +; CHECK: stb [[R]], [%i3+-20] +define void @stores(i64* %p, i32* %q, i16* %r, i8* %s) { + %p1 = getelementptr i64* %p, i64 1 + %p2 = getelementptr i64* %p, i64 2 + %pv = load i64* %p1 + store i64 %pv, i64* %p2 + + %q2 = getelementptr i32* %q, i32 -2 + %qv = trunc i64 %pv to i32 + store i32 %qv, i32* %q2 + + %r2 = getelementptr i16* %r, i16 20 + %rv = trunc i64 %pv to i16 + store i16 %rv, i16* %r2 + + %s2 = getelementptr i8* %s, i8 -20 + %sv = trunc i64 %pv to i8 + store i8 %sv, i8* %s2 + + ret void +} diff --git a/test/CodeGen/SPARC/64cond.ll b/test/CodeGen/SPARC/64cond.ll new file mode 100644 index 000000000000..6e66a262a4f2 --- /dev/null +++ b/test/CodeGen/SPARC/64cond.ll @@ -0,0 +1,56 @@ +; RUN: llc < %s -march=sparcv9 | FileCheck %s +; Testing 64-bit conditionals. 
+ +; CHECK: cmpri +; CHECK: subcc %i1, 1 +; CHECK: bpe %xcc, +define void @cmpri(i64* %p, i64 %x) { +entry: + %tobool = icmp eq i64 %x, 1 + br i1 %tobool, label %if.end, label %if.then + +if.then: + store i64 %x, i64* %p, align 8 + br label %if.end + +if.end: + ret void +} + +; CHECK: cmprr +; CHECK: subcc %i1, %i2 +; CHECK: bpgu %xcc, +define void @cmprr(i64* %p, i64 %x, i64 %y) { +entry: + %tobool = icmp ugt i64 %x, %y + br i1 %tobool, label %if.end, label %if.then + +if.then: + store i64 %x, i64* %p, align 8 + br label %if.end + +if.end: + ret void +} + +; CHECK: selecti32_xcc +; CHECK: subcc %i0, %i1 +; CHECK: movg %xcc, %i2, %i3 +; CHECK: or %g0, %i3, %i0 +define i32 @selecti32_xcc(i64 %x, i64 %y, i32 %a, i32 %b) { +entry: + %tobool = icmp sgt i64 %x, %y + %rv = select i1 %tobool, i32 %a, i32 %b + ret i32 %rv +} + +; CHECK: selecti64_xcc +; CHECK: subcc %i0, %i1 +; CHECK: movg %xcc, %i2, %i3 +; CHECK: or %g0, %i3, %i0 +define i64 @selecti64_xcc(i64 %x, i64 %y, i64 %a, i64 %b) { +entry: + %tobool = icmp sgt i64 %x, %y + %rv = select i1 %tobool, i64 %a, i64 %b + ret i64 %rv +} diff --git a/test/CodeGen/SPARC/DbgValueOtherTargets.test b/test/CodeGen/SPARC/DbgValueOtherTargets.test new file mode 100644 index 000000000000..a669bf848d65 --- /dev/null +++ b/test/CodeGen/SPARC/DbgValueOtherTargets.test @@ -0,0 +1 @@ +RUN: llc -O0 -march=sparc -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll diff --git a/test/CodeGen/SPARC/ctpop.ll b/test/CodeGen/SPARC/ctpop.ll index e56f4947b52a..916a41496e2a 100644 --- a/test/CodeGen/SPARC/ctpop.ll +++ b/test/CodeGen/SPARC/ctpop.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=sparc -mattr=-v9 | not grep popc -; RUN: llc < %s -march=sparcv9 -mattr=v9 | grep popc +; RUN: llc < %s -march=sparc -mattr=+v9 | grep popc declare i32 @llvm.ctpop.i32(i32) diff --git a/test/CodeGen/SPARC/lit.local.cfg b/test/CodeGen/SPARC/lit.local.cfg index 786fee9e6610..6f30a8797967 100644 --- a/test/CodeGen/SPARC/lit.local.cfg +++ b/test/CodeGen/SPARC/lit.local.cfg @@ -1,4 +1,4 @@ -config.suffixes = ['.ll', '.c', '.cpp'] +config.suffixes = ['.ll', '.c', '.cpp', '.test'] targets = set(config.root.targets_to_build.split()) if not 'Sparc' in targets: diff --git a/test/CodeGen/Thumb/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/Thumb/2010-04-07-DbgValueOtherTargets.ll deleted file mode 100644 index b9039774d42e..000000000000 --- a/test/CodeGen/Thumb/2010-04-07-DbgValueOtherTargets.ll +++ /dev/null @@ -1,28 +0,0 @@ -; RUN: llc -O0 -march=thumb -asm-verbose < %s | FileCheck %s -; Check that DEBUG_VALUE comments come through on a variety of targets. 
- -define i32 @main() nounwind ssp { -entry: -; CHECK: DEBUG_VALUE - call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9 - ret i32 0, !dbg !10 -} - -declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone - -declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone - -!llvm.dbg.sp = !{!0} - -!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] -!4 = metadata !{metadata !5} -!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 0} -!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] -!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] -!9 = metadata !{i32 3, i32 11, metadata !8, null} -!10 = metadata !{i32 4, i32 2, metadata !8, null} - diff --git a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll index 9f5a677ed356..d6b649569173 100644 --- a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll +++ b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll @@ -47,8 +47,8 @@ declare double @sqrt(double) nounwind readonly declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !0 = metadata !{i32 46, i32 0, metadata !1, null} -!1 = metadata !{i32 524299, metadata !2, i32 44, i32 0} ; [ DW_TAG_lexical_block ] -!2 = metadata !{i32 524299, metadata !3, i32 44, i32 0} ; [ DW_TAG_lexical_block ] +!1 = metadata !{i32 524299, metadata !4, metadata !2, i32 44, i32 0} ; [ DW_TAG_lexical_block ] +!2 = metadata !{i32 524299, metadata !4, metadata !3, i32 44, i32 0} ; [ DW_TAG_lexical_block ] !3 = metadata !{i32 524334, i32 0, metadata !4, metadata !"getClosestDiagonal3", metadata !"getClosestDiagonal3", metadata !"_Z19getClosestDiagonal3ii", metadata !4, i32 44, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] !4 = metadata !{i32 524329, metadata !"ggEdgeDiscrepancy.cc", metadata !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src", metadata !5} ; [ DW_TAG_file_type ] !5 = metadata !{i32 524305, i32 0, i32 4, metadata !"ggEdgeDiscrepancy.cc", metadata !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src", metadata !"4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build 00)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] @@ -61,7 +61,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !12 = metadata !{i32 524289, metadata !4, metadata !"", metadata !4, i32 0, i64 192, i64 32, i64 0, i32 0, metadata !13, metadata !14, i32 0, null} ; [ DW_TAG_array_type ] !13 = metadata !{i32 524324, metadata !4, metadata !"double", metadata !4, i32 0, i64 64, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] !14 = metadata !{metadata !15} -!15 = metadata !{i32 524321, i64 0, i64 2} ; [ DW_TAG_subrange_type ] +!15 = metadata !{i32 524321, i64 0, i64 3} ; [ DW_TAG_subrange_type ] !16 = metadata !{i32 524334, i32 0, metadata !8, metadata !"ggVector3", metadata !"ggVector3", metadata !"", metadata !9, i32 72, metadata !17, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] !17 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null} ; [ DW_TAG_subroutine_type ] !18 = metadata !{null, metadata !19, metadata !20} @@ -140,8 +140,8 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !91 = metadata !{i32 524544, metadata !1, metadata !"vx", metadata !4, i32 46, metadata !13} ; [ DW_TAG_auto_variable ] !92 = metadata !{i32 48, i32 0, metadata !1, null} !93 = metadata !{i32 218, i32 0, metadata !94, metadata !96} -!94 = metadata !{i32 524299, metadata !95, i32 217, i32 0} ; [ DW_TAG_lexical_block ] -!95 = metadata !{i32 524299, metadata !77, i32 217, i32 0} ; [ DW_TAG_lexical_block ] +!94 = metadata !{i32 524299, metadata !4, metadata !95, i32 217, i32 0} ; [ DW_TAG_lexical_block ] +!95 = metadata !{i32 524299, metadata !4, metadata !77, i32 217, i32 0} ; [ DW_TAG_lexical_block ] !96 = metadata !{i32 51, i32 0, metadata !1, null} !97 = metadata !{i32 227, i32 0, metadata !94, metadata !96} !98 = metadata !{i32 52, i32 0, metadata !1, null} diff --git a/test/CodeGen/Thumb/DbgValueOtherTargets.test b/test/CodeGen/Thumb/DbgValueOtherTargets.test new file mode 100644 index 000000000000..afb18a43be47 --- /dev/null +++ b/test/CodeGen/Thumb/DbgValueOtherTargets.test @@ -0,0 +1 @@ +RUN: llc -O0 -march=thumb -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll diff --git a/test/CodeGen/Thumb/iabs.ll b/test/CodeGen/Thumb/iabs.ll index 2e77660c45c1..76224bc5348c 100644 --- a/test/CodeGen/Thumb/iabs.ll +++ b/test/CodeGen/Thumb/iabs.ll @@ -1,22 +1,20 @@ -; RUN: llc < %s -march=thumb -stats 2>&1 | \ -; RUN: grep "4 .*Number of machine instrs printed" - -;; Integer absolute value, should produce something as good as: -;; Thumb: -;; movs r0, r0 -;; bpl -;; rsb r0, r0, #0 (with opitmization, bpl + rsb is if-converted into rsbmi) -;; bx lr +; RUN: llc < %s -mtriple=thumb-unknown-unknown -filetype=obj -o %t.o +; RUN: llvm-objdump -disassemble -arch=thumb %t.o | FileCheck %s define i32 @test(i32 %a) { %tmp1neg = sub i32 0, %a %b = icmp sgt i32 %a, -1 %abs = select i1 %b, i32 %a, i32 %tmp1neg ret i32 %abs -; CHECK: movs r0, r0 -; CHECK: bpl -; CHECK: rsb r0, r0, #0 -; CHECK: bx lr -} +; This test just checks that 4 instructions were emitted + +; CHECK: {{text}} +; CHECK: 0: +; CHECK-NEXT: 2: +; CHECK-NEXT: 4: +; CHECK-NEXT: 6: + +; CHECK-NOT: 8: +} diff --git a/test/CodeGen/Thumb/lit.local.cfg b/test/CodeGen/Thumb/lit.local.cfg index cb77b09ef4ad..4d75f581a1d2 100644 --- a/test/CodeGen/Thumb/lit.local.cfg +++ b/test/CodeGen/Thumb/lit.local.cfg @@ -1,4 
+1,4 @@ -config.suffixes = ['.ll', '.c', '.cpp'] +config.suffixes = ['.ll', '.c', '.cpp', '.test'] targets = set(config.root.targets_to_build.split()) if not 'ARM' in targets: diff --git a/test/CodeGen/Thumb/stack-coloring-without-frame-ptr.ll b/test/CodeGen/Thumb/stack-coloring-without-frame-ptr.ll new file mode 100644 index 000000000000..3f6407a0a3c0 --- /dev/null +++ b/test/CodeGen/Thumb/stack-coloring-without-frame-ptr.ll @@ -0,0 +1,29 @@ +; RUN: llc < %s -march=thumb -mcpu=arm1022e + +%iterator = type { i8**, i8**, i8**, i8*** } +%insert_iterator = type { %deque*, %iterator } +%deque = type { %iterator, %iterator, i8***, i32 } + +define i32 @test_thumbv5e_fp_elim() nounwind optsize { +entry: + %var1 = alloca %iterator, align 4 + %var2 = alloca %insert_iterator, align 4 + %var3 = alloca %deque, align 4 + + %0 = bitcast %deque* %var3 to i8* + %1 = bitcast %iterator* %var1 to i8* + call void @llvm.lifetime.start(i64 16, i8* %1) nounwind + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %0, i32 16, i32 4, i1 false) + call void @llvm.lifetime.end(i64 16, i8* %1) nounwind + + %2 = bitcast %insert_iterator* %var2 to i8* + call void @llvm.lifetime.start(i64 20, i8* %2) nounwind + + ret i32 0 +} + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind + +declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind + +declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind diff --git a/test/CodeGen/Thumb2/2013-02-19-tail-call-register-hint.ll b/test/CodeGen/Thumb2/2013-02-19-tail-call-register-hint.ll new file mode 100644 index 000000000000..502b138f65c8 --- /dev/null +++ b/test/CodeGen/Thumb2/2013-02-19-tail-call-register-hint.ll @@ -0,0 +1,53 @@ +; RUN: llc < %s -mtriple=thumbv7s-apple-ios6.0.0 -verify-machineinstrs + +; Check to make sure the tail-call return at the end doesn't use a +; callee-saved register. Register hinting from t2LDRDri was getting this +; wrong. The intervening call will force allocation to try a high register +; first, so the hint will attempt to fire, but must be rejected due to +; not being in the allocation order for the tcGPR register class. +; The machine instruction verifier will make sure that all actually worked +; out the way it's supposed to. 
+ +%"myclass" = type { %struct.foo } +%struct.foo = type { i32, [40 x i8] } + +define hidden void @func(i8* %Data) nounwind ssp { + %1 = getelementptr inbounds i8* %Data, i32 12 + %2 = bitcast i8* %1 to %"myclass"* + tail call void @abc(%"myclass"* %2) nounwind + tail call void @def(%"myclass"* %2) nounwind + %3 = getelementptr inbounds i8* %Data, i32 8 + %4 = bitcast i8* %3 to i8** + %5 = load i8** %4, align 4, !tbaa !0 + tail call void @ghi(i8* %5) nounwind + %6 = bitcast i8* %Data to void (i8*)** + %7 = load void (i8*)** %6, align 4, !tbaa !0 + %8 = getelementptr inbounds i8* %Data, i32 4 + %9 = bitcast i8* %8 to i8** + %10 = load i8** %9, align 4, !tbaa !0 + %11 = icmp eq i8* %Data, null + br i1 %11, label %14, label %12 + +; <label>:12 ; preds = %0 + %13 = tail call %"myclass"* @jkl(%"myclass"* %2) nounwind + tail call void @mno(i8* %Data) nounwind + br label %14 + +; <label>:14 ; preds = %12, %0 + tail call void %7(i8* %10) nounwind + ret void +} + +declare void @mno(i8*) + +declare void @def(%"myclass"*) + +declare void @abc(%"myclass"*) + +declare void @ghi(i8*) + +declare %"myclass"* @jkl(%"myclass"*) nounwind + +!0 = metadata !{metadata !"any pointer", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/CodeGen/Thumb2/2013-03-02-vduplane-nonconstant-source-index.ll b/test/CodeGen/Thumb2/2013-03-02-vduplane-nonconstant-source-index.ll new file mode 100644 index 000000000000..937ecc0d6679 --- /dev/null +++ b/test/CodeGen/Thumb2/2013-03-02-vduplane-nonconstant-source-index.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s + +define void @bar(<4 x i32>* %p, i32 %lane, <4 x i32> %phitmp) nounwind { +; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[SOURCE:[0-9]+]]:128] +; CHECK: add.w r[[ADDR:[0-9]+]], r[[SOURCE]], {{r[0-9]+}}, lsl #2 +; CHECK: vld1.32 {[[DREG:d[0-9]+]][], [[DREG2:d[0-9]+]][]}, [r[[ADDR]]:32] +; CHECK: vst1.32 {[[DREG]], [[DREG2]]}, [r0] + %val = extractelement <4 x i32> %phitmp, i32 %lane + %r1 = insertelement <4 x i32> undef, i32 %val, i32 1 + %r2 = insertelement <4 x i32> %r1, i32 %val, i32 2 + %r3 = insertelement <4 x i32> %r2, i32 %val, i32 3 + store <4 x i32> %r3, <4 x i32>* %p, align 4 + ret void +} diff --git a/test/CodeGen/Thumb2/2013-03-06-vector-sext-operand-scalarize.ll b/test/CodeGen/Thumb2/2013-03-06-vector-sext-operand-scalarize.ll new file mode 100644 index 000000000000..203815fadc9c --- /dev/null +++ b/test/CodeGen/Thumb2/2013-03-06-vector-sext-operand-scalarize.ll @@ -0,0 +1,19 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s + +; Testing that these don't crash/assert. The loop vectorizer can end up +; with odd constructs like this. The code actually generated is incidental. 
+define <1 x i64> @test_zext(i32 %a) nounwind { +; CHECK: test_zext: + %Cmp = icmp uge i32 %a, 42 + %vec = insertelement <1 x i1> zeroinitializer, i1 %Cmp, i32 0 + %Se = zext <1 x i1> %vec to <1 x i64> + ret <1 x i64> %Se +} + +define <1 x i64> @test_sext(i32 %a) nounwind { +; CHECK: test_sext: + %Cmp = icmp uge i32 %a, 42 + %vec = insertelement <1 x i1> zeroinitializer, i1 %Cmp, i32 0 + %Se = sext <1 x i1> %vec to <1 x i64> + ret <1 x i64> %Se +} diff --git a/test/CodeGen/Thumb2/aligned-spill.ll b/test/CodeGen/Thumb2/aligned-spill.ll index c98ca8098583..3a2803f91f16 100644 --- a/test/CodeGen/Thumb2/aligned-spill.ll +++ b/test/CodeGen/Thumb2/aligned-spill.ll @@ -26,8 +26,8 @@ entry: ; NEON: bic r4, r4, #15 ; Stack pointer must be updated before the spills. ; NEON: mov sp, r4 -; NEON: vst1.64 {d8, d9, d10, d11}, [r4, :128]! -; NEON: vst1.64 {d12, d13, d14, d15}, [r4, :128] +; NEON: vst1.64 {d8, d9, d10, d11}, [r4:128]! +; NEON: vst1.64 {d12, d13, d14, d15}, [r4:128] ; Stack pointer adjustment for the stack frame contents. ; This could legally happen before the spills. ; Since the spill slot is only 8 bytes, technically it would be fine to only @@ -36,8 +36,8 @@ entry: ; NEON: sub sp, #16 ; The epilog is free to use another scratch register than r4. ; NEON: add r[[R4:[0-9]+]], sp, #16 -; NEON: vld1.64 {d8, d9, d10, d11}, [r[[R4]], :128]! -; NEON: vld1.64 {d12, d13, d14, d15}, [r[[R4]], :128] +; NEON: vld1.64 {d8, d9, d10, d11}, [r[[R4]]:128]! +; NEON: vld1.64 {d12, d13, d14, d15}, [r[[R4]]:128] ; The stack pointer restore must happen after the reloads. ; NEON: mov sp, ; NEON: pop @@ -57,8 +57,8 @@ entry: ; NEON: bic r4, r4, #15 ; Stack pointer must be updated before the spills. ; NEON: mov sp, r4 -; NEON: vst1.64 {d8, d9, d10, d11}, [r4, :128]! -; NEON: vst1.64 {d12, d13}, [r4, :128] +; NEON: vst1.64 {d8, d9, d10, d11}, [r4:128]! +; NEON: vst1.64 {d12, d13}, [r4:128] ; NEON: vstr d14, [r4, #16] ; Epilog ; NEON: vld1.64 {d8, d9, d10, d11}, @@ -84,7 +84,7 @@ entry: ; NEON: bic r4, r4, #15 ; Stack pointer must be updated before the spills. ; NEON: mov sp, r4 -; NEON: vst1.64 {d8, d9}, [r4, :128] +; NEON: vst1.64 {d8, d9}, [r4:128] ; NEON: vstr d10, [r4, #16] ; Epilog ; NEON: vld1.64 {d8, d9}, diff --git a/test/CodeGen/Thumb2/cortex-fp.ll b/test/CodeGen/Thumb2/cortex-fp.ll index b7df2fbf546c..f6cea72caecd 100644 --- a/test/CodeGen/Thumb2/cortex-fp.ll +++ b/test/CodeGen/Thumb2/cortex-fp.ll @@ -7,7 +7,7 @@ define float @foo(float %a, float %b) { entry: ; CHECK: foo ; CORTEXM3: blx ___mulsf3 -; CORTEXM4: vmul.f32 s0, s2, s0 +; CORTEXM4: vmul.f32 s ; CORTEXA8: vmul.f32 d %0 = fmul float %a, %b ret float %0 diff --git a/test/CodeGen/Thumb2/crash.ll b/test/CodeGen/Thumb2/crash.ll index cb4d08058f41..6ce0b82b94d7 100644 --- a/test/CodeGen/Thumb2/crash.ll +++ b/test/CodeGen/Thumb2/crash.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -verify-machineinstrs +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -verify-machineinstrs -O0 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" target triple = "thumbv7-apple-darwin10" @@ -76,3 +77,11 @@ entry: store i32 %num, i32* %p2, align 4 ret void } + +; Check RAFast handling of inline assembly with many dense clobbers. +; The large tuple aliases of the vector registers can cause problems. 
+define void @rdar13249625(double* nocapture %p) nounwind { + %1 = tail call double asm sideeffect "@ $0", "=w,~{d0},~{q1},~{q2},~{q3},~{q4},~{q5},~{q6},~{q7},~{q8},~{q9},~{q10},~{q11},~{q12},~{q13},~{q14},~{q15}"() nounwind + store double %1, double* %p, align 4 + ret void +} diff --git a/test/CodeGen/Thumb2/thumb2-ldr_post.ll b/test/CodeGen/Thumb2/thumb2-ldr_post.ll index 2178eecb43e4..bce847471beb 100644 --- a/test/CodeGen/Thumb2/thumb2-ldr_post.ll +++ b/test/CodeGen/Thumb2/thumb2-ldr_post.ll @@ -1,5 +1,4 @@ -; RUN: llc < %s -march=thumb -mattr=+thumb2 | \ -; RUN: grep "ldr.*\[.*\]," | count 1 +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s define i32 @test(i32 %a, i32 %b, i32 %c) { %tmp1 = mul i32 %a, %b ; <i32> [#uses=2] @@ -9,4 +8,5 @@ define i32 @test(i32 %a, i32 %b, i32 %c) { %tmp5 = mul i32 %tmp4, %tmp3 ; <i32> [#uses=1] ret i32 %tmp5 } +; CHECK: ldr r{{.*}}, [{{.*}}], diff --git a/test/CodeGen/Thumb2/thumb2-mul.ll b/test/CodeGen/Thumb2/thumb2-mul.ll index ac059bdaf05d..a8134e630821 100644 --- a/test/CodeGen/Thumb2/thumb2-mul.ll +++ b/test/CodeGen/Thumb2/thumb2-mul.ll @@ -15,7 +15,7 @@ entry: ; CHECK: t1: ; CHECK: mla r0, r2, r0, r1 ; CHECK: add.w r0, r0, r0, lsl #3 -; CHECL: add.w r0, r3, r0, lsl #2 +; CHECK: add.w r0, r3, r0, lsl #2 %mul = mul i32 %n, %i %add = add i32 %mul, %j %0 = ptrtoint %struct.CMPoint* %thePoints to i32 diff --git a/test/CodeGen/Thumb2/thumb2-shifter.ll b/test/CodeGen/Thumb2/thumb2-shifter.ll index 98854a1205f8..05dd90cfbfed 100644 --- a/test/CodeGen/Thumb2/thumb2-shifter.ll +++ b/test/CodeGen/Thumb2/thumb2-shifter.ll @@ -1,24 +1,27 @@ -; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | FileCheck %s +; RUN: llc < %s -march=thumb -mcpu=cortex-a8 | FileCheck %s --check-prefix=A8 +; RUN: llc < %s -march=thumb -mcpu=swift | FileCheck %s --check-prefix=SWIFT + +; rdar://12892707 define i32 @t2ADDrs_lsl(i32 %X, i32 %Y) { -; CHECK: t2ADDrs_lsl -; CHECK: add.w r0, r0, r1, lsl #16 +; A8: t2ADDrs_lsl +; A8: add.w r0, r0, r1, lsl #16 %A = shl i32 %Y, 16 %B = add i32 %X, %A ret i32 %B } define i32 @t2ADDrs_lsr(i32 %X, i32 %Y) { -; CHECK: t2ADDrs_lsr -; CHECK: add.w r0, r0, r1, lsr #16 +; A8: t2ADDrs_lsr +; A8: add.w r0, r0, r1, lsr #16 %A = lshr i32 %Y, 16 %B = add i32 %X, %A ret i32 %B } define i32 @t2ADDrs_asr(i32 %X, i32 %Y) { -; CHECK: t2ADDrs_asr -; CHECK: add.w r0, r0, r1, asr #16 +; A8: t2ADDrs_asr +; A8: add.w r0, r0, r1, asr #16 %A = ashr i32 %Y, 16 %B = add i32 %X, %A ret i32 %B @@ -26,8 +29,8 @@ define i32 @t2ADDrs_asr(i32 %X, i32 %Y) { ; i32 ror(n) = (x >> n) | (x << (32 - n)) define i32 @t2ADDrs_ror(i32 %X, i32 %Y) { -; CHECK: t2ADDrs_ror -; CHECK: add.w r0, r0, r1, ror #16 +; A8: t2ADDrs_ror +; A8: add.w r0, r0, r1, ror #16 %A = lshr i32 %Y, 16 %B = shl i32 %Y, 16 %C = or i32 %B, %A @@ -36,13 +39,66 @@ define i32 @t2ADDrs_ror(i32 %X, i32 %Y) { } define i32 @t2ADDrs_noRegShift(i32 %X, i32 %Y, i8 %sh) { -; CHECK: t2ADDrs_noRegShift -; CHECK: uxtb r2, r2 -; CHECK: lsls r1, r2 -; CHECK: add r0, r1 +; A8: t2ADDrs_noRegShift +; A8: uxtb r2, r2 +; A8: lsls r1, r2 +; A8: add r0, r1 + +; SWIFT: t2ADDrs_noRegShift +; SWIFT-NOT: lsls +; SWIFT: lsl.w + %shift.upgrd.1 = zext i8 %sh to i32 + %A = shl i32 %Y, %shift.upgrd.1 + %B = add i32 %X, %A + ret i32 %B +} + +define i32 @t2ADDrs_noRegShift2(i32 %X, i32 %Y, i8 %sh) { +; A8: t2ADDrs_noRegShift2 +; A8: uxtb r2, r2 +; A8: lsrs r1, r2 +; A8: add r0, r1 + +; SWIFT: t2ADDrs_noRegShift2 +; SWIFT-NOT: lsrs +; SWIFT: lsr.w + %shift.upgrd.1 = zext i8 %sh to i32 + %A = lshr i32 %Y, %shift.upgrd.1 + %B = add i32 
%X, %A + ret i32 %B +} + +define i32 @t2ADDrs_noRegShift3(i32 %X, i32 %Y, i8 %sh) { +; A8: t2ADDrs_noRegShift3 +; A8: uxtb r2, r2 +; A8: asrs r1, r2 +; A8: add r0, r1 + +; SWIFT: t2ADDrs_noRegShift3 +; SWIFT-NOT: asrs +; SWIFT: asr.w + %shift.upgrd.1 = zext i8 %sh to i32 + %A = ashr i32 %Y, %shift.upgrd.1 + %B = add i32 %X, %A + ret i32 %B +} + +define i32 @t2ADDrs_optsize(i32 %X, i32 %Y, i8 %sh) optsize { +; SWIFT: t2ADDrs_optsize +; SWIFT-NOT: lsl.w +; SWIFT: lsls %shift.upgrd.1 = zext i8 %sh to i32 %A = shl i32 %Y, %shift.upgrd.1 %B = add i32 %X, %A ret i32 %B } +define i32 @t2ADDrs_minsize(i32 %X, i32 %Y, i8 %sh) minsize { +; SWIFT: t2ADDrs_minsize +; SWIFT-NOT: lsr.w +; SWIFT: lsrs + %shift.upgrd.1 = zext i8 %sh to i32 + %A = lshr i32 %Y, %shift.upgrd.1 + %B = add i32 %X, %A + ret i32 %B +} diff --git a/test/CodeGen/Thumb2/thumb2-spill-q.ll b/test/CodeGen/Thumb2/thumb2-spill-q.ll index d9a0617f5a46..5bff268e2c3e 100644 --- a/test/CodeGen/Thumb2/thumb2-spill-q.ll +++ b/test/CodeGen/Thumb2/thumb2-spill-q.ll @@ -12,8 +12,8 @@ declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly define void @aaa(%quuz* %this, i8* %block) { ; CHECK: aaa: ; CHECK: bic r4, r4, #15 -; CHECK: vst1.64 {{.*}}[{{.*}}, :128] -; CHECK: vld1.64 {{.*}}[{{.*}}, :128] +; CHECK: vst1.64 {{.*}}[{{.*}}:128] +; CHECK: vld1.64 {{.*}}[{{.*}}:128] entry: %aligned_vec = alloca <4 x float>, align 16 %"alloca point" = bitcast i32 0 to i32 diff --git a/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll b/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll index 0af2445d7fba..2e4cb1fe7eda 100644 --- a/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll +++ b/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; The old instruction selector used to load all arguments to a call up in ; registers, then start pushing them all onto the stack. This is bad news as ; it makes a ton of annoying overlapping live ranges. This code should not diff --git a/test/CodeGen/X86/2006-03-02-InstrSchedBug.ll b/test/CodeGen/X86/2006-03-02-InstrSchedBug.ll index 1a3d74918d1a..7673124d5dda 100644 --- a/test/CodeGen/X86/2006-03-02-InstrSchedBug.ll +++ b/test/CodeGen/X86/2006-03-02-InstrSchedBug.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -march=x86 -stats 2>&1 | \ ; RUN: grep asm-printer | grep 7 diff --git a/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll b/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll index 5cba3efeefb8..faa3e21a934d 100644 --- a/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll +++ b/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -march=x86 -mcpu=yonah -stats 2>&1 | \ ; RUN: not grep "Number of register spills" ; END. 
diff --git a/test/CodeGen/X86/2006-05-02-InstrSched1.ll b/test/CodeGen/X86/2006-05-02-InstrSched1.ll index 1c75f93915a7..0afddd8f876f 100644 --- a/test/CodeGen/X86/2006-05-02-InstrSched1.ll +++ b/test/CodeGen/X86/2006-05-02-InstrSched1.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -march=x86 -relocation-model=static -stats 2>&1 | \ ; RUN: grep asm-printer | grep 14 ; diff --git a/test/CodeGen/X86/2006-05-02-InstrSched2.ll b/test/CodeGen/X86/2006-05-02-InstrSched2.ll index 95eefa1e7196..222b7a0b41fd 100644 --- a/test/CodeGen/X86/2006-05-02-InstrSched2.ll +++ b/test/CodeGen/X86/2006-05-02-InstrSched2.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -march=x86 -stats 2>&1 | \ ; RUN: grep asm-printer | grep 13 diff --git a/test/CodeGen/X86/2006-05-11-InstrSched.ll b/test/CodeGen/X86/2006-05-11-InstrSched.ll index 37c510786a5e..6912351d7b7e 100644 --- a/test/CodeGen/X86/2006-05-11-InstrSched.ll +++ b/test/CodeGen/X86/2006-05-11-InstrSched.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mcpu=penryn -mattr=+sse2 -stats -realign-stack=0 2>&1 | \ ; RUN: grep "asm-printer" | grep 35 diff --git a/test/CodeGen/X86/2008-02-18-TailMergingBug.ll b/test/CodeGen/X86/2008-02-18-TailMergingBug.ll index a1b973d7ccfa..363a6008a00d 100644 --- a/test/CodeGen/X86/2008-02-18-TailMergingBug.ll +++ b/test/CodeGen/X86/2008-02-18-TailMergingBug.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -march=x86 -mcpu=yonah -stats 2>&1 | grep "Number of block tails merged" | grep 16 ; PR1909 diff --git a/test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll b/test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll index 19a73543c65e..fc38135032c2 100644 --- a/test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll +++ b/test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movups | count 2 +; RUN: llc < %s -march=x86 -mcpu=penryn | FileCheck %s define void @a(<4 x float>* %x) nounwind { entry: @@ -8,4 +8,10 @@ entry: ret void } +; CHECK: a: +; CHECK: movups +; CHECK: movups +; CHECK-NOT: movups +; CHECK: ret + declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) diff --git a/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll b/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll index d423bfc389df..496779c468f4 100644 --- a/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll +++ b/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll @@ -1,10 +1,15 @@ ; Check that eh_return & unwind_init were properly lowered -; RUN: llc < %s | grep %rbp | count 7 -; RUN: llc < %s | grep %rcx | count 3 +; RUN: llc < %s -verify-machineinstrs | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" target triple = "x86_64-unknown-linux-gnu" +; CHECK: test +; CHECK: pushq %rbp +; CHECK: movq %rsp, %rbp +; CHECK: popq %rbp +; CHECK: movq %rcx, %rsp +; CHECK: ret # eh_return, addr: %rcx define i8* @test(i64 %a, i8* %b) { entry: call void @llvm.eh.unwind.init() @@ -15,3 +20,36 @@ entry: declare void @llvm.eh.return.i64(i64, i8*) declare void @llvm.eh.unwind.init() + +@b = common global i32 0, align 4 +@a = common global i32 0, align 4 + +; PR14750 +; This function contains a normal return as well as eh_return. 
+; CHECK: _Unwind_Resume_or_Rethrow +define i32 @_Unwind_Resume_or_Rethrow() nounwind uwtable ssp { +entry: + %0 = load i32* @b, align 4 + %tobool = icmp eq i32 %0, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + ret i32 0 + +if.end: ; preds = %entry + %call = tail call i32 (...)* @_Unwind_ForcedUnwind_Phase2() nounwind + store i32 %call, i32* @a, align 4 + %tobool1 = icmp eq i32 %call, 0 + br i1 %tobool1, label %cond.end, label %cond.true + +cond.true: ; preds = %if.end + tail call void @abort() noreturn nounwind + unreachable + +cond.end: ; preds = %if.end + tail call void @llvm.eh.return.i64(i64 0, i8* null) + unreachable +} + +declare i32 @_Unwind_ForcedUnwind_Phase2(...) +declare void @abort() noreturn diff --git a/test/CodeGen/X86/2008-10-27-CoalescerBug.ll b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll index b2cf34cd2033..0310a5dcb565 100644 --- a/test/CodeGen/X86/2008-10-27-CoalescerBug.ll +++ b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -stats 2>&1 | FileCheck %s ; Now this test spills one register. But a reload in the loop is cheaper than ; the divsd so it's a win. diff --git a/test/CodeGen/X86/2008-10-27-StackRealignment.ll b/test/CodeGen/X86/2008-10-27-StackRealignment.ll deleted file mode 100644 index a57f7166cadc..000000000000 --- a/test/CodeGen/X86/2008-10-27-StackRealignment.ll +++ /dev/null @@ -1,22 +0,0 @@ -; Linux doesn't support stack realignment for functions with allocas (PR2888). -; Until it does, we shouldn't use movaps to access the stack. On targets with -; sufficiently aligned stack (e.g. darwin) we should. -; PR8969 - make 32-bit linux have a 16-byte aligned stack -; RUN: llc < %s -mtriple=i386-pc-linux-gnu -mcpu=yonah | grep movaps | count 2 -; RUN: llc < %s -mtriple=i686-apple-darwin9 -mcpu=yonah | grep movaps | count 2 - - -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" -target triple = "i386-pc-linux-gnu" - -define void @foo(i32 %t) nounwind { - %tmp1210 = alloca i8, i32 32, align 4 - call void @llvm.memset.p0i8.i64(i8* %tmp1210, i8 0, i64 32, i32 4, i1 false) - %x = alloca i8, i32 %t - call void @dummy(i8* %x) - ret void -} - -declare void @dummy(i8*) - -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind diff --git a/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll b/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll index 0dca14d064eb..890fd0f067cf 100644 --- a/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll +++ b/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll @@ -78,7 +78,7 @@ declare void @llvm.stackrestore(i8*) nounwind !9 = metadata !{i32 458767, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !10} ; [ DW_TAG_pointer_type ] !10 = metadata !{i32 458753, metadata !2, metadata !"", metadata !2, i32 0, i64 8, i64 8, i64 0, i32 0, metadata !5, metadata !11, i32 0, null} ; [ DW_TAG_array_type ] !11 = metadata !{metadata !12} -!12 = metadata !{i32 458785, i64 0, i64 0} ; [ DW_TAG_subrange_type ] +!12 = metadata !{i32 458785, i64 0, i64 1} ; [ DW_TAG_subrange_type ] !13 = metadata !{i32 3, i32 0, metadata !14, null} !14 = metadata !{i32 458763, metadata !1, i32 0, i32 0} ; [ DW_TAG_lexical_block ] !15 = metadata !{i32 4, i32 0, metadata !14, null} diff --git a/test/CodeGen/X86/2009-02-25-CommuteBug.ll b/test/CodeGen/X86/2009-02-25-CommuteBug.ll index 9cbf35094061..9ea34e27a17e 100644 --- 
a/test/CodeGen/X86/2009-02-25-CommuteBug.ll +++ b/test/CodeGen/X86/2009-02-25-CommuteBug.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -march=x86 -mattr=+sse2 -stats 2>&1 | not grep commuted ; rdar://6608609 diff --git a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll index d50fe6f73a00..68a9fafb6de8 100644 --- a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll +++ b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -mcpu=penryn -stats 2>&1 | grep "5 machine-licm" ; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -mcpu=penryn | FileCheck %s ; rdar://6627786 diff --git a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll index d934ec9a88f8..351a1722a231 100644 --- a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll +++ b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -mtriple=x86_64-linux -relocation-model=static -o /dev/null -stats -info-output-file - > %t ; RUN: not grep spill %t ; RUN: not grep "%rsp" %t diff --git a/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll b/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll index ad18a0c5b94d..0607eda271af 100644 --- a/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll +++ b/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats 2>&1 | grep "Number of modref unfolded" ; XFAIL: * ; 69408 removed the opportunity for this optimization to work diff --git a/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll b/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll index 94075e78a28a..c2d9d84d4c5a 100644 --- a/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll +++ b/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll @@ -6,15 +6,16 @@ define void @t(i32 %count) ssp nounwind { entry: ; CHECK: t: -; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip) -; CHECK: movups L_str(%rip), %xmm0 +; CHECK: movups L_str+12(%rip), %xmm0 +; CHECK: movups L_str(%rip), %xmm1 %tmp0 = alloca [60 x i8], align 1 %tmp1 = getelementptr inbounds [60 x i8]* %tmp0, i64 0, i64 0 br label %bb1 bb1: ; CHECK: LBB0_1: -; CHECK: movaps %xmm0, (%rsp) +; CHECK: movups %xmm0, 12(%rsp) +; CHECK: movaps %xmm1, (%rsp) %tmp2 = phi i32 [ %tmp3, %bb1 ], [ 0, %entry ] call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* getelementptr inbounds ([28 x i8]* @str, i64 0, i64 0), i64 28, i32 1, i1 false) %tmp3 = add i32 %tmp2, 1 diff --git a/test/CodeGen/X86/2010-01-18-DbgValue.ll b/test/CodeGen/X86/2010-01-18-DbgValue.ll index 85ee091c3478..7dba332b1bec 100644 --- a/test/CodeGen/X86/2010-01-18-DbgValue.ll +++ b/test/CodeGen/X86/2010-01-18-DbgValue.ll @@ -28,21 +28,25 @@ return: ; preds = %entry declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone -!0 = metadata !{i32 524545, metadata !1, metadata !"my_r0", metadata !2, i32 11, metadata !7} ; [ DW_TAG_arg_variable ] -!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 11, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ] -!2 = metadata !{i32 524329, metadata !"b2.c", metadata !"/tmp/", metadata !3} ; [ DW_TAG_file_type ] -!3 = metadata !{i32 524305, i32 0, i32 1, metadata !"b2.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] +!llvm.dbg.cu = !{!3} + +!0 = metadata !{i32 786689, metadata !1, metadata !"my_r0", metadata !2, i32 11, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ] +!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 11, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, double (%struct.Rect*)* @foo, null, null, null, i32 11} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !18, null, metadata !""} ; [ DW_TAG_compile_unit ] +!4 = metadata !{i32 786453, metadata !19, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] !5 = metadata !{metadata !6, metadata !7} -!6 = metadata !{i32 524324, metadata !2, metadata !"double", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] -!7 = metadata !{i32 524307, metadata !2, metadata !"Rect", metadata !2, i32 6, i64 256, i64 64, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_structure_type ] +!6 = metadata !{i32 786468, metadata !19, metadata !2, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] +!7 = metadata !{i32 786451, metadata !19, metadata !2, metadata !"Rect", i32 6, i64 256, i64 64, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_structure_type ] !8 = metadata !{metadata !9, metadata !14} -!9 = metadata !{i32 524301, metadata !7, metadata !"P1", metadata !2, i32 7, i64 128, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ] -!10 = metadata !{i32 524307, metadata !2, metadata !"Pt", metadata !2, i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ] +!9 = metadata !{i32 786445, metadata !19, metadata !7, metadata !"P1", i32 7, i64 128, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ] +!10 = metadata !{i32 786451, metadata !19, metadata !2, metadata !"Pt", i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ] !11 = metadata !{metadata !12, metadata !13} -!12 = metadata !{i32 524301, metadata !10, metadata !"x", metadata !2, i32 2, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ] -!13 = metadata !{i32 524301, metadata !10, metadata !"y", metadata !2, i32 3, i64 64, i64 64, i64 64, i32 0, metadata !6} ; [ DW_TAG_member ] -!14 = metadata !{i32 524301, metadata !7, metadata !"P2", metadata !2, i32 8, i64 128, i64 64, i64 128, i32 0, metadata !10} ; [ DW_TAG_member ] +!12 = metadata !{i32 786445, metadata !19, metadata !10, metadata !"x", i32 2, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ] +!13 = metadata !{i32 786445, metadata !19, metadata !10, metadata !"y", i32 3, i64 64, i64 64, i64 64, i32 0, metadata !6} ; [ DW_TAG_member ] +!14 = metadata !{i32 786445, metadata !19, metadata !7, metadata !"P2", i32 8, i64 128, i64 64, i64 128, i32 0, metadata !10} ; [ DW_TAG_member ] !15 = metadata !{i32 11, i32 0, metadata !1, null} !16 = metadata !{i32 12, i32 0, metadata !17, null} -!17 = metadata !{i32 524299, metadata !1, i32 11, i32 0} ; [ 
DW_TAG_lexical_block ] +!17 = metadata !{i32 786443, metadata !1, i32 11, i32 0} ; [ DW_TAG_lexical_block ] +!18 = metadata !{metadata !1} +!19 = metadata !{metadata !"b2.c", metadata !"/tmp/"} diff --git a/test/CodeGen/X86/2010-01-19-OptExtBug.ll b/test/CodeGen/X86/2010-01-19-OptExtBug.ll index eb4a5c04a2ae..ec24e73c34ac 100644 --- a/test/CodeGen/X86/2010-01-19-OptExtBug.ll +++ b/test/CodeGen/X86/2010-01-19-OptExtBug.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -mtriple=x86_64-apple-darwin11 -relocation-model=pic -disable-fp-elim -stats 2>&1 | not grep ext-opt define fastcc i8* @S_scan_str(i8* %start, i32 %keep_quoted, i32 %keep_delims) nounwind ssp { diff --git a/test/CodeGen/X86/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/X86/2010-04-07-DbgValueOtherTargets.ll deleted file mode 100644 index 42f19b3ad86a..000000000000 --- a/test/CodeGen/X86/2010-04-07-DbgValueOtherTargets.ll +++ /dev/null @@ -1,28 +0,0 @@ -; RUN: llc -O0 -march=x86 -asm-verbose < %s | FileCheck %s -; RUN: llc -O0 -march=x86-64 -asm-verbose < %s | FileCheck %s -; Check that DEBUG_VALUE comments come through on a variety of targets. - -define i32 @main() nounwind ssp { -entry: -; CHECK: DEBUG_VALUE - call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9 - ret i32 0, !dbg !10 -} - -declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone - -declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone - -!llvm.dbg.sp = !{!0} - -!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] -!4 = metadata !{metadata !5} -!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 0} -!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] -!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] -!9 = metadata !{i32 3, i32 11, metadata !8, null} -!10 = metadata !{i32 4, i32 2, metadata !8, null} diff --git a/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll index 2fceab6f091f..8ab93fcb978f 100644 --- a/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll +++ b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll @@ -198,27 +198,27 @@ declare float @copysignf(float, float) nounwind readnone declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone -!llvm.dbg.lv = !{!0, !11, !12, !13, !14, !16, !17, !18} +!llvm.dbg.cu = !{!3} -!0 = metadata !{i32 524545, metadata !1, metadata !"a", metadata !2, i32 1921, metadata !9} ; [ DW_TAG_arg_variable ] -!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"__divsc3", metadata !"__divsc3", metadata !"__divsc3", metadata !2, i32 1922, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true} ; [ DW_TAG_subprogram ] -!2 = metadata !{i32 524329, 
metadata !"libgcc2.c", metadata !"/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc", metadata !3} ; [ DW_TAG_file_type ] -!3 = metadata !{i32 524305, i32 0, i32 1, metadata !"libgcc2.c", metadata !"/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] +!0 = metadata !{i32 786689, metadata !1, metadata !"a", metadata !2, i32 1921, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ] +!1 = metadata !{i32 786478, metadata !2, metadata !"__divsc3", metadata !"__divsc3", metadata !"__divsc3", metadata !2, i32 1922, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, %0 (float, float, float, float)* @__divsc3, null, null, metadata !43, i32 1922} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 786473, metadata !45} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !44, null, metadata !""} ; [ DW_TAG_compile_unit ] +!4 = metadata !{i32 786453, metadata !45, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] !5 = metadata !{metadata !6, metadata !9, metadata !9, metadata !9, metadata !9} -!6 = metadata !{i32 524310, metadata !7, metadata !"SCtype", metadata !7, i32 170, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_typedef ] -!7 = metadata !{i32 524329, metadata !"libgcc2.h", metadata !"/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc", metadata !3} ; [ DW_TAG_file_type ] -!8 = metadata !{i32 524324, metadata !2, metadata !"complex float", metadata !2, i32 0, i64 64, i64 32, i64 0, i32 0, i32 3} ; [ DW_TAG_base_type ] -!9 = metadata !{i32 524310, metadata !7, metadata !"SFtype", metadata !7, i32 167, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ] -!10 = metadata !{i32 524324, metadata !2, metadata !"float", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] -!11 = metadata !{i32 524545, metadata !1, metadata !"b", metadata !2, i32 1921, metadata !9} ; [ DW_TAG_arg_variable ] -!12 = metadata !{i32 524545, metadata !1, metadata !"c", metadata !2, i32 1921, metadata !9} ; [ DW_TAG_arg_variable ] -!13 = metadata !{i32 524545, metadata !1, metadata !"d", metadata !2, i32 1921, metadata !9} ; [ DW_TAG_arg_variable ] -!14 = metadata !{i32 524544, metadata !15, metadata !"denom", metadata !2, i32 1923, metadata !9} ; [ DW_TAG_auto_variable ] -!15 = metadata !{i32 524299, metadata !1, i32 1922, i32 0} ; [ DW_TAG_lexical_block ] -!16 = metadata !{i32 524544, metadata !15, metadata !"ratio", metadata !2, i32 1923, metadata !9} ; [ DW_TAG_auto_variable ] -!17 = metadata !{i32 524544, metadata !15, metadata !"x", metadata !2, i32 1923, metadata !9} ; [ DW_TAG_auto_variable ] -!18 = metadata !{i32 524544, metadata !15, metadata !"y", metadata !2, i32 1923, metadata !9} ; [ DW_TAG_auto_variable ] +!6 = metadata !{i32 786454, metadata !46, metadata !7, metadata !"SCtype", i32 170, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_typedef ] +!7 = metadata !{i32 786473, metadata !46} ; [ DW_TAG_file_type ] +!8 = metadata !{i32 786468, metadata !45, metadata !2, metadata !"complex float", i32 0, i64 64, i64 32, i64 0, i32 0, i32 3} ; [ DW_TAG_base_type ] +!9 = metadata 
!{i32 786454, metadata !46, metadata !7, metadata !"SFtype", i32 167, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ] +!10 = metadata !{i32 786468, metadata !45, metadata !2, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] +!11 = metadata !{i32 786689, metadata !1, metadata !"b", metadata !2, i32 1921, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ] +!12 = metadata !{i32 786689, metadata !1, metadata !"c", metadata !2, i32 1921, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ] +!13 = metadata !{i32 786689, metadata !1, metadata !"d", metadata !2, i32 1921, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ] +!14 = metadata !{i32 786688, metadata !15, metadata !"denom", metadata !2, i32 1923, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ] +!15 = metadata !{i32 786443, metadata !2, metadata !1, i32 1922, i32 0} ; [ DW_TAG_lexical_block ] +!16 = metadata !{i32 786688, metadata !15, metadata !"ratio", metadata !2, i32 1923, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ] +!17 = metadata !{i32 786688, metadata !15, metadata !"x", metadata !2, i32 1923, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ] +!18 = metadata !{i32 786688, metadata !15, metadata !"y", metadata !2, i32 1923, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ] !19 = metadata !{i32 1929, i32 0, metadata !15, null} !20 = metadata !{i32 1931, i32 0, metadata !15, null} !21 = metadata !{i32 1932, i32 0, metadata !15, null} @@ -243,3 +243,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !40 = metadata !{i32 1964, i32 0, metadata !15, null} !41 = metadata !{i32 1965, i32 0, metadata !15, null} !42 = metadata !{i32 1969, i32 0, metadata !15, null} +!43 = metadata !{metadata !0, metadata !11, metadata !12, metadata !13, metadata !14, metadata !16, metadata !17, metadata !18} +!44 = metadata !{metadata !1} +!45 = metadata !{metadata !"libgcc2.c", metadata !"/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc"} +!46 = metadata !{metadata !"libgcc2.h", metadata !"/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc"} diff --git a/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll index 7909d2736b9c..6519ca063a7c 100644 --- a/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll +++ b/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll @@ -21,40 +21,45 @@ declare void @foo(i32) nounwind optsize noinline ssp declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone -!llvm.dbg.gv = !{!0} -!llvm.dbg.lv = !{!4, !8, !18, !25, !26} +!llvm.dbg.cu = !{!2} -!0 = metadata !{i32 524340, i32 0, metadata !1, metadata !"ret", metadata !"ret", metadata !"", metadata !1, i32 7, metadata !3, i1 false, i1 true, null} ; [ DW_TAG_variable ] -!1 = metadata !{i32 524329, metadata !"foo.c", metadata !"/tmp/", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 524305, i32 0, i32 1, metadata !"foo.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 524324, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!4 = metadata !{i32 524545, metadata !5, metadata !"x", metadata !1, i32 12, metadata !3} ; [ DW_TAG_arg_variable ] -!5 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 13, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true} ; [ DW_TAG_subprogram ] -!6 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ] +!0 = metadata !{i32 786484, i32 0, metadata !1, metadata !"ret", metadata !"ret", metadata !"", metadata !1, i32 7, metadata !3, i1 false, i1 true, null} ; [ DW_TAG_variable ] +!1 = metadata !{i32 786473, metadata !36} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, metadata !36, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !32, metadata !31, metadata !""} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 786468, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!4 = metadata !{i32 786689, metadata !5, metadata !"x", metadata !1, i32 12, metadata !3, i32 0, null} ; [ DW_TAG_arg_variable ] +!5 = metadata !{i32 786478, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 13, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, void (i32)* @foo, null, null, metadata !33, i32 13} ; [ DW_TAG_subprogram ] +!6 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ] !7 = metadata !{null, metadata !3} -!8 = metadata !{i32 524545, metadata !9, metadata !"myvar", metadata !1, i32 17, metadata !13} ; [ DW_TAG_arg_variable ] -!9 = metadata !{i32 524334, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", metadata !1, i32 17, metadata !10, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true} ; [ DW_TAG_subprogram ] -!10 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ] +!8 = metadata !{i32 786689, metadata !9, metadata !"myvar", metadata !1, i32 17, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ] +!9 = metadata !{i32 786478, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", metadata !1, i32 17, metadata !10, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i8* (%struct.a*)* @bar, null, null, metadata !34, i32 17} ; [ DW_TAG_subprogram ] +!10 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ] !11 = metadata !{metadata !12, metadata !13} -!12 = metadata !{i32 524303, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] -!13 = metadata !{i32 524303, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ] -!14 = metadata !{i32 524307, metadata !1, metadata !"a", metadata !1, i32 2, i64 128, i64 64, i64 0, i32 0, null, metadata !15, i32 0, null} ; [ DW_TAG_structure_type ] +!12 = metadata !{i32 786447, metadata !1, 
metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] +!13 = metadata !{i32 786447, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ] +!14 = metadata !{i32 786451, metadata !1, metadata !"a", metadata !1, i32 2, i64 128, i64 64, i64 0, i32 0, null, metadata !15, i32 0, null} ; [ DW_TAG_structure_type ] !15 = metadata !{metadata !16, metadata !17} -!16 = metadata !{i32 524301, metadata !14, metadata !"c", metadata !1, i32 3, i64 32, i64 32, i64 0, i32 0, metadata !3} ; [ DW_TAG_member ] -!17 = metadata !{i32 524301, metadata !14, metadata !"d", metadata !1, i32 4, i64 64, i64 64, i64 64, i32 0, metadata !13} ; [ DW_TAG_member ] -!18 = metadata !{i32 524545, metadata !19, metadata !"argc", metadata !1, i32 22, metadata !3} ; [ DW_TAG_arg_variable ] -!19 = metadata !{i32 524334, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 22, metadata !20, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true} ; [ DW_TAG_subprogram ] -!20 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !21, i32 0, null} ; [ DW_TAG_subroutine_type ] +!16 = metadata !{i32 786445, metadata !14, metadata !"c", metadata !1, i32 3, i64 32, i64 32, i64 0, i32 0, metadata !3} ; [ DW_TAG_member ] +!17 = metadata !{i32 786445, metadata !14, metadata !"d", metadata !1, i32 4, i64 64, i64 64, i64 64, i32 0, metadata !13} ; [ DW_TAG_member ] +!18 = metadata !{i32 786689, metadata !19, metadata !"argc", metadata !1, i32 22, metadata !3, i32 0, null} ; [ DW_TAG_arg_variable ] +!19 = metadata !{i32 786478, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 22, metadata !20, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, null, null, null, metadata !35, i32 22} ; [ DW_TAG_subprogram ] +!20 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !21, i32 0, null} ; [ DW_TAG_subroutine_type ] !21 = metadata !{metadata !3, metadata !3, metadata !22} -!22 = metadata !{i32 524303, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ] -!23 = metadata !{i32 524303, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !24} ; [ DW_TAG_pointer_type ] -!24 = metadata !{i32 524324, metadata !1, metadata !"char", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] -!25 = metadata !{i32 524545, metadata !19, metadata !"argv", metadata !1, i32 22, metadata !22} ; [ DW_TAG_arg_variable ] -!26 = metadata !{i32 524544, metadata !27, metadata !"e", metadata !1, i32 23, metadata !14} ; [ DW_TAG_auto_variable ] -!27 = metadata !{i32 524299, metadata !19, i32 22, i32 0} ; [ DW_TAG_lexical_block ] +!22 = metadata !{i32 786447, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ] +!23 = metadata !{i32 786447, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !24} ; [ DW_TAG_pointer_type ] +!24 = metadata !{i32 786468, metadata !1, metadata !"char", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] +!25 = metadata !{i32 786689, metadata !19, metadata !"argv", metadata !1, i32 22, metadata !22, i32 0, null} ; [ DW_TAG_arg_variable ] +!26 = metadata !{i32 786688, metadata !27, metadata !"e", metadata 
!1, i32 23, metadata !14, i32 0, null} ; [ DW_TAG_auto_variable ] +!27 = metadata !{i32 786443, metadata !36, metadata !19, i32 22, i32 0, i32 0} ; [ DW_TAG_lexical_block ] !28 = metadata !{i32 18, i32 0, metadata !29, null} -!29 = metadata !{i32 524299, metadata !9, i32 17, i32 0} ; [ DW_TAG_lexical_block ] +!29 = metadata !{i32 786443, metadata !36, metadata !9, i32 17, i32 0, i32 1} ; [ DW_TAG_lexical_block ] !30 = metadata !{i32 19, i32 0, metadata !29, null} +!31 = metadata !{metadata !0} +!32 = metadata !{metadata !5, metadata !9, metadata !19} +!33 = metadata !{metadata !4} +!34 = metadata !{metadata !8} +!35 = metadata !{metadata !18, metadata !25, metadata !26} +!36 = metadata !{metadata !"foo.c", metadata !"/tmp/"} ; The variable bar:myvar changes registers after the first movq. ; It is cobbered by popq %rbx @@ -79,4 +84,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone ; CHECK-NEXT: .short Lset{{.*}} ; CHECK-NEXT: Ltmp{{.*}}: ; CHECK-NEXT: .byte 83 -; CHECK-NEXT: Ltmp{{.*}}:
\ No newline at end of file +; CHECK-NEXT: Ltmp{{.*}}: diff --git a/test/CodeGen/X86/2010-05-28-Crash.ll b/test/CodeGen/X86/2010-05-28-Crash.ll index 1a0da3177a22..4ea3bf077841 100644 --- a/test/CodeGen/X86/2010-05-28-Crash.ll +++ b/test/CodeGen/X86/2010-05-28-Crash.ll @@ -22,23 +22,27 @@ entry: ret i32 %1, !dbg !13 } -!llvm.dbg.lv = !{!0, !7} +!llvm.dbg.cu = !{!3} -!0 = metadata !{i32 524545, metadata !1, metadata !"y", metadata !2, i32 2, metadata !6} ; [ DW_TAG_arg_variable ] -!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true} ; [ DW_TAG_subprogram ] -!2 = metadata !{i32 524329, metadata !"f.c", metadata !"/tmp", metadata !3} ; [ DW_TAG_file_type ] -!3 = metadata !{i32 524305, i32 0, i32 1, metadata !"f.c", metadata !"/tmp", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] +!0 = metadata !{i32 786689, metadata !1, metadata !"y", metadata !2, i32 2, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ] +!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 (i32)* @foo, null, null, metadata !15, i32 2} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 786473, metadata !18} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !17, null, metadata !""} ; [ DW_TAG_compile_unit ] +!4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] !5 = metadata !{metadata !6, metadata !6} -!6 = metadata !{i32 524324, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!7 = metadata !{i32 524545, metadata !8, metadata !"x", metadata !2, i32 6, metadata !6} ; [ DW_TAG_arg_variable ] -!8 = metadata !{i32 524334, i32 0, metadata !2, metadata !"bar", metadata !"bar", metadata !"bar", metadata !2, i32 6, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true} ; [ DW_TAG_subprogram ] +!6 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!7 = metadata !{i32 786689, metadata !8, metadata !"x", metadata !2, i32 6, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ] +!8 = metadata !{i32 786478, metadata !2, metadata !"bar", metadata !"bar", metadata !"bar", metadata !2, i32 6, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 (i32)* @bar, null, null, metadata !16, i32 6} ; [ DW_TAG_subprogram ] !9 = metadata !{i32 3, i32 0, metadata !10, null} -!10 = metadata !{i32 524299, metadata !1, i32 2, i32 0} ; [ DW_TAG_lexical_block ] +!10 = metadata !{i32 786443, metadata !2, metadata !1, i32 2, i32 0} ; [ DW_TAG_lexical_block ] !11 = metadata !{i32 1} !12 = metadata !{i32 3, i32 0, metadata !10, metadata !13} !13 = metadata !{i32 7, i32 0, metadata !14, null} -!14 = metadata !{i32 524299, metadata !8, i32 6, i32 0} ; [ DW_TAG_lexical_block ] +!14 = metadata !{i32 786443, metadata !2, metadata !8, i32 
6, i32 0} ; [ DW_TAG_lexical_block ] +!15 = metadata !{metadata !0} +!16 = metadata !{metadata !7} +!17 = metadata !{metadata !1, metadata !8} +!18 = metadata !{metadata !"f.c", metadata !"/tmp"} ;CHECK: DEBUG_VALUE: bar:x <- E ;CHECK: Ltmp diff --git a/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll b/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll index a9c03ee563d8..b764b0b34597 100644 --- a/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll +++ b/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll @@ -21,34 +21,35 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.lv = !{!0, !14, !15, !16, !17, !24, !25, !28} -!0 = metadata !{i32 524545, metadata !1, metadata !"this", metadata !3, i32 11, metadata !12} ; [ DW_TAG_arg_variable ] -!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEi", metadata !3, i32 11, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true} ; [ DW_TAG_subprogram ] -!2 = metadata !{i32 524307, metadata !3, metadata !"foo", metadata !3, i32 3, i64 32, i64 32, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_structure_type ] -!3 = metadata !{i32 524329, metadata !"foo.cp", metadata !"/tmp/", metadata !4} ; [ DW_TAG_file_type ] -!4 = metadata !{i32 524305, i32 0, i32 4, metadata !"foo.cp", metadata !"/tmp/", metadata !"4.2.1 LLVM build", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786689, metadata !1, metadata !"this", metadata !3, i32 11, metadata !12, i32 0, null} ; [ DW_TAG_arg_variable ] +!1 = metadata !{i32 786478, metadata !3, metadata !2, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEi", i32 11, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 (%struct.foo*, i32)* @_ZN3foo3bazEi, null, null, null, i32 11} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 786451, metadata !3, metadata !"foo", metadata !3, i32 3, i64 32, i64 32, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_structure_type ] +!3 = metadata !{i32 786473, metadata !31} ; [ DW_TAG_file_type ] +!4 = metadata !{i32 786449, i32 0, i32 4, metadata !"foo.cp", metadata !"/tmp/", metadata !"4.2.1 LLVM build", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] !5 = metadata !{metadata !6, metadata !1, metadata !8} -!6 = metadata !{i32 524301, metadata !2, metadata !"y", metadata !3, i32 8, i64 32, i64 32, i64 0, i32 0, metadata !7} ; [ DW_TAG_member ] -!7 = metadata !{i32 524324, metadata !3, metadata !"int", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!8 = metadata !{i32 524334, i32 0, metadata !2, metadata !"baz", metadata !"baz", metadata !"_ZN3foo3bazEi", metadata !3, i32 15, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true} ; [ DW_TAG_subprogram ] -!9 = metadata !{i32 524309, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !10, i32 0, null} ; [ DW_TAG_subroutine_type ] +!6 = metadata !{i32 786445, metadata !2, metadata !"y", metadata !3, i32 8, i64 32, i64 32, i64 0, i32 0, metadata !7} ; [ DW_TAG_member ] +!7 = metadata !{i32 786468, metadata !3, metadata !"int", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!8 = metadata !{i32 786478, metadata !3, metadata !2, metadata !"baz", metadata !"baz", metadata !"_ZN3foo3bazEi", i32 15, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 (%struct.foo*, i32)* @_ZN3foo3bazEi, null, null, null, i32 15} ; [ 
DW_TAG_subprogram ] +!9 = metadata !{i32 786453, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !10, i32 0, null} ; [ DW_TAG_subroutine_type ] !10 = metadata !{metadata !7, metadata !11, metadata !7} -!11 = metadata !{i32 524303, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !2} ; [ DW_TAG_pointer_type ] -!12 = metadata !{i32 524326, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !13} ; [ DW_TAG_const_type ] -!13 = metadata !{i32 524303, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_pointer_type ] -!14 = metadata !{i32 524545, metadata !1, metadata !"x", metadata !3, i32 11, metadata !7} ; [ DW_TAG_arg_variable ] -!15 = metadata !{i32 524545, metadata !8, metadata !"this", metadata !3, i32 15, metadata !12} ; [ DW_TAG_arg_variable ] -!16 = metadata !{i32 524545, metadata !8, metadata !"x", metadata !3, i32 15, metadata !7} ; [ DW_TAG_arg_variable ] -!17 = metadata !{i32 524545, metadata !18, metadata !"argc", metadata !3, i32 19, metadata !7} ; [ DW_TAG_arg_variable ] -!18 = metadata !{i32 524334, i32 0, metadata !3, metadata !"main", metadata !"main", metadata !"main", metadata !3, i32 19, metadata !19, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true} ; [ DW_TAG_subprogram ] -!19 = metadata !{i32 524309, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !20, i32 0, null} ; [ DW_TAG_subroutine_type ] +!11 = metadata !{i32 786447, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !2} ; [ DW_TAG_pointer_type ] +!12 = metadata !{i32 786470, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !13} ; [ DW_TAG_const_type ] +!13 = metadata !{i32 786447, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_pointer_type ] +!14 = metadata !{i32 786689, metadata !1, metadata !"x", metadata !3, i32 11, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ] +!15 = metadata !{i32 786689, metadata !8, metadata !"this", metadata !3, i32 15, metadata !12, i32 0, null} ; [ DW_TAG_arg_variable ] +!16 = metadata !{i32 786689, metadata !8, metadata !"x", metadata !3, i32 15, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ] +!17 = metadata !{i32 786689, metadata !18, metadata !"argc", metadata !3, i32 19, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ] +!18 = metadata !{i32 786478, metadata !3, metadata !3, metadata !"main", metadata !"main", metadata !"main", i32 19, metadata !19, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, null, null, null, null, i32 19} ; [ DW_TAG_subprogram ] +!19 = metadata !{i32 786453, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !20, i32 0, null} ; [ DW_TAG_subroutine_type ] !20 = metadata !{metadata !7, metadata !7, metadata !21} -!21 = metadata !{i32 524303, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ] -!22 = metadata !{i32 524303, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ] -!23 = metadata !{i32 524324, metadata !3, metadata !"char", metadata !3, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] -!24 = metadata !{i32 524545, metadata !18, metadata !"argv", metadata !3, i32 19, metadata !21} ; [ DW_TAG_arg_variable ] -!25 = 
metadata !{i32 524544, metadata !26, metadata !"a", metadata !3, i32 20, metadata !2} ; [ DW_TAG_auto_variable ] -!26 = metadata !{i32 524299, metadata !27, i32 19, i32 0} ; [ DW_TAG_lexical_block ] -!27 = metadata !{i32 524299, metadata !18, i32 19, i32 0} ; [ DW_TAG_lexical_block ] -!28 = metadata !{i32 524544, metadata !26, metadata !"b", metadata !3, i32 21, metadata !7} ; [ DW_TAG_auto_variable ] +!21 = metadata !{i32 786447, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ] +!22 = metadata !{i32 786447, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ] +!23 = metadata !{i32 786468, metadata !3, metadata !"char", metadata !3, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] +!24 = metadata !{i32 786689, metadata !18, metadata !"argv", metadata !3, i32 19, metadata !21, i32 0, null} ; [ DW_TAG_arg_variable ] +!25 = metadata !{i32 786688, metadata !26, metadata !"a", metadata !3, i32 20, metadata !2, i32 0, null} ; [ DW_TAG_auto_variable ] +!26 = metadata !{i32 786443, metadata !27, i32 19, i32 0} ; [ DW_TAG_lexical_block ] +!27 = metadata !{i32 786443, metadata !18, i32 19, i32 0} ; [ DW_TAG_lexical_block ] +!28 = metadata !{i32 786688, metadata !26, metadata !"b", metadata !3, i32 21, metadata !7, i32 0, null} ; [ DW_TAG_auto_variable ] !29 = metadata !{i32 16, i32 0, metadata !30, null} -!30 = metadata !{i32 524299, metadata !8, i32 15, i32 0} ; [ DW_TAG_lexical_block ] +!30 = metadata !{i32 786443, metadata !8, i32 15, i32 0} ; [ DW_TAG_lexical_block ] +!31 = metadata !{metadata !"foo.cp", metadata !"/tmp/"} diff --git a/test/CodeGen/X86/2010-07-06-DbgCrash.ll b/test/CodeGen/X86/2010-07-06-DbgCrash.ll index edd6015b0d28..208e93e098e6 100644 --- a/test/CodeGen/X86/2010-07-06-DbgCrash.ll +++ b/test/CodeGen/X86/2010-07-06-DbgCrash.ll @@ -16,7 +16,7 @@ !103 = metadata !{i32 524299, metadata !97, i32 73, i32 0} ; [ DW_TAG_lexical_block ] !104 = metadata !{i32 524289, metadata !38, metadata !"", metadata !38, i32 0, i64 85312, i64 64, i64 0, i32 0, metadata !46, metadata !105, i32 0, null} ; [ DW_TAG_array_type ] !105 = metadata !{metadata !106} -!106 = metadata !{i32 524321, i64 0, i64 1332} ; [ DW_TAG_subrange_type ] +!106 = metadata !{i32 524321, i64 0, i64 1333} ; [ DW_TAG_subrange_type ] !107 = metadata !{i32 73, i32 0, metadata !103, null} define i32 @main() nounwind ssp { diff --git a/test/CodeGen/X86/2010-08-04-StackVariable.ll b/test/CodeGen/X86/2010-08-04-StackVariable.ll index ba36fe7c12fd..aaa562a439d5 100644 --- a/test/CodeGen/X86/2010-08-04-StackVariable.ll +++ b/test/CodeGen/X86/2010-08-04-StackVariable.ll @@ -74,51 +74,52 @@ return: ; preds = %entry declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone -!llvm.dbg.sp = !{!0, !9, !16, !17, !20} +!llvm.dbg.cu = !{!3} +!46 = metadata !{metadata !0, metadata !9, metadata !16, metadata !17, metadata !20} -!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"", metadata !2, i32 11, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 524307, metadata !2, metadata !"SVal", metadata !2, i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_structure_type ] -!2 = metadata !{i32 524329, metadata !"small.cc", metadata !"/Users/manav/R8248330", metadata !3} ; [ DW_TAG_file_type ] -!3 = metadata !{i32 524305, i32 0, i32 4, metadata 
!"small.cc", metadata !"/Users/manav/R8248330", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786478, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"", metadata !2, i32 11, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 11} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786451, metadata !2, metadata !"SVal", metadata !2, i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_structure_type ] +!2 = metadata !{i32 786473, metadata !"small.cc", metadata !"/Users/manav/R8248330", metadata !3} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 786449, i32 4, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !46, null, metadata !""} ; [ DW_TAG_compile_unit ] !4 = metadata !{metadata !5, metadata !7, metadata !0, metadata !9} -!5 = metadata !{i32 524301, metadata !1, metadata !"Data", metadata !2, i32 7, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ] -!6 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] -!7 = metadata !{i32 524301, metadata !1, metadata !"Kind", metadata !2, i32 8, i64 32, i64 32, i64 64, i32 0, metadata !8} ; [ DW_TAG_member ] -!8 = metadata !{i32 524324, metadata !2, metadata !"unsigned int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] -!9 = metadata !{i32 524334, i32 0, metadata !1, metadata !"~SVal", metadata !"~SVal", metadata !"", metadata !2, i32 12, metadata !10, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] -!10 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ] +!5 = metadata !{i32 786445, metadata !1, metadata !"Data", metadata !2, i32 7, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ] +!6 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] +!7 = metadata !{i32 786445, metadata !1, metadata !"Kind", metadata !2, i32 8, i64 32, i64 32, i64 64, i32 0, metadata !8} ; [ DW_TAG_member ] +!8 = metadata !{i32 786468, metadata !2, metadata !"unsigned int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] +!9 = metadata !{i32 786478, metadata !1, metadata !"~SVal", metadata !"~SVal", metadata !"", metadata !2, i32 12, metadata !10, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 12} ; [ DW_TAG_subprogram ] +!10 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ] !11 = metadata !{null, metadata !12, metadata !13} -!12 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !1} ; [ DW_TAG_pointer_type ] -!13 = metadata !{i32 524324, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!14 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null} ; [ DW_TAG_subroutine_type ] +!12 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 
0, i32 64, metadata !1} ; [ DW_TAG_pointer_type ] +!13 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!14 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null} ; [ DW_TAG_subroutine_type ] !15 = metadata !{null, metadata !12} -!16 = metadata !{i32 524334, i32 0, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"_ZN4SValC1Ev", metadata !2, i32 11, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void (%struct.SVal*)* @_ZN4SValC1Ev} ; [ DW_TAG_subprogram ] -!17 = metadata !{i32 524334, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3fooi4SVal", metadata !2, i32 16, metadata !18, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, %struct.SVal*)* @_Z3fooi4SVal} ; [ DW_TAG_subprogram ] -!18 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !19, i32 0, null} ; [ DW_TAG_subroutine_type ] +!16 = metadata !{i32 786478, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"_ZN4SValC1Ev", metadata !2, i32 11, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void (%struct.SVal*)* @_ZN4SValC1Ev, null, null, null, i32 11} ; [ DW_TAG_subprogram ] +!17 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3fooi4SVal", metadata !2, i32 16, metadata !18, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, %struct.SVal*)* @_Z3fooi4SVal, null, null, null, i32 16} ; [ DW_TAG_subprogram ] +!18 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !19, i32 0, null} ; [ DW_TAG_subroutine_type ] !19 = metadata !{metadata !13, metadata !13, metadata !1} -!20 = metadata !{i32 524334, i32 0, metadata !2, metadata !"main", metadata !"main", metadata !"main", metadata !2, i32 23, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ] -!21 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, null} ; [ DW_TAG_subroutine_type ] +!20 = metadata !{i32 786478, metadata !2, metadata !"main", metadata !"main", metadata !"main", metadata !2, i32 23, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @main, null, null, null, i32 23} ; [ DW_TAG_subprogram ] +!21 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, null} ; [ DW_TAG_subroutine_type ] !22 = metadata !{metadata !13} -!23 = metadata !{i32 524545, metadata !17, metadata !"i", metadata !2, i32 16, metadata !13} ; [ DW_TAG_arg_variable ] +!23 = metadata !{i32 786689, metadata !17, metadata !"i", metadata !2, i32 16, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ] !24 = metadata !{i32 16, i32 0, metadata !17, null} -!25 = metadata !{i32 524545, metadata !17, metadata !"location", metadata !2, i32 16, metadata !26} ; [ DW_TAG_arg_variable ] -!26 = metadata !{i32 524304, metadata !2, metadata !"SVal", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_reference_type ] +!25 = metadata !{i32 786689, metadata !17, metadata !"location", metadata !2, i32 16, metadata !26, i32 0, null} ; [ DW_TAG_arg_variable ] +!26 = metadata !{i32 786448, metadata !2, metadata !"SVal", metadata !2, 
i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_reference_type ] !27 = metadata !{i32 17, i32 0, metadata !28, null} -!28 = metadata !{i32 524299, metadata !17, i32 16, i32 0, metadata !2, i32 2} ; [ DW_TAG_lexical_block ] +!28 = metadata !{i32 786443, metadata !2, metadata !17, i32 16, i32 0, i32 2} ; [ DW_TAG_lexical_block ] !29 = metadata !{i32 18, i32 0, metadata !28, null} !30 = metadata !{i32 20, i32 0, metadata !28, null} -!31 = metadata !{i32 524545, metadata !16, metadata !"this", metadata !2, i32 11, metadata !32} ; [ DW_TAG_arg_variable ] -!32 = metadata !{i32 524326, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !33} ; [ DW_TAG_const_type ] -!33 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_pointer_type ] +!31 = metadata !{i32 786689, metadata !16, metadata !"this", metadata !2, i32 11, metadata !32, i32 0, null} ; [ DW_TAG_arg_variable ] +!32 = metadata !{i32 786470, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !33} ; [ DW_TAG_const_type ] +!33 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_pointer_type ] !34 = metadata !{i32 11, i32 0, metadata !16, null} !35 = metadata !{i32 11, i32 0, metadata !36, null} -!36 = metadata !{i32 524299, metadata !37, i32 11, i32 0, metadata !2, i32 1} ; [ DW_TAG_lexical_block ] -!37 = metadata !{i32 524299, metadata !16, i32 11, i32 0, metadata !2, i32 0} ; [ DW_TAG_lexical_block ] -!38 = metadata !{i32 524544, metadata !39, metadata !"v", metadata !2, i32 24, metadata !1} ; [ DW_TAG_auto_variable ] -!39 = metadata !{i32 524299, metadata !40, i32 23, i32 0, metadata !2, i32 4} ; [ DW_TAG_lexical_block ] -!40 = metadata !{i32 524299, metadata !20, i32 23, i32 0, metadata !2, i32 3} ; [ DW_TAG_lexical_block ] +!36 = metadata !{i32 786443, metadata !2, metadata !37, i32 11, i32 0, i32 1} ; [ DW_TAG_lexical_block ] +!37 = metadata !{i32 786443, metadata !2, metadata !16, i32 11, i32 0, i32 0} ; [ DW_TAG_lexical_block ] +!38 = metadata !{i32 786688, metadata !39, metadata !"v", metadata !2, i32 24, metadata !1, i32 0, null} ; [ DW_TAG_auto_variable ] +!39 = metadata !{i32 786443, metadata !2, metadata !40, i32 23, i32 0, i32 4} ; [ DW_TAG_lexical_block ] +!40 = metadata !{i32 786443, metadata !2, metadata !20, i32 23, i32 0, i32 3} ; [ DW_TAG_lexical_block ] !41 = metadata !{i32 24, i32 0, metadata !39, null} !42 = metadata !{i32 25, i32 0, metadata !39, null} !43 = metadata !{i32 26, i32 0, metadata !39, null} -!44 = metadata !{i32 524544, metadata !39, metadata !"k", metadata !2, i32 26, metadata !13} ; [ DW_TAG_auto_variable ] +!44 = metadata !{i32 786688, metadata !39, metadata !"k", metadata !2, i32 26, metadata !13, i32 0, null} ; [ DW_TAG_auto_variable ] !45 = metadata !{i32 27, i32 0, metadata !39, null} diff --git a/test/CodeGen/X86/2010-08-10-DbgConstant.ll b/test/CodeGen/X86/2010-08-10-DbgConstant.ll deleted file mode 100644 index b3cc35d723f7..000000000000 --- a/test/CodeGen/X86/2010-08-10-DbgConstant.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: llc -mtriple=i686-linux -O0 < %s | FileCheck %s -; CHECK: DW_TAG_constant -; CHECK-NEXT: .long .Lstring3 #{{#?}} DW_AT_name - -define void @foo() nounwind ssp { -entry: - call void @bar(i32 201), !dbg !8 - ret void, !dbg !8 -} - -declare void @bar(i32) - -!llvm.dbg.sp = !{!0} -!llvm.dbg.gv = !{!5} - -!0 = metadata !{i32 524334, i32 0, metadata !1, 
metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void ()* @foo} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 524329, metadata !"/tmp/l.c", metadata !"/Volumes/Lalgate/clean/D", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 524305, i32 0, i32 12, metadata !"/tmp/l.c", metadata !"/Volumes/Lalgate/clean/D", metadata !"clang 2.8", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] -!4 = metadata !{null} -!5 = metadata !{i32 524327, i32 0, metadata !1, metadata !"ro", metadata !"ro", metadata !"ro", metadata !1, i32 1, metadata !6, i1 true, i1 true, i32 201} ; [ DW_TAG_constant ] -!6 = metadata !{i32 524326, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_const_type ] -!7 = metadata !{i32 524324, metadata !1, metadata !"unsigned int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] -!8 = metadata !{i32 3, i32 14, metadata !9, null} -!9 = metadata !{i32 524299, metadata !0, i32 3, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] diff --git a/test/CodeGen/X86/2010-09-16-EmptyFilename.ll b/test/CodeGen/X86/2010-09-16-EmptyFilename.ll index bed8c8a77b9a..de0d216e266f 100644 --- a/test/CodeGen/X86/2010-09-16-EmptyFilename.ll +++ b/test/CodeGen/X86/2010-09-16-EmptyFilename.ll @@ -12,18 +12,21 @@ entry: ret i32 21, !dbg !10 } -!llvm.dbg.sp = !{!0, !6} +!llvm.dbg.cu = !{!2} -!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 53, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @foo} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 524329, metadata !"", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 524305, i32 0, i32 12, metadata !"bug.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 114084)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!0 = metadata !{i32 786478, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 53, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @foo} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786473, metadata !14} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, metadata !15, i32 12, metadata !"clang version 2.9 (trunk 114084)", i1 false, metadata !"", i32 0, null, null, metadata !13, null, metadata !""} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, metadata !13, null} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5} -!5 = metadata !{i32 524324, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 524334, i32 0, metadata !7, metadata !"bar", metadata !"bar", metadata !"bar", metadata !7, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @bar} ; [ DW_TAG_subprogram ] -!7 = metadata !{i32 524329, metadata !"bug.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] +!5 = 
metadata !{i32 786468, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 786478, metadata !7, metadata !"bar", metadata !"bar", metadata !"bar", metadata !7, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @bar} ; [ DW_TAG_subprogram ] +!7 = metadata !{i32 786473, metadata !15} ; [ DW_TAG_file_type ] !8 = metadata !{i32 53, i32 13, metadata !9, null} -!9 = metadata !{i32 524299, metadata !0, i32 53, i32 11, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!9 = metadata !{i32 786443, metadata !0, i32 53, i32 11, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] !10 = metadata !{i32 4, i32 13, metadata !11, null} -!11 = metadata !{i32 524299, metadata !12, i32 4, i32 13, metadata !7, i32 2} ; [ DW_TAG_lexical_block ] -!12 = metadata !{i32 524299, metadata !6, i32 4, i32 11, metadata !7, i32 1} ; [ DW_TAG_lexical_block ] +!11 = metadata !{i32 786443, metadata !12, i32 4, i32 13, metadata !7, i32 2} ; [ DW_TAG_lexical_block ] +!12 = metadata !{i32 786443, metadata !6, i32 4, i32 11, metadata !7, i32 1} ; [ DW_TAG_lexical_block ] +!13 = metadata !{metadata !0, metadata !6} +!14 = metadata !{metadata !"", metadata !"/private/tmp"} +!15 = metadata !{metadata !"bug.c", metadata !"/private/tmp"} diff --git a/test/CodeGen/X86/2010-11-02-DbgParameter.ll b/test/CodeGen/X86/2010-11-02-DbgParameter.ll index 79c0cf35c660..31a6822b34b8 100644 --- a/test/CodeGen/X86/2010-11-02-DbgParameter.ll +++ b/test/CodeGen/X86/2010-11-02-DbgParameter.ll @@ -15,21 +15,23 @@ entry: declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone -!llvm.dbg.sp = !{!0} -!llvm.dbg.lv.foo = !{!6} +!llvm.dbg.cu = !{!2} -!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (%struct.bar*)* @foo} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 589865, metadata !"one.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"one.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 117922)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!0 = metadata !{i32 786478, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (%struct.bar*)* @foo, null, null, metadata !16, i32 3} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786473, metadata !17} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 2.9 (trunk 117922)", i1 true, metadata !"", i32 0, null, null, metadata !15, null, metadata !""} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5} -!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 590081, metadata !0, metadata !"i", metadata !1, i32 3, metadata !7, i32 0} ; [ DW_TAG_arg_variable ] -!7 = metadata !{i32 589839, metadata !1, metadata !"", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ 
DW_TAG_pointer_type ] -!8 = metadata !{i32 589843, metadata !1, metadata !"bar", metadata !1, i32 2, i64 64, i64 32, i64 0, i32 0, null, metadata !9, i32 0, null} ; [ DW_TAG_structure_type ] +!5 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 786689, metadata !0, metadata !"i", metadata !1, i32 3, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ] +!7 = metadata !{i32 786447, metadata !1, metadata !"", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ] +!8 = metadata !{i32 786451, metadata !1, metadata !"bar", metadata !1, i32 2, i64 64, i64 32, i64 0, i32 0, null, metadata !9, i32 0, null} ; [ DW_TAG_structure_type ] !9 = metadata !{metadata !10, metadata !11} -!10 = metadata !{i32 589837, metadata !1, metadata !"x", metadata !1, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ] -!11 = metadata !{i32 589837, metadata !1, metadata !"y", metadata !1, i32 2, i64 32, i64 32, i64 32, i32 0, metadata !5} ; [ DW_TAG_member ] +!10 = metadata !{i32 786445, metadata !1, metadata !"x", metadata !1, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ] +!11 = metadata !{i32 786445, metadata !1, metadata !"y", metadata !1, i32 2, i64 32, i64 32, i64 32, i32 0, metadata !5} ; [ DW_TAG_member ] !12 = metadata !{i32 3, i32 47, metadata !0, null} !13 = metadata !{i32 4, i32 2, metadata !14, null} -!14 = metadata !{i32 589835, metadata !0, i32 3, i32 50, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!14 = metadata !{i32 786443, metadata !0, i32 3, i32 50, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!15 = metadata !{metadata !0} +!16 = metadata !{metadata !6} +!17 = metadata !{metadata !"one.c", metadata !"/private/tmp"} diff --git a/test/CodeGen/X86/2010-12-02-MC-Set.ll b/test/CodeGen/X86/2010-12-02-MC-Set.ll index 31446786ec15..4d8d974f703e 100644 --- a/test/CodeGen/X86/2010-12-02-MC-Set.ll +++ b/test/CodeGen/X86/2010-12-02-MC-Set.ll @@ -6,17 +6,18 @@ entry: ret void, !dbg !5 } -!llvm.dbg.sp = !{!0} +!llvm.dbg.cu = !{!2} +!7 = metadata !{metadata !0} -!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @foo} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 589865, metadata !"e.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"e.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 120563)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!0 = metadata !{i32 786478, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786473, metadata !"e.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 2.9 (trunk 120563)", i1 false, metadata !"", i32 0, null, null, metadata !7, null, metadata !""} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ 
DW_TAG_subroutine_type ] !4 = metadata !{null} !5 = metadata !{i32 5, i32 1, metadata !6, null} -!6 = metadata !{i32 589835, metadata !0, i32 3, i32 16, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!6 = metadata !{i32 786443, metadata !0, i32 3, i32 16, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] ; CHECK: .subsections_via_symbols ; CHECK-NEXT: __debug_line -; CHECK-NEXT: Ltmp +; CHECK-NEXT: Lline_table_start0 ; CHECK-NEXT: Ltmp{{[0-9]}} = (Ltmp diff --git a/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll b/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll index 166dcf259989..2355528a81e8 100644 --- a/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll +++ b/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll @@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 target triple = "x86_64-apple-darwin10.0.0" ; Check debug info for variable z_s -;CHECK: .long Lset13 +;CHECK: .long Lset14 ;CHECK-NEXT: ## DW_AT_decl_file ;CHECK-NEXT: ## DW_AT_decl_line ;CHECK-NEXT: ## DW_AT_type @@ -69,35 +69,37 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone declare i32 @puts(i8* nocapture) nounwind -!llvm.dbg.sp = !{!0, !6} -!llvm.dbg.lv.gcd = !{!10, !11, !12} -!llvm.dbg.lv.main = !{!14, !17} +!llvm.dbg.cu = !{!2} -!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"gcd", metadata !"gcd", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i64 (i64, i64)* @gcd} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 589865, metadata !"rem_small.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"rem_small.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 124117)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!0 = metadata !{i32 786478, metadata !1, metadata !"gcd", metadata !"gcd", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i64 (i64, i64)* @gcd, null, null, metadata !29, i32 0} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786473, metadata !31} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, metadata !31, i32 12, metadata !"clang version 2.9 (trunk 124117)", i1 true, metadata !"", i32 0, null, null, metadata !28, null, null} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 786453, metadata !1, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5} -!5 = metadata !{i32 589860, metadata !2, metadata !"long int", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 25, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, i32 ()* @main} ; [ DW_TAG_subprogram ] -!7 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!5 = metadata !{i32 786468, null, metadata !2, metadata !"long int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 786478, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 25, metadata !7, i1 false, i1 true, i32 0, i32 0, 
i32 0, i32 0, i1 true, i32 ()* @main, null, null, metadata !30, i32 0} ; [ DW_TAG_subprogram ] +!7 = metadata !{i32 786453, metadata !1, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !8 = metadata !{metadata !9} -!9 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!10 = metadata !{i32 590081, metadata !0, metadata !"a", metadata !1, i32 5, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] -!11 = metadata !{i32 590081, metadata !0, metadata !"b", metadata !1, i32 5, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] -!12 = metadata !{i32 590080, metadata !13, metadata !"c", metadata !1, i32 6, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] -!13 = metadata !{i32 589835, metadata !0, i32 5, i32 52, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] -!14 = metadata !{i32 590080, metadata !15, metadata !"m", metadata !1, i32 26, metadata !16, i32 0} ; [ DW_TAG_auto_variable ] -!15 = metadata !{i32 589835, metadata !6, i32 25, i32 12, metadata !1, i32 2} ; [ DW_TAG_lexical_block ] -!16 = metadata !{i32 589860, metadata !2, metadata !"unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] -!17 = metadata !{i32 590080, metadata !15, metadata !"z_s", metadata !1, i32 27, metadata !9, i32 0} ; [ DW_TAG_auto_variable ] +!9 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!10 = metadata !{i32 786689, metadata !0, metadata !"a", metadata !1, i32 5, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ] +!11 = metadata !{i32 786689, metadata !0, metadata !"b", metadata !1, i32 5, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ] +!12 = metadata !{i32 786688, metadata !13, metadata !"c", metadata !1, i32 6, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ] +!13 = metadata !{i32 786443, metadata !1, metadata !0, i32 5, i32 52, i32 0} ; [ DW_TAG_lexical_block ] +!14 = metadata !{i32 786688, metadata !15, metadata !"m", metadata !1, i32 26, metadata !16, i32 0, null} ; [ DW_TAG_auto_variable ] +!15 = metadata !{i32 786443, metadata !1, metadata !6, i32 25, i32 12, i32 2} ; [ DW_TAG_lexical_block ] +!16 = metadata !{i32 786468, null, metadata !2, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] +!17 = metadata !{i32 786688, metadata !15, metadata !"z_s", metadata !1, i32 27, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ] !18 = metadata !{i32 5, i32 41, metadata !0, null} !19 = metadata !{i32 5, i32 49, metadata !0, null} !20 = metadata !{i32 7, i32 5, metadata !13, null} !21 = metadata !{i32 8, i32 9, metadata !22, null} -!22 = metadata !{i32 589835, metadata !13, i32 7, i32 14, metadata !1, i32 1} ; [ DW_TAG_lexical_block ] +!22 = metadata !{i32 786443, metadata !1, metadata !13, i32 7, i32 14, i32 1} ; [ DW_TAG_lexical_block ] !23 = metadata !{i32 9, i32 9, metadata !22, null} !24 = metadata !{i32 26, i32 38, metadata !15, null} !25 = metadata !{i32 27, i32 38, metadata !15, null} !26 = metadata !{i32 28, i32 9, metadata !15, null} !27 = metadata !{i32 30, i32 1, metadata !15, null} +!28 = metadata !{metadata !0, metadata !6} +!29 = metadata !{metadata !10, metadata !11, metadata !12} +!30 = metadata !{metadata !14, metadata !17} +!31 = metadata !{metadata !"rem_small.c", metadata !"/private/tmp"} diff --git a/test/CodeGen/X86/2011-06-12-FastAllocSpill.ll 
b/test/CodeGen/X86/2011-06-12-FastAllocSpill.ll index 47ef693cc25e..6f43b94b264a 100644 --- a/test/CodeGen/X86/2011-06-12-FastAllocSpill.ll +++ b/test/CodeGen/X86/2011-06-12-FastAllocSpill.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -O0 -disable-fp-elim -relocation-model=pic -stats 2>&1 | FileCheck %s ; ; This test should not cause any spilling with RAFast. diff --git a/test/CodeGen/X86/2011-09-14-valcoalesce.ll b/test/CodeGen/X86/2011-09-14-valcoalesce.ll index a5ec614a943b..54d2b403509d 100644 --- a/test/CodeGen/X86/2011-09-14-valcoalesce.ll +++ b/test/CodeGen/X86/2011-09-14-valcoalesce.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -disable-code-place | FileCheck %s +; RUN: llc < %s -march=x86 -disable-block-placement | FileCheck %s ; ; Test RegistersDefinedFromSameValue. We have multiple copies of the same vreg: ; while.body85.i: diff --git a/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll b/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll index a7207537de21..da734d4b6454 100644 --- a/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll +++ b/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll @@ -16,8 +16,8 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK: main define i32 @main() nounwind uwtable { entry: -; CHECK: movsbq j(%rip), % -; CHECK: movsbq i(%rip), % +; CHECK: pmovsxbq j(%rip), % +; CHECK: pmovsxbq i(%rip), % %0 = load <2 x i8>* @i, align 8 %1 = load <2 x i8>* @j, align 8 %div = sdiv <2 x i8> %1, %0 diff --git a/test/CodeGen/X86/2011-11-30-or.ll b/test/CodeGen/X86/2011-11-30-or.ll index 0a949eb29b89..8ac4632329b3 100644 --- a/test/CodeGen/X86/2011-11-30-or.ll +++ b/test/CodeGen/X86/2011-11-30-or.ll @@ -8,15 +8,15 @@ target triple = "x86_64-apple-macosx10.6.6" ; CHECK: pblendvb %xmm1, %xmm2 ; CHECK: ret -define void @select_func() { +define void @select_func(<8 x i16> %in) { entry: - %c.lobit.i.i.i = ashr <8 x i16> <i16 17, i16 5, i16 1, i16 15, i16 19, i16 15, i16 4, i16 1> , <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> - %a35 = bitcast <8 x i16> %c.lobit.i.i.i to <2 x i64> + %c.lobit.i.i.i = ashr <8 x i16> %in, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> %and.i56.i.i.i = and <8 x i16> %c.lobit.i.i.i, <i16 25, i16 8, i16 65, i16 25, i16 8, i16 95, i16 15, i16 45> %and.i5.i.i.i = bitcast <8 x i16> %and.i56.i.i.i to <2 x i64> - %neg.i.i.i.i = xor <2 x i64> %a35, <i64 -1, i64 -1> - %and.i.i.i.i = and <2 x i64> zeroinitializer, %neg.i.i.i.i - %or.i.i.i.i = or <2 x i64> %and.i.i.i.i, %and.i5.i.i.i + %neg.i.i.i.i = xor <8 x i16> %c.lobit.i.i.i, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> + %and.i.i.i = and <8 x i16> %neg.i.i.i.i, <i16 45, i16 15, i16 95, i16 8, i16 25, i16 65, i16 8, i16 25> + %and.i2.i.i.i = bitcast <8 x i16> %and.i.i.i to <2 x i64> + %or.i.i.i.i = or <2 x i64> %and.i2.i.i.i, %and.i5.i.i.i %a37 = bitcast <2 x i64> %or.i.i.i.i to <8 x i16> store <8 x i16> %a37, <8 x i16> addrspace(1)* undef, align 4 ret void diff --git a/test/CodeGen/X86/2012-01-11-split-cv.ll b/test/CodeGen/X86/2012-01-11-split-cv.ll index 6b9007291901..7e914984fe44 100644 --- a/test/CodeGen/X86/2012-01-11-split-cv.ll +++ b/test/CodeGen/X86/2012-01-11-split-cv.ll @@ -2,7 +2,7 @@ ;CHECK: add18i16 define void @add18i16(<18 x i16>* nocapture sret %ret, <18 x i16>* %bp) nounwind { -;CHECK: vmovups +;CHECK: vmovaps %b = load <18 x i16>* %bp, align 16 %x = add <18 x i16> zeroinitializer, %b store <18 x i16> %x, <18 x i16>* %ret, align 16 diff --git a/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll b/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll index 
18a331377353..90d8d3d2dd6d 100644 --- a/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll +++ b/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -stats 2>&1 | \ ; RUN: not grep "Number of machine instructions hoisted out of loops post regalloc" diff --git a/test/CodeGen/Generic/2012-07-15-BuildVectorPromote.ll b/test/CodeGen/X86/2012-07-15-BuildVectorPromote.ll index 6591c64d871e..078f1b05c3fc 100644 --- a/test/CodeGen/Generic/2012-07-15-BuildVectorPromote.ll +++ b/test/CodeGen/X86/2012-07-15-BuildVectorPromote.ll @@ -1,4 +1,4 @@ -; RUN: llc -mcpu=corei7 < %s +; RUN: llc < %s -march=x86 -mcpu=corei7 ; We don't care about the output, just that it doesn't crash define <1 x i1> @buildvec_promote() { diff --git a/test/CodeGen/X86/2012-07-15-broadcastfold.ll b/test/CodeGen/X86/2012-07-15-broadcastfold.ll index 3b7a8a7b871c..2c7dfc8dfd45 100644 --- a/test/CodeGen/X86/2012-07-15-broadcastfold.ll +++ b/test/CodeGen/X86/2012-07-15-broadcastfold.ll @@ -3,7 +3,7 @@ declare x86_fastcallcc i64 @barrier() ;CHECK: bcast_fold -;CHECK: vmovaps %xmm{{[0-9]+}}, [[SPILLED:[^\)]+\)]] +;CHECK: vmov{{[au]}}ps %xmm{{[0-9]+}}, [[SPILLED:[^\)]+\)]] ;CHECK: barrier ;CHECK: vbroadcastss [[SPILLED]], %ymm0 ;CHECK: ret diff --git a/test/CodeGen/X86/2012-11-28-merge-store-alias.ll b/test/CodeGen/X86/2012-11-28-merge-store-alias.ll new file mode 100644 index 000000000000..756e86e0f801 --- /dev/null +++ b/test/CodeGen/X86/2012-11-28-merge-store-alias.ll @@ -0,0 +1,52 @@ +; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mtriple=x86_64-pc-win64 | FileCheck %s + +; CHECK: merge_stores_can +; CHECK: callq foo +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: movups %xmm0 +; CHECK: callq foo +; CHECK: ret +declare i32 @foo([10 x i32]* ) + +define i32 @merge_stores_can() nounwind ssp { + %object1 = alloca [10 x i32] + + %ret0 = call i32 @foo([10 x i32]* %object1) nounwind + + %O1_1 = getelementptr [10 x i32]* %object1, i64 0, i32 1 + %O1_2 = getelementptr [10 x i32]* %object1, i64 0, i32 2 + %O1_3 = getelementptr [10 x i32]* %object1, i64 0, i32 3 + %O1_4 = getelementptr [10 x i32]* %object1, i64 0, i32 4 + %ld_ptr = getelementptr [10 x i32]* %object1, i64 0, i32 9 + + store i32 0, i32* %O1_1 + store i32 0, i32* %O1_2 + %ret = load i32* %ld_ptr ; <--- does not alias. + store i32 0, i32* %O1_3 + store i32 0, i32* %O1_4 + + %ret1 = call i32 @foo([10 x i32]* %object1) nounwind + + ret i32 %ret +} + +; CHECK: merge_stores_cant +; CHECK-NOT: xorps %xmm0, %xmm0 +; CHECK-NOT: movups %xmm0 +; CHECK: ret +define i32 @merge_stores_cant([10 x i32]* %in0, [10 x i32]* %in1) nounwind ssp { + + %O1_1 = getelementptr [10 x i32]* %in1, i64 0, i32 1 + %O1_2 = getelementptr [10 x i32]* %in1, i64 0, i32 2 + %O1_3 = getelementptr [10 x i32]* %in1, i64 0, i32 3 + %O1_4 = getelementptr [10 x i32]* %in1, i64 0, i32 4 + %ld_ptr = getelementptr [10 x i32]* %in0, i64 0, i32 2 + + store i32 0, i32* %O1_1 + store i32 0, i32* %O1_2 + %ret = load i32* %ld_ptr ; <--- may alias + store i32 0, i32* %O1_3 + store i32 0, i32* %O1_4 + + ret i32 %ret +} diff --git a/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll b/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll new file mode 100644 index 000000000000..9525653f3fff --- /dev/null +++ b/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll @@ -0,0 +1,51 @@ +; RUN: llc < %s -mtriple=x86_64-apple-macosx -enable-misched \ +; RUN: -verify-machineinstrs | FileCheck %s +; +; Test LiveInterval update handling of DBG_VALUE. +; rdar://12777252. 
+; +; CHECK: %entry +; CHECK: DEBUG_VALUE: hg +; CHECK: je + +%struct.node.0.27 = type { i16, double, [3 x double], i32, i32 } +%struct.hgstruct.2.29 = type { %struct.bnode.1.28*, [3 x double], double, [3 x double] } +%struct.bnode.1.28 = type { i16, double, [3 x double], i32, i32, [3 x double], [3 x double], [3 x double], double, %struct.bnode.1.28*, %struct.bnode.1.28* } + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +define signext i16 @subdivp(%struct.node.0.27* nocapture %p, double %dsq, double %tolsq, %struct.hgstruct.2.29* nocapture byval align 8 %hg) nounwind uwtable readonly ssp { +entry: + call void @llvm.dbg.declare(metadata !{%struct.hgstruct.2.29* %hg}, metadata !4) + %type = getelementptr inbounds %struct.node.0.27* %p, i64 0, i32 0 + %0 = load i16* %type, align 2, !tbaa !8 + %cmp = icmp eq i16 %0, 1 + br i1 %cmp, label %return, label %for.cond.preheader + +for.cond.preheader: ; preds = %entry + %arrayidx6.1 = getelementptr inbounds %struct.hgstruct.2.29* %hg, i64 0, i32 1, i64 1 + %cmp22 = fcmp olt double 0.000000e+00, %dsq + %conv24 = zext i1 %cmp22 to i16 + br label %return + +return: ; preds = %for.cond.preheader, %entry + %retval.0 = phi i16 [ %conv24, %for.cond.preheader ], [ 0, %entry ] + ret i16 %retval.0 +} + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.cu = !{!0} + +!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"MultiSource/Benchmarks/Olden/bh/newbh.c", metadata !"MultiSource/Benchmarks/Olden/bh", metadata !"clang version 3.3 (trunk 168918) (llvm/trunk 168920)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ] [MultiSource/Benchmarks/Olden/bh/newbh.c] [DW_LANG_C99] +!1 = metadata !{metadata !2} +!2 = metadata !{i32 0} +!3 = metadata !{null} +!4 = metadata !{i32 786689, null, metadata !"hg", metadata !5, i32 67109589, metadata !6, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [hg] [line 725] +!5 = metadata !{i32 786473, metadata !11} ; [ DW_TAG_file_type ] +!6 = metadata !{i32 786454, metadata !11, null, metadata !"hgstruct", i32 492, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ] [hgstruct] [line 492, size 0, align 0, offset 0] [from ] +!7 = metadata !{i32 786451, metadata !11, null, metadata !"", i32 487, i64 512, i64 64, i32 0, i32 0, null, null, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [line 487, size 512, align 64, offset 0] [from ] +!8 = metadata !{metadata !"short", metadata !9} +!9 = metadata !{metadata !"omnipotent char", metadata !10} +!10 = metadata !{metadata !"Simple C/C++ TBAA"} +!11 = metadata !{metadata !"MultiSource/Benchmarks/Olden/bh/newbh.c", metadata !"MultiSource/Benchmarks/Olden/bh"} diff --git a/test/CodeGen/X86/2012-11-30-misched-dbg.ll b/test/CodeGen/X86/2012-11-30-misched-dbg.ll new file mode 100644 index 000000000000..a0fbbb2ff9ef --- /dev/null +++ b/test/CodeGen/X86/2012-11-30-misched-dbg.ll @@ -0,0 +1,136 @@ +; RUN: llc < %s -mtriple=x86_64-apple-macosx -enable-misched \ +; RUN: -verify-machineinstrs | FileCheck %s +; +; Test MachineScheduler handling of DBG_VALUE. +; rdar://12776937. 
+; +; CHECK: %if.else581 +; CHECK: DEBUG_VALUE: num1 +; CHECK: call + +%union.rec = type {} + +@.str15 = external hidden unnamed_addr constant [6 x i8], align 1 + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +define i32 @AttachGalley(%union.rec** nocapture %suspend_pt) nounwind uwtable ssp { +entry: + %num14075 = alloca [20 x i8], align 16 + br label %if.end33 + +if.end33: ; preds = %entry + %cmp1733 = icmp eq i32 undef, 0 + br label %if.else581 + +if.else581: ; preds = %if.end33 + %cmp586 = icmp eq i8 undef, -123 + br i1 %cmp586, label %if.then588, label %if.else594 + +if.then588: ; preds = %if.else581 + br label %for.cond1710.preheader + +if.else594: ; preds = %if.else581 + unreachable + +for.cond1710.preheader: ; preds = %if.then588 + br label %for.cond1710 + +for.cond1710: ; preds = %for.cond1710, %for.cond1710.preheader + br i1 undef, label %for.cond1710, label %if.then3344 + +if.then3344: + br label %if.then4073 + +if.then4073: ; preds = %if.then3344 + call void @llvm.dbg.declare(metadata !{[20 x i8]* %num14075}, metadata !4) + %arraydecay4078 = getelementptr inbounds [20 x i8]* %num14075, i64 0, i64 0 + %0 = load i32* undef, align 4 + %add4093 = add nsw i32 %0, 0 + %conv4094 = sitofp i32 %add4093 to float + %div4095 = fdiv float %conv4094, 5.670000e+02 + %conv4096 = fpext float %div4095 to double + %call4097 = call i32 (i8*, i32, i64, i8*, ...)* @__sprintf_chk(i8* %arraydecay4078, i32 0, i64 20, i8* getelementptr inbounds ([6 x i8]* @.str15, i64 0, i64 0), double %conv4096) nounwind + br i1 %cmp1733, label %if.then4107, label %if.else4114 + +if.then4107: ; preds = %if.then4073 + unreachable + +if.else4114: ; preds = %if.then4073 + unreachable +} + +declare i32 @__sprintf_chk(i8*, i32, i64, i8*, ...) + +!llvm.dbg.cu = !{!0} + +!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c", metadata !"MultiSource/Benchmarks/MiBench/consumer-typeset", metadata !"clang version 3.3 (trunk 168918) (llvm/trunk 168920)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [MultiSource/Benchmarks/MiBench/consumer-typeset/MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c] [DW_LANG_C99] +!1 = metadata !{metadata !2} +!2 = metadata !{i32 0} +!3 = metadata !{} +!4 = metadata !{i32 786688, metadata !5, metadata !"num1", metadata !14, i32 815, metadata !15, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [num1] [line 815] +!5 = metadata !{i32 786443, metadata !6, i32 815, i32 0, metadata !14, i32 177} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c] +!6 = metadata !{i32 786443, metadata !7, i32 812, i32 0, metadata !14, i32 176} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c] +!7 = metadata !{i32 786443, metadata !8, i32 807, i32 0, metadata !14, i32 175} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c] +!8 = metadata !{i32 786443, metadata !9, i32 440, i32 0, metadata !14, i32 94} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c] +!9 = metadata !{i32 786443, metadata !10, i32 435, i32 0, metadata !14, i32 91} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c] +!10 = metadata !{i32 786443, metadata !11, i32 434, i32 0, metadata !14, i32 90} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c] +!11 = metadata !{i32 786443, metadata !12, i32 250, i32 0, metadata !14, i32 
24} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c] +!12 = metadata !{i32 786443, metadata !13, i32 249, i32 0, metadata !14, i32 23} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c] +!13 = metadata !{i32 786443, metadata !3, i32 221, i32 0, metadata !14, i32 19} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c] +!14 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ] +!15 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 160, i64 8, i32 0, i32 0, metadata !16, metadata !17, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 160, align 8, offset 0] [from char] +!16 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char] +!17 = metadata !{metadata !18} +!18 = metadata !{i32 786465, i64 0, i64 20} ; [ DW_TAG_subrange_type ] [0, 19] +!19 = metadata !{metadata !"MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c", metadata !"MultiSource/Benchmarks/MiBench/consumer-typeset"} + +; Test DebugValue uses visited by RegisterPressureTracker findUseBetween(). +; +; CHECK: @main +; CHECK: DEBUG_VALUE: X +; CHECK: call + +%"class.__gnu_cxx::hash_map" = type { %"class.__gnu_cxx::hashtable" } +%"class.__gnu_cxx::hashtable" = type { i64, i64, i64, i64, i64, i64 } + +define void @main() uwtable ssp { +entry: + %X = alloca %"class.__gnu_cxx::hash_map", align 8 + br i1 undef, label %cond.true, label %cond.end + +cond.true: ; preds = %entry + unreachable + +cond.end: ; preds = %entry + call void @llvm.dbg.declare(metadata !{%"class.__gnu_cxx::hash_map"* %X}, metadata !31) + %_M_num_elements.i.i.i.i = getelementptr inbounds %"class.__gnu_cxx::hash_map"* %X, i64 0, i32 0, i32 5 + invoke void @_Znwm() + to label %exit.i unwind label %lpad2.i.i.i.i + +exit.i: ; preds = %cond.end + unreachable + +lpad2.i.i.i.i: ; preds = %cond.end + %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + cleanup + br i1 undef, label %lpad.body.i.i, label %if.then.i.i.i.i.i.i.i.i + +if.then.i.i.i.i.i.i.i.i: ; preds = %lpad2.i.i.i.i + unreachable + +lpad.body.i.i: ; preds = %lpad2.i.i.i.i + resume { i8*, i32 } %0 +} + +declare i32 @__gxx_personality_v0(...) 
+ +declare void @_Znwm() + +!llvm.dbg.cu = !{!30} + +!30 = metadata !{i32 786449, i32 0, i32 4, metadata !"SingleSource/Benchmarks/Shootout-C++/hash.cpp", metadata !"SingleSource/Benchmarks/Shootout-C++", metadata !"clang version 3.3 (trunk 169129) (llvm/trunk 169135)", i1 true, i1 true, metadata !"", i32 0, null, null, null, null} ; [ DW_TAG_compile_unit ] [SingleSource/Benchmarks/Shootout-C++/hash.cpp] [DW_LANG_C_plus_plus] +!31 = metadata !{i32 786688, null, metadata !"X", null, i32 29, metadata !32, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [X] [line 29] +!32 = metadata !{i32 786454, metadata !34, null, metadata !"HM", i32 28, i64 0, i64 0, i64 0, i32 0, null} ; [ DW_TAG_typedef ] [HM] [line 28, size 0, align 0, offset 0] [from ] +!33 = metadata !{i32 786473, metadata !34} ; [ DW_TAG_file_type ] +!34 = metadata !{metadata !"SingleSource/Benchmarks/Shootout-C++/hash.cpp", metadata !"SingleSource/Benchmarks/Shootout-C++"} diff --git a/test/CodeGen/X86/2012-11-30-regpres-dbg.ll b/test/CodeGen/X86/2012-11-30-regpres-dbg.ll new file mode 100644 index 000000000000..df93c5647d95 --- /dev/null +++ b/test/CodeGen/X86/2012-11-30-regpres-dbg.ll @@ -0,0 +1,44 @@ +; RUN: llc < %s -mtriple=x86_64-apple-macosx -enable-misched \ +; RUN: -verify-machineinstrs | FileCheck %s +; +; Test RegisterPressure handling of DBG_VALUE. +; +; CHECK: %entry +; CHECK: DEBUG_VALUE: callback +; CHECK: ret + +%struct.btCompoundLeafCallback = type { i32, i32 } + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +define void @test() unnamed_addr uwtable ssp align 2 { +entry: + %callback = alloca %struct.btCompoundLeafCallback, align 8 + br i1 undef, label %if.end, label %if.then + +if.then: ; preds = %entry + unreachable + +if.end: ; preds = %entry + call void @llvm.dbg.declare(metadata !{%struct.btCompoundLeafCallback* %callback}, metadata !3) + %m = getelementptr inbounds %struct.btCompoundLeafCallback* %callback, i64 0, i32 1 + store i32 0, i32* undef, align 8 + %cmp12447 = icmp sgt i32 undef, 0 + br i1 %cmp12447, label %for.body.lr.ph, label %invoke.cont44 + +for.body.lr.ph: ; preds = %if.end + unreachable + +invoke.cont44: ; preds = %if.end + ret void +} + +!llvm.dbg.cu = !{!0} + +!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"MultiSource/Benchmarks/Bullet/btCompoundCollisionAlgorithm.cpp", metadata !"MultiSource/Benchmarks/Bullet", metadata !"clang version 3.3 (trunk 168984) (llvm/trunk 168983)", i1 true, i1 true, metadata !"", i32 0, metadata !1, null, null, null} ; [ DW_TAG_compile_unit ] [MultiSource/Benchmarks/Bullet/MultiSource/Benchmarks/Bullet/btCompoundCollisionAlgorithm.cpp] [DW_LANG_C_plus_plus] +!1 = metadata !{metadata !2} +!2 = metadata !{null, null} +!3 = metadata !{i32 786688, null, metadata !"callback", null, i32 214, metadata !4, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [callback] [line 214] +!4 = metadata !{i32 786451, metadata !6, null, metadata !"btCompoundLeafCallback", i32 90, i64 512, i64 64, i32 0, i32 0, null, null, i32 0, null, null} ; [ DW_TAG_structure_type ] [btCompoundLeafCallback] [line 90, size 512, align 64, offset 0] [from ] +!5 = metadata !{i32 786473, metadata !6} ; [ DW_TAG_file_type ] +!6 = metadata !{metadata !"MultiSource/Benchmarks/Bullet/btCompoundCollisionAlgorithm.cpp", metadata !"MultiSource/Benchmarks/Bullet"} diff --git a/test/CodeGen/X86/2012-12-06-python27-miscompile.ll b/test/CodeGen/X86/2012-12-06-python27-miscompile.ll new file mode 100644 index 000000000000..d9effc92fa92 --- /dev/null +++ 
b/test/CodeGen/X86/2012-12-06-python27-miscompile.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s -march=x86 -mcpu=corei7 -mtriple=i686-pc-win32 | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +; Make sure that we are zeroing one memory location at a time using xorl and +; not both using XMM registers. + +;CHECK: @foo +;CHECK: xorl +;CHECK-NOT: xmm +;CHECK: ret +define i32 @foo (i64* %so) nounwind uwtable ssp { +entry: + %used = getelementptr inbounds i64* %so, i32 3 + store i64 0, i64* %used, align 8 + %fill = getelementptr inbounds i64* %so, i32 2 + %L = load i64* %fill, align 8 + store i64 0, i64* %fill, align 8 + %cmp28 = icmp sgt i64 %L, 0 + %R = sext i1 %cmp28 to i32 + ret i32 %R +} diff --git a/test/CodeGen/X86/2012-12-1-merge-multiple.ll b/test/CodeGen/X86/2012-12-1-merge-multiple.ll new file mode 100644 index 000000000000..5931c3d27be1 --- /dev/null +++ b/test/CodeGen/X86/2012-12-1-merge-multiple.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mtriple=x86_64-pc-win64 | FileCheck %s + +; CHECK: multiple_stores_on_chain +; CHECK: movabsq +; CHECK: movq +; CHECK: movabsq +; CHECK: movq +; CHECK: ret +define void @multiple_stores_on_chain(i16 * %A) { +entry: + %a0 = getelementptr inbounds i16* %A, i64 0 + %a1 = getelementptr inbounds i16* %A, i64 1 + %a2 = getelementptr inbounds i16* %A, i64 2 + %a3 = getelementptr inbounds i16* %A, i64 3 + %a4 = getelementptr inbounds i16* %A, i64 4 + %a5 = getelementptr inbounds i16* %A, i64 5 + %a6 = getelementptr inbounds i16* %A, i64 6 + %a7 = getelementptr inbounds i16* %A, i64 7 + + store i16 0, i16* %a0 + store i16 1, i16* %a1 + store i16 2, i16* %a2 + store i16 3, i16* %a3 + store i16 4, i16* %a4 + store i16 5, i16* %a5 + store i16 6, i16* %a6 + store i16 7, i16* %a7 + + ret void +} + diff --git a/test/CodeGen/X86/2012-12-12-DAGCombineCrash.ll b/test/CodeGen/X86/2012-12-12-DAGCombineCrash.ll new file mode 100644 index 000000000000..8cef2c8201c6 --- /dev/null +++ b/test/CodeGen/X86/2012-12-12-DAGCombineCrash.ll @@ -0,0 +1,46 @@ +; RUN: llc -march=x86 -mtriple=i686-apple-ios -mcpu=yonah < %s +; rdar://12868039 + +define void @t() nounwind ssp { + %1 = alloca i32 + %2 = ptrtoint i32* %1 to i32 + br label %3 + +; <label>:3 ; preds = %5, %3, %0 + switch i32 undef, label %3 [ + i32 611946160, label %5 + i32 954117870, label %4 + ] + +; <label>:4 ; preds = %3 + ret void + +; <label>:5 ; preds = %5, %3 + %6 = add i32 0, 148 + %7 = and i32 %6, 48 + %8 = add i32 %7, 0 + %9 = or i32 %2, %8 + %10 = xor i32 -1, %2 + %11 = or i32 %8, %10 + %12 = or i32 %9, %11 + %13 = xor i32 %9, %11 + %14 = sub i32 %12, %13 + %15 = xor i32 2044674005, %14 + %16 = xor i32 %15, 0 + %17 = shl nuw nsw i32 %16, 1 + %18 = sub i32 0, %17 + %19 = and i32 %18, 2051242402 + %20 = sub i32 0, %19 + %21 = xor i32 %20, 0 + %22 = xor i32 %21, 0 + %23 = add i32 0, %22 + %24 = shl i32 %23, 1 + %25 = or i32 1, %24 + %26 = add i32 0, %25 + %27 = trunc i32 %26 to i8 + %28 = xor i8 %27, 125 + %29 = add i8 %28, -16 + %30 = add i8 0, %29 + store i8 %30, i8* null + br i1 undef, label %5, label %3 +} diff --git a/test/CodeGen/X86/2012-12-14-v8fp80-crash.ll b/test/CodeGen/X86/2012-12-14-v8fp80-crash.ll new file mode 100644 index 000000000000..c465527bd867 --- /dev/null +++ b/test/CodeGen/X86/2012-12-14-v8fp80-crash.ll @@ -0,0 +1,22 @@ +; RUN: llc < %s -march=x86 -mcpu=corei7 -mtriple=i686-pc-win32 + +; Make 
sure we don't crash on this testcase. + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.9.0" + +define void @_ZN6VectorIfE3equIeEEvfRKS_IT_E() nounwind uwtable ssp align 2 { +entry: + br i1 undef, label %while.end, label %while.body.lr.ph + +while.body.lr.ph: ; preds = %entry + br label %vector.body + +vector.body: ; preds = %vector.body, %while.body.lr.ph + %0 = fptrunc <8 x x86_fp80> undef to <8 x float> + store <8 x float> %0, <8 x float>* undef, align 4 + br label %vector.body + +while.end: ; preds = %entry + ret void +} diff --git a/test/CodeGen/X86/2012-12-19-NoImplicitFloat.ll b/test/CodeGen/X86/2012-12-19-NoImplicitFloat.ll new file mode 100644 index 000000000000..302566520671 --- /dev/null +++ b/test/CodeGen/X86/2012-12-19-NoImplicitFloat.ll @@ -0,0 +1,17 @@ +; RUN: llc -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core2 < %s | FileCheck %s +; Test that we do not introduce vector operations with noimplicitfloat. +; rdar://12879313 + +%struct1 = type { i32*, i32* } + +define void @test() nounwind noimplicitfloat { +entry: +; CHECK-NOT: xmm +; CHECK: ret + %0 = load %struct1** undef, align 8 + %1 = getelementptr inbounds %struct1* %0, i64 0, i32 0 + store i32* null, i32** %1, align 8 + %2 = getelementptr inbounds %struct1* %0, i64 0, i32 1 + store i32* null, i32** %2, align 8 + ret void +} diff --git a/test/CodeGen/X86/2013-01-09-DAGCombineBug.ll b/test/CodeGen/X86/2013-01-09-DAGCombineBug.ll new file mode 100644 index 000000000000..1b417e54a2f7 --- /dev/null +++ b/test/CodeGen/X86/2013-01-09-DAGCombineBug.ll @@ -0,0 +1,74 @@ +; RUN: llc -mtriple=x86_64-apple-macosx10.5.0 < %s + +; rdar://12968664 + +define void @t() nounwind uwtable ssp { + br label %4 + +; <label>:1 ; preds = %4, %2 + ret void + +; <label>:2 ; preds = %6, %5, %3, %2 + switch i32 undef, label %2 [ + i32 1090573978, label %1 + i32 1090573938, label %3 + i32 1090573957, label %5 + ] + +; <label>:3 ; preds = %4, %2 + br i1 undef, label %2, label %4 + +; <label>:4 ; preds = %6, %5, %3, %0 + switch i32 undef, label %11 [ + i32 1090573938, label %3 + i32 1090573957, label %5 + i32 1090573978, label %1 + i32 165205179, label %6 + ] + +; <label>:5 ; preds = %4, %2 + br i1 undef, label %2, label %4 + +; <label>:6 ; preds = %4 + %7 = icmp eq i32 undef, 590901838 + %8 = or i1 false, %7 + %9 = or i1 true, %8 + %10 = xor i1 %8, %9 + br i1 %10, label %4, label %2 + +; <label>:11 ; preds = %11, %4 + br label %11 +} + +; PR15608 +@global = external constant [2 x i8] + +define void @PR15608() { +bb: + br label %bb3 + +bb1: ; No predecessors! 
+ br i1 icmp ult (i64 xor (i64 zext (i1 trunc (i192 lshr (i192 or (i192 shl (i192 zext (i64 trunc (i128 lshr (i128 trunc (i384 lshr (i384 or (i384 shl (i384 zext (i64 ptrtoint ([2 x i8]* @global to i64) to i384), i384 192), i384 425269881901436522087161771558896140289), i384 128) to i128), i128 64) to i64) to i192), i192 64), i192 1), i192 128) to i1) to i64), i64 1), i64 1), label %bb2, label %bb3 + +bb2: ; preds = %bb1 + unreachable + +bb3: ; preds = %bb1, %bb + br i1 xor (i1 trunc (i192 lshr (i192 or (i192 shl (i192 zext (i64 trunc (i128 lshr (i128 trunc (i384 lshr (i384 or (i384 shl (i384 zext (i64 ptrtoint ([2 x i8]* @global to i64) to i384), i384 192), i384 425269881901436522087161771558896140289), i384 128) to i128), i128 64) to i64) to i192), i192 64), i192 1), i192 128) to i1), i1 trunc (i192 lshr (i192 or (i192 and (i192 or (i192 shl (i192 zext (i64 trunc (i128 lshr (i128 trunc (i384 lshr (i384 or (i384 shl (i384 zext (i64 ptrtoint ([2 x i8]* @global to i64) to i384), i384 192), i384 425269881901436522087161771558896140289), i384 128) to i128), i128 64) to i64) to i192), i192 64), i192 1), i192 -340282366920938463463374607431768211457), i192 shl (i192 zext (i1 trunc (i192 lshr (i192 or (i192 shl (i192 zext (i64 trunc (i128 lshr (i128 trunc (i384 lshr (i384 or (i384 shl (i384 zext (i64 ptrtoint ([2 x i8]* @global to i64) to i384), i384 192), i384 425269881901436522087161771558896140289), i384 128) to i128), i128 64) to i64) to i192), i192 64), i192 1), i192 128) to i1) to i192), i192 128)), i192 128) to i1)), label %bb7, label %bb4 + +bb4: ; preds = %bb6, %bb3 + %tmp = phi i1 [ true, %bb6 ], [ trunc (i192 lshr (i192 or (i192 and (i192 or (i192 shl (i192 zext (i64 trunc (i128 lshr (i128 trunc (i384 lshr (i384 or (i384 shl (i384 zext (i64 ptrtoint ([2 x i8]* @global to i64) to i384), i384 192), i384 425269881901436522087161771558896140289), i384 128) to i128), i128 64) to i64) to i192), i192 64), i192 1), i192 -340282366920938463463374607431768211457), i192 shl (i192 zext (i1 trunc (i192 lshr (i192 or (i192 shl (i192 zext (i64 trunc (i128 lshr (i128 trunc (i384 lshr (i384 or (i384 shl (i384 zext (i64 ptrtoint ([2 x i8]* @global to i64) to i384), i384 192), i384 425269881901436522087161771558896140289), i384 128) to i128), i128 64) to i64) to i192), i192 64), i192 1), i192 128) to i1) to i192), i192 128)), i192 128) to i1), %bb3 ] + br i1 false, label %bb8, label %bb5 + +bb5: ; preds = %bb4 + br i1 %tmp, label %bb8, label %bb6 + +bb6: ; preds = %bb5 + br i1 false, label %bb8, label %bb4 + +bb7: ; preds = %bb3 + unreachable + +bb8: ; preds = %bb6, %bb5, %bb4 + unreachable +} diff --git a/test/CodeGen/X86/2013-02-12-ShuffleToZext.ll b/test/CodeGen/X86/2013-02-12-ShuffleToZext.ll new file mode 100644 index 000000000000..614ccda5e250 --- /dev/null +++ b/test/CodeGen/X86/2013-02-12-ShuffleToZext.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx -mtriple=x86_64-pc-win32 | FileCheck %s + +; CHECK: test +; CHECK: vpmovzxwd +; CHECK: vpmovzxwd +define void @test(<4 x i64> %a, <4 x i16>* %buf) { + %ex1 = extractelement <4 x i64> %a, i32 0 + %ex2 = extractelement <4 x i64> %a, i32 1 + %x1 = bitcast i64 %ex1 to <4 x i16> + %x2 = bitcast i64 %ex2 to <4 x i16> + %Sh = shufflevector <4 x i16> %x1, <4 x i16> %x2, <4 x i32> <i32 0, i32 1, i32 4, i32 5> + store <4 x i16> %Sh, <4 x i16>* %buf, align 1 + ret void +} diff --git a/test/CodeGen/X86/2013-03-13-VEX-DestReg.ll b/test/CodeGen/X86/2013-03-13-VEX-DestReg.ll new file mode 100644 index 000000000000..03b6bdeafa87 --- /dev/null +++ 
b/test/CodeGen/X86/2013-03-13-VEX-DestReg.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx-i -show-mc-encoding + +; ModuleID = 'bugpoint-reduced-simplified.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +@b = external global [8 x float], align 32 +@e = external global [8 x float], align 16 + +define void @main() #0 { +entry: + %0 = load <8 x float>* bitcast ([8 x float]* @b to <8 x float>*), align 32, !tbaa !0 + %bitcast.i = extractelement <8 x float> %0, i32 0 + %vecinit.i.i = insertelement <4 x float> undef, float %bitcast.i, i32 0 + %vecinit2.i.i = insertelement <4 x float> %vecinit.i.i, float 0.000000e+00, i32 1 + %vecinit3.i.i = insertelement <4 x float> %vecinit2.i.i, float 0.000000e+00, i32 2 + %vecinit4.i.i = insertelement <4 x float> %vecinit3.i.i, float 0.000000e+00, i32 3 + %1 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %vecinit4.i.i) #2 + %vecext.i.i = extractelement <4 x float> %1, i32 0 + store float %vecext.i.i, float* getelementptr inbounds ([8 x float]* @e, i64 0, i64 0), align 16, !tbaa !0 + unreachable +} + +declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) #1 + +attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind } + +!0 = metadata !{metadata !"omnipotent char", metadata !1} +!1 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/CodeGen/X86/Atomics-64.ll b/test/CodeGen/X86/Atomics-64.ll index 8e93762cec17..8b0a349a8be3 100644 --- a/test/CodeGen/X86/Atomics-64.ll +++ b/test/CodeGen/X86/Atomics-64.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=x86-64 > %t -; RUN: llc < %s -march=x86 > %t +; RUN: llc < %s -march=x86-64 > %t.x86-64 +; RUN: llc < %s -march=x86 > %t.x86 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" target triple = "x86_64-apple-darwin8" diff --git a/test/CodeGen/X86/DbgValueOtherTargets.test b/test/CodeGen/X86/DbgValueOtherTargets.test new file mode 100644 index 000000000000..7b4d431c93b1 --- /dev/null +++ b/test/CodeGen/X86/DbgValueOtherTargets.test @@ -0,0 +1,2 @@ +RUN: llc -O0 -march=x86 -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll +RUN: llc -O0 -march=x86-64 -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll diff --git a/test/CodeGen/X86/GC/erlang-gc.ll b/test/CodeGen/X86/GC/erlang-gc.ll new file mode 100644 index 000000000000..c55b7f6dcf61 --- /dev/null +++ b/test/CodeGen/X86/GC/erlang-gc.ll @@ -0,0 +1,25 @@ +; RUN: llc -mtriple=x86_64-linux-gnu < %s | FileCheck %s --check-prefix=CHECK64 +; RUN: llc -mtriple=i686-linux-gnu < %s | FileCheck %s --check-prefix=CHECK32 + +define i32 @main(i32 %x) nounwind gc "erlang" { + %puts = tail call i32 @foo(i32 %x) + ret i32 0 + +; CHECK64: .section .note.gc,"",@progbits +; CHECK64-NEXT: .align 8 +; CHECK64-NEXT: .short 1 # safe point count +; CHECK64-NEXT: .long .Ltmp0 # safe point address +; CHECK64-NEXT: .short 1 # stack frame size (in words) +; CHECK64-NEXT: .short 0 # stack arity +; CHECK64-NEXT: .short 0 # live root count 
+ +; CHECK32: .section .note.gc,"",@progbits +; CHECK32-NEXT: .align 4 +; CHECK32-NEXT: .short 1 # safe point count +; CHECK32-NEXT: .long .Ltmp0 # safe point address +; CHECK32-NEXT: .short 3 # stack frame size (in words) +; CHECK32-NEXT: .short 0 # stack arity +; CHECK32-NEXT: .short 0 # live root count +} + +declare i32 @foo(i32) diff --git a/test/CodeGen/X86/GC/ocaml-gc.ll b/test/CodeGen/X86/GC/ocaml-gc.ll new file mode 100644 index 000000000000..44241a90d0e7 --- /dev/null +++ b/test/CodeGen/X86/GC/ocaml-gc.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s + +define i32 @main(i32 %x) nounwind gc "ocaml" { +; CHECK: .text +; CHECK-NEXT: .globl caml_3C_stdin_3E___code_begin +; CHECK-NEXT: caml_3C_stdin_3E___code_begin: +; CHECK-NEXT: .data +; CHECK-NEXT: .globl caml_3C_stdin_3E___data_begin +; CHECK-NEXT: caml_3C_stdin_3E___data_begin: + + %puts = tail call i32 @foo(i32 %x) + ret i32 0 + +; CHECK: .globl caml_3C_stdin_3E___code_end +; CHECK-NEXT: caml_3C_stdin_3E___code_end: +; CHECK-NEXT: .data +; CHECK-NEXT: .globl caml_3C_stdin_3E___data_end +; CHECK-NEXT: caml_3C_stdin_3E___data_end: +; CHECK-NEXT: .quad 0 +; CHECK-NEXT: .globl caml_3C_stdin_3E___frametable +; CHECK-NEXT: caml_3C_stdin_3E___frametable: +; CHECK-NEXT: .short 1 +; CHECK-NEXT: .align 8 +; CHECK-NEXT: # live roots for main +; CHECK-NEXT: .quad .Ltmp0 +; CHECK-NEXT: .short 8 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .align 8 +} + +declare i32 @foo(i32) diff --git a/test/CodeGen/X86/MachineSink-DbgValue.ll b/test/CodeGen/X86/MachineSink-DbgValue.ll index ea791a3c083c..227ef3466e0a 100644 --- a/test/CodeGen/X86/MachineSink-DbgValue.ll +++ b/test/CodeGen/X86/MachineSink-DbgValue.ll @@ -26,24 +26,25 @@ bb2: declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0} -!llvm.dbg.sp = !{!1} -!llvm.dbg.lv.foo = !{!6, !7, !10} -!0 = metadata !{i32 589841, i32 0, i32 12, metadata !"a.c", metadata !"/private/tmp", metadata !"Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!1 = metadata !{i32 589870, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i32*)* @foo, null, null} ; [ DW_TAG_subprogram ] -!2 = metadata !{i32 589865, metadata !"a.c", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ] -!3 = metadata !{i32 589845, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!0 = metadata !{i32 786449, metadata !20, i32 12, metadata !"Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", i1 true, metadata !"", i32 0, null, null, metadata !18, null, null} ; [ DW_TAG_compile_unit ] +!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i32*)* @foo, null, null, metadata !19, i32 0} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5} -!5 = metadata !{i32 589860, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 590081, 
metadata !1, metadata !"i", metadata !2, i32 16777218, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] -!7 = metadata !{i32 590081, metadata !1, metadata !"c", metadata !2, i32 33554434, metadata !8, i32 0} ; [ DW_TAG_arg_variable ] -!8 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ] -!9 = metadata !{i32 589860, metadata !0, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] -!10 = metadata !{i32 590080, metadata !11, metadata !"a", metadata !2, i32 3, metadata !9, i32 0} ; [ DW_TAG_auto_variable ] -!11 = metadata !{i32 589835, metadata !1, i32 2, i32 25, metadata !2, i32 0} ; [ DW_TAG_lexical_block ] +!5 = metadata !{i32 786468, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 786689, metadata !1, metadata !"i", metadata !2, i32 16777218, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ] +!7 = metadata !{i32 786689, metadata !1, metadata !"c", metadata !2, i32 33554434, metadata !8, i32 0, null} ; [ DW_TAG_arg_variable ] +!8 = metadata !{i32 786447, metadata !0, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ] +!9 = metadata !{i32 786468, metadata !0, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] +!10 = metadata !{i32 786688, metadata !11, metadata !"a", metadata !2, i32 3, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ] +!11 = metadata !{i32 786443, metadata !20, metadata !1, i32 2, i32 25, i32 0} ; [ DW_TAG_lexical_block ] !12 = metadata !{i32 2, i32 13, metadata !1, null} !13 = metadata !{i32 2, i32 22, metadata !1, null} !14 = metadata !{i32 3, i32 14, metadata !11, null} !15 = metadata !{i32 4, i32 3, metadata !11, null} !16 = metadata !{i32 5, i32 5, metadata !11, null} !17 = metadata !{i32 7, i32 1, metadata !11, null} +!18 = metadata !{metadata !1} +!19 = metadata !{metadata !6, metadata !7, metadata !10} +!20 = metadata !{metadata !"a.c", metadata !"/private/tmp"} diff --git a/test/CodeGen/X86/MachineSink-PHIUse.ll b/test/CodeGen/X86/MachineSink-PHIUse.ll index 33141680aa92..1329200c3e6e 100644 --- a/test/CodeGen/X86/MachineSink-PHIUse.ll +++ b/test/CodeGen/X86/MachineSink-PHIUse.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -mtriple=x86_64-appel-darwin -disable-cgp-branch-opts -stats 2>&1 | grep "machine-sink" define fastcc void @t() nounwind ssp { diff --git a/test/CodeGen/X86/MergeConsecutiveStores.ll b/test/CodeGen/X86/MergeConsecutiveStores.ll index 64825bac9719..bb227a0185df 100644 --- a/test/CodeGen/X86/MergeConsecutiveStores.ll +++ b/test/CodeGen/X86/MergeConsecutiveStores.ll @@ -40,9 +40,43 @@ define void @merge_const_store(i32 %count, %struct.A* nocapture %p) nounwind uwt ret void } +; No vectors because we use noimplicitfloat +; CHECK: merge_const_store_no_vec +; CHECK-NOT: vmovups +; CHECK: ret +define void @merge_const_store_no_vec(i32 %count, %struct.B* nocapture %p) noimplicitfloat{ + %1 = icmp sgt i32 %count, 0 + br i1 %1, label %.lr.ph, label %._crit_edge +.lr.ph: + %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ] + %.01 = phi %struct.B* [ %11, %.lr.ph ], [ %p, %0 ] + %2 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0 + store i32 0, i32* %2, align 4 + %3 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1 + store i32 0, i32* %3, align 4 + %4 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2 + store i32 0, i32* %4, align 4 + %5 = getelementptr inbounds 
%struct.B* %.01, i64 0, i32 3 + store i32 0, i32* %5, align 4 + %6 = getelementptr inbounds %struct.B* %.01, i64 0, i32 4 + store i32 0, i32* %6, align 4 + %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 5 + store i32 0, i32* %7, align 4 + %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 6 + store i32 0, i32* %8, align 4 + %9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 7 + store i32 0, i32* %9, align 4 + %10 = add nsw i32 %i.02, 1 + %11 = getelementptr inbounds %struct.B* %.01, i64 1 + %exitcond = icmp eq i32 %10, %count + br i1 %exitcond, label %._crit_edge, label %.lr.ph +._crit_edge: + ret void +} + ; Move the constants using a single vector store. ; CHECK: merge_const_store_vec -; CHECK: vmovups %ymm0, (%rsi) +; CHECK: vmovups ; CHECK: ret define void @merge_const_store_vec(i32 %count, %struct.B* nocapture %p) nounwind uwtable noinline ssp { %1 = icmp sgt i32 %count, 0 @@ -303,3 +337,99 @@ block4: ; preds = %4, %.lr.ph ret void } +; Make sure that we merge the consecutive load/store sequence below and use a +; word (16 bit) instead of a byte copy. +; CHECK: MergeLoadStoreBaseIndexOffset +; CHECK: movw (%{{.*}},%{{.*}}), [[REG:%[a-z]+]] +; CHECK: movw [[REG]], (%{{.*}}) +define void @MergeLoadStoreBaseIndexOffset(i64* %a, i8* %b, i8* %c, i32 %n) { + br label %1 + +; <label>:1 + %.09 = phi i32 [ %n, %0 ], [ %11, %1 ] + %.08 = phi i8* [ %b, %0 ], [ %10, %1 ] + %.0 = phi i64* [ %a, %0 ], [ %2, %1 ] + %2 = getelementptr inbounds i64* %.0, i64 1 + %3 = load i64* %.0, align 1 + %4 = getelementptr inbounds i8* %c, i64 %3 + %5 = load i8* %4, align 1 + %6 = add i64 %3, 1 + %7 = getelementptr inbounds i8* %c, i64 %6 + %8 = load i8* %7, align 1 + store i8 %5, i8* %.08, align 1 + %9 = getelementptr inbounds i8* %.08, i64 1 + store i8 %8, i8* %9, align 1 + %10 = getelementptr inbounds i8* %.08, i64 2 + %11 = add nsw i32 %.09, -1 + %12 = icmp eq i32 %11, 0 + br i1 %12, label %13, label %1 + +; <label>:13 + ret void +} + +; Make sure that we merge the consecutive load/store sequence below and use a +; word (16 bit) instead of a byte copy even if there are intermediate sign +; extensions. 
+; CHECK: MergeLoadStoreBaseIndexOffsetSext +; CHECK: movw (%{{.*}},%{{.*}}), [[REG:%[a-z]+]] +; CHECK: movw [[REG]], (%{{.*}}) +define void @MergeLoadStoreBaseIndexOffsetSext(i8* %a, i8* %b, i8* %c, i32 %n) { + br label %1 + +; <label>:1 + %.09 = phi i32 [ %n, %0 ], [ %12, %1 ] + %.08 = phi i8* [ %b, %0 ], [ %11, %1 ] + %.0 = phi i8* [ %a, %0 ], [ %2, %1 ] + %2 = getelementptr inbounds i8* %.0, i64 1 + %3 = load i8* %.0, align 1 + %4 = sext i8 %3 to i64 + %5 = getelementptr inbounds i8* %c, i64 %4 + %6 = load i8* %5, align 1 + %7 = add i64 %4, 1 + %8 = getelementptr inbounds i8* %c, i64 %7 + %9 = load i8* %8, align 1 + store i8 %6, i8* %.08, align 1 + %10 = getelementptr inbounds i8* %.08, i64 1 + store i8 %9, i8* %10, align 1 + %11 = getelementptr inbounds i8* %.08, i64 2 + %12 = add nsw i32 %.09, -1 + %13 = icmp eq i32 %12, 0 + br i1 %13, label %14, label %1 + +; <label>:14 + ret void +} + +; However, we can only merge ignore sign extensions when they are on all memory +; computations; +; CHECK: loadStoreBaseIndexOffsetSextNoSex +; CHECK-NOT: movw (%{{.*}},%{{.*}}), [[REG:%[a-z]+]] +; CHECK-NOT: movw [[REG]], (%{{.*}}) +define void @loadStoreBaseIndexOffsetSextNoSex(i8* %a, i8* %b, i8* %c, i32 %n) { + br label %1 + +; <label>:1 + %.09 = phi i32 [ %n, %0 ], [ %12, %1 ] + %.08 = phi i8* [ %b, %0 ], [ %11, %1 ] + %.0 = phi i8* [ %a, %0 ], [ %2, %1 ] + %2 = getelementptr inbounds i8* %.0, i64 1 + %3 = load i8* %.0, align 1 + %4 = sext i8 %3 to i64 + %5 = getelementptr inbounds i8* %c, i64 %4 + %6 = load i8* %5, align 1 + %7 = add i8 %3, 1 + %wrap.4 = sext i8 %7 to i64 + %8 = getelementptr inbounds i8* %c, i64 %wrap.4 + %9 = load i8* %8, align 1 + store i8 %6, i8* %.08, align 1 + %10 = getelementptr inbounds i8* %.08, i64 1 + store i8 %9, i8* %10, align 1 + %11 = getelementptr inbounds i8* %.08, i64 2 + %12 = add nsw i32 %.09, -1 + %13 = icmp eq i32 %12, 0 + br i1 %13, label %14, label %1 + +; <label>:14 + ret void +} diff --git a/test/CodeGen/X86/WidenArith.ll b/test/CodeGen/X86/WidenArith.ll new file mode 100644 index 000000000000..0383bd665b0f --- /dev/null +++ b/test/CodeGen/X86/WidenArith.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck %s + +;CHECK: test +;CHECK: vaddps +;CHECK: vmulps +;CHECK: vsubps +;CHECK: vcmpltps +;CHECK: vcmpltps +;CHECK: vandps +;CHECK: vandps +;CHECK: ret +define <8 x i32> @test(<8 x float> %a, <8 x float> %b) { + %c1 = fadd <8 x float> %a, %b + %b1 = fmul <8 x float> %b, %a + %d = fsub <8 x float> %b1, %c1 + %res1 = fcmp olt <8 x float> %a, %b1 + %res2 = fcmp olt <8 x float> %c1, %d + %andr = and <8 x i1>%res1, %res2 + %ex = zext <8 x i1> %andr to <8 x i32> + ret <8 x i32>%ex +} + + diff --git a/test/CodeGen/X86/atom-bypass-slow-division-64.ll b/test/CodeGen/X86/atom-bypass-slow-division-64.ll new file mode 100644 index 000000000000..a3bbea3c996b --- /dev/null +++ b/test/CodeGen/X86/atom-bypass-slow-division-64.ll @@ -0,0 +1,46 @@ +; RUN: llc < %s -mcpu=atom -mtriple=i686-linux -march=x86-64 | FileCheck %s + +; Additional tests for 64-bit divide bypass + +define i64 @Test_get_quotient(i64 %a, i64 %b) nounwind { +; CHECK: Test_get_quotient: +; CHECK: orq %rsi, %rcx +; CHECK-NEXT: testq $-65536, %rcx +; CHECK-NEXT: je +; CHECK: idivq +; CHECK: ret +; CHECK: divw +; CHECK: ret + %result = sdiv i64 %a, %b + ret i64 %result +} + +define i64 @Test_get_remainder(i64 %a, i64 %b) nounwind { +; CHECK: Test_get_remainder: +; CHECK: orq %rsi, %rcx +; CHECK-NEXT: testq $-65536, %rcx +; CHECK-NEXT: je +; CHECK: idivq +; CHECK: 
ret +; CHECK: divw +; CHECK: ret + %result = srem i64 %a, %b + ret i64 %result +} + +define i64 @Test_get_quotient_and_remainder(i64 %a, i64 %b) nounwind { +; CHECK: Test_get_quotient_and_remainder: +; CHECK: orq %rsi, %rcx +; CHECK-NEXT: testq $-65536, %rcx +; CHECK-NEXT: je +; CHECK: idivq +; CHECK: divw +; CHECK: addq +; CHECK: ret +; CHECK-NOT: idivq +; CHECK-NOT: divw + %resultdiv = sdiv i64 %a, %b + %resultrem = srem i64 %a, %b + %result = add i64 %resultdiv, %resultrem + ret i64 %result +} diff --git a/test/CodeGen/X86/atom-bypass-slow-division.ll b/test/CodeGen/X86/atom-bypass-slow-division.ll index e7c9605d3e88..4612940445cb 100644 --- a/test/CodeGen/X86/atom-bypass-slow-division.ll +++ b/test/CodeGen/X86/atom-bypass-slow-division.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s +; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s -define i32 @test_get_quotient(i32 %a, i32 %b) nounwind { -; CHECK: test_get_quotient +define i32 @Test_get_quotient(i32 %a, i32 %b) nounwind { +; CHECK: Test_get_quotient: ; CHECK: orl %ecx, %edx ; CHECK-NEXT: testl $-256, %edx ; CHECK-NEXT: je @@ -13,8 +13,8 @@ define i32 @test_get_quotient(i32 %a, i32 %b) nounwind { ret i32 %result } -define i32 @test_get_remainder(i32 %a, i32 %b) nounwind { -; CHECK: test_get_remainder +define i32 @Test_get_remainder(i32 %a, i32 %b) nounwind { +; CHECK: Test_get_remainder: ; CHECK: orl %ecx, %edx ; CHECK-NEXT: testl $-256, %edx ; CHECK-NEXT: je @@ -26,8 +26,8 @@ define i32 @test_get_remainder(i32 %a, i32 %b) nounwind { ret i32 %result } -define i32 @test_get_quotient_and_remainder(i32 %a, i32 %b) nounwind { -; CHECK: test_get_quotient_and_remainder +define i32 @Test_get_quotient_and_remainder(i32 %a, i32 %b) nounwind { +; CHECK: Test_get_quotient_and_remainder: ; CHECK: orl %ecx, %edx ; CHECK-NEXT: testl $-256, %edx ; CHECK-NEXT: je @@ -35,7 +35,7 @@ define i32 @test_get_quotient_and_remainder(i32 %a, i32 %b) nounwind { ; CHECK: divb ; CHECK: addl ; CHECK: ret -; CEECK-NOT: idivl +; CHECK-NOT: idivl ; CHECK-NOT: divb %resultdiv = sdiv i32 %a, %b %resultrem = srem i32 %a, %b @@ -43,8 +43,8 @@ define i32 @test_get_quotient_and_remainder(i32 %a, i32 %b) nounwind { ret i32 %result } -define i32 @test_use_div_and_idiv(i32 %a, i32 %b) nounwind { -; CHECK: test_use_div_and_idiv +define i32 @Test_use_div_and_idiv(i32 %a, i32 %b) nounwind { +; CHECK: Test_use_div_and_idiv: ; CHECK: idivl ; CHECK: divb ; CHECK: divl @@ -57,34 +57,34 @@ define i32 @test_use_div_and_idiv(i32 %a, i32 %b) nounwind { ret i32 %result } -define i32 @test_use_div_imm_imm() nounwind { -; CHECK: test_use_div_imm_imm +define i32 @Test_use_div_imm_imm() nounwind { +; CHECK: Test_use_div_imm_imm: ; CHECK: movl $64 %resultdiv = sdiv i32 256, 4 ret i32 %resultdiv } -define i32 @test_use_div_reg_imm(i32 %a) nounwind { -; CHECK: test_use_div_reg_imm -; CEHCK-NOT: test +define i32 @Test_use_div_reg_imm(i32 %a) nounwind { +; CHECK: Test_use_div_reg_imm: +; CHECK-NOT: test ; CHECK-NOT: idiv ; CHECK-NOT: divb %resultdiv = sdiv i32 %a, 33 ret i32 %resultdiv } -define i32 @test_use_rem_reg_imm(i32 %a) nounwind { -; CHECK: test_use_rem_reg_imm -; CEHCK-NOT: test +define i32 @Test_use_rem_reg_imm(i32 %a) nounwind { +; CHECK: Test_use_rem_reg_imm: +; CHECK-NOT: test ; CHECK-NOT: idiv ; CHECK-NOT: divb %resultrem = srem i32 %a, 33 ret i32 %resultrem } -define i32 @test_use_divrem_reg_imm(i32 %a) nounwind { -; CHECK: test_use_divrem_reg_imm -; CEHCK-NOT: test +define i32 @Test_use_divrem_reg_imm(i32 %a) nounwind { +; CHECK: 
Test_use_divrem_reg_imm: +; CHECK-NOT: test ; CHECK-NOT: idiv ; CHECK-NOT: divb %resultdiv = sdiv i32 %a, 33 @@ -93,8 +93,8 @@ define i32 @test_use_divrem_reg_imm(i32 %a) nounwind { ret i32 %result } -define i32 @test_use_div_imm_reg(i32 %a) nounwind { -; CHECK: test_use_div_imm_reg +define i32 @Test_use_div_imm_reg(i32 %a) nounwind { +; CHECK: Test_use_div_imm_reg: ; CHECK: test ; CHECK: idiv ; CHECK: divb @@ -102,8 +102,8 @@ define i32 @test_use_div_imm_reg(i32 %a) nounwind { ret i32 %resultdiv } -define i32 @test_use_rem_imm_reg(i32 %a) nounwind { -; CHECK: test_use_rem_imm_reg +define i32 @Test_use_rem_imm_reg(i32 %a) nounwind { +; CHECK: Test_use_rem_imm_reg: ; CHECK: test ; CHECK: idiv ; CHECK: divb diff --git a/test/CodeGen/X86/atom-call-reg-indirect-foldedreload32.ll b/test/CodeGen/X86/atom-call-reg-indirect-foldedreload32.ll new file mode 100644 index 000000000000..2a34e0298f30 --- /dev/null +++ b/test/CodeGen/X86/atom-call-reg-indirect-foldedreload32.ll @@ -0,0 +1,77 @@ +; RUN: llc < %s -mtriple=i386-linux-gnu -mcpu=atom 2>&1 | \ +; RUN: grep "calll" | not grep "(" +; RUN: llc < %s -mtriple=i386-linux-gnu -mcpu=core2 2>&1 | \ +; RUN: grep "calll" | grep "*funcp" +; +; original source code built with clang -S -emit-llvm -M32 test32.c: +; +; int a, b, c, d, e, f, g, h, i, j; +; extern int (*funcp)(int, int, int, int, int, int, int, int); +; extern int sum; +; +; void func() +; { +; sum = 0; +; for( i = a; i < b; ++i ) +; { +; sum += (*funcp)(i, b, c, d, e, f, g, h); +; } +; } +; +; ModuleID = 'test32.c' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128" +target triple = "i386-unknown-linux-gnu" + +@sum = external global i32 +@a = common global i32 0, align 4 +@i = common global i32 0, align 4 +@b = common global i32 0, align 4 +@funcp = external global i32 (i32, i32, i32, i32, i32, i32, i32, i32)* +@c = common global i32 0, align 4 +@d = common global i32 0, align 4 +@e = common global i32 0, align 4 +@f = common global i32 0, align 4 +@g = common global i32 0, align 4 +@h = common global i32 0, align 4 +@j = common global i32 0, align 4 + +define void @func() #0 { +entry: + store i32 0, i32* @sum, align 4 + %0 = load i32* @a, align 4 + store i32 %0, i32* @i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %1 = load i32* @i, align 4 + %2 = load i32* @b, align 4 + %cmp = icmp slt i32 %1, %2 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %3 = load i32 (i32, i32, i32, i32, i32, i32, i32, i32)** @funcp, align 4 + %4 = load i32* @i, align 4 + %5 = load i32* @b, align 4 + %6 = load i32* @c, align 4 + %7 = load i32* @d, align 4 + %8 = load i32* @e, align 4 + %9 = load i32* @f, align 4 + %10 = load i32* @g, align 4 + %11 = load i32* @h, align 4 + %call = call i32 %3(i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i32 %10, i32 %11) + %12 = load i32* @sum, align 4 + %add = add nsw i32 %12, %call + store i32 %add, i32* @sum, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %13 = load i32* @i, align 4 + %inc = add nsw i32 %13, 1 + store i32 %inc, i32* @i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git 
a/test/CodeGen/X86/atom-call-reg-indirect-foldedreload64.ll b/test/CodeGen/X86/atom-call-reg-indirect-foldedreload64.ll new file mode 100644 index 000000000000..bcfbd6107a56 --- /dev/null +++ b/test/CodeGen/X86/atom-call-reg-indirect-foldedreload64.ll @@ -0,0 +1,91 @@ +; RUN: llc < %s -mtriple=x86_64-linux-gnu -mcpu=atom 2>&1 | \ +; RUN: grep "callq" | not grep "(" +; RUN: llc < %s -mtriple=x86_64-linux-gnu -mcpu=core2 2>&1 | \ +; RUN: grep "callq" | grep "*funcp" +; +; Original source code built with clang -S -emit-llvm -m64 test64.c: +; int a, b, c, d, e, f, g, h, i, j, k, l, m, n; +; extern int (*funcp)(int, int, int, int, int, int, +; int, int, int, int, int, int, +; int, int); +; extern int sum; +; +; void func() +; { +; sum = 0; +; for( i = a; i < b; ++i ) +; { +; sum += (*funcp)(a, i, i*2, i/b, c, d, e, f, g, h, j, k, l, n); +; } +; } +; +; ModuleID = 'test64.c' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@sum = external global i32 +@a = common global i32 0, align 4 +@i = common global i32 0, align 4 +@b = common global i32 0, align 4 +@funcp = external global i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)* +@c = common global i32 0, align 4 +@d = common global i32 0, align 4 +@e = common global i32 0, align 4 +@f = common global i32 0, align 4 +@g = common global i32 0, align 4 +@h = common global i32 0, align 4 +@j = common global i32 0, align 4 +@k = common global i32 0, align 4 +@l = common global i32 0, align 4 +@n = common global i32 0, align 4 +@m = common global i32 0, align 4 + +define void @func() #0 { +entry: + store i32 0, i32* @sum, align 4 + %0 = load i32* @a, align 4 + store i32 %0, i32* @i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %1 = load i32* @i, align 4 + %2 = load i32* @b, align 4 + %cmp = icmp slt i32 %1, %2 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %3 = load i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)** @funcp, align 8 + %4 = load i32* @a, align 4 + %5 = load i32* @i, align 4 + %6 = load i32* @i, align 4 + %mul = mul nsw i32 %6, 2 + %7 = load i32* @i, align 4 + %8 = load i32* @b, align 4 + %div = sdiv i32 %7, %8 + %9 = load i32* @c, align 4 + %10 = load i32* @d, align 4 + %11 = load i32* @e, align 4 + %12 = load i32* @f, align 4 + %13 = load i32* @g, align 4 + %14 = load i32* @h, align 4 + %15 = load i32* @j, align 4 + %16 = load i32* @k, align 4 + %17 = load i32* @l, align 4 + %18 = load i32* @n, align 4 + %call = call i32 %3(i32 %4, i32 %5, i32 %mul, i32 %div, i32 %9, i32 %10, i32 %11, i32 %12, i32 %13, i32 %14, i32 %15, i32 %16, i32 %17, i32 %18) + %19 = load i32* @sum, align 4 + %add = add nsw i32 %19, %call + store i32 %add, i32* @sum, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %20 = load i32* @i, align 4 + %inc = add nsw i32 %20, 1 + store i32 %inc, i32* @i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/X86/atom-call-reg-indirect.ll b/test/CodeGen/X86/atom-call-reg-indirect.ll new file mode 100644 index 000000000000..632781130d06 --- /dev/null +++ 
b/test/CodeGen/X86/atom-call-reg-indirect.ll @@ -0,0 +1,45 @@ +; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck -check-prefix=ATOM32 %s +; RUN: llc < %s -mcpu=core2 -mtriple=i686-linux | FileCheck -check-prefix=ATOM-NOT32 %s +; RUN: llc < %s -mcpu=atom -mtriple=x86_64-linux | FileCheck -check-prefix=ATOM64 %s +; RUN: llc < %s -mcpu=core2 -mtriple=x86_64-linux | FileCheck -check-prefix=ATOM-NOT64 %s + + +; fn_ptr.ll +%class.A = type { i32 (...)** } + +define i32 @test1() #0 { + ;ATOM: test1 +entry: + %call = tail call %class.A* @_Z3facv() + %0 = bitcast %class.A* %call to void (%class.A*)*** + %vtable = load void (%class.A*)*** %0, align 8 + %1 = load void (%class.A*)** %vtable, align 8 + ;ATOM32: movl (%ecx), %ecx + ;ATOM32: calll *%ecx + ;ATOM-NOT32: calll *(%ecx) + ;ATOM64: movq (%rcx), %rcx + ;ATOM64: callq *%rcx + ;ATOM-NOT64: callq *(%rcx) + tail call void %1(%class.A* %call) + ret i32 0 +} + +declare %class.A* @_Z3facv() #1 + +; virt_fn.ll +@p = external global void (i32)** + +define i32 @test2() #0 { + ;ATOM: test2 +entry: + %0 = load void (i32)*** @p, align 8 + %1 = load void (i32)** %0, align 8 + ;ATOM32: movl (%eax), %eax + ;ATOM32: calll *%eax + ;ATOM-NOT: calll *(%eax) + ;ATOM64: movq (%rax), %rax + ;ATOM64: callq *%rax + ;ATOM-NOT64: callq *(%rax) + tail call void %1(i32 2) + ret i32 0 +} diff --git a/test/CodeGen/X86/atom-pad-short-functions.ll b/test/CodeGen/X86/atom-pad-short-functions.ll new file mode 100644 index 000000000000..b9a39e08cb51 --- /dev/null +++ b/test/CodeGen/X86/atom-pad-short-functions.ll @@ -0,0 +1,103 @@ +; RUN: llc < %s -O1 -mcpu=atom -mtriple=i686-linux | FileCheck %s + +declare void @external_function(...) + +define i32 @test_return_val(i32 %a) nounwind { +; CHECK: test_return_val +; CHECK: movl +; CHECK: nop +; CHECK: nop +; CHECK: nop +; CHECK: nop +; CHECK: nop +; CHECK: nop +; CHECK: ret + ret i32 %a +} + +define i32 @test_optsize(i32 %a) nounwind optsize { +; CHECK: test_optsize +; CHECK: movl +; CHECK-NEXT: ret + ret i32 %a +} + +define i32 @test_minsize(i32 %a) nounwind minsize { +; CHECK: test_minsize +; CHECK: movl +; CHECK-NEXT: ret + ret i32 %a +} + +define i32 @test_add(i32 %a, i32 %b) nounwind { +; CHECK: test_add +; CHECK: addl +; CHECK: nop +; CHECK: nop +; CHECK: nop +; CHECK: nop +; CHECK: ret + %result = add i32 %a, %b + ret i32 %result +} + +define i32 @test_multiple_ret(i32 %a, i32 %b, i1 %c) nounwind { +; CHECK: @test_multiple_ret +; CHECK: je + +; CHECK: nop +; CHECK: nop +; CHECK: ret + +; CHECK: nop +; CHECK: nop +; CHECK: ret + + br i1 %c, label %bb1, label %bb2 + +bb1: + ret i32 %a + +bb2: + ret i32 %b +} + +define void @test_call_others(i32 %x) nounwind +{ +; CHECK: test_call_others +; CHECK: je + %tobool = icmp eq i32 %x, 0 + br i1 %tobool, label %if.end, label %true.case + +; CHECK: jmp external_function +true.case: + tail call void bitcast (void (...)* @external_function to void ()*)() nounwind + br label %if.end + +; CHECK: nop +; CHECK: nop +; CHECK: nop +; CHECK: nop +; CHECK: ret +if.end: + ret void + +} + +define void @test_branch_to_same_bb(i32 %x, i32 %y) nounwind { +; CHECK: @test_branch_to_same_bb + %cmp = icmp sgt i32 %x, 0 + br i1 %cmp, label %while.cond, label %while.end + +while.cond: + br label %while.cond + +; CHECK: nop +; CHECK: nop +; CHECK: nop +; CHECK: nop +; CHECK: ret +while.end: + ret void +} + diff --git a/test/CodeGen/X86/atomic-dagsched.ll b/test/CodeGen/X86/atomic-dagsched.ll new file mode 100644 index 000000000000..0e7cf8c09668 --- /dev/null +++ b/test/CodeGen/X86/atomic-dagsched.ll 
@@ -0,0 +1,110 @@ +; RUN: llc < %s -march=x86-64 -mcpu=corei7 -verify-machineinstrs | FileCheck %s + +define void @test(i8** %a, i64* %b, i64 %c, i64 %d) nounwind { +entry: + %ptrtoarg4 = load i8** %a, align 8 + %brglist1 = getelementptr i8** %a, i64 1 + %ptrtoarg25 = load i8** %brglist1, align 8 + %0 = load i64* %b, align 8 + %1 = mul i64 %0, 4 + %scevgep = getelementptr i8* %ptrtoarg25, i64 %1 + %2 = mul i64 %d, 4 + br label %loop.cond + +loop.cond: ; preds = %test.exit, %entry + %asr.iv6 = phi i8* [ %29, %test.exit ], [ %scevgep, %entry ] + %iv = phi i64 [ %0, %entry ], [ %28, %test.exit ] + %3 = icmp eq i64 %iv, %c + br i1 %3, label %return, label %loop + +loop: ; preds = %loop.cond + %4 = load i64* addrspace(256)* inttoptr (i64 264 to i64* addrspace(256)*), align 8, !tbaa !0 + %5 = load i64* %4, align 8, !tbaa !3 + %vector.size.i = ashr i64 %5, 3 + %num.vector.wi.i = shl i64 %vector.size.i, 3 + %6 = icmp eq i64 %vector.size.i, 0 + br i1 %6, label %scalarIf.i, label %dim_0_vector_pre_head.i + +dim_0_vector_pre_head.i: ; preds = %loop + %7 = trunc i64 %5 to i32 + %tempvector_func.i = insertelement <8 x i32> undef, i32 %7, i32 0 + %vectorvector_func.i = shufflevector <8 x i32> %tempvector_func.i, <8 x i32> undef, <8 x i32> zeroinitializer + br label %vector_kernel_entry.i + +vector_kernel_entry.i: ; preds = %vector_kernel_entry.i, %dim_0_vector_pre_head.i + %asr.iv9 = phi i8* [ %scevgep10, %vector_kernel_entry.i ], [ %asr.iv6, %dim_0_vector_pre_head.i ] + %asr.iv = phi i64 [ %asr.iv.next, %vector_kernel_entry.i ], [ %vector.size.i, %dim_0_vector_pre_head.i ] + %8 = bitcast i8* %ptrtoarg4 to i32 addrspace(1)* + %asr.iv911 = bitcast i8* %asr.iv9 to <8 x i32> addrspace(1)* + %9 = load <8 x i32> addrspace(1)* %asr.iv911, align 4 + %extract8vector_func.i = extractelement <8 x i32> %9, i32 0 + %extract9vector_func.i = extractelement <8 x i32> %9, i32 1 + %extract10vector_func.i = extractelement <8 x i32> %9, i32 2 + %extract11vector_func.i = extractelement <8 x i32> %9, i32 3 + %extract12vector_func.i = extractelement <8 x i32> %9, i32 4 + %extract13vector_func.i = extractelement <8 x i32> %9, i32 5 + %extract14vector_func.i = extractelement <8 x i32> %9, i32 6 + %extract15vector_func.i = extractelement <8 x i32> %9, i32 7 + %10 = atomicrmw min i32 addrspace(1)* %8, i32 %extract8vector_func.i seq_cst + %11 = atomicrmw min i32 addrspace(1)* %8, i32 %extract9vector_func.i seq_cst + %12 = atomicrmw min i32 addrspace(1)* %8, i32 %extract10vector_func.i seq_cst + %13 = atomicrmw min i32 addrspace(1)* %8, i32 %extract11vector_func.i seq_cst + %14 = atomicrmw min i32 addrspace(1)* %8, i32 %extract12vector_func.i seq_cst + %15 = atomicrmw min i32 addrspace(1)* %8, i32 %extract13vector_func.i seq_cst + %16 = atomicrmw min i32 addrspace(1)* %8, i32 %extract14vector_func.i seq_cst + %17 = atomicrmw min i32 addrspace(1)* %8, i32 %extract15vector_func.i seq_cst + store <8 x i32> %vectorvector_func.i, <8 x i32> addrspace(1)* %asr.iv911, align 4 + %asr.iv.next = add i64 %asr.iv, -1 + %scevgep10 = getelementptr i8* %asr.iv9, i64 32 + %dim_0_vector_cmp.to.max.i = icmp eq i64 %asr.iv.next, 0 + br i1 %dim_0_vector_cmp.to.max.i, label %scalarIf.i, label %vector_kernel_entry.i + +scalarIf.i: ; preds = %vector_kernel_entry.i, %loop + %exec_wi.i = phi i64 [ 0, %loop ], [ %num.vector.wi.i, %vector_kernel_entry.i ] + %18 = icmp eq i64 %exec_wi.i, %5 + br i1 %18, label %test.exit, label %dim_0_pre_head.i + +dim_0_pre_head.i: ; preds = %scalarIf.i + %19 = load i64* addrspace(256)* inttoptr (i64 264 to i64* 
addrspace(256)*), align 8, !tbaa !0 + %20 = load i64* %19, align 8, !tbaa !3 + %21 = trunc i64 %20 to i32 + %22 = mul i64 %vector.size.i, 8 + br label %scalar_kernel_entry.i + +scalar_kernel_entry.i: ; preds = %scalar_kernel_entry.i, %dim_0_pre_head.i + %asr.iv12 = phi i64 [ %asr.iv.next13, %scalar_kernel_entry.i ], [ %22, %dim_0_pre_head.i ] + %23 = bitcast i8* %asr.iv6 to i32 addrspace(1)* + %24 = bitcast i8* %ptrtoarg4 to i32 addrspace(1)* + %scevgep16 = getelementptr i32 addrspace(1)* %23, i64 %asr.iv12 + %25 = load i32 addrspace(1)* %scevgep16, align 4, !tbaa !4 + %26 = atomicrmw min i32 addrspace(1)* %24, i32 %25 seq_cst + %scevgep15 = getelementptr i32 addrspace(1)* %23, i64 %asr.iv12 + store i32 %21, i32 addrspace(1)* %scevgep15, align 4, !tbaa !4 + %asr.iv.next13 = add i64 %asr.iv12, 1 + %dim_0_cmp.to.max.i = icmp eq i64 %5, %asr.iv.next13 + br i1 %dim_0_cmp.to.max.i, label %test.exit, label %scalar_kernel_entry.i + +test.exit: ; preds = %scalar_kernel_entry.i, %scalarIf.i + %27 = bitcast i8* %asr.iv6 to i1* + %28 = add i64 %iv, %d + store i64 %28, i64* %b, align 8 + %scevgep8 = getelementptr i1* %27, i64 %2 + %29 = bitcast i1* %scevgep8 to i8* + br label %loop.cond + +return: ; preds = %loop.cond + store i64 %0, i64* %b, align 8 + ret void +} + +!0 = metadata !{metadata !"any pointer", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} +!3 = metadata !{metadata !"long", metadata !1} +!4 = metadata !{metadata !"int", metadata !1} + +; CHECK: test +; CHECK: decq +; CHECK-NOT: cmpxchgl +; CHECK: jne +; CHECK: ret diff --git a/test/CodeGen/X86/atomic-load-store-wide.ll b/test/CodeGen/X86/atomic-load-store-wide.ll index a9ebfef2ebeb..17e04f059034 100644 --- a/test/CodeGen/X86/atomic-load-store-wide.ll +++ b/test/CodeGen/X86/atomic-load-store-wide.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 | FileCheck %s +; RUN: llc < %s -march=x86 -verify-machineinstrs | FileCheck %s ; 64-bit load/store on x86-32 ; FIXME: The generated code can be substantially improved. 
diff --git a/test/CodeGen/X86/atomic-load-store.ll b/test/CodeGen/X86/atomic-load-store.ll index fee45859c16a..86a744ed00f0 100644 --- a/test/CodeGen/X86/atomic-load-store.ll +++ b/test/CodeGen/X86/atomic-load-store.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -O0 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s define void @test1(i32* %ptr, i32 %val1) { ; CHECK: test1 diff --git a/test/CodeGen/X86/atomic-minmax-i6432.ll b/test/CodeGen/X86/atomic-minmax-i6432.ll index e3ef605f7f1c..62f784f69608 100644 --- a/test/CodeGen/X86/atomic-minmax-i6432.ll +++ b/test/CodeGen/X86/atomic-minmax-i6432.ll @@ -1,5 +1,6 @@ -; RUN: llc -march=x86 -mattr=+cmov -mtriple=i386-pc-linux < %s | FileCheck %s -check-prefix=LINUX -; RUN: llc -march=x86 -mtriple=i386-macosx -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC +; RUN: llc -march=x86 -mattr=+cmov -mtriple=i386-pc-linux -verify-machineinstrs < %s | FileCheck %s -check-prefix=LINUX +; RUN: llc -march=x86 -mattr=-cmov -mtriple=i386-pc-linux -verify-machineinstrs < %s | FileCheck %s -check-prefix=NOCMOV +; RUN: llc -march=x86 -mtriple=i386-macosx -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s -check-prefix=PIC @sc64 = external global i64 @@ -16,6 +17,16 @@ define void @atomic_maxmin_i6432() { ; LINUX: lock ; LINUX-NEXT: cmpxchg8b ; LINUX: jne [[LABEL]] +; NOCMOV: [[LABEL:.LBB[0-9]+_[0-9]+]] +; NOCMOV: cmpl +; NOCMOV: setl +; NOCMOV: cmpl +; NOCMOV: setl +; NOCMOV: jne +; NOCMOV: jne +; NOCMOV: lock +; NOCMOV-NEXT: cmpxchg8b +; NOCMOV: jne [[LABEL]] %2 = atomicrmw min i64* @sc64, i64 6 acquire ; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]] ; LINUX: cmpl @@ -27,6 +38,16 @@ define void @atomic_maxmin_i6432() { ; LINUX: lock ; LINUX-NEXT: cmpxchg8b ; LINUX: jne [[LABEL]] +; NOCMOV: [[LABEL:.LBB[0-9]+_[0-9]+]] +; NOCMOV: cmpl +; NOCMOV: setg +; NOCMOV: cmpl +; NOCMOV: setg +; NOCMOV: jne +; NOCMOV: jne +; NOCMOV: lock +; NOCMOV-NEXT: cmpxchg8b +; NOCMOV: jne [[LABEL]] %3 = atomicrmw umax i64* @sc64, i64 7 acquire ; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]] ; LINUX: cmpl @@ -38,6 +59,16 @@ define void @atomic_maxmin_i6432() { ; LINUX: lock ; LINUX-NEXT: cmpxchg8b ; LINUX: jne [[LABEL]] +; NOCMOV: [[LABEL:.LBB[0-9]+_[0-9]+]] +; NOCMOV: cmpl +; NOCMOV: setb +; NOCMOV: cmpl +; NOCMOV: setb +; NOCMOV: jne +; NOCMOV: jne +; NOCMOV: lock +; NOCMOV-NEXT: cmpxchg8b +; NOCMOV: jne [[LABEL]] %4 = atomicrmw umin i64* @sc64, i64 8 acquire ; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]] ; LINUX: cmpl @@ -49,6 +80,16 @@ define void @atomic_maxmin_i6432() { ; LINUX: lock ; LINUX-NEXT: cmpxchg8b ; LINUX: jne [[LABEL]] +; NOCMOV: [[LABEL:.LBB[0-9]+_[0-9]+]] +; NOCMOV: cmpl +; NOCMOV: seta +; NOCMOV: cmpl +; NOCMOV: seta +; NOCMOV: jne +; NOCMOV: jne +; NOCMOV: lock +; NOCMOV-NEXT: cmpxchg8b +; NOCMOV: jne [[LABEL]] ret void } diff --git a/test/CodeGen/X86/atomic-or.ll b/test/CodeGen/X86/atomic-or.ll index 3f02eafb44a2..d759beb2caa8 100644 --- a/test/CodeGen/X86/atomic-or.ll +++ b/test/CodeGen/X86/atomic-or.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 | FileCheck %s +; RUN: llc < %s -march=x86-64 -verify-machineinstrs | FileCheck %s ; rdar://9692967 diff --git a/test/CodeGen/X86/atomic-pointer.ll b/test/CodeGen/X86/atomic-pointer.ll index a455277be4db..ec3e6c3a8c19 100644 --- a/test/CodeGen/X86/atomic-pointer.ll +++ b/test/CodeGen/X86/atomic-pointer.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s 
-mtriple=i686-none-linux | FileCheck %s +; RUN: llc < %s -mtriple=i686-none-linux -verify-machineinstrs | FileCheck %s define i32* @test_atomic_ptr_load(i32** %a0) { ; CHECK: test_atomic_ptr_load diff --git a/test/CodeGen/X86/atomic16.ll b/test/CodeGen/X86/atomic16.ll index 824995d6cb98..ec2887e29f81 100644 --- a/test/CodeGen/X86/atomic16.ll +++ b/test/CodeGen/X86/atomic16.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mcpu=corei7 -show-mc-encoding | FileCheck %s --check-prefix X64 -; RUN: llc < %s -O0 -mtriple=i386-unknown-unknown -mcpu=corei7 | FileCheck %s --check-prefix X32 +; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mcpu=corei7 -verify-machineinstrs -show-mc-encoding | FileCheck %s --check-prefix X64 +; RUN: llc < %s -O0 -mtriple=i386-unknown-unknown -mcpu=corei7 -verify-machineinstrs | FileCheck %s --check-prefix X32 @sc16 = external global i16 diff --git a/test/CodeGen/X86/atomic32.ll b/test/CodeGen/X86/atomic32.ll index dc927d8cb6f6..3cb9ca1c76c7 100644 --- a/test/CodeGen/X86/atomic32.ll +++ b/test/CodeGen/X86/atomic32.ll @@ -1,5 +1,6 @@ -; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 | FileCheck %s --check-prefix X64 -; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 | FileCheck %s --check-prefix X32 +; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 -verify-machineinstrs | FileCheck %s --check-prefix X64 +; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 -verify-machineinstrs | FileCheck %s --check-prefix X32 +; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 -mattr=-cmov -verify-machineinstrs | FileCheck %s --check-prefix NOCMOV @sc32 = external global i32 @@ -164,9 +165,15 @@ define void @atomic_fetch_max32(i32 %x) nounwind { ; X32: cmov ; X32: lock ; X32: cmpxchgl + +; NOCMOV: cmpl +; NOCMOV: jl +; NOCMOV: lock +; NOCMOV: cmpxchgl ret void ; X64: ret ; X32: ret +; NOCMOV: ret } define void @atomic_fetch_min32(i32 %x) nounwind { @@ -180,9 +187,15 @@ define void @atomic_fetch_min32(i32 %x) nounwind { ; X32: cmov ; X32: lock ; X32: cmpxchgl + +; NOCMOV: cmpl +; NOCMOV: jg +; NOCMOV: lock +; NOCMOV: cmpxchgl ret void ; X64: ret ; X32: ret +; NOCMOV: ret } define void @atomic_fetch_umax32(i32 %x) nounwind { @@ -196,9 +209,15 @@ define void @atomic_fetch_umax32(i32 %x) nounwind { ; X32: cmov ; X32: lock ; X32: cmpxchgl + +; NOCMOV: cmpl +; NOCMOV: jb +; NOCMOV: lock +; NOCMOV: cmpxchgl ret void ; X64: ret ; X32: ret +; NOCMOV: ret } define void @atomic_fetch_umin32(i32 %x) nounwind { @@ -207,13 +226,20 @@ define void @atomic_fetch_umin32(i32 %x) nounwind { ; X64: cmov ; X64: lock ; X64: cmpxchgl + ; X32: cmpl ; X32: cmov ; X32: lock ; X32: cmpxchgl + +; NOCMOV: cmpl +; NOCMOV: ja +; NOCMOV: lock +; NOCMOV: cmpxchgl ret void ; X64: ret ; X32: ret +; NOCMOV: ret } define void @atomic_fetch_cmpxchg32() nounwind { diff --git a/test/CodeGen/X86/atomic64.ll b/test/CodeGen/X86/atomic64.ll index 45785cc8fe52..aa000455753f 100644 --- a/test/CodeGen/X86/atomic64.ll +++ b/test/CodeGen/X86/atomic64.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 | FileCheck %s --check-prefix X64 +; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 -verify-machineinstrs | FileCheck %s --check-prefix X64 @sc64 = external global i64 diff --git a/test/CodeGen/X86/atomic6432.ll b/test/CodeGen/X86/atomic6432.ll index f9b21c5bc75e..31e66c876e3d 100644 --- a/test/CodeGen/X86/atomic6432.ll +++ b/test/CodeGen/X86/atomic6432.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 | FileCheck %s --check-prefix X32 +; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 -verify-machineinstrs | 
FileCheck %s --check-prefix X32 @sc64 = external global i64 diff --git a/test/CodeGen/X86/atomic8.ll b/test/CodeGen/X86/atomic8.ll index 412428406dcf..3278ed1f504e 100644 --- a/test/CodeGen/X86/atomic8.ll +++ b/test/CodeGen/X86/atomic8.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 | FileCheck %s --check-prefix X64 -; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 | FileCheck %s --check-prefix X32 +; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 -verify-machineinstrs | FileCheck %s --check-prefix X64 +; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 -verify-machineinstrs | FileCheck %s --check-prefix X32 @sc8 = external global i8 diff --git a/test/CodeGen/X86/atomic_add.ll b/test/CodeGen/X86/atomic_add.ll index d94499889de4..6b3a6b224dba 100644 --- a/test/CodeGen/X86/atomic_add.ll +++ b/test/CodeGen/X86/atomic_add.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 | FileCheck %s +; RUN: llc < %s -march=x86-64 -verify-machineinstrs | FileCheck %s ; rdar://7103704 diff --git a/test/CodeGen/X86/atomic_op.ll b/test/CodeGen/X86/atomic_op.ll index c5fa07d07d80..a378d6e8d684 100644 --- a/test/CodeGen/X86/atomic_op.ll +++ b/test/CodeGen/X86/atomic_op.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+cmov | FileCheck %s +; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+cmov -verify-machineinstrs | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" diff --git a/test/CodeGen/X86/avx-cvt.ll b/test/CodeGen/X86/avx-cvt.ll index d0a7fe01009e..22fad7ce4b7d 100644 --- a/test/CodeGen/X86/avx-cvt.ll +++ b/test/CodeGen/X86/avx-cvt.ll @@ -18,6 +18,12 @@ define <4 x double> @sitofp01(<4 x i32> %a) { ret <4 x double> %b } +; CHECK: vcvtdq2ps %ymm +define <8 x float> @sitofp02(<8 x i16> %a) { + %b = sitofp <8 x i16> %a to <8 x float> + ret <8 x float> %b +} + ; CHECK: vcvttpd2dqy %ymm define <4 x i32> @fptosi01(<4 x double> %a) { %b = fptosi <4 x double> %a to <4 x i32> @@ -46,7 +52,7 @@ entry: ret double %conv } -; CHECK: vcvtsi2sd (% +; CHECK: vcvtsi2sdl (% define double @funcB(i32* nocapture %e) nounwind uwtable readonly ssp { entry: %tmp1 = load i32* %e, align 4 @@ -54,7 +60,7 @@ entry: ret double %conv } -; CHECK: vcvtsi2ss (% +; CHECK: vcvtsi2ssl (% define float @funcC(i32* nocapture %e) nounwind uwtable readonly ssp { entry: %tmp1 = load i32* %e, align 4 diff --git a/test/CodeGen/X86/avx-intel-ocl.ll b/test/CodeGen/X86/avx-intel-ocl.ll index 1446b36a0fb4..055072098a25 100644 --- a/test/CodeGen/X86/avx-intel-ocl.ll +++ b/test/CodeGen/X86/avx-intel-ocl.ll @@ -1,9 +1,12 @@ -; RUN: llc < %s -mtriple=i386-pc-win32 -mcpu=corei7-avx -mattr=+avx | FileCheck -check-prefix=WIN32 %s +; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck -check-prefix=X32 %s +; RUN: llc < %s -mtriple=i386-pc-win32 -mcpu=corei7-avx -mattr=+avx | FileCheck -check-prefix=X32 %s ; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=corei7-avx -mattr=+avx | FileCheck -check-prefix=WIN64 %s -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck -check-prefix=NOT_WIN %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck -check-prefix=X64 %s declare <16 x float> @func_float16_ptr(<16 x float>, <16 x float> *) declare <16 x float> @func_float16(<16 x float>, <16 x float>) +declare i32 @func_int(i32, i32) + ; WIN64: testf16_inp ; WIN64: vaddps {{.*}}, {{%ymm[0-1]}} ; WIN64: vaddps {{.*}}, {{%ymm[0-1]}} @@ -11,19 +14,19 @@ declare 
<16 x float> @func_float16(<16 x float>, <16 x float>) ; WIN64: call ; WIN64: ret -; WIN32: testf16_inp -; WIN32: movl %eax, (%esp) -; WIN32: vaddps {{.*}}, {{%ymm[0-1]}} -; WIN32: vaddps {{.*}}, {{%ymm[0-1]}} -; WIN32: call -; WIN32: ret +; X32: testf16_inp +; X32: movl %eax, (%esp) +; X32: vaddps {{.*}}, {{%ymm[0-1]}} +; X32: vaddps {{.*}}, {{%ymm[0-1]}} +; X32: call +; X32: ret -; NOT_WIN: testf16_inp -; NOT_WIN: vaddps {{.*}}, {{%ymm[0-1]}} -; NOT_WIN: vaddps {{.*}}, {{%ymm[0-1]}} -; NOT_WIN: leaq {{.*}}(%rsp), %rdi -; NOT_WIN: call -; NOT_WIN: ret +; X64: testf16_inp +; X64: vaddps {{.*}}, {{%ymm[0-1]}} +; X64: vaddps {{.*}}, {{%ymm[0-1]}} +; X64: leaq {{.*}}(%rsp), %rdi +; X64: call +; X64: ret ;test calling conventions - input parameters define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind { @@ -45,11 +48,11 @@ define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind { ; WIN64: ret ; preserved ymm8-ymm15 -; NOT_WIN: testf16_regs -; NOT_WIN: call -; NOT_WIN: vaddps {{%ymm[8-9]}}, %ymm0, %ymm0 -; NOT_WIN: vaddps {{%ymm[8-9]}}, %ymm1, %ymm1 -; NOT_WIN: ret +; X64: testf16_regs +; X64: call +; X64: vaddps {{%ymm[8-9]}}, %ymm0, %ymm0 +; X64: vaddps {{%ymm[8-9]}}, %ymm1, %ymm1 +; X64: ret define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind { %y = alloca <16 x float>, align 16 @@ -84,24 +87,83 @@ define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind { ; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload ; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload -; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill -; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill -; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill -; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill -; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill -; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill -; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill -; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill -; NOT_WIN: call -; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload -; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload -; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload -; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload -; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload -; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload -; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload -; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload +; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill +; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill +; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill +; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill +; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill +; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill +; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill +; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill +; X64: call +; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload +; X64: vmovups 
{{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload +; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload +; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload +; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload +; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload +; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload +; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x float> %b) nounwind { %c = call <16 x float> @func_float16(<16 x float> %a, <16 x float> %b) ret <16 x float> %c } + +; test functions with integer parameters +; pass parameters on stack for 32-bit platform +; X32: movl {{.*}}, 4(%esp) +; X32: movl {{.*}}, (%esp) +; X32: call +; X32: addl {{.*}}, %eax + +; pass parameters in registers for 64-bit platform +; X64: leal {{.*}}, %edi +; X64: movl {{.*}}, %esi +; X64: call +; X64: addl {{.*}}, %eax +define i32 @test_int(i32 %a, i32 %b) nounwind { + %c1 = add i32 %a, %b + %c2 = call intel_ocl_bicc i32 @func_int(i32 %c1, i32 %a) + %c = add i32 %c2, %b + ret i32 %c +} + +; WIN64: test_float4 +; WIN64-NOT: vzeroupper +; WIN64: call +; WIN64-NOT: vzeroupper +; WIN64: call +; WIN64: ret + +; X64: test_float4 +; X64-NOT: vzeroupper +; X64: call +; X64-NOT: vzeroupper +; X64: call +; X64: ret + +; X32: test_float4 +; X32: vzeroupper +; X32: call +; X32: vzeroupper +; X32: call +; X32: ret + +declare <4 x float> @func_float4(<4 x float>, <4 x float>, <4 x float>) + +define <8 x float> @test_float4(<8 x float> %a, <8 x float> %b, <8 x float> %c) nounwind readnone { +entry: + %0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %1 = shufflevector <8 x float> %b, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %2 = shufflevector <8 x float> %c, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %call.i = tail call intel_ocl_bicc <4 x float> @func_float4(<4 x float> %0, <4 x float> %1, <4 x float> %2) nounwind + %3 = shufflevector <4 x float> %call.i, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> + %4 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %5 = shufflevector <8 x float> %b, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %6 = shufflevector <8 x float> %c, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %call.i2 = tail call intel_ocl_bicc <4 x float> @func_float4(<4 x float> %4, <4 x float> %5, <4 x float> %6) nounwind + %7 = shufflevector <4 x float> %call.i2, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> + %8 = shufflevector <8 x float> %3, <8 x float> %7, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> + ret <8 x float> %8 +} + diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll index 88ecd5a5d34f..0be83f648d1a 100644 --- a/test/CodeGen/X86/avx-intrinsics-x86.ll +++ b/test/CodeGen/X86/avx-intrinsics-x86.ll @@ -671,7 +671,9 @@ define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) { ; CHECK: test_x86_sse2_storeu_dq ; CHECK: movl ; CHECK: vmovdqu - call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a1) + ; add operation forces the execution domain. 
+ %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2) ret void } declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind @@ -681,6 +683,7 @@ define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) { ; CHECK: test_x86_sse2_storeu_pd ; CHECK: movl ; CHECK: vmovupd + ; fadd operation forces the execution domain. %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000> call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2) ret void @@ -2345,7 +2348,7 @@ declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind rea define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) { - ; CHECK: vpermilps + ; CHECK: vpshufd %res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1] ret <4 x float> %res } diff --git a/test/CodeGen/X86/avx-load-store.ll b/test/CodeGen/X86/avx-load-store.ll index c9fc66a8a791..a6775aba0989 100644 --- a/test/CodeGen/X86/avx-load-store.ll +++ b/test/CodeGen/X86/avx-load-store.ll @@ -53,19 +53,24 @@ define void @storev16i16(<16 x i16> %a) nounwind { unreachable } -; CHECK: vmovups %ymm +; CHECK: storev16i16_01 +; CHECK: vextractf128 +; CHECK: vmovups %xmm define void @storev16i16_01(<16 x i16> %a) nounwind { store <16 x i16> %a, <16 x i16>* undef, align 4 unreachable } +; CHECK: storev32i8 ; CHECK: vmovaps %ymm define void @storev32i8(<32 x i8> %a) nounwind { store <32 x i8> %a, <32 x i8>* undef, align 32 unreachable } -; CHECK: vmovups %ymm +; CHECK: storev32i8_01 +; CHECK: vextractf128 +; CHECK: vmovups %xmm define void @storev32i8_01(<32 x i8> %a) nounwind { store <32 x i8> %a, <32 x i8>* undef, align 4 unreachable @@ -109,3 +114,38 @@ cif_mixed_test_any_check: ; preds = %cif_mask_mixed unreachable } +; CHECK: add8i32 +; CHECK: vmovups +; CHECK: vmovups +; CHECK-NOT: vinsertf128 +; CHECK-NOT: vextractf128 +; CHECK: vmovups +; CHECK: vmovups +define void @add8i32(<8 x i32>* %ret, <8 x i32>* %bp) nounwind { + %b = load <8 x i32>* %bp, align 1 + %x = add <8 x i32> zeroinitializer, %b + store <8 x i32> %x, <8 x i32>* %ret, align 1 + ret void +} + +; CHECK: add4i64a64 +; CHECK: vmovaps ({{.*}}), %ymm{{.*}} +; CHECK: vmovaps %ymm{{.*}}, ({{.*}}) +define void @add4i64a64(<4 x i64>* %ret, <4 x i64>* %bp) nounwind { + %b = load <4 x i64>* %bp, align 64 + %x = add <4 x i64> zeroinitializer, %b + store <4 x i64> %x, <4 x i64>* %ret, align 64 + ret void +} + +; CHECK: add4i64a16 +; CHECK: vmovaps {{.*}}({{.*}}), %xmm{{.*}} +; CHECK: vmovaps {{.*}}({{.*}}), %xmm{{.*}} +; CHECK: vmovaps %xmm{{.*}}, {{.*}}({{.*}}) +; CHECK: vmovaps %xmm{{.*}}, {{.*}}({{.*}}) +define void @add4i64a16(<4 x i64>* %ret, <4 x i64>* %bp) nounwind { + %b = load <4 x i64>* %bp, align 16 + %x = add <4 x i64> zeroinitializer, %b + store <4 x i64> %x, <4 x i64>* %ret, align 16 + ret void +} diff --git a/test/CodeGen/X86/avx-sext.ll b/test/CodeGen/X86/avx-sext.ll index 3713a8c37799..b9c700051005 100755 --- a/test/CodeGen/X86/avx-sext.ll +++ b/test/CodeGen/X86/avx-sext.ll @@ -1,17 +1,188 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck %s -check-prefix=AVX +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=SSSE3 +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=pentium4 | FileCheck %s -check-prefix=SSE2 define <8 x i32> @sext_8i16_to_8i32(<8 x 
i16> %A) nounwind uwtable readnone ssp { -;CHECK: sext_8i16_to_8i32 -;CHECK: vpmovsxwd +; AVX: sext_8i16_to_8i32 +; AVX: vpmovsxwd %B = sext <8 x i16> %A to <8 x i32> ret <8 x i32>%B } define <4 x i64> @sext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp { -;CHECK: sext_4i32_to_4i64 -;CHECK: vpmovsxdq +; AVX: sext_4i32_to_4i64 +; AVX: vpmovsxdq %B = sext <4 x i32> %A to <4 x i64> ret <4 x i64>%B } + +; AVX: load_sext_test1 +; AVX: vpmovsxwd (%r{{[^,]*}}), %xmm{{.*}} +; AVX: ret + +; SSSE3: load_sext_test1 +; SSSE3: movq +; SSSE3: punpcklwd %xmm{{.*}}, %xmm{{.*}} +; SSSE3: psrad $16 +; SSSE3: ret + +; SSE2: load_sext_test1 +; SSE2: movq +; SSE2: punpcklwd %xmm{{.*}}, %xmm{{.*}} +; SSE2: psrad $16 +; SSE2: ret +define <4 x i32> @load_sext_test1(<4 x i16> *%ptr) { + %X = load <4 x i16>* %ptr + %Y = sext <4 x i16> %X to <4 x i32> + ret <4 x i32>%Y +} + +; AVX: load_sext_test2 +; AVX: vpmovsxbd (%r{{[^,]*}}), %xmm{{.*}} +; AVX: ret + +; SSSE3: load_sext_test2 +; SSSE3: movd +; SSSE3: pshufb +; SSSE3: psrad $24 +; SSSE3: ret + +; SSE2: load_sext_test2 +; SSE2: movl +; SSE2: psrad $24 +; SSE2: ret +define <4 x i32> @load_sext_test2(<4 x i8> *%ptr) { + %X = load <4 x i8>* %ptr + %Y = sext <4 x i8> %X to <4 x i32> + ret <4 x i32>%Y +} + +; AVX: load_sext_test3 +; AVX: vpmovsxbq (%r{{[^,]*}}), %xmm{{.*}} +; AVX: ret + +; SSSE3: load_sext_test3 +; SSSE3: movsbq +; SSSE3: movsbq +; SSSE3: punpcklqdq +; SSSE3: ret + +; SSE2: load_sext_test3 +; SSE2: movsbq +; SSE2: movsbq +; SSE2: punpcklqdq +; SSE2: ret +define <2 x i64> @load_sext_test3(<2 x i8> *%ptr) { + %X = load <2 x i8>* %ptr + %Y = sext <2 x i8> %X to <2 x i64> + ret <2 x i64>%Y +} + +; AVX: load_sext_test4 +; AVX: vpmovsxwq (%r{{[^,]*}}), %xmm{{.*}} +; AVX: ret + +; SSSE3: load_sext_test4 +; SSSE3: movswq +; SSSE3: movswq +; SSSE3: punpcklqdq +; SSSE3: ret + +; SSE2: load_sext_test4 +; SSE2: movswq +; SSE2: movswq +; SSE2: punpcklqdq +; SSE2: ret +define <2 x i64> @load_sext_test4(<2 x i16> *%ptr) { + %X = load <2 x i16>* %ptr + %Y = sext <2 x i16> %X to <2 x i64> + ret <2 x i64>%Y +} + +; AVX: load_sext_test5 +; AVX: vpmovsxdq (%r{{[^,]*}}), %xmm{{.*}} +; AVX: ret + +; SSSE3: load_sext_test5 +; SSSE3: movslq +; SSSE3: movslq +; SSSE3: punpcklqdq +; SSSE3: ret + +; SSE2: load_sext_test5 +; SSE2: movslq +; SSE2: movslq +; SSE2: punpcklqdq +; SSE2: ret +define <2 x i64> @load_sext_test5(<2 x i32> *%ptr) { + %X = load <2 x i32>* %ptr + %Y = sext <2 x i32> %X to <2 x i64> + ret <2 x i64>%Y +} + +; AVX: load_sext_test6 +; AVX: vpmovsxbw (%r{{[^,]*}}), %xmm{{.*}} +; AVX: ret + +; SSSE3: load_sext_test6 +; SSSE3: movq +; SSSE3: punpcklbw +; SSSE3: psraw $8 +; SSSE3: ret + +; SSE2: load_sext_test6 +; SSE2: movq +; SSE2: punpcklbw +; SSE2: psraw $8 +; SSE2: ret +define <8 x i16> @load_sext_test6(<8 x i8> *%ptr) { + %X = load <8 x i8>* %ptr + %Y = sext <8 x i8> %X to <8 x i16> + ret <8 x i16>%Y +} + +; AVX: sext_4i1_to_4i64 +; AVX: vpslld $31 +; AVX: vpsrad $31 +; AVX: vpmovsxdq +; AVX: vpmovsxdq +; AVX: ret +define <4 x i64> @sext_4i1_to_4i64(<4 x i1> %mask) { + %extmask = sext <4 x i1> %mask to <4 x i64> + ret <4 x i64> %extmask +} + +; AVX: sext_4i8_to_4i64 +; AVX: vpslld $24 +; AVX: vpsrad $24 +; AVX: vpmovsxdq +; AVX: vpmovsxdq +; AVX: ret +define <4 x i64> @sext_4i8_to_4i64(<4 x i8> %mask) { + %extmask = sext <4 x i8> %mask to <4 x i64> + ret <4 x i64> %extmask +} + +; AVX: sext_4i8_to_4i64 +; AVX: vpmovsxbd +; AVX: vpmovsxdq +; AVX: vpmovsxdq +; AVX: ret +define <4 x i64> @load_sext_4i8_to_4i64(<4 x i8> *%ptr) { + %X = load <4 x i8>* 
%ptr + %Y = sext <4 x i8> %X to <4 x i64> + ret <4 x i64>%Y +} + +; AVX: sext_4i16_to_4i64 +; AVX: vpmovsxwd +; AVX: vpmovsxdq +; AVX: vpmovsxdq +; AVX: ret +define <4 x i64> @load_sext_4i16_to_4i64(<4 x i16> *%ptr) { + %X = load <4 x i16>* %ptr + %Y = sext <4 x i16> %X to <4 x i64> + ret <4 x i64>%Y +} diff --git a/test/CodeGen/X86/avx-shift.ll b/test/CodeGen/X86/avx-shift.ll index 681747b844a0..01eb7361e293 100644 --- a/test/CodeGen/X86/avx-shift.ll +++ b/test/CodeGen/X86/avx-shift.ll @@ -105,13 +105,22 @@ define <32 x i8> @vshift12(<32 x i8> %a) nounwind readnone { ; CHECK: _vshift08 ; CHECK: vextractf128 $1 ; CHECK: vpslld $23 -; CHECK: vextractf128 $1 ; CHECK: vpslld $23 define <8 x i32> @vshift08(<8 x i32> %a) nounwind { %bitop = shl <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %a ret <8 x i32> %bitop } +; PR15141 +; CHECK: _vshift13: +; CHECK-NOT: vpsll +; CHECK: vcvttps2dq +; CHECK-NEXT: vpmulld +define <4 x i32> @vshift13(<4 x i32> %in) { + %T = shl <4 x i32> %in, <i32 0, i32 1, i32 2, i32 4> + ret <4 x i32> %T +} + ;;; Uses shifts for sign extension ; CHECK: _sext_v16i16 ; CHECK: vpsllw diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll index ec11654b3556..73faa1fe0d40 100644 --- a/test/CodeGen/X86/avx-shuffle.ll +++ b/test/CodeGen/X86/avx-shuffle.ll @@ -6,7 +6,7 @@ define <4 x float> @test1(<4 x float> %a) nounwind { ret <4 x float> %b ; CHECK: test1: ; CHECK: vshufps -; CHECK: vpermilps +; CHECK: vpshufd } ; rdar://10538417 @@ -98,23 +98,23 @@ define i32 @test10(<4 x i32> %a) nounwind { } define <4 x float> @test11(<4 x float> %a) nounwind { -; check: test11 -; check: vpermilps $27 +; CHECK: test11 +; CHECK: vpshufd $27 %tmp1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ret <4 x float> %tmp1 } define <4 x float> @test12(<4 x float>* %a) nounwind { ; CHECK: test12 -; CHECK: vpermilps $27, ( +; CHECK: vpshufd %tmp0 = load <4 x float>* %a %tmp1 = shufflevector <4 x float> %tmp0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ret <4 x float> %tmp1 } define <4 x i32> @test13(<4 x i32> %a) nounwind { -; check: test13 -; check: vpshufd $27 +; CHECK: test13 +; CHECK: vpshufd $27 %tmp1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ret <4 x i32> %tmp1 } @@ -246,3 +246,54 @@ define <8 x float> @test19(<8 x float> %A, <8 x float>%B) nounwind { ret <8 x float>%S } +; rdar://12684358 +; Make sure loads happen before stores. 
+; CHECK: swap8doubles +; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}} +; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}} +; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}} +; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}} +; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}} +; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}} +; CHECK: vmovaps %xmm{{[0-9]+}}, {{[0-9]*}}(%rdi) +; CHECK: vextractf128 +; CHECK: vmovaps %xmm{{[0-9]+}}, {{[0-9]*}}(%rdi) +; CHECK: vextractf128 +; CHECK: vmovaps %ymm{{[0-9]+}}, {{[0-9]*}}(%rsi) +; CHECK: vmovaps %ymm{{[0-9]+}}, {{[0-9]*}}(%rsi) +define void @swap8doubles(double* nocapture %A, double* nocapture %C) nounwind uwtable ssp { +entry: + %add.ptr = getelementptr inbounds double* %A, i64 2 + %v.i = bitcast double* %A to <2 x double>* + %0 = load <2 x double>* %v.i, align 1 + %shuffle.i.i = shufflevector <2 x double> %0, <2 x double> <double 0.000000e+00, double undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 2> + %v1.i = bitcast double* %add.ptr to <2 x double>* + %1 = load <2 x double>* %v1.i, align 1 + %2 = tail call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %shuffle.i.i, <2 x double> %1, i8 1) nounwind + %add.ptr1 = getelementptr inbounds double* %A, i64 6 + %add.ptr2 = getelementptr inbounds double* %A, i64 4 + %v.i27 = bitcast double* %add.ptr2 to <2 x double>* + %3 = load <2 x double>* %v.i27, align 1 + %shuffle.i.i28 = shufflevector <2 x double> %3, <2 x double> <double 0.000000e+00, double undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 2> + %v1.i29 = bitcast double* %add.ptr1 to <2 x double>* + %4 = load <2 x double>* %v1.i29, align 1 + %5 = tail call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %shuffle.i.i28, <2 x double> %4, i8 1) nounwind + %6 = bitcast double* %C to <4 x double>* + %7 = load <4 x double>* %6, align 32 + %add.ptr5 = getelementptr inbounds double* %C, i64 4 + %8 = bitcast double* %add.ptr5 to <4 x double>* + %9 = load <4 x double>* %8, align 32 + %shuffle.i26 = shufflevector <4 x double> %7, <4 x double> undef, <2 x i32> <i32 0, i32 1> + %10 = tail call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %7, i8 1) + %shuffle.i = shufflevector <4 x double> %9, <4 x double> undef, <2 x i32> <i32 0, i32 1> + %11 = tail call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %9, i8 1) + store <2 x double> %shuffle.i26, <2 x double>* %v.i, align 16 + store <2 x double> %10, <2 x double>* %v1.i, align 16 + store <2 x double> %shuffle.i, <2 x double>* %v.i27, align 16 + store <2 x double> %11, <2 x double>* %v1.i29, align 16 + store <4 x double> %2, <4 x double>* %6, align 32 + store <4 x double> %5, <4 x double>* %8, align 32 + ret void +} +declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone +declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone diff --git a/test/CodeGen/X86/avx-splat.ll b/test/CodeGen/X86/avx-splat.ll index 94bcddd97592..5c01c2cc5b50 100644 --- a/test/CodeGen/X86/avx-splat.ll +++ b/test/CodeGen/X86/avx-splat.ll @@ -3,8 +3,8 @@ ; CHECK: vpunpcklbw %xmm ; CHECK-NEXT: vpunpckhbw %xmm +; CHECK-NEXT: vpshufd $85 ; CHECK-NEXT: vinsertf128 $1 -; CHECK-NEXT: vpermilps $85 define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp { entry: %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, 
i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> @@ -12,8 +12,8 @@ entry: } ; CHECK: vpunpckhwd %xmm +; CHECK-NEXT: vpshufd $85 ; CHECK-NEXT: vinsertf128 $1 -; CHECK-NEXT: vpermilps $85 define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp { entry: %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> @@ -47,9 +47,9 @@ entry: ; shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0> ; To: ; shuffle (vload ptr)), undef, <1, 1, 1, 1> -; CHECK: vmovaps +; CHECK: vmovdqa +; CHECK-NEXT: vpshufd $-1 ; CHECK-NEXT: vinsertf128 $1 -; CHECK-NEXT: vpermilps $-1 define <8 x float> @funcE() nounwind { allocas: %udx495 = alloca [18 x [18 x float]], align 32 @@ -75,8 +75,8 @@ __load_and_broadcast_32.exit1249: ; preds = %load.i1247, %for_ex ret <8 x float> %load_broadcast12281250 } -; CHECK: vinsertf128 $1 -; CHECK-NEXT: vpermilps $0 +; CHECK: vpshufd $0 +; CHECK-NEXT: vinsertf128 $1 define <8 x float> @funcF(i32 %val) nounwind { %ret6 = insertelement <8 x i32> undef, i32 %val, i32 6 %ret7 = insertelement <8 x i32> %ret6, i32 %val, i32 7 @@ -84,8 +84,8 @@ define <8 x float> @funcF(i32 %val) nounwind { ret <8 x float> %tmp } -; CHECK: vinsertf128 $1 -; CHECK-NEXT: vpermilps $0 +; CHECK: vpshufd $0 +; CHECK-NEXT: vinsertf128 $1 define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp { entry: %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> @@ -93,8 +93,8 @@ entry: } ; CHECK: vextractf128 $1 +; CHECK-NEXT: vpshufd ; CHECK-NEXT: vinsertf128 $1 -; CHECK-NEXT: vpermilps $85 define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp { entry: %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> diff --git a/test/CodeGen/X86/avx-vextractf128.ll b/test/CodeGen/X86/avx-vextractf128.ll index ff56a454996e..ad8365bb59c0 100644 --- a/test/CodeGen/X86/avx-vextractf128.ll +++ b/test/CodeGen/X86/avx-vextractf128.ll @@ -102,3 +102,21 @@ entry: store <2 x i64> %2, <2 x i64>* %addr, align 1 ret void } + +; PR15462 +define void @t9(i64* %p) { + store i64 0, i64* %p + %q = getelementptr i64* %p, i64 1 + store i64 0, i64* %q + %r = getelementptr i64* %p, i64 2 + store i64 0, i64* %r + %s = getelementptr i64* %p, i64 3 + store i64 0, i64* %s + ret void + +; CHECK: t9: +; CHECK: vxorps %xmm +; CHECK-NOT: vextractf +; CHECK: vmovups +; CHECK: vmovups +} diff --git a/test/CodeGen/X86/avx-vpermil.ll b/test/CodeGen/X86/avx-vpermil.ll index cb904b93313a..7f2f9d821dd5 100644 --- a/test/CodeGen/X86/avx-vpermil.ll +++ b/test/CodeGen/X86/avx-vpermil.ll @@ -45,8 +45,8 @@ entry: ret <8 x float> %shuffle } -; CHECK: palignr -; CHECK: palignr +; CHECK: palignr $8 +; CHECK: psrldq $8 define <8 x float> @funcF(<8 x float> %a) nounwind uwtable readnone ssp { entry: %shuffle = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9> diff --git a/test/CodeGen/X86/avx-zext.ll b/test/CodeGen/X86/avx-zext.ll index b630e9d14612..582537ea906f 100755 --- a/test/CodeGen/X86/avx-zext.ll +++ b/test/CodeGen/X86/avx-zext.ll @@ -18,11 +18,10 @@ define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp ret <4 x i64>%B } - define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) { ;CHECK: zext_8i8_to_8i32 ;CHECK: vpunpckhwd -;CHECK: vpunpcklwd +;CHECK: 
vpmovzxwd ;CHECK: vinsertf128 ;CHECK: ret %t = zext <8 x i8> %z to <8 x i32> diff --git a/test/CodeGen/X86/avx2-conversions.ll b/test/CodeGen/X86/avx2-conversions.ll index b47491335a31..3ce08dcc7370 100755 --- a/test/CodeGen/X86/avx2-conversions.ll +++ b/test/CodeGen/X86/avx2-conversions.ll @@ -63,6 +63,47 @@ define <8 x i32> @zext_8i8_8i32(<8 x i8> %A) nounwind { ret <8 x i32>%B } +; CHECK: load_sext_test1 +; CHECK: vpmovsxdq (%r{{[^,]*}}), %ymm{{.*}} +; CHECK: ret +define <4 x i64> @load_sext_test1(<4 x i32> *%ptr) { + %X = load <4 x i32>* %ptr + %Y = sext <4 x i32> %X to <4 x i64> + ret <4 x i64>%Y +} + +; CHECK: load_sext_test2 +; CHECK: vpmovsxbq (%r{{[^,]*}}), %ymm{{.*}} +; CHECK: ret +define <4 x i64> @load_sext_test2(<4 x i8> *%ptr) { + %X = load <4 x i8>* %ptr + %Y = sext <4 x i8> %X to <4 x i64> + ret <4 x i64>%Y +} +; CHECK: load_sext_test3 +; CHECK: vpmovsxwq (%r{{[^,]*}}), %ymm{{.*}} +; CHECK: ret +define <4 x i64> @load_sext_test3(<4 x i16> *%ptr) { + %X = load <4 x i16>* %ptr + %Y = sext <4 x i16> %X to <4 x i64> + ret <4 x i64>%Y +} +; CHECK: load_sext_test4 +; CHECK: vpmovsxwd (%r{{[^,]*}}), %ymm{{.*}} +; CHECK: ret +define <8 x i32> @load_sext_test4(<8 x i16> *%ptr) { + %X = load <8 x i16>* %ptr + %Y = sext <8 x i16> %X to <8 x i32> + ret <8 x i32>%Y +} +; CHECK: load_sext_test5 +; CHECK: vpmovsxbd (%r{{[^,]*}}), %ymm{{.*}} +; CHECK: ret +define <8 x i32> @load_sext_test5(<8 x i8> *%ptr) { + %X = load <8 x i8>* %ptr + %Y = sext <8 x i8> %X to <8 x i32> + ret <8 x i32>%Y +} diff --git a/test/CodeGen/X86/avx2-logic.ll b/test/CodeGen/X86/avx2-logic.ll index 13ebaa6f8797..a5bb1a8f8e44 100644 --- a/test/CodeGen/X86/avx2-logic.ll +++ b/test/CodeGen/X86/avx2-logic.ll @@ -48,9 +48,8 @@ entry: ; CHECK: vpblendvb ; CHECK: vpblendvb %ymm ; CHECK: ret -define <32 x i8> @vpblendvb(<32 x i8> %x, <32 x i8> %y) { - %min_is_x = icmp ult <32 x i8> %x, %y - %min = select <32 x i1> %min_is_x, <32 x i8> %x, <32 x i8> %y +define <32 x i8> @vpblendvb(<32 x i1> %cond, <32 x i8> %x, <32 x i8> %y) { + %min = select <32 x i1> %cond, <32 x i8> %x, <32 x i8> %y ret <32 x i8> %min } diff --git a/test/CodeGen/X86/avx2-shuffle.ll b/test/CodeGen/X86/avx2-shuffle.ll index a414e6880c32..cf319cb7fe1d 100644 --- a/test/CodeGen/X86/avx2-shuffle.ll +++ b/test/CodeGen/X86/avx2-shuffle.ll @@ -4,15 +4,62 @@ ; The mask for the vpblendw instruction needs to be identical for both halves ; of the YMM. Need to use two vpblendw instructions. 
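An illustrative aside on the mask arithmetic in the vpblendw tests that follow (not part of the patch): vpblendw's 8-bit immediate sets bit i when 16-bit element i should come from the second source, and the same immediate is applied to both 128-bit halves of the YMM register, so a single vpblendw only works when the two halves need identical masks. A minimal C sketch of that derivation; the blend_masks helper and the main driver are purely hypothetical, written here to reproduce the immediates quoted in the test comments:

#include <stdio.h>

/* Derive the per-128-bit-lane vpblendw immediates from a 16-element
   shuffle mask over two <16 x i16> sources (indices 16..31 pick source b). */
static void blend_masks(const int idx[16], unsigned *lo, unsigned *hi) {
  *lo = *hi = 0;
  for (int i = 0; i < 8; i++) {
    if (idx[i] >= 16)     *lo |= 1u << i;   /* low half takes element i from b  */
    if (idx[i + 8] >= 16) *hi |= 1u << i;   /* high half takes element i from b */
  }
}

int main(void) {
  /* vpblendw_test1: both halves give 0b10010110 = 150, so one vpblendw $150 suffices. */
  int t1[16] = {0,17,18,3,20,5,6,23, 8,25,26,11,28,13,14,31};
  /* vpblendw_test2: halves differ (22 vs 128), so two xmm vpblendw are needed. */
  int t2[16] = {0,17,18,3,20,5,6,7, 8,9,10,11,12,13,14,31};
  unsigned lo, hi;
  blend_masks(t1, &lo, &hi); printf("test1: lo=%u hi=%u\n", lo, hi);
  blend_masks(t2, &lo, &hi); printf("test2: lo=%u hi=%u\n", lo, hi);
  return 0;
}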
-; CHECK: blendw1 -; CHECK: vpblendw -; CHECK: vpblendw +; CHECK: vpblendw_test1 +; mask = 10010110,b = 150,d +; CHECK: vpblendw $150, %ymm ; CHECK: ret -define <16 x i16> @blendw1(<16 x i16> %a, <16 x i16> %b) nounwind alwaysinline { - %t = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 18, i32 3, i32 20, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 31> +define <16 x i16> @vpblendw_test1(<16 x i16> %a, <16 x i16> %b) nounwind alwaysinline { + %t = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 18, i32 3, i32 20, i32 5, i32 6, i32 23, + i32 8, i32 25, i32 26, i32 11, i32 28, i32 13, i32 14, i32 31> ret <16 x i16> %t } +; CHECK: vpblendw_test2 +; mask1 = 00010110 = 22 +; mask2 = 10000000 = 128 +; CHECK: vpblendw $128, %xmm +; CHECK: vpblendw $22, %xmm +; CHECK: vinserti128 +; CHECK: ret +define <16 x i16> @vpblendw_test2(<16 x i16> %a, <16 x i16> %b) nounwind alwaysinline { + %t = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 18, i32 3, i32 20, i32 5, i32 6, i32 7, + i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 31> + ret <16 x i16> %t +} + +; CHECK: blend_test1 +; CHECK: vpblendd +; CHECK: ret +define <8 x i32> @blend_test1(<8 x i32> %a, <8 x i32> %b) nounwind alwaysinline { + %t = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 12, i32 5, i32 6, i32 7> + ret <8 x i32> %t +} + +; CHECK: blend_test2 +; CHECK: vpblendd +; CHECK: ret +define <8 x i32> @blend_test2(<8 x i32> %a, <8 x i32> %b) nounwind alwaysinline { + %t = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 12, i32 5, i32 6, i32 7> + ret <8 x i32> %t +} + + +; CHECK: blend_test3 +; CHECK: vblendps +; CHECK: ret +define <8 x float> @blend_test3(<8 x float> %a, <8 x float> %b) nounwind alwaysinline { + %t = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 12, i32 5, i32 6, i32 7> + ret <8 x float> %t +} + +; CHECK: blend_test4 +; CHECK: vblendpd +; CHECK: ret
+define <4 x i64> @blend_test4(<4 x i64> %a, <4 x i64> %b) nounwind alwaysinline {
+ %t = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
+ ret <4 x i64> %t
+} + ; CHECK: vpshufhw $27, %ymm define <16 x i16> @vpshufhw(<16 x i16> %src1) nounwind uwtable readnone ssp { entry: diff --git a/test/CodeGen/X86/blend-msb.ll b/test/CodeGen/X86/blend-msb.ll index 11f811f8cf63..e565da74a082 100644 --- a/test/CodeGen/X86/blend-msb.ll +++ b/test/CodeGen/X86/blend-msb.ll @@ -5,7 +5,8 @@ ; shifting the needed bit to the MSB, and not using shl+sra. ;CHECK: vsel_float -;CHECK: pslld +;CHECK: movl $-2147483648 +;CHECK-NEXT: movd ;CHECK-NEXT: blendvps ;CHECK: ret define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) { @@ -14,7 +15,8 @@ define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) { } ;CHECK: vsel_4xi8 -;CHECK: pslld +;CHECK: movl $-2147483648 +;CHECK-NEXT: movd ;CHECK-NEXT: blendvps ;CHECK: ret define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) { diff --git a/test/CodeGen/X86/bmi.ll b/test/CodeGen/X86/bmi.ll index 43c47c0fa8a5..b89e648c52d9 100644 --- a/test/CodeGen/X86/bmi.ll +++ b/test/CodeGen/X86/bmi.ll @@ -26,6 +26,14 @@ define i32 @t3(i32 %x) nounwind { ; CHECK: tzcntl } +define i32 @tzcnt32_load(i32* %x) nounwind { + %x1 = load i32* %x + %tmp = tail call i32 @llvm.cttz.i32(i32 %x1, i1 false ) + ret i32 %tmp +; CHECK: tzcnt32_load: +; CHECK: tzcntl ({{.*}}) +} + define i64 @t4(i64 %x) nounwind { %tmp = tail call i64 @llvm.cttz.i64( i64 %x, i1 false ) ret i64 %tmp @@ -69,6 +77,15 @@ define i32 @andn32(i32 %x, i32 %y) nounwind readnone { ; CHECK: andnl } +define i32 @andn32_load(i32 %x, i32* %y) nounwind readnone { + %y1 = load i32* %y + %tmp1 = xor i32 %x, -1 + %tmp2 = and i32 %y1, %tmp1 + ret i32 %tmp2 +; CHECK: andn32_load: +; CHECK: andnl ({{.*}}) +} + define i64 @andn64(i64 %x, i64 %y) nounwind readnone { %tmp1 = xor i64 %x, -1 %tmp2 = and i64 %tmp1, %y @@ -84,6 +101,14 @@ define i32 @bextr32(i32 %x, i32 %y) nounwind readnone { ; CHECK: bextrl } +define i32 @bextr32_load(i32* %x, i32 %y) nounwind readnone { + %x1 = load i32* %x + %tmp = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x1, i32 %y) + ret i32 %tmp +; CHECK: bextr32_load: +; CHECK: bextrl {{.*}}, ({{.*}}), {{.*}} +} + declare i32 @llvm.x86.bmi.bextr.32(i32, i32) nounwind readnone define i64 @bextr64(i64 %x, i64 %y) nounwind readnone { @@ -102,6 +127,14 @@ define i32 @bzhi32(i32 %x, i32 %y) nounwind readnone { ; CHECK: bzhil } +define i32 @bzhi32_load(i32* %x, i32 %y) nounwind readnone { + %x1 = load i32* %x + %tmp = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %x1, i32 %y) + ret i32 %tmp +; CHECK: bzhi32_load: +; CHECK: bzhil {{.*}}, ({{.*}}), {{.*}} +} + declare i32 @llvm.x86.bmi.bzhi.32(i32, i32) nounwind readnone define i64 @bzhi64(i64 %x, i64 %y) nounwind readnone { @@ -121,6 +154,15 @@ define i32 @blsi32(i32 %x) nounwind readnone { ; CHECK: blsil } +define i32 @blsi32_load(i32* %x) nounwind readnone { + %x1 = load i32* %x + %tmp = sub i32 0, %x1 + %tmp2 = and i32 %x1, %tmp + ret i32 %tmp2 +; CHECK: blsi32_load: +; CHECK: blsil ({{.*}}) +} + define i64 @blsi64(i64 %x) nounwind readnone { %tmp = sub i64 0, %x %tmp2 = and i64 %tmp, %x @@ -137,6 +179,15 @@ define i32 @blsmsk32(i32 %x) nounwind readnone { ; CHECK: blsmskl } +define i32 @blsmsk32_load(i32* %x) nounwind readnone { + %x1 = load i32* %x + %tmp = sub i32 %x1, 1 + %tmp2 = xor i32 %x1, %tmp + ret i32 %tmp2 +; CHECK: blsmsk32_load: +; CHECK: blsmskl ({{.*}}) +} + define i64 @blsmsk64(i64 %x) nounwind readnone { %tmp = sub i64 %x, 1 %tmp2 = xor i64 %tmp, %x @@ -153,6 +204,15 @@ define i32 @blsr32(i32 %x) nounwind readnone { ; CHECK: blsrl } +define i32 @blsr32_load(i32* %x) nounwind readnone { + %x1 = load i32* 
%x + %tmp = sub i32 %x1, 1 + %tmp2 = and i32 %x1, %tmp + ret i32 %tmp2 +; CHECK: blsr32_load: +; CHECK: blsrl ({{.*}}) +} + define i64 @blsr64(i64 %x) nounwind readnone { %tmp = sub i64 %x, 1 %tmp2 = and i64 %tmp, %x @@ -168,6 +228,14 @@ define i32 @pdep32(i32 %x, i32 %y) nounwind readnone { ; CHECK: pdepl } +define i32 @pdep32_load(i32 %x, i32* %y) nounwind readnone { + %y1 = load i32* %y + %tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %y1) + ret i32 %tmp +; CHECK: pdep32_load: +; CHECK: pdepl ({{.*}}) +} + declare i32 @llvm.x86.bmi.pdep.32(i32, i32) nounwind readnone define i64 @pdep64(i64 %x, i64 %y) nounwind readnone { @@ -186,6 +254,14 @@ define i32 @pext32(i32 %x, i32 %y) nounwind readnone { ; CHECK: pextl } +define i32 @pext32_load(i32 %x, i32* %y) nounwind readnone { + %y1 = load i32* %y + %tmp = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 %y1) + ret i32 %tmp +; CHECK: pext32_load: +; CHECK: pextl ({{.*}}) +} + declare i32 @llvm.x86.bmi.pext.32(i32, i32) nounwind readnone define i64 @pext64(i64 %x, i64 %y) nounwind readnone { diff --git a/test/CodeGen/X86/bool-simplify.ll b/test/CodeGen/X86/bool-simplify.ll index 09eb5d1038f7..fa6f6e85e9b8 100644 --- a/test/CodeGen/X86/bool-simplify.ll +++ b/test/CodeGen/X86/bool-simplify.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 -mattr=+sse41,-avx,+rdrand | FileCheck %s +; RUN: llc < %s -march=x86-64 -mattr=+sse41,-avx,+rdrand,+rdseed | FileCheck %s define i32 @foo(<2 x i64> %c, i32 %a, i32 %b) { %t1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %c, <2 x i64> %c) @@ -39,7 +39,22 @@ define i32 @bax(<2 x i64> %c) { ; CHECK: ret } -define i32 @rnd(i32 %arg) nounwind uwtable { +define i16 @rnd16(i16 %arg) nounwind uwtable { + %1 = tail call { i16, i32 } @llvm.x86.rdrand.16() nounwind + %2 = extractvalue { i16, i32 } %1, 0 + %3 = extractvalue { i16, i32 } %1, 1 + %4 = icmp eq i32 %3, 0 + %5 = select i1 %4, i16 0, i16 %arg + %6 = add i16 %5, %2 + ret i16 %6 +; CHECK: rnd16 +; CHECK: rdrand +; CHECK: cmov +; CHECK-NOT: cmov +; CHECK: ret +} + +define i32 @rnd32(i32 %arg) nounwind uwtable { %1 = tail call { i32, i32 } @llvm.x86.rdrand.32() nounwind %2 = extractvalue { i32, i32 } %1, 0 %3 = extractvalue { i32, i32 } %1, 1 @@ -47,12 +62,77 @@ define i32 @rnd(i32 %arg) nounwind uwtable { %5 = select i1 %4, i32 0, i32 %arg %6 = add i32 %5, %2 ret i32 %6 -; CHECK: rnd +; CHECK: rnd32 ; CHECK: rdrand ; CHECK: cmov ; CHECK-NOT: cmov ; CHECK: ret } +define i64 @rnd64(i64 %arg) nounwind uwtable { + %1 = tail call { i64, i32 } @llvm.x86.rdrand.64() nounwind + %2 = extractvalue { i64, i32 } %1, 0 + %3 = extractvalue { i64, i32 } %1, 1 + %4 = icmp eq i32 %3, 0 + %5 = select i1 %4, i64 0, i64 %arg + %6 = add i64 %5, %2 + ret i64 %6 +; CHECK: rnd64 +; CHECK: rdrand +; CHECK: cmov +; CHECK-NOT: cmov +; CHECK: ret +} + +define i16 @seed16(i16 %arg) nounwind uwtable { + %1 = tail call { i16, i32 } @llvm.x86.rdseed.16() nounwind + %2 = extractvalue { i16, i32 } %1, 0 + %3 = extractvalue { i16, i32 } %1, 1 + %4 = icmp eq i32 %3, 0 + %5 = select i1 %4, i16 0, i16 %arg + %6 = add i16 %5, %2 + ret i16 %6 +; CHECK: seed16 +; CHECK: rdseed +; CHECK: cmov +; CHECK-NOT: cmov +; CHECK: ret +} + +define i32 @seed32(i32 %arg) nounwind uwtable { + %1 = tail call { i32, i32 } @llvm.x86.rdseed.32() nounwind + %2 = extractvalue { i32, i32 } %1, 0 + %3 = extractvalue { i32, i32 } %1, 1 + %4 = icmp eq i32 %3, 0 + %5 = select i1 %4, i32 0, i32 %arg + %6 = add i32 %5, %2 + ret i32 %6 +; CHECK: seed32 +; CHECK: rdseed +; CHECK: cmov +; CHECK-NOT: cmov +; CHECK: ret +} + 
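For context on these rdrand/rdseed tests (an illustrative aside, not part of the patch): both intrinsic families return a value plus a success flag, and the checks verify that the flag compare and select collapse into a single cmov after the rdrand/rdseed instruction. A rough C equivalent of the i32 pattern, assuming the <immintrin.h> step intrinsics built with -mrdrnd/-mrdseed; the function names only mirror the test names, they are not the tests' actual source:

#include <immintrin.h>

/* Mirrors the rnd32/seed32 tests: result = random value + (success ? arg : 0).
   The icmp-eq-0 plus select in the IR is expected to lower to one cmov. */
unsigned int seed32(unsigned int arg) {
  unsigned int r;
  int ok = _rdseed32_step(&r);   /* returns 1 on success, 0 on failure */
  return r + (ok ? arg : 0);
}

unsigned int rnd32(unsigned int arg) {
  unsigned int r;
  int ok = _rdrand32_step(&r);   /* same contract as the rdseed variant */
  return r + (ok ? arg : 0);
}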
+define i64 @seed64(i64 %arg) nounwind uwtable { + %1 = tail call { i64, i32 } @llvm.x86.rdseed.64() nounwind + %2 = extractvalue { i64, i32 } %1, 0 + %3 = extractvalue { i64, i32 } %1, 1 + %4 = icmp eq i32 %3, 0 + %5 = select i1 %4, i64 0, i64 %arg + %6 = add i64 %5, %2 + ret i64 %6 +; CHECK: seed64 +; CHECK: rdseed +; CHECK: cmov +; CHECK-NOT: cmov +; CHECK: ret +} + declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone +declare { i16, i32 } @llvm.x86.rdrand.16() nounwind declare { i32, i32 } @llvm.x86.rdrand.32() nounwind +declare { i64, i32 } @llvm.x86.rdrand.64() nounwind +declare { i16, i32 } @llvm.x86.rdseed.16() nounwind +declare { i32, i32 } @llvm.x86.rdseed.32() nounwind +declare { i64, i32 } @llvm.x86.rdseed.64() nounwind diff --git a/test/CodeGen/X86/bt.ll b/test/CodeGen/X86/bt.ll index ec447e5e9c81..39a784dec37d 100644 --- a/test/CodeGen/X86/bt.ll +++ b/test/CodeGen/X86/bt.ll @@ -1,6 +1,4 @@ -; RUN: llc < %s -march=x86 | grep btl | count 28 -; RUN: llc < %s -march=x86 -mcpu=pentium4 | grep btl | not grep esp -; RUN: llc < %s -march=x86 -mcpu=penryn | grep btl | not grep esp +; RUN: llc < %s -mtriple=i386-apple-macosx -mcpu=penryn | FileCheck %s ; PR3253 ; The register+memory form of the BT instruction should be usable on @@ -21,6 +19,9 @@ define void @test2(i32 %x, i32 %n) nounwind { entry: +; CHECK: test2 +; CHECK: btl %eax, %ecx +; CHECK: jb %tmp29 = lshr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1] %tmp4 = icmp eq i32 %tmp3, 0 ; <i1> [#uses=1] @@ -36,6 +37,9 @@ UnifiedReturnBlock: ; preds = %entry define void @test2b(i32 %x, i32 %n) nounwind { entry: +; CHECK: test2b +; CHECK: btl %eax, %ecx +; CHECK: jb %tmp29 = lshr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 1, %tmp29 %tmp4 = icmp eq i32 %tmp3, 0 ; <i1> [#uses=1] @@ -51,6 +55,9 @@ UnifiedReturnBlock: ; preds = %entry define void @atest2(i32 %x, i32 %n) nounwind { entry: +; CHECK: atest2 +; CHECK: btl %eax, %ecx +; CHECK: jb %tmp29 = ashr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1] %tmp4 = icmp eq i32 %tmp3, 0 ; <i1> [#uses=1] @@ -66,6 +73,8 @@ UnifiedReturnBlock: ; preds = %entry define void @atest2b(i32 %x, i32 %n) nounwind { entry: +; CHECK: atest2b +; CHECK: btl %eax, %ecx %tmp29 = ashr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 1, %tmp29 %tmp4 = icmp eq i32 %tmp3, 0 ; <i1> [#uses=1] @@ -81,6 +90,9 @@ UnifiedReturnBlock: ; preds = %entry define void @test3(i32 %x, i32 %n) nounwind { entry: +; CHECK: test3 +; CHECK: btl %eax, %ecx +; CHECK: jb %tmp29 = shl i32 1, %n ; <i32> [#uses=1] %tmp3 = and i32 %tmp29, %x ; <i32> [#uses=1] %tmp4 = icmp eq i32 %tmp3, 0 ; <i1> [#uses=1] @@ -96,6 +108,9 @@ UnifiedReturnBlock: ; preds = %entry define void @test3b(i32 %x, i32 %n) nounwind { entry: +; CHECK: test3b +; CHECK: btl %eax, %ecx +; CHECK: jb %tmp29 = shl i32 1, %n ; <i32> [#uses=1] %tmp3 = and i32 %x, %tmp29 %tmp4 = icmp eq i32 %tmp3, 0 ; <i1> [#uses=1] @@ -111,6 +126,9 @@ UnifiedReturnBlock: ; preds = %entry define void @testne2(i32 %x, i32 %n) nounwind { entry: +; CHECK: testne2 +; CHECK: btl %eax, %ecx +; CHECK: jae %tmp29 = lshr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1] %tmp4 = icmp ne i32 %tmp3, 0 ; <i1> [#uses=1] @@ -126,6 +144,9 @@ UnifiedReturnBlock: ; preds = %entry define void @testne2b(i32 %x, i32 %n) nounwind { entry: +; CHECK: testne2b +; CHECK: btl %eax, %ecx +; CHECK: jae %tmp29 = lshr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 1, %tmp29 %tmp4 = icmp ne i32 %tmp3, 0 ; <i1> [#uses=1] @@ -141,6 +162,9 
@@ UnifiedReturnBlock: ; preds = %entry define void @atestne2(i32 %x, i32 %n) nounwind { entry: +; CHECK: atestne2 +; CHECK: btl %eax, %ecx +; CHECK: jae %tmp29 = ashr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1] %tmp4 = icmp ne i32 %tmp3, 0 ; <i1> [#uses=1] @@ -156,6 +180,9 @@ UnifiedReturnBlock: ; preds = %entry define void @atestne2b(i32 %x, i32 %n) nounwind { entry: +; CHECK: atestne2b +; CHECK: btl %eax, %ecx +; CHECK: jae %tmp29 = ashr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 1, %tmp29 %tmp4 = icmp ne i32 %tmp3, 0 ; <i1> [#uses=1] @@ -171,6 +198,9 @@ UnifiedReturnBlock: ; preds = %entry define void @testne3(i32 %x, i32 %n) nounwind { entry: +; CHECK: testne3 +; CHECK: btl %eax, %ecx +; CHECK: jae %tmp29 = shl i32 1, %n ; <i32> [#uses=1] %tmp3 = and i32 %tmp29, %x ; <i32> [#uses=1] %tmp4 = icmp ne i32 %tmp3, 0 ; <i1> [#uses=1] @@ -186,6 +216,9 @@ UnifiedReturnBlock: ; preds = %entry define void @testne3b(i32 %x, i32 %n) nounwind { entry: +; CHECK: testne3b +; CHECK: btl %eax, %ecx +; CHECK: jae %tmp29 = shl i32 1, %n ; <i32> [#uses=1] %tmp3 = and i32 %x, %tmp29 %tmp4 = icmp ne i32 %tmp3, 0 ; <i1> [#uses=1] @@ -201,6 +234,9 @@ UnifiedReturnBlock: ; preds = %entry define void @query2(i32 %x, i32 %n) nounwind { entry: +; CHECK: query2 +; CHECK: btl %eax, %ecx +; CHECK: jae %tmp29 = lshr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1] %tmp4 = icmp eq i32 %tmp3, 1 ; <i1> [#uses=1] @@ -216,6 +252,9 @@ UnifiedReturnBlock: ; preds = %entry define void @query2b(i32 %x, i32 %n) nounwind { entry: +; CHECK: query2b +; CHECK: btl %eax, %ecx +; CHECK: jae %tmp29 = lshr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 1, %tmp29 %tmp4 = icmp eq i32 %tmp3, 1 ; <i1> [#uses=1] @@ -231,6 +270,9 @@ UnifiedReturnBlock: ; preds = %entry define void @aquery2(i32 %x, i32 %n) nounwind { entry: +; CHECK: aquery2 +; CHECK: btl %eax, %ecx +; CHECK: jae %tmp29 = ashr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1] %tmp4 = icmp eq i32 %tmp3, 1 ; <i1> [#uses=1] @@ -246,6 +288,9 @@ UnifiedReturnBlock: ; preds = %entry define void @aquery2b(i32 %x, i32 %n) nounwind { entry: +; CHECK: aquery2b +; CHECK: btl %eax, %ecx +; CHECK: jae %tmp29 = ashr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 1, %tmp29 %tmp4 = icmp eq i32 %tmp3, 1 ; <i1> [#uses=1] @@ -261,6 +306,9 @@ UnifiedReturnBlock: ; preds = %entry define void @query3(i32 %x, i32 %n) nounwind { entry: +; CHECK: query3 +; CHECK: btl %eax, %ecx +; CHECK: jae %tmp29 = shl i32 1, %n ; <i32> [#uses=1] %tmp3 = and i32 %tmp29, %x ; <i32> [#uses=1] %tmp4 = icmp eq i32 %tmp3, %tmp29 ; <i1> [#uses=1] @@ -276,6 +324,9 @@ UnifiedReturnBlock: ; preds = %entry define void @query3b(i32 %x, i32 %n) nounwind { entry: +; CHECK: query3b +; CHECK: btl %eax, %ecx +; CHECK: jae %tmp29 = shl i32 1, %n ; <i32> [#uses=1] %tmp3 = and i32 %x, %tmp29 %tmp4 = icmp eq i32 %tmp3, %tmp29 ; <i1> [#uses=1] @@ -291,6 +342,9 @@ UnifiedReturnBlock: ; preds = %entry define void @query3x(i32 %x, i32 %n) nounwind { entry: +; CHECK: query3x +; CHECK: btl %eax, %ecx +; CHECK: jae %tmp29 = shl i32 1, %n ; <i32> [#uses=1] %tmp3 = and i32 %tmp29, %x ; <i32> [#uses=1] %tmp4 = icmp eq i32 %tmp29, %tmp3 ; <i1> [#uses=1] @@ -306,6 +360,9 @@ UnifiedReturnBlock: ; preds = %entry define void @query3bx(i32 %x, i32 %n) nounwind { entry: +; CHECK: query3bx +; CHECK: btl %eax, %ecx +; CHECK: jae %tmp29 = shl i32 1, %n ; <i32> [#uses=1] %tmp3 = and i32 %x, %tmp29 %tmp4 = icmp eq i32 %tmp29, %tmp3 ; <i1> [#uses=1] @@ -321,6 +378,9 @@ 
UnifiedReturnBlock: ; preds = %entry define void @queryne2(i32 %x, i32 %n) nounwind { entry: +; CHECK: queryne2 +; CHECK: btl %eax, %ecx +; CHECK: jb %tmp29 = lshr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1] %tmp4 = icmp ne i32 %tmp3, 1 ; <i1> [#uses=1] @@ -336,6 +396,9 @@ UnifiedReturnBlock: ; preds = %entry define void @queryne2b(i32 %x, i32 %n) nounwind { entry: +; CHECK: queryne2b +; CHECK: btl %eax, %ecx +; CHECK: jb %tmp29 = lshr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 1, %tmp29 %tmp4 = icmp ne i32 %tmp3, 1 ; <i1> [#uses=1] @@ -351,6 +414,9 @@ UnifiedReturnBlock: ; preds = %entry define void @aqueryne2(i32 %x, i32 %n) nounwind { entry: +; CHECK: aqueryne2 +; CHECK: btl %eax, %ecx +; CHECK: jb %tmp29 = ashr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1] %tmp4 = icmp ne i32 %tmp3, 1 ; <i1> [#uses=1] @@ -366,6 +432,9 @@ UnifiedReturnBlock: ; preds = %entry define void @aqueryne2b(i32 %x, i32 %n) nounwind { entry: +; CHECK: aqueryne2b +; CHECK: btl %eax, %ecx +; CHECK: jb %tmp29 = ashr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 1, %tmp29 %tmp4 = icmp ne i32 %tmp3, 1 ; <i1> [#uses=1] @@ -381,6 +450,9 @@ UnifiedReturnBlock: ; preds = %entry define void @queryne3(i32 %x, i32 %n) nounwind { entry: +; CHECK: queryne3 +; CHECK: btl %eax, %ecx +; CHECK: jb %tmp29 = shl i32 1, %n ; <i32> [#uses=1] %tmp3 = and i32 %tmp29, %x ; <i32> [#uses=1] %tmp4 = icmp ne i32 %tmp3, %tmp29 ; <i1> [#uses=1] @@ -396,6 +468,9 @@ UnifiedReturnBlock: ; preds = %entry define void @queryne3b(i32 %x, i32 %n) nounwind { entry: +; CHECK: queryne3b +; CHECK: btl %eax, %ecx +; CHECK: jb %tmp29 = shl i32 1, %n ; <i32> [#uses=1] %tmp3 = and i32 %x, %tmp29 %tmp4 = icmp ne i32 %tmp3, %tmp29 ; <i1> [#uses=1] @@ -411,6 +486,9 @@ UnifiedReturnBlock: ; preds = %entry define void @queryne3x(i32 %x, i32 %n) nounwind { entry: +; CHECK: queryne3x +; CHECK: btl %eax, %ecx +; CHECK: jb %tmp29 = shl i32 1, %n ; <i32> [#uses=1] %tmp3 = and i32 %tmp29, %x ; <i32> [#uses=1] %tmp4 = icmp ne i32 %tmp29, %tmp3 ; <i1> [#uses=1] @@ -426,6 +504,9 @@ UnifiedReturnBlock: ; preds = %entry define void @queryne3bx(i32 %x, i32 %n) nounwind { entry: +; CHECK: queryne3bx +; CHECK: btl %eax, %ecx +; CHECK: jb %tmp29 = shl i32 1, %n ; <i32> [#uses=1] %tmp3 = and i32 %x, %tmp29 %tmp4 = icmp ne i32 %tmp29, %tmp3 ; <i1> [#uses=1] @@ -440,3 +521,16 @@ UnifiedReturnBlock: ; preds = %entry } declare void @foo() + +; rdar://12755626 +define zeroext i1 @invert(i32 %flags, i32 %flag) nounwind { +; CHECK: invert +; CHECK: btl %eax, %ecx +; CHECK: setae +entry: + %neg = xor i32 %flags, -1 + %shl = shl i32 1, %flag + %and = and i32 %shl, %neg + %tobool = icmp ne i32 %and, 0 + ret i1 %tobool +} diff --git a/test/CodeGen/X86/byval2.ll b/test/CodeGen/X86/byval2.ll index 196efe58e6f3..c5187db6de4b 100644 --- a/test/CodeGen/X86/byval2.ll +++ b/test/CodeGen/X86/byval2.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-linux -mattr=-avx | FileCheck %s -check-prefix=X64 ; X64-NOT: movsq ; X64: rep ; X64-NOT: rep @@ -12,7 +12,7 @@ ; Win64 has not supported byval yet. 
-; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32 +; RUN: llc < %s -march=x86 -mattr=-avx | FileCheck %s -check-prefix=X32 ; X32-NOT: movsl ; X32: rep ; X32-NOT: rep diff --git a/test/CodeGen/X86/byval3.ll b/test/CodeGen/X86/byval3.ll index f3b125c6e3ba..d06fd8898e7f 100644 --- a/test/CodeGen/X86/byval3.ll +++ b/test/CodeGen/X86/byval3.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-linux -mattr=-avx | FileCheck %s -check-prefix=X64 ; X64-NOT: movsq ; X64: rep ; X64-NOT: rep @@ -12,7 +12,7 @@ ; Win64 has not supported byval yet. -; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32 +; RUN: llc < %s -march=x86 -mattr=-avx | FileCheck %s -check-prefix=X32 ; X32-NOT: movsl ; X32: rep ; X32-NOT: rep diff --git a/test/CodeGen/X86/byval4.ll b/test/CodeGen/X86/byval4.ll index b7a4aa3f9b01..4711e4511112 100644 --- a/test/CodeGen/X86/byval4.ll +++ b/test/CodeGen/X86/byval4.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-linux -mattr=-avx | FileCheck %s -check-prefix=X64 ; X64-NOT: movsq ; X64: rep ; X64-NOT: rep @@ -12,7 +12,7 @@ ; Win64 has not supported byval yet. -; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32 +; RUN: llc < %s -march=x86 -mattr=-avx | FileCheck %s -check-prefix=X32 ; X32-NOT: movsl ; X32: rep ; X32-NOT: rep diff --git a/test/CodeGen/X86/byval5.ll b/test/CodeGen/X86/byval5.ll index dca093602241..f24a5f9aa3b4 100644 --- a/test/CodeGen/X86/byval5.ll +++ b/test/CodeGen/X86/byval5.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-linux -mattr=-avx | FileCheck %s -check-prefix=X64 ; X64-NOT: movsq ; X64: rep ; X64-NOT: rep @@ -12,7 +12,7 @@ ; Win64 has not supported byval yet. 
-; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32 +; RUN: llc < %s -march=x86 -mattr=-avx | FileCheck %s -check-prefix=X32 ; X32-NOT: movsl ; X32: rep ; X32-NOT: rep diff --git a/test/CodeGen/X86/cas.ll b/test/CodeGen/X86/cas.ll new file mode 100644 index 000000000000..c2dd05ef7302 --- /dev/null +++ b/test/CodeGen/X86/cas.ll @@ -0,0 +1,73 @@ +; RUN: llc -mtriple=x86_64-pc-linux-gnu %s -o - | FileCheck %s + +; C code this came from +;bool cas(float volatile *p, float *expected, float desired) { +; bool success; +; __asm__ __volatile__("lock; cmpxchg %[desired], %[mem]; " +; "mov %[expected], %[expected_out]; " +; "sete %[success]" +; : [success] "=a" (success), +; [expected_out] "=rm" (*expected) +; : [expected] "a" (*expected), +; [desired] "q" (desired), +; [mem] "m" (*p) +; : "memory", "cc"); +; return success; +;} + +define zeroext i1 @cas(float* %p, float* %expected, float %desired) nounwind { +entry: + %p.addr = alloca float*, align 8 + %expected.addr = alloca float*, align 8 + %desired.addr = alloca float, align 4 + %success = alloca i8, align 1 + store float* %p, float** %p.addr, align 8 + store float* %expected, float** %expected.addr, align 8 + store float %desired, float* %desired.addr, align 4 + %0 = load float** %expected.addr, align 8 + %1 = load float** %expected.addr, align 8 + %2 = load float* %1, align 4 + %3 = load float* %desired.addr, align 4 + %4 = load float** %p.addr, align 8 + %5 = call i8 asm sideeffect "lock; cmpxchg $3, $4; mov $2, $1; sete $0", "={ax},=*rm,{ax},q,*m,~{memory},~{cc},~{dirflag},~{fpsr},~{flags}"(float* %0, float %2, float %3, float* %4) nounwind + store i8 %5, i8* %success, align 1 + %6 = load i8* %success, align 1 + %tobool = trunc i8 %6 to i1 + ret i1 %tobool +} + +; CHECK: @cas +; Make sure we're emitting a move from eax. +; CHECK: #APP +; CHECK-NEXT: lock;{{.*}}mov %eax,{{.*}} +; CHECK-NEXT: #NO_APP + +define zeroext i1 @cas2(i8* %p, i8* %expected, i1 zeroext %desired) nounwind { +entry: + %p.addr = alloca i8*, align 8 + %expected.addr = alloca i8*, align 8 + %desired.addr = alloca i8, align 1 + %success = alloca i8, align 1 + store i8* %p, i8** %p.addr, align 8 + store i8* %expected, i8** %expected.addr, align 8 + %frombool = zext i1 %desired to i8 + store i8 %frombool, i8* %desired.addr, align 1 + %0 = load i8** %expected.addr, align 8 + %1 = load i8** %expected.addr, align 8 + %2 = load i8* %1, align 1 + %tobool = trunc i8 %2 to i1 + %3 = load i8* %desired.addr, align 1 + %tobool1 = trunc i8 %3 to i1 + %4 = load i8** %p.addr, align 8 + %5 = call i8 asm sideeffect "lock; cmpxchg $3, $4; mov $2, $1; sete $0", "={ax},=*rm,{ax},q,*m,~{memory},~{cc},~{dirflag},~{fpsr},~{flags}"(i8* %0, i1 %tobool, i1 %tobool1, i8* %4) nounwind + store i8 %5, i8* %success, align 1 + %6 = load i8* %success, align 1 + %tobool2 = trunc i8 %6 to i1 + ret i1 %tobool2 +} + +; CHECK: @cas2 +; Make sure we're emitting a move from %al here. 
+; CHECK: #APP +; CHECK-NEXT: lock;{{.*}}mov %al,{{.*}} +; CHECK-NEXT: #NO_APP diff --git a/test/CodeGen/X86/clobber-fi0.ll b/test/CodeGen/X86/clobber-fi0.ll new file mode 100644 index 000000000000..38a42dbf1aa1 --- /dev/null +++ b/test/CodeGen/X86/clobber-fi0.ll @@ -0,0 +1,37 @@ +; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.7.0" + +; In the code below we need to copy the EFLAGS because of scheduling constraints. +; When copying the EFLAGS we need to write to the stack with push/pop. This forces +; us to emit the prolog. + +; CHECK: main +; CHECK: subq{{.*}}rsp +; CHECK: ret +define i32 @main(i32 %arg, i8** %arg1) nounwind { +bb: + %tmp = alloca i32, align 4 ; [#uses=3 type=i32*] + %tmp2 = alloca i32, align 4 ; [#uses=3 type=i32*] + %tmp3 = alloca i32 ; [#uses=1 type=i32*] + store i32 1, i32* %tmp, align 4 + store i32 1, i32* %tmp2, align 4 + br label %bb4 + +bb4: ; preds = %bb4, %bb + %tmp6 = load i32* %tmp2, align 4 ; [#uses=1 type=i32] + %tmp7 = add i32 %tmp6, -1 ; [#uses=2 type=i32] + store i32 %tmp7, i32* %tmp2, align 4 + %tmp8 = icmp eq i32 %tmp7, 0 ; [#uses=1 type=i1] + %tmp9 = load i32* %tmp ; [#uses=1 type=i32] + %tmp10 = add i32 %tmp9, -1 ; [#uses=1 type=i32] + store i32 %tmp10, i32* %tmp3 + br i1 %tmp8, label %bb11, label %bb4 + +bb11: ; preds = %bb4 + %tmp12 = load i32* %tmp, align 4 ; [#uses=1 type=i32] + ret i32 %tmp12 +} + + diff --git a/test/CodeGen/X86/cmp.ll b/test/CodeGen/X86/cmp.ll index eb06327f55a6..1855fe2fb89e 100644 --- a/test/CodeGen/X86/cmp.ll +++ b/test/CodeGen/X86/cmp.ll @@ -151,3 +151,18 @@ entry: %conv = zext i1 %cmp to i32 ret i32 %conv } + +define i32 @test12() uwtable ssp { +; CHECK: test12: +; CHECK: testb + %1 = call zeroext i1 @test12b() + br i1 %1, label %2, label %3 + +; <label>:2 ; preds = %0 + ret i32 1 + +; <label>:3 ; preds = %0 + ret i32 2 +} + +declare zeroext i1 @test12b() diff --git a/test/CodeGen/X86/coalesce-implicitdef.ll b/test/CodeGen/X86/coalesce-implicitdef.ll new file mode 100644 index 000000000000..19cd08cf3797 --- /dev/null +++ b/test/CodeGen/X86/coalesce-implicitdef.ll @@ -0,0 +1,130 @@ +; RUN: llc < %s -verify-coalescing +; PR14732 +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10" + +@c = common global i32 0, align 4 +@b = common global i32 0, align 4 +@a = common global i32 0, align 4 +@d = common global i32 0, align 4 + +; This function creates an IMPLICIT_DEF with a long live range, even after +; ProcessImplicitDefs. +; +; The coalescer should be able to deal with all kinds of IMPLICIT_DEF live +; ranges, even if they are not common. 
+ +define void @f() nounwind uwtable ssp { +entry: + %i = alloca i32, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc34, %entry + %i.0.load44 = phi i32 [ %inc35, %for.inc34 ], [ undef, %entry ] + %pi.0 = phi i32* [ %pi.4, %for.inc34 ], [ undef, %entry ] + %tobool = icmp eq i32 %i.0.load44, 0 + br i1 %tobool, label %for.end36, label %for.body + +for.body: ; preds = %for.cond + store i32 0, i32* @c, align 4, !tbaa !0 + br label %for.body2 + +for.body2: ; preds = %for.body, %for.inc + %i.0.load45 = phi i32 [ %i.0.load44, %for.body ], [ 0, %for.inc ] + %tobool3 = icmp eq i32 %i.0.load45, 0 + br i1 %tobool3, label %if.then10, label %if.then + +if.then: ; preds = %for.body2 + store i32 0, i32* %i, align 4, !tbaa !0 + br label %for.body6 + +for.body6: ; preds = %if.then, %for.body6 + store i32 0, i32* %i, align 4 + br i1 true, label %for.body6, label %for.inc + +if.then10: ; preds = %for.body2 + store i32 1, i32* @b, align 4, !tbaa !0 + ret void + +for.inc: ; preds = %for.body6 + br i1 undef, label %for.body2, label %if.end30 + +while.condthread-pre-split: ; preds = %label.loopexit, %while.condthread-pre-split.lr.ph.lr.ph, %for.inc27.backedge + %0 = phi i32 [ %inc28, %for.inc27.backedge ], [ %inc285863, %while.condthread-pre-split.lr.ph.lr.ph ], [ %inc2858, %label.loopexit ] + %inc2060 = phi i32 [ %inc20, %for.inc27.backedge ], [ %a.promoted.pre, %while.condthread-pre-split.lr.ph.lr.ph ], [ %inc20, %label.loopexit ] + br label %while.cond + +while.cond: ; preds = %while.condthread-pre-split, %while.cond + %p2.1.in = phi i32* [ %pi.3.ph, %while.cond ], [ %i, %while.condthread-pre-split ] + %p2.1 = bitcast i32* %p2.1.in to i16* + br i1 %tobool19, label %while.end, label %while.cond + +while.end: ; preds = %while.cond + %inc20 = add nsw i32 %inc2060, 1 + %tobool21 = icmp eq i32 %inc2060, 0 + br i1 %tobool21, label %for.inc27.backedge, label %if.then22 + +for.inc27.backedge: ; preds = %while.end, %if.then22 + %inc28 = add nsw i32 %0, 1 + store i32 %inc28, i32* @b, align 4, !tbaa !0 + %tobool17 = icmp eq i32 %inc28, 0 + br i1 %tobool17, label %for.inc27.if.end30.loopexit56_crit_edge, label %while.condthread-pre-split + +if.then22: ; preds = %while.end + %1 = load i16* %p2.1, align 2, !tbaa !3 + %tobool23 = icmp eq i16 %1, 0 + br i1 %tobool23, label %for.inc27.backedge, label %label.loopexit + +label.loopexit: ; preds = %if.then22 + store i32 %inc20, i32* @a, align 4, !tbaa !0 + %inc2858 = add nsw i32 %0, 1 + store i32 %inc2858, i32* @b, align 4, !tbaa !0 + %tobool1759 = icmp eq i32 %inc2858, 0 + br i1 %tobool1759, label %if.end30, label %while.condthread-pre-split + +for.inc27.if.end30.loopexit56_crit_edge: ; preds = %for.inc27.backedge + store i32 %inc20, i32* @a, align 4, !tbaa !0 + br label %if.end30 + +if.end30: ; preds = %for.inc27.if.end30.loopexit56_crit_edge, %label.loopexit, %label.preheader, %for.inc + %i.0.load46 = phi i32 [ 0, %for.inc ], [ %i.0.load4669, %label.preheader ], [ %i.0.load4669, %label.loopexit ], [ %i.0.load4669, %for.inc27.if.end30.loopexit56_crit_edge ] + %pi.4 = phi i32* [ %i, %for.inc ], [ %pi.3.ph, %label.preheader ], [ %pi.3.ph, %label.loopexit ], [ %pi.3.ph, %for.inc27.if.end30.loopexit56_crit_edge ] + %2 = load i32* %pi.4, align 4, !tbaa !0 + %tobool31 = icmp eq i32 %2, 0 + br i1 %tobool31, label %for.inc34, label %label.preheader + +for.inc34: ; preds = %if.end30 + %inc35 = add nsw i32 %i.0.load46, 1 + store i32 %inc35, i32* %i, align 4 + br label %for.cond + +for.end36: ; preds = %for.cond + store i32 1, i32* %i, align 4 + %3 = load i32* @c, align 4, 
!tbaa !0 + %tobool37 = icmp eq i32 %3, 0 + br i1 %tobool37, label %label.preheader, label %land.rhs + +land.rhs: ; preds = %for.end36 + store i32 0, i32* @a, align 4, !tbaa !0 + br label %label.preheader + +label.preheader: ; preds = %for.end36, %if.end30, %land.rhs + %i.0.load4669 = phi i32 [ 1, %land.rhs ], [ %i.0.load46, %if.end30 ], [ 1, %for.end36 ] + %pi.3.ph = phi i32* [ %pi.0, %land.rhs ], [ %pi.4, %if.end30 ], [ %pi.0, %for.end36 ] + %4 = load i32* @b, align 4, !tbaa !0 + %inc285863 = add nsw i32 %4, 1 + store i32 %inc285863, i32* @b, align 4, !tbaa !0 + %tobool175964 = icmp eq i32 %inc285863, 0 + br i1 %tobool175964, label %if.end30, label %while.condthread-pre-split.lr.ph.lr.ph + +while.condthread-pre-split.lr.ph.lr.ph: ; preds = %label.preheader + %.pr50 = load i32* @d, align 4, !tbaa !0 + %tobool19 = icmp eq i32 %.pr50, 0 + %a.promoted.pre = load i32* @a, align 4, !tbaa !0 + br label %while.condthread-pre-split +} + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} +!3 = metadata !{metadata !"short", metadata !1} diff --git a/test/CodeGen/X86/coldcc64.ll b/test/CodeGen/X86/coldcc64.ll new file mode 100644 index 000000000000..4db56bbaea2d --- /dev/null +++ b/test/CodeGen/X86/coldcc64.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s | FileCheck %s + +target triple = "x86_64-linux-gnu" + +define coldcc void @foo() { +; CHECK: pushq %rbp +; CHECK: pushq %r15 +; CHECK: pushq %r14 +; CHECK: pushq %r13 +; CHECK: pushq %r12 +; CHECK: pushq %r11 +; CHECK: pushq %r10 +; CHECK: pushq %r9 +; CHECK: pushq %r8 +; CHECK: pushq %rdi +; CHECK: pushq %rsi +; CHECK: pushq %rdx +; CHECK: pushq %rcx +; CHECK: pushq %rbx +; CHECK: movaps %xmm15 +; CHECK: movaps %xmm0 + call void asm sideeffect "", "~{xmm15},~{xmm0},~{rbp},~{r15},~{r14},~{r13},~{r12},~{r11},~{r10},~{r9},~{r8},~{rdi},~{rsi},~{rdx},~{rcx},~{rbx}"() + ret void +} diff --git a/test/CodeGen/X86/complex-fca.ll b/test/CodeGen/X86/complex-fca.ll index 7e7acaa98a76..8ad38a4ee5c0 100644 --- a/test/CodeGen/X86/complex-fca.ll +++ b/test/CodeGen/X86/complex-fca.ll @@ -1,5 +1,8 @@ ; RUN: llc < %s -march=x86 | grep mov | count 2 +; Skip this on Windows as there is no ccosl and sret behaves differently. 
+; XFAIL: pc-win32 + define void @ccosl({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 } %z) nounwind { entry: %z8 = extractvalue { x86_fp80, x86_fp80 } %z, 0 diff --git a/test/CodeGen/X86/constant-pool-remat-0.ll b/test/CodeGen/X86/constant-pool-remat-0.ll index 4be14d2128ef..4a0110896ced 100644 --- a/test/CodeGen/X86/constant-pool-remat-0.ll +++ b/test/CodeGen/X86/constant-pool-remat-0.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s ; RUN: llc < %s -mtriple=x86_64-linux -regalloc=greedy | FileCheck %s ; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s diff --git a/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll b/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll index 064ee364d14e..74a7240c8190 100644 --- a/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll +++ b/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -mtriple=x86_64-linux -o /dev/null -stats 2>&1 | FileCheck %s -check-prefix=STATS ; RUN: llc < %s -mtriple=x86_64-win32 -o /dev/null -stats 2>&1 | FileCheck %s -check-prefix=STATS ; STATS: 9 asm-printer diff --git a/test/CodeGen/X86/crash.ll b/test/CodeGen/X86/crash.ll index 276d0db9a4f3..6d2196206e7c 100644 --- a/test/CodeGen/X86/crash.ll +++ b/test/CodeGen/X86/crash.ll @@ -431,7 +431,7 @@ return: ; preds = %entry ; uitofp expands to an FCMOV instruction which splits the basic block. ; Make sure the live range of %AL isn't split. @.str = private unnamed_addr constant { [1 x i8], [63 x i8] } zeroinitializer, align 32 -define void @pr13188(i64* nocapture %this) uwtable ssp address_safety align 2 { +define void @pr13188(i64* nocapture %this) uwtable ssp sanitize_address align 2 { entry: %x7 = load i64* %this, align 8 %sub = add i64 %x7, -1 diff --git a/test/CodeGen/X86/cvtv2f32.ll b/test/CodeGen/X86/cvtv2f32.ll index 466b09606786..d11bb9ee3e75 100644 --- a/test/CodeGen/X86/cvtv2f32.ll +++ b/test/CodeGen/X86/cvtv2f32.ll @@ -1,3 +1,7 @@ +; A bug fix in the DAGCombiner made this test fail, so marking as xfail +; until this can be investigated further. +; XFAIL: * + ; RUN: llc < %s -mtriple=i686-linux-pc -mcpu=corei7 | FileCheck %s define <2 x float> @foo(i32 %x, i32 %y, <2 x float> %v) { diff --git a/test/CodeGen/X86/dagcombine-cse.ll b/test/CodeGen/X86/dagcombine-cse.ll index af69531246cf..75d3d93ddb89 100644 --- a/test/CodeGen/X86/dagcombine-cse.ll +++ b/test/CodeGen/X86/dagcombine-cse.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -stats 2>&1 | grep asm-printer | grep 14 define i32 @t(i8* %ref_frame_ptr, i32 %ref_frame_stride, i32 %idxX, i32 %idxY) nounwind { diff --git a/test/CodeGen/X86/dagcombine_unsafe_math.ll b/test/CodeGen/X86/dagcombine_unsafe_math.ll new file mode 100644 index 000000000000..592cf1bec2e5 --- /dev/null +++ b/test/CodeGen/X86/dagcombine_unsafe_math.ll @@ -0,0 +1,56 @@ +; RUN: llc < %s -enable-unsafe-fp-math -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck %s + + +; rdar://13126763 +; Expression "x + x*x" was mistakenly transformed into "x * 3.0f". 
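A brief worked check of the reassociation exercised by the tests below (editorial aside, not part of the patch): (x + x) + x may legitimately become 3.0 * x under fast-math, but x + x*x equals x*(1 + x), which matches 3*x only at x = 2; at x = 3, x + x*x is 12 while 3*x is 9. A tiny C illustration of the two expressions, purely for demonstration:

#include <stdio.h>

int main(void) {
  float x = 3.0f;
  printf("%f %f %f\n",
         (x + x) + x,   /* 9.0: equal to 3*x, so folding to a multiply is sound */
         x + x * x,     /* 12.0: not 3*x in general, only at x == 2             */
         3.0f * x);     /* 9.0                                                   */
  return 0;
}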
+ +define float @test1(float %x) { + %t1 = fmul fast float %x, %x + %t2 = fadd fast float %t1, %x + ret float %t2 +; CHECK: test1 +; CHECK: vaddss +} + +; (x + x) + x => x * 3.0 +define float @test2(float %x) { + %t1 = fadd fast float %x, %x + %t2 = fadd fast float %t1, %x + ret float %t2 +; CHECK: .long 1077936128 +; CHECK: test2 +; CHECK: vmulss LCPI1_0(%rip), %xmm0, %xmm0 +} + +; x + (x + x) => x * 3.0 +define float @test3(float %x) { + %t1 = fadd fast float %x, %x + %t2 = fadd fast float %t1, %x + ret float %t2 +; CHECK: .long 1077936128 +; CHECK: test3 +; CHECK: vmulss LCPI2_0(%rip), %xmm0, %xmm0 +} + +; (y + x) + x != x * 3.0 +define float @test4(float %x, float %y) { + %t1 = fadd fast float %x, %y + %t2 = fadd fast float %t1, %x + ret float %t2 +; CHECK: test4 +; CHECK: vaddss +} + +; rdar://13445387 +; "x + x + x => 3.0 * x" should be disabled after legalization because +; Instruction-Selection dosen't know how to handle "3.0" +; +define float @test5() { + %mul.i.i151 = fmul <4 x float> zeroinitializer, zeroinitializer + %vecext.i8.i152 = extractelement <4 x float> %mul.i.i151, i32 1 + %vecext1.i9.i153 = extractelement <4 x float> %mul.i.i151, i32 0 + %add.i10.i154 = fadd float %vecext1.i9.i153, %vecext.i8.i152 + %vecext.i7.i155 = extractelement <4 x float> %mul.i.i151, i32 2 + %add.i.i156 = fadd float %vecext.i7.i155, %add.i10.i154 + ret float %add.i.i156 +} diff --git a/test/CodeGen/X86/dbg-at-specficiation.ll b/test/CodeGen/X86/dbg-at-specficiation.ll index aa5e6efede27..48b8202bd5fa 100644 --- a/test/CodeGen/X86/dbg-at-specficiation.ll +++ b/test/CodeGen/X86/dbg-at-specficiation.ll @@ -17,4 +17,4 @@ !7 = metadata !{i32 720897, null, metadata !"", null, i32 0, i64 320, i64 32, i32 0, i32 0, metadata !8, metadata !9, i32 0, i32 0} ; [ DW_TAG_array_type ] !8 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !9 = metadata !{metadata !10} -!10 = metadata !{i32 720929, i64 0, i64 9} ; [ DW_TAG_subrange_type ] +!10 = metadata !{i32 720929, i64 0, i64 10} ; [ DW_TAG_subrange_type ] diff --git a/test/CodeGen/X86/dbg-byval-parameter.ll b/test/CodeGen/X86/dbg-byval-parameter.ll index 5e5577620d97..aca06a27a1df 100644 --- a/test/CodeGen/X86/dbg-byval-parameter.ll +++ b/test/CodeGen/X86/dbg-byval-parameter.ll @@ -25,21 +25,25 @@ return: ; preds = %entry declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone -!0 = metadata !{i32 524545, metadata !1, metadata !"my_r0", metadata !2, i32 11, metadata !7} ; [ DW_TAG_arg_variable ] -!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 11, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ] -!2 = metadata !{i32 524329, metadata !"b2.c", metadata !"/tmp/", metadata !3} ; [ DW_TAG_file_type ] -!3 = metadata !{i32 524305, i32 0, i32 1, metadata !"b2.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] +!llvm.dbg.cu = !{!3} + +!0 = metadata !{i32 786689, metadata !1, metadata !"my_r0", metadata !2, i32 11, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ] +!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 11, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, double (%struct.Rect*)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !18, null, metadata !""} ; [ DW_TAG_compile_unit ] +!4 = metadata !{i32 786453, metadata !19, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] !5 = metadata !{metadata !6, metadata !7} -!6 = metadata !{i32 524324, metadata !2, metadata !"double", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] -!7 = metadata !{i32 524307, metadata !2, metadata !"Rect", metadata !2, i32 6, i64 256, i64 64, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_structure_type ] +!6 = metadata !{i32 786468, metadata !19, metadata !2, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] +!7 = metadata !{i32 786451, metadata !19, metadata !2, metadata !"Rect", i32 6, i64 256, i64 64, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_structure_type ] !8 = metadata !{metadata !9, metadata !14} -!9 = metadata !{i32 524301, metadata !7, metadata !"P1", metadata !2, i32 7, i64 128, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ] -!10 = metadata !{i32 524307, metadata !2, metadata !"Pt", metadata !2, i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ] +!9 = metadata !{i32 786445, metadata !19, metadata !7, metadata !"P1", i32 7, i64 128, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ] +!10 = metadata !{i32 786451, metadata !19, metadata !2, metadata !"Pt", i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ] !11 = metadata !{metadata !12, metadata !13} -!12 = metadata !{i32 524301, metadata !10, metadata !"x", metadata !2, i32 2, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ] -!13 = metadata !{i32 524301, metadata !10, metadata !"y", metadata !2, i32 3, i64 64, i64 64, i64 64, i32 0, metadata !6} ; [ DW_TAG_member ] -!14 = metadata !{i32 524301, metadata !7, metadata !"P2", metadata !2, i32 8, i64 128, i64 64, i64 128, i32 0, metadata !10} ; [ DW_TAG_member ] +!12 = metadata !{i32 786445, metadata !19, metadata !10, metadata !"x", i32 2, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ] +!13 = metadata !{i32 786445, metadata !19, metadata !10, metadata !"y", i32 3, i64 64, i64 64, i64 64, i32 0, metadata !6} ; [ DW_TAG_member ] +!14 = metadata !{i32 786445, metadata !19, metadata !7, metadata !"P2", i32 8, i64 128, i64 64, i64 128, i32 0, metadata !10} ; [ DW_TAG_member ] !15 = metadata !{i32 11, i32 0, metadata !1, null} !16 = metadata !{i32 12, i32 0, metadata !17, null} -!17 = metadata !{i32 524299, metadata !1, i32 11, i32 0} ; [ 
DW_TAG_lexical_block ] +!17 = metadata !{i32 786443, metadata !2, metadata !1, i32 11, i32 0} ; [ DW_TAG_lexical_block ] +!18 = metadata !{metadata !1} +!19 = metadata !{metadata !"b2.c", metadata !"/tmp/"} diff --git a/test/CodeGen/X86/dbg-const-int.ll b/test/CodeGen/X86/dbg-const-int.ll index bfc96f17ec9b..aabc2068068d 100644 --- a/test/CodeGen/X86/dbg-const-int.ll +++ b/test/CodeGen/X86/dbg-const-int.ll @@ -13,17 +13,18 @@ entry: declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0} -!llvm.dbg.sp = !{!1} -!llvm.dbg.lv.foo = !{!6} -!0 = metadata !{i32 589841, i32 0, i32 12, metadata !"a.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 132191)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!1 = metadata !{i32 589870, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, i32 ()* @foo, null, null} ; [ DW_TAG_subprogram ] -!2 = metadata !{i32 589865, metadata !"a.c", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ] -!3 = metadata !{i32 589845, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!0 = metadata !{i32 786449, i32 12, metadata !2, metadata !"clang version 3.0 (trunk 132191)", i1 true, metadata !"", i32 0, null, null, metadata !11, null, null} ; [ DW_TAG_compile_unit ] +!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, i32 ()* @foo, null, null, metadata !12, i32 0} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 786473, metadata !13} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5} -!5 = metadata !{i32 589860, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 590080, metadata !7, metadata !"i", metadata !2, i32 2, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] -!7 = metadata !{i32 589835, metadata !1, i32 1, i32 11, metadata !2, i32 0} ; [ DW_TAG_lexical_block ] +!5 = metadata !{i32 786468, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 786688, metadata !7, metadata !"i", metadata !2, i32 2, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ] +!7 = metadata !{i32 786443, metadata !2, metadata !1, i32 1, i32 11, i32 0} ; [ DW_TAG_lexical_block ] !8 = metadata !{i32 42} !9 = metadata !{i32 2, i32 12, metadata !7, null} !10 = metadata !{i32 3, i32 2, metadata !7, null} +!11 = metadata !{metadata !1} +!12 = metadata !{metadata !6} +!13 = metadata !{metadata !"a.c", metadata !"/private/tmp"} diff --git a/test/CodeGen/X86/dbg-const.ll b/test/CodeGen/X86/dbg-const.ll index 5a51eb88b895..a9b8f1fdc4f3 100644 --- a/test/CodeGen/X86/dbg-const.ll +++ b/test/CodeGen/X86/dbg-const.ll @@ -16,19 +16,21 @@ entry: declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone declare i32 @bar() nounwind readnone -!llvm.dbg.sp = !{!0} -!llvm.dbg.lv.foobar = !{!6} +!llvm.dbg.cu = !{!2} -!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foobar", metadata !"foobar", metadata !"foobar", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 
true, i32 ()* @foobar} -!1 = metadata !{i32 524329, metadata !"mu.c", metadata !"/private/tmp", metadata !2} -!2 = metadata !{i32 524305, i32 0, i32 12, metadata !"mu.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 114183)", i1 true, i1 true, metadata !"", i32 0} -!3 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} +!0 = metadata !{i32 786478, metadata !1, metadata !"foobar", metadata !"foobar", metadata !"foobar", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @foobar, null, null, metadata !14, i32 0} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786473, metadata !15} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 2.9 (trunk 114183)", i1 true, metadata !"", i32 0, null, null, metadata !13, null, metadata !""} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} !4 = metadata !{metadata !5} -!5 = metadata !{i32 524324, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} -!6 = metadata !{i32 524544, metadata !7, metadata !"j", metadata !1, i32 15, metadata !5} -!7 = metadata !{i32 524299, metadata !0, i32 12, i32 52, metadata !1, i32 0} +!5 = metadata !{i32 786468, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} +!6 = metadata !{i32 786688, metadata !7, metadata !"j", metadata !1, i32 15, metadata !5, i32 0, null} +!7 = metadata !{i32 786443, metadata !1, metadata !0, i32 12, i32 52, i32 0} ; [ DW_TAG_lexical_block ] !8 = metadata !{i32 42} !9 = metadata !{i32 15, i32 12, metadata !7, null} !10 = metadata !{i32 23, i32 3, metadata !7, null} !11 = metadata !{i32 17, i32 3, metadata !7, null} !12 = metadata !{i32 18, i32 3, metadata !7, null} +!13 = metadata !{metadata !0} +!14 = metadata !{metadata !6} +!15 = metadata !{metadata !"mu.c", metadata !"/private/tmp"} diff --git a/test/CodeGen/X86/dbg-declare-arg.ll b/test/CodeGen/X86/dbg-declare-arg.ll index 367c1ef36c60..f7e0c91cdff2 100644 --- a/test/CodeGen/X86/dbg-declare-arg.ll +++ b/test/CodeGen/X86/dbg-declare-arg.ll @@ -69,55 +69,57 @@ entry: ret void, !dbg !48 } -!llvm.dbg.sp = !{!0, !10, !14, !19, !22, !25} +!llvm.dbg.cu = !{!2} -!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"~A", metadata !"~A", metadata !"", metadata !3, i32 2, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null} ; [ DW_TAG_subprogram ] +!0 = metadata !{i32 786478, metadata !"", i32 0, metadata !1, metadata !"~A", metadata !"~A", metadata !3, i32 2, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null} ; [ DW_TAG_subprogram ] !1 = metadata !{i32 589826, metadata !2, metadata !"A", metadata !3, i32 2, i64 128, i64 32, i32 0, i32 0, null, metadata !4, i32 0, null, null} ; [ DW_TAG_class_type ] -!2 = metadata !{i32 589841, i32 0, i32 4, metadata !"a.cc", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 130127)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 589865, metadata !"a.cc", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, i32 4, metadata !3, metadata !"clang version 3.0 (trunk 130127)", i1 false, metadata !"", i32 0, null, null, metadata !50, null, null} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 786473, 
metadata !51} ; [ DW_TAG_file_type ] !4 = metadata !{metadata !5, metadata !7, metadata !8, metadata !9, metadata !0, metadata !10, metadata !14} -!5 = metadata !{i32 589837, metadata !3, metadata !"x", metadata !3, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ] -!6 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!7 = metadata !{i32 589837, metadata !3, metadata !"y", metadata !3, i32 2, i64 32, i64 32, i64 32, i32 0, metadata !6} ; [ DW_TAG_member ] -!8 = metadata !{i32 589837, metadata !3, metadata !"z", metadata !3, i32 2, i64 32, i64 32, i64 64, i32 0, metadata !6} ; [ DW_TAG_member ] -!9 = metadata !{i32 589837, metadata !3, metadata !"o", metadata !3, i32 2, i64 32, i64 32, i64 96, i32 0, metadata !6} ; [ DW_TAG_member ] -!10 = metadata !{i32 589870, i32 0, metadata !1, metadata !"A", metadata !"A", metadata !"", metadata !3, i32 2, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null} ; [ DW_TAG_subprogram ] -!11 = metadata !{i32 589845, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!5 = metadata !{i32 786445, metadata !3, metadata !"x", metadata !3, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ] +!6 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!7 = metadata !{i32 786445, metadata !3, metadata !"y", metadata !3, i32 2, i64 32, i64 32, i64 32, i32 0, metadata !6} ; [ DW_TAG_member ] +!8 = metadata !{i32 786445, metadata !3, metadata !"z", metadata !3, i32 2, i64 32, i64 32, i64 64, i32 0, metadata !6} ; [ DW_TAG_member ] +!9 = metadata !{i32 786445, metadata !3, metadata !"o", metadata !3, i32 2, i64 32, i64 32, i64 96, i32 0, metadata !6} ; [ DW_TAG_member ] +!10 = metadata !{i32 786478, metadata !"", i32 0, metadata !1, metadata !"A", metadata !"A", metadata !3, i32 2, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null} ; [ DW_TAG_subprogram ] +!11 = metadata !{i32 786453, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !12 = metadata !{null, metadata !13} -!13 = metadata !{i32 589839, metadata !2, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !1} ; [ DW_TAG_pointer_type ] -!14 = metadata !{i32 589870, i32 0, metadata !1, metadata !"A", metadata !"A", metadata !"", metadata !3, i32 2, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null} ; [ DW_TAG_subprogram ] -!15 = metadata !{i32 589845, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !16, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!13 = metadata !{i32 786447, metadata !2, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !1} ; [ DW_TAG_pointer_type ] +!14 = metadata !{i32 786478, metadata !"", i32 0, metadata !1, metadata !"A", metadata !"A", metadata !3, i32 2, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null} ; [ DW_TAG_subprogram ] +!15 = metadata !{i32 786453, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !16, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !16 = metadata !{null, metadata !13, metadata !17} !17 = metadata !{i32 589840, metadata !2, null, null, i32 0, i64 0, i64 0, i64 
0, i32 0, metadata !18} ; [ DW_TAG_reference_type ] -!18 = metadata !{i32 589862, metadata !2, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !1} ; [ DW_TAG_const_type ] -!19 = metadata !{i32 589870, i32 0, metadata !3, metadata !"foo", metadata !"foo", metadata !"_Z3fooi", metadata !3, i32 4, metadata !20, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%class.A*, i32)* @_Z3fooi, null, null} ; [ DW_TAG_subprogram ] -!20 = metadata !{i32 589845, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !21, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!18 = metadata !{i32 786470, metadata !2, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !1} ; [ DW_TAG_const_type ] +!19 = metadata !{i32 786478, metadata !"_Z3fooi", i32 0, metadata !3, metadata !"foo", metadata !"foo", metadata !3, i32 4, metadata !20, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%class.A*, i32)* @_Z3fooi, null, null} ; [ DW_TAG_subprogram ] +!20 = metadata !{i32 786453, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !21, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !21 = metadata !{metadata !1} -!22 = metadata !{i32 589870, i32 0, metadata !3, metadata !"~A", metadata !"~A", metadata !"_ZN1AD1Ev", metadata !3, i32 2, metadata !23, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%class.A*)* @_ZN1AD1Ev, null, null} ; [ DW_TAG_subprogram ] -!23 = metadata !{i32 589845, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !24, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!22 = metadata !{i32 786478, metadata !"_ZN1AD1Ev", i32 0, metadata !3, metadata !"~A", metadata !"~A", metadata !3, i32 2, metadata !23, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%class.A*)* @_ZN1AD1Ev, null, null} ; [ DW_TAG_subprogram ] +!23 = metadata !{i32 786453, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !24, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !24 = metadata !{null} -!25 = metadata !{i32 589870, i32 0, metadata !3, metadata !"~A", metadata !"~A", metadata !"_ZN1AD2Ev", metadata !3, i32 2, metadata !23, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%class.A*)* @_ZN1AD2Ev, null, null} ; [ DW_TAG_subprogram ] -!26 = metadata !{i32 590081, metadata !19, metadata !"i", metadata !3, i32 16777220, metadata !6, i32 0} ; [ DW_TAG_arg_variable ] +!25 = metadata !{i32 786478, metadata !"_ZN1AD2Ev", i32 0, metadata !3, metadata !"~A", metadata !"~A", metadata !3, i32 2, metadata !23, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%class.A*)* @_ZN1AD2Ev, null, null} ; [ DW_TAG_subprogram ] +!26 = metadata !{i32 786689, metadata !19, metadata !"i", metadata !3, i32 16777220, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ] !27 = metadata !{i32 4, i32 11, metadata !19, null} -!28 = metadata !{i32 590080, metadata !29, metadata !"j", metadata !3, i32 5, metadata !6, i32 0} ; [ DW_TAG_auto_variable ] -!29 = metadata !{i32 589835, metadata !19, i32 4, i32 14, metadata !3, i32 0} ; [ DW_TAG_lexical_block ] +!28 = metadata !{i32 786688, metadata !29, metadata !"j", metadata !3, i32 5, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ] +!29 = metadata !{i32 786443, metadata !19, i32 4, i32 14, metadata !3, i32 0} ; [ DW_TAG_lexical_block ] !30 = metadata !{i32 5, i32 7, metadata !29, null} !31 = metadata !{i32 5, i32 12, metadata !29, null} !32 = metadata 
!{i32 6, i32 3, metadata !29, null} !33 = metadata !{i32 7, i32 5, metadata !34, null} -!34 = metadata !{i32 589835, metadata !29, i32 6, i32 16, metadata !3, i32 1} ; [ DW_TAG_lexical_block ] +!34 = metadata !{i32 786443, metadata !29, i32 6, i32 16, metadata !3, i32 1} ; [ DW_TAG_lexical_block ] !35 = metadata !{i32 8, i32 3, metadata !34, null} !36 = metadata !{i32 9, i32 9, metadata !29, null} -!37 = metadata !{i32 590080, metadata !29, metadata !"my_a", metadata !3, i32 9, metadata !38, i32 0} ; [ DW_TAG_auto_variable ] +!37 = metadata !{i32 786688, metadata !29, metadata !"my_a", metadata !3, i32 9, metadata !38, i32 0, null} ; [ DW_TAG_auto_variable ] !38 = metadata !{i32 589840, metadata !2, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !1} ; [ DW_TAG_reference_type ] !39 = metadata !{i32 9, i32 5, metadata !29, null} !40 = metadata !{i32 10, i32 3, metadata !29, null} !41 = metadata !{i32 11, i32 3, metadata !29, null} !42 = metadata !{i32 12, i32 1, metadata !29, null} -!43 = metadata !{i32 590081, metadata !22, metadata !"this", metadata !3, i32 16777218, metadata !13, i32 64} ; [ DW_TAG_arg_variable ] +!43 = metadata !{i32 786689, metadata !22, metadata !"this", metadata !3, i32 16777218, metadata !13, i32 64, null} ; [ DW_TAG_arg_variable ] !44 = metadata !{i32 2, i32 47, metadata !22, null} !45 = metadata !{i32 2, i32 61, metadata !22, null} -!46 = metadata !{i32 590081, metadata !25, metadata !"this", metadata !3, i32 16777218, metadata !13, i32 64} ; [ DW_TAG_arg_variable ] +!46 = metadata !{i32 786689, metadata !25, metadata !"this", metadata !3, i32 16777218, metadata !13, i32 64, null} ; [ DW_TAG_arg_variable ] !47 = metadata !{i32 2, i32 47, metadata !25, null} !48 = metadata !{i32 2, i32 54, metadata !49, null} -!49 = metadata !{i32 589835, metadata !25, i32 2, i32 52, metadata !3, i32 2} ; [ DW_TAG_lexical_block ] +!49 = metadata !{i32 786443, metadata !25, i32 2, i32 52, metadata !3, i32 2} ; [ DW_TAG_lexical_block ] +!50 = metadata !{metadata !0, metadata !10, metadata !14, metadata !19, metadata !22, metadata !25} +!51 = metadata !{metadata !"a.cc", metadata !"/private/tmp"} diff --git a/test/CodeGen/X86/dbg-declare.ll b/test/CodeGen/X86/dbg-declare.ll index 5d4cedc5c4e3..6ac397ac42e1 100644 --- a/test/CodeGen/X86/dbg-declare.ll +++ b/test/CodeGen/X86/dbg-declare.ll @@ -29,12 +29,10 @@ declare void @llvm.stackrestore(i8*) nounwind !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"20020104-2.c", metadata !"/Volumes/Sandbox/llvm", metadata !"clang version 3.1 (trunk 153698)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] -!1 = metadata !{metadata !2} -!2 = metadata !{i32 0} -!3 = metadata !{metadata !4} -!4 = metadata !{metadata !5} -!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 6, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32*)* @foo, null, null, metadata !12} ; [ DW_TAG_subprogram ] +!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.1 (trunk 153698)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] +!1 = metadata !{i32 0} +!3 = metadata !{metadata !5} +!5 = metadata !{i32 786478, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 6, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32*)* @foo, null, null, 
metadata !12} ; [ DW_TAG_subprogram ] !6 = metadata !{i32 786473, metadata !"20020104-2.c", metadata !"/Volumes/Sandbox/llvm", null} ; [ DW_TAG_file_type ] !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !8 = metadata !{metadata !9, metadata !10} @@ -51,7 +49,7 @@ declare void @llvm.stackrestore(i8*) nounwind !19 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 0, i64 8, i32 0, i32 0, metadata !20, metadata !21, i32 0, i32 0} ; [ DW_TAG_array_type ] !20 = metadata !{i32 786468, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] !21 = metadata !{metadata !22} -!22 = metadata !{i32 786465, i64 1, i64 0} ; [ DW_TAG_subrange_type ] +!22 = metadata !{i32 786465, i64 0, i64 -1} ; [ DW_TAG_subrange_type ] !23 = metadata !{i32 7, i32 8, metadata !17, null} !24 = metadata !{i32 9, i32 1, metadata !17, null} !25 = metadata !{i32 8, i32 3, metadata !17, null} diff --git a/test/CodeGen/X86/dbg-file-name.ll b/test/CodeGen/X86/dbg-file-name.ll index adf985461055..1bd3d77522a3 100644 --- a/test/CodeGen/X86/dbg-file-name.ll +++ b/test/CodeGen/X86/dbg-file-name.ll @@ -9,11 +9,13 @@ define i32 @main() nounwind { ret i32 0 } -!llvm.dbg.sp = !{ !6} +!llvm.dbg.cu = !{!2} -!1 = metadata !{i32 589865, metadata !"simple.c", metadata !"/Users/manav/one/two", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 589841, i32 0, i32 1, metadata !"simple.c", metadata !"/Users/manav/one/two", metadata !"LLVM build 00", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!5 = metadata !{i32 589860, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 9, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ] -!7 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_subroutine_type ] +!1 = metadata !{i32 786473, metadata !10} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, metadata !10, i32 1, metadata !"LLVM build 00", i1 true, i1 false, metadata !"", i32 0, null, null, metadata !9, null} ; [ DW_TAG_compile_unit ] +!5 = metadata !{i32 786468, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 786478, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 9, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, null, i32 0} ; [ DW_TAG_subprogram ] +!7 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_subroutine_type ] !8 = metadata !{metadata !5} +!9 = metadata !{metadata !6} +!10 = metadata !{metadata !"simple.c", metadata !"/Users/manav/one/two"} diff --git a/test/CodeGen/X86/dbg-i128-const.ll b/test/CodeGen/X86/dbg-i128-const.ll index bd96d9195d02..17d645757d99 100644 --- a/test/CodeGen/X86/dbg-i128-const.ll +++ b/test/CodeGen/X86/dbg-i128-const.ll @@ -12,15 +12,20 @@ entry: declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone +!llvm.dbg.cu = !{!5} + !0 = metadata !{i128 42 } -!1 = metadata !{i32 524544, metadata !2, metadata !"MAX", metadata !4, i32 29, 
metadata !8} ; [ DW_TAG_auto_variable ] -!2 = metadata !{i32 524299, metadata !3, i32 26, i32 0} ; [ DW_TAG_lexical_block ] -!3 = metadata !{i32 524334, i32 0, metadata !4, metadata !"__foo", metadata !"__foo", metadata !"__foo", metadata !4, i32 26, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ] -!4 = metadata !{i32 524329, metadata !"foo.c", metadata !"/tmp", metadata !5} ; [ DW_TAG_file_type ] -!5 = metadata !{i32 524305, i32 0, i32 1, metadata !"foo.c", metadata !"/tmp", metadata !"clang", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!6 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ] +!1 = metadata !{i32 786688, metadata !2, metadata !"MAX", metadata !4, i32 29, metadata !8, i32 0, null} ; [ DW_TAG_auto_variable ] +!2 = metadata !{i32 786443, metadata !4, metadata !3, i32 26, i32 0, i32 0} ; [ DW_TAG_lexical_block ] +!3 = metadata !{i32 786478, metadata !4, metadata !"__foo", metadata !"__foo", metadata !"__foo", metadata !4, i32 26, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i128 (i128, i128)* @__foo, null, null, null, i32 26} ; [ DW_TAG_subprogram ] +!4 = metadata !{i32 786473, metadata !13} ; [ DW_TAG_file_type ] +!5 = metadata !{i32 786449, i32 1, metadata !4, metadata !"clang", i1 true, metadata !"", i32 0, null, null, metadata !12, null, metadata !""} ; [ DW_TAG_compile_unit ] +!6 = metadata !{i32 786453, metadata !13, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ] !7 = metadata !{metadata !8, metadata !8, metadata !8} -!8 = metadata !{i32 524310, metadata !4, metadata !"ti_int", metadata !9, i32 78, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ] -!9 = metadata !{i32 524329, metadata !"myint.h", metadata !"/tmp", metadata !5} ; [ DW_TAG_file_type ] -!10 = metadata !{i32 524324, metadata !4, metadata !"", metadata !4, i32 0, i64 128, i64 128, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!8 = metadata !{i32 786454, metadata !14, metadata !4, metadata !"ti_int", i32 78, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ] +!9 = metadata !{i32 786473, metadata !14} ; [ DW_TAG_file_type ] +!10 = metadata !{i32 786468, metadata !13, metadata !4, metadata !"", i32 0, i64 128, i64 128, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !11 = metadata !{i32 29, i32 0, metadata !2, null} +!12 = metadata !{metadata !3} +!13 = metadata !{metadata !"foo.c", metadata !"/tmp"} +!14 = metadata !{metadata !"myint.h", metadata !"/tmp"} diff --git a/test/CodeGen/X86/dbg-large-unsigned-const.ll b/test/CodeGen/X86/dbg-large-unsigned-const.ll index fc295c679445..ff16318efcec 100644 --- a/test/CodeGen/X86/dbg-large-unsigned-const.ll +++ b/test/CodeGen/X86/dbg-large-unsigned-const.ll @@ -26,36 +26,36 @@ entry: declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0} -!llvm.dbg.sp = !{!1, !6} -!llvm.dbg.lv._Z3iseRKxS0_ = !{!7, !11} -!llvm.dbg.lv._Z2fnx = !{!12} +!29 = metadata !{metadata !1, metadata !6} +!30 = metadata !{metadata !7, metadata !11} +!31 = metadata !{metadata !12} -!0 = metadata !{i32 655377, i32 0, i32 4, metadata !"lli.cc", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 135593)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!1 = metadata !{i32 655406, i32 0, metadata !2, metadata !"ise", metadata !"ise", metadata !"_Z3iseRKxS0_", 
metadata !2, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i1 (i64*, i64*)* @_Z3iseRKxS0_, null, null} ; [ DW_TAG_subprogram ] -!2 = metadata !{i32 655401, metadata !"lli.cc", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ] -!3 = metadata !{i32 655381, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!0 = metadata !{i32 786449, i32 4, metadata !2, metadata !"clang version 3.0 (trunk 135593)", i1 true, metadata !"", i32 0, null, null, metadata !29, null, metadata !""} ; [ DW_TAG_compile_unit ] +!1 = metadata !{i32 786478, metadata !"_Z3iseRKxS0_", i32 0, metadata !2, metadata !"ise", metadata !"ise", metadata !2, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i1 (i64*, i64*)* @_Z3iseRKxS0_, null, null, metadata !30, i32 2} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 786473, metadata !"lli.cc", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5} -!5 = metadata !{i32 655396, metadata !0, metadata !"bool", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 655406, i32 0, metadata !2, metadata !"fn", metadata !"fn", metadata !"_Z2fnx", metadata !2, i32 6, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i1 (i64)* @_Z2fnx, null, null} ; [ DW_TAG_subprogram ] -!7 = metadata !{i32 655617, metadata !1, metadata !"LHS", metadata !2, i32 16777218, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] -!8 = metadata !{i32 655376, metadata !0, null, null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_reference_type ] -!9 = metadata !{i32 655398, metadata !0, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_const_type ] -!10 = metadata !{i32 655396, metadata !0, metadata !"long long int", null, i32 0, i64 64, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!11 = metadata !{i32 655617, metadata !1, metadata !"RHS", metadata !2, i32 33554434, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] -!12 = metadata !{i32 655617, metadata !6, metadata !"a", metadata !2, i32 16777222, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ] +!5 = metadata !{i32 786468, metadata !0, metadata !"bool", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 786478, metadata !"_Z2fnx", i32 0, metadata !2, metadata !"fn", metadata !"fn", metadata !2, i32 6, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i1 (i64)* @_Z2fnx, null, null, metadata !31, i32 6} ; [ DW_TAG_subprogram ] +!7 = metadata !{i32 786689, metadata !1, metadata !"LHS", metadata !2, i32 16777218, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] +!8 = metadata !{i32 786448, metadata !0, null, null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_reference_type ] +!9 = metadata !{i32 786470, metadata !0, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_const_type ] +!10 = metadata !{i32 786468, metadata !0, metadata !"long long int", null, i32 0, i64 64, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!11 = metadata !{i32 786689, metadata !1, metadata !"RHS", metadata !2, i32 33554434, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] +!12 = metadata !{i32 786689, 
metadata !6, metadata !"a", metadata !2, i32 16777222, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ] !13 = metadata !{i32 2, i32 27, metadata !1, null} !14 = metadata !{i32 2, i32 49, metadata !1, null} !15 = metadata !{i32 3, i32 3, metadata !16, null} -!16 = metadata !{i32 655371, metadata !1, i32 2, i32 54, metadata !2, i32 0} ; [ DW_TAG_lexical_block ] +!16 = metadata !{i32 786443, metadata !2, metadata !1, i32 2, i32 54, i32 0} ; [ DW_TAG_lexical_block ] !17 = metadata !{metadata !"long long", metadata !18} !18 = metadata !{metadata !"omnipotent char", metadata !19} !19 = metadata !{metadata !"Simple C/C++ TBAA", null} !20 = metadata !{i32 6, i32 19, metadata !6, null} -!21 = metadata !{i32 655617, metadata !1, metadata !"LHS", metadata !2, i32 16777218, metadata !8, i32 0, metadata !22} ; [ DW_TAG_arg_variable ] +!21 = metadata !{i32 786689, metadata !1, metadata !"LHS", metadata !2, i32 16777218, metadata !8, i32 0, metadata !22} ; [ DW_TAG_arg_variable ] !22 = metadata !{i32 7, i32 10, metadata !23, null} -!23 = metadata !{i32 655371, metadata !6, i32 6, i32 22, metadata !2, i32 1} ; [ DW_TAG_lexical_block ] +!23 = metadata !{i32 786443, metadata !2, metadata !6, i32 6, i32 22, i32 1} ; [ DW_TAG_lexical_block ] !24 = metadata !{i32 2, i32 27, metadata !1, metadata !22} !25 = metadata !{i64 9223372036854775807} -!26 = metadata !{i32 655617, metadata !1, metadata !"RHS", metadata !2, i32 33554434, metadata !8, i32 0, metadata !22} ; [ DW_TAG_arg_variable ] +!26 = metadata !{i32 786689, metadata !1, metadata !"RHS", metadata !2, i32 33554434, metadata !8, i32 0, metadata !22} ; [ DW_TAG_arg_variable ] !27 = metadata !{i32 2, i32 49, metadata !1, metadata !22} !28 = metadata !{i32 3, i32 3, metadata !16, metadata !22} diff --git a/test/CodeGen/X86/dbg-merge-loc-entry.ll b/test/CodeGen/X86/dbg-merge-loc-entry.ll index d1e349f79d6f..baad6c0b60e6 100644 --- a/test/CodeGen/X86/dbg-merge-loc-entry.ll +++ b/test/CodeGen/X86/dbg-merge-loc-entry.ll @@ -43,33 +43,36 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone declare %0 @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone -!llvm.dbg.sp = !{!0, !9} +!llvm.dbg.cu = !{!2} -!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"__udivmodti4", metadata !"__udivmodti4", metadata !"", metadata !1, i32 879, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 589865, metadata !"foobar.c", metadata !"/tmp", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 589841, i32 0, i32 1, metadata !"foobar.c", metadata !"/tmp", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!0 = metadata !{i32 786478, metadata !1, metadata !"__udivmodti4", metadata !"__udivmodti4", metadata !"", metadata !1, i32 879, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, null, i32 879} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786473, metadata !29} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, i32 1, metadata !1, metadata !"4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !28, null, metadata !""} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 786453, metadata !29, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5, metadata !5, metadata !5, metadata !8} -!5 = metadata !{i32 589846, metadata !6, metadata !"UTItype", metadata !6, i32 166, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ] -!6 = metadata !{i32 589865, metadata !"foobar.h", metadata !"/tmp", metadata !2} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 589860, metadata !1, metadata !"", metadata !1, i32 0, i64 128, i64 128, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] -!8 = metadata !{i32 589839, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_pointer_type ] -!9 = metadata !{i32 589870, i32 0, metadata !1, metadata !"__divti3", metadata !"__divti3", metadata !"__divti3", metadata !1, i32 1094, metadata !10, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i128 (i128, i128)* @__divti3} ; [ DW_TAG_subprogram ] -!10 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ] +!5 = metadata !{i32 786454, metadata !30, metadata !6, metadata !"UTItype", i32 166, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ] +!6 = metadata !{i32 786473, metadata !30} ; [ DW_TAG_file_type ] +!7 = metadata !{i32 786468, metadata !29, metadata !1, metadata !"", i32 0, i64 128, i64 128, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] +!8 = metadata !{i32 786447, metadata !29, metadata !1, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_pointer_type ] +!9 = metadata !{i32 786478, metadata !1, metadata !"__divti3", metadata !"__divti3", metadata !"__divti3", metadata !1, i32 1094, metadata !10, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i128 (i128, i128)* @__divti3, null, null, null, i32 1094} ; [ DW_TAG_subprogram ] +!10 = metadata !{i32 786453, metadata !29, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ] !11 = metadata !{metadata !12, metadata !12, metadata !12} -!12 = metadata !{i32 589846, metadata !6, metadata !"TItype", metadata !6, i32 160, i64 0, i64 0, i64 0, i32 0, metadata !13} ; [ DW_TAG_typedef ] -!13 = metadata !{i32 589860, metadata !1, metadata !"", metadata !1, i32 0, i64 128, i64 128, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!14 = metadata !{i32 590081, metadata !9, metadata !"u", metadata !1, i32 1093, metadata !12, i32 0} ; [ DW_TAG_arg_variable ] +!12 = metadata !{i32 786454, metadata !30, metadata !6, metadata !"TItype", i32 160, i64 0, i64 0, i64 0, i32 0, metadata !13} ; [ DW_TAG_typedef ] +!13 = metadata !{i32 786468, metadata !29, metadata !1, metadata !"", i32 0, i64 128, i64 128, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!14 = metadata !{i32 786689, metadata !9, metadata !"u", metadata !1, i32 1093, metadata !12, i32 0, null} ; [ DW_TAG_arg_variable ] !15 = metadata !{i32 1093, i32 0, metadata !9, null} !16 = metadata !{i64 0} -!17 = metadata !{i32 590080, metadata !18, metadata !"c", metadata !1, i32 1095, metadata !19, i32 0} ; [ DW_TAG_auto_variable ] -!18 = metadata !{i32 589835, metadata !9, i32 1094, i32 0, metadata !1, i32 13} ; [ DW_TAG_lexical_block ] -!19 = metadata !{i32 589846, metadata !6, metadata 
!"word_type", metadata !6, i32 424, i64 0, i64 0, i64 0, i32 0, metadata !20} ; [ DW_TAG_typedef ] -!20 = metadata !{i32 589860, metadata !1, metadata !"long int", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!17 = metadata !{i32 786688, metadata !18, metadata !"c", metadata !1, i32 1095, metadata !19, i32 0, null} ; [ DW_TAG_auto_variable ] +!18 = metadata !{i32 786443, metadata !1, metadata !9, i32 1094, i32 0, i32 13} ; [ DW_TAG_lexical_block ] +!19 = metadata !{i32 786454, metadata !30, metadata !6, metadata !"word_type", i32 424, i64 0, i64 0, i64 0, i32 0, metadata !20} ; [ DW_TAG_typedef ] +!20 = metadata !{i32 786468, metadata !29, metadata !1, metadata !"long int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !21 = metadata !{i32 1095, i32 0, metadata !18, null} !22 = metadata !{i32 1103, i32 0, metadata !18, null} !23 = metadata !{i32 1104, i32 0, metadata !18, null} !24 = metadata !{i32 1003, i32 0, metadata !25, metadata !26} -!25 = metadata !{i32 589835, metadata !0, i32 879, i32 0, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!25 = metadata !{i32 786443, metadata !1, metadata !0, i32 879, i32 0, i32 0} ; [ DW_TAG_lexical_block ] !26 = metadata !{i32 1107, i32 0, metadata !18, null} !27 = metadata !{i32 1111, i32 0, metadata !18, null} +!28 = metadata !{metadata !0, metadata !9} +!29 = metadata !{metadata !"foobar.c", metadata !"/tmp"} +!30 = metadata !{metadata !"foobar.h", metadata !"/tmp"} diff --git a/test/CodeGen/X86/dbg-prolog-end.ll b/test/CodeGen/X86/dbg-prolog-end.ll index 81303bb3d2ba..26bac2e08286 100644 --- a/test/CodeGen/X86/dbg-prolog-end.ll +++ b/test/CodeGen/X86/dbg-prolog-end.ll @@ -33,23 +33,23 @@ entry: } !llvm.dbg.cu = !{!0} -!llvm.dbg.sp = !{!1, !6} +!18 = metadata !{metadata !1, metadata !6} -!0 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/a.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 131100)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!1 = metadata !{i32 589870, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32)* @foo, null, null} ; [ DW_TAG_subprogram ] -!2 = metadata !{i32 589865, metadata !"/tmp/a.c", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ] -!3 = metadata !{i32 589845, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!0 = metadata !{i32 786449, i32 12, metadata !2, metadata !"clang version 3.0 (trunk 131100)", i1 false, metadata !"", i32 0, null, null, metadata !18, null, metadata !""} ; [ DW_TAG_compile_unit ] +!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32)* @foo, null, null, null, i32 1} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 786473, metadata !"/tmp/a.c", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5} -!5 = metadata !{i32 589860, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 589870, i32 0, metadata !2, metadata !"main", metadata !"main", metadata !"", metadata !2, i32 7, 
metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @main, null, null} ; [ DW_TAG_subprogram ] -!7 = metadata !{i32 590081, metadata !1, metadata !"i", metadata !2, i32 16777217, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] +!5 = metadata !{i32 786468, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 786478, metadata !2, metadata !"main", metadata !"main", metadata !"", metadata !2, i32 7, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @main, null, null, null, i32 7} ; [ DW_TAG_subprogram ] +!7 = metadata !{i32 786689, metadata !1, metadata !"i", metadata !2, i32 16777217, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ] !8 = metadata !{i32 1, i32 13, metadata !1, null} -!9 = metadata !{i32 590080, metadata !10, metadata !"j", metadata !2, i32 2, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] -!10 = metadata !{i32 589835, metadata !1, i32 1, i32 16, metadata !2, i32 0} ; [ DW_TAG_lexical_block ] +!9 = metadata !{i32 786688, metadata !10, metadata !"j", metadata !2, i32 2, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ] +!10 = metadata !{i32 786443, metadata !2, metadata !1, i32 1, i32 16, i32 0} ; [ DW_TAG_lexical_block ] !11 = metadata !{i32 2, i32 6, metadata !10, null} !12 = metadata !{i32 2, i32 11, metadata !10, null} !13 = metadata !{i32 3, i32 2, metadata !10, null} !14 = metadata !{i32 4, i32 2, metadata !10, null} !15 = metadata !{i32 5, i32 2, metadata !10, null} !16 = metadata !{i32 8, i32 2, metadata !17, null} -!17 = metadata !{i32 589835, metadata !6, i32 7, i32 12, metadata !2, i32 1} ; [ DW_TAG_lexical_block ] +!17 = metadata !{i32 786443, metadata !2, metadata !6, i32 7, i32 12, i32 1} ; [ DW_TAG_lexical_block ] diff --git a/test/CodeGen/X86/dbg-subrange.ll b/test/CodeGen/X86/dbg-subrange.ll index 788910c7fe72..6090185dc10e 100644 --- a/test/CodeGen/X86/dbg-subrange.ll +++ b/test/CodeGen/X86/dbg-subrange.ll @@ -14,24 +14,21 @@ entry: !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"small.c", metadata !"/private/tmp", metadata !"clang version 3.1 (trunk 144833)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !11} ; [ DW_TAG_compile_unit ] -!1 = metadata !{metadata !2} -!2 = metadata !{i32 0} -!3 = metadata !{metadata !4} -!4 = metadata !{metadata !5} -!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"bar", metadata !"bar", metadata !"", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void ()* @bar, null, null, metadata !9} ; [ DW_TAG_subprogram ] +!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.1 (trunk 144833)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !11, metadata !""} ; [ DW_TAG_compile_unit ] +!1 = metadata !{i32 0} +!3 = metadata !{metadata !5} +!5 = metadata !{i32 720942, metadata !6, metadata !"bar", metadata !"bar", metadata !"", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void ()* @bar, null, null, metadata !9} ; [ DW_TAG_subprogram ] !6 = metadata !{i32 720937, metadata !"small.c", metadata !"/private/tmp", null} ; [ DW_TAG_file_type ] !7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !8 = metadata !{null} !9 = metadata !{metadata !10} !10 = metadata !{i32 720932} ; [ DW_TAG_base_type ] -!11 
= metadata !{metadata !12} -!12 = metadata !{metadata !13} -!13 = metadata !{i32 720948, i32 0, null, metadata !"s", metadata !"s", metadata !"", metadata !6, i32 2, metadata !14, i32 0, i32 1, [4294967296 x i8]* @s} ; [ DW_TAG_variable ] +!11 = metadata !{metadata !13} +!13 = metadata !{i32 720948, i32 0, null, metadata !"s", metadata !"s", metadata !"", metadata !6, i32 2, metadata !14, i32 0, i32 1, [4294967296 x i8]* @s, null} ; [ DW_TAG_variable ] !14 = metadata !{i32 720897, null, metadata !"", null, i32 0, i64 34359738368, i64 8, i32 0, i32 0, metadata !15, metadata !16, i32 0, i32 0} ; [ DW_TAG_array_type ] !15 = metadata !{i32 720932, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] !16 = metadata !{metadata !17} -!17 = metadata !{i32 720929, i64 0, i64 4294967295} ; [ DW_TAG_subrange_type ] +!17 = metadata !{i32 720929, i64 0, i64 4294967296} ; [ DW_TAG_subrange_type ] !18 = metadata !{i32 5, i32 3, metadata !19, null} -!19 = metadata !{i32 720907, metadata !5, i32 4, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] +!19 = metadata !{i32 786443, metadata !5, i32 4, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] !20 = metadata !{i32 6, i32 1, metadata !19, null} diff --git a/test/CodeGen/X86/dbg-value-dag-combine.ll b/test/CodeGen/X86/dbg-value-dag-combine.ll index b115bf475c42..fcbf64f42378 100644 --- a/test/CodeGen/X86/dbg-value-dag-combine.ll +++ b/test/CodeGen/X86/dbg-value-dag-combine.ll @@ -23,26 +23,25 @@ entry: store i32 %tmp3, i32 addrspace(1)* %arrayidx, align 4, !dbg !16 ret void, !dbg !17 } -!llvm.dbg.sp = !{!0} +!llvm.dbg.cu = !{!2} -!0 = metadata !{i32 589870, i32 0, metadata !1, metadata -!"__OpenCL_test_kernel", metadata !"__OpenCL_test_kernel", metadata -!"__OpenCL_test_kernel", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, null} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 589865, metadata !"OCL6368.tmp.cl", metadata !"E:\5CUsers\5Cmvillmow.AMD\5CAppData\5CLocal\5CTemp", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 589841, i32 0, i32 1, metadata !"OCL6368.tmp.cl", metadata !"E:\5CUsers\5Cmvillmow.AMD\5CAppData\5CLocal\5CTemp", metadata !"clc", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!0 = metadata !{i32 786478, metadata !1, metadata !"__OpenCL_test_kernel", metadata !"__OpenCL_test_kernel", metadata !"__OpenCL_test_kernel", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, null} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, i32 1, metadata !1, metadata !"clc", i1 false, metadata !"", i32 0, null, null, metadata !18, null, null} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !4 = metadata !{null, metadata !5} -!5 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_pointer_type ] -!6 = metadata !{i32 589860, metadata !2, metadata !"unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] -!7 = metadata !{i32 590081, metadata !0, metadata !"ip", metadata !1, i32 1, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] +!5 
= metadata !{i32 786447, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_pointer_type ] +!6 = metadata !{i32 786468, metadata !2, metadata !"unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] +!7 = metadata !{i32 786689, metadata !0, metadata !"ip", metadata !1, i32 1, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ] !8 = metadata !{i32 1, i32 42, metadata !0, null} -!9 = metadata !{i32 590080, metadata !10, metadata !"gid", metadata !1, i32 3, metadata !6, i32 0} ; [ DW_TAG_auto_variable ] -!10 = metadata !{i32 589835, metadata !0, i32 2, i32 1, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!9 = metadata !{i32 786688, metadata !10, metadata !"gid", metadata !1, i32 3, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ] +!10 = metadata !{i32 786443, metadata !0, i32 2, i32 1, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] !11 = metadata !{i32 3, i32 41, metadata !10, null} !12 = metadata !{i32 0} -!13 = metadata !{i32 590080, metadata !10, metadata !"idx", metadata !1, i32 4, metadata !6, i32 0} ; [ DW_TAG_auto_variable ] +!13 = metadata !{i32 786688, metadata !10, metadata !"idx", metadata !1, i32 4, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ] !14 = metadata !{i32 4, i32 20, metadata !10, null} !15 = metadata !{i32 5, i32 15, metadata !10, null} !16 = metadata !{i32 6, i32 18, metadata !10, null} !17 = metadata !{i32 7, i32 1, metadata !0, null} - +!18 = metadata !{metadata !0} +!19 = metadata !{metadata !"OCL6368.tmp.cl", metadata !"E:\5CUsers\5Cmvillmow.AMD\5CAppData\5CLocal\5CTemp"} diff --git a/test/CodeGen/X86/dbg-value-inlined-parameter.ll b/test/CodeGen/X86/dbg-value-inlined-parameter.ll deleted file mode 100644 index d248a4130355..000000000000 --- a/test/CodeGen/X86/dbg-value-inlined-parameter.ll +++ /dev/null @@ -1,87 +0,0 @@ -; RUN: llc -mtriple=x86_64-apple-darwin < %s | FileCheck %s -; RUN: llc -mtriple=x86_64-apple-darwin -regalloc=basic < %s | FileCheck %s - -;CHECK: DW_TAG_inlined_subroutine -;CHECK-NEXT: DW_AT_abstract_origin -;CHECK-NEXT: DW_AT_low_pc -;CHECK-NEXT: DW_AT_high_pc -;CHECK-NEXT: DW_AT_call_file -;CHECK-NEXT: DW_AT_call_line -;CHECK-NEXT: DW_TAG_formal_parameter -;CHECK-NEXT: Lstring11-Lsection_str ## DW_AT_name - -%struct.S1 = type { float*, i32 } - -@p = common global %struct.S1 zeroinitializer, align 8 - -define i32 @foo(%struct.S1* nocapture %sp, i32 %nums) nounwind optsize ssp { -entry: - tail call void @llvm.dbg.value(metadata !{%struct.S1* %sp}, i64 0, metadata !9), !dbg !20 - tail call void @llvm.dbg.value(metadata !{i32 %nums}, i64 0, metadata !18), !dbg !21 - %tmp2 = getelementptr inbounds %struct.S1* %sp, i64 0, i32 1, !dbg !22 - store i32 %nums, i32* %tmp2, align 4, !dbg !22, !tbaa !24 - %call = tail call float* @bar(i32 %nums) nounwind optsize, !dbg !27 - %tmp5 = getelementptr inbounds %struct.S1* %sp, i64 0, i32 0, !dbg !27 - store float* %call, float** %tmp5, align 8, !dbg !27, !tbaa !28 - %cmp = icmp ne float* %call, null, !dbg !29 - %cond = zext i1 %cmp to i32, !dbg !29 - ret i32 %cond, !dbg !29 -} - -declare float* @bar(i32) optsize - -define void @foobar() nounwind optsize ssp { -entry: - tail call void @llvm.dbg.value(metadata !30, i64 0, metadata !9) nounwind, !dbg !31 - tail call void @llvm.dbg.value(metadata !34, i64 0, metadata !18) nounwind, !dbg !35 - store i32 1, i32* getelementptr inbounds (%struct.S1* @p, i64 0, i32 1), align 8, !dbg !36, !tbaa !24 - %call.i = tail call float* @bar(i32 1) nounwind optsize, !dbg !37 - store float* 
%call.i, float** getelementptr inbounds (%struct.S1* @p, i64 0, i32 0), align 8, !dbg !37, !tbaa !28 - ret void, !dbg !38 -} - -declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone - -!llvm.dbg.sp = !{!0, !6} -!llvm.dbg.lv.foo = !{!9, !18} -!llvm.dbg.gv = !{!19} - -!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 8, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (%struct.S1*, i32)* @foo} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 589865, metadata !"nm2.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"nm2.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 125693)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] -!4 = metadata !{metadata !5} -!5 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foobar", metadata !"foobar", metadata !"", metadata !1, i32 15, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, void ()* @foobar} ; [ DW_TAG_subprogram ] -!7 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] -!8 = metadata !{null} -!9 = metadata !{i32 590081, metadata !0, metadata !"sp", metadata !1, i32 7, metadata !10, i32 0} ; [ DW_TAG_arg_variable ] -!10 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] -!11 = metadata !{i32 589846, metadata !2, metadata !"S1", metadata !1, i32 4, i64 0, i64 0, i64 0, i32 0, metadata !12} ; [ DW_TAG_typedef ] -!12 = metadata !{i32 589843, metadata !2, metadata !"S1", metadata !1, i32 1, i64 128, i64 64, i32 0, i32 0, i32 0, metadata !13, i32 0, i32 0} ; [ DW_TAG_structure_type ] -!13 = metadata !{metadata !14, metadata !17} -!14 = metadata !{i32 589837, metadata !1, metadata !"m", metadata !1, i32 2, i64 64, i64 64, i64 0, i32 0, metadata !15} ; [ DW_TAG_member ] -!15 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ] -!16 = metadata !{i32 589860, metadata !2, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] -!17 = metadata !{i32 589837, metadata !1, metadata !"nums", metadata !1, i32 3, i64 32, i64 32, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ] -!18 = metadata !{i32 590081, metadata !0, metadata !"nums", metadata !1, i32 7, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] -!19 = metadata !{i32 589876, i32 0, metadata !2, metadata !"p", metadata !"p", metadata !"", metadata !1, i32 14, metadata !11, i32 0, i32 1, %struct.S1* @p} ; [ DW_TAG_variable ] -!20 = metadata !{i32 7, i32 13, metadata !0, null} -!21 = metadata !{i32 7, i32 21, metadata !0, null} -!22 = metadata !{i32 9, i32 3, metadata !23, null} -!23 = metadata !{i32 589835, metadata !0, i32 8, i32 1, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] -!24 = metadata !{metadata !"int", metadata !25} -!25 = metadata !{metadata !"omnipotent char", metadata !26} -!26 = metadata !{metadata !"Simple C/C++ TBAA", null} -!27 = metadata !{i32 10, i32 3, metadata 
!23, null} -!28 = metadata !{metadata !"any pointer", metadata !25} -!29 = metadata !{i32 11, i32 3, metadata !23, null} -!30 = metadata !{%struct.S1* @p} -!31 = metadata !{i32 7, i32 13, metadata !0, metadata !32} -!32 = metadata !{i32 16, i32 3, metadata !33, null} -!33 = metadata !{i32 589835, metadata !6, i32 15, i32 15, metadata !1, i32 1} ; [ DW_TAG_lexical_block ] -!34 = metadata !{i32 1} -!35 = metadata !{i32 7, i32 21, metadata !0, metadata !32} -!36 = metadata !{i32 9, i32 3, metadata !23, metadata !32} -!37 = metadata !{i32 10, i32 3, metadata !23, metadata !32} -!38 = metadata !{i32 17, i32 1, metadata !33, null} diff --git a/test/CodeGen/X86/dbg-value-isel.ll b/test/CodeGen/X86/dbg-value-isel.ll index f1101e61f0fe..55be3b1f222b 100644 --- a/test/CodeGen/X86/dbg-value-isel.ll +++ b/test/CodeGen/X86/dbg-value-isel.ll @@ -78,25 +78,26 @@ declare <4 x i32> @__amdil_get_global_id_int() nounwind declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone -!llvm.dbg.sp = !{!0} +!llvm.dbg.cu = !{!2} -!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"__OpenCL_nbt02_kernel", metadata !"__OpenCL_nbt02_kernel", metadata !"__OpenCL_nbt02_kernel", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, null} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 589865, metadata !"OCLlLwTXZ.cl", metadata !"/tmp", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 589841, i32 0, i32 1, metadata !"OCLlLwTXZ.cl", metadata !"/tmp", metadata !"clc", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!0 = metadata !{i32 786478, metadata !1, metadata !"__OpenCL_nbt02_kernel", metadata !"__OpenCL_nbt02_kernel", metadata !"__OpenCL_nbt02_kernel", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, null} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, metadata !20, i32 1, metadata !"clc", i1 false, metadata !"", i32 0, null, null, metadata !19, null, null} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 786453, metadata !20, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !4 = metadata !{null, metadata !5} -!5 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_pointer_type ] -!6 = metadata !{i32 589846, metadata !2, metadata !"uint", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ] -!7 = metadata !{i32 589860, metadata !2, metadata !"unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] -!8 = metadata !{i32 590081, metadata !0, metadata !"ip", metadata !1, i32 1, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] +!5 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_pointer_type ] +!6 = metadata !{i32 589846, metadata !20, metadata !2, metadata !"uint", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ] +!7 = metadata !{i32 786468, null, metadata !2, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] +!8 = metadata !{i32 786689, metadata !0, metadata !"ip", metadata !1, i32 1, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ] !9 = 
metadata !{i32 1, i32 32, metadata !0, null} -!10 = metadata !{i32 590080, metadata !11, metadata !"tid", metadata !1, i32 3, metadata !6, i32 0} ; [ DW_TAG_auto_variable ] -!11 = metadata !{i32 589835, metadata !0, i32 2, i32 1, metadata !1, i32 1} ; [ DW_TAG_lexical_block ] +!10 = metadata !{i32 786688, metadata !11, metadata !"tid", metadata !1, i32 3, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ] +!11 = metadata !{i32 786443, metadata !0, i32 2, i32 1, metadata !1, i32 1} ; [ DW_TAG_lexical_block ] !12 = metadata !{i32 5, i32 24, metadata !11, null} -!13 = metadata !{i32 590080, metadata !11, metadata !"gid", metadata !1, i32 3, metadata !6, i32 0} ; [ DW_TAG_auto_variable ] +!13 = metadata !{i32 786688, metadata !11, metadata !"gid", metadata !1, i32 3, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ] !14 = metadata !{i32 6, i32 25, metadata !11, null} -!15 = metadata !{i32 590080, metadata !11, metadata !"lsz", metadata !1, i32 3, metadata !6, i32 0} ; [ DW_TAG_auto_variable ] +!15 = metadata !{i32 786688, metadata !11, metadata !"lsz", metadata !1, i32 3, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ] !16 = metadata !{i32 7, i32 26, metadata !11, null} !17 = metadata !{i32 9, i32 24, metadata !11, null} !18 = metadata !{i32 10, i32 1, metadata !0, null} - +!19 = metadata !{metadata !0} +!20 = metadata !{metadata !"OCLlLwTXZ.cl", metadata !"/tmp"} diff --git a/test/CodeGen/X86/dbg-value-location.ll b/test/CodeGen/X86/dbg-value-location.ll index 05e29ecff03f..2a1916f26c97 100644 --- a/test/CodeGen/X86/dbg-value-location.ll +++ b/test/CodeGen/X86/dbg-value-location.ll @@ -45,26 +45,30 @@ declare hidden fastcc i32 @bar2(i32) nounwind optsize ssp declare hidden fastcc i32 @bar3(i32) nounwind optsize ssp declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone -!llvm.dbg.sp = !{!0, !6, !7, !8} +!llvm.dbg.cu = !{!2} -!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 19510, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i64, i8*, i32)* @foo} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 589865, metadata !"/tmp/f.c", metadata !"/tmp", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"f.i", metadata !"/tmp", metadata !"clang version 2.9 (trunk 124753)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!0 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 19510, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i64, i8*, i32)* @foo, null, null, null, i32 19510} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786473, metadata !26} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, metadata !27, i32 12, metadata !"clang version 2.9 (trunk 124753)", i1 true, metadata !"", i32 0, null, null, metadata !24, null, null} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5} -!5 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 589870, i32 0, metadata !1, metadata !"bar3", metadata !"bar3", metadata !"", metadata 
!1, i32 14827, metadata !3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @bar3} ; [ DW_TAG_subprogram ] -!7 = metadata !{i32 589870, i32 0, metadata !1, metadata !"bar2", metadata !"bar2", metadata !"", metadata !1, i32 15397, metadata !3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @bar2} ; [ DW_TAG_subprogram ] -!8 = metadata !{i32 589870, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"", metadata !1, i32 12382, metadata !9, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i32*)* @bar} ; [ DW_TAG_subprogram ] -!9 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !10, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!5 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"bar3", metadata !"bar3", metadata !"", i32 14827, metadata !3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @bar3} ; [ DW_TAG_subprogram ] +!7 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"bar2", metadata !"bar2", metadata !"", i32 15397, metadata !3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @bar2} ; [ DW_TAG_subprogram ] +!8 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"bar", metadata !"bar", metadata !"", i32 12382, metadata !9, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i32*)* @bar} ; [ DW_TAG_subprogram ] +!9 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !10, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !10 = metadata !{metadata !11} -!11 = metadata !{i32 589860, metadata !2, metadata !"unsigned char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ] -!12 = metadata !{i32 590081, metadata !0, metadata !"var", metadata !1, i32 19509, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] +!11 = metadata !{i32 786468, metadata !2, metadata !"unsigned char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ] +!12 = metadata !{i32 786689, metadata !0, metadata !"var", metadata !1, i32 19509, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ] !13 = metadata !{i32 19509, i32 20, metadata !0, null} !14 = metadata !{i32 18091, i32 2, metadata !15, metadata !17} -!15 = metadata !{i32 589835, metadata !16, i32 18086, i32 1, metadata !1, i32 748} ; [ DW_TAG_lexical_block ] -!16 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo_bar", metadata !"foo_bar", metadata !"", metadata !1, i32 18086, metadata !3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null} ; [ DW_TAG_subprogram ] +!15 = metadata !{i32 786443, metadata !1, metadata !16, i32 18086, i32 1, i32 748} ; [ DW_TAG_lexical_block ] +!16 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"foo_bar", metadata !"foo_bar", metadata !"", i32 18086, metadata !3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null} ; [ DW_TAG_subprogram ] !17 = metadata !{i32 19514, i32 2, metadata !18, null} -!18 = metadata !{i32 589835, metadata !0, i32 19510, i32 1, metadata !1, i32 99} ; [ DW_TAG_lexical_block ] +!18 = metadata !{i32 786443, metadata !1, metadata !0, i32 19510, i32 1, i32 99} ; [ DW_TAG_lexical_block ] !22 = metadata !{i32 18094, i32 2, metadata !15, metadata !17} !23 = metadata !{i32 19524, i32 1, metadata !18, null} +!24 = metadata !{metadata !0, 
metadata !6, metadata !7, metadata !8} +!25 = metadata !{i32 786473, metadata !27} ; [ DW_TAG_file_type ] +!26 = metadata !{metadata !"/tmp/f.c", metadata !"/tmp"} +!27 = metadata !{metadata !"f.i", metadata !"/tmp"} diff --git a/test/CodeGen/X86/dbg-value-range.ll b/test/CodeGen/X86/dbg-value-range.ll index 6b16865ba9ee..6766dbe9edb0 100644 --- a/test/CodeGen/X86/dbg-value-range.ll +++ b/test/CodeGen/X86/dbg-value-range.ll @@ -17,22 +17,21 @@ declare i32 @foo(...) declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone -!llvm.dbg.sp = !{!0} -!llvm.dbg.lv.bar = !{!6, !11} +!llvm.dbg.cu = !{!2} -!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (%struct.a*)* @bar} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 589865, metadata !"bar.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"bar.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 122997)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!0 = metadata !{i32 786478, metadata !1, metadata !"bar", metadata !"bar", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (%struct.a*)* @bar, null, null, metadata !21, i32 0} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786473, metadata !22} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, metadata !22, i32 12, metadata !"clang version 2.9 (trunk 122997)", i1 true, metadata !"", i32 0, null, null, metadata !20, null, null} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5} -!5 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 590081, metadata !0, metadata !"b", metadata !1, i32 5, metadata !7, i32 0} ; [ DW_TAG_arg_variable ] -!7 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ] -!8 = metadata !{i32 589843, metadata !2, metadata !"a", metadata !1, i32 1, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !9, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!5 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 786689, metadata !0, metadata !"b", metadata !1, i32 5, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ] +!7 = metadata !{i32 786447, metadata !2, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ] +!8 = metadata !{i32 786451, metadata !2, metadata !"a", metadata !1, i32 1, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !9, i32 0, i32 0} ; [ DW_TAG_structure_type ] !9 = metadata !{metadata !10} -!10 = metadata !{i32 589837, metadata !1, metadata !"c", metadata !1, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ] -!11 = metadata !{i32 590080, metadata !12, metadata !"x", metadata !1, i32 6, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] -!12 = metadata !{i32 589835, metadata !0, i32 5, i32 22, metadata !1, i32 0} ; [ 
DW_TAG_lexical_block ] +!10 = metadata !{i32 786445, metadata !1, metadata !"c", metadata !1, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ] +!11 = metadata !{i32 786688, metadata !12, metadata !"x", metadata !1, i32 6, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ] +!12 = metadata !{i32 786443, metadata !22, metadata !0, i32 5, i32 22, i32 0} ; [ DW_TAG_lexical_block ] !13 = metadata !{i32 5, i32 19, metadata !0, null} !14 = metadata !{i32 6, i32 14, metadata !12, null} !15 = metadata !{metadata !"int", metadata !16} @@ -40,6 +39,9 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !17 = metadata !{metadata !"Simple C/C++ TBAA", null} !18 = metadata !{i32 7, i32 2, metadata !12, null} !19 = metadata !{i32 8, i32 2, metadata !12, null} +!20 = metadata !{metadata !0} +!21 = metadata !{metadata !6, metadata !11} +!22 = metadata !{metadata !"bar.c", metadata !"/private/tmp"} ; Check that variable bar:b value range is appropriately trucated in debug info. ; The variable is in %rdi which is clobbered by 'movl %ebx, %edi' diff --git a/test/CodeGen/X86/divide-by-constant.ll b/test/CodeGen/X86/divide-by-constant.ll index 8e7c13d8efa9..9669d97cb7fa 100644 --- a/test/CodeGen/X86/divide-by-constant.ll +++ b/test/CodeGen/X86/divide-by-constant.ll @@ -56,9 +56,9 @@ entry: %div = sdiv i16 %x, 10 ret i16 %div ; CHECK: test6: -; CHECK: imull $26215, %eax, %eax -; CHECK: shrl $31, %ecx -; CHECK: sarl $18, %eax +; CHECK: imull $26215, %eax, %ecx +; CHECK: sarl $18, %ecx +; CHECK: shrl $15, %eax } define i32 @test7(i32 %x) nounwind { diff --git a/test/CodeGen/X86/dwarf-comp-dir.ll b/test/CodeGen/X86/dwarf-comp-dir.ll index c64752c9522b..3bc2957963eb 100644 --- a/test/CodeGen/X86/dwarf-comp-dir.ll +++ b/test/CodeGen/X86/dwarf-comp-dir.ll @@ -1,14 +1,16 @@ ; RUN: llc %s -o %t -filetype=obj -; RUN: llvm-dwarfdump %t | FileCheck %s +; RUN: llvm-dwarfdump -debug-dump=line %t | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"empty.c", metadata !"/home/nlewycky", metadata !"clang version 3.1 (trunk 143523)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !1} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 720913, metadata !4, i32 12, metadata !"clang version 3.1 (trunk 143523)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !1} ; [ DW_TAG_compile_unit ] !1 = metadata !{metadata !2} !2 = metadata !{i32 0} +!3 = metadata !{i32 786473, metadata !4} ; [ DW_TAG_file_type ] +!4 = metadata !{metadata !"empty.c", metadata !"/home/nlewycky"} ; The important part of the following check is that dir = #0. 
; Dir Mod Time File Len File Name diff --git a/test/CodeGen/X86/dynamic-allocas-VLAs.ll b/test/CodeGen/X86/dynamic-allocas-VLAs.ll index c5e47facf346..9405f76cbed0 100644 --- a/test/CodeGen/X86/dynamic-allocas-VLAs.ll +++ b/test/CodeGen/X86/dynamic-allocas-VLAs.ll @@ -103,7 +103,7 @@ entry: declare void @t4_helper(i32*, i32*, <8 x float>*) -; Dynamic realignment + Spill +; Spilling an AVX register shouldn't cause dynamic realignment define i32 @t5(float* nocapture %f) nounwind uwtable ssp { entry: %a = alloca i32, align 4 @@ -116,21 +116,15 @@ entry: ret i32 %add ; CHECK: _t5 -; CHECK: pushq %rbp -; CHECK: movq %rsp, %rbp -; CHECK: andq $-32, %rsp ; CHECK: subq ${{[0-9]+}}, %rsp ; ; CHECK: vmovaps (%rdi), [[AVXREG:%ymm[0-9]+]] -; CHECK: vmovaps [[AVXREG]], (%rsp) +; CHECK: vmovups [[AVXREG]], (%rsp) ; CHECK: leaq {{[0-9]+}}(%rsp), %rdi ; CHECK: callq _t5_helper1 -; CHECK: vmovaps (%rsp), %ymm0 +; CHECK: vmovups (%rsp), %ymm0 ; CHECK: callq _t5_helper2 ; CHECK: movl {{[0-9]+}}(%rsp), %eax -; -; CHECK: movq %rbp, %rsp -; CHECK: popq %rbp } declare void @t5_helper1(i32*) diff --git a/test/CodeGen/X86/early-ifcvt-crash.ll b/test/CodeGen/X86/early-ifcvt-crash.ll index c8280269689d..d9580503e91e 100644 --- a/test/CodeGen/X86/early-ifcvt-crash.ll +++ b/test/CodeGen/X86/early-ifcvt-crash.ll @@ -1,5 +1,7 @@ ; RUN: llc < %s -x86-early-ifcvt -verify-machineinstrs ; RUN: llc < %s -x86-early-ifcvt -stress-early-ifcvt -verify-machineinstrs +; CPU without a scheduling model: +; RUN: llc < %s -x86-early-ifcvt -mcpu=pentium3 -verify-machineinstrs ; ; Run these tests with and without -stress-early-ifcvt to exercise heuristics. ; diff --git a/test/CodeGen/X86/early-ifcvt.ll b/test/CodeGen/X86/early-ifcvt.ll index 2e1852d3e3ae..2606bd28d5fc 100644 --- a/test/CodeGen/X86/early-ifcvt.ll +++ b/test/CodeGen/X86/early-ifcvt.ll @@ -142,3 +142,34 @@ save_state_and_return: } declare void @BZ2_bz__AssertH__fail() + +; Make sure we don't speculate on div/idiv instructions +; CHECK: test_idiv +; CHECK-NOT: cmov +define i32 @test_idiv(i32 %a, i32 %b) nounwind uwtable readnone ssp { + %1 = icmp eq i32 %b, 0 + br i1 %1, label %4, label %2 + +; <label>:2 ; preds = %0 + %3 = sdiv i32 %a, %b + br label %4 + +; <label>:4 ; preds = %0, %2 + %5 = phi i32 [ %3, %2 ], [ %a, %0 ] + ret i32 %5 +} + +; CHECK: test_div +; CHECK-NOT: cmov +define i32 @test_div(i32 %a, i32 %b) nounwind uwtable readnone ssp { + %1 = icmp eq i32 %b, 0 + br i1 %1, label %4, label %2 + +; <label>:2 ; preds = %0 + %3 = udiv i32 %a, %b + br label %4 + +; <label>:4 ; preds = %0, %2 + %5 = phi i32 [ %3, %2 ], [ %a, %0 ] + ret i32 %5 +} diff --git a/test/CodeGen/X86/fast-isel-args-fail.ll b/test/CodeGen/X86/fast-isel-args-fail.ll new file mode 100644 index 000000000000..e748e1cad1fd --- /dev/null +++ b/test/CodeGen/X86/fast-isel-args-fail.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s -fast-isel -verify-machineinstrs -mtriple=x86_64-apple-darwin10 +; RUN: llc < %s -fast-isel -verify-machineinstrs -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=WIN32 +; RUN: llc < %s -fast-isel -verify-machineinstrs -mtriple=x86_64-pc-win64 | FileCheck %s -check-prefix=WIN64 +; Requires: Asserts + +; Previously, this would cause an assert. +define i31 @t1(i31 %a, i31 %b, i31 %c) { +entry: + %add = add nsw i31 %b, %a + %add1 = add nsw i31 %add, %c + ret i31 %add1 +} + +; We don't handle the Windows CC, yet. 
+define i32 @foo(i32* %p) { +entry: +; WIN32: foo +; WIN32: movl (%rcx), %eax +; WIN64: foo +; WIN64: movl (%rdi), %eax + %0 = load i32* %p, align 4 + ret i32 %0 +} diff --git a/test/CodeGen/X86/fast-isel-args.ll b/test/CodeGen/X86/fast-isel-args.ll new file mode 100644 index 000000000000..0f3626565e7d --- /dev/null +++ b/test/CodeGen/X86/fast-isel-args.ll @@ -0,0 +1,25 @@ +; RUN: llc < %s -fast-isel -fast-isel-abort -fast-isel-abort-args -verify-machineinstrs -mtriple=x86_64-apple-darwin10 + +; Just make sure these don't abort when lowering the arguments. +define i32 @t1(i32 %a, i32 %b, i32 %c) { +entry: + %add = add nsw i32 %b, %a + %add1 = add nsw i32 %add, %c + ret i32 %add1 +} + +define i64 @t2(i64 %a, i64 %b, i64 %c) { +entry: + %add = add nsw i64 %b, %a + %add1 = add nsw i64 %add, %c + ret i64 %add1 +} + +define i64 @t3(i32 %a, i64 %b, i32 %c) { +entry: + %conv = sext i32 %a to i64 + %add = add nsw i64 %conv, %b + %conv1 = sext i32 %c to i64 + %add2 = add nsw i64 %add, %conv1 + ret i64 %add2 +} diff --git a/test/CodeGen/X86/fast-isel-constant.ll b/test/CodeGen/X86/fast-isel-constant.ll new file mode 100644 index 000000000000..6f9240ac4700 --- /dev/null +++ b/test/CodeGen/X86/fast-isel-constant.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -march=x86-64 -O0 | FileCheck %s +; Make sure fast-isel doesn't reset the materialised constant map +; across an intrinsic call. + +; CHECK: movl $100000 +; CHECK-NOT: movl $100000 +define i1 @test1(i32 %v1, i32 %v2, i32* %X) nounwind { +entry: + %a = shl i32 100000, %v1 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %v2) + %ext = extractvalue {i32, i1} %t, 0 + %sum = shl i32 100000, %ext + %obit = extractvalue {i32, i1} %t, 1 + br i1 %obit, label %overflow, label %normal + +normal: + store i32 %sum, i32* %X + br label %overflow + +overflow: + ret i1 false +} + +declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) diff --git a/test/CodeGen/X86/fast-isel-expect.ll b/test/CodeGen/X86/fast-isel-expect.ll new file mode 100644 index 000000000000..c4be7f364f30 --- /dev/null +++ b/test/CodeGen/X86/fast-isel-expect.ll @@ -0,0 +1,21 @@ +; RUN: llc < %s -O0 -march=x86 | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@glbl = extern_weak constant i8 + +declare i64 @llvm.expect.i64(i64, i64) + +define void @test() { +; CHECK: movl $glbl + %tmp = call i64 @llvm.expect.i64(i64 zext (i1 icmp eq (i8* @glbl, i8* null) to i64), i64 0) + %tmp2 = icmp ne i64 %tmp, 0 + br i1 %tmp2, label %bb1, label %bb2 + +bb1: + unreachable + +bb2: + unreachable +} diff --git a/test/CodeGen/X86/fast-isel-x86-64.ll b/test/CodeGen/X86/fast-isel-x86-64.ll index cdfaf7f4c134..ad1520ef8194 100644 --- a/test/CodeGen/X86/fast-isel-x86-64.ll +++ b/test/CodeGen/X86/fast-isel-x86-64.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mattr=-avx -fast-isel -O0 -regalloc=fast -asm-verbose=0 -fast-isel-abort | FileCheck %s -; RUN: llc < %s -mattr=+avx -fast-isel -O0 -regalloc=fast -asm-verbose=0 -fast-isel-abort | FileCheck %s --check-prefix=AVX +; RUN: llc < %s -mattr=-avx -fast-isel -mcpu=core2 -O0 -regalloc=fast -asm-verbose=0 -fast-isel-abort | FileCheck %s +; RUN: llc < %s -mattr=+avx -fast-isel -mcpu=core2 -O0 -regalloc=fast -asm-verbose=0 -fast-isel-abort | FileCheck %s --check-prefix=AVX target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin10.0.0" diff --git a/test/CodeGen/X86/float-asmprint.ll b/test/CodeGen/X86/float-asmprint.ll new file mode 100644 index 000000000000..4aeae7fe0469 --- /dev/null +++ b/test/CodeGen/X86/float-asmprint.ll @@ -0,0 +1,40 @@ +; RUN: llc -mtriple=x86_64-none-linux < %s | FileCheck %s + +; Check that all current floating-point types are correctly emitted to assembly +; on a little-endian target. + +@var128 = global fp128 0xL00000000000000008000000000000000, align 16 +@varppc128 = global ppc_fp128 0xM80000000000000000000000000000000, align 16 +@var80 = global x86_fp80 0xK80000000000000000000, align 16 +@var64 = global double -0.0, align 8 +@var32 = global float -0.0, align 4 +@var16 = global half -0.0, align 2 + +; CHECK: var128: +; CHECK-NEXT: .quad 0 # fp128 -0 +; CHECK-NEXT: .quad -9223372036854775808 +; CHECK-NEXT: .size + +; CHECK: varppc128: +; CHECK-NEXT: .quad 0 # ppc_fp128 -0 +; CHECK-NEXT: .quad -9223372036854775808 +; CHECK-NEXT: .size + +; CHECK: var80: +; CHECK-NEXT: .quad 0 # x86_fp80 -0 +; CHECK-NEXT: .short 32768 +; CHECK-NEXT: .zero 6 +; CHECK-NEXT: .size + +; CHECK: var64: +; CHECK-NEXT: .quad -9223372036854775808 # double -0 +; CHECK-NEXT: .size + +; CHECK: var32: +; CHECK-NEXT: .long 2147483648 # float -0 +; CHECK-NEXT: .size + +; CHECK: var16: +; CHECK-NEXT: .short 32768 # half -0 +; CHECK-NEXT: .size + diff --git a/test/CodeGen/X86/fma4-intrinsics-x86_64.ll b/test/CodeGen/X86/fma4-intrinsics-x86_64.ll index 2fe1ecd40e0c..7a1a9ae46147 100644 --- a/test/CodeGen/X86/fma4-intrinsics-x86_64.ll +++ b/test/CodeGen/X86/fma4-intrinsics-x86_64.ll @@ -63,6 +63,16 @@ define < 4 x float > @test_x86_fma_vfmadd_ps_load2(< 4 x float > %a0, < 4 x floa } declare < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone +; To test execution dependency +define < 4 x float > @test_x86_fma_vfmadd_ps_load3(< 4 x float >* %a0, < 4 x float >* %a1, < 4 x float > %a2) { + ; CHECK: vmovaps + ; CHECK: vfmaddps %{{.*}}, (%{{.*}}) + %x = load <4 x float>* %a0 + %y = load <4 x float>* %a1 + %res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %x, < 4 x float > %y, < 4 x float > %a2) ; <i64> [#uses=1] + ret < 4 x float > %res +} + define < 2 x double > @test_x86_fma_vfmadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) { ; CHECK: vfmaddpd %res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1] @@ -82,6 +92,16 @@ define < 2 x double > @test_x86_fma_vfmadd_pd_load2(< 2 x double > %a0, < 2 x do } declare < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone +; To test execution dependency +define < 2 x double > @test_x86_fma_vfmadd_pd_load3(< 2 x double >* %a0, < 2 x double >* %a1, < 2 x double > %a2) { + ; CHECK: vmovapd + ; CHECK: vfmaddpd %{{.*}}, (%{{.*}}) + %x = load <2 x double>* %a0 + %y = load <2 x double>* %a1 + %res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %x, < 2 x double > %y, < 2 x double > %a2) ; <i64> [#uses=1] + ret < 2 x double > %res +} + define < 8 x float > @test_x86_fma_vfmadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) { ; CHECK: vfmaddps ; CHECK: ymm diff --git a/test/CodeGen/X86/fold-call.ll b/test/CodeGen/X86/fold-call.ll index 603e9ad66caa..35327faa6486 100644 --- 
a/test/CodeGen/X86/fold-call.ll +++ b/test/CodeGen/X86/fold-call.ll @@ -1,10 +1,27 @@ -; RUN: llc < %s -march=x86 | not grep mov -; RUN: llc < %s -march=x86-64 | not grep mov +; RUN: llc < %s -march=x86 | FileCheck %s +; RUN: llc < %s -march=x86-64 | FileCheck %s -declare void @bar() +; CHECK: test1 +; CHECK-NOT: mov -define void @foo(i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, void()* %arg) nounwind { +declare void @bar() +define void @test1(i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, void()* %arg) nounwind { call void @bar() call void %arg() ret void } + +; PR14739 +; CHECK: test2 +; CHECK: mov{{.*}} $0, ([[REGISTER:%[a-z]+]]) +; CHECK-NOT: jmp{{.*}} *([[REGISTER]]) + +%struct.X = type { void ()* } +define void @test2(%struct.X* nocapture %x) { +entry: + %f = getelementptr inbounds %struct.X* %x, i64 0, i32 0 + %0 = load void ()** %f + store void ()* null, void ()** %f + tail call void %0() + ret void +} diff --git a/test/CodeGen/X86/fold-load-vec.ll b/test/CodeGen/X86/fold-load-vec.ll new file mode 100644 index 000000000000..c1756d5e2e1a --- /dev/null +++ b/test/CodeGen/X86/fold-load-vec.ll @@ -0,0 +1,39 @@ +; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+sse41 | FileCheck %s + +; rdar://12721174 +; We should not fold movss into pshufd since pshufd expects m128 while movss +; loads from m32. +define void @sample_test(<4 x float>* %source, <2 x float>* %dest) nounwind { +; CHECK: sample_test +; CHECK: movss +; CHECK: pshufd +entry: + %source.addr = alloca <4 x float>*, align 8 + %dest.addr = alloca <2 x float>*, align 8 + %tmp = alloca <2 x float>, align 8 + store <4 x float>* %source, <4 x float>** %source.addr, align 8 + store <2 x float>* %dest, <2 x float>** %dest.addr, align 8 + store <2 x float> zeroinitializer, <2 x float>* %tmp, align 8 + %0 = load <4 x float>** %source.addr, align 8 + %arrayidx = getelementptr inbounds <4 x float>* %0, i64 0 + %1 = load <4 x float>* %arrayidx, align 16 + %2 = extractelement <4 x float> %1, i32 0 + %3 = load <2 x float>* %tmp, align 8 + %4 = insertelement <2 x float> %3, float %2, i32 1 + store <2 x float> %4, <2 x float>* %tmp, align 8 + %5 = load <2 x float>* %tmp, align 8 + %6 = load <2 x float>** %dest.addr, align 8 + %arrayidx1 = getelementptr inbounds <2 x float>* %6, i64 0 + store <2 x float> %5, <2 x float>* %arrayidx1, align 8 + %7 = load <2 x float>** %dest.addr, align 8 + %arrayidx2 = getelementptr inbounds <2 x float>* %7, i64 0 + %8 = load <2 x float>* %arrayidx2, align 8 + %vecext = extractelement <2 x float> %8, i32 0 + %9 = load <2 x float>** %dest.addr, align 8 + %arrayidx3 = getelementptr inbounds <2 x float>* %9, i64 0 + %10 = load <2 x float>* %arrayidx3, align 8 + %vecext4 = extractelement <2 x float> %10, i32 1 + call void @ext(float %vecext, float %vecext4) + ret void +} +declare void @ext(float, float) diff --git a/test/CodeGen/X86/fold-pcmpeqd-2.ll b/test/CodeGen/X86/fold-pcmpeqd-2.ll index 9cf4607cf5b2..2bde76efd2ae 100644 --- a/test/CodeGen/X86/fold-pcmpeqd-2.ll +++ b/test/CodeGen/X86/fold-pcmpeqd-2.ll @@ -43,21 +43,21 @@ forbody: ; preds = %forcond %mul171.i = fmul <4 x float> %add167.i, %sub140.i ; <<4 x float>> [#uses=1] %add172.i = fadd <4 x float> %mul171.i, < float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000 > ; <<4 x float>> [#uses=1] %bitcast176.i = bitcast <4 x float> %add172.i to <4 x i32> ; <<4 x i32>> [#uses=1] - %andnps178.i = and <4 x i32> %bitcast176.i, zeroinitializer ; <<4 x i32>> [#uses=1] + %andnps178.i = add <4 x i32> 
%bitcast176.i, <i32 1, i32 1, i32 1, i32 1> ; <<4 x i32>> [#uses=1] %bitcast179.i = bitcast <4 x i32> %andnps178.i to <4 x float> ; <<4 x float>> [#uses=1] %mul186.i = fmul <4 x float> %bitcast179.i, zeroinitializer ; <<4 x float>> [#uses=1] %bitcast190.i = bitcast <4 x float> %mul186.i to <4 x i32> ; <<4 x i32>> [#uses=1] - %andnps192.i = and <4 x i32> %bitcast190.i, zeroinitializer ; <<4 x i32>> [#uses=1] + %andnps192.i = add <4 x i32> %bitcast190.i, <i32 1, i32 1, i32 1, i32 1> ; <<4 x i32>> [#uses=1] %xorps.i = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] - %orps203.i = or <4 x i32> %andnps192.i, %xorps.i ; <<4 x i32>> [#uses=1] + %orps203.i = add <4 x i32> %andnps192.i, %xorps.i ; <<4 x i32>> [#uses=1] %bitcast204.i = bitcast <4 x i32> %orps203.i to <4 x float> ; <<4 x float>> [#uses=1] %mul310 = fmul <4 x float> %bitcast204.i104, zeroinitializer ; <<4 x float>> [#uses=2] %mul313 = fmul <4 x float> %bitcast204.i, zeroinitializer ; <<4 x float>> [#uses=1] %cmpunord.i11 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> zeroinitializer, <4 x float> zeroinitializer, i8 3) nounwind ; <<4 x float>> [#uses=1] %bitcast6.i13 = bitcast <4 x float> %cmpunord.i11 to <4 x i32> ; <<4 x i32>> [#uses=2] - %andps.i14 = and <4 x i32> zeroinitializer, %bitcast6.i13 ; <<4 x i32>> [#uses=1] + %andps.i14 = add <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %bitcast6.i13 ; <<4 x i32>> [#uses=1] %not.i16 = xor <4 x i32> %bitcast6.i13, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] - %andnps.i17 = and <4 x i32> zeroinitializer, %not.i16 ; <<4 x i32>> [#uses=1] + %andnps.i17 = add <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %not.i16 ; <<4 x i32>> [#uses=1] %orps.i18 = or <4 x i32> %andnps.i17, %andps.i14 ; <<4 x i32>> [#uses=1] %bitcast17.i19 = bitcast <4 x i32> %orps.i18 to <4 x float> ; <<4 x float>> [#uses=1] %tmp83 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul310, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1] diff --git a/test/CodeGen/X86/fold-vex.ll b/test/CodeGen/X86/fold-vex.ll new file mode 100644 index 000000000000..2bb5b441c7c0 --- /dev/null +++ b/test/CodeGen/X86/fold-vex.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx | FileCheck %s + +;CHECK: @test +; No need to load from memory. The operand will be loaded as part of th AND instr. 
+;CHECK-NOT: vmovaps +;CHECK: vandps +;CHECK: ret + +define void @test1(<8 x i32>* %p0, <8 x i32> %in1) nounwind { +entry: + %in0 = load <8 x i32>* %p0, align 2 + %a = and <8 x i32> %in0, %in1 + store <8 x i32> %a, <8 x i32>* undef + ret void +} + diff --git a/test/CodeGen/X86/fp-fast.ll b/test/CodeGen/X86/fp-fast.ll index d70aa7d79f00..287504801d04 100644 --- a/test/CodeGen/X86/fp-fast.ll +++ b/test/CodeGen/X86/fp-fast.ll @@ -38,7 +38,7 @@ define float @test3(float %a) { ; CHECK: test4 define float @test4(float %a) { ; CHECK-NOT: fma -; CHECK-NOT mul +; CHECK-NOT: mul ; CHECK-NOT: add ; CHECK: ret %t1 = fmul float %a, 0.0 diff --git a/test/CodeGen/X86/fp-load-trunc.ll b/test/CodeGen/X86/fp-load-trunc.ll index 2ae65c97d97a..a973befdafe7 100644 --- a/test/CodeGen/X86/fp-load-trunc.ll +++ b/test/CodeGen/X86/fp-load-trunc.ll @@ -49,8 +49,8 @@ define <8 x float> @test4(<8 x double>* %p) nounwind { ; CHECK: movlhps ; CHECK: ret ; AVX: test4 -; AVX: vcvtpd2psy {{[0-9]*}}(%{{.*}}) -; AVX: vcvtpd2psy {{[0-9]*}}(%{{.*}}) +; AVX: vcvtpd2psy +; AVX: vcvtpd2psy ; AVX: vinsertf128 ; AVX: ret %x = load <8 x double>* %p diff --git a/test/CodeGen/X86/handle-move.ll b/test/CodeGen/X86/handle-move.ll index e9f7a962e20d..ba96275569b3 100644 --- a/test/CodeGen/X86/handle-move.ll +++ b/test/CodeGen/X86/handle-move.ll @@ -16,7 +16,7 @@ ; DL: [0B,16r:0)[128r,144r:2)[144r,144d:1) 0@0B-phi 1@144r 2@128r ; --> [0B,16r:0)[128r,180r:2)[180r,180d:1) 0@0B-phi 1@180r 2@128r ; -define i32 @f1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind uwtable readnone ssp { +define i32 @f1(i32 %a, i32 %b, i32 %c) nounwind uwtable readnone ssp { entry: %y = add i32 %c, 1 %x = udiv i32 %b, %a @@ -50,7 +50,7 @@ entry: ; %vreg5: [16r,112r:0) 0@16r ; --> [16r,120r:0) 0@16r ; -define i32 @f3(i32 %a, i32 %b, i32 %c, i32 %d) nounwind uwtable readnone ssp { +define i32 @f3(i32 %a, i32 %b) nounwind uwtable readnone ssp { entry: %y = sub i32 %a, %b %x = add i32 %a, %b diff --git a/test/CodeGen/X86/hipe-cc.ll b/test/CodeGen/X86/hipe-cc.ll new file mode 100644 index 000000000000..76d17a09d54e --- /dev/null +++ b/test/CodeGen/X86/hipe-cc.ll @@ -0,0 +1,77 @@ +; RUN: llc < %s -tailcallopt -code-model=medium -stack-alignment=4 -mtriple=i686-linux-gnu -mcpu=pentium | FileCheck %s + +; Check the HiPE calling convention works (x86-32) + +define void @zap(i32 %a, i32 %b) nounwind { +entry: + ; CHECK: movl 40(%esp), %eax + ; CHECK-NEXT: movl 44(%esp), %edx + ; CHECK-NEXT: movl $8, %ecx + ; CHECK-NEXT: calll addfour + %0 = call cc 11 {i32, i32, i32} @addfour(i32 undef, i32 undef, i32 %a, i32 %b, i32 8) + %res = extractvalue {i32, i32, i32} %0, 2 + + ; CHECK: movl %eax, 16(%esp) + ; CHECK-NEXT: movl $2, 12(%esp) + ; CHECK-NEXT: movl $1, 8(%esp) + ; CHECK: calll foo + tail call void @foo(i32 undef, i32 undef, i32 1, i32 2, i32 %res) nounwind + ret void +} + +define cc 11 {i32, i32, i32} @addfour(i32 %hp, i32 %p, i32 %x, i32 %y, i32 %z) nounwind { +entry: + ; CHECK: addl %edx, %eax + ; CHECK-NEXT: addl %ecx, %eax + %0 = add i32 %x, %y + %1 = add i32 %0, %z + + ; CHECK: ret + %res = insertvalue {i32, i32, i32} undef, i32 %1, 2 + ret {i32, i32, i32} %res +} + +define cc 11 void @foo(i32 %hp, i32 %p, i32 %arg0, i32 %arg1, i32 %arg2) nounwind { +entry: + ; CHECK: movl %esi, 16(%esp) + ; CHECK-NEXT: movl %ebp, 12(%esp) + ; CHECK-NEXT: movl %eax, 8(%esp) + ; CHECK-NEXT: movl %edx, 4(%esp) + ; CHECK-NEXT: movl %ecx, (%esp) + %hp_var = alloca i32 + %p_var = alloca i32 + %arg0_var = alloca i32 + %arg1_var = alloca i32 + %arg2_var = alloca i32 + store i32 %hp, i32* %hp_var + 
store i32 %p, i32* %p_var + store i32 %arg0, i32* %arg0_var + store i32 %arg1, i32* %arg1_var + store i32 %arg2, i32* %arg2_var + + ; CHECK: movl 4(%esp), %edx + ; CHECK-NEXT: movl 8(%esp), %eax + ; CHECK-NEXT: movl 12(%esp), %ebp + ; CHECK-NEXT: movl 16(%esp), %esi + %0 = load i32* %hp_var + %1 = load i32* %p_var + %2 = load i32* %arg0_var + %3 = load i32* %arg1_var + %4 = load i32* %arg2_var + ; CHECK: jmp bar + tail call cc 11 void @bar(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) nounwind + ret void +} + +define cc 11 void @baz() nounwind { + %tmp_clos = load i32* @clos + %tmp_clos2 = inttoptr i32 %tmp_clos to i32* + %indirect_call = bitcast i32* %tmp_clos2 to void (i32, i32, i32)* + ; CHECK: movl $42, %eax + ; CHECK-NEXT: jmpl *clos + tail call cc 11 void %indirect_call(i32 undef, i32 undef, i32 42) nounwind + ret void +} + +@clos = external constant i32 +declare cc 11 void @bar(i32, i32, i32, i32, i32) diff --git a/test/CodeGen/X86/hipe-cc64.ll b/test/CodeGen/X86/hipe-cc64.ll new file mode 100644 index 000000000000..5dbb5a25cbeb --- /dev/null +++ b/test/CodeGen/X86/hipe-cc64.ll @@ -0,0 +1,87 @@ +; RUN: llc < %s -tailcallopt -code-model=medium -stack-alignment=8 -mtriple=x86_64-linux-gnu -mcpu=opteron | FileCheck %s + +; Check the HiPE calling convention works (x86-64) + +define void @zap(i64 %a, i64 %b) nounwind { +entry: + ; CHECK: movq %rsi, %rax + ; CHECK-NEXT: movq %rdi, %rsi + ; CHECK-NEXT: movq %rax, %rdx + ; CHECK-NEXT: movl $8, %ecx + ; CHECK-NEXT: movl $9, %r8d + ; CHECK-NEXT: callq addfour + %0 = call cc 11 {i64, i64, i64} @addfour(i64 undef, i64 undef, i64 %a, i64 %b, i64 8, i64 9) + %res = extractvalue {i64, i64, i64} %0, 2 + + ; CHECK: movl $1, %edx + ; CHECK-NEXT: movl $2, %ecx + ; CHECK-NEXT: movl $3, %r8d + ; CHECK-NEXT: movq %rax, %r9 + ; CHECK: callq foo + tail call void @foo(i64 undef, i64 undef, i64 1, i64 2, i64 3, i64 %res) nounwind + ret void +} + +define cc 11 {i64, i64, i64} @addfour(i64 %hp, i64 %p, i64 %x, i64 %y, i64 %z, i64 %w) nounwind { +entry: + ; CHECK: leaq (%rsi,%rdx), %rax + ; CHECK-NEXT: addq %rcx, %rax + ; CHECK-NEXT: addq %r8, %rax + %0 = add i64 %x, %y + %1 = add i64 %0, %z + %2 = add i64 %1, %w + + ; CHECK: ret + %res = insertvalue {i64, i64, i64} undef, i64 %2, 2 + ret {i64, i64, i64} %res +} + +define cc 11 void @foo(i64 %hp, i64 %p, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3) nounwind { +entry: + ; CHECK: movq %r15, 40(%rsp) + ; CHECK-NEXT: movq %rbp, 32(%rsp) + ; CHECK-NEXT: movq %rsi, 24(%rsp) + ; CHECK-NEXT: movq %rdx, 16(%rsp) + ; CHECK-NEXT: movq %rcx, 8(%rsp) + ; CHECK-NEXT: movq %r8, (%rsp) + %hp_var = alloca i64 + %p_var = alloca i64 + %arg0_var = alloca i64 + %arg1_var = alloca i64 + %arg2_var = alloca i64 + %arg3_var = alloca i64 + store i64 %hp, i64* %hp_var + store i64 %p, i64* %p_var + store i64 %arg0, i64* %arg0_var + store i64 %arg1, i64* %arg1_var + store i64 %arg2, i64* %arg2_var + store i64 %arg3, i64* %arg3_var + + ; CHECK: movq 8(%rsp), %rcx + ; CHECK-NEXT: movq 16(%rsp), %rdx + ; CHECK-NEXT: movq 24(%rsp), %rsi + ; CHECK-NEXT: movq 32(%rsp), %rbp + ; CHECK-NEXT: movq 40(%rsp), %r15 + %0 = load i64* %hp_var + %1 = load i64* %p_var + %2 = load i64* %arg0_var + %3 = load i64* %arg1_var + %4 = load i64* %arg2_var + %5 = load i64* %arg3_var + ; CHECK: jmp bar + tail call cc 11 void @bar(i64 %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5) nounwind + ret void +} + +define cc 11 void @baz() nounwind { + %tmp_clos = load i64* @clos + %tmp_clos2 = inttoptr i64 %tmp_clos to i64* + %indirect_call = bitcast i64* %tmp_clos2 to void (i64, i64, 
i64)* + ; CHECK: movl $42, %esi + ; CHECK-NEXT: jmpq *(%rax) + tail call cc 11 void %indirect_call(i64 undef, i64 undef, i64 42) nounwind + ret void +} + +@clos = external constant i64 +declare cc 11 void @bar(i64, i64, i64, i64, i64, i64) diff --git a/test/CodeGen/X86/hipe-prologue.ll b/test/CodeGen/X86/hipe-prologue.ll new file mode 100644 index 000000000000..ff3c5c803c90 --- /dev/null +++ b/test/CodeGen/X86/hipe-prologue.ll @@ -0,0 +1,67 @@ +; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -verify-machineinstrs | FileCheck %s -check-prefix=X32-Linux +; RUN: llc < %s -mtriple=x86_64-linux-gnu -verify-machineinstrs | FileCheck %s -check-prefix=X64-Linux + +; The HiPE compiler (i.e., the native code compiler of the Erlang/OTP system) +; adds a custom assembly prologue in order to efficiently manipulate the stack +; at runtime. + +; Just to prevent the alloca from being optimized away. +declare void @dummy_use(i32*, i32) + +define {i32, i32} @test_basic(i32 %hp, i32 %p) { + ; X32-Linux: test_basic: + ; X32-Linux-NOT: calll inc_stack_0 + + ; X64-Linux: test_basic: + ; X64-Linux-NOT: callq inc_stack_0 + + %mem = alloca i32, i32 10 + call void @dummy_use (i32* %mem, i32 10) + %1 = insertvalue {i32, i32} undef, i32 %hp, 0 + %2 = insertvalue {i32, i32} %1, i32 %p, 1 + ret {i32, i32} %1 +} + +define cc 11 {i32, i32} @test_basic_hipecc(i32 %hp, i32 %p) { + ; X32-Linux: test_basic_hipecc: + ; X32-Linux: leal -156(%esp), %ebx + ; X32-Linux-NEXT: cmpl 76(%ebp), %ebx + ; X32-Linux-NEXT: jb .LBB1_1 + + ; X32-Linux: ret + + ; X32-Linux: .LBB1_1: + ; X32-Linux-NEXT: calll inc_stack_0 + + ; X64-Linux: test_basic_hipecc: + ; X64-Linux: leaq -232(%rsp), %r14 + ; X64-Linux-NEXT: cmpq 144(%rbp), %r14 + ; X64-Linux-NEXT: jb .LBB1_1 + + ; X64-Linux: ret + + ; X64-Linux: .LBB1_1: + ; X64-Linux-NEXT: callq inc_stack_0 + + %mem = alloca i32, i32 10 + call void @dummy_use (i32* %mem, i32 10) + %1 = insertvalue {i32, i32} undef, i32 %hp, 0 + %2 = insertvalue {i32, i32} %1, i32 %p, 1 + ret {i32, i32} %2 +} + +define cc 11 {i32,i32,i32} @test_nocall_hipecc(i32 %hp,i32 %p,i32 %x,i32 %y) { + ; X32-Linux: test_nocall_hipecc: + ; X32-Linux-NOT: calll inc_stack_0 + + ; X64-Linux: test_nocall_hipecc: + ; X64-Linux-NOT: callq inc_stack_0 + + %1 = add i32 %x, %y + %2 = mul i32 42, %1 + %3 = sub i32 24, %2 + %4 = insertvalue {i32, i32, i32} undef, i32 %hp, 0 + %5 = insertvalue {i32, i32, i32} %4, i32 %p, 1 + %6 = insertvalue {i32, i32, i32} %5, i32 %p, 2 + ret {i32, i32, i32} %6 +} diff --git a/test/CodeGen/X86/hoist-invariant-load.ll b/test/CodeGen/X86/hoist-invariant-load.ll index 74ecd045b3d5..34191e3f9a31 100644 --- a/test/CodeGen/X86/hoist-invariant-load.ll +++ b/test/CodeGen/X86/hoist-invariant-load.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -stats -O2 2>&1 | grep "1 machine-licm" target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/test/CodeGen/X86/imul-lea-2.ll b/test/CodeGen/X86/imul-lea-2.ll index 1cb54b37b0e1..7b79d0678bee 100644 --- a/test/CodeGen/X86/imul-lea-2.ll +++ b/test/CodeGen/X86/imul-lea-2.ll @@ -1,15 +1,19 @@ -; RUN: llc < %s -march=x86-64 | grep lea | count 3 -; RUN: llc < %s -march=x86-64 | grep shl | count 1 -; RUN: llc < %s -march=x86-64 | not grep imul +; RUN: llc < %s -march=x86-64 | FileCheck %s + +; CHECK-NOT: imul define i64 @t1(i64 %a) nounwind readnone { entry: - %0 = mul i64 %a, 81 ; <i64> [#uses=1] - ret i64 %0 + %0 = mul i64 %a, 81 +; CHECK: lea +; 
CHECK: lea + ret i64 %0 } define i64 @t2(i64 %a) nounwind readnone { entry: - %0 = mul i64 %a, 40 ; <i64> [#uses=1] - ret i64 %0 + %0 = mul i64 %a, 40 +; CHECK: shl +; CHECK: lea + ret i64 %0 } diff --git a/test/CodeGen/X86/imul-lea.ll b/test/CodeGen/X86/imul-lea.ll index 4e8e2af0f2fe..d55ece7996ed 100644 --- a/test/CodeGen/X86/imul-lea.ll +++ b/test/CodeGen/X86/imul-lea.ll @@ -1,10 +1,12 @@ -; RUN: llc < %s -march=x86 | grep lea +; RUN: llc < %s -march=x86 | FileCheck %s declare i32 @foo() define i32 @test() { - %tmp.0 = tail call i32 @foo( ) ; <i32> [#uses=1] - %tmp.1 = mul i32 %tmp.0, 9 ; <i32> [#uses=1] - ret i32 %tmp.1 + %tmp.0 = tail call i32 @foo( ) + %tmp.1 = mul i32 %tmp.0, 9 +; CHECK-NOT: mul +; CHECK: lea + ret i32 %tmp.1 } diff --git a/test/CodeGen/X86/imul64-lea.ll b/test/CodeGen/X86/imul64-lea.ll new file mode 100644 index 000000000000..047c129ddb33 --- /dev/null +++ b/test/CodeGen/X86/imul64-lea.ll @@ -0,0 +1,25 @@ +; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-pc-linux-gnux32 | FileCheck %s + +; Test that 64-bit LEAs are generated for both LP64 and ILP32 in 64-bit mode. +declare i64 @foo64() + +define i64 @test64() { + %tmp.0 = tail call i64 @foo64( ) + %tmp.1 = mul i64 %tmp.0, 9 +; CHECK-NOT: mul +; CHECK: leaq + ret i64 %tmp.1 +} + +; Test that 32-bit LEAs are generated for both LP64 and ILP32 in 64-bit mode. +declare i32 @foo32() + +define i32 @test32() { + %tmp.0 = tail call i32 @foo32( ) + %tmp.1 = mul i32 %tmp.0, 9 +; CHECK-NOT: mul +; CHECK: leal + ret i32 %tmp.1 +} + diff --git a/test/CodeGen/X86/insertelement-copytoregs.ll b/test/CodeGen/X86/insertelement-copytoregs.ll index 34a29ca7d939..88ff4dafad7d 100644 --- a/test/CodeGen/X86/insertelement-copytoregs.ll +++ b/test/CodeGen/X86/insertelement-copytoregs.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=x86-64 | grep -v IMPLICIT_DEF +; RUN: llc < %s -march=x86-64 | FileCheck %s +; CHECK-NOT: IMPLICIT_DEF define void @foo(<2 x float>* %p) { %t = insertelement <2 x float> undef, float 0.0, i32 0 diff --git a/test/CodeGen/X86/lea-2.ll b/test/CodeGen/X86/lea-2.ll index 43f69b0c6e93..21128096e6e7 100644 --- a/test/CodeGen/X86/lea-2.ll +++ b/test/CodeGen/X86/lea-2.ll @@ -1,13 +1,15 @@ -; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \ -; RUN: grep "lea EAX, DWORD PTR \[... + 4\*... - 5\]" -; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \ -; RUN: not grep add +; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | FileCheck %s define i32 @test1(i32 %A, i32 %B) { - %tmp1 = shl i32 %A, 2 ; <i32> [#uses=1] - %tmp3 = add i32 %B, -5 ; <i32> [#uses=1] - %tmp4 = add i32 %tmp3, %tmp1 ; <i32> [#uses=1] - ret i32 %tmp4 + %tmp1 = shl i32 %A, 2 + %tmp3 = add i32 %B, -5 + %tmp4 = add i32 %tmp3, %tmp1 +; The above computation of %tmp4 should match a single lea, without using +; actual add instructions. 
+; CHECK-NOT: add +; CHECK: lea {{[A-Z]+}}, DWORD PTR [{{[A-Z]+}} + 4*{{[A-Z]+}} - 5] + + ret i32 %tmp4 } diff --git a/test/CodeGen/X86/lea-4.ll b/test/CodeGen/X86/lea-4.ll index 2171204c01d1..cef47264a583 100644 --- a/test/CodeGen/X86/lea-4.ll +++ b/test/CodeGen/X86/lea-4.ll @@ -1,19 +1,21 @@ -; RUN: llc < %s -march=x86-64 | grep lea | count 2 +; RUN: llc < %s -march=x86-64 | FileCheck %s define zeroext i16 @t1(i32 %on_off) nounwind { entry: - %0 = sub i32 %on_off, 1 - %1 = mul i32 %0, 2 - %2 = trunc i32 %1 to i16 - %3 = zext i16 %2 to i32 - %4 = trunc i32 %3 to i16 - ret i16 %4 + %0 = sub i32 %on_off, 1 + %1 = mul i32 %0, 2 + %2 = trunc i32 %1 to i16 + %3 = zext i16 %2 to i32 + %4 = trunc i32 %3 to i16 +; CHECK: lea + ret i16 %4 } define i32 @t2(i32 %on_off) nounwind { entry: - %0 = sub i32 %on_off, 1 - %1 = mul i32 %0, 2 - %2 = and i32 %1, 65535 - ret i32 %2 + %0 = sub i32 %on_off, 1 + %1 = mul i32 %0, 2 + %2 = and i32 %1, 65535 +; CHECK: lea + ret i32 %2 } diff --git a/test/CodeGen/X86/legalize-shift-64.ll b/test/CodeGen/X86/legalize-shift-64.ll index c9f2fc27dbff..71ef2d3152f8 100644 --- a/test/CodeGen/X86/legalize-shift-64.ll +++ b/test/CodeGen/X86/legalize-shift-64.ll @@ -54,3 +54,14 @@ define i64 @test4(i64 %xx, i32 %test) nounwind { ; CHECK: orl %esi, %eax ; CHECK: sarl %cl, %edx } + +; PR14668 +define <2 x i64> @test5(<2 x i64> %A, <2 x i64> %B) { + %shl = shl <2 x i64> %A, %B + ret <2 x i64> %shl +; CHECK: test5 +; CHECK: shl +; CHECK: shldl +; CHECK: shl +; CHECK: shldl +} diff --git a/test/CodeGen/X86/licm-nested.ll b/test/CodeGen/X86/licm-nested.ll index c3f991d7a9b0..66074fb3682c 100644 --- a/test/CodeGen/X86/licm-nested.ll +++ b/test/CodeGen/X86/licm-nested.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc -mtriple=x86_64-apple-darwin -march=x86-64 < %s -o /dev/null -stats -info-output-file - | grep "hoisted out of loops" | grep 3 ; MachineLICM should be able to hoist the symbolic addresses out of diff --git a/test/CodeGen/X86/lit.local.cfg b/test/CodeGen/X86/lit.local.cfg index a8ad0f1a28b2..9d285bf4e238 100644 --- a/test/CodeGen/X86/lit.local.cfg +++ b/test/CodeGen/X86/lit.local.cfg @@ -1,4 +1,4 @@ -config.suffixes = ['.ll', '.c', '.cpp'] +config.suffixes = ['.ll', '.c', '.cpp', '.test'] targets = set(config.root.targets_to_build.split()) if not 'X86' in targets: diff --git a/test/CodeGen/X86/memcpy-2.ll b/test/CodeGen/X86/memcpy-2.ll index eae2e708349c..630c0ed1a33c 100644 --- a/test/CodeGen/X86/memcpy-2.ll +++ b/test/CodeGen/X86/memcpy-2.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mattr=+sse2 -mtriple=i686-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=SSE2 +; RUN: llc < %s -mattr=+sse2 -mtriple=i686-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=SSE2-Darwin +; RUN: llc < %s -mattr=+sse2 -mtriple=i686-pc-mingw32 -mcpu=core2 | FileCheck %s -check-prefix=SSE2-Mingw32 ; RUN: llc < %s -mattr=+sse,-sse2 -mtriple=i686-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=SSE1 ; RUN: llc < %s -mattr=-sse -mtriple=i686-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=NOSSE ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=X86-64 @@ -8,19 +9,26 @@ define void @t1(i32 %argc, i8** %argv) nounwind { entry: -; SSE2: t1: -; SSE2: movaps _.str, %xmm0 -; SSE2: movaps %xmm0 -; SSE2: movb $0 -; SSE2: movl $0 -; SSE2: movl $0 +; SSE2-Darwin: t1: +; SSE2-Darwin: movsd _.str+16, %xmm0 +; SSE2-Darwin: movsd %xmm0, 16(%esp) +; SSE2-Darwin: movaps _.str, %xmm0 +; SSE2-Darwin: movaps %xmm0 +; SSE2-Darwin: movb $0, 24(%esp) + +; SSE2-Mingw32: t1: 
+; SSE2-Mingw32: movsd _.str+16, %xmm0 +; SSE2-Mingw32: movsd %xmm0, 16(%esp) +; SSE2-Mingw32: movaps _.str, %xmm0 +; SSE2-Mingw32: movups %xmm0 +; SSE2-Mingw32: movb $0, 24(%esp) ; SSE1: t1: ; SSE1: movaps _.str, %xmm0 ; SSE1: movaps %xmm0 -; SSE1: movb $0 -; SSE1: movl $0 -; SSE1: movl $0 +; SSE1: movb $0, 24(%esp) +; SSE1: movl $0, 20(%esp) +; SSE1: movl $0, 16(%esp) ; NOSSE: t1: ; NOSSE: movb $0 @@ -47,9 +55,13 @@ entry: define void @t2(%struct.s0* nocapture %a, %struct.s0* nocapture %b) nounwind ssp { entry: -; SSE2: t2: -; SSE2: movaps (%eax), %xmm0 -; SSE2: movaps %xmm0, (%eax) +; SSE2-Darwin: t2: +; SSE2-Darwin: movaps (%eax), %xmm0 +; SSE2-Darwin: movaps %xmm0, (%eax) + +; SSE2-Mingw32: t2: +; SSE2-Mingw32: movaps (%eax), %xmm0 +; SSE2-Mingw32: movaps %xmm0, (%eax) ; SSE1: t2: ; SSE1: movaps (%eax), %xmm0 @@ -78,11 +90,17 @@ entry: define void @t3(%struct.s0* nocapture %a, %struct.s0* nocapture %b) nounwind ssp { entry: -; SSE2: t3: -; SSE2: movsd (%eax), %xmm0 -; SSE2: movsd 8(%eax), %xmm1 -; SSE2: movsd %xmm1, 8(%eax) -; SSE2: movsd %xmm0, (%eax) +; SSE2-Darwin: t3: +; SSE2-Darwin: movsd (%eax), %xmm0 +; SSE2-Darwin: movsd 8(%eax), %xmm1 +; SSE2-Darwin: movsd %xmm1, 8(%eax) +; SSE2-Darwin: movsd %xmm0, (%eax) + +; SSE2-Mingw32: t3: +; SSE2-Mingw32: movsd (%eax), %xmm0 +; SSE2-Mingw32: movsd 8(%eax), %xmm1 +; SSE2-Mingw32: movsd %xmm1, 8(%eax) +; SSE2-Mingw32: movsd %xmm0, (%eax) ; SSE1: t3: ; SSE1: movl @@ -121,15 +139,25 @@ entry: define void @t4() nounwind { entry: -; SSE2: t4: -; SSE2: movw $120 -; SSE2: movl $2021161080 -; SSE2: movl $2021161080 -; SSE2: movl $2021161080 -; SSE2: movl $2021161080 -; SSE2: movl $2021161080 -; SSE2: movl $2021161080 -; SSE2: movl $2021161080 +; SSE2-Darwin: t4: +; SSE2-Darwin: movw $120 +; SSE2-Darwin: movl $2021161080 +; SSE2-Darwin: movl $2021161080 +; SSE2-Darwin: movl $2021161080 +; SSE2-Darwin: movl $2021161080 +; SSE2-Darwin: movl $2021161080 +; SSE2-Darwin: movl $2021161080 +; SSE2-Darwin: movl $2021161080 + +; SSE2-Mingw32: t4: +; SSE2-Mingw32: movw $120 +; SSE2-Mingw32: movl $2021161080 +; SSE2-Mingw32: movl $2021161080 +; SSE2-Mingw32: movl $2021161080 +; SSE2-Mingw32: movl $2021161080 +; SSE2-Mingw32: movl $2021161080 +; SSE2-Mingw32: movl $2021161080 +; SSE2-Mingw32: movl $2021161080 ; SSE1: t4: ; SSE1: movw $120 diff --git a/test/CodeGen/X86/memcpy.ll b/test/CodeGen/X86/memcpy.ll index 39c7fbafd4c7..3372a4adc5ee 100644 --- a/test/CodeGen/X86/memcpy.ll +++ b/test/CodeGen/X86/memcpy.ll @@ -87,8 +87,34 @@ entry: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([30 x i8]* @.str, i64 0, i64 0), i64 16, i32 1, i1 false) ret void +; DARWIN: test5: ; DARWIN: movabsq $7016996765293437281 ; DARWIN: movabsq $7016996765293437184 } +; PR14896 +@.str2 = private unnamed_addr constant [2 x i8] c"x\00", align 1 + +define void @test6() nounwind uwtable { +entry: +; DARWIN: test6 +; DARWIN: movw $0, 8 +; DARWIN: movq $120, 0 + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* null, i8* getelementptr inbounds ([2 x i8]* @.str2, i64 0, i64 0), i64 10, i32 1, i1 false) + ret void +} + +define void @PR15348(i8* %a, i8* %b) { +; Ensure that alignment of '0' in an @llvm.memcpy intrinsic results in +; unaligned loads and stores. 
+; LINUX: PR15348 +; LINUX: movb +; LINUX: movb +; LINUX: movq +; LINUX: movq +; LINUX: movq +; LINUX: movq + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 17, i32 0, i1 false) + ret void +} diff --git a/test/CodeGen/X86/memset-sse-stack-realignment.ll b/test/CodeGen/X86/memset-sse-stack-realignment.ll new file mode 100644 index 000000000000..df9de5dfaf22 --- /dev/null +++ b/test/CodeGen/X86/memset-sse-stack-realignment.ll @@ -0,0 +1,77 @@ +; Make sure that we realign the stack. Mingw32 uses 4 byte stack alignment, we +; need 16 bytes for SSE and 32 bytes for AVX. + +; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=pentium2 | FileCheck %s -check-prefix=NOSSE +; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=pentium3 | FileCheck %s -check-prefix=SSE1 +; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=yonah | FileCheck %s -check-prefix=SSE2 +; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=corei7-avx | FileCheck %s -check-prefix=AVX1 +; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=core-avx2 | FileCheck %s -check-prefix=AVX2 + +define void @test1(i32 %t) nounwind { + %tmp1210 = alloca i8, i32 32, align 4 + call void @llvm.memset.p0i8.i64(i8* %tmp1210, i8 0, i64 32, i32 4, i1 false) + %x = alloca i8, i32 %t + call void @dummy(i8* %x) + ret void + +; NOSSE: test1: +; NOSSE-NOT: and +; NOSSE: movl $0 + +; SSE1: test1: +; SSE1: andl $-16 +; SSE1: movl %esp, %esi +; SSE1: movaps + +; SSE2: test1: +; SSE2: andl $-16 +; SSE2: movl %esp, %esi +; SSE2: movaps + +; AVX1: test1: +; AVX1: andl $-32 +; AVX1: movl %esp, %esi +; AVX1: vmovaps %ymm + +; AVX2: test1: +; AVX2: andl $-32 +; AVX2: movl %esp, %esi +; AVX2: vmovaps %ymm + +} + +define void @test2(i32 %t) nounwind { + %tmp1210 = alloca i8, i32 16, align 4 + call void @llvm.memset.p0i8.i64(i8* %tmp1210, i8 0, i64 16, i32 4, i1 false) + %x = alloca i8, i32 %t + call void @dummy(i8* %x) + ret void + +; NOSSE: test2: +; NOSSE-NOT: and +; NOSSE: movl $0 + +; SSE1: test2: +; SSE1: andl $-16 +; SSE1: movl %esp, %esi +; SSE1: movaps + +; SSE2: test2: +; SSE2: andl $-16 +; SSE2: movl %esp, %esi +; SSE2: movaps + +; AVX1: test2: +; AVX1: andl $-16 +; AVX1: movl %esp, %esi +; AVX1: vmovaps %xmm + +; AVX2: test2: +; AVX2: andl $-16 +; AVX2: movl %esp, %esi +; AVX2: vmovaps %xmm +} + +declare void @dummy(i8*) + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind diff --git a/test/CodeGen/X86/memset.ll b/test/CodeGen/X86/memset.ll index 72b3e0fa3d51..0d479f0abe7b 100644 --- a/test/CodeGen/X86/memset.ll +++ b/test/CodeGen/X86/memset.ll @@ -1,5 +1,6 @@ -; RUN: llc < %s -march=x86 -mattr=-sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 9 -; RUN: llc < %s -march=x86 -mattr=+sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 3 +; RUN: llc < %s -march=x86 -mcpu=pentium2 -mtriple=i686-apple-darwin8.8.0 | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -march=x86 -mcpu=pentium3 -mtriple=i686-apple-darwin8.8.0 | FileCheck %s --check-prefix=XMM +; RUN: llc < %s -march=x86 -mcpu=bdver1 -mtriple=i686-apple-darwin8.8.0 | FileCheck %s --check-prefix=YMM %struct.x = type { i16, i16 } @@ -8,7 +9,30 @@ entry: %up_mvd = alloca [8 x %struct.x] ; <[8 x %struct.x]*> [#uses=2] %up_mvd116 = getelementptr [8 x %struct.x]* %up_mvd, i32 0, i32 0 ; <%struct.x*> [#uses=1] %tmp110117 = bitcast [8 x %struct.x]* %up_mvd to i8* ; <i8*> [#uses=1] + call void @llvm.memset.p0i8.i64(i8* %tmp110117, i8 0, i64 32, i32 8, i1 false) +; X86: movl $0, +; X86: movl $0, +; X86: movl $0, +; X86: movl $0, +; X86: movl $0, +; X86: movl $0, +; X86: movl $0, +; X86: movl 
$0, +; X86-NOT: movl $0, +; X86: ret + +; XMM: xorps %xmm{{[0-9]+}}, [[Z:%xmm[0-9]+]] +; XMM: movaps [[Z]], +; XMM: movaps [[Z]], +; XMM-NOT: movaps +; XMM: ret + +; YMM: vxorps %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, [[Z:%ymm[0-9]+]] +; YMM: vmovaps [[Z]], +; YMM-NOT: movaps +; YMM: ret + call void @foo( %struct.x* %up_mvd116 ) nounwind ret void } @@ -16,3 +40,16 @@ entry: declare void @foo(%struct.x*) declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind + +define void @PR15348(i8* %a) { +; Ensure that alignment of '0' in an @llvm.memset intrinsic results in +; unaligned loads and stores. +; XMM: PR15348 +; XMM: movb $0, +; XMM: movl $0, +; XMM: movl $0, +; XMM: movl $0, +; XMM: movl $0, + call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 17, i32 0, i1 false) + ret void +} diff --git a/test/CodeGen/X86/memset64-on-x86-32.ll b/test/CodeGen/X86/memset64-on-x86-32.ll index e20fce172f27..8cfa032797f7 100644 --- a/test/CodeGen/X86/memset64-on-x86-32.ll +++ b/test/CodeGen/X86/memset64-on-x86-32.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=nehalem | grep movups | count 5 ; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=core2 | grep movl | count 20 +; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=core2 | grep movl | count 20 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | grep movq | count 10 define void @bork() nounwind { diff --git a/test/CodeGen/X86/misched-crash.ll b/test/CodeGen/X86/misched-crash.ll new file mode 100644 index 000000000000..7644ee070878 --- /dev/null +++ b/test/CodeGen/X86/misched-crash.ll @@ -0,0 +1,40 @@ +; RUN: llc < %s -enable-misched -verify-misched +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10" + +; This function contains a cmp instruction with two users. +; Hoisting the last use requires trimming the EFLAGS live range to the second. 
+define void @rdar13353090(i8* %plane, i64 %_x1, i64 %_x2) { +entry: + %cmp = icmp ult i64 %_x1, %_x2 + %cond = select i1 %cmp, i64 %_x1, i64 %_x2 + %cond10 = select i1 %cmp, i64 %_x2, i64 %_x1 + %0 = load i64* null, align 8 + %cmp16 = icmp ult i64 %cond, %0 + %cmp23 = icmp ugt i64 %cond10, 0 + br i1 %cmp16, label %land.lhs.true21, label %return + +land.lhs.true21: ; preds = %entry + %sub = add i64 %0, -1 + br i1 %cmp23, label %if.then24, label %return + +if.then24: ; preds = %land.lhs.true21 + %cmp16.i = icmp ult i64 %cond, %sub + %cond20.i = select i1 %cmp16.i, i64 %cond, i64 %sub + %add21.i = add i64 0, %cond20.i + br label %for.body34.i + +for.body34.i: ; preds = %for.inc39.i, %if.then24 + %index.178.i = phi i64 [ %add21.i, %if.then24 ], [ %inc41.i, %for.inc39.i ] + %arrayidx35.i = getelementptr inbounds i8* %plane, i64 %index.178.i + %1 = load i8* %arrayidx35.i, align 1 + %tobool36.i = icmp eq i8 %1, 0 + br i1 %tobool36.i, label %for.inc39.i, label %return + +for.inc39.i: ; preds = %for.body34.i + %inc41.i = add i64 %index.178.i, 1 + br i1 undef, label %return, label %for.body34.i + +return: ; preds = %for.inc39.i, %for.body34.i, %land.lhs.true21, %entry + ret void +} diff --git a/test/CodeGen/X86/misched-ilp.ll b/test/CodeGen/X86/misched-ilp.ll index c6cedb7be871..4ca296ca92e5 100644 --- a/test/CodeGen/X86/misched-ilp.ll +++ b/test/CodeGen/X86/misched-ilp.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=x86_64-apple-macosx -mcpu=core2 -enable-misched -misched=ilpmax | FileCheck -check-prefix=MAX %s -; RUN: llc < %s -mtriple=x86_64-apple-macosx -mcpu=core2 -enable-misched -misched=ilpmin | FileCheck -check-prefix=MIN %s +; RUN: llc < %s -mtriple=x86_64-apple-macosx -mcpu=nocona -enable-misched -misched=ilpmax | FileCheck -check-prefix=MAX %s +; RUN: llc < %s -mtriple=x86_64-apple-macosx -mcpu=nocona -enable-misched -misched=ilpmin | FileCheck -check-prefix=MIN %s ; ; Basic verification of the ScheduleDAGILP metric. ; diff --git a/test/CodeGen/X86/misched-matmul.ll b/test/CodeGen/X86/misched-matmul.ll new file mode 100644 index 000000000000..0f6e442b1a8d --- /dev/null +++ b/test/CodeGen/X86/misched-matmul.ll @@ -0,0 +1,228 @@ +; REQUIRES: asserts +; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched -stats 2>&1 | FileCheck %s +; +; Verify that register pressure heuristics are working in MachineScheduler. +; +; When we enable subtree scheduling heuristics on X86, we may need a +; flag to disable it for this test case. 
+; +; CHECK: @wrap_mul4 +; CHECK: 30 regalloc - Number of spills inserted + +define void @wrap_mul4(double* nocapture %Out, [4 x double]* nocapture %A, [4 x double]* nocapture %B) #0 { +entry: + %arrayidx1.i = getelementptr inbounds [4 x double]* %A, i64 0, i64 0 + %0 = load double* %arrayidx1.i, align 8, !tbaa !0 + %arrayidx3.i = getelementptr inbounds [4 x double]* %B, i64 0, i64 0 + %1 = load double* %arrayidx3.i, align 8, !tbaa !0 + %mul.i = fmul double %0, %1 + %arrayidx5.i = getelementptr inbounds [4 x double]* %A, i64 0, i64 1 + %2 = load double* %arrayidx5.i, align 8, !tbaa !0 + %arrayidx7.i = getelementptr inbounds [4 x double]* %B, i64 1, i64 0 + %3 = load double* %arrayidx7.i, align 8, !tbaa !0 + %mul8.i = fmul double %2, %3 + %add.i = fadd double %mul.i, %mul8.i + %arrayidx10.i = getelementptr inbounds [4 x double]* %A, i64 0, i64 2 + %4 = load double* %arrayidx10.i, align 8, !tbaa !0 + %arrayidx12.i = getelementptr inbounds [4 x double]* %B, i64 2, i64 0 + %5 = load double* %arrayidx12.i, align 8, !tbaa !0 + %mul13.i = fmul double %4, %5 + %add14.i = fadd double %add.i, %mul13.i + %arrayidx16.i = getelementptr inbounds [4 x double]* %A, i64 0, i64 3 + %6 = load double* %arrayidx16.i, align 8, !tbaa !0 + %arrayidx18.i = getelementptr inbounds [4 x double]* %B, i64 3, i64 0 + %7 = load double* %arrayidx18.i, align 8, !tbaa !0 + %mul19.i = fmul double %6, %7 + %add20.i = fadd double %add14.i, %mul19.i + %arrayidx25.i = getelementptr inbounds [4 x double]* %B, i64 0, i64 1 + %8 = load double* %arrayidx25.i, align 8, !tbaa !0 + %mul26.i = fmul double %0, %8 + %arrayidx30.i = getelementptr inbounds [4 x double]* %B, i64 1, i64 1 + %9 = load double* %arrayidx30.i, align 8, !tbaa !0 + %mul31.i = fmul double %2, %9 + %add32.i = fadd double %mul26.i, %mul31.i + %arrayidx36.i = getelementptr inbounds [4 x double]* %B, i64 2, i64 1 + %10 = load double* %arrayidx36.i, align 8, !tbaa !0 + %mul37.i = fmul double %4, %10 + %add38.i = fadd double %add32.i, %mul37.i + %arrayidx42.i = getelementptr inbounds [4 x double]* %B, i64 3, i64 1 + %11 = load double* %arrayidx42.i, align 8, !tbaa !0 + %mul43.i = fmul double %6, %11 + %add44.i = fadd double %add38.i, %mul43.i + %arrayidx49.i = getelementptr inbounds [4 x double]* %B, i64 0, i64 2 + %12 = load double* %arrayidx49.i, align 8, !tbaa !0 + %mul50.i = fmul double %0, %12 + %arrayidx54.i = getelementptr inbounds [4 x double]* %B, i64 1, i64 2 + %13 = load double* %arrayidx54.i, align 8, !tbaa !0 + %mul55.i = fmul double %2, %13 + %add56.i = fadd double %mul50.i, %mul55.i + %arrayidx60.i = getelementptr inbounds [4 x double]* %B, i64 2, i64 2 + %14 = load double* %arrayidx60.i, align 8, !tbaa !0 + %mul61.i = fmul double %4, %14 + %add62.i = fadd double %add56.i, %mul61.i + %arrayidx66.i = getelementptr inbounds [4 x double]* %B, i64 3, i64 2 + %15 = load double* %arrayidx66.i, align 8, !tbaa !0 + %mul67.i = fmul double %6, %15 + %add68.i = fadd double %add62.i, %mul67.i + %arrayidx73.i = getelementptr inbounds [4 x double]* %B, i64 0, i64 3 + %16 = load double* %arrayidx73.i, align 8, !tbaa !0 + %mul74.i = fmul double %0, %16 + %arrayidx78.i = getelementptr inbounds [4 x double]* %B, i64 1, i64 3 + %17 = load double* %arrayidx78.i, align 8, !tbaa !0 + %mul79.i = fmul double %2, %17 + %add80.i = fadd double %mul74.i, %mul79.i + %arrayidx84.i = getelementptr inbounds [4 x double]* %B, i64 2, i64 3 + %18 = load double* %arrayidx84.i, align 8, !tbaa !0 + %mul85.i = fmul double %4, %18 + %add86.i = fadd double %add80.i, %mul85.i + %arrayidx90.i = 
getelementptr inbounds [4 x double]* %B, i64 3, i64 3 + %19 = load double* %arrayidx90.i, align 8, !tbaa !0 + %mul91.i = fmul double %6, %19 + %add92.i = fadd double %add86.i, %mul91.i + %arrayidx95.i = getelementptr inbounds [4 x double]* %A, i64 1, i64 0 + %20 = load double* %arrayidx95.i, align 8, !tbaa !0 + %mul98.i = fmul double %1, %20 + %arrayidx100.i = getelementptr inbounds [4 x double]* %A, i64 1, i64 1 + %21 = load double* %arrayidx100.i, align 8, !tbaa !0 + %mul103.i = fmul double %3, %21 + %add104.i = fadd double %mul98.i, %mul103.i + %arrayidx106.i = getelementptr inbounds [4 x double]* %A, i64 1, i64 2 + %22 = load double* %arrayidx106.i, align 8, !tbaa !0 + %mul109.i = fmul double %5, %22 + %add110.i = fadd double %add104.i, %mul109.i + %arrayidx112.i = getelementptr inbounds [4 x double]* %A, i64 1, i64 3 + %23 = load double* %arrayidx112.i, align 8, !tbaa !0 + %mul115.i = fmul double %7, %23 + %add116.i = fadd double %add110.i, %mul115.i + %mul122.i = fmul double %8, %20 + %mul127.i = fmul double %9, %21 + %add128.i = fadd double %mul122.i, %mul127.i + %mul133.i = fmul double %10, %22 + %add134.i = fadd double %add128.i, %mul133.i + %mul139.i = fmul double %11, %23 + %add140.i = fadd double %add134.i, %mul139.i + %mul146.i = fmul double %12, %20 + %mul151.i = fmul double %13, %21 + %add152.i = fadd double %mul146.i, %mul151.i + %mul157.i = fmul double %14, %22 + %add158.i = fadd double %add152.i, %mul157.i + %mul163.i = fmul double %15, %23 + %add164.i = fadd double %add158.i, %mul163.i + %mul170.i = fmul double %16, %20 + %mul175.i = fmul double %17, %21 + %add176.i = fadd double %mul170.i, %mul175.i + %mul181.i = fmul double %18, %22 + %add182.i = fadd double %add176.i, %mul181.i + %mul187.i = fmul double %19, %23 + %add188.i = fadd double %add182.i, %mul187.i + %arrayidx191.i = getelementptr inbounds [4 x double]* %A, i64 2, i64 0 + %24 = load double* %arrayidx191.i, align 8, !tbaa !0 + %mul194.i = fmul double %1, %24 + %arrayidx196.i = getelementptr inbounds [4 x double]* %A, i64 2, i64 1 + %25 = load double* %arrayidx196.i, align 8, !tbaa !0 + %mul199.i = fmul double %3, %25 + %add200.i = fadd double %mul194.i, %mul199.i + %arrayidx202.i = getelementptr inbounds [4 x double]* %A, i64 2, i64 2 + %26 = load double* %arrayidx202.i, align 8, !tbaa !0 + %mul205.i = fmul double %5, %26 + %add206.i = fadd double %add200.i, %mul205.i + %arrayidx208.i = getelementptr inbounds [4 x double]* %A, i64 2, i64 3 + %27 = load double* %arrayidx208.i, align 8, !tbaa !0 + %mul211.i = fmul double %7, %27 + %add212.i = fadd double %add206.i, %mul211.i + %mul218.i = fmul double %8, %24 + %mul223.i = fmul double %9, %25 + %add224.i = fadd double %mul218.i, %mul223.i + %mul229.i = fmul double %10, %26 + %add230.i = fadd double %add224.i, %mul229.i + %mul235.i = fmul double %11, %27 + %add236.i = fadd double %add230.i, %mul235.i + %mul242.i = fmul double %12, %24 + %mul247.i = fmul double %13, %25 + %add248.i = fadd double %mul242.i, %mul247.i + %mul253.i = fmul double %14, %26 + %add254.i = fadd double %add248.i, %mul253.i + %mul259.i = fmul double %15, %27 + %add260.i = fadd double %add254.i, %mul259.i + %mul266.i = fmul double %16, %24 + %mul271.i = fmul double %17, %25 + %add272.i = fadd double %mul266.i, %mul271.i + %mul277.i = fmul double %18, %26 + %add278.i = fadd double %add272.i, %mul277.i + %mul283.i = fmul double %19, %27 + %add284.i = fadd double %add278.i, %mul283.i + %arrayidx287.i = getelementptr inbounds [4 x double]* %A, i64 3, i64 0 + %28 = load double* %arrayidx287.i, 
align 8, !tbaa !0 + %mul290.i = fmul double %1, %28 + %arrayidx292.i = getelementptr inbounds [4 x double]* %A, i64 3, i64 1 + %29 = load double* %arrayidx292.i, align 8, !tbaa !0 + %mul295.i = fmul double %3, %29 + %add296.i = fadd double %mul290.i, %mul295.i + %arrayidx298.i = getelementptr inbounds [4 x double]* %A, i64 3, i64 2 + %30 = load double* %arrayidx298.i, align 8, !tbaa !0 + %mul301.i = fmul double %5, %30 + %add302.i = fadd double %add296.i, %mul301.i + %arrayidx304.i = getelementptr inbounds [4 x double]* %A, i64 3, i64 3 + %31 = load double* %arrayidx304.i, align 8, !tbaa !0 + %mul307.i = fmul double %7, %31 + %add308.i = fadd double %add302.i, %mul307.i + %mul314.i = fmul double %8, %28 + %mul319.i = fmul double %9, %29 + %add320.i = fadd double %mul314.i, %mul319.i + %mul325.i = fmul double %10, %30 + %add326.i = fadd double %add320.i, %mul325.i + %mul331.i = fmul double %11, %31 + %add332.i = fadd double %add326.i, %mul331.i + %mul338.i = fmul double %12, %28 + %mul343.i = fmul double %13, %29 + %add344.i = fadd double %mul338.i, %mul343.i + %mul349.i = fmul double %14, %30 + %add350.i = fadd double %add344.i, %mul349.i + %mul355.i = fmul double %15, %31 + %add356.i = fadd double %add350.i, %mul355.i + %mul362.i = fmul double %16, %28 + %mul367.i = fmul double %17, %29 + %add368.i = fadd double %mul362.i, %mul367.i + %mul373.i = fmul double %18, %30 + %add374.i = fadd double %add368.i, %mul373.i + %mul379.i = fmul double %19, %31 + %add380.i = fadd double %add374.i, %mul379.i + store double %add20.i, double* %Out, align 8 + %Res.i.sroa.1.8.idx2 = getelementptr inbounds double* %Out, i64 1 + store double %add44.i, double* %Res.i.sroa.1.8.idx2, align 8 + %Res.i.sroa.2.16.idx4 = getelementptr inbounds double* %Out, i64 2 + store double %add68.i, double* %Res.i.sroa.2.16.idx4, align 8 + %Res.i.sroa.3.24.idx6 = getelementptr inbounds double* %Out, i64 3 + store double %add92.i, double* %Res.i.sroa.3.24.idx6, align 8 + %Res.i.sroa.4.32.idx8 = getelementptr inbounds double* %Out, i64 4 + store double %add116.i, double* %Res.i.sroa.4.32.idx8, align 8 + %Res.i.sroa.5.40.idx10 = getelementptr inbounds double* %Out, i64 5 + store double %add140.i, double* %Res.i.sroa.5.40.idx10, align 8 + %Res.i.sroa.6.48.idx12 = getelementptr inbounds double* %Out, i64 6 + store double %add164.i, double* %Res.i.sroa.6.48.idx12, align 8 + %Res.i.sroa.7.56.idx14 = getelementptr inbounds double* %Out, i64 7 + store double %add188.i, double* %Res.i.sroa.7.56.idx14, align 8 + %Res.i.sroa.8.64.idx16 = getelementptr inbounds double* %Out, i64 8 + store double %add212.i, double* %Res.i.sroa.8.64.idx16, align 8 + %Res.i.sroa.9.72.idx18 = getelementptr inbounds double* %Out, i64 9 + store double %add236.i, double* %Res.i.sroa.9.72.idx18, align 8 + %Res.i.sroa.10.80.idx20 = getelementptr inbounds double* %Out, i64 10 + store double %add260.i, double* %Res.i.sroa.10.80.idx20, align 8 + %Res.i.sroa.11.88.idx22 = getelementptr inbounds double* %Out, i64 11 + store double %add284.i, double* %Res.i.sroa.11.88.idx22, align 8 + %Res.i.sroa.12.96.idx24 = getelementptr inbounds double* %Out, i64 12 + store double %add308.i, double* %Res.i.sroa.12.96.idx24, align 8 + %Res.i.sroa.13.104.idx26 = getelementptr inbounds double* %Out, i64 13 + store double %add332.i, double* %Res.i.sroa.13.104.idx26, align 8 + %Res.i.sroa.14.112.idx28 = getelementptr inbounds double* %Out, i64 14 + store double %add356.i, double* %Res.i.sroa.14.112.idx28, align 8 + %Res.i.sroa.15.120.idx30 = getelementptr inbounds double* %Out, i64 15 + 
store double %add380.i, double* %Res.i.sroa.15.120.idx30, align 8 + ret void +} + +attributes #0 = { noinline nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!0 = metadata !{metadata !"double", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/CodeGen/X86/misched-matrix.ll b/test/CodeGen/X86/misched-matrix.ll new file mode 100644 index 000000000000..f5566e5e5de9 --- /dev/null +++ b/test/CodeGen/X86/misched-matrix.ll @@ -0,0 +1,195 @@ +; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched \ +; RUN: -misched-topdown -verify-machineinstrs \ +; RUN: | FileCheck %s -check-prefix=TOPDOWN +; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched \ +; RUN: -misched=ilpmin -verify-machineinstrs \ +; RUN: | FileCheck %s -check-prefix=ILPMIN +; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched \ +; RUN: -misched=ilpmax -verify-machineinstrs \ +; RUN: | FileCheck %s -check-prefix=ILPMAX +; +; Verify that the MI scheduler minimizes register pressure for a +; uniform set of bottom-up subtrees (unrolled matrix multiply). +; +; For current top-down heuristics, ensure that some folded imulls have +; been reordered with the stores. This tests the scheduler's cheap +; alias analysis ability (that doesn't require any AliasAnalysis pass). +; +; TOPDOWN: %for.body +; TOPDOWN: movl %{{.*}}, ( +; TOPDOWN: imull {{[0-9]*}}( +; TOPDOWN: movl %{{.*}}, 4( +; TOPDOWN: imull {{[0-9]*}}( +; TOPDOWN: movl %{{.*}}, 8( +; TOPDOWN: movl %{{.*}}, 12( +; TOPDOWN: %for.end +; +; For -misched=ilpmin, verify that each expression subtree is +; scheduled independently, and that the imull/adds are interleaved. +; +; ILPMIN: %for.body +; ILPMIN: movl %{{.*}}, ( +; ILPMIN: imull +; ILPMIN: imull +; ILPMIN: addl +; ILPMIN: imull +; ILPMIN: addl +; ILPMIN: imull +; ILPMIN: addl +; ILPMIN: movl %{{.*}}, 4( +; ILPMIN: imull +; ILPMIN: imull +; ILPMIN: addl +; ILPMIN: imull +; ILPMIN: addl +; ILPMIN: imull +; ILPMIN: addl +; ILPMIN: movl %{{.*}}, 8( +; ILPMIN: imull +; ILPMIN: imull +; ILPMIN: addl +; ILPMIN: imull +; ILPMIN: addl +; ILPMIN: imull +; ILPMIN: addl +; ILPMIN: movl %{{.*}}, 12( +; ILPMIN: %for.end +; +; For -misched=ilpmax, verify that each expression subtree is +; scheduled independently, and that the imull/adds are clustered. 
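; Schematic summary (not a FileCheck pattern): for one four-term dot product the
; two ILP heuristics roughly produce
;   ilpmin (interleaved): imull, imull, addl, imull, addl, imull, addl
;   ilpmax (clustered):   imull, imull, imull, imull, addl, addl, addl
; matching the ILPMIN check lines above and the ILPMAX check lines below.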
+; +; ILPMAX: %for.body +; ILPMAX: movl %{{.*}}, ( +; ILPMAX: imull +; ILPMAX: imull +; ILPMAX: imull +; ILPMAX: imull +; ILPMAX: addl +; ILPMAX: addl +; ILPMAX: addl +; ILPMAX: movl %{{.*}}, 4( +; ILPMAX: imull +; ILPMAX: imull +; ILPMAX: imull +; ILPMAX: imull +; ILPMAX: addl +; ILPMAX: addl +; ILPMAX: addl +; ILPMAX: movl %{{.*}}, 8( +; ILPMAX: imull +; ILPMAX: imull +; ILPMAX: imull +; ILPMAX: imull +; ILPMAX: addl +; ILPMAX: addl +; ILPMAX: addl +; ILPMAX: movl %{{.*}}, 12( +; ILPMAX: %for.end + +define void @mmult([4 x i32]* noalias nocapture %m1, [4 x i32]* noalias nocapture %m2, +[4 x i32]* noalias nocapture %m3) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx8 = getelementptr inbounds [4 x i32]* %m1, i64 %indvars.iv, i64 0 + %tmp = load i32* %arrayidx8, align 4, !tbaa !0 + %arrayidx12 = getelementptr inbounds [4 x i32]* %m2, i64 0, i64 0 + %tmp1 = load i32* %arrayidx12, align 4, !tbaa !0 + %arrayidx8.1 = getelementptr inbounds [4 x i32]* %m1, i64 %indvars.iv, i64 1 + %tmp2 = load i32* %arrayidx8.1, align 4, !tbaa !0 + %arrayidx12.1 = getelementptr inbounds [4 x i32]* %m2, i64 1, i64 0 + %tmp3 = load i32* %arrayidx12.1, align 4, !tbaa !0 + %arrayidx8.2 = getelementptr inbounds [4 x i32]* %m1, i64 %indvars.iv, i64 2 + %tmp4 = load i32* %arrayidx8.2, align 4, !tbaa !0 + %arrayidx12.2 = getelementptr inbounds [4 x i32]* %m2, i64 2, i64 0 + %tmp5 = load i32* %arrayidx12.2, align 4, !tbaa !0 + %arrayidx8.3 = getelementptr inbounds [4 x i32]* %m1, i64 %indvars.iv, i64 3 + %tmp6 = load i32* %arrayidx8.3, align 4, !tbaa !0 + %arrayidx12.3 = getelementptr inbounds [4 x i32]* %m2, i64 3, i64 0 + %tmp8 = load i32* %arrayidx8, align 4, !tbaa !0 + %arrayidx12.137 = getelementptr inbounds [4 x i32]* %m2, i64 0, i64 1 + %tmp9 = load i32* %arrayidx12.137, align 4, !tbaa !0 + %tmp10 = load i32* %arrayidx8.1, align 4, !tbaa !0 + %arrayidx12.1.1 = getelementptr inbounds [4 x i32]* %m2, i64 1, i64 1 + %tmp11 = load i32* %arrayidx12.1.1, align 4, !tbaa !0 + %tmp12 = load i32* %arrayidx8.2, align 4, !tbaa !0 + %arrayidx12.2.1 = getelementptr inbounds [4 x i32]* %m2, i64 2, i64 1 + %tmp13 = load i32* %arrayidx12.2.1, align 4, !tbaa !0 + %tmp14 = load i32* %arrayidx8.3, align 4, !tbaa !0 + %arrayidx12.3.1 = getelementptr inbounds [4 x i32]* %m2, i64 3, i64 1 + %tmp15 = load i32* %arrayidx12.3.1, align 4, !tbaa !0 + %tmp16 = load i32* %arrayidx8, align 4, !tbaa !0 + %arrayidx12.239 = getelementptr inbounds [4 x i32]* %m2, i64 0, i64 2 + %tmp17 = load i32* %arrayidx12.239, align 4, !tbaa !0 + %tmp18 = load i32* %arrayidx8.1, align 4, !tbaa !0 + %arrayidx12.1.2 = getelementptr inbounds [4 x i32]* %m2, i64 1, i64 2 + %tmp19 = load i32* %arrayidx12.1.2, align 4, !tbaa !0 + %tmp20 = load i32* %arrayidx8.2, align 4, !tbaa !0 + %arrayidx12.2.2 = getelementptr inbounds [4 x i32]* %m2, i64 2, i64 2 + %tmp21 = load i32* %arrayidx12.2.2, align 4, !tbaa !0 + %tmp22 = load i32* %arrayidx8.3, align 4, !tbaa !0 + %arrayidx12.3.2 = getelementptr inbounds [4 x i32]* %m2, i64 3, i64 2 + %tmp23 = load i32* %arrayidx12.3.2, align 4, !tbaa !0 + %tmp24 = load i32* %arrayidx8, align 4, !tbaa !0 + %arrayidx12.341 = getelementptr inbounds [4 x i32]* %m2, i64 0, i64 3 + %tmp25 = load i32* %arrayidx12.341, align 4, !tbaa !0 + %tmp26 = load i32* %arrayidx8.1, align 4, !tbaa !0 + %arrayidx12.1.3 = getelementptr inbounds [4 x i32]* %m2, i64 1, i64 3 + %tmp27 = load i32* %arrayidx12.1.3, align 4, !tbaa !0 + 
%tmp28 = load i32* %arrayidx8.2, align 4, !tbaa !0 + %arrayidx12.2.3 = getelementptr inbounds [4 x i32]* %m2, i64 2, i64 3 + %tmp29 = load i32* %arrayidx12.2.3, align 4, !tbaa !0 + %tmp30 = load i32* %arrayidx8.3, align 4, !tbaa !0 + %arrayidx12.3.3 = getelementptr inbounds [4 x i32]* %m2, i64 3, i64 3 + %tmp31 = load i32* %arrayidx12.3.3, align 4, !tbaa !0 + %tmp7 = load i32* %arrayidx12.3, align 4, !tbaa !0 + %mul = mul nsw i32 %tmp1, %tmp + %mul.1 = mul nsw i32 %tmp3, %tmp2 + %mul.2 = mul nsw i32 %tmp5, %tmp4 + %mul.3 = mul nsw i32 %tmp7, %tmp6 + %mul.138 = mul nsw i32 %tmp9, %tmp8 + %mul.1.1 = mul nsw i32 %tmp11, %tmp10 + %mul.2.1 = mul nsw i32 %tmp13, %tmp12 + %mul.3.1 = mul nsw i32 %tmp15, %tmp14 + %mul.240 = mul nsw i32 %tmp17, %tmp16 + %mul.1.2 = mul nsw i32 %tmp19, %tmp18 + %mul.2.2 = mul nsw i32 %tmp21, %tmp20 + %mul.3.2 = mul nsw i32 %tmp23, %tmp22 + %mul.342 = mul nsw i32 %tmp25, %tmp24 + %mul.1.3 = mul nsw i32 %tmp27, %tmp26 + %mul.2.3 = mul nsw i32 %tmp29, %tmp28 + %mul.3.3 = mul nsw i32 %tmp31, %tmp30 + %add.1 = add nsw i32 %mul.1, %mul + %add.2 = add nsw i32 %mul.2, %add.1 + %add.3 = add nsw i32 %mul.3, %add.2 + %add.1.1 = add nsw i32 %mul.1.1, %mul.138 + %add.2.1 = add nsw i32 %mul.2.1, %add.1.1 + %add.3.1 = add nsw i32 %mul.3.1, %add.2.1 + %add.1.2 = add nsw i32 %mul.1.2, %mul.240 + %add.2.2 = add nsw i32 %mul.2.2, %add.1.2 + %add.3.2 = add nsw i32 %mul.3.2, %add.2.2 + %add.1.3 = add nsw i32 %mul.1.3, %mul.342 + %add.2.3 = add nsw i32 %mul.2.3, %add.1.3 + %add.3.3 = add nsw i32 %mul.3.3, %add.2.3 + %arrayidx16 = getelementptr inbounds [4 x i32]* %m3, i64 %indvars.iv, i64 0 + store i32 %add.3, i32* %arrayidx16, align 4, !tbaa !0 + %arrayidx16.1 = getelementptr inbounds [4 x i32]* %m3, i64 %indvars.iv, i64 1 + store i32 %add.3.1, i32* %arrayidx16.1, align 4, !tbaa !0 + %arrayidx16.2 = getelementptr inbounds [4 x i32]* %m3, i64 %indvars.iv, i64 2 + store i32 %add.3.2, i32* %arrayidx16.2, align 4, !tbaa !0 + %arrayidx16.3 = getelementptr inbounds [4 x i32]* %m3, i64 %indvars.iv, i64 3 + store i32 %add.3.3, i32* %arrayidx16.3, align 4, !tbaa !0 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 4 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/CodeGen/X86/misched-new.ll b/test/CodeGen/X86/misched-new.ll index cec04b534fba..89e45b7cfc21 100644 --- a/test/CodeGen/X86/misched-new.ll +++ b/test/CodeGen/X86/misched-new.ll @@ -1,6 +1,9 @@ ; RUN: llc < %s -march=x86-64 -mcpu=core2 -x86-early-ifcvt -enable-misched \ ; RUN: -misched=shuffle -misched-bottomup -verify-machineinstrs \ ; RUN: | FileCheck %s +; RUN: llc < %s -march=x86-64 -mcpu=core2 -x86-early-ifcvt -enable-misched \ +; RUN: -misched=shuffle -misched-topdown -verify-machineinstrs \ +; RUN: | FileCheck %s --check-prefix TOPDOWN ; REQUIRES: asserts ; ; Interesting MachineScheduler cases. @@ -51,3 +54,56 @@ if.end: ; preds = %if.then, %entry } declare void @bar(i32,i32) + +; Test that the DAG builder can handle an undef vreg on ExitSU. 
+; CHECK: hasundef +; CHECK: call + +%t0 = type { i32, i32, i8 } +%t6 = type { i32 (...)**, %t7* } +%t7 = type { i32 (...)** } + +define void @hasundef() unnamed_addr uwtable ssp align 2 { + %1 = alloca %t0, align 8 + br i1 undef, label %3, label %2 + +; <label>:2 ; preds = %0 + unreachable + +; <label>:3 ; preds = %0 + br i1 undef, label %4, label %5 + +; <label>:4 ; preds = %3 + call void undef(%t6* undef, %t0* %1) + unreachable + +; <label>:5 ; preds = %3 + ret void +} + +; Test top-down subregister liveness tracking. Self-verification +; catches any pressure set underflow. +; rdar://12797931. +; +; TOPDOWN: @testSubregTracking +; TOPDOWN: divb +; TOPDOWN: movzbl %al +; TOPDOWN: ret +define void @testSubregTracking() nounwind uwtable ssp align 2 { + %tmp = load i8* undef, align 1 + %tmp6 = sub i8 0, %tmp + %tmp7 = load i8* undef, align 1 + %tmp8 = udiv i8 %tmp6, %tmp7 + %tmp9 = zext i8 %tmp8 to i64 + %tmp10 = load i8* undef, align 1 + %tmp11 = zext i8 %tmp10 to i64 + %tmp12 = mul i64 %tmp11, %tmp9 + %tmp13 = urem i8 %tmp6, %tmp7 + %tmp14 = zext i8 %tmp13 to i32 + %tmp15 = add nsw i32 %tmp14, 0 + %tmp16 = add i32 %tmp15, 0 + store i32 %tmp16, i32* undef, align 4 + %tmp17 = add i64 0, %tmp12 + store i64 %tmp17, i64* undef, align 8 + ret void +} diff --git a/test/CodeGen/X86/movgs.ll b/test/CodeGen/X86/movgs.ll index 65ee7b1d8e00..bb42734833dd 100644 --- a/test/CodeGen/X86/movgs.ll +++ b/test/CodeGen/X86/movgs.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mattr=sse41 | FileCheck %s --check-prefix=X32 -; RUN: llc < %s -mtriple=x86_64-linux -mattr=sse41 | FileCheck %s --check-prefix=X64 -; RUN: llc < %s -mtriple=x86_64-win32 -mattr=sse41 | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mcpu=penryn -mattr=sse41 | FileCheck %s --check-prefix=X32 +; RUN: llc < %s -mtriple=x86_64-linux -mcpu=penryn -mattr=sse41 | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=penryn -mattr=sse41 | FileCheck %s --check-prefix=X64 define i32 @test1() nounwind readonly { entry: diff --git a/test/CodeGen/X86/ms-inline-asm.ll b/test/CodeGen/X86/ms-inline-asm.ll index 24d28adda894..5048a93ad302 100644 --- a/test/CodeGen/X86/ms-inline-asm.ll +++ b/test/CodeGen/X86/ms-inline-asm.ll @@ -1,10 +1,11 @@ -; RUN: llc < %s -march=x86 | FileCheck %s +; RUN: llc < %s -march=x86 -mcpu=core2 | FileCheck %s define i32 @t1() nounwind { entry: %0 = tail call i32 asm sideeffect inteldialect "mov eax, $1\0A\09mov $0, eax", "=r,r,~{eax},~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind ret i32 %0 ; CHECK: t1 +; CHECK: movl %esp, %ebp ; CHECK: {{## InlineAsm Start|#APP}} ; CHECK: .intel_syntax ; CHECK: mov eax, ecx @@ -18,6 +19,7 @@ entry: call void asm sideeffect inteldialect "mov eax, $$1", "~{eax},~{dirflag},~{fpsr},~{flags}"() nounwind ret void ; CHECK: t2 +; CHECK: movl %esp, %ebp ; CHECK: {{## InlineAsm Start|#APP}} ; CHECK: .intel_syntax ; CHECK: mov eax, 1 @@ -32,6 +34,7 @@ entry: call void asm sideeffect inteldialect "mov eax, DWORD PTR [$0]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* %V.addr) nounwind ret void ; CHECK: t3 +; CHECK: movl %esp, %ebp ; CHECK: {{## InlineAsm Start|#APP}} ; CHECK: .intel_syntax ; CHECK: mov eax, DWORD PTR {{[[esp]}} @@ -53,6 +56,7 @@ entry: %0 = load i32* %b1, align 4 ret i32 %0 ; CHECK: t18 +; CHECK: movl %esp, %ebp ; CHECK: {{## InlineAsm Start|#APP}} ; CHECK: .intel_syntax ; CHECK: lea ebx, foo @@ -61,3 +65,46 @@ entry: ; CHECK: .att_syntax ; CHECK: {{## InlineAsm End|#NO_APP}} } + +define void 
@t19_helper() nounwind { +entry: + ret void +} + +define void @t19() nounwind { +entry: + call void asm sideeffect inteldialect "call $0", "r,~{dirflag},~{fpsr},~{flags}"(void ()* @t19_helper) nounwind + ret void +; CHECK: t19: +; CHECK: movl %esp, %ebp +; CHECK: movl ${{_?}}t19_helper, %eax +; CHECK: {{## InlineAsm Start|#APP}} +; CHECK: .intel_syntax +; CHECK: call eax +; CHECK: .att_syntax +; CHECK: {{## InlineAsm End|#NO_APP}} +} + +@results = global [2 x i32] [i32 3, i32 2], align 4 + +define i32* @t30() nounwind ssp { +entry: + %res = alloca i32*, align 4 + call void asm sideeffect inteldialect "lea edi, dword ptr $0", "*m,~{edi},~{dirflag},~{fpsr},~{flags}"([2 x i32]* @results) nounwind + call void asm sideeffect inteldialect "mov dword ptr $0, edi", "=*m,~{dirflag},~{fpsr},~{flags}"(i32** %res) nounwind + %0 = load i32** %res, align 4 + ret i32* %0 +; CHECK: t30: +; CHECK: movl %esp, %ebp +; CHECK: {{## InlineAsm Start|#APP}} +; CHECK: .intel_syntax +; CHECK: lea edi, dword ptr [{{_?}}results] +; CHECK: .att_syntax +; CHECK: {{## InlineAsm End|#NO_APP}} +; CHECK: {{## InlineAsm Start|#APP}} +; CHECK: .intel_syntax +; CHECK: mov dword ptr [esi], edi +; CHECK: .att_syntax +; CHECK: {{## InlineAsm End|#NO_APP}} +; CHECK: movl (%esi), %eax +} diff --git a/test/CodeGen/X86/multiple-loop-post-inc.ll b/test/CodeGen/X86/multiple-loop-post-inc.ll index 9f7d036cf141..29b9f34464f0 100644 --- a/test/CodeGen/X86/multiple-loop-post-inc.ll +++ b/test/CodeGen/X86/multiple-loop-post-inc.ll @@ -1,4 +1,4 @@ -; RUN: llc -asm-verbose=false -disable-branch-fold -disable-code-place -disable-tail-duplicate -march=x86-64 -mcpu=nehalem < %s | FileCheck %s +; RUN: llc -asm-verbose=false -disable-branch-fold -disable-block-placement -disable-tail-duplicate -march=x86-64 -mcpu=nehalem < %s | FileCheck %s ; rdar://7236213 ; ; The scheduler's 2-address hack has been disabled, so there is diff --git a/test/CodeGen/X86/no-cmov.ll b/test/CodeGen/X86/no-cmov.ll new file mode 100644 index 000000000000..62d73b0732e7 --- /dev/null +++ b/test/CodeGen/X86/no-cmov.ll @@ -0,0 +1,11 @@ +; RUN: llc -march=x86 -mcpu=i486 < %s | FileCheck %s + +define i32 @test1(i32 %g, i32* %j) { + %tobool = icmp eq i32 %g, 0 + %cmp = load i32* %j, align 4 + %retval.0 = select i1 %tobool, i32 1, i32 %cmp + ret i32 %retval.0 + +; CHECK: test1: +; CHECK-NOT: cmov +} diff --git a/test/CodeGen/X86/phi-immediate-factoring.ll b/test/CodeGen/X86/phi-immediate-factoring.ll index 476bb1099831..6425ef0e8376 100644 --- a/test/CodeGen/X86/phi-immediate-factoring.ll +++ b/test/CodeGen/X86/phi-immediate-factoring.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -march=x86 -stats 2>&1 | grep "Number of blocks eliminated" | grep 6 ; PR1296 diff --git a/test/CodeGen/X86/pmovsx-inreg.ll b/test/CodeGen/X86/pmovsx-inreg.ll new file mode 100644 index 000000000000..d8c27f25043a --- /dev/null +++ b/test/CodeGen/X86/pmovsx-inreg.ll @@ -0,0 +1,176 @@ +; RUN: llc < %s -march=x86-64 -mcpu=penryn | FileCheck -check-prefix=SSE41 %s +; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx | FileCheck -check-prefix=AVX1 %s +; RUN: llc < %s -march=x86-64 -mcpu=core-avx2 | FileCheck -check-prefix=AVX2 %s + +; PR14887 +; These tests inject a store into the chain to test the inreg versions of pmovsx + +define void @test1(<2 x i8>* %in, <2 x i64>* %out) nounwind { + %wide.load35 = load <2 x i8>* %in, align 1 + %sext = sext <2 x i8> %wide.load35 to <2 x i64> + store <2 x i64> zeroinitializer, <2 x i64>* undef, align 8 + store <2 x i64> %sext, <2 x i64>* %out, align 8 + ret void + 
+; SSE41: test1: +; SSE41: pmovsxbq + +; AVX1: test1: +; AVX1: vpmovsxbq + +; AVX2: test1: +; AVX2: vpmovsxbq +} + +define void @test2(<4 x i8>* %in, <4 x i64>* %out) nounwind { + %wide.load35 = load <4 x i8>* %in, align 1 + %sext = sext <4 x i8> %wide.load35 to <4 x i64> + store <4 x i64> zeroinitializer, <4 x i64>* undef, align 8 + store <4 x i64> %sext, <4 x i64>* %out, align 8 + ret void + +; AVX2: test2: +; AVX2: vpmovsxbq +} + +define void @test3(<4 x i8>* %in, <4 x i32>* %out) nounwind { + %wide.load35 = load <4 x i8>* %in, align 1 + %sext = sext <4 x i8> %wide.load35 to <4 x i32> + store <4 x i32> zeroinitializer, <4 x i32>* undef, align 8 + store <4 x i32> %sext, <4 x i32>* %out, align 8 + ret void + +; SSE41: test3: +; SSE41: pmovsxbd + +; AVX1: test3: +; AVX1: vpmovsxbd + +; AVX2: test3: +; AVX2: vpmovsxbd +} + +define void @test4(<8 x i8>* %in, <8 x i32>* %out) nounwind { + %wide.load35 = load <8 x i8>* %in, align 1 + %sext = sext <8 x i8> %wide.load35 to <8 x i32> + store <8 x i32> zeroinitializer, <8 x i32>* undef, align 8 + store <8 x i32> %sext, <8 x i32>* %out, align 8 + ret void + +; AVX2: test4: +; AVX2: vpmovsxbd +} + +define void @test5(<8 x i8>* %in, <8 x i16>* %out) nounwind { + %wide.load35 = load <8 x i8>* %in, align 1 + %sext = sext <8 x i8> %wide.load35 to <8 x i16> + store <8 x i16> zeroinitializer, <8 x i16>* undef, align 8 + store <8 x i16> %sext, <8 x i16>* %out, align 8 + ret void + +; SSE41: test5: +; SSE41: pmovsxbw + +; AVX1: test5: +; AVX1: vpmovsxbw + +; AVX2: test5: +; AVX2: vpmovsxbw +} + +define void @test6(<16 x i8>* %in, <16 x i16>* %out) nounwind { + %wide.load35 = load <16 x i8>* %in, align 1 + %sext = sext <16 x i8> %wide.load35 to <16 x i16> + store <16 x i16> zeroinitializer, <16 x i16>* undef, align 8 + store <16 x i16> %sext, <16 x i16>* %out, align 8 + ret void + +; AVX2: test6: +; FIXME: v16i8 -> v16i16 is scalarized. 
+; AVX2-NOT: pmovsx +} + +define void @test7(<2 x i16>* %in, <2 x i64>* %out) nounwind { + %wide.load35 = load <2 x i16>* %in, align 1 + %sext = sext <2 x i16> %wide.load35 to <2 x i64> + store <2 x i64> zeroinitializer, <2 x i64>* undef, align 8 + store <2 x i64> %sext, <2 x i64>* %out, align 8 + ret void + + +; SSE41: test7: +; SSE41: pmovsxwq + +; AVX1: test7: +; AVX1: vpmovsxwq + +; AVX2: test7: +; AVX2: vpmovsxwq +} + +define void @test8(<4 x i16>* %in, <4 x i64>* %out) nounwind { + %wide.load35 = load <4 x i16>* %in, align 1 + %sext = sext <4 x i16> %wide.load35 to <4 x i64> + store <4 x i64> zeroinitializer, <4 x i64>* undef, align 8 + store <4 x i64> %sext, <4 x i64>* %out, align 8 + ret void + +; AVX2: test8: +; AVX2: vpmovsxwq +} + +define void @test9(<4 x i16>* %in, <4 x i32>* %out) nounwind { + %wide.load35 = load <4 x i16>* %in, align 1 + %sext = sext <4 x i16> %wide.load35 to <4 x i32> + store <4 x i32> zeroinitializer, <4 x i32>* undef, align 8 + store <4 x i32> %sext, <4 x i32>* %out, align 8 + ret void + +; SSE41: test9: +; SSE41: pmovsxwd + +; AVX1: test9: +; AVX1: vpmovsxwd + +; AVX2: test9: +; AVX2: vpmovsxwd +} + +define void @test10(<8 x i16>* %in, <8 x i32>* %out) nounwind { + %wide.load35 = load <8 x i16>* %in, align 1 + %sext = sext <8 x i16> %wide.load35 to <8 x i32> + store <8 x i32> zeroinitializer, <8 x i32>* undef, align 8 + store <8 x i32> %sext, <8 x i32>* %out, align 8 + ret void + +; AVX2: test10: +; AVX2: vpmovsxwd +} + +define void @test11(<2 x i32>* %in, <2 x i64>* %out) nounwind { + %wide.load35 = load <2 x i32>* %in, align 1 + %sext = sext <2 x i32> %wide.load35 to <2 x i64> + store <2 x i64> zeroinitializer, <2 x i64>* undef, align 8 + store <2 x i64> %sext, <2 x i64>* %out, align 8 + ret void + +; SSE41: test11: +; SSE41: pmovsxdq + +; AVX1: test11: +; AVX1: vpmovsxdq + +; AVX2: test11: +; AVX2: vpmovsxdq +} + +define void @test12(<4 x i32>* %in, <4 x i64>* %out) nounwind { + %wide.load35 = load <4 x i32>* %in, align 1 + %sext = sext <4 x i32> %wide.load35 to <4 x i64> + store <4 x i64> zeroinitializer, <4 x i64>* undef, align 8 + store <4 x i64> %sext, <4 x i64>* %out, align 8 + ret void + +; AVX2: test12: +; AVX2: vpmovsxdq +} diff --git a/test/CodeGen/X86/pointer-vector.ll b/test/CodeGen/X86/pointer-vector.ll index 58423d195964..0ee99875264f 100644 --- a/test/CodeGen/X86/pointer-vector.ll +++ b/test/CodeGen/X86/pointer-vector.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=i686-linux -mcpu=corei7 | FileCheck %s -; RUN: opt -instsimplify %s -disable-output +; RUN: opt -instsimplify -disable-output < %s ;CHECK: SHUFF0 define <8 x i32*> @SHUFF0(<4 x i32*> %ptrv) nounwind { diff --git a/test/CodeGen/X86/pr10475.ll b/test/CodeGen/X86/pr10475.ll new file mode 100644 index 000000000000..3efc39ee9f1f --- /dev/null +++ b/test/CodeGen/X86/pr10475.ll @@ -0,0 +1,30 @@ +; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=corei7-avx + +; No check in a crash test + +define void @autogen_262380_1000() { +BB: + br label %CF79 + +CF79: ; preds = %CF79, %BB + br i1 undef, label %CF79, label %CF84.critedge.critedge + +CF84.critedge.critedge: ; preds = %CF79 + %L35 = load <8 x i32>* undef + br label %CF85 + +CF85: ; preds = %CF85, %CF84.critedge.critedge + br i1 undef, label %CF85, label %CF86 + +CF86: ; preds = %CF86, %CF85 + %B61 = sub <8 x i32> %L35, zeroinitializer + %S64 = icmp ne <8 x i32> %B61, zeroinitializer + %E73 = extractelement <8 x i1> %S64, i32 6 + br i1 %E73, label %CF86, label %CF87 + +CF87: ; preds = %CF87, %CF86 + br i1 undef, label %CF87, label %CF88 + 
+CF88: ; preds = %CF87 + ret void +} diff --git a/test/CodeGen/X86/pr10499.ll b/test/CodeGen/X86/pr10499.ll new file mode 100644 index 000000000000..f9cc747e49a8 --- /dev/null +++ b/test/CodeGen/X86/pr10499.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=corei7-avx -mattr=-sse2 + +; No check as PR10499 is a crashing bug. + +define void @autogen_24438_500() { +BB: + %I = insertelement <8 x i32> undef, i32 -1, i32 4 + %BC = bitcast <8 x i32> %I to <8 x float> + br label %CF + +CF: ; preds = %CF, %BB + %ZE = fpext <8 x float> %BC to <8 x double> + br label %CF +} diff --git a/test/CodeGen/X86/pr10523.ll b/test/CodeGen/X86/pr10523.ll new file mode 100644 index 000000000000..7191d6949c18 --- /dev/null +++ b/test/CodeGen/X86/pr10523.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse41 + +; No check in a crash test + +define void @autogen_129334_5000() { +BB: + %I74 = insertelement <32 x i32> undef, i32 undef, i32 15 + %I105 = insertelement <32 x i32> undef, i32 undef, i32 14 + %Shuff292 = shufflevector <32 x i32> %I74, <32 x i32> undef, <32 x i32> <i32 undef, i32 12, i32 14, i32 16, i32 undef, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 undef, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 48, i32 50, i32 undef, i32 54, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 2, i32 4, i32 6, i32 8> + %Shuff302 = shufflevector <32 x i32> %Shuff292, <32 x i32> undef, <32 x i32> <i32 27, i32 29, i32 undef, i32 33, i32 undef, i32 37, i32 39, i32 undef, i32 undef, i32 undef, i32 47, i32 undef, i32 51, i32 53, i32 55, i32 57, i32 undef, i32 undef, i32 63, i32 1, i32 undef, i32 undef, i32 undef, i32 9, i32 11, i32 13, i32 undef, i32 17, i32 19, i32 21, i32 23, i32 undef> + %I326 = insertelement <32 x i32> undef, i32 undef, i32 15 + %B338 = sub <32 x i32> zeroinitializer, %I105 + %FC339 = sitofp <32 x i32> %I326 to <32 x double> + %S341 = icmp ne <32 x i32> %B338, undef + %E376 = extractelement <32 x i1> %S341, i32 0 + %Shuff419 = shufflevector <32 x i32> undef, <32 x i32> %Shuff302, <32 x i32> <i32 undef, i32 44, i32 46, i32 48, i32 50, i32 52, i32 undef, i32 56, i32 58, i32 60, i32 62, i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 12, i32 14, i32 undef, i32 undef, i32 20, i32 22, i32 undef, i32 26, i32 28, i32 undef, i32 32, i32 34, i32 36, i32 38, i32 40> + ret void +} diff --git a/test/CodeGen/X86/pr10524.ll b/test/CodeGen/X86/pr10524.ll new file mode 100644 index 000000000000..ed3e7c528052 --- /dev/null +++ b/test/CodeGen/X86/pr10524.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse41 + +; No check in a crash test + +define void @autogen_178513_5000() { +BB: + %Shuff22 = shufflevector <2 x i32> undef, <2 x i32> zeroinitializer, <2 x i32> <i32 3, i32 1> + %B26 = sub <2 x i32> %Shuff22, zeroinitializer + %S79 = icmp eq <2 x i32> %B26, zeroinitializer + %B269 = urem <2 x i1> zeroinitializer, %S79 + %Se335 = sext <2 x i1> %B269 to <2 x i8> + store <2 x i8> %Se335, <2 x i8>* undef + ret void +} diff --git a/test/CodeGen/X86/pr10525.ll b/test/CodeGen/X86/pr10525.ll new file mode 100644 index 000000000000..342c1d63e192 --- /dev/null +++ b/test/CodeGen/X86/pr10525.ll @@ -0,0 +1,13 @@ +; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse41 + +; No check in a crash test + +define void @autogen_163411_5000() { +BB: + %L = load <2 x i64>* undef + %Shuff11 = shufflevector <2 x i64> %L, <2 x i64> %L, <2 x i32> <i32 2, i32 0> + %I51 = insertelement <2 x i64> undef, i64 undef, i32 0 + %Shuff152 = shufflevector <2 x i64> %I51, <2 x i64> 
%Shuff11, <2 x i32> <i32 1, i32 3> + store <2 x i64> %Shuff152, <2 x i64>* undef + ret void +} diff --git a/test/CodeGen/X86/pr10526.ll b/test/CodeGen/X86/pr10526.ll new file mode 100644 index 000000000000..6963fe515898 --- /dev/null +++ b/test/CodeGen/X86/pr10526.ll @@ -0,0 +1,13 @@ +; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse41 + +; No check in a crash test + +define void @autogen_142660_5000() { +BB: + %Shuff49 = shufflevector <8 x i32> zeroinitializer, <8 x i32> undef, <8 x i32> <i32 2, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 14, i32 0> + %B85 = sub <8 x i32> %Shuff49, zeroinitializer + %S242 = icmp eq <8 x i32> zeroinitializer, %B85 + %FC284 = uitofp <8 x i1> %S242 to <8 x float> + store <8 x float> %FC284, <8 x float>* undef + ret void +} diff --git a/test/CodeGen/X86/pr11998.ll b/test/CodeGen/X86/pr11998.ll new file mode 100644 index 000000000000..1baf07924d39 --- /dev/null +++ b/test/CodeGen/X86/pr11998.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s -mcpu=corei7-avx -march=x86-64 -mattr=+avx + +define void @autogen_51367_5000(i8) { +BB: + %B = srem i8 55, %0 + %B9 = shl i8 %B, %B + br label %CF + +CF: ; preds = %CF, %BB + br i1 undef, label %CF, label %CF403 + +CF403: ; preds = %CF403, %CF + %S44 = icmp eq i8 %B9, %0 + br i1 %S44, label %CF403, label %CF405 + +CF405: ; preds = %CF405, %CF403 + br label %CF405 +} diff --git a/test/CodeGen/X86/pr14314.ll b/test/CodeGen/X86/pr14314.ll index 5388a4b01b65..0832702244e5 100644 --- a/test/CodeGen/X86/pr14314.ll +++ b/test/CodeGen/X86/pr14314.ll @@ -5,9 +5,9 @@ entry: %0 = atomicrmw sub i64* %a, i64 %b seq_cst ret i64 %0 ; CHECK: atomicSub -; movl %eax, %ebx -; subl {{%[a-z]+}}, %ebx -; movl %edx, %ecx -; sbbl {{%[a-z]+}}, %ecx +; CHECK: movl %eax, %ebx +; CHECK: subl {{%[a-z]+}}, %ebx +; CHECK: movl %edx, %ecx +; CHECK: sbbl {{%[a-z]+}}, %ecx ; CHECK: ret } diff --git a/test/CodeGen/X86/pr14562.ll b/test/CodeGen/X86/pr14562.ll new file mode 100644 index 000000000000..e66f1752a30f --- /dev/null +++ b/test/CodeGen/X86/pr14562.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -march=x86 | FileCheck %s + +@temp1 = global i64 -77129852189294865, align 8 + +define void @foo() nounwind { + %x = load i64* @temp1, align 8 + %s = shl i64 %x, 32 + %t = trunc i64 %s to i32 + %z = zext i32 %t to i64 + store i64 %z, i64* @temp1, align 8 +; CHECK: movl $0, {{_?}}temp1+4 +; CHECK: movl $0, {{_?}}temp1 + ret void +} + diff --git a/test/CodeGen/X86/pr15267.ll b/test/CodeGen/X86/pr15267.ll new file mode 100644 index 000000000000..c8aaf327a7dd --- /dev/null +++ b/test/CodeGen/X86/pr15267.ll @@ -0,0 +1,66 @@ +; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=corei7-avx | FileCheck %s + +define <4 x i3> @test1(<4 x i3>* %in) nounwind { + %ret = load <4 x i3>* %in, align 1 + ret <4 x i3> %ret +} + +; CHECK: test1 +; CHECK: movzwl +; CHECK: shrl $3 +; CHECK: andl $7 +; CHECK: andl $7 +; CHECK: vmovd +; CHECK: pinsrd $1 +; CHECK: shrl $6 +; CHECK: andl $7 +; CHECK: pinsrd $2 +; CHECK: shrl $9 +; CHECK: andl $7 +; CHECK: pinsrd $3 +; CHECK: ret + +define <4 x i1> @test2(<4 x i1>* %in) nounwind { + %ret = load <4 x i1>* %in, align 1 + ret <4 x i1> %ret +} + +; CHECK: test2 +; CHECK: movzbl +; CHECK: shrl +; CHECK: andl $1 +; CHECK: andl $1 +; CHECK: vmovd +; CHECK: pinsrd $1 +; CHECK: shrl $2 +; CHECK: andl $1 +; CHECK: pinsrd $2 +; CHECK: shrl $3 +; CHECK: andl $1 +; CHECK: pinsrd $3 +; CHECK: ret + +define <4 x i64> @test3(<4 x i1>* %in) nounwind { + %wide.load35 = load <4 x i1>* %in, align 1 + %sext = sext <4 x i1> %wide.load35 to <4 x i64> + ret <4 x i64> %sext +} + +; CHECK: 
test3 +; CHECK: movzbl +; CHECK: shrl +; CHECK: andl $1 +; CHECK: andl $1 +; CHECK: vmovd +; CHECK: pinsrd $1 +; CHECK: shrl $2 +; CHECK: andl $1 +; CHECK: pinsrd $2 +; CHECK: shrl $3 +; CHECK: andl $1 +; CHECK: pinsrd $3 +; CHECK: pslld +; CHECK: psrad +; CHECK: pmovsxdq +; CHECK: pmovsxdq +; CHECK: ret diff --git a/test/CodeGen/X86/pr15296.ll b/test/CodeGen/X86/pr15296.ll new file mode 100644 index 000000000000..1187d80cdf75 --- /dev/null +++ b/test/CodeGen/X86/pr15296.ll @@ -0,0 +1,46 @@ +; RUN: llc < %s -mtriple=i686-pc-linux -mcpu=corei7-avx | FileCheck %s + +define <8 x i32> @shiftInput___vyuunu(<8 x i32> %input, i32 %shiftval, <8 x i32> %__mask) nounwind { +allocas: + %smear.0 = insertelement <8 x i32> undef, i32 %shiftval, i32 0 + %smear.1 = insertelement <8 x i32> %smear.0, i32 %shiftval, i32 1 + %smear.2 = insertelement <8 x i32> %smear.1, i32 %shiftval, i32 2 + %smear.3 = insertelement <8 x i32> %smear.2, i32 %shiftval, i32 3 + %smear.4 = insertelement <8 x i32> %smear.3, i32 %shiftval, i32 4 + %smear.5 = insertelement <8 x i32> %smear.4, i32 %shiftval, i32 5 + %smear.6 = insertelement <8 x i32> %smear.5, i32 %shiftval, i32 6 + %smear.7 = insertelement <8 x i32> %smear.6, i32 %shiftval, i32 7 + %bitop = lshr <8 x i32> %input, %smear.7 + ret <8 x i32> %bitop +} + +; CHECK: shiftInput___vyuunu +; CHECK: psrld +; CHECK: psrld +; CHECK: ret + +define <8 x i32> @shiftInput___canonical(<8 x i32> %input, i32 %shiftval, <8 x i32> %__mask) nounwind { +allocas: + %smear.0 = insertelement <8 x i32> undef, i32 %shiftval, i32 0 + %smear.7 = shufflevector <8 x i32> %smear.0, <8 x i32> undef, <8 x i32> zeroinitializer + %bitop = lshr <8 x i32> %input, %smear.7 + ret <8 x i32> %bitop +} + +; CHECK: shiftInput___canonical +; CHECK: psrld +; CHECK: psrld +; CHECK: ret + +define <4 x i64> @shiftInput___64in32bitmode(<4 x i64> %input, i64 %shiftval, <4 x i64> %__mask) nounwind { +allocas: + %smear.0 = insertelement <4 x i64> undef, i64 %shiftval, i32 0 + %smear.7 = shufflevector <4 x i64> %smear.0, <4 x i64> undef, <4 x i32> zeroinitializer + %bitop = lshr <4 x i64> %input, %smear.7 + ret <4 x i64> %bitop +} + +; CHECK: shiftInput___64in32bitmode +; CHECK: psrlq +; CHECK: psrlq +; CHECK: ret diff --git a/test/CodeGen/X86/pr15309.ll b/test/CodeGen/X86/pr15309.ll new file mode 100644 index 000000000000..6dbbc72a7b7a --- /dev/null +++ b/test/CodeGen/X86/pr15309.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=i686-linux-pc -mcpu=corei7 | FileCheck %s + +define void @test_convert_float2_ulong2(<2 x i64>* nocapture %src, <2 x float>* nocapture %dest) noinline { +L.entry: + %0 = getelementptr <2 x i64>* %src, i32 10 + %1 = load <2 x i64>* %0, align 16 + %2 = uitofp <2 x i64> %1 to <2 x float> + %3 = getelementptr <2 x float>* %dest, i32 10 + store <2 x float> %2, <2 x float>* %3, align 8 + ret void +} + +; CHECK: test_convert_float2_ulong2 +; CHECK-NOT: cvtpd2ps +; CHECK: ret diff --git a/test/CodeGen/X86/pr3522.ll b/test/CodeGen/X86/pr3522.ll index d8f37781fc6e..9f8dc0370668 100644 --- a/test/CodeGen/X86/pr3522.ll +++ b/test/CodeGen/X86/pr3522.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -march=x86 -stats 2>&1 | not grep "instructions sunk" ; PR3522 diff --git a/test/CodeGen/X86/pre-ra-sched.ll b/test/CodeGen/X86/pre-ra-sched.ll new file mode 100644 index 000000000000..b792ffa09fb9 --- /dev/null +++ b/test/CodeGen/X86/pre-ra-sched.ll @@ -0,0 +1,56 @@ +; RUN: llc < %s -mtriple=x86_64-apple-macosx -debug-only=pre-RA-sched \ +; RUN: 2>&1 | FileCheck %s +; REQUIRES: asserts +; +; rdar:13279013: 
pre-RA-sched should not check all interferences and +; repush them on the ready queue after scheduling each instruction. +; +; CHECK: *** List Scheduling +; CHECK: Interfering reg EFLAGS +; CHECK: Repushing +; CHECK: Repushing +; CHECK: Repushing +; CHECK-NOT: Repushing +; CHECK: *** Final schedule +define i32 @test(i8* %pin) #0 { + %g0 = getelementptr inbounds i8* %pin, i64 0 + %l0 = load i8* %g0, align 1 + + %g1a = getelementptr inbounds i8* %pin, i64 1 + %l1a = load i8* %g1a, align 1 + %z1a = zext i8 %l1a to i32 + %g1b = getelementptr inbounds i8* %pin, i64 2 + %l1b = load i8* %g1b, align 1 + %z1b = zext i8 %l1b to i32 + %c1 = icmp ne i8 %l0, 0 + %x1 = xor i32 %z1a, %z1b + %s1 = select i1 %c1, i32 %z1a, i32 %x1 + + %g2a = getelementptr inbounds i8* %pin, i64 3 + %l2a = load i8* %g2a, align 1 + %z2a = zext i8 %l2a to i32 + %g2b = getelementptr inbounds i8* %pin, i64 4 + %l2b = load i8* %g2b, align 1 + %z2b = zext i8 %l2b to i32 + %x2 = xor i32 %z2a, %z2b + %s2 = select i1 %c1, i32 %z2a, i32 %x2 + + %g3a = getelementptr inbounds i8* %pin, i64 5 + %l3a = load i8* %g3a, align 1 + %z3a = zext i8 %l3a to i32 + %g3b = getelementptr inbounds i8* %pin, i64 6 + %l3b = load i8* %g3b, align 1 + %z3b = zext i8 %l3b to i32 + %x3 = xor i32 %z3a, %z3b + %s3 = select i1 %c1, i32 %z3a, i32 %x3 + + %c3 = icmp ne i8 %l1a, 0 + %c4 = icmp ne i8 %l2a, 0 + + %s4 = select i1 %c3, i32 %s1, i32 %s2 + %s5 = select i1 %c4, i32 %s4, i32 %s3 + + ret i32 %s5 +} + +attributes #0 = { nounwind ssp uwtable } diff --git a/test/CodeGen/X86/prefetch.ll b/test/CodeGen/X86/prefetch.ll index ec2f302b1499..efb51913c5c1 100644 --- a/test/CodeGen/X86/prefetch.ll +++ b/test/CodeGen/X86/prefetch.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -march=x86 -mattr=+sse | FileCheck %s ; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s +; RUN: llc < %s -march=x86 -mattr=+sse -mattr=+prfchw | FileCheck %s -check-prefix=PRFCHW ; rdar://10538297 @@ -9,10 +10,12 @@ entry: ; CHECK: prefetcht1 ; CHECK: prefetcht0 ; CHECK: prefetchnta +; PRFCHW: prefetchw tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 1, i32 1 ) tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 2, i32 1 ) tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3, i32 1 ) tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 0, i32 1 ) + tail call void @llvm.prefetch( i8* %ptr, i32 1, i32 3, i32 1 ) ret void } diff --git a/test/CodeGen/X86/psubus.ll b/test/CodeGen/X86/psubus.ll new file mode 100644 index 000000000000..aff4afbd2e35 --- /dev/null +++ b/test/CodeGen/X86/psubus.ll @@ -0,0 +1,340 @@ +; RUN: llc -mcpu=core2 < %s | FileCheck %s -check-prefix=SSE2 +; RUN: llc -mcpu=corei7-avx < %s | FileCheck %s -check-prefix=AVX1 +; RUN: llc -mcpu=core-avx2 < %s | FileCheck %s -check-prefix=AVX2 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +define void @test1(i16* nocapture %head) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i16* %head, i64 %index + %1 = bitcast i16* %0 to <8 x i16>* + %2 = load <8 x i16>* %1, align 2 + %3 = icmp slt <8 x i16> %2, zeroinitializer + %4 = xor <8 x i16> %2, <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768> + %5 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> zeroinitializer + store <8 x i16> %5, <8 x 
i16>* %1, align 2 + %index.next = add i64 %index, 8 + %6 = icmp eq i64 %index.next, 16384 + br i1 %6, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE2: @test1 +; SSE2: psubusw LCPI0_0(%rip), %xmm0 + +; AVX1: @test1 +; AVX1: vpsubusw LCPI0_0(%rip), %xmm0, %xmm0 + +; AVX2: @test1 +; AVX2: vpsubusw LCPI0_0(%rip), %xmm0, %xmm0 +} + +define void @test2(i16* nocapture %head) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i16* %head, i64 %index + %1 = bitcast i16* %0 to <8 x i16>* + %2 = load <8 x i16>* %1, align 2 + %3 = icmp ugt <8 x i16> %2, <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766> + %4 = add <8 x i16> %2, <i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767> + %5 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> zeroinitializer + store <8 x i16> %5, <8 x i16>* %1, align 2 + %index.next = add i64 %index, 8 + %6 = icmp eq i64 %index.next, 16384 + br i1 %6, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE2: @test2 +; SSE2: psubusw LCPI1_0(%rip), %xmm0 + +; AVX1: @test2 +; AVX1: vpsubusw LCPI1_0(%rip), %xmm0, %xmm0 + +; AVX2: @test2 +; AVX2: vpsubusw LCPI1_0(%rip), %xmm0, %xmm0 +} + +define void @test3(i16* nocapture %head, i16 zeroext %w) nounwind { +vector.ph: + %0 = insertelement <8 x i16> undef, i16 %w, i32 0 + %broadcast15 = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %1 = getelementptr inbounds i16* %head, i64 %index + %2 = bitcast i16* %1 to <8 x i16>* + %3 = load <8 x i16>* %2, align 2 + %4 = icmp ult <8 x i16> %3, %broadcast15 + %5 = sub <8 x i16> %3, %broadcast15 + %6 = select <8 x i1> %4, <8 x i16> zeroinitializer, <8 x i16> %5 + store <8 x i16> %6, <8 x i16>* %2, align 2 + %index.next = add i64 %index, 8 + %7 = icmp eq i64 %index.next, 16384 + br i1 %7, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE2: @test3 +; SSE2: psubusw %xmm0, %xmm1 + +; AVX1: @test3 +; AVX1: vpsubusw %xmm0, %xmm1, %xmm1 + +; AVX2: @test3 +; AVX2: vpsubusw %xmm0, %xmm1, %xmm1 +} + +define void @test4(i8* nocapture %head) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i8* %head, i64 %index + %1 = bitcast i8* %0 to <16 x i8>* + %2 = load <16 x i8>* %1, align 1 + %3 = icmp slt <16 x i8> %2, zeroinitializer + %4 = xor <16 x i8> %2, <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128> + %5 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> zeroinitializer + store <16 x i8> %5, <16 x i8>* %1, align 1 + %index.next = add i64 %index, 16 + %6 = icmp eq i64 %index.next, 16384 + br i1 %6, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE2: @test4 +; SSE2: psubusb LCPI3_0(%rip), %xmm0 + +; AVX1: @test4 +; AVX1: vpsubusb LCPI3_0(%rip), %xmm0, %xmm0 + +; AVX2: @test4 +; AVX2: vpsubusb LCPI3_0(%rip), %xmm0, %xmm0 +} + +define void @test5(i8* nocapture %head) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = 
%vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i8* %head, i64 %index + %1 = bitcast i8* %0 to <16 x i8>* + %2 = load <16 x i8>* %1, align 1 + %3 = icmp ugt <16 x i8> %2, <i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126> + %4 = add <16 x i8> %2, <i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127> + %5 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> zeroinitializer + store <16 x i8> %5, <16 x i8>* %1, align 1 + %index.next = add i64 %index, 16 + %6 = icmp eq i64 %index.next, 16384 + br i1 %6, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE2: @test5 +; SSE2: psubusb LCPI4_0(%rip), %xmm0 + +; AVX1: @test5 +; AVX1: vpsubusb LCPI4_0(%rip), %xmm0, %xmm0 + +; AVX2: @test5 +; AVX2: vpsubusb LCPI4_0(%rip), %xmm0, %xmm0 +} + +define void @test6(i8* nocapture %head, i8 zeroext %w) nounwind { +vector.ph: + %0 = insertelement <16 x i8> undef, i8 %w, i32 0 + %broadcast15 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %1 = getelementptr inbounds i8* %head, i64 %index + %2 = bitcast i8* %1 to <16 x i8>* + %3 = load <16 x i8>* %2, align 1 + %4 = icmp ult <16 x i8> %3, %broadcast15 + %5 = sub <16 x i8> %3, %broadcast15 + %6 = select <16 x i1> %4, <16 x i8> zeroinitializer, <16 x i8> %5 + store <16 x i8> %6, <16 x i8>* %2, align 1 + %index.next = add i64 %index, 16 + %7 = icmp eq i64 %index.next, 16384 + br i1 %7, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE2: @test6 +; SSE2: psubusb %xmm0, %xmm1 + +; AVX1: @test6 +; AVX1: vpsubusb %xmm0, %xmm1, %xmm1 + +; AVX2: @test6 +; AVX2: vpsubusb %xmm0, %xmm1, %xmm1 +} + +define void @test7(i16* nocapture %head) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i16* %head, i64 %index + %1 = bitcast i16* %0 to <16 x i16>* + %2 = load <16 x i16>* %1, align 2 + %3 = icmp slt <16 x i16> %2, zeroinitializer + %4 = xor <16 x i16> %2, <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768> + %5 = select <16 x i1> %3, <16 x i16> %4, <16 x i16> zeroinitializer + store <16 x i16> %5, <16 x i16>* %1, align 2 + %index.next = add i64 %index, 8 + %6 = icmp eq i64 %index.next, 16384 + br i1 %6, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: @test7 +; AVX2: vpsubusw LCPI6_0(%rip), %ymm0, %ymm0 +} + +define void @test8(i16* nocapture %head) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i16* %head, i64 %index + %1 = bitcast i16* %0 to <16 x i16>* + %2 = load <16 x i16>* %1, align 2 + %3 = icmp ugt <16 x i16> %2, <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766> + %4 = add <16 x i16> %2, <i16 
-32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767> + %5 = select <16 x i1> %3, <16 x i16> %4, <16 x i16> zeroinitializer + store <16 x i16> %5, <16 x i16>* %1, align 2 + %index.next = add i64 %index, 8 + %6 = icmp eq i64 %index.next, 16384 + br i1 %6, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: @test8 +; AVX2: vpsubusw LCPI7_0(%rip), %ymm0, %ymm0 +} + +define void @test9(i16* nocapture %head, i16 zeroext %w) nounwind { +vector.ph: + %0 = insertelement <16 x i16> undef, i16 %w, i32 0 + %broadcast15 = shufflevector <16 x i16> %0, <16 x i16> undef, <16 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %1 = getelementptr inbounds i16* %head, i64 %index + %2 = bitcast i16* %1 to <16 x i16>* + %3 = load <16 x i16>* %2, align 2 + %4 = icmp ult <16 x i16> %3, %broadcast15 + %5 = sub <16 x i16> %3, %broadcast15 + %6 = select <16 x i1> %4, <16 x i16> zeroinitializer, <16 x i16> %5 + store <16 x i16> %6, <16 x i16>* %2, align 2 + %index.next = add i64 %index, 8 + %7 = icmp eq i64 %index.next, 16384 + br i1 %7, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + + +; AVX2: @test9 +; AVX2: vpsubusw %ymm0, %ymm1, %ymm1 +} + +define void @test10(i8* nocapture %head) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i8* %head, i64 %index + %1 = bitcast i8* %0 to <32 x i8>* + %2 = load <32 x i8>* %1, align 1 + %3 = icmp slt <32 x i8> %2, zeroinitializer + %4 = xor <32 x i8> %2, <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128> + %5 = select <32 x i1> %3, <32 x i8> %4, <32 x i8> zeroinitializer + store <32 x i8> %5, <32 x i8>* %1, align 1 + %index.next = add i64 %index, 16 + %6 = icmp eq i64 %index.next, 16384 + br i1 %6, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + + +; AVX2: @test10 +; AVX2: vpsubusb LCPI9_0(%rip), %ymm0, %ymm0 +} + +define void @test11(i8* nocapture %head) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i8* %head, i64 %index + %1 = bitcast i8* %0 to <32 x i8>* + %2 = load <32 x i8>* %1, align 1 + %3 = icmp ugt <32 x i8> %2, <i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126> + %4 = add <32 x i8> %2, <i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127> + %5 = select <32 x i1> %3, <32 x i8> %4, <32 x i8> zeroinitializer + store <32 x i8> %5, <32 x i8>* %1, align 1 + 
%index.next = add i64 %index, 16 + %6 = icmp eq i64 %index.next, 16384 + br i1 %6, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: @test11 +; AVX2: vpsubusb LCPI10_0(%rip), %ymm0, %ymm0 +} + +define void @test12(i8* nocapture %head, i8 zeroext %w) nounwind { +vector.ph: + %0 = insertelement <32 x i8> undef, i8 %w, i32 0 + %broadcast15 = shufflevector <32 x i8> %0, <32 x i8> undef, <32 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %1 = getelementptr inbounds i8* %head, i64 %index + %2 = bitcast i8* %1 to <32 x i8>* + %3 = load <32 x i8>* %2, align 1 + %4 = icmp ult <32 x i8> %3, %broadcast15 + %5 = sub <32 x i8> %3, %broadcast15 + %6 = select <32 x i1> %4, <32 x i8> zeroinitializer, <32 x i8> %5 + store <32 x i8> %6, <32 x i8>* %2, align 1 + %index.next = add i64 %index, 16 + %7 = icmp eq i64 %index.next, 16384 + br i1 %7, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: @test12 +; AVX2: vpsubusb %ymm0, %ymm1, %ymm1 +} diff --git a/test/CodeGen/X86/rdrand.ll b/test/CodeGen/X86/rdrand.ll index e2224a619676..98f407776381 100644 --- a/test/CodeGen/X86/rdrand.ll +++ b/test/CodeGen/X86/rdrand.ll @@ -39,7 +39,7 @@ define i32 @_rdrand64_step(i64* %random_val) { %isvalid = extractvalue {i64, i32} %call, 1 ret i32 %isvalid ; CHECK: _rdrand64_step: -; CHECK: rdrandq %r[[T1:[[a-z]+]] +; CHECK: rdrandq %r[[T1:[a-z]+]] ; CHECK: movq %r[[T1]], (%r[[A0]]) ; CHECK: movl $1, %eax ; CHECK: cmovael %e[[T1]], %eax diff --git a/test/CodeGen/X86/rdseed.ll b/test/CodeGen/X86/rdseed.ll new file mode 100644 index 000000000000..35de7ebf7430 --- /dev/null +++ b/test/CodeGen/X86/rdseed.ll @@ -0,0 +1,48 @@ +; RUN: llc < %s -march=x86-64 -mcpu=core-avx-i -mattr=+rdseed | FileCheck %s + +declare {i16, i32} @llvm.x86.rdseed.16() +declare {i32, i32} @llvm.x86.rdseed.32() +declare {i64, i32} @llvm.x86.rdseed.64() + +define i32 @_rdseed16_step(i16* %random_val) { + %call = call {i16, i32} @llvm.x86.rdseed.16() + %randval = extractvalue {i16, i32} %call, 0 + store i16 %randval, i16* %random_val + %isvalid = extractvalue {i16, i32} %call, 1 + ret i32 %isvalid +; CHECK: _rdseed16_step: +; CHECK: rdseedw %ax +; CHECK: movw %ax, (%r[[A0:di|cx]]) +; CHECK: movzwl %ax, %ecx +; CHECK: movl $1, %eax +; CHECK: cmovael %ecx, %eax +; CHECK: ret +} + +define i32 @_rdseed32_step(i32* %random_val) { + %call = call {i32, i32} @llvm.x86.rdseed.32() + %randval = extractvalue {i32, i32} %call, 0 + store i32 %randval, i32* %random_val + %isvalid = extractvalue {i32, i32} %call, 1 + ret i32 %isvalid +; CHECK: _rdseed32_step: +; CHECK: rdseedl %e[[T0:[a-z]+]] +; CHECK: movl %e[[T0]], (%r[[A0]]) +; CHECK: movl $1, %eax +; CHECK: cmovael %e[[T0]], %eax +; CHECK: ret +} + +define i32 @_rdseed64_step(i64* %random_val) { + %call = call {i64, i32} @llvm.x86.rdseed.64() + %randval = extractvalue {i64, i32} %call, 0 + store i64 %randval, i64* %random_val + %isvalid = extractvalue {i64, i32} %call, 1 + ret i32 %isvalid +; CHECK: _rdseed64_step: +; CHECK: rdseedq %r[[T1:[a-z]+]] +; CHECK: movq %r[[T1]], (%r[[A0]]) +; CHECK: movl $1, %eax +; CHECK: cmovael %e[[T1]], %eax +; CHECK: ret +} diff --git a/test/CodeGen/X86/regpressure.ll b/test/CodeGen/X86/regpressure.ll index 52d7b56f182e..1f756bee8a9d 100644 --- a/test/CodeGen/X86/regpressure.ll +++ b/test/CodeGen/X86/regpressure.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ;; Both functions in this testcase 
should codegen to the same function, and ;; neither of them should require spilling anything to the stack. diff --git a/test/CodeGen/X86/ret-mmx.ll b/test/CodeGen/X86/ret-mmx.ll index 865e147a4a24..778e4722cd95 100644 --- a/test/CodeGen/X86/ret-mmx.ll +++ b/test/CodeGen/X86/ret-mmx.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin11 -mattr=+mmx,+sse2 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin11 -mcpu=core2 -mattr=+mmx,+sse2 | FileCheck %s ; rdar://6602459 @g_v1di = external global <1 x i64> diff --git a/test/CodeGen/X86/rip-rel-lea.ll b/test/CodeGen/X86/rip-rel-lea.ll new file mode 100644 index 000000000000..71dacf60caa1 --- /dev/null +++ b/test/CodeGen/X86/rip-rel-lea.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -relocation-model=pic | FileCheck %s -check-prefix=PIC64 +; RUN: llc < %s -mtriple=x86_64-pc-linux-gnux32 -relocation-model=pic | FileCheck %s -check-prefix=PICX32 +; RUN: llc < %s -mtriple=i686-pc-linux-gnu -relocation-model=pic | FileCheck %s -check-prefix=PIC32 + +; Use %rip-relative addressing even in static mode on x86-64, because +; it has a smaller encoding. + +@a = internal global double 3.4 +define double* @foo() nounwind { + %a = getelementptr double* @a, i64 0 + ret double* %a + +; PIC64: leaq a(%rip) +; PICX32: leal a(%rip) +; PIC32: leal a@GOTOFF(%eax) +} diff --git a/test/CodeGen/X86/sandybridge-loads.ll b/test/CodeGen/X86/sandybridge-loads.ll new file mode 100644 index 000000000000..5a23cf136d85 --- /dev/null +++ b/test/CodeGen/X86/sandybridge-loads.ll @@ -0,0 +1,39 @@ +; RUN: llc -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -o - < %s | FileCheck %s + +;CHECK: wideloads +;CHECK: vmovaps +;CHECK: vinsertf128 +;CHECK: vmovaps +;CHECK-NOT: vinsertf128 +;CHECK: ret + +define void @wideloads(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwind uwtable noinline ssp { + %v0 = load <8 x float>* %a, align 16 ; <---- unaligned! + %v1 = load <8 x float>* %b, align 32 ; <---- aligned! + %m0 = fcmp olt <8 x float> %v1, %v0 + %v2 = load <8 x float>* %c, align 32 ; <---- aligned! 
+ %m1 = fcmp olt <8 x float> %v2, %v0 + %mand = and <8 x i1> %m1, %m0 + %r = zext <8 x i1> %mand to <8 x i32> + store <8 x i32> %r, <8 x i32>* undef, align 32 + ret void +} + +; CHECK: widestores +; loads: +; CHECK: vmovaps +; CHECK: vmovaps +; stores: +; CHECK: vmovaps +; CHECK: vextractf128 +; CHECK: vmovaps +;CHECK: ret + +define void @widestores(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwind uwtable noinline ssp { + %v0 = load <8 x float>* %a, align 32 + %v1 = load <8 x float>* %b, align 32 + store <8 x float> %v0, <8 x float>* %b, align 32 ; <--- aligned + store <8 x float> %v1, <8 x float>* %a, align 16 ; <--- unaligned + ret void +} + diff --git a/test/CodeGen/X86/select.ll b/test/CodeGen/X86/select.ll index 3bec3acdbf76..09ca07b31a10 100644 --- a/test/CodeGen/X86/select.ll +++ b/test/CodeGen/X86/select.ll @@ -282,7 +282,7 @@ define i32 @test13(i32 %a, i32 %b) nounwind { ; ATOM: test13: ; ATOM: cmpl ; ATOM-NEXT: sbbl -; ATOM-NEXT: ret +; ATOM: ret } define i32 @test14(i32 %a, i32 %b) nounwind { @@ -299,7 +299,7 @@ define i32 @test14(i32 %a, i32 %b) nounwind { ; ATOM: cmpl ; ATOM-NEXT: sbbl ; ATOM-NEXT: notl -; ATOM-NEXT: ret +; ATOM: ret } ; rdar://10961709 diff --git a/test/CodeGen/X86/sext-load.ll b/test/CodeGen/X86/sext-load.ll index c9b39d3a489e..58c93229a2c0 100644 --- a/test/CodeGen/X86/sext-load.ll +++ b/test/CodeGen/X86/sext-load.ll @@ -1,9 +1,30 @@ -; RUN: llc < %s -march=x86 | grep movsbl +; RUN: llc < %s -march=x86 | FileCheck %s -define i32 @foo(i32 %X) nounwind { +; When doing sign extension, use the sext-load lowering to take advantage of +; x86's sign extension during loads. +; +; CHECK: test1: +; CHECK: movsbl {{.*}}, %eax +; CHECK-NEXT: ret +define i32 @test1(i32 %X) nounwind { entry: %tmp12 = trunc i32 %X to i8 ; <i8> [#uses=1] %tmp123 = sext i8 %tmp12 to i32 ; <i32> [#uses=1] ret i32 %tmp123 } +; When using a sextload representation, ensure that the sign extension is +; preserved even when removing shifted-out low bits. 
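As an aside on where this kind of IR comes from: reading the upper bits of a wide signed bitfield produces exactly this load / ashr / trunc sequence. The sketch below is a guess at that source shape, not the test's original code; the struct layout and field placement are ABI-dependent and the names are made up.

    #include <stdint.h>

    /* Illustrative only: reading the high 16 bits of a 48-bit signed
     * bitfield. Clang lowers the access to a wide integer load, an
     * arithmetic shift right and a truncation, the shape the new test2
     * expects to fold into a single sign-extending load. */
    struct rec {
      uint16_t tag;
      int64_t field : 48;
    };

    int32_t high_bits(const struct rec *r) {
      return (int32_t)(r->field >> 32);
    }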
+; +; CHECK: test2: +; CHECK: movswl {{.*}}, %eax +; CHECK-NEXT: ret +define i32 @test2({i16, [6 x i8]}* %this) { +entry: + %b48 = getelementptr inbounds { i16, [6 x i8] }* %this, i32 0, i32 1 + %cast = bitcast [6 x i8]* %b48 to i48* + %bf.load = load i48* %cast, align 2 + %bf.ashr = ashr i48 %bf.load, 32 + %bf.cast = trunc i48 %bf.ashr to i32 + ret i32 %bf.cast +} diff --git a/test/CodeGen/X86/sibcall.ll b/test/CodeGen/X86/sibcall.ll index 2af355905dc3..ceb79ea927a1 100644 --- a/test/CodeGen/X86/sibcall.ll +++ b/test/CodeGen/X86/sibcall.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=i686-linux -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=32 -; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=64 +; RUN: llc < %s -mtriple=i686-linux -mcpu=core2 -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=32 +; RUN: llc < %s -mtriple=x86_64-linux -mcpu=core2 -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=64 define void @t1(i32 %x) nounwind ssp { entry: diff --git a/test/CodeGen/X86/sincos-opt.ll b/test/CodeGen/X86/sincos-opt.ll new file mode 100644 index 000000000000..f364d1fc2dc8 --- /dev/null +++ b/test/CodeGen/X86/sincos-opt.ll @@ -0,0 +1,66 @@ +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.9.0 -mcpu=core2 | FileCheck %s --check-prefix=OSX_SINCOS +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core2 | FileCheck %s --check-prefix=OSX_NOOPT +; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -mcpu=core2 -enable-unsafe-fp-math | FileCheck %s --check-prefix=GNU_SINCOS + +; Combine sin / cos into a single call. +; rdar://13087969 + +define float @test1(float %x) nounwind { +entry: +; GNU_SINCOS: test1: +; GNU_SINCOS: callq sincosf +; GNU_SINCOS: movss 4(%rsp), %xmm0 +; GNU_SINCOS: addss (%rsp), %xmm0 + +; OSX_SINCOS: test1: +; OSX_SINCOS: callq ___sincosf_stret +; OSX_SINCOS: addss %xmm1, %xmm0 + +; OSX_NOOPT: test1 +; OSX_NOOPT: callq _cosf +; OSX_NOOPT: callq _sinf + %call = tail call float @sinf(float %x) nounwind readnone + %call1 = tail call float @cosf(float %x) nounwind readnone + %add = fadd float %call, %call1 + ret float %add +} + +define double @test2(double %x) nounwind { +entry: +; GNU_SINCOS: test2: +; GNU_SINCOS: callq sincos +; GNU_SINCOS: movsd 16(%rsp), %xmm0 +; GNU_SINCOS: addsd 8(%rsp), %xmm0 + +; OSX_SINCOS: test2: +; OSX_SINCOS: callq ___sincos_stret +; OSX_SINCOS: addsd %xmm1, %xmm0 + +; OSX_NOOPT: test2 +; OSX_NOOPT: callq _cos +; OSX_NOOPT: callq _sin + %call = tail call double @sin(double %x) nounwind readnone + %call1 = tail call double @cos(double %x) nounwind readnone + %add = fadd double %call, %call1 + ret double %add +} + +define x86_fp80 @test3(x86_fp80 %x) nounwind { +entry: +; GNU_SINCOS: test3: +; GNU_SINCOS: callq sinl +; GNU_SINCOS: callq cosl +; GNU_SINCOS: ret + %call = tail call x86_fp80 @sinl(x86_fp80 %x) nounwind + %call1 = tail call x86_fp80 @cosl(x86_fp80 %x) nounwind + %add = fadd x86_fp80 %call, %call1 + ret x86_fp80 %add +} + +declare float @sinf(float) readonly +declare double @sin(double) readonly +declare float @cosf(float) readonly +declare double @cos(double) readonly + +declare x86_fp80 @sinl(x86_fp80) +declare x86_fp80 @cosl(x86_fp80) diff --git a/test/CodeGen/X86/sink-hoist.ll b/test/CodeGen/X86/sink-hoist.ll index 649cd61ab78c..2aca5b897d35 100644 --- a/test/CodeGen/X86/sink-hoist.ll +++ b/test/CodeGen/X86/sink-hoist.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 -asm-verbose=false -mtriple=x86_64-unknown-linux-gnu -mcpu=nehalem -post-RA-scheduler=true | 
FileCheck %s +; RUN: llc < %s -march=x86-64 -asm-verbose=false -mtriple=x86_64-unknown-linux-gnu -mcpu=nehalem -post-RA-scheduler=true -schedmodel=false | FileCheck %s ; Currently, floating-point selects are lowered to CFG triangles. ; This means that one side of the select is always unconditionally diff --git a/test/CodeGen/X86/sse-align-2.ll b/test/CodeGen/X86/sse-align-2.ll index 102c3fb06cd7..22cd7723068c 100644 --- a/test/CodeGen/X86/sse-align-2.ll +++ b/test/CodeGen/X86/sse-align-2.ll @@ -1,12 +1,21 @@ -; RUN: llc < %s -march=x86-64 | grep movup | count 2 +; RUN: llc < %s -march=x86-64 -mcpu=penryn | FileCheck %s define <4 x float> @foo(<4 x float>* %p, <4 x float> %x) nounwind { %t = load <4 x float>* %p, align 4 %z = fmul <4 x float> %t, %x ret <4 x float> %z } + +; CHECK: foo: +; CHECK: movups +; CHECK: ret + define <2 x double> @bar(<2 x double>* %p, <2 x double> %x) nounwind { %t = load <2 x double>* %p, align 8 %z = fmul <2 x double> %t, %x ret <2 x double> %z } + +; CHECK: bar: +; CHECK: movupd +; CHECK: ret diff --git a/test/CodeGen/X86/sse-domains.ll b/test/CodeGen/X86/sse-domains.ll index c99287bdfb9f..168959a5d653 100644 --- a/test/CodeGen/X86/sse-domains.ll +++ b/test/CodeGen/X86/sse-domains.ll @@ -55,10 +55,10 @@ while.end: ; instructions, they are still dependent on themselves. ; CHECK: xorps [[XMM1:%xmm[0-9]+]] ; CHECK: , [[XMM1]] -; CHECK: cvtsi2ss %{{.*}}, [[XMM1]] +; CHECK: cvtsi2ssl %{{.*}}, [[XMM1]] ; CHECK: xorps [[XMM2:%xmm[0-9]+]] ; CHECK: , [[XMM2]] -; CHECK: cvtsi2ss %{{.*}}, [[XMM2]] +; CHECK: cvtsi2ssl %{{.*}}, [[XMM2]] ; define float @f2(i32 %m) nounwind uwtable readnone ssp { entry: diff --git a/test/CodeGen/X86/sse2-blend.ll b/test/CodeGen/X86/sse2-blend.ll index 2f4317bf294c..30a0fbe7d6de 100644 --- a/test/CodeGen/X86/sse2-blend.ll +++ b/test/CodeGen/X86/sse2-blend.ll @@ -28,33 +28,29 @@ define void@vsel_i32(<4 x i32>* %v1, <4 x i32>* %v2) { ; Without forcing instructions, fall back to the preferred PS domain. ; CHECK: vsel_i64 -; CHECK: xorps -; CHECK: andps ; CHECK: andnps ; CHECK: orps ; CHECK: ret -define void@vsel_i64(<4 x i64>* %v1, <4 x i64>* %v2) { - %A = load <4 x i64>* %v1 - %B = load <4 x i64>* %v2 - %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i64> %A, <4 x i64> %B - store <4 x i64 > %vsel, <4 x i64>* %v1 +define void@vsel_i64(<2 x i64>* %v1, <2 x i64>* %v2) { + %A = load <2 x i64>* %v1 + %B = load <2 x i64>* %v2 + %vsel = select <2 x i1> <i1 true, i1 false>, <2 x i64> %A, <2 x i64> %B + store <2 x i64 > %vsel, <2 x i64>* %v1 ret void } ; Without forcing instructions, fall back to the preferred PS domain. 
; CHECK: vsel_double -; CHECK: xorps -; CHECK: andps ; CHECK: andnps ; CHECK: orps ; CHECK: ret -define void@vsel_double(<4 x double>* %v1, <4 x double>* %v2) { - %A = load <4 x double>* %v1 - %B = load <4 x double>* %v2 - %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x double> %A, <4 x double> %B - store <4 x double > %vsel, <4 x double>* %v1 +define void@vsel_double(<2 x double>* %v1, <2 x double>* %v2) { + %A = load <2 x double>* %v1 + %B = load <2 x double>* %v2 + %vsel = select <2 x i1> <i1 true, i1 false>, <2 x double> %A, <2 x double> %B + store <2 x double > %vsel, <2 x double>* %v1 ret void } diff --git a/test/CodeGen/X86/sse2-mul.ll b/test/CodeGen/X86/sse2-mul.ll new file mode 100644 index 000000000000..0466d60ec301 --- /dev/null +++ b/test/CodeGen/X86/sse2-mul.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -march=x86-64 -mcpu=core2 | FileCheck %s + +define <4 x i32> @test1(<4 x i32> %x, <4 x i32> %y) { + %m = mul <4 x i32> %x, %y + ret <4 x i32> %m +; CHECK: test1: +; CHECK: pshufd $49 +; CHECK: pmuludq +; CHECK: pshufd $49 +; CHECK: pmuludq +; CHECK: shufps $-120 +; CHECK: pshufd $-40 +; CHECK: ret +} diff --git a/test/CodeGen/X86/stack-align-memcpy.ll b/test/CodeGen/X86/stack-align-memcpy.ll new file mode 100644 index 000000000000..74945e5bb1bd --- /dev/null +++ b/test/CodeGen/X86/stack-align-memcpy.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s -force-align-stack -mtriple i386-apple-darwin -mcpu=i486 | FileCheck %s + +%struct.foo = type { [88 x i8] } + +; PR15249 +; We can't use rep;movsl here because it clobbers the base pointer in %esi. +define void @test1(%struct.foo* nocapture %x, i32 %y) nounwind { + %dynalloc = alloca i8, i32 %y, align 1 + call void @bar(i8* %dynalloc, %struct.foo* align 4 byval %x) + ret void + +; CHECK: test1: +; CHECK: andl $-16, %esp +; CHECK: movl %esp, %esi +; CHECK-NOT: rep;movsl +} + +declare void @bar(i8* nocapture, %struct.foo* align 4 byval) nounwind diff --git a/test/CodeGen/X86/stack-protector.ll b/test/CodeGen/X86/stack-protector.ll index c07511443bce..1e9ca1d2c24d 100644 --- a/test/CodeGen/X86/stack-protector.ll +++ b/test/CodeGen/X86/stack-protector.ll @@ -1,28 +1,3141 @@ -; RUN: llc -mtriple=i386-pc-linux-gnu < %s -o - | grep %gs: -; RUN: llc -mtriple=x86_64-pc-linux-gnu < %s -o - | grep %fs: -; RUN: llc -code-model=kernel -mtriple=x86_64-pc-linux-gnu < %s -o - | grep %gs: -; RUN: llc -mtriple=x86_64-apple-darwin < %s -o - | grep "__stack_chk_guard" -; RUN: llc -mtriple=x86_64-apple-darwin < %s -o - | grep "__stack_chk_fail" +; RUN: llc -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck --check-prefix=LINUX-I386 %s +; RUN: llc -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck --check-prefix=LINUX-X64 %s +; RUN: llc -code-model=kernel -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck --check-prefix=LINUX-KERNEL-X64 %s +; RUN: llc -mtriple=x86_64-apple-darwin < %s -o - | FileCheck --check-prefix=DARWIN-X64 %s -@"\01LC" = internal constant [11 x i8] c"buf == %s\0A\00" ; <[11 x i8]*> [#uses=1] +%struct.foo = type { [16 x i8] } +%struct.foo.0 = type { [4 x i8] } +%struct.pair = type { i32, i32 } +%struct.nest = type { %struct.pair, %struct.pair } +%struct.vec = type { <4 x i32> } +%class.A = type { [2 x i8] } +%struct.deep = type { %union.anon } +%union.anon = type { %struct.anon } +%struct.anon = type { %struct.anon.0 } +%struct.anon.0 = type { %union.anon.1 } +%union.anon.1 = type { [2 x i8] } +%struct.small = type { i8 } -define void @test(i8* %a) nounwind ssp { +@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1 + +; 
test1a: array of [16 x i8] +; no ssp attribute +; Requires no protector. +define void @test1a(i8* %a) nounwind uwtable { +entry: +; LINUX-I386: test1a: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test1a: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test1a: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test1a: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %a.addr = alloca i8*, align 8 + %buf = alloca [16 x i8], align 16 + store i8* %a, i8** %a.addr, align 8 + %arraydecay = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0 + %0 = load i8** %a.addr, align 8 + %call = call i8* @strcpy(i8* %arraydecay, i8* %0) + %arraydecay1 = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0 + %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1) + ret void +} + +; test1b: array of [16 x i8] +; ssp attribute +; Requires protector. +define void @test1b(i8* %a) nounwind uwtable ssp { +entry: +; LINUX-I386: test1b: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test1b: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test1b: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test1b: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %a.addr = alloca i8*, align 8 + %buf = alloca [16 x i8], align 16 + store i8* %a, i8** %a.addr, align 8 + %arraydecay = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0 + %0 = load i8** %a.addr, align 8 + %call = call i8* @strcpy(i8* %arraydecay, i8* %0) + %arraydecay1 = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0 + %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1) + ret void +} + +; test1c: array of [16 x i8] +; sspstrong attribute +; Requires protector. +define void @test1c(i8* %a) nounwind uwtable sspstrong { +entry: +; LINUX-I386: test1c: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test1c: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test1c: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test1c: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %a.addr = alloca i8*, align 8 + %buf = alloca [16 x i8], align 16 + store i8* %a, i8** %a.addr, align 8 + %arraydecay = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0 + %0 = load i8** %a.addr, align 8 + %call = call i8* @strcpy(i8* %arraydecay, i8* %0) + %arraydecay1 = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0 + %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1) + ret void +} + +; test1d: array of [16 x i8] +; sspreq attribute +; Requires protector. 
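The test1* cases above and the test2* struct variants that follow are all generated from the same source shape: strcpy into a 16-byte local buffer. A minimal C sketch (function name illustrative):

    #include <stdio.h>
    #include <string.h>

    /* The 16-byte buffer pattern shared by test1* (bare array) and the
     * test2* struct variants. Which of ssp, sspstrong or sspreq the
     * function carries decides whether __stack_chk_fail appears. */
    void copy16(const char *a) {
      char buf[16];
      strcpy(buf, a);
      printf("%s\n", buf);
    }

At the clang driver level, ssp, sspstrong and sspreq are the attributes attached for -fstack-protector, -fstack-protector-strong and -fstack-protector-all respectively.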
+define void @test1d(i8* %a) nounwind uwtable sspreq { +entry: +; LINUX-I386: test1d: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test1d: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test1d: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test1d: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %a.addr = alloca i8*, align 8 + %buf = alloca [16 x i8], align 16 + store i8* %a, i8** %a.addr, align 8 + %arraydecay = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0 + %0 = load i8** %a.addr, align 8 + %call = call i8* @strcpy(i8* %arraydecay, i8* %0) + %arraydecay1 = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0 + %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1) + ret void +} + +; test2a: struct { [16 x i8] } +; no ssp attribute +; Requires no protector. +define void @test2a(i8* %a) nounwind uwtable { +entry: +; LINUX-I386: test2a: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test2a: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test2a: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test2a: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %a.addr = alloca i8*, align 8 + %b = alloca %struct.foo, align 1 + store i8* %a, i8** %a.addr, align 8 + %buf = getelementptr inbounds %struct.foo* %b, i32 0, i32 0 + %arraydecay = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0 + %0 = load i8** %a.addr, align 8 + %call = call i8* @strcpy(i8* %arraydecay, i8* %0) + %buf1 = getelementptr inbounds %struct.foo* %b, i32 0, i32 0 + %arraydecay2 = getelementptr inbounds [16 x i8]* %buf1, i32 0, i32 0 + %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2) + ret void +} + +; test2b: struct { [16 x i8] } +; ssp attribute +; Requires protector. +define void @test2b(i8* %a) nounwind uwtable ssp { +entry: +; LINUX-I386: test2b: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test2b: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test2b: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test2b: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %a.addr = alloca i8*, align 8 + %b = alloca %struct.foo, align 1 + store i8* %a, i8** %a.addr, align 8 + %buf = getelementptr inbounds %struct.foo* %b, i32 0, i32 0 + %arraydecay = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0 + %0 = load i8** %a.addr, align 8 + %call = call i8* @strcpy(i8* %arraydecay, i8* %0) + %buf1 = getelementptr inbounds %struct.foo* %b, i32 0, i32 0 + %arraydecay2 = getelementptr inbounds [16 x i8]* %buf1, i32 0, i32 0 + %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2) + ret void +} + +; test2c: struct { [16 x i8] } +; sspstrong attribute +; Requires protector. 
+define void @test2c(i8* %a) nounwind uwtable sspstrong { +entry: +; LINUX-I386: test2c: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test2c: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test2c: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test2c: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %a.addr = alloca i8*, align 8 + %b = alloca %struct.foo, align 1 + store i8* %a, i8** %a.addr, align 8 + %buf = getelementptr inbounds %struct.foo* %b, i32 0, i32 0 + %arraydecay = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0 + %0 = load i8** %a.addr, align 8 + %call = call i8* @strcpy(i8* %arraydecay, i8* %0) + %buf1 = getelementptr inbounds %struct.foo* %b, i32 0, i32 0 + %arraydecay2 = getelementptr inbounds [16 x i8]* %buf1, i32 0, i32 0 + %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2) + ret void +} + +; test2d: struct { [16 x i8] } +; sspreq attribute +; Requires protector. +define void @test2d(i8* %a) nounwind uwtable sspreq { +entry: +; LINUX-I386: test2d: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test2d: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test2d: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test2d: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %a.addr = alloca i8*, align 8 + %b = alloca %struct.foo, align 1 + store i8* %a, i8** %a.addr, align 8 + %buf = getelementptr inbounds %struct.foo* %b, i32 0, i32 0 + %arraydecay = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0 + %0 = load i8** %a.addr, align 8 + %call = call i8* @strcpy(i8* %arraydecay, i8* %0) + %buf1 = getelementptr inbounds %struct.foo* %b, i32 0, i32 0 + %arraydecay2 = getelementptr inbounds [16 x i8]* %buf1, i32 0, i32 0 + %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2) + ret void +} + +; test3a: array of [4 x i8] +; no ssp attribute +; Requires no protector. +define void @test3a(i8* %a) nounwind uwtable { +entry: +; LINUX-I386: test3a: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test3a: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test3a: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test3a: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %a.addr = alloca i8*, align 8 + %buf = alloca [4 x i8], align 1 + store i8* %a, i8** %a.addr, align 8 + %arraydecay = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0 + %0 = load i8** %a.addr, align 8 + %call = call i8* @strcpy(i8* %arraydecay, i8* %0) + %arraydecay1 = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0 + %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1) + ret void +} + +; test3b: array [4 x i8] +; ssp attribute +; Requires no protector. 
+define void @test3b(i8* %a) nounwind uwtable ssp { +entry: +; LINUX-I386: test3b: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test3b: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test3b: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test3b: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %a.addr = alloca i8*, align 8 + %buf = alloca [4 x i8], align 1 + store i8* %a, i8** %a.addr, align 8 + %arraydecay = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0 + %0 = load i8** %a.addr, align 8 + %call = call i8* @strcpy(i8* %arraydecay, i8* %0) + %arraydecay1 = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0 + %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1) + ret void +} + +; test3c: array of [4 x i8] +; sspstrong attribute +; Requires protector. +define void @test3c(i8* %a) nounwind uwtable sspstrong { +entry: +; LINUX-I386: test3c: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test3c: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test3c: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test3c: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %a.addr = alloca i8*, align 8 + %buf = alloca [4 x i8], align 1 + store i8* %a, i8** %a.addr, align 8 + %arraydecay = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0 + %0 = load i8** %a.addr, align 8 + %call = call i8* @strcpy(i8* %arraydecay, i8* %0) + %arraydecay1 = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0 + %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1) + ret void +} + +; test3d: array of [4 x i8] +; sspreq attribute +; Requires protector. +define void @test3d(i8* %a) nounwind uwtable sspreq { +entry: +; LINUX-I386: test3d: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test3d: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test3d: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test3d: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %a.addr = alloca i8*, align 8 + %buf = alloca [4 x i8], align 1 + store i8* %a, i8** %a.addr, align 8 + %arraydecay = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0 + %0 = load i8** %a.addr, align 8 + %call = call i8* @strcpy(i8* %arraydecay, i8* %0) + %arraydecay1 = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0 + %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1) + ret void +} + +; test4a: struct { [4 x i8] } +; no ssp attribute +; Requires no protector. 
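test3* and the test4* struct cases repeat the same pattern with a 4-byte buffer, which sits below the protector's default 8-byte size threshold; that is why only the sspstrong and sspreq variants expect __stack_chk_fail. A hedged C sketch (names illustrative):

    #include <stdio.h>
    #include <string.h>

    /* The 4-byte variant used by test3* and test4*. Plain ssp skips
     * buffers below the default threshold, while sspstrong and sspreq
     * still guard the function. */
    struct foo0 { char buf[4]; };

    void copy4(const char *a) {
      struct foo0 b;
      strcpy(b.buf, a);
      printf("%s\n", b.buf);
    }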
+define void @test4a(i8* %a) nounwind uwtable { +entry: +; LINUX-I386: test4a: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test4a: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test4a: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test4a: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %a.addr = alloca i8*, align 8 + %b = alloca %struct.foo.0, align 1 + store i8* %a, i8** %a.addr, align 8 + %buf = getelementptr inbounds %struct.foo.0* %b, i32 0, i32 0 + %arraydecay = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0 + %0 = load i8** %a.addr, align 8 + %call = call i8* @strcpy(i8* %arraydecay, i8* %0) + %buf1 = getelementptr inbounds %struct.foo.0* %b, i32 0, i32 0 + %arraydecay2 = getelementptr inbounds [4 x i8]* %buf1, i32 0, i32 0 + %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2) + ret void +} + +; test4b: struct { [4 x i8] } +; ssp attribute +; Requires no protector. +define void @test4b(i8* %a) nounwind uwtable ssp { +entry: +; LINUX-I386: test4b: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test4b: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test4b: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test4b: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %a.addr = alloca i8*, align 8 + %b = alloca %struct.foo.0, align 1 + store i8* %a, i8** %a.addr, align 8 + %buf = getelementptr inbounds %struct.foo.0* %b, i32 0, i32 0 + %arraydecay = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0 + %0 = load i8** %a.addr, align 8 + %call = call i8* @strcpy(i8* %arraydecay, i8* %0) + %buf1 = getelementptr inbounds %struct.foo.0* %b, i32 0, i32 0 + %arraydecay2 = getelementptr inbounds [4 x i8]* %buf1, i32 0, i32 0 + %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2) + ret void +} + +; test4c: struct { [4 x i8] } +; sspstrong attribute +; Requires protector. +define void @test4c(i8* %a) nounwind uwtable sspstrong { +entry: +; LINUX-I386: test4c: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test4c: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test4c: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test4c: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %a.addr = alloca i8*, align 8 + %b = alloca %struct.foo.0, align 1 + store i8* %a, i8** %a.addr, align 8 + %buf = getelementptr inbounds %struct.foo.0* %b, i32 0, i32 0 + %arraydecay = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0 + %0 = load i8** %a.addr, align 8 + %call = call i8* @strcpy(i8* %arraydecay, i8* %0) + %buf1 = getelementptr inbounds %struct.foo.0* %b, i32 0, i32 0 + %arraydecay2 = getelementptr inbounds [4 x i8]* %buf1, i32 0, i32 0 + %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2) + ret void +} + +; test4d: struct { [4 x i8] } +; sspreq attribute +; Requires protector. 
+define void @test4d(i8* %a) nounwind uwtable sspreq { +entry: +; LINUX-I386: test4d: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test4d: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test4d: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test4d: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %a.addr = alloca i8*, align 8 + %b = alloca %struct.foo.0, align 1 + store i8* %a, i8** %a.addr, align 8 + %buf = getelementptr inbounds %struct.foo.0* %b, i32 0, i32 0 + %arraydecay = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0 + %0 = load i8** %a.addr, align 8 + %call = call i8* @strcpy(i8* %arraydecay, i8* %0) + %buf1 = getelementptr inbounds %struct.foo.0* %b, i32 0, i32 0 + %arraydecay2 = getelementptr inbounds [4 x i8]* %buf1, i32 0, i32 0 + %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2) + ret void +} + +; test5a: no arrays / no nested arrays +; no ssp attribute +; Requires no protector. +define void @test5a(i8* %a) nounwind uwtable { +entry: +; LINUX-I386: test5a: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test5a: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test5a: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test5a: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %a.addr = alloca i8*, align 8 + store i8* %a, i8** %a.addr, align 8 + %0 = load i8** %a.addr, align 8 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %0) + ret void +} + +; test5b: no arrays / no nested arrays +; ssp attribute +; Requires no protector. +define void @test5b(i8* %a) nounwind uwtable ssp { +entry: +; LINUX-I386: test5b: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test5b: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test5b: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test5b: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %a.addr = alloca i8*, align 8 + store i8* %a, i8** %a.addr, align 8 + %0 = load i8** %a.addr, align 8 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %0) + ret void +} + +; test5c: no arrays / no nested arrays +; sspstrong attribute +; Requires no protector. +define void @test5c(i8* %a) nounwind uwtable sspstrong { +entry: +; LINUX-I386: test5c: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test5c: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test5c: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test5c: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %a.addr = alloca i8*, align 8 + store i8* %a, i8** %a.addr, align 8 + %0 = load i8** %a.addr, align 8 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %0) + ret void +} + +; test5d: no arrays / no nested arrays +; sspreq attribute +; Requires protector. 
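test5* has no local buffers at all, so only the protect-everything sspreq variant expects a guard; ssp and sspstrong both leave the function alone. Roughly the C shape being modeled (name illustrative):

    #include <stdio.h>

    /* test5*: no stack buffers, just a pointer forwarded to printf. */
    void print_it(const char *a) {
      printf("%s\n", a);
    }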
+define void @test5d(i8* %a) nounwind uwtable sspreq { +entry: +; LINUX-I386: test5d: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test5d: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test5d: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test5d: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %a.addr = alloca i8*, align 8 + store i8* %a, i8** %a.addr, align 8 + %0 = load i8** %a.addr, align 8 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %0) + ret void +} + +; test6a: Address-of local taken (j = &a) +; no ssp attribute +; Requires no protector. +define void @test6a() nounwind uwtable { +entry: +; LINUX-I386: test6a: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test6a: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test6a: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test6a: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %retval = alloca i32, align 4 + %a = alloca i32, align 4 + %j = alloca i32*, align 8 + store i32 0, i32* %retval + %0 = load i32* %a, align 4 + %add = add nsw i32 %0, 1 + store i32 %add, i32* %a, align 4 + store i32* %a, i32** %j, align 8 + ret void +} + +; test6b: Address-of local taken (j = &a) +; ssp attribute +; Requires no protector. +define void @test6b() nounwind uwtable ssp { +entry: +; LINUX-I386: test6b: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test6b: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test6b: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test6b: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %retval = alloca i32, align 4 + %a = alloca i32, align 4 + %j = alloca i32*, align 8 + store i32 0, i32* %retval + %0 = load i32* %a, align 4 + %add = add nsw i32 %0, 1 + store i32 %add, i32* %a, align 4 + store i32* %a, i32** %j, align 8 + ret void +} + +; test6c: Address-of local taken (j = &a) +; sspstrong attribute +; Requires protector. +define void @test6c() nounwind uwtable sspstrong { +entry: +; LINUX-I386: test6c: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test6c: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test6c: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test6c: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %retval = alloca i32, align 4 + %a = alloca i32, align 4 + %j = alloca i32*, align 8 + store i32 0, i32* %retval + %0 = load i32* %a, align 4 + %add = add nsw i32 %0, 1 + store i32 %add, i32* %a, align 4 + store i32* %a, i32** %j, align 8 + ret void +} + +; test6d: Address-of local taken (j = &a) +; sspreq attribute +; Requires protector. 
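test6* above, together with the test7* and test8* variants that follow, exercise the sspstrong escape heuristic: once the address of a scalar local leaks (stored into a pointer, cast to an integer, or passed to a callee), sspstrong guards the function even without a buffer, while plain ssp does not. A combined C sketch; funcall is the external callee the tests declare, the other names are illustrative:

    #include <stdint.h>
    #include <stdio.h>

    void funcall(int *p);   /* external callee, as declared by the test */

    /* test6*: the address of a scalar local is stored into a pointer. */
    void escape_by_store(void) {
      int a = 0;
      int *j;
      a = a + 1;
      j = &a;
      (void)j;
    }

    /* test7*: the address is converted to an integer (a ptrtoint in IR). */
    void escape_by_cast(void) {
      int a;
      printf("%lu\n", (unsigned long)(uintptr_t)&a);
    }

    /* test8*: the address is handed to another function. */
    void escape_by_call(void) {
      int b;
      funcall(&b);
    }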
+define void @test6d() nounwind uwtable sspreq { +entry: +; LINUX-I386: test6d: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test6d: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test6d: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test6d: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %retval = alloca i32, align 4 + %a = alloca i32, align 4 + %j = alloca i32*, align 8 + store i32 0, i32* %retval + %0 = load i32* %a, align 4 + %add = add nsw i32 %0, 1 + store i32 %add, i32* %a, align 4 + store i32* %a, i32** %j, align 8 + ret void +} + +; test7a: PtrToInt Cast +; no ssp attribute +; Requires no protector. +define void @test7a() nounwind uwtable readnone { +entry: +; LINUX-I386: test7a: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test7a: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test7a: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test7a: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %a = alloca i32, align 4 + %0 = ptrtoint i32* %a to i64 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i64 %0) + ret void +} + +; test7b: PtrToInt Cast +; ssp attribute +; Requires no protector. +define void @test7b() nounwind uwtable readnone ssp { +entry: +; LINUX-I386: test7b: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test7b: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test7b: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test7b: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %a = alloca i32, align 4 + %0 = ptrtoint i32* %a to i64 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i64 %0) + ret void +} + +; test7c: PtrToInt Cast +; sspstrong attribute +; Requires protector. +define void @test7c() nounwind uwtable readnone sspstrong { +entry: +; LINUX-I386: test7c: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test7c: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test7c: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test7c: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %a = alloca i32, align 4 + %0 = ptrtoint i32* %a to i64 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i64 %0) + ret void +} + +; test7d: PtrToInt Cast +; sspreq attribute +; Requires protector. 
+define void @test7d() nounwind uwtable readnone sspreq { +entry: +; LINUX-I386: test7d: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test7d: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test7d: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test7d: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %a = alloca i32, align 4 + %0 = ptrtoint i32* %a to i64 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i64 %0) + ret void +} + +; test8a: Passing addr-of to function call +; no ssp attribute +; Requires no protector. +define void @test8a() nounwind uwtable { +entry: +; LINUX-I386: test8a: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test8a: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test8a: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test8a: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %b = alloca i32, align 4 + call void @funcall(i32* %b) nounwind + ret void +} + +; test8b: Passing addr-of to function call +; ssp attribute +; Requires no protector. +define void @test8b() nounwind uwtable ssp { +entry: +; LINUX-I386: test8b: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test8b: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test8b: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test8b: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %b = alloca i32, align 4 + call void @funcall(i32* %b) nounwind + ret void +} + +; test8c: Passing addr-of to function call +; sspstrong attribute +; Requires protector. +define void @test8c() nounwind uwtable sspstrong { +entry: +; LINUX-I386: test8c: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test8c: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test8c: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test8c: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %b = alloca i32, align 4 + call void @funcall(i32* %b) nounwind + ret void +} + +; test8d: Passing addr-of to function call +; sspreq attribute +; Requires protector. +define void @test8d() nounwind uwtable sspreq { +entry: +; LINUX-I386: test8d: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test8d: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test8d: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test8d: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %b = alloca i32, align 4 + call void @funcall(i32* %b) nounwind + ret void +} + +; test9a: Addr-of in select instruction +; no ssp attribute +; Requires no protector. 
+define void @test9a() nounwind uwtable { +entry: +; LINUX-I386: test9a: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test9a: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test9a: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test9a: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %x = alloca double, align 8 + %call = call double @testi_aux() nounwind + store double %call, double* %x, align 8 + %cmp2 = fcmp ogt double %call, 0.000000e+00 + %y.1 = select i1 %cmp2, double* %x, double* null + %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), double* %y.1) + ret void +} + +; test9b: Addr-of in select instruction +; ssp attribute +; Requires no protector. +define void @test9b() nounwind uwtable ssp { +entry: +; LINUX-I386: test9b: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test9b: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test9b: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test9b: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %x = alloca double, align 8 + %call = call double @testi_aux() nounwind + store double %call, double* %x, align 8 + %cmp2 = fcmp ogt double %call, 0.000000e+00 + %y.1 = select i1 %cmp2, double* %x, double* null + %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), double* %y.1) + ret void +} + +; test9c: Addr-of in select instruction +; sspstrong attribute +; Requires protector. +define void @test9c() nounwind uwtable sspstrong { +entry: +; LINUX-I386: test9c: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test9c: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test9c: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test9c: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %x = alloca double, align 8 + %call = call double @testi_aux() nounwind + store double %call, double* %x, align 8 + %cmp2 = fcmp ogt double %call, 0.000000e+00 + %y.1 = select i1 %cmp2, double* %x, double* null + %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), double* %y.1) + ret void +} + +; test9d: Addr-of in select instruction +; sspreq attribute +; Requires protector. +define void @test9d() nounwind uwtable sspreq { +entry: +; LINUX-I386: test9d: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test9d: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test9d: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test9d: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %x = alloca double, align 8 + %call = call double @testi_aux() nounwind + store double %call, double* %x, align 8 + %cmp2 = fcmp ogt double %call, 0.000000e+00 + %y.1 = select i1 %cmp2, double* %x, double* null + %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), double* %y.1) + ret void +} + +; test10a: Addr-of in phi instruction +; no ssp attribute +; Requires no protector. 
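test9* and the test10* cases that follow check that the escape analysis also follows the address through select and phi nodes, i.e. when the address only escapes along some paths. A C sketch of those shapes; testi_aux is the external helper the tests call, the printf formatting is simplified relative to the test scaffolding, and the other names are illustrative:

    #include <stdio.h>

    double testi_aux(void);   /* external helper, as in the tests */

    /* test9*: the local's address flows through a conditional
     * expression, which becomes a select of the address in IR. */
    void escape_via_select(void) {
      double x = testi_aux();
      double *y = (x > 0.0) ? &x : NULL;
      printf("%p\n", (void *)y);
    }

    /* test10*: the same idea across control flow, which becomes a phi
     * of the address. */
    void escape_via_phi(void) {
      double x = testi_aux();
      double *y = NULL;
      if (x > 3.14) {
        x = testi_aux();
      } else if (x > 1.0) {
        y = &x;
      }
      printf("%p\n", (void *)y);
    }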
+define void @test10a() nounwind uwtable { +entry: +; LINUX-I386: test10a: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test10a: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test10a: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test10a: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %x = alloca double, align 8 + %call = call double @testi_aux() nounwind + store double %call, double* %x, align 8 + %cmp = fcmp ogt double %call, 3.140000e+00 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + %call1 = call double @testi_aux() nounwind + store double %call1, double* %x, align 8 + br label %if.end4 + +if.else: ; preds = %entry + %cmp2 = fcmp ogt double %call, 1.000000e+00 + br i1 %cmp2, label %if.then3, label %if.end4 + +if.then3: ; preds = %if.else + br label %if.end4 + +if.end4: ; preds = %if.else, %if.then3, %if.then + %y.0 = phi double* [ null, %if.then ], [ %x, %if.then3 ], [ null, %if.else ] + %call5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), double* %y.0) nounwind + ret void +} + +; test10b: Addr-of in phi instruction +; ssp attribute +; Requires no protector. +define void @test10b() nounwind uwtable ssp { +entry: +; LINUX-I386: test10b: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test10b: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test10b: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test10b: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %x = alloca double, align 8 + %call = call double @testi_aux() nounwind + store double %call, double* %x, align 8 + %cmp = fcmp ogt double %call, 3.140000e+00 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + %call1 = call double @testi_aux() nounwind + store double %call1, double* %x, align 8 + br label %if.end4 + +if.else: ; preds = %entry + %cmp2 = fcmp ogt double %call, 1.000000e+00 + br i1 %cmp2, label %if.then3, label %if.end4 + +if.then3: ; preds = %if.else + br label %if.end4 + +if.end4: ; preds = %if.else, %if.then3, %if.then + %y.0 = phi double* [ null, %if.then ], [ %x, %if.then3 ], [ null, %if.else ] + %call5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), double* %y.0) nounwind + ret void +} + +; test10c: Addr-of in phi instruction +; sspstrong attribute +; Requires protector. 
+define void @test10c() nounwind uwtable sspstrong { +entry: +; LINUX-I386: test10c: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test10c: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test10c: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test10c: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %x = alloca double, align 8 + %call = call double @testi_aux() nounwind + store double %call, double* %x, align 8 + %cmp = fcmp ogt double %call, 3.140000e+00 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + %call1 = call double @testi_aux() nounwind + store double %call1, double* %x, align 8 + br label %if.end4 + +if.else: ; preds = %entry + %cmp2 = fcmp ogt double %call, 1.000000e+00 + br i1 %cmp2, label %if.then3, label %if.end4 + +if.then3: ; preds = %if.else + br label %if.end4 + +if.end4: ; preds = %if.else, %if.then3, %if.then + %y.0 = phi double* [ null, %if.then ], [ %x, %if.then3 ], [ null, %if.else ] + %call5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), double* %y.0) nounwind + ret void +} + +; test10d: Addr-of in phi instruction +; sspreq attribute +; Requires protector. +define void @test10d() nounwind uwtable sspreq { +entry: +; LINUX-I386: test10d: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test10d: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test10d: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test10d: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %x = alloca double, align 8 + %call = call double @testi_aux() nounwind + store double %call, double* %x, align 8 + %cmp = fcmp ogt double %call, 3.140000e+00 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + %call1 = call double @testi_aux() nounwind + store double %call1, double* %x, align 8 + br label %if.end4 + +if.else: ; preds = %entry + %cmp2 = fcmp ogt double %call, 1.000000e+00 + br i1 %cmp2, label %if.then3, label %if.end4 + +if.then3: ; preds = %if.else + br label %if.end4 + +if.end4: ; preds = %if.else, %if.then3, %if.then + %y.0 = phi double* [ null, %if.then ], [ %x, %if.then3 ], [ null, %if.else ] + %call5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), double* %y.0) nounwind + ret void +} + +; test11a: Addr-of struct element. (GEP followed by store). +; no ssp attribute +; Requires no protector. +define void @test11a() nounwind uwtable { +entry: +; LINUX-I386: test11a: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test11a: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test11a: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test11a: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %c = alloca %struct.pair, align 4 + %b = alloca i32*, align 8 + %y = getelementptr inbounds %struct.pair* %c, i32 0, i32 1 + store i32* %y, i32** %b, align 8 + %0 = load i32** %b, align 8 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32* %0) + ret void +} + +; test11b: Addr-of struct element. (GEP followed by store). 
+; ssp attribute +; Requires no protector. +define void @test11b() nounwind uwtable ssp { +entry: +; LINUX-I386: test11b: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test11b: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test11b: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test11b: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %c = alloca %struct.pair, align 4 + %b = alloca i32*, align 8 + %y = getelementptr inbounds %struct.pair* %c, i32 0, i32 1 + store i32* %y, i32** %b, align 8 + %0 = load i32** %b, align 8 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32* %0) + ret void +} + +; test11c: Addr-of struct element. (GEP followed by store). +; sspstrong attribute +; Requires protector. +define void @test11c() nounwind uwtable sspstrong { +entry: +; LINUX-I386: test11c: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test11c: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test11c: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test11c: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %c = alloca %struct.pair, align 4 + %b = alloca i32*, align 8 + %y = getelementptr inbounds %struct.pair* %c, i32 0, i32 1 + store i32* %y, i32** %b, align 8 + %0 = load i32** %b, align 8 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32* %0) + ret void +} + +; test11d: Addr-of struct element. (GEP followed by store). +; sspreq attribute +; Requires protector. +define void @test11d() nounwind uwtable sspreq { +entry: +; LINUX-I386: test11d: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test11d: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test11d: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test11d: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %c = alloca %struct.pair, align 4 + %b = alloca i32*, align 8 + %y = getelementptr inbounds %struct.pair* %c, i32 0, i32 1 + store i32* %y, i32** %b, align 8 + %0 = load i32** %b, align 8 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32* %0) + ret void +} + +; test12a: Addr-of struct element, GEP followed by ptrtoint. +; no ssp attribute +; Requires no protector. +define void @test12a() nounwind uwtable { +entry: +; LINUX-I386: test12a: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test12a: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test12a: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test12a: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %c = alloca %struct.pair, align 4 + %b = alloca i32*, align 8 + %y = getelementptr inbounds %struct.pair* %c, i32 0, i32 1 + %0 = ptrtoint i32* %y to i64 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i64 %0) + ret void +} + +; test12b: Addr-of struct element, GEP followed by ptrtoint. +; ssp attribute +; Requires no protector. 
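test11* and test12*, plus the test13* cases that follow, repeat the escape checks with the address of a struct member, so the escaping value is a GEP feeding a store, a ptrtoint, or a call. A C sketch using the tests' two-int pair struct (function names illustrative):

    #include <stdint.h>
    #include <stdio.h>

    struct pair { int x, y; };

    /* test11*: the member's address is stored into a pointer first. */
    void member_addr_store(void) {
      struct pair c;
      int *b = &c.y;
      printf("%p\n", (void *)b);
    }

    /* test12*: the member's address is cast to an integer. */
    void member_addr_cast(void) {
      struct pair c;
      printf("%lu\n", (unsigned long)(uintptr_t)&c.y);
    }

    /* test13*: the member's address goes straight into a call. */
    void member_addr_call(void) {
      struct pair c;
      printf("%p\n", (void *)&c.y);
    }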
+define void @test12b() nounwind uwtable ssp { +entry: +; LINUX-I386: test12b: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test12b: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test12b: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test12b: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %c = alloca %struct.pair, align 4 + %b = alloca i32*, align 8 + %y = getelementptr inbounds %struct.pair* %c, i32 0, i32 1 + %0 = ptrtoint i32* %y to i64 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i64 %0) + ret void +} + +; test12c: Addr-of struct element, GEP followed by ptrtoint. +; sspstrong attribute +; Requires protector. +define void @test12c() nounwind uwtable sspstrong { +entry: +; LINUX-I386: test12c: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test12c: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test12c: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test12c: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %c = alloca %struct.pair, align 4 + %b = alloca i32*, align 8 + %y = getelementptr inbounds %struct.pair* %c, i32 0, i32 1 + %0 = ptrtoint i32* %y to i64 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i64 %0) + ret void +} + +; test12d: Addr-of struct element, GEP followed by ptrtoint. +; sspreq attribute +; Requires protector. +define void @test12d() nounwind uwtable sspreq { +entry: +; LINUX-I386: test12d: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test12d: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test12d: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test12d: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %c = alloca %struct.pair, align 4 + %b = alloca i32*, align 8 + %y = getelementptr inbounds %struct.pair* %c, i32 0, i32 1 + %0 = ptrtoint i32* %y to i64 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i64 %0) + ret void +} + +; test13a: Addr-of struct element, GEP followed by callinst. +; no ssp attribute +; Requires no protector. +define void @test13a() nounwind uwtable { +entry: +; LINUX-I386: test13a: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test13a: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test13a: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test13a: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %c = alloca %struct.pair, align 4 + %y = getelementptr inbounds %struct.pair* %c, i64 0, i32 1 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %y) nounwind + ret void +} + +; test13b: Addr-of struct element, GEP followed by callinst. +; ssp attribute +; Requires no protector. 
+define void @test13b() nounwind uwtable ssp { +entry: +; LINUX-I386: test13b: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test13b: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test13b: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test13b: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %c = alloca %struct.pair, align 4 + %y = getelementptr inbounds %struct.pair* %c, i64 0, i32 1 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %y) nounwind + ret void +} + +; test13c: Addr-of struct element, GEP followed by callinst. +; sspstrong attribute +; Requires protector. +define void @test13c() nounwind uwtable sspstrong { +entry: +; LINUX-I386: test13c: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test13c: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test13c: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test13c: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %c = alloca %struct.pair, align 4 + %y = getelementptr inbounds %struct.pair* %c, i64 0, i32 1 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %y) nounwind + ret void +} + +; test13d: Addr-of struct element, GEP followed by callinst. +; sspreq attribute +; Requires protector. +define void @test13d() nounwind uwtable sspreq { entry: - %a_addr = alloca i8* ; <i8**> [#uses=2] - %buf = alloca [8 x i8] ; <[8 x i8]*> [#uses=2] - %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] - store i8* %a, i8** %a_addr - %buf1 = bitcast [8 x i8]* %buf to i8* ; <i8*> [#uses=1] - %0 = load i8** %a_addr, align 4 ; <i8*> [#uses=1] - %1 = call i8* @strcpy(i8* %buf1, i8* %0) nounwind ; <i8*> [#uses=0] - %buf2 = bitcast [8 x i8]* %buf to i8* ; <i8*> [#uses=1] - %2 = call i32 (i8*, ...)* @printf(i8* getelementptr ([11 x i8]* @"\01LC", i32 0, i32 0), i8* %buf2) nounwind ; <i32> [#uses=0] - br label %return +; LINUX-I386: test13d: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test13d: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail -return: ; preds = %entry - ret void +; LINUX-KERNEL-X64: test13d: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test13d: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %c = alloca %struct.pair, align 4 + %y = getelementptr inbounds %struct.pair* %c, i64 0, i32 1 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %y) nounwind + ret void } -declare i8* @strcpy(i8*, i8*) nounwind +; test14a: Addr-of a local, optimized into a GEP (e.g., &a - 12) +; no ssp attribute +; Requires no protector. 
+define void @test14a() nounwind uwtable { +entry: +; LINUX-I386: test14a: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test14a: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test14a: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test14a: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %a = alloca i32, align 4 + %add.ptr5 = getelementptr inbounds i32* %a, i64 -12 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %add.ptr5) nounwind + ret void +} + +; test14b: Addr-of a local, optimized into a GEP (e.g., &a - 12) +; ssp attribute +; Requires no protector. +define void @test14b() nounwind uwtable ssp { +entry: +; LINUX-I386: test14b: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test14b: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test14b: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test14b: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %a = alloca i32, align 4 + %add.ptr5 = getelementptr inbounds i32* %a, i64 -12 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %add.ptr5) nounwind + ret void +} + +; test14c: Addr-of a local, optimized into a GEP (e.g., &a - 12) +; sspstrong attribute +; Requires protector. +define void @test14c() nounwind uwtable sspstrong { +entry: +; LINUX-I386: test14c: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test14c: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test14c: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test14c: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %a = alloca i32, align 4 + %add.ptr5 = getelementptr inbounds i32* %a, i64 -12 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %add.ptr5) nounwind + ret void +} + +; test14d: Addr-of a local, optimized into a GEP (e.g., &a - 12) +; sspreq attribute +; Requires protector. +define void @test14d() nounwind uwtable sspreq { +entry: +; LINUX-I386: test14d: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test14d: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test14d: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test14d: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %a = alloca i32, align 4 + %add.ptr5 = getelementptr inbounds i32* %a, i64 -12 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %add.ptr5) nounwind + ret void +} + +; test15a: Addr-of a local cast to a ptr of a different type +; (e.g., int a; ... ; float *b = &a;) +; no ssp attribute +; Requires no protector. 
+define void @test15a() nounwind uwtable { +entry: +; LINUX-I386: test15a: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test15a: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test15a: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test15a: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %a = alloca i32, align 4 + %b = alloca float*, align 8 + store i32 0, i32* %a, align 4 + %0 = bitcast i32* %a to float* + store float* %0, float** %b, align 8 + %1 = load float** %b, align 8 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), float* %1) + ret void +} + +; test15b: Addr-of a local cast to a ptr of a different type +; (e.g., int a; ... ; float *b = &a;) +; ssp attribute +; Requires no protector. +define void @test15b() nounwind uwtable ssp { +entry: +; LINUX-I386: test15b: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test15b: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test15b: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test15b: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %a = alloca i32, align 4 + %b = alloca float*, align 8 + store i32 0, i32* %a, align 4 + %0 = bitcast i32* %a to float* + store float* %0, float** %b, align 8 + %1 = load float** %b, align 8 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), float* %1) + ret void +} + +; test15c: Addr-of a local cast to a ptr of a different type +; (e.g., int a; ... ; float *b = &a;) +; sspstrong attribute +; Requires protector. +define void @test15c() nounwind uwtable sspstrong { +entry: +; LINUX-I386: test15c: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test15c: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test15c: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test15c: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %a = alloca i32, align 4 + %b = alloca float*, align 8 + store i32 0, i32* %a, align 4 + %0 = bitcast i32* %a to float* + store float* %0, float** %b, align 8 + %1 = load float** %b, align 8 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), float* %1) + ret void +} + +; test15d: Addr-of a local cast to a ptr of a different type +; (e.g., int a; ... ; float *b = &a;) +; sspreq attribute +; Requires protector. 
+define void @test15d() nounwind uwtable sspreq { +entry: +; LINUX-I386: test15d: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test15d: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test15d: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test15d: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %a = alloca i32, align 4 + %b = alloca float*, align 8 + store i32 0, i32* %a, align 4 + %0 = bitcast i32* %a to float* + store float* %0, float** %b, align 8 + %1 = load float** %b, align 8 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), float* %1) + ret void +} + +; test16a: Addr-of a local cast to a ptr of a different type (optimized) +; (e.g., int a; ... ; float *b = &a;) +; no ssp attribute +; Requires no protector. +define void @test16a() nounwind uwtable { +entry: +; LINUX-I386: test16a: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test16a: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test16a: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test16a: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %a = alloca i32, align 4 + store i32 0, i32* %a, align 4 + %0 = bitcast i32* %a to float* + call void @funfloat(float* %0) nounwind + ret void +} + +; test16b: Addr-of a local cast to a ptr of a different type (optimized) +; (e.g., int a; ... ; float *b = &a;) +; ssp attribute +; Requires no protector. +define void @test16b() nounwind uwtable ssp { +entry: +; LINUX-I386: test16b: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test16b: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test16b: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test16b: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %a = alloca i32, align 4 + store i32 0, i32* %a, align 4 + %0 = bitcast i32* %a to float* + call void @funfloat(float* %0) nounwind + ret void +} + +; test16c: Addr-of a local cast to a ptr of a different type (optimized) +; (e.g., int a; ... ; float *b = &a;) +; sspstrong attribute +; Requires protector. +define void @test16c() nounwind uwtable sspstrong { +entry: +; LINUX-I386: test16c: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test16c: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test16c: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test16c: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %a = alloca i32, align 4 + store i32 0, i32* %a, align 4 + %0 = bitcast i32* %a to float* + call void @funfloat(float* %0) nounwind + ret void +} + +; test16d: Addr-of a local cast to a ptr of a different type (optimized) +; (e.g., int a; ... ; float *b = &a;) +; sspreq attribute +; Requires protector. 
+define void @test16d() nounwind uwtable sspreq { +entry: +; LINUX-I386: test16d: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test16d: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test16d: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test16d: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %a = alloca i32, align 4 + store i32 0, i32* %a, align 4 + %0 = bitcast i32* %a to float* + call void @funfloat(float* %0) nounwind + ret void +} + +; test17a: Addr-of a vector nested in a struct +; no ssp attribute +; Requires no protector. +define void @test17a() nounwind uwtable { +entry: +; LINUX-I386: test17a: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test17a: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test17a: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test17a: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %c = alloca %struct.vec, align 16 + %y = getelementptr inbounds %struct.vec* %c, i64 0, i32 0 + %add.ptr = getelementptr inbounds <4 x i32>* %y, i64 -12 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), <4 x i32>* %add.ptr) nounwind + ret void +} + +; test17b: Addr-of a vector nested in a struct +; ssp attribute +; Requires no protector. +define void @test17b() nounwind uwtable ssp { +entry: +; LINUX-I386: test17b: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test17b: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test17b: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test17b: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %c = alloca %struct.vec, align 16 + %y = getelementptr inbounds %struct.vec* %c, i64 0, i32 0 + %add.ptr = getelementptr inbounds <4 x i32>* %y, i64 -12 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), <4 x i32>* %add.ptr) nounwind + ret void +} + +; test17c: Addr-of a vector nested in a struct +; sspstrong attribute +; Requires protector. +define void @test17c() nounwind uwtable sspstrong { +entry: +; LINUX-I386: test17c: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test17c: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test17c: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test17c: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %c = alloca %struct.vec, align 16 + %y = getelementptr inbounds %struct.vec* %c, i64 0, i32 0 + %add.ptr = getelementptr inbounds <4 x i32>* %y, i64 -12 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), <4 x i32>* %add.ptr) nounwind + ret void +} + +; test17d: Addr-of a vector nested in a struct +; sspreq attribute +; Requires protector. 
+define void @test17d() nounwind uwtable sspreq { +entry: +; LINUX-I386: test17d: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test17d: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test17d: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test17d: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %c = alloca %struct.vec, align 16 + %y = getelementptr inbounds %struct.vec* %c, i64 0, i32 0 + %add.ptr = getelementptr inbounds <4 x i32>* %y, i64 -12 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), <4 x i32>* %add.ptr) nounwind + ret void +} + +; test18a: Addr-of a variable passed into an invoke instruction. +; no ssp attribute +; Requires no protector. +define i32 @test18a() uwtable { +entry: +; LINUX-I386: test18a: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test18a: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test18a: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test18a: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %a = alloca i32, align 4 + %exn.slot = alloca i8* + %ehselector.slot = alloca i32 + store i32 0, i32* %a, align 4 + invoke void @_Z3exceptPi(i32* %a) + to label %invoke.cont unwind label %lpad + +invoke.cont: + ret i32 0 + +lpad: + %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + catch i8* null + ret i32 0 +} + +; test18b: Addr-of a variable passed into an invoke instruction. +; ssp attribute +; Requires no protector. +define i32 @test18b() uwtable ssp { +entry: +; LINUX-I386: test18b: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test18b: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test18b: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test18b: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %a = alloca i32, align 4 + %exn.slot = alloca i8* + %ehselector.slot = alloca i32 + store i32 0, i32* %a, align 4 + invoke void @_Z3exceptPi(i32* %a) + to label %invoke.cont unwind label %lpad + +invoke.cont: + ret i32 0 + +lpad: + %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + catch i8* null + ret i32 0 +} + +; test18c: Addr-of a variable passed into an invoke instruction. +; sspstrong attribute +; Requires protector. 
+define i32 @test18c() uwtable sspstrong { +entry: +; LINUX-I386: test18c: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test18c: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test18c: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test18c: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %a = alloca i32, align 4 + %exn.slot = alloca i8* + %ehselector.slot = alloca i32 + store i32 0, i32* %a, align 4 + invoke void @_Z3exceptPi(i32* %a) + to label %invoke.cont unwind label %lpad + +invoke.cont: + ret i32 0 + +lpad: + %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + catch i8* null + ret i32 0 +} + +; test18d: Addr-of a variable passed into an invoke instruction. +; sspreq attribute +; Requires protector. +define i32 @test18d() uwtable sspreq { +entry: +; LINUX-I386: test18d: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test18d: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test18d: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test18d: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %a = alloca i32, align 4 + %exn.slot = alloca i8* + %ehselector.slot = alloca i32 + store i32 0, i32* %a, align 4 + invoke void @_Z3exceptPi(i32* %a) + to label %invoke.cont unwind label %lpad + +invoke.cont: + ret i32 0 + +lpad: + %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + catch i8* null + ret i32 0 +} + +; test19a: Addr-of a struct element passed into an invoke instruction. +; (GEP followed by an invoke) +; no ssp attribute +; Requires no protector. +define i32 @test19a() uwtable { +entry: +; LINUX-I386: test19a: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test19a: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test19a: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test19a: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %c = alloca %struct.pair, align 4 + %exn.slot = alloca i8* + %ehselector.slot = alloca i32 + %a = getelementptr inbounds %struct.pair* %c, i32 0, i32 0 + store i32 0, i32* %a, align 4 + %a1 = getelementptr inbounds %struct.pair* %c, i32 0, i32 0 + invoke void @_Z3exceptPi(i32* %a1) + to label %invoke.cont unwind label %lpad + +invoke.cont: + ret i32 0 + +lpad: + %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + catch i8* null + ret i32 0 +} + +; test19b: Addr-of a struct element passed into an invoke instruction. +; (GEP followed by an invoke) +; ssp attribute +; Requires no protector. 
+define i32 @test19b() uwtable ssp { +entry: +; LINUX-I386: test19b: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test19b: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test19b: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test19b: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %c = alloca %struct.pair, align 4 + %exn.slot = alloca i8* + %ehselector.slot = alloca i32 + %a = getelementptr inbounds %struct.pair* %c, i32 0, i32 0 + store i32 0, i32* %a, align 4 + %a1 = getelementptr inbounds %struct.pair* %c, i32 0, i32 0 + invoke void @_Z3exceptPi(i32* %a1) + to label %invoke.cont unwind label %lpad + +invoke.cont: + ret i32 0 + +lpad: + %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + catch i8* null + ret i32 0 +} + +; test19c: Addr-of a struct element passed into an invoke instruction. +; (GEP followed by an invoke) +; sspstrong attribute +; Requires protector. +define i32 @test19c() uwtable sspstrong { +entry: +; LINUX-I386: test19c: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test19c: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test19c: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test19c: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %c = alloca %struct.pair, align 4 + %exn.slot = alloca i8* + %ehselector.slot = alloca i32 + %a = getelementptr inbounds %struct.pair* %c, i32 0, i32 0 + store i32 0, i32* %a, align 4 + %a1 = getelementptr inbounds %struct.pair* %c, i32 0, i32 0 + invoke void @_Z3exceptPi(i32* %a1) + to label %invoke.cont unwind label %lpad + +invoke.cont: + ret i32 0 + +lpad: + %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + catch i8* null + ret i32 0 +} + +; test19d: Addr-of a struct element passed into an invoke instruction. +; (GEP followed by an invoke) +; sspreq attribute +; Requires protector. +define i32 @test19d() uwtable sspreq { +entry: +; LINUX-I386: test19d: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test19d: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test19d: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test19d: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %c = alloca %struct.pair, align 4 + %exn.slot = alloca i8* + %ehselector.slot = alloca i32 + %a = getelementptr inbounds %struct.pair* %c, i32 0, i32 0 + store i32 0, i32* %a, align 4 + %a1 = getelementptr inbounds %struct.pair* %c, i32 0, i32 0 + invoke void @_Z3exceptPi(i32* %a1) + to label %invoke.cont unwind label %lpad + +invoke.cont: + ret i32 0 + +lpad: + %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + catch i8* null + ret i32 0 +} + +; test20a: Addr-of a pointer +; no ssp attribute +; Requires no protector. 
+define void @test20a() nounwind uwtable { +entry: +; LINUX-I386: test20a: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test20a: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test20a: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test20a: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %a = alloca i32*, align 8 + %b = alloca i32**, align 8 + %call = call i32* @getp() + store i32* %call, i32** %a, align 8 + store i32** %a, i32*** %b, align 8 + %0 = load i32*** %b, align 8 + call void @funcall2(i32** %0) + ret void +} + +; test20b: Addr-of a pointer +; ssp attribute +; Requires no protector. +define void @test20b() nounwind uwtable ssp { +entry: +; LINUX-I386: test20b: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test20b: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test20b: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test20b: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %a = alloca i32*, align 8 + %b = alloca i32**, align 8 + %call = call i32* @getp() + store i32* %call, i32** %a, align 8 + store i32** %a, i32*** %b, align 8 + %0 = load i32*** %b, align 8 + call void @funcall2(i32** %0) + ret void +} + +; test20c: Addr-of a pointer +; sspstrong attribute +; Requires protector. +define void @test20c() nounwind uwtable sspstrong { +entry: +; LINUX-I386: test20c: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test20c: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test20c: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test20c: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %a = alloca i32*, align 8 + %b = alloca i32**, align 8 + %call = call i32* @getp() + store i32* %call, i32** %a, align 8 + store i32** %a, i32*** %b, align 8 + %0 = load i32*** %b, align 8 + call void @funcall2(i32** %0) + ret void +} + +; test20d: Addr-of a pointer +; sspreq attribute +; Requires protector. +define void @test20d() nounwind uwtable sspreq { +entry: +; LINUX-I386: test20d: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test20d: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test20d: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test20d: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %a = alloca i32*, align 8 + %b = alloca i32**, align 8 + %call = call i32* @getp() + store i32* %call, i32** %a, align 8 + store i32** %a, i32*** %b, align 8 + %0 = load i32*** %b, align 8 + call void @funcall2(i32** %0) + ret void +} + +; test21a: Addr-of a casted pointer +; no ssp attribute +; Requires no protector. 
+define void @test21a() nounwind uwtable { +entry: +; LINUX-I386: test21a: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test21a: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test21a: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test21a: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %a = alloca i32*, align 8 + %b = alloca float**, align 8 + %call = call i32* @getp() + store i32* %call, i32** %a, align 8 + %0 = bitcast i32** %a to float** + store float** %0, float*** %b, align 8 + %1 = load float*** %b, align 8 + call void @funfloat2(float** %1) + ret void +} + +; test21b: Addr-of a casted pointer +; ssp attribute +; Requires no protector. +define void @test21b() nounwind uwtable ssp { +entry: +; LINUX-I386: test21b: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test21b: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test21b: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test21b: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %a = alloca i32*, align 8 + %b = alloca float**, align 8 + %call = call i32* @getp() + store i32* %call, i32** %a, align 8 + %0 = bitcast i32** %a to float** + store float** %0, float*** %b, align 8 + %1 = load float*** %b, align 8 + call void @funfloat2(float** %1) + ret void +} + +; test21c: Addr-of a casted pointer +; sspstrong attribute +; Requires protector. +define void @test21c() nounwind uwtable sspstrong { +entry: +; LINUX-I386: test21c: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test21c: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test21c: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test21c: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %a = alloca i32*, align 8 + %b = alloca float**, align 8 + %call = call i32* @getp() + store i32* %call, i32** %a, align 8 + %0 = bitcast i32** %a to float** + store float** %0, float*** %b, align 8 + %1 = load float*** %b, align 8 + call void @funfloat2(float** %1) + ret void +} + +; test21d: Addr-of a casted pointer +; sspreq attribute +; Requires protector. +define void @test21d() nounwind uwtable sspreq { +entry: +; LINUX-I386: test21d: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test21d: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test21d: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test21d: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %a = alloca i32*, align 8 + %b = alloca float**, align 8 + %call = call i32* @getp() + store i32* %call, i32** %a, align 8 + %0 = bitcast i32** %a to float** + store float** %0, float*** %b, align 8 + %1 = load float*** %b, align 8 + call void @funfloat2(float** %1) + ret void +} + +; test22a: [2 x i8] in a class +; no ssp attribute +; Requires no protector. 
+define signext i8 @test22a() nounwind uwtable { +entry: +; LINUX-I386: test22a: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test22a: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test22a: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test22a: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %a = alloca %class.A, align 1 + %array = getelementptr inbounds %class.A* %a, i32 0, i32 0 + %arrayidx = getelementptr inbounds [2 x i8]* %array, i32 0, i64 0 + %0 = load i8* %arrayidx, align 1 + ret i8 %0 +} + +; test22b: [2 x i8] in a class +; ssp attribute +; Requires no protector. +define signext i8 @test22b() nounwind uwtable ssp { +entry: +; LINUX-I386: test22b: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test22b: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test22b: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test22b: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %a = alloca %class.A, align 1 + %array = getelementptr inbounds %class.A* %a, i32 0, i32 0 + %arrayidx = getelementptr inbounds [2 x i8]* %array, i32 0, i64 0 + %0 = load i8* %arrayidx, align 1 + ret i8 %0 +} + +; test22c: [2 x i8] in a class +; sspstrong attribute +; Requires protector. +define signext i8 @test22c() nounwind uwtable sspstrong { +entry: +; LINUX-I386: test22c: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test22c: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test22c: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test22c: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %a = alloca %class.A, align 1 + %array = getelementptr inbounds %class.A* %a, i32 0, i32 0 + %arrayidx = getelementptr inbounds [2 x i8]* %array, i32 0, i64 0 + %0 = load i8* %arrayidx, align 1 + ret i8 %0 +} + +; test22d: [2 x i8] in a class +; sspreq attribute +; Requires protector. +define signext i8 @test22d() nounwind uwtable sspreq { +entry: +; LINUX-I386: test22d: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test22d: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test22d: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test22d: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %a = alloca %class.A, align 1 + %array = getelementptr inbounds %class.A* %a, i32 0, i32 0 + %arrayidx = getelementptr inbounds [2 x i8]* %array, i32 0, i64 0 + %0 = load i8* %arrayidx, align 1 + ret i8 %0 +} + +; test23a: [2 x i8] nested in several layers of structs and unions +; no ssp attribute +; Requires no protector. 
+define signext i8 @test23a() nounwind uwtable { +entry: +; LINUX-I386: test23a: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test23a: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test23a: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test23a: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %x = alloca %struct.deep, align 1 + %b = getelementptr inbounds %struct.deep* %x, i32 0, i32 0 + %c = bitcast %union.anon* %b to %struct.anon* + %d = getelementptr inbounds %struct.anon* %c, i32 0, i32 0 + %e = getelementptr inbounds %struct.anon.0* %d, i32 0, i32 0 + %array = bitcast %union.anon.1* %e to [2 x i8]* + %arrayidx = getelementptr inbounds [2 x i8]* %array, i32 0, i64 0 + %0 = load i8* %arrayidx, align 1 + ret i8 %0 +} + +; test23b: [2 x i8] nested in several layers of structs and unions +; ssp attribute +; Requires no protector. +define signext i8 @test23b() nounwind uwtable ssp { +entry: +; LINUX-I386: test23b: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test23b: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test23b: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test23b: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %x = alloca %struct.deep, align 1 + %b = getelementptr inbounds %struct.deep* %x, i32 0, i32 0 + %c = bitcast %union.anon* %b to %struct.anon* + %d = getelementptr inbounds %struct.anon* %c, i32 0, i32 0 + %e = getelementptr inbounds %struct.anon.0* %d, i32 0, i32 0 + %array = bitcast %union.anon.1* %e to [2 x i8]* + %arrayidx = getelementptr inbounds [2 x i8]* %array, i32 0, i64 0 + %0 = load i8* %arrayidx, align 1 + ret i8 %0 +} + +; test23c: [2 x i8] nested in several layers of structs and unions +; sspstrong attribute +; Requires protector. +define signext i8 @test23c() nounwind uwtable sspstrong { +entry: +; LINUX-I386: test23c: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test23c: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test23c: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test23c: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %x = alloca %struct.deep, align 1 + %b = getelementptr inbounds %struct.deep* %x, i32 0, i32 0 + %c = bitcast %union.anon* %b to %struct.anon* + %d = getelementptr inbounds %struct.anon* %c, i32 0, i32 0 + %e = getelementptr inbounds %struct.anon.0* %d, i32 0, i32 0 + %array = bitcast %union.anon.1* %e to [2 x i8]* + %arrayidx = getelementptr inbounds [2 x i8]* %array, i32 0, i64 0 + %0 = load i8* %arrayidx, align 1 + ret i8 %0 +} + +; test23d: [2 x i8] nested in several layers of structs and unions +; sspreq attribute +; Requires protector. 
+define signext i8 @test23d() nounwind uwtable sspreq { +entry: +; LINUX-I386: test23d: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test23d: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test23d: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test23d: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %x = alloca %struct.deep, align 1 + %b = getelementptr inbounds %struct.deep* %x, i32 0, i32 0 + %c = bitcast %union.anon* %b to %struct.anon* + %d = getelementptr inbounds %struct.anon* %c, i32 0, i32 0 + %e = getelementptr inbounds %struct.anon.0* %d, i32 0, i32 0 + %array = bitcast %union.anon.1* %e to [2 x i8]* + %arrayidx = getelementptr inbounds [2 x i8]* %array, i32 0, i64 0 + %0 = load i8* %arrayidx, align 1 + ret i8 %0 +} + +; test24a: Variable sized alloca +; no ssp attribute +; Requires no protector. +define void @test24a(i32 %n) nounwind uwtable { +entry: +; LINUX-I386: test24a: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test24a: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test24a: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test24a: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %n.addr = alloca i32, align 4 + %a = alloca i32*, align 8 + store i32 %n, i32* %n.addr, align 4 + %0 = load i32* %n.addr, align 4 + %conv = sext i32 %0 to i64 + %1 = alloca i8, i64 %conv + %2 = bitcast i8* %1 to i32* + store i32* %2, i32** %a, align 8 + ret void +} + +; test24b: Variable sized alloca +; ssp attribute +; Requires protector. +define void @test24b(i32 %n) nounwind uwtable ssp { +entry: +; LINUX-I386: test24b: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test24b: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test24b: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test24b: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %n.addr = alloca i32, align 4 + %a = alloca i32*, align 8 + store i32 %n, i32* %n.addr, align 4 + %0 = load i32* %n.addr, align 4 + %conv = sext i32 %0 to i64 + %1 = alloca i8, i64 %conv + %2 = bitcast i8* %1 to i32* + store i32* %2, i32** %a, align 8 + ret void +} + +; test24c: Variable sized alloca +; sspstrong attribute +; Requires protector. +define void @test24c(i32 %n) nounwind uwtable sspstrong { +entry: +; LINUX-I386: test24c: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test24c: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test24c: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test24c: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %n.addr = alloca i32, align 4 + %a = alloca i32*, align 8 + store i32 %n, i32* %n.addr, align 4 + %0 = load i32* %n.addr, align 4 + %conv = sext i32 %0 to i64 + %1 = alloca i8, i64 %conv + %2 = bitcast i8* %1 to i32* + store i32* %2, i32** %a, align 8 + ret void +} + +; test24d: Variable sized alloca +; sspreq attribute +; Requires protector. 
+define void @test24d(i32 %n) nounwind uwtable sspreq { +entry: +; LINUX-I386: test24d: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test24d: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test24d: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test24d: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %n.addr = alloca i32, align 4 + %a = alloca i32*, align 8 + store i32 %n, i32* %n.addr, align 4 + %0 = load i32* %n.addr, align 4 + %conv = sext i32 %0 to i64 + %1 = alloca i8, i64 %conv + %2 = bitcast i8* %1 to i32* + store i32* %2, i32** %a, align 8 + ret void +} + +; test25a: array of [4 x i32] +; no ssp attribute +; Requires no protector. +define i32 @test25a() nounwind uwtable { +entry: +; LINUX-I386: test25a: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test25a: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test25a: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test25a: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %a = alloca [4 x i32], align 16 + %arrayidx = getelementptr inbounds [4 x i32]* %a, i32 0, i64 0 + %0 = load i32* %arrayidx, align 4 + ret i32 %0 +} + +; test25b: array of [4 x i32] +; ssp attribute +; Requires no protector, except for Darwin which _does_ require a protector. +define i32 @test25b() nounwind uwtable ssp { +entry: +; LINUX-I386: test25b: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test25b: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test25b: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test25b: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %a = alloca [4 x i32], align 16 + %arrayidx = getelementptr inbounds [4 x i32]* %a, i32 0, i64 0 + %0 = load i32* %arrayidx, align 4 + ret i32 %0 +} + +; test25c: array of [4 x i32] +; sspstrong attribute +; Requires protector. +define i32 @test25c() nounwind uwtable sspstrong { +entry: +; LINUX-I386: test25c: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test25c: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test25c: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test25c: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %a = alloca [4 x i32], align 16 + %arrayidx = getelementptr inbounds [4 x i32]* %a, i32 0, i64 0 + %0 = load i32* %arrayidx, align 4 + ret i32 %0 +} + +; test25d: array of [4 x i32] +; sspreq attribute +; Requires protector. 
+define i32 @test25d() nounwind uwtable sspreq { +entry: +; LINUX-I386: test25d: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test25d: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test25d: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test25d: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %a = alloca [4 x i32], align 16 + %arrayidx = getelementptr inbounds [4 x i32]* %a, i32 0, i64 0 + %0 = load i32* %arrayidx, align 4 + ret i32 %0 +} + +; test26: Nested structure, no arrays, no address-of expressions. +; Verify that the resulting gep-of-gep does not incorrectly trigger +; a stack protector. +; sspstrong attribute +; Requires no protector. +define void @test26() nounwind uwtable sspstrong { +entry: +; LINUX-I386: test26: +; LINUX-I386-NOT: calll __stack_chk_fail +; LINUX-I386: .cfi_endproc + +; LINUX-X64: test26: +; LINUX-X64-NOT: callq __stack_chk_fail +; LINUX-X64: .cfi_endproc + +; LINUX-KERNEL-X64: test26: +; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail +; LINUX-KERNEL-X64: .cfi_endproc + +; DARWIN-X64: test26: +; DARWIN-X64-NOT: callq ___stack_chk_fail +; DARWIN-X64: .cfi_endproc + %c = alloca %struct.nest, align 4 + %b = getelementptr inbounds %struct.nest* %c, i32 0, i32 1 + %_a = getelementptr inbounds %struct.pair* %b, i32 0, i32 0 + %0 = load i32* %_a, align 4 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %0) + ret void +} + +; test27: Address-of a structure taken in a function with a loop where +; the alloca is an incoming value to a PHI node and a use of that PHI +; node is also an incoming value. +; Verify that the address-of analysis does not get stuck in infinite +; recursion when chasing the alloca through the PHI nodes. +; Requires protector.
+define i32 @test27(i32 %arg) nounwind uwtable sspstrong { +bb: +; LINUX-I386: test27: +; LINUX-I386: mov{{l|q}} %gs: +; LINUX-I386: calll __stack_chk_fail + +; LINUX-X64: test27: +; LINUX-X64: mov{{l|q}} %fs: +; LINUX-X64: callq __stack_chk_fail + +; LINUX-KERNEL-X64: test27: +; LINUX-KERNEL-X64: mov{{l|q}} %gs: +; LINUX-KERNEL-X64: callq __stack_chk_fail + +; DARWIN-X64: test27: +; DARWIN-X64: mov{{l|q}} ___stack_chk_guard +; DARWIN-X64: callq ___stack_chk_fail + %tmp = alloca %struct.small*, align 8 + %tmp1 = call i32 (...)* @dummy(%struct.small** %tmp) nounwind + %tmp2 = load %struct.small** %tmp, align 8 + %tmp3 = ptrtoint %struct.small* %tmp2 to i64 + %tmp4 = trunc i64 %tmp3 to i32 + %tmp5 = icmp sgt i32 %tmp4, 0 + br i1 %tmp5, label %bb6, label %bb21 + +bb6: ; preds = %bb17, %bb + %tmp7 = phi %struct.small* [ %tmp19, %bb17 ], [ %tmp2, %bb ] + %tmp8 = phi i64 [ %tmp20, %bb17 ], [ 1, %bb ] + %tmp9 = phi i32 [ %tmp14, %bb17 ], [ %tmp1, %bb ] + %tmp10 = getelementptr inbounds %struct.small* %tmp7, i64 0, i32 0 + %tmp11 = load i8* %tmp10, align 1 + %tmp12 = icmp eq i8 %tmp11, 1 + %tmp13 = add nsw i32 %tmp9, 8 + %tmp14 = select i1 %tmp12, i32 %tmp13, i32 %tmp9 + %tmp15 = trunc i64 %tmp8 to i32 + %tmp16 = icmp eq i32 %tmp15, %tmp4 + br i1 %tmp16, label %bb21, label %bb17 + +bb17: ; preds = %bb6 + %tmp18 = getelementptr inbounds %struct.small** %tmp, i64 %tmp8 + %tmp19 = load %struct.small** %tmp18, align 8 + %tmp20 = add i64 %tmp8, 1 + br label %bb6 + +bb21: ; preds = %bb6, %bb + %tmp22 = phi i32 [ %tmp1, %bb ], [ %tmp14, %bb6 ] + %tmp23 = call i32 (...)* @dummy(i32 %tmp22) nounwind + ret i32 undef +} -declare i32 @printf(i8*, ...) nounwind +declare double @testi_aux() +declare i8* @strcpy(i8*, i8*) +declare i32 @printf(i8*, ...) +declare void @funcall(i32*) +declare void @funcall2(i32**) +declare void @funfloat(float*) +declare void @funfloat2(float**) +declare void @_Z3exceptPi(i32*) +declare i32 @__gxx_personality_v0(...) +declare i32* @getp() +declare i32 @dummy(...) diff --git a/test/CodeGen/X86/stack-update-frame-opcode.ll b/test/CodeGen/X86/stack-update-frame-opcode.ll new file mode 100644 index 000000000000..9a5a2421233d --- /dev/null +++ b/test/CodeGen/X86/stack-update-frame-opcode.ll @@ -0,0 +1,31 @@ +; RUN: llc -mtriple=x86_64-pc-linux -mcpu=corei7 < %s | FileCheck -check-prefix=CORE_LP64 %s +; RUN: llc -mtriple=x86_64-pc-linux -mcpu=atom < %s | FileCheck -check-prefix=ATOM_LP64 %s +; RUN: llc -mtriple=x86_64-pc-linux-gnux32 -mcpu=corei7 < %s | FileCheck -check-prefix=CORE_ILP32 %s +; RUN: llc -mtriple=x86_64-pc-linux-gnux32 -mcpu=atom < %s | FileCheck -check-prefix=ATOM_ILP32 %s + +define i32 @bar(i32 %a) nounwind { +entry: + %arr = alloca [400 x i32], align 16 + +; There is a 2x2 variation matrix here: +; Atoms use LEA to update the SP. Opcode bitness depends on data model. +; Cores use sub/add to update the SP. Opcode bitness depends on data model. 
+ +; CORE_LP64: subq $1608 +; CORE_ILP32: subl $1608 +; ATOM_LP64: leaq -1608 +; ATOM_ILP32: leal -1608 + + %arraydecay = getelementptr inbounds [400 x i32]* %arr, i64 0, i64 0 + %call = call i32 @foo(i32 %a, i32* %arraydecay) nounwind + ret i32 %call + +; CORE_LP64: addq $1608 +; CORE_ILP32: addl $1608 +; ATOM_LP64: leaq 1608 +; ATOM_ILP32: leal 1608 + +} + +declare i32 @foo(i32, i32*) + diff --git a/test/CodeGen/X86/store_op_load_fold.ll b/test/CodeGen/X86/store_op_load_fold.ll index 6e47eb397d1d..070cccdb87dd 100644 --- a/test/CodeGen/X86/store_op_load_fold.ll +++ b/test/CodeGen/X86/store_op_load_fold.ll @@ -1,13 +1,30 @@ -; RUN: llc < %s -march=x86 | not grep mov +; RUN: llc < %s -mtriple=i686-darwin | FileCheck %s ; ; Test the add and load are folded into the store instruction. @X = internal global i16 0 ; <i16*> [#uses=2] define void @foo() nounwind { +; CHECK: foo: +; CHECK-NOT: mov +; CHECK: add +; CHECK-NEXT: ret %tmp.0 = load i16* @X ; <i16> [#uses=1] %tmp.3 = add i16 %tmp.0, 329 ; <i16> [#uses=1] store i16 %tmp.3, i16* @X ret void } +; rdar://12838504 +%struct.S2 = type { i64, i16, [2 x i8], i8, [3 x i8], [7 x i8], i8, [8 x i8] } +@s2 = external global %struct.S2, align 16 +define void @test2() nounwind uwtable ssp { +; CHECK: test2: +; CHECK: mov +; CHECK-NEXT: and +; CHECK-NEXT: ret + %bf.load35 = load i56* bitcast ([7 x i8]* getelementptr inbounds (%struct.S2* @s2, i32 0, i32 5) to i56*), align 16 + %bf.clear36 = and i56 %bf.load35, -1125895611875329 + store i56 %bf.clear36, i56* bitcast ([7 x i8]* getelementptr inbounds (%struct.S2* @s2, i32 0, i32 5) to i56*), align 16 + ret void +} diff --git a/test/CodeGen/X86/subtarget-feature-change.ll b/test/CodeGen/X86/subtarget-feature-change.ll new file mode 100644 index 000000000000..cd677294c669 --- /dev/null +++ b/test/CodeGen/X86/subtarget-feature-change.ll @@ -0,0 +1,66 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s + +; This should not generate SSE instructions: +; +; CHECK: without.sse: +; CHECK: flds +; CHECK: fmuls +; CHECK: fstps +define void @without.sse(float* nocapture %a, float* nocapture %b, float* nocapture %c, i32 %n) #0 { +entry: + %cmp9 = icmp sgt i32 %n, 0 + br i1 %cmp9, label %for.body, label %for.end + +for.body: + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv + %0 = load float* %arrayidx, align 4, !tbaa !0 + %arrayidx2 = getelementptr inbounds float* %c, i64 %indvars.iv + %1 = load float* %arrayidx2, align 4, !tbaa !0 + %mul = fmul float %0, %1 + %arrayidx4 = getelementptr inbounds float* %a, i64 %indvars.iv + store float %mul, float* %arrayidx4, align 4, !tbaa !0 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +; This should generate SSE instructions: +; +; CHECK: with.sse +; CHECK: movss +; CHECK: mulss +; CHECK: movss +define void @with.sse(float* nocapture %a, float* nocapture %b, float* nocapture %c, i32 %n) #1 { +entry: + %cmp9 = icmp sgt i32 %n, 0 + br i1 %cmp9, label %for.body, label %for.end + +for.body: + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv + %0 = load float* %arrayidx, align 4, !tbaa !0 + %arrayidx2 = getelementptr inbounds float* %c, i64 %indvars.iv + %1 = load float* %arrayidx2, align 4, !tbaa !0 + %mul = fmul float %0, %1 + %arrayidx4 = 
getelementptr inbounds float* %a, i64 %indvars.iv + store float %mul, float* %arrayidx4, align 4, !tbaa !0 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +attributes #0 = { nounwind optsize ssp uwtable "target-cpu"="core2" "target-features"="-sse4a,-avx2,-xop,-fma4,-bmi2,-3dnow,-3dnowa,-pclmul,-sse,-avx,-sse41,-ssse3,+mmx,-rtm,-sse42,-lzcnt,-f16c,-popcnt,-bmi,-aes,-fma,-rdrand,-sse2,-sse3" } +attributes #1 = { nounwind optsize ssp uwtable "target-cpu"="core2" "target-features"="-sse4a,-avx2,-xop,-fma4,-bmi2,-3dnow,-3dnowa,-pclmul,+sse,-avx,-sse41,+ssse3,+mmx,-rtm,-sse42,-lzcnt,-f16c,-popcnt,-bmi,-aes,-fma,-rdrand,+sse2,+sse3" } + +!0 = metadata !{metadata !"float", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/CodeGen/X86/tailcall-fastisel.ll b/test/CodeGen/X86/tailcall-fastisel.ll index 7f92af4dca9f..842ed25439f8 100644 --- a/test/CodeGen/X86/tailcall-fastisel.ll +++ b/test/CodeGen/X86/tailcall-fastisel.ll @@ -1,12 +1,11 @@ -; RUN: llc < %s -march=x86-64 -tailcallopt -fast-isel | not grep TAILCALL - -; Fast-isel shouldn't attempt to cope with tail calls. +; RUN: llc < %s -mtriple=x86_64-apple-darwin -tailcallopt -fast-isel -fast-isel-abort | FileCheck %s %0 = type { i64, i32, i8* } define fastcc i8* @"visit_array_aux<`Reference>"(%0 %arg, i32 %arg1) nounwind { fail: ; preds = %entry %tmp20 = tail call fastcc i8* @"visit_array_aux<`Reference>"(%0 %arg, i32 undef) ; <i8*> [#uses=1] +; CHECK: jmp "_visit_array_aux<`Reference>" ## TAILCALL ret i8* %tmp20 } diff --git a/test/CodeGen/X86/tailcall-structret.ll b/test/CodeGen/X86/tailcall-structret.ll index d8be4b2e2dfd..dcfefe86704e 100644 --- a/test/CodeGen/X86/tailcall-structret.ll +++ b/test/CodeGen/X86/tailcall-structret.ll @@ -1,6 +1,7 @@ -; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL +; RUN: llc < %s -mtriple=i686-unknown-linux -tailcallopt | FileCheck %s define fastcc { { i8*, i8* }*, i8*} @init({ { i8*, i8* }*, i8*}, i32) { entry: %2 = tail call fastcc { { i8*, i8* }*, i8* } @init({ { i8*, i8*}*, i8*} %0, i32 %1) ret { { i8*, i8* }*, i8*} %2 +; CHECK: jmp init } diff --git a/test/CodeGen/X86/tailcallbyval.ll b/test/CodeGen/X86/tailcallbyval.ll index 118eee6ba6cd..9a0b57c138c2 100644 --- a/test/CodeGen/X86/tailcallbyval.ll +++ b/test/CodeGen/X86/tailcallbyval.ll @@ -1,5 +1,4 @@ -; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL -; RUN: llc < %s -march=x86 -tailcallopt | grep "movl[[:space:]]*4(%esp), %eax" | count 1 +; RUN: llc < %s -mtriple=i686-unknown-linux -tailcallopt | FileCheck %s %struct.s = type {i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } @@ -9,10 +8,14 @@ entry: %tmp2 = getelementptr %struct.s* %a, i32 0, i32 0 %tmp3 = load i32* %tmp2 ret i32 %tmp3 +; CHECK: tailcallee +; CHECK: movl 4(%esp), %eax } define fastcc i32 @tailcaller(%struct.s* byval %a) nounwind { entry: %tmp4 = tail call fastcc i32 @tailcallee(%struct.s* byval %a ) ret i32 %tmp4 +; CHECK: tailcaller +; CHECK: jmp tailcallee } diff --git a/test/CodeGen/X86/tailcallfp.ll b/test/CodeGen/X86/tailcallfp.ll index c0b609ac956e..22a7930ba877 100644 --- a/test/CodeGen/X86/tailcallfp.ll +++ b/test/CodeGen/X86/tailcallfp.ll @@ -1,5 +1,6 @@ -; RUN: llc < %s -march=x86 -x86-asm-syntax=intel -tailcallopt | not grep call +; RUN: 
llc < %s -march=x86 -tailcallopt | FileCheck %s define fastcc i32 @bar(i32 %X, i32(double, i32) *%FP) { %Y = tail call fastcc i32 %FP(double 0.0, i32 %X) ret i32 %Y +; CHECK: jmpl } diff --git a/test/CodeGen/X86/tailcallpic1.ll b/test/CodeGen/X86/tailcallpic1.ll index 60e3be5c50fd..ff590a1fd3e9 100644 --- a/test/CodeGen/X86/tailcallpic1.ll +++ b/test/CodeGen/X86/tailcallpic1.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -tailcallopt -mtriple=i686-pc-linux-gnu -relocation-model=pic | grep TAILCALL +; RUN: llc < %s -tailcallopt -mtriple=i686-pc-linux-gnu -relocation-model=pic | FileCheck %s define protected fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { entry: @@ -9,4 +9,5 @@ define fastcc i32 @tailcaller(i32 %in1, i32 %in2) { entry: %tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 ) ; <i32> [#uses=1] ret i32 %tmp11 +; CHECK: jmp tailcallee } diff --git a/test/CodeGen/X86/tailcallpic2.ll b/test/CodeGen/X86/tailcallpic2.ll index eaa76312396c..1b6bdb769861 100644 --- a/test/CodeGen/X86/tailcallpic2.ll +++ b/test/CodeGen/X86/tailcallpic2.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -tailcallopt -mtriple=i686-pc-linux-gnu -relocation-model=pic | grep -v TAILCALL +; RUN: llc < %s -tailcallopt -mtriple=i686-pc-linux-gnu -relocation-model=pic | FileCheck %s define fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { entry: @@ -9,4 +9,7 @@ define fastcc i32 @tailcaller(i32 %in1, i32 %in2) { entry: %tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 ) ; <i32> [#uses=1] ret i32 %tmp11 +; CHECK: movl tailcallee@GOT +; CHECK: jmpl } + diff --git a/test/CodeGen/X86/thiscall-struct-return.ll b/test/CodeGen/X86/thiscall-struct-return.ll deleted file mode 100644 index 0507cb890cd2..000000000000 --- a/test/CodeGen/X86/thiscall-struct-return.ll +++ /dev/null @@ -1,47 +0,0 @@ -; RUN: llc < %s -mtriple=i386-PC-Win32 | FileCheck %s - -%class.C = type { i8 } -%struct.S = type { i32 } -%struct.M = type { i32, i32 } - -declare void @_ZN1CC1Ev(%class.C* %this) unnamed_addr nounwind align 2 -declare x86_thiscallcc void @_ZNK1C5SmallEv(%struct.S* noalias sret %agg.result, %class.C* %this) nounwind align 2 -declare x86_thiscallcc void @_ZNK1C6MediumEv(%struct.M* noalias sret %agg.result, %class.C* %this) nounwind align 2 - -define void @testv() nounwind { -; CHECK: testv: -; CHECK: leal 16(%esp), %esi -; CHECK-NEXT: movl %esi, (%esp) -; CHECK-NEXT: calll _ZN1CC1Ev -; CHECK: leal 8(%esp), %eax -; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: calll _ZNK1C5SmallEv -entry: - %c = alloca %class.C, align 1 - %tmp = alloca %struct.S, align 4 - call void @_ZN1CC1Ev(%class.C* %c) - ; This call should put the return structure as a pointer - ; into EAX instead of returning directly in EAX. The this - ; pointer should go into ECX - call x86_thiscallcc void @_ZNK1C5SmallEv(%struct.S* sret %tmp, %class.C* %c) - ret void -} - -define void @test2v() nounwind { -; CHECK: test2v: -; CHECK: leal 16(%esp), %esi -; CHECK-NEXT: movl %esi, (%esp) -; CHECK-NEXT: calll _ZN1CC1Ev -; CHECK: leal 8(%esp), %eax -; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: calll _ZNK1C6MediumEv -entry: - %c = alloca %class.C, align 1 - %tmp = alloca %struct.M, align 4 - call void @_ZN1CC1Ev(%class.C* %c) - ; This call should put the return structure as a pointer - ; into EAX instead of returning directly in EAX/EDX. 
The this - ; pointer should go into ECX - call x86_thiscallcc void @_ZNK1C6MediumEv(%struct.M* sret %tmp, %class.C* %c) - ret void -} diff --git a/test/CodeGen/X86/tls.ll b/test/CodeGen/X86/tls.ll index e8a79bfa6ee3..8cdecd81bff5 100644 --- a/test/CodeGen/X86/tls.ll +++ b/test/CodeGen/X86/tls.ll @@ -22,13 +22,13 @@ define i32 @f1() { ; X32_WIN: movl __tls_index, %eax ; X32_WIN-NEXT: movl %fs:__tls_array, %ecx ; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax -; X32_WIN-NEXT: movl _i1@SECREL(%eax), %eax +; X32_WIN-NEXT: movl _i1@SECREL32(%eax), %eax ; X32_WIN-NEXT: ret ; X64_WIN: f1: ; X64_WIN: movl _tls_index(%rip), %eax ; X64_WIN-NEXT: movq %gs:88, %rcx ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax -; X64_WIN-NEXT: movl i1@SECREL(%rax), %eax +; X64_WIN-NEXT: movl i1@SECREL32(%rax), %eax ; X64_WIN-NEXT: ret entry: @@ -49,13 +49,13 @@ define i32* @f2() { ; X32_WIN: movl __tls_index, %eax ; X32_WIN-NEXT: movl %fs:__tls_array, %ecx ; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax -; X32_WIN-NEXT: leal _i1@SECREL(%eax), %eax +; X32_WIN-NEXT: leal _i1@SECREL32(%eax), %eax ; X32_WIN-NEXT: ret ; X64_WIN: f2: ; X64_WIN: movl _tls_index(%rip), %eax ; X64_WIN-NEXT: movq %gs:88, %rcx ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax -; X64_WIN-NEXT: leaq i1@SECREL(%rax), %rax +; X64_WIN-NEXT: leaq i1@SECREL32(%rax), %rax ; X64_WIN-NEXT: ret entry: @@ -75,13 +75,13 @@ define i32 @f3() nounwind { ; X32_WIN: movl __tls_index, %eax ; X32_WIN-NEXT: movl %fs:__tls_array, %ecx ; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax -; X32_WIN-NEXT: movl _i2@SECREL(%eax), %eax +; X32_WIN-NEXT: movl _i2@SECREL32(%eax), %eax ; X32_WIN-NEXT: ret ; X64_WIN: f3: ; X64_WIN: movl _tls_index(%rip), %eax ; X64_WIN-NEXT: movq %gs:88, %rcx ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax -; X64_WIN-NEXT: movl i2@SECREL(%rax), %eax +; X64_WIN-NEXT: movl i2@SECREL32(%rax), %eax ; X64_WIN-NEXT: ret entry: @@ -102,13 +102,13 @@ define i32* @f4() { ; X32_WIN: movl __tls_index, %eax ; X32_WIN-NEXT: movl %fs:__tls_array, %ecx ; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax -; X32_WIN-NEXT: leal _i2@SECREL(%eax), %eax +; X32_WIN-NEXT: leal _i2@SECREL32(%eax), %eax ; X32_WIN-NEXT: ret ; X64_WIN: f4: ; X64_WIN: movl _tls_index(%rip), %eax ; X64_WIN-NEXT: movq %gs:88, %rcx ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax -; X64_WIN-NEXT: leaq i2@SECREL(%rax), %rax +; X64_WIN-NEXT: leaq i2@SECREL32(%rax), %rax ; X64_WIN-NEXT: ret entry: @@ -126,13 +126,13 @@ define i32 @f5() nounwind { ; X32_WIN: movl __tls_index, %eax ; X32_WIN-NEXT: movl %fs:__tls_array, %ecx ; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax -; X32_WIN-NEXT: movl _i3@SECREL(%eax), %eax +; X32_WIN-NEXT: movl _i3@SECREL32(%eax), %eax ; X32_WIN-NEXT: ret ; X64_WIN: f5: ; X64_WIN: movl _tls_index(%rip), %eax ; X64_WIN-NEXT: movq %gs:88, %rcx ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax -; X64_WIN-NEXT: movl i3@SECREL(%rax), %eax +; X64_WIN-NEXT: movl i3@SECREL32(%rax), %eax ; X64_WIN-NEXT: ret entry: @@ -153,13 +153,13 @@ define i32* @f6() { ; X32_WIN: movl __tls_index, %eax ; X32_WIN-NEXT: movl %fs:__tls_array, %ecx ; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax -; X32_WIN-NEXT: leal _i3@SECREL(%eax), %eax +; X32_WIN-NEXT: leal _i3@SECREL32(%eax), %eax ; X32_WIN-NEXT: ret ; X64_WIN: f6: ; X64_WIN: movl _tls_index(%rip), %eax ; X64_WIN-NEXT: movq %gs:88, %rcx ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax -; X64_WIN-NEXT: leaq i3@SECREL(%rax), %rax +; X64_WIN-NEXT: leaq i3@SECREL32(%rax), %rax ; X64_WIN-NEXT: ret entry: @@ -234,14 +234,14 @@ define i16 @f11() { ; X32_WIN: movl __tls_index, %eax ; X32_WIN-NEXT: movl %fs:__tls_array, %ecx ; X32_WIN-NEXT: movl 
(%ecx,%eax,4), %eax -; X32_WIN-NEXT: movzwl _s1@SECREL(%eax), %eax +; X32_WIN-NEXT: movzwl _s1@SECREL32(%eax), %eax ; X32_WIN-NEXT: # kill ; X32_WIN-NEXT: ret ; X64_WIN: f11: ; X64_WIN: movl _tls_index(%rip), %eax ; X64_WIN-NEXT: movq %gs:88, %rcx ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax -; X64_WIN-NEXT: movzwl s1@SECREL(%rax), %eax +; X64_WIN-NEXT: movzwl s1@SECREL32(%rax), %eax ; X64_WIN-NEXT: # kill ; X64_WIN-NEXT: ret @@ -261,13 +261,13 @@ define i32 @f12() { ; X32_WIN: movl __tls_index, %eax ; X32_WIN-NEXT: movl %fs:__tls_array, %ecx ; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax -; X32_WIN-NEXT: movswl _s1@SECREL(%eax), %eax +; X32_WIN-NEXT: movswl _s1@SECREL32(%eax), %eax ; X32_WIN-NEXT: ret ; X64_WIN: f12: ; X64_WIN: movl _tls_index(%rip), %eax ; X64_WIN-NEXT: movq %gs:88, %rcx ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax -; X64_WIN-NEXT: movswl s1@SECREL(%rax), %eax +; X64_WIN-NEXT: movswl s1@SECREL32(%rax), %eax ; X64_WIN-NEXT: ret entry: @@ -287,13 +287,13 @@ define i8 @f13() { ; X32_WIN: movl __tls_index, %eax ; X32_WIN-NEXT: movl %fs:__tls_array, %ecx ; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax -; X32_WIN-NEXT: movb _b1@SECREL(%eax), %al +; X32_WIN-NEXT: movb _b1@SECREL32(%eax), %al ; X32_WIN-NEXT: ret ; X64_WIN: f13: ; X64_WIN: movl _tls_index(%rip), %eax ; X64_WIN-NEXT: movq %gs:88, %rcx ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax -; X64_WIN-NEXT: movb b1@SECREL(%rax), %al +; X64_WIN-NEXT: movb b1@SECREL32(%rax), %al ; X64_WIN-NEXT: ret entry: @@ -312,13 +312,13 @@ define i32 @f14() { ; X32_WIN: movl __tls_index, %eax ; X32_WIN-NEXT: movl %fs:__tls_array, %ecx ; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax -; X32_WIN-NEXT: movsbl _b1@SECREL(%eax), %eax +; X32_WIN-NEXT: movsbl _b1@SECREL32(%eax), %eax ; X32_WIN-NEXT: ret ; X64_WIN: f14: ; X64_WIN: movl _tls_index(%rip), %eax ; X64_WIN-NEXT: movq %gs:88, %rcx ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax -; X64_WIN-NEXT: movsbl b1@SECREL(%rax), %eax +; X64_WIN-NEXT: movsbl b1@SECREL32(%rax), %eax ; X64_WIN-NEXT: ret entry: diff --git a/test/CodeGen/X86/twoaddr-coalesce-2.ll b/test/CodeGen/X86/twoaddr-coalesce-2.ll index af6d47af7a0f..cbcde0655597 100644 --- a/test/CodeGen/X86/twoaddr-coalesce-2.ll +++ b/test/CodeGen/X86/twoaddr-coalesce-2.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=penryn -stats 2>&1 | \ ; RUN: grep "twoaddrinstr" | grep "Number of instructions aggressively commuted" ; rdar://6480363 diff --git a/test/CodeGen/X86/twoaddr-pass-sink.ll b/test/CodeGen/X86/twoaddr-pass-sink.ll index 513c304e3bf8..9ca280627afe 100644 --- a/test/CodeGen/X86/twoaddr-pass-sink.ll +++ b/test/CodeGen/X86/twoaddr-pass-sink.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -march=x86 -mattr=+sse2 -stats 2>&1 | grep "Number of 3-address instructions sunk" define void @t2(<2 x i64>* %vDct, <2 x i64>* %vYp, i8* %skiplist, <2 x i64> %a1) nounwind { diff --git a/test/CodeGen/X86/unknown-location.ll b/test/CodeGen/X86/unknown-location.ll index b89c4738af12..e02e3b54752b 100644 --- a/test/CodeGen/X86/unknown-location.ll +++ b/test/CodeGen/X86/unknown-location.ll @@ -18,12 +18,16 @@ entry: ret i32 %c, !dbg !8 } -!0 = metadata !{i32 524545, metadata !1, metadata !"x", metadata !2, i32 1, metadata !6} ; [ DW_TAG_arg_variable ] -!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 1, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false} ; [ DW_TAG_subprogram ] -!2 = metadata !{i32 524329, metadata !"test.c", metadata !"/dir", metadata !3} ; [ 
DW_TAG_file_type ] -!3 = metadata !{i32 524305, i32 0, i32 12, metadata !"test.c", metadata !".", metadata !"producer", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] +!llvm.dbg.cu = !{!3} + +!0 = metadata !{i32 786689, metadata !1, metadata !"x", metadata !2, i32 1, metadata !6} ; [ DW_TAG_arg_variable ] +!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 1, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, i32, i32, i32)* @foo, null, null, null, i32 1} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 786473, metadata !10} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 786449, metadata !10, i32 12, metadata !"producer", i1 false, metadata !"", i32 0, null, null, metadata !9, null, metadata !""} ; [ DW_TAG_compile_unit ] +!4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] !5 = metadata !{metadata !6} -!6 = metadata !{i32 524324, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!7 = metadata !{i32 524299, metadata !1, i32 1, i32 30} ; [ DW_TAG_lexical_block ] +!6 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!7 = metadata !{i32 786443, metadata !1, i32 1, i32 30} ; [ DW_TAG_lexical_block ] !8 = metadata !{i32 4, i32 3, metadata !7, null} +!9 = metadata !{metadata !1} +!10 = metadata !{metadata !"test.c", metadata !"/dir"} diff --git a/test/CodeGen/X86/v8i1-masks.ll b/test/CodeGen/X86/v8i1-masks.ll new file mode 100644 index 000000000000..8cbfb5d7243a --- /dev/null +++ b/test/CodeGen/X86/v8i1-masks.ll @@ -0,0 +1,39 @@ +; RUN: llc -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -o - < %s | FileCheck %s + +;CHECK: and_masks +;CHECK: vmovaps +;CHECK: vcmpltp +;CHECK: vcmpltp +;CHECK: vandps +;CHECK: vandps +;CHECK: vmovaps +;CHECK: ret + +define void @and_masks(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwind uwtable noinline ssp { + %v0 = load <8 x float>* %a, align 16 + %v1 = load <8 x float>* %b, align 16 + %m0 = fcmp olt <8 x float> %v1, %v0 + %v2 = load <8 x float>* %c, align 16 + %m1 = fcmp olt <8 x float> %v2, %v0 + %mand = and <8 x i1> %m1, %m0 + %r = zext <8 x i1> %mand to <8 x i32> + store <8 x i32> %r, <8 x i32>* undef, align 32 + ret void +} + +;CHECK: neg_mask +;CHECK: vcmpltps +;CHECK: vxorps +;CHECK: vandps +;CHECK: vmovaps +;CHECK: ret +define void @neg_masks(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwind uwtable noinline ssp { + %v0 = load <8 x float>* %a, align 16 + %v1 = load <8 x float>* %b, align 16 + %m0 = fcmp olt <8 x float> %v1, %v0 + %mand = xor <8 x i1> %m0, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1> + %r = zext <8 x i1> %mand to <8 x i32> + store <8 x i32> %r, <8 x i32>* undef, align 32 + ret void +} + diff --git a/test/CodeGen/X86/vec_align_i256.ll b/test/CodeGen/X86/vec_align_i256.ll new file mode 100644 index 000000000000..078bcb1544d3 --- /dev/null +++ b/test/CodeGen/X86/vec_align_i256.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -mcpu=corei7-avx | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" 
+target triple = "i686-apple-darwin8" + +; Make sure that we are not generating a movaps because the vector is aligned to 1. +;CHECK: @foo +;CHECK: xor +;CHECK-NEXT: vmovups +;CHECK-NEXT: ret +define void @foo() { + store <16 x i16> zeroinitializer, <16 x i16>* undef, align 1 + ret void +} diff --git a/test/CodeGen/X86/vec_compare.ll b/test/CodeGen/X86/vec_compare.ll index 367dd27f3076..b6d91a3f770e 100644 --- a/test/CodeGen/X86/vec_compare.ll +++ b/test/CodeGen/X86/vec_compare.ll @@ -41,3 +41,27 @@ define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind { %D = sext <4 x i1> %C to <4 x i32> ret <4 x i32> %D } + +define <2 x i64> @test5(<2 x i64> %A, <2 x i64> %B) nounwind { +; CHECK: test5: +; CHECK: pcmpeqd +; CHECK: pshufd $-79 +; CHECK: pand +; CHECK: ret + %C = icmp eq <2 x i64> %A, %B + %D = sext <2 x i1> %C to <2 x i64> + ret <2 x i64> %D +} + +define <2 x i64> @test6(<2 x i64> %A, <2 x i64> %B) nounwind { +; CHECK: test6: +; CHECK: pcmpeqd +; CHECK: pshufd $-79 +; CHECK: pand +; CHECK: pcmpeqd +; CHECK: pxor +; CHECK: ret + %C = icmp ne <2 x i64> %A, %B + %D = sext <2 x i1> %C to <2 x i64> + ret <2 x i64> %D +} diff --git a/test/CodeGen/X86/vec_floor.ll b/test/CodeGen/X86/vec_floor.ll index 5e0160bd2856..4db68bd18223 100644 --- a/test/CodeGen/X86/vec_floor.ll +++ b/test/CodeGen/X86/vec_floor.ll @@ -36,3 +36,147 @@ define <8 x float> @floor_v8f32(<8 x float> %p) ret <8 x float> %t } declare <8 x float> @llvm.floor.v8f32(<8 x float> %p) + +define <2 x double> @ceil_v2f64(<2 x double> %p) +{ + ; CHECK: ceil_v2f64 + ; CHECK: vroundpd + %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p) + ret <2 x double> %t +} +declare <2 x double> @llvm.ceil.v2f64(<2 x double> %p) + +define <4 x float> @ceil_v4f32(<4 x float> %p) +{ + ; CHECK: ceil_v4f32 + ; CHECK: vroundps + %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p) + ret <4 x float> %t +} +declare <4 x float> @llvm.ceil.v4f32(<4 x float> %p) + +define <4 x double> @ceil_v4f64(<4 x double> %p) +{ + ; CHECK: ceil_v4f64 + ; CHECK: vroundpd + %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p) + ret <4 x double> %t +} +declare <4 x double> @llvm.ceil.v4f64(<4 x double> %p) + +define <8 x float> @ceil_v8f32(<8 x float> %p) +{ + ; CHECK: ceil_v8f32 + ; CHECK: vroundps + %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p) + ret <8 x float> %t +} +declare <8 x float> @llvm.ceil.v8f32(<8 x float> %p) + +define <2 x double> @trunc_v2f64(<2 x double> %p) +{ + ; CHECK: trunc_v2f64 + ; CHECK: vroundpd + %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p) + ret <2 x double> %t +} +declare <2 x double> @llvm.trunc.v2f64(<2 x double> %p) + +define <4 x float> @trunc_v4f32(<4 x float> %p) +{ + ; CHECK: trunc_v4f32 + ; CHECK: vroundps + %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p) + ret <4 x float> %t +} +declare <4 x float> @llvm.trunc.v4f32(<4 x float> %p) + +define <4 x double> @trunc_v4f64(<4 x double> %p) +{ + ; CHECK: trunc_v4f64 + ; CHECK: vroundpd + %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p) + ret <4 x double> %t +} +declare <4 x double> @llvm.trunc.v4f64(<4 x double> %p) + +define <8 x float> @trunc_v8f32(<8 x float> %p) +{ + ; CHECK: trunc_v8f32 + ; CHECK: vroundps + %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p) + ret <8 x float> %t +} +declare <8 x float> @llvm.trunc.v8f32(<8 x float> %p) + +define <2 x double> @rint_v2f64(<2 x double> %p) +{ + ; CHECK: rint_v2f64 + ; CHECK: vroundpd + %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p) + ret <2 x double> %t +} +declare <2 x 
double> @llvm.rint.v2f64(<2 x double> %p) + +define <4 x float> @rint_v4f32(<4 x float> %p) +{ + ; CHECK: rint_v4f32 + ; CHECK: vroundps + %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p) + ret <4 x float> %t +} +declare <4 x float> @llvm.rint.v4f32(<4 x float> %p) + +define <4 x double> @rint_v4f64(<4 x double> %p) +{ + ; CHECK: rint_v4f64 + ; CHECK: vroundpd + %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p) + ret <4 x double> %t +} +declare <4 x double> @llvm.rint.v4f64(<4 x double> %p) + +define <8 x float> @rint_v8f32(<8 x float> %p) +{ + ; CHECK: rint_v8f32 + ; CHECK: vroundps + %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p) + ret <8 x float> %t +} +declare <8 x float> @llvm.rint.v8f32(<8 x float> %p) + +define <2 x double> @nearbyint_v2f64(<2 x double> %p) +{ + ; CHECK: nearbyint_v2f64 + ; CHECK: vroundpd + %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p) + ret <2 x double> %t +} +declare <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p) + +define <4 x float> @nearbyint_v4f32(<4 x float> %p) +{ + ; CHECK: nearbyint_v4f32 + ; CHECK: vroundps + %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p) + ret <4 x float> %t +} +declare <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p) + +define <4 x double> @nearbyint_v4f64(<4 x double> %p) +{ + ; CHECK: nearbyint_v4f64 + ; CHECK: vroundpd + %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p) + ret <4 x double> %t +} +declare <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p) + +define <8 x float> @nearbyint_v8f32(<8 x float> %p) +{ + ; CHECK: nearbyint_v8f32 + ; CHECK: vroundps + %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p) + ret <8 x float> %t +} +declare <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p) diff --git a/test/CodeGen/X86/vec_fpext.ll b/test/CodeGen/X86/vec_fpext.ll index dc0464ff9e0f..863712ff48b3 100644 --- a/test/CodeGen/X86/vec_fpext.ll +++ b/test/CodeGen/X86/vec_fpext.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=x86 -mattr=+sse41,-avx | FileCheck %s -; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck --check-prefix=AVX %s +; RUN: llc < %s -march=x86 -mcpu=corei7-avx | FileCheck --check-prefix=AVX %s ; PR11674 define void @fpext_frommem(<2 x float>* %in, <2 x double>* %out) { @@ -29,8 +29,8 @@ entry: ; CHECK: cvtps2pd 8(%{{.+}}), %xmm{{[0-9]+}} ; CHECK: cvtps2pd 16(%{{.+}}), %xmm{{[0-9]+}} ; CHECK: cvtps2pd 24(%{{.+}}), %xmm{{[0-9]+}} -; AVX: vcvtps2pd (%{{.+}}), %ymm{{[0-9]+}} ; AVX: vcvtps2pd 16(%{{.+}}), %ymm{{[0-9]+}} +; AVX: vcvtps2pd (%{{.+}}), %ymm{{[0-9]+}} %0 = load <8 x float>* %in %1 = fpext <8 x float> %0 to <8 x double> store <8 x double> %1, <8 x double>* %out, align 1 diff --git a/test/CodeGen/X86/vec_insert-6.ll b/test/CodeGen/X86/vec_insert-6.ll index 2a4864a48a25..4583e1925e59 100644 --- a/test/CodeGen/X86/vec_insert-6.ll +++ b/test/CodeGen/X86/vec_insert-6.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=penryn | grep pslldq ; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=penryn -mtriple=i686-apple-darwin9 -o /dev/null -stats -info-output-file - | grep asm-printer | grep 6 diff --git a/test/CodeGen/X86/vec_sdiv_to_shift.ll b/test/CodeGen/X86/vec_sdiv_to_shift.ll new file mode 100644 index 000000000000..349868a87f53 --- /dev/null +++ b/test/CodeGen/X86/vec_sdiv_to_shift.ll @@ -0,0 +1,72 @@ +; RUN: llc < %s -march=x86-64 -mcpu=penryn -mattr=+avx2 | FileCheck %s + + +define <8 x i16> @sdiv_vec8x16(<8 x i16> %var) { +entry: +; CHECK: sdiv_vec8x16 +; CHECK: psraw $15 +; CHECK: vpsrlw $11 +; CHECK: vpaddw +; 
CHECK: vpsraw $5 +; CHECK: ret + %0 = sdiv <8 x i16> %var, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32> + ret <8 x i16> %0 +} + +define <4 x i32> @sdiv_zero(<4 x i32> %var) { +entry: +; CHECK: sdiv_zero +; CHECK-NOT: sra +; CHECK: ret + %0 = sdiv <4 x i32> %var, <i32 0, i32 0, i32 0, i32 0> + ret <4 x i32> %0 +} + +define <4 x i32> @sdiv_vec4x32(<4 x i32> %var) { +entry: +; CHECK: sdiv_vec4x32 +; CHECK: vpsrad $31 +; CHECK: vpsrld $28 +; CHECK: vpaddd +; CHECK: vpsrad $4 +; CHECK: ret +%0 = sdiv <4 x i32> %var, <i32 16, i32 16, i32 16, i32 16> +ret <4 x i32> %0 +} + +define <4 x i32> @sdiv_negative(<4 x i32> %var) { +entry: +; CHECK: sdiv_negative +; CHECK: vpsrad $31 +; CHECK: vpsrld $28 +; CHECK: vpaddd +; CHECK: vpsrad $4 +; CHECK: vpsubd +; CHECK: ret +%0 = sdiv <4 x i32> %var, <i32 -16, i32 -16, i32 -16, i32 -16> +ret <4 x i32> %0 +} + +define <8 x i32> @sdiv8x32(<8 x i32> %var) { +entry: +; CHECK: sdiv8x32 +; CHECK: vpsrad $31 +; CHECK: vpsrld $26 +; CHECK: vpaddd +; CHECK: vpsrad $6 +; CHECK: ret +%0 = sdiv <8 x i32> %var, <i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64> +ret <8 x i32> %0 +} + +define <16 x i16> @sdiv16x16(<16 x i16> %var) { +entry: +; CHECK: sdiv16x16 +; CHECK: vpsraw $15 +; CHECK: vpsrlw $14 +; CHECK: vpaddw +; CHECK: vpsraw $2 +; CHECK: ret + %a0 = sdiv <16 x i16> %var, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4> + ret <16 x i16> %a0 +} diff --git a/test/CodeGen/X86/vec_shuffle-19.ll b/test/CodeGen/X86/vec_shuffle-19.ll index b26f920e5e23..48db8de0d936 100644 --- a/test/CodeGen/X86/vec_shuffle-19.ll +++ b/test/CodeGen/X86/vec_shuffle-19.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -o /dev/null -march=x86 -mcpu=penryn -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 4 ; PR2485 diff --git a/test/CodeGen/X86/vec_shuffle-20.ll b/test/CodeGen/X86/vec_shuffle-20.ll index 976cd1835b40..5a2c4449456b 100644 --- a/test/CodeGen/X86/vec_shuffle-20.ll +++ b/test/CodeGen/X86/vec_shuffle-20.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -o /dev/null -march=x86 -mcpu=corei7 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 3 +; REQUIRES: asserts +; RUN: llc < %s -o /dev/null -march=x86 -mcpu=corei7 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 2 define <4 x float> @func(<4 x float> %fp0, <4 x float> %fp1) nounwind { entry: diff --git a/test/CodeGen/X86/vec_splat-2.ll b/test/CodeGen/X86/vec_splat-2.ll index f105de4d977d..5c668b7e5a5b 100644 --- a/test/CodeGen/X86/vec_splat-2.ll +++ b/test/CodeGen/X86/vec_splat-2.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=+sse2 | grep pshufd | count 1 +; RUN: llc < %s -march=x86 -mcpu=pentium4 -mattr=+sse2 | FileCheck %s define void @test(<2 x i64>* %P, i8 %x) nounwind { %tmp = insertelement <16 x i8> zeroinitializer, i8 %x, i32 0 ; <<16 x i8>> [#uses=1] @@ -23,4 +23,11 @@ define void @test(<2 x i64>* %P, i8 %x) nounwind { %tmp73.upgrd.1 = bitcast <16 x i8> %tmp73 to <2 x i64> ; <<2 x i64>> [#uses=1] store <2 x i64> %tmp73.upgrd.1, <2 x i64>* %P ret void + +; CHECK: test: +; CHECK-NOT: pshufd +; CHECK: punpcklbw +; CHECK: punpcklbw +; CHECK: pshufd $0 +; CHECK-NOT: pshufd } diff --git a/test/CodeGen/X86/vec_splat-3.ll b/test/CodeGen/X86/vec_splat-3.ll index feacc42406df..cf0ecf40554d 100644 --- a/test/CodeGen/X86/vec_splat-3.ll +++ b/test/CodeGen/X86/vec_splat-3.ll @@ -1,55 +1,230 @@ -; RUN: llc < %s -march=x86 
-mcpu=penryn -mattr=sse41 -o %t -; RUN: grep punpcklwd %t | count 4 -; RUN: grep punpckhwd %t | count 4 -; RUN: grep "pshufd" %t | count 8 +; RUN: llc <%s -march=x86 -mcpu=penryn -mattr=sse41 | FileCheck %s ; Splat test for v8i16 -; Should generate with pshufd with masks $0, $85, $170, $255 (each mask is used twice) define <8 x i16> @shuf_8i16_0(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone { -entry: - %tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 0, i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef , i32 undef > + %tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 0, i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 undef> ret <8 x i16> %tmp6 + +; CHECK: shuf_8i16_0: +; CHECK: pshuflw $0 } define <8 x i16> @shuf_8i16_1(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone { -entry: - %tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef > + %tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ret <8 x i16> %tmp6 + +; CHECK: shuf_8i16_1: +; CHECK: pshuflw $5 } define <8 x i16> @shuf_8i16_2(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone { -entry: - %tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 2, i32 undef, i32 undef, i32 2, i32 undef, i32 2, i32 undef , i32 undef > + %tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 2, i32 undef, i32 undef, i32 2, i32 undef, i32 2, i32 undef, i32 undef> ret <8 x i16> %tmp6 + +; CHECK: shuf_8i16_2: +; CHECK: punpcklwd +; CHECK-NEXT: pshufd $-86 } define <8 x i16> @shuf_8i16_3(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone { -entry: - %tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 3, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef > + %tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 3, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ret <8 x i16> %tmp6 + +; CHECK: shuf_8i16_3: +; CHECK: pshuflw $15 } define <8 x i16> @shuf_8i16_4(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone { -entry: - %tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 4, i32 undef, i32 undef, i32 undef, i32 4, i32 undef, i32 undef , i32 undef > + %tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 4, i32 undef, i32 undef, i32 undef, i32 4, i32 undef, i32 undef, i32 undef> ret <8 x i16> %tmp6 + +; CHECK: shuf_8i16_4: +; CHECK: movhlps } define <8 x i16> @shuf_8i16_5(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone { -entry: - %tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 5, i32 undef, i32 undef, i32 5, i32 undef, i32 undef, i32 undef , i32 undef > + %tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 5, i32 undef, i32 undef, i32 5, i32 undef, i32 undef, i32 undef, i32 undef> ret <8 x i16> %tmp6 + +; CHECK: shuf_8i16_5: +; CHECK: punpckhwd +; CHECK-NEXT: pshufd $85 } define <8 x i16> @shuf_8i16_6(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone { -entry: - %tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 6, i32 6, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef > + %tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 6, i32 6, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ret <8 x i16> %tmp6 -} +; CHECK: shuf_8i16_6: +; CHECK: punpckhwd +; CHECK-NEXT: pshufd $-86 +} define <8 x i16> 
@shuf_8i16_7(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone { -entry: - %tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 7, i32 undef, i32 undef, i32 7, i32 undef, i32 undef, i32 undef , i32 undef > + %tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 7, i32 undef, i32 undef, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> ret <8 x i16> %tmp6 + +; CHECK: shuf_8i16_7: +; CHECK: punpckhwd +; CHECK-NEXT: pshufd $-1 +} + +; Splat test for v16i8 +define <16 x i8> @shuf_16i8_8(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone { + %tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 0, i32 undef, i32 undef, i32 0, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> + ret <16 x i8> %tmp6 + +; CHECK: shuf_16i8_8: +; CHECK: punpcklbw +; CHECK-NEXT: punpcklbw +; CHECK-NEXT: pshufd $0 +} + +define <16 x i8> @shuf_16i8_9(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone { + %tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef > + ret <16 x i8> %tmp6 + +; CHECK: shuf_16i8_9: +; CHECK: punpcklbw +; CHECK-NEXT: punpcklbw +; CHECK-NEXT: pshufd $85 +} + +define <16 x i8> @shuf_16i8_10(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone { + %tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 2, i32 undef, i32 undef, i32 2, i32 undef, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> + ret <16 x i8> %tmp6 + +; CHECK: shuf_16i8_10: +; CHECK: punpcklbw +; CHECK-NEXT: punpcklbw +; CHECK-NEXT: pshufd $-86 +} + +define <16 x i8> @shuf_16i8_11(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone { + %tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 3, i32 undef, i32 undef, i32 3, i32 undef, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> + ret <16 x i8> %tmp6 + +; CHECK: shuf_16i8_11: +; CHECK: punpcklbw +; CHECK-NEXT: punpcklbw +; CHECK-NEXT: pshufd $-1 +} + + +define <16 x i8> @shuf_16i8_12(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone { + %tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 4, i32 undef, i32 undef, i32 undef, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef > + ret <16 x i8> %tmp6 + +; CHECK: shuf_16i8_12: +; CHECK: pshufd $5 +} + +define <16 x i8> @shuf_16i8_13(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone { + %tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 5, i32 undef, i32 undef, i32 5, i32 undef, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> + ret <16 x i8> %tmp6 + +; CHECK: shuf_16i8_13: +; CHECK: punpcklbw +; CHECK-NEXT: punpckhbw +; CHECK-NEXT: pshufd $85 +} + +define <16 x i8> @shuf_16i8_14(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone { + %tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 6, i32 undef, i32 undef, i32 6, i32 undef, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6> + ret <16 x i8> %tmp6 + +; CHECK: shuf_16i8_14: +; CHECK: punpcklbw +; CHECK-NEXT: punpckhbw +; CHECK-NEXT: pshufd $-86 +} + +define <16 x i8> @shuf_16i8_15(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone { + %tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 7, i32 undef, i32 undef, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, 
i32 undef, i32 undef, i32 undef, i32 undef, i32 undef > + ret <16 x i8> %tmp6 + +; CHECK: shuf_16i8_15: +; CHECK: punpcklbw +; CHECK-NEXT: punpckhbw +; CHECK-NEXT: pshufd $-1 +} + +define <16 x i8> @shuf_16i8_16(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone { + %tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 8, i32 undef, i32 undef, i32 8, i32 undef, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> + ret <16 x i8> %tmp6 + +; CHECK: shuf_16i8_16: +; CHECK: punpckhbw +; CHECK-NEXT: punpcklbw +; CHECK-NEXT: pshufd $0 +} + +define <16 x i8> @shuf_16i8_17(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone { + %tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 9, i32 undef, i32 undef, i32 9, i32 undef, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9> + ret <16 x i8> %tmp6 + +; CHECK: shuf_16i8_17: +; CHECK: punpckhbw +; CHECK-NEXT: punpcklbw +; CHECK-NEXT: pshufd $85 +} + +define <16 x i8> @shuf_16i8_18(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone { + %tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 10, i32 undef, i32 undef, i32 10, i32 undef, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10> + ret <16 x i8> %tmp6 + +; CHECK: shuf_16i8_18: +; CHECK: punpckhbw +; CHECK-NEXT: punpcklbw +; CHECK-NEXT: pshufd $-86 +} + +define <16 x i8> @shuf_16i8_19(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone { + %tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 11, i32 undef, i32 undef, i32 11, i32 undef, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11> + ret <16 x i8> %tmp6 + +; CHECK: shuf_16i8_19: +; CHECK: punpckhbw +; CHECK-NEXT: punpcklbw +; CHECK-NEXT: pshufd $-1 +} + +define <16 x i8> @shuf_16i8_20(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone { + %tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 12, i32 undef, i32 undef, i32 12, i32 undef, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12> + ret <16 x i8> %tmp6 + +; CHECK: shuf_16i8_20: +; CHECK: punpckhbw +; CHECK-NEXT: punpckhbw +; CHECK-NEXT: pshufd $0 +} + +define <16 x i8> @shuf_16i8_21(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone { + %tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 13, i32 undef, i32 undef, i32 13, i32 undef, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13> + ret <16 x i8> %tmp6 + +; CHECK: shuf_16i8_21: +; CHECK: punpckhbw +; CHECK-NEXT: punpckhbw +; CHECK-NEXT: pshufd $85 +} + +define <16 x i8> @shuf_16i8_22(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone { + %tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 14, i32 undef, i32 undef, i32 14, i32 undef, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14> + ret <16 x i8> %tmp6 + +; CHECK: shuf_16i8_22: +; CHECK: punpckhbw +; CHECK-NEXT: punpckhbw +; CHECK-NEXT: pshufd $-86 +} + +define <16 x i8> @shuf_16i8_23(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone { + %tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 15, i32 undef, i32 undef, i32 15, i32 undef, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> + ret <16 x i8> %tmp6 + +; CHECK: shuf_16i8_23: +; CHECK: punpckhbw +; CHECK-NEXT: punpckhbw +; CHECK-NEXT: pshufd $-1 } diff --git a/test/CodeGen/X86/vec_splat-4.ll b/test/CodeGen/X86/vec_splat-4.ll deleted file mode 100644 index 
374acfa4e094..000000000000 --- a/test/CodeGen/X86/vec_splat-4.ll +++ /dev/null @@ -1,104 +0,0 @@ -; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=sse41 -o %t -; RUN: grep punpcklbw %t | count 16 -; RUN: grep punpckhbw %t | count 16 -; RUN: grep "pshufd" %t | count 16 - -; Should generate with pshufd with masks $0, $85, $170, $255 (each mask is used 4 times) - -; Splat test for v16i8 -define <16 x i8 > @shuf_16i8_0(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { -entry: - %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 0, i32 undef, i32 undef, i32 0, i32 undef, i32 0, i32 0 , i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 > - ret <16 x i8 > %tmp6 -} - -define <16 x i8 > @shuf_16i8_1(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { -entry: - %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef, i32 undef, i32 undef, i32 undef > - ret <16 x i8 > %tmp6 -} - -define <16 x i8 > @shuf_16i8_2(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { -entry: - %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 2, i32 undef, i32 undef, i32 2, i32 undef, i32 2, i32 2 , i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2 > - ret <16 x i8 > %tmp6 -} - -define <16 x i8 > @shuf_16i8_3(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { -entry: - %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 3, i32 undef, i32 undef, i32 3, i32 undef, i32 3, i32 3 , i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3 > - ret <16 x i8 > %tmp6 -} - - -define <16 x i8 > @shuf_16i8_4(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { -entry: - %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 4, i32 undef, i32 undef, i32 undef, i32 4, i32 undef, i32 undef , i32 undef, i32 undef, i32 undef, i32 undef , i32 undef, i32 undef, i32 undef, i32 undef , i32 undef > - ret <16 x i8 > %tmp6 -} - -define <16 x i8 > @shuf_16i8_5(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { -entry: - %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 5, i32 undef, i32 undef, i32 5, i32 undef, i32 5, i32 5 , i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5 > - ret <16 x i8 > %tmp6 -} - -define <16 x i8 > @shuf_16i8_6(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { -entry: - %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 6, i32 undef, i32 undef, i32 6, i32 undef, i32 6, i32 6 , i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6 > - ret <16 x i8 > %tmp6 -} - -define <16 x i8 > @shuf_16i8_7(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { -entry: - %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 7, i32 undef, i32 undef, i32 7, i32 undef, i32 undef, i32 undef , i32 undef, i32 undef, i32 undef, i32 undef , i32 undef , i32 undef, i32 undef, i32 undef , i32 undef > - ret <16 x i8 > %tmp6 -} - -define <16 x i8 > @shuf_16i8_8(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { -entry: - %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 8, i32 undef, i32 undef, i32 8, i32 undef, i32 8, i32 8 , i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8 > - ret <16 x i8 > %tmp6 -} - -define <16 x i8 > @shuf_16i8_9(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { -entry: - %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 9, i32 undef, i32 undef, i32 
9, i32 undef, i32 9, i32 9 , i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9 > - ret <16 x i8 > %tmp6 -} - -define <16 x i8 > @shuf_16i8_10(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { -entry: - %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 10, i32 undef, i32 undef, i32 10, i32 undef, i32 10, i32 10 , i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10 > - ret <16 x i8 > %tmp6 -} - -define <16 x i8 > @shuf_16i8_11(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { -entry: - %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 11, i32 undef, i32 undef, i32 11, i32 undef, i32 11, i32 11 , i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11 > - ret <16 x i8 > %tmp6 -} - -define <16 x i8 > @shuf_16i8_12(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { -entry: - %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 12, i32 undef, i32 undef, i32 12, i32 undef, i32 12, i32 12 , i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12 > - ret <16 x i8 > %tmp6 -} - -define <16 x i8 > @shuf_16i8_13(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { -entry: - %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 13, i32 undef, i32 undef, i32 13, i32 undef, i32 13, i32 13 , i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13 > - ret <16 x i8 > %tmp6 -} - -define <16 x i8 > @shuf_16i8_14(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { -entry: - %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 14, i32 undef, i32 undef, i32 14, i32 undef, i32 14, i32 14 , i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14 > - ret <16 x i8 > %tmp6 -} - -define <16 x i8 > @shuf_16i8_15(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { -entry: - %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 15, i32 undef, i32 undef, i32 15, i32 undef, i32 15, i32 15 , i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15 > - ret <16 x i8 > %tmp6 -} diff --git a/test/CodeGen/X86/vec_splat.ll b/test/CodeGen/X86/vec_splat.ll index 24d8487f17bd..deedee801967 100644 --- a/test/CodeGen/X86/vec_splat.ll +++ b/test/CodeGen/X86/vec_splat.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=+sse2 | grep pshufd -; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=+sse3 | grep movddup +; RUN: llc < %s -march=x86 -mcpu=pentium4 -mattr=+sse2 | FileCheck %s -check-prefix=SSE2 +; RUN: llc < %s -march=x86 -mcpu=pentium4 -mattr=+sse3 | FileCheck %s -check-prefix=SSE3 define void @test_v4sf(<4 x float>* %P, <4 x float>* %Q, float %X) nounwind { %tmp = insertelement <4 x float> zeroinitializer, float %X, i32 0 ; <<4 x float>> [#uses=1] @@ -10,6 +10,12 @@ define void @test_v4sf(<4 x float>* %P, <4 x float>* %Q, float %X) nounwind { %tmp10 = fmul <4 x float> %tmp8, %tmp6 ; <<4 x float>> [#uses=1] store <4 x float> %tmp10, <4 x float>* %P ret void + +; SSE2: test_v4sf: +; SSE2: pshufd $0 + +; SSE3: test_v4sf: +; SSE3: pshufd $0 } define void @test_v2sd(<2 x double>* %P, <2 x double>* %Q, double %X) nounwind { @@ -19,4 +25,10 @@ define void @test_v2sd(<2 x double>* %P, <2 x double>* %Q, double %X) nounwind { %tmp6 = fmul <2 x double> %tmp4, %tmp2 ; <<2 x double>> [#uses=1] store <2 x double> %tmp6, <2 x double>* %P ret void + +; SSE2: test_v2sd: +; SSE2: shufpd $0 + +; SSE3: test_v2sd: +; SSE3: movddup } diff --git a/test/CodeGen/X86/vec_zero.ll b/test/CodeGen/X86/vec_zero.ll index 
682a0dfca806..c3ea0ad2023f 100644 --- a/test/CodeGen/X86/vec_zero.ll +++ b/test/CodeGen/X86/vec_zero.ll @@ -13,7 +13,7 @@ define void @foo(<4 x float>* %P) { ; CHECK: pxor define void @bar(<4 x i32>* %P) { %T = load <4 x i32>* %P ; <<4 x i32>> [#uses=1] - %S = add <4 x i32> zeroinitializer, %T ; <<4 x i32>> [#uses=1] + %S = sub <4 x i32> zeroinitializer, %T ; <<4 x i32>> [#uses=1] store <4 x i32> %S, <4 x i32>* %P ret void } diff --git a/test/CodeGen/X86/vector-gep.ll b/test/CodeGen/X86/vector-gep.ll index 3476e36c646f..ec93ce0761cc 100644 --- a/test/CodeGen/X86/vector-gep.ll +++ b/test/CodeGen/X86/vector-gep.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=x86 -mcpu=corei7-avx | FileCheck %s -; RUN: opt -instsimplify %s -disable-output +; RUN: opt -instsimplify -disable-output < %s ;CHECK: AGEP0: define <4 x i32*> @AGEP0(i32* %ptr) nounwind { @@ -8,10 +8,8 @@ entry: %vecinit2.i = insertelement <4 x i32*> %vecinit.i, i32* %ptr, i32 1 %vecinit4.i = insertelement <4 x i32*> %vecinit2.i, i32* %ptr, i32 2 %vecinit6.i = insertelement <4 x i32*> %vecinit4.i, i32* %ptr, i32 3 -;CHECK: pslld $2 ;CHECK: padd %A2 = getelementptr <4 x i32*> %vecinit6.i, <4 x i32> <i32 1, i32 2, i32 3, i32 4> -;CHECK: pslld $2 ;CHECK: padd %A3 = getelementptr <4 x i32*> %A2, <4 x i32> <i32 10, i32 14, i32 19, i32 233> ret <4 x i32*> %A3 @@ -21,7 +19,6 @@ entry: ;CHECK: AGEP1: define i32 @AGEP1(<4 x i32*> %param) nounwind { entry: -;CHECK: pslld $2 ;CHECK: padd %A2 = getelementptr <4 x i32*> %param, <4 x i32> <i32 1, i32 2, i32 3, i32 4> %k = extractelement <4 x i32*> %A2, i32 3 diff --git a/test/CodeGen/X86/vselect-minmax.ll b/test/CodeGen/X86/vselect-minmax.ll new file mode 100644 index 000000000000..cf654b6f2059 --- /dev/null +++ b/test/CodeGen/X86/vselect-minmax.ll @@ -0,0 +1,2788 @@ +; RUN: llc -march=x86-64 -mcpu=core2 < %s | FileCheck %s -check-prefix=SSE2 +; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s -check-prefix=SSE4 +; RUN: llc -march=x86-64 -mcpu=corei7-avx < %s | FileCheck %s -check-prefix=AVX1 +; RUN: llc -march=x86-64 -mcpu=core-avx2 -mattr=+avx2 < %s | FileCheck %s -check-prefix=AVX2 + +define void @test1(i8* nocapture %a, i8* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i8* %a, i64 %index + %gep.b = getelementptr inbounds i8* %b, i64 %index + %ptr.a = bitcast i8* %gep.a to <16 x i8>* + %ptr.b = bitcast i8* %gep.b to <16 x i8>* + %load.a = load <16 x i8>* %ptr.a, align 2 + %load.b = load <16 x i8>* %ptr.b, align 2 + %cmp = icmp slt <16 x i8> %load.a, %load.b + %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b + store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2 + %index.next = add i64 %index, 16 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE4: test1: +; SSE4: pminsb + +; AVX1: test1: +; AVX1: vpminsb + +; AVX2: test1: +; AVX2: vpminsb +} + +define void @test2(i8* nocapture %a, i8* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i8* %a, i64 %index + %gep.b = getelementptr inbounds i8* %b, i64 %index + %ptr.a = bitcast i8* %gep.a to <16 x i8>* + %ptr.b = bitcast i8* %gep.b to <16 x i8>* + %load.a = load <16 x i8>* %ptr.a, align 2 + %load.b = load <16 x i8>* 
%ptr.b, align 2 + %cmp = icmp sle <16 x i8> %load.a, %load.b + %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b + store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2 + %index.next = add i64 %index, 16 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE4: test2: +; SSE4: pminsb + +; AVX1: test2: +; AVX1: vpminsb + +; AVX2: test2: +; AVX2: vpminsb +} + +define void @test3(i8* nocapture %a, i8* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i8* %a, i64 %index + %gep.b = getelementptr inbounds i8* %b, i64 %index + %ptr.a = bitcast i8* %gep.a to <16 x i8>* + %ptr.b = bitcast i8* %gep.b to <16 x i8>* + %load.a = load <16 x i8>* %ptr.a, align 2 + %load.b = load <16 x i8>* %ptr.b, align 2 + %cmp = icmp sgt <16 x i8> %load.a, %load.b + %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b + store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2 + %index.next = add i64 %index, 16 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE4: test3: +; SSE4: pmaxsb + +; AVX1: test3: +; AVX1: vpmaxsb + +; AVX2: test3: +; AVX2: vpmaxsb +} + +define void @test4(i8* nocapture %a, i8* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i8* %a, i64 %index + %gep.b = getelementptr inbounds i8* %b, i64 %index + %ptr.a = bitcast i8* %gep.a to <16 x i8>* + %ptr.b = bitcast i8* %gep.b to <16 x i8>* + %load.a = load <16 x i8>* %ptr.a, align 2 + %load.b = load <16 x i8>* %ptr.b, align 2 + %cmp = icmp sge <16 x i8> %load.a, %load.b + %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b + store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2 + %index.next = add i64 %index, 16 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE4: test4: +; SSE4: pmaxsb + +; AVX1: test4: +; AVX1: vpmaxsb + +; AVX2: test4: +; AVX2: vpmaxsb +} + +define void @test5(i8* nocapture %a, i8* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i8* %a, i64 %index + %gep.b = getelementptr inbounds i8* %b, i64 %index + %ptr.a = bitcast i8* %gep.a to <16 x i8>* + %ptr.b = bitcast i8* %gep.b to <16 x i8>* + %load.a = load <16 x i8>* %ptr.a, align 2 + %load.b = load <16 x i8>* %ptr.b, align 2 + %cmp = icmp ult <16 x i8> %load.a, %load.b + %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b + store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2 + %index.next = add i64 %index, 16 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE2: test5: +; SSE2: pminub + +; AVX1: test5: +; AVX1: vpminub + +; AVX2: test5: +; AVX2: vpminub +} + +define void @test6(i8* nocapture %a, i8* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = 
getelementptr inbounds i8* %a, i64 %index + %gep.b = getelementptr inbounds i8* %b, i64 %index + %ptr.a = bitcast i8* %gep.a to <16 x i8>* + %ptr.b = bitcast i8* %gep.b to <16 x i8>* + %load.a = load <16 x i8>* %ptr.a, align 2 + %load.b = load <16 x i8>* %ptr.b, align 2 + %cmp = icmp ule <16 x i8> %load.a, %load.b + %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b + store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2 + %index.next = add i64 %index, 16 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE2: test6: +; SSE2: pminub + +; AVX1: test6: +; AVX1: vpminub + +; AVX2: test6: +; AVX2: vpminub +} + +define void @test7(i8* nocapture %a, i8* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i8* %a, i64 %index + %gep.b = getelementptr inbounds i8* %b, i64 %index + %ptr.a = bitcast i8* %gep.a to <16 x i8>* + %ptr.b = bitcast i8* %gep.b to <16 x i8>* + %load.a = load <16 x i8>* %ptr.a, align 2 + %load.b = load <16 x i8>* %ptr.b, align 2 + %cmp = icmp ugt <16 x i8> %load.a, %load.b + %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b + store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2 + %index.next = add i64 %index, 16 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE2: test7: +; SSE2: pmaxub + +; AVX1: test7: +; AVX1: vpmaxub + +; AVX2: test7: +; AVX2: vpmaxub +} + +define void @test8(i8* nocapture %a, i8* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i8* %a, i64 %index + %gep.b = getelementptr inbounds i8* %b, i64 %index + %ptr.a = bitcast i8* %gep.a to <16 x i8>* + %ptr.b = bitcast i8* %gep.b to <16 x i8>* + %load.a = load <16 x i8>* %ptr.a, align 2 + %load.b = load <16 x i8>* %ptr.b, align 2 + %cmp = icmp uge <16 x i8> %load.a, %load.b + %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b + store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2 + %index.next = add i64 %index, 16 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE2: test8: +; SSE2: pmaxub + +; AVX1: test8: +; AVX1: vpmaxub + +; AVX2: test8: +; AVX2: vpmaxub +} + +define void @test9(i16* nocapture %a, i16* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i16* %a, i64 %index + %gep.b = getelementptr inbounds i16* %b, i64 %index + %ptr.a = bitcast i16* %gep.a to <8 x i16>* + %ptr.b = bitcast i16* %gep.b to <8 x i16>* + %load.a = load <8 x i16>* %ptr.a, align 2 + %load.b = load <8 x i16>* %ptr.b, align 2 + %cmp = icmp slt <8 x i16> %load.a, %load.b + %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b + store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2 + %index.next = add i64 %index, 8 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE2: test9: +; SSE2: pminsw + +; AVX1: test9: +; AVX1: vpminsw + +; AVX2: test9: +; 
AVX2: vpminsw +} + +define void @test10(i16* nocapture %a, i16* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i16* %a, i64 %index + %gep.b = getelementptr inbounds i16* %b, i64 %index + %ptr.a = bitcast i16* %gep.a to <8 x i16>* + %ptr.b = bitcast i16* %gep.b to <8 x i16>* + %load.a = load <8 x i16>* %ptr.a, align 2 + %load.b = load <8 x i16>* %ptr.b, align 2 + %cmp = icmp sle <8 x i16> %load.a, %load.b + %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b + store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2 + %index.next = add i64 %index, 8 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE2: test10: +; SSE2: pminsw + +; AVX1: test10: +; AVX1: vpminsw + +; AVX2: test10: +; AVX2: vpminsw +} + +define void @test11(i16* nocapture %a, i16* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i16* %a, i64 %index + %gep.b = getelementptr inbounds i16* %b, i64 %index + %ptr.a = bitcast i16* %gep.a to <8 x i16>* + %ptr.b = bitcast i16* %gep.b to <8 x i16>* + %load.a = load <8 x i16>* %ptr.a, align 2 + %load.b = load <8 x i16>* %ptr.b, align 2 + %cmp = icmp sgt <8 x i16> %load.a, %load.b + %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b + store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2 + %index.next = add i64 %index, 8 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE2: test11: +; SSE2: pmaxsw + +; AVX1: test11: +; AVX1: vpmaxsw + +; AVX2: test11: +; AVX2: vpmaxsw +} + +define void @test12(i16* nocapture %a, i16* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i16* %a, i64 %index + %gep.b = getelementptr inbounds i16* %b, i64 %index + %ptr.a = bitcast i16* %gep.a to <8 x i16>* + %ptr.b = bitcast i16* %gep.b to <8 x i16>* + %load.a = load <8 x i16>* %ptr.a, align 2 + %load.b = load <8 x i16>* %ptr.b, align 2 + %cmp = icmp sge <8 x i16> %load.a, %load.b + %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b + store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2 + %index.next = add i64 %index, 8 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE2: test12: +; SSE2: pmaxsw + +; AVX1: test12: +; AVX1: vpmaxsw + +; AVX2: test12: +; AVX2: vpmaxsw +} + +define void @test13(i16* nocapture %a, i16* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i16* %a, i64 %index + %gep.b = getelementptr inbounds i16* %b, i64 %index + %ptr.a = bitcast i16* %gep.a to <8 x i16>* + %ptr.b = bitcast i16* %gep.b to <8 x i16>* + %load.a = load <8 x i16>* %ptr.a, align 2 + %load.b = load <8 x i16>* %ptr.b, align 2 + %cmp = icmp ult <8 x i16> %load.a, %load.b + %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b + store <8 x i16> %sel, <8 x i16>* 
%ptr.a, align 2 + %index.next = add i64 %index, 8 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE4: test13: +; SSE4: pminuw + +; AVX1: test13: +; AVX1: vpminuw + +; AVX2: test13: +; AVX2: vpminuw +} + +define void @test14(i16* nocapture %a, i16* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i16* %a, i64 %index + %gep.b = getelementptr inbounds i16* %b, i64 %index + %ptr.a = bitcast i16* %gep.a to <8 x i16>* + %ptr.b = bitcast i16* %gep.b to <8 x i16>* + %load.a = load <8 x i16>* %ptr.a, align 2 + %load.b = load <8 x i16>* %ptr.b, align 2 + %cmp = icmp ule <8 x i16> %load.a, %load.b + %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b + store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2 + %index.next = add i64 %index, 8 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE4: test14: +; SSE4: pminuw + +; AVX1: test14: +; AVX1: vpminuw + +; AVX2: test14: +; AVX2: vpminuw +} + +define void @test15(i16* nocapture %a, i16* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i16* %a, i64 %index + %gep.b = getelementptr inbounds i16* %b, i64 %index + %ptr.a = bitcast i16* %gep.a to <8 x i16>* + %ptr.b = bitcast i16* %gep.b to <8 x i16>* + %load.a = load <8 x i16>* %ptr.a, align 2 + %load.b = load <8 x i16>* %ptr.b, align 2 + %cmp = icmp ugt <8 x i16> %load.a, %load.b + %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b + store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2 + %index.next = add i64 %index, 8 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE4: test15: +; SSE4: pmaxuw + +; AVX1: test15: +; AVX1: vpmaxuw + +; AVX2: test15: +; AVX2: vpmaxuw +} + +define void @test16(i16* nocapture %a, i16* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i16* %a, i64 %index + %gep.b = getelementptr inbounds i16* %b, i64 %index + %ptr.a = bitcast i16* %gep.a to <8 x i16>* + %ptr.b = bitcast i16* %gep.b to <8 x i16>* + %load.a = load <8 x i16>* %ptr.a, align 2 + %load.b = load <8 x i16>* %ptr.b, align 2 + %cmp = icmp uge <8 x i16> %load.a, %load.b + %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b + store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2 + %index.next = add i64 %index, 8 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE4: test16: +; SSE4: pmaxuw + +; AVX1: test16: +; AVX1: vpmaxuw + +; AVX2: test16: +; AVX2: vpmaxuw +} + +define void @test17(i32* nocapture %a, i32* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i32* %a, i64 %index + %gep.b = getelementptr inbounds i32* %b, i64 %index + %ptr.a = bitcast i32* %gep.a to <4 x i32>* + 
%ptr.b = bitcast i32* %gep.b to <4 x i32>* + %load.a = load <4 x i32>* %ptr.a, align 2 + %load.b = load <4 x i32>* %ptr.b, align 2 + %cmp = icmp slt <4 x i32> %load.a, %load.b + %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b + store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2 + %index.next = add i64 %index, 4 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE4: test17: +; SSE4: pminsd + +; AVX1: test17: +; AVX1: vpminsd + +; AVX2: test17: +; AVX2: vpminsd +} + +define void @test18(i32* nocapture %a, i32* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i32* %a, i64 %index + %gep.b = getelementptr inbounds i32* %b, i64 %index + %ptr.a = bitcast i32* %gep.a to <4 x i32>* + %ptr.b = bitcast i32* %gep.b to <4 x i32>* + %load.a = load <4 x i32>* %ptr.a, align 2 + %load.b = load <4 x i32>* %ptr.b, align 2 + %cmp = icmp sle <4 x i32> %load.a, %load.b + %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b + store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2 + %index.next = add i64 %index, 4 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE4: test18: +; SSE4: pminsd + +; AVX1: test18: +; AVX1: vpminsd + +; AVX2: test18: +; AVX2: vpminsd +} + +define void @test19(i32* nocapture %a, i32* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i32* %a, i64 %index + %gep.b = getelementptr inbounds i32* %b, i64 %index + %ptr.a = bitcast i32* %gep.a to <4 x i32>* + %ptr.b = bitcast i32* %gep.b to <4 x i32>* + %load.a = load <4 x i32>* %ptr.a, align 2 + %load.b = load <4 x i32>* %ptr.b, align 2 + %cmp = icmp sgt <4 x i32> %load.a, %load.b + %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b + store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2 + %index.next = add i64 %index, 4 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE4: test19: +; SSE4: pmaxsd + +; AVX1: test19: +; AVX1: vpmaxsd + +; AVX2: test19: +; AVX2: vpmaxsd +} + +define void @test20(i32* nocapture %a, i32* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i32* %a, i64 %index + %gep.b = getelementptr inbounds i32* %b, i64 %index + %ptr.a = bitcast i32* %gep.a to <4 x i32>* + %ptr.b = bitcast i32* %gep.b to <4 x i32>* + %load.a = load <4 x i32>* %ptr.a, align 2 + %load.b = load <4 x i32>* %ptr.b, align 2 + %cmp = icmp sge <4 x i32> %load.a, %load.b + %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b + store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2 + %index.next = add i64 %index, 4 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE4: test20: +; SSE4: pmaxsd + +; AVX1: test20: +; AVX1: vpmaxsd + +; AVX2: test20: +; AVX2: vpmaxsd +} + +define void @test21(i32* nocapture %a, i32* nocapture %b) nounwind { +vector.ph: + br label 
%vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i32* %a, i64 %index + %gep.b = getelementptr inbounds i32* %b, i64 %index + %ptr.a = bitcast i32* %gep.a to <4 x i32>* + %ptr.b = bitcast i32* %gep.b to <4 x i32>* + %load.a = load <4 x i32>* %ptr.a, align 2 + %load.b = load <4 x i32>* %ptr.b, align 2 + %cmp = icmp ult <4 x i32> %load.a, %load.b + %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b + store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2 + %index.next = add i64 %index, 4 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE4: test21: +; SSE4: pminud + +; AVX1: test21: +; AVX1: vpminud + +; AVX2: test21: +; AVX2: vpminud +} + +define void @test22(i32* nocapture %a, i32* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i32* %a, i64 %index + %gep.b = getelementptr inbounds i32* %b, i64 %index + %ptr.a = bitcast i32* %gep.a to <4 x i32>* + %ptr.b = bitcast i32* %gep.b to <4 x i32>* + %load.a = load <4 x i32>* %ptr.a, align 2 + %load.b = load <4 x i32>* %ptr.b, align 2 + %cmp = icmp ule <4 x i32> %load.a, %load.b + %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b + store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2 + %index.next = add i64 %index, 4 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE4: test22: +; SSE4: pminud + +; AVX1: test22: +; AVX1: vpminud + +; AVX2: test22: +; AVX2: vpminud +} + +define void @test23(i32* nocapture %a, i32* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i32* %a, i64 %index + %gep.b = getelementptr inbounds i32* %b, i64 %index + %ptr.a = bitcast i32* %gep.a to <4 x i32>* + %ptr.b = bitcast i32* %gep.b to <4 x i32>* + %load.a = load <4 x i32>* %ptr.a, align 2 + %load.b = load <4 x i32>* %ptr.b, align 2 + %cmp = icmp ugt <4 x i32> %load.a, %load.b + %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b + store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2 + %index.next = add i64 %index, 4 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE4: test23: +; SSE4: pmaxud + +; AVX1: test23: +; AVX1: vpmaxud + +; AVX2: test23: +; AVX2: vpmaxud +} + +define void @test24(i32* nocapture %a, i32* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i32* %a, i64 %index + %gep.b = getelementptr inbounds i32* %b, i64 %index + %ptr.a = bitcast i32* %gep.a to <4 x i32>* + %ptr.b = bitcast i32* %gep.b to <4 x i32>* + %load.a = load <4 x i32>* %ptr.a, align 2 + %load.b = load <4 x i32>* %ptr.b, align 2 + %cmp = icmp uge <4 x i32> %load.a, %load.b + %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b + store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2 + %index.next = add i64 %index, 4 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label 
%for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE4: test24: +; SSE4: pmaxud + +; AVX1: test24: +; AVX1: vpmaxud + +; AVX2: test24: +; AVX2: vpmaxud +} + +define void @test25(i8* nocapture %a, i8* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i8* %a, i64 %index + %gep.b = getelementptr inbounds i8* %b, i64 %index + %ptr.a = bitcast i8* %gep.a to <32 x i8>* + %ptr.b = bitcast i8* %gep.b to <32 x i8>* + %load.a = load <32 x i8>* %ptr.a, align 2 + %load.b = load <32 x i8>* %ptr.b, align 2 + %cmp = icmp slt <32 x i8> %load.a, %load.b + %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b + store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2 + %index.next = add i64 %index, 32 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test25: +; AVX2: vpminsb +} + +define void @test26(i8* nocapture %a, i8* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i8* %a, i64 %index + %gep.b = getelementptr inbounds i8* %b, i64 %index + %ptr.a = bitcast i8* %gep.a to <32 x i8>* + %ptr.b = bitcast i8* %gep.b to <32 x i8>* + %load.a = load <32 x i8>* %ptr.a, align 2 + %load.b = load <32 x i8>* %ptr.b, align 2 + %cmp = icmp sle <32 x i8> %load.a, %load.b + %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b + store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2 + %index.next = add i64 %index, 32 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test26: +; AVX2: vpminsb +} + +define void @test27(i8* nocapture %a, i8* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i8* %a, i64 %index + %gep.b = getelementptr inbounds i8* %b, i64 %index + %ptr.a = bitcast i8* %gep.a to <32 x i8>* + %ptr.b = bitcast i8* %gep.b to <32 x i8>* + %load.a = load <32 x i8>* %ptr.a, align 2 + %load.b = load <32 x i8>* %ptr.b, align 2 + %cmp = icmp sgt <32 x i8> %load.a, %load.b + %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b + store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2 + %index.next = add i64 %index, 32 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test27: +; AVX2: vpmaxsb +} + +define void @test28(i8* nocapture %a, i8* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i8* %a, i64 %index + %gep.b = getelementptr inbounds i8* %b, i64 %index + %ptr.a = bitcast i8* %gep.a to <32 x i8>* + %ptr.b = bitcast i8* %gep.b to <32 x i8>* + %load.a = load <32 x i8>* %ptr.a, align 2 + %load.b = load <32 x i8>* %ptr.b, align 2 + %cmp = icmp sge <32 x i8> %load.a, %load.b + %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b + store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2 + %index.next = add i64 %index, 32 + %loop = 
icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test28: +; AVX2: vpmaxsb +} + +define void @test29(i8* nocapture %a, i8* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i8* %a, i64 %index + %gep.b = getelementptr inbounds i8* %b, i64 %index + %ptr.a = bitcast i8* %gep.a to <32 x i8>* + %ptr.b = bitcast i8* %gep.b to <32 x i8>* + %load.a = load <32 x i8>* %ptr.a, align 2 + %load.b = load <32 x i8>* %ptr.b, align 2 + %cmp = icmp ult <32 x i8> %load.a, %load.b + %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b + store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2 + %index.next = add i64 %index, 32 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test29: +; AVX2: vpminub +} + +define void @test30(i8* nocapture %a, i8* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i8* %a, i64 %index + %gep.b = getelementptr inbounds i8* %b, i64 %index + %ptr.a = bitcast i8* %gep.a to <32 x i8>* + %ptr.b = bitcast i8* %gep.b to <32 x i8>* + %load.a = load <32 x i8>* %ptr.a, align 2 + %load.b = load <32 x i8>* %ptr.b, align 2 + %cmp = icmp ule <32 x i8> %load.a, %load.b + %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b + store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2 + %index.next = add i64 %index, 32 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test30: +; AVX2: vpminub +} + +define void @test31(i8* nocapture %a, i8* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i8* %a, i64 %index + %gep.b = getelementptr inbounds i8* %b, i64 %index + %ptr.a = bitcast i8* %gep.a to <32 x i8>* + %ptr.b = bitcast i8* %gep.b to <32 x i8>* + %load.a = load <32 x i8>* %ptr.a, align 2 + %load.b = load <32 x i8>* %ptr.b, align 2 + %cmp = icmp ugt <32 x i8> %load.a, %load.b + %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b + store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2 + %index.next = add i64 %index, 32 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test31: +; AVX2: vpmaxub +} + +define void @test32(i8* nocapture %a, i8* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i8* %a, i64 %index + %gep.b = getelementptr inbounds i8* %b, i64 %index + %ptr.a = bitcast i8* %gep.a to <32 x i8>* + %ptr.b = bitcast i8* %gep.b to <32 x i8>* + %load.a = load <32 x i8>* %ptr.a, align 2 + %load.b = load <32 x i8>* %ptr.b, align 2 + %cmp = icmp uge <32 x i8> %load.a, %load.b + %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b + store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2 + %index.next = add i64 %index, 32 + %loop = icmp eq i64 
%index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test32: +; AVX2: vpmaxub +} + +define void @test33(i16* nocapture %a, i16* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i16* %a, i64 %index + %gep.b = getelementptr inbounds i16* %b, i64 %index + %ptr.a = bitcast i16* %gep.a to <16 x i16>* + %ptr.b = bitcast i16* %gep.b to <16 x i16>* + %load.a = load <16 x i16>* %ptr.a, align 2 + %load.b = load <16 x i16>* %ptr.b, align 2 + %cmp = icmp slt <16 x i16> %load.a, %load.b + %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b + store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 + %index.next = add i64 %index, 16 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test33: +; AVX2: vpminsw +} + +define void @test34(i16* nocapture %a, i16* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i16* %a, i64 %index + %gep.b = getelementptr inbounds i16* %b, i64 %index + %ptr.a = bitcast i16* %gep.a to <16 x i16>* + %ptr.b = bitcast i16* %gep.b to <16 x i16>* + %load.a = load <16 x i16>* %ptr.a, align 2 + %load.b = load <16 x i16>* %ptr.b, align 2 + %cmp = icmp sle <16 x i16> %load.a, %load.b + %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b + store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 + %index.next = add i64 %index, 16 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test34: +; AVX2: vpminsw +} + +define void @test35(i16* nocapture %a, i16* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i16* %a, i64 %index + %gep.b = getelementptr inbounds i16* %b, i64 %index + %ptr.a = bitcast i16* %gep.a to <16 x i16>* + %ptr.b = bitcast i16* %gep.b to <16 x i16>* + %load.a = load <16 x i16>* %ptr.a, align 2 + %load.b = load <16 x i16>* %ptr.b, align 2 + %cmp = icmp sgt <16 x i16> %load.a, %load.b + %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b + store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 + %index.next = add i64 %index, 16 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test35: +; AVX2: vpmaxsw +} + +define void @test36(i16* nocapture %a, i16* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i16* %a, i64 %index + %gep.b = getelementptr inbounds i16* %b, i64 %index + %ptr.a = bitcast i16* %gep.a to <16 x i16>* + %ptr.b = bitcast i16* %gep.b to <16 x i16>* + %load.a = load <16 x i16>* %ptr.a, align 2 + %load.b = load <16 x i16>* %ptr.b, align 2 + %cmp = icmp sge <16 x i16> %load.a, %load.b + %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b + store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 + %index.next = add 
i64 %index, 16 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test36: +; AVX2: vpmaxsw +} + +define void @test37(i16* nocapture %a, i16* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i16* %a, i64 %index + %gep.b = getelementptr inbounds i16* %b, i64 %index + %ptr.a = bitcast i16* %gep.a to <16 x i16>* + %ptr.b = bitcast i16* %gep.b to <16 x i16>* + %load.a = load <16 x i16>* %ptr.a, align 2 + %load.b = load <16 x i16>* %ptr.b, align 2 + %cmp = icmp ult <16 x i16> %load.a, %load.b + %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b + store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 + %index.next = add i64 %index, 16 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test37: +; AVX2: vpminuw +} + +define void @test38(i16* nocapture %a, i16* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i16* %a, i64 %index + %gep.b = getelementptr inbounds i16* %b, i64 %index + %ptr.a = bitcast i16* %gep.a to <16 x i16>* + %ptr.b = bitcast i16* %gep.b to <16 x i16>* + %load.a = load <16 x i16>* %ptr.a, align 2 + %load.b = load <16 x i16>* %ptr.b, align 2 + %cmp = icmp ule <16 x i16> %load.a, %load.b + %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b + store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 + %index.next = add i64 %index, 16 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test38: +; AVX2: vpminuw +} + +define void @test39(i16* nocapture %a, i16* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i16* %a, i64 %index + %gep.b = getelementptr inbounds i16* %b, i64 %index + %ptr.a = bitcast i16* %gep.a to <16 x i16>* + %ptr.b = bitcast i16* %gep.b to <16 x i16>* + %load.a = load <16 x i16>* %ptr.a, align 2 + %load.b = load <16 x i16>* %ptr.b, align 2 + %cmp = icmp ugt <16 x i16> %load.a, %load.b + %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b + store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 + %index.next = add i64 %index, 16 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test39: +; AVX2: vpmaxuw +} + +define void @test40(i16* nocapture %a, i16* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i16* %a, i64 %index + %gep.b = getelementptr inbounds i16* %b, i64 %index + %ptr.a = bitcast i16* %gep.a to <16 x i16>* + %ptr.b = bitcast i16* %gep.b to <16 x i16>* + %load.a = load <16 x i16>* %ptr.a, align 2 + %load.b = load <16 x i16>* %ptr.b, align 2 + %cmp = icmp uge <16 x i16> %load.a, %load.b + %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b + store <16 x i16> %sel, <16 x 
i16>* %ptr.a, align 2 + %index.next = add i64 %index, 16 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test40: +; AVX2: vpmaxuw +} + +define void @test41(i32* nocapture %a, i32* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i32* %a, i64 %index + %gep.b = getelementptr inbounds i32* %b, i64 %index + %ptr.a = bitcast i32* %gep.a to <8 x i32>* + %ptr.b = bitcast i32* %gep.b to <8 x i32>* + %load.a = load <8 x i32>* %ptr.a, align 2 + %load.b = load <8 x i32>* %ptr.b, align 2 + %cmp = icmp slt <8 x i32> %load.a, %load.b + %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b + store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 + %index.next = add i64 %index, 8 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test41: +; AVX2: vpminsd +} + +define void @test42(i32* nocapture %a, i32* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i32* %a, i64 %index + %gep.b = getelementptr inbounds i32* %b, i64 %index + %ptr.a = bitcast i32* %gep.a to <8 x i32>* + %ptr.b = bitcast i32* %gep.b to <8 x i32>* + %load.a = load <8 x i32>* %ptr.a, align 2 + %load.b = load <8 x i32>* %ptr.b, align 2 + %cmp = icmp sle <8 x i32> %load.a, %load.b + %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b + store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 + %index.next = add i64 %index, 8 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test42: +; AVX2: vpminsd +} + +define void @test43(i32* nocapture %a, i32* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i32* %a, i64 %index + %gep.b = getelementptr inbounds i32* %b, i64 %index + %ptr.a = bitcast i32* %gep.a to <8 x i32>* + %ptr.b = bitcast i32* %gep.b to <8 x i32>* + %load.a = load <8 x i32>* %ptr.a, align 2 + %load.b = load <8 x i32>* %ptr.b, align 2 + %cmp = icmp sgt <8 x i32> %load.a, %load.b + %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b + store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 + %index.next = add i64 %index, 8 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test43: +; AVX2: vpmaxsd +} + +define void @test44(i32* nocapture %a, i32* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i32* %a, i64 %index + %gep.b = getelementptr inbounds i32* %b, i64 %index + %ptr.a = bitcast i32* %gep.a to <8 x i32>* + %ptr.b = bitcast i32* %gep.b to <8 x i32>* + %load.a = load <8 x i32>* %ptr.a, align 2 + %load.b = load <8 x i32>* %ptr.b, align 2 + %cmp = icmp sge <8 x i32> %load.a, %load.b + %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b + store <8 x i32> %sel, <8 x i32>* 
%ptr.a, align 2 + %index.next = add i64 %index, 8 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test44: +; AVX2: vpmaxsd +} + +define void @test45(i32* nocapture %a, i32* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i32* %a, i64 %index + %gep.b = getelementptr inbounds i32* %b, i64 %index + %ptr.a = bitcast i32* %gep.a to <8 x i32>* + %ptr.b = bitcast i32* %gep.b to <8 x i32>* + %load.a = load <8 x i32>* %ptr.a, align 2 + %load.b = load <8 x i32>* %ptr.b, align 2 + %cmp = icmp ult <8 x i32> %load.a, %load.b + %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b + store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 + %index.next = add i64 %index, 8 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test45: +; AVX2: vpminud +} + +define void @test46(i32* nocapture %a, i32* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i32* %a, i64 %index + %gep.b = getelementptr inbounds i32* %b, i64 %index + %ptr.a = bitcast i32* %gep.a to <8 x i32>* + %ptr.b = bitcast i32* %gep.b to <8 x i32>* + %load.a = load <8 x i32>* %ptr.a, align 2 + %load.b = load <8 x i32>* %ptr.b, align 2 + %cmp = icmp ule <8 x i32> %load.a, %load.b + %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b + store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 + %index.next = add i64 %index, 8 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test46: +; AVX2: vpminud +} + +define void @test47(i32* nocapture %a, i32* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i32* %a, i64 %index + %gep.b = getelementptr inbounds i32* %b, i64 %index + %ptr.a = bitcast i32* %gep.a to <8 x i32>* + %ptr.b = bitcast i32* %gep.b to <8 x i32>* + %load.a = load <8 x i32>* %ptr.a, align 2 + %load.b = load <8 x i32>* %ptr.b, align 2 + %cmp = icmp ugt <8 x i32> %load.a, %load.b + %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b + store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 + %index.next = add i64 %index, 8 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test47: +; AVX2: vpmaxud +} + +define void @test48(i32* nocapture %a, i32* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i32* %a, i64 %index + %gep.b = getelementptr inbounds i32* %b, i64 %index + %ptr.a = bitcast i32* %gep.a to <8 x i32>* + %ptr.b = bitcast i32* %gep.b to <8 x i32>* + %load.a = load <8 x i32>* %ptr.a, align 2 + %load.b = load <8 x i32>* %ptr.b, align 2 + %cmp = icmp uge <8 x i32> %load.a, %load.b + %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b + store <8 x i32> %sel, <8 x i32>* 
%ptr.a, align 2 + %index.next = add i64 %index, 8 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test48: +; AVX2: vpmaxud +} + +define void @test49(i8* nocapture %a, i8* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i8* %a, i64 %index + %gep.b = getelementptr inbounds i8* %b, i64 %index + %ptr.a = bitcast i8* %gep.a to <16 x i8>* + %ptr.b = bitcast i8* %gep.b to <16 x i8>* + %load.a = load <16 x i8>* %ptr.a, align 2 + %load.b = load <16 x i8>* %ptr.b, align 2 + %cmp = icmp slt <16 x i8> %load.a, %load.b + %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a + store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2 + %index.next = add i64 %index, 16 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE4: test49: +; SSE4: pmaxsb + +; AVX1: test49: +; AVX1: vpmaxsb + +; AVX2: test49: +; AVX2: vpmaxsb +} + +define void @test50(i8* nocapture %a, i8* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i8* %a, i64 %index + %gep.b = getelementptr inbounds i8* %b, i64 %index + %ptr.a = bitcast i8* %gep.a to <16 x i8>* + %ptr.b = bitcast i8* %gep.b to <16 x i8>* + %load.a = load <16 x i8>* %ptr.a, align 2 + %load.b = load <16 x i8>* %ptr.b, align 2 + %cmp = icmp sle <16 x i8> %load.a, %load.b + %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a + store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2 + %index.next = add i64 %index, 16 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE4: test50: +; SSE4: pmaxsb + +; AVX1: test50: +; AVX1: vpmaxsb + +; AVX2: test50: +; AVX2: vpmaxsb +} + +define void @test51(i8* nocapture %a, i8* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i8* %a, i64 %index + %gep.b = getelementptr inbounds i8* %b, i64 %index + %ptr.a = bitcast i8* %gep.a to <16 x i8>* + %ptr.b = bitcast i8* %gep.b to <16 x i8>* + %load.a = load <16 x i8>* %ptr.a, align 2 + %load.b = load <16 x i8>* %ptr.b, align 2 + %cmp = icmp sgt <16 x i8> %load.a, %load.b + %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a + store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2 + %index.next = add i64 %index, 16 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE4: test51: +; SSE4: pminsb + +; AVX1: test51: +; AVX1: vpminsb + +; AVX2: test51: +; AVX2: vpminsb +} + +define void @test52(i8* nocapture %a, i8* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i8* %a, i64 %index + %gep.b = getelementptr inbounds i8* %b, i64 %index + %ptr.a = bitcast i8* %gep.a to <16 x i8>* + %ptr.b = bitcast i8* %gep.b to <16 x i8>* + %load.a = load <16 x i8>* %ptr.a, align 2 + 
%load.b = load <16 x i8>* %ptr.b, align 2 + %cmp = icmp sge <16 x i8> %load.a, %load.b + %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a + store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2 + %index.next = add i64 %index, 16 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE4: test52: +; SSE4: pminsb + +; AVX1: test52: +; AVX1: vpminsb + +; AVX2: test52: +; AVX2: vpminsb +} + +define void @test53(i8* nocapture %a, i8* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i8* %a, i64 %index + %gep.b = getelementptr inbounds i8* %b, i64 %index + %ptr.a = bitcast i8* %gep.a to <16 x i8>* + %ptr.b = bitcast i8* %gep.b to <16 x i8>* + %load.a = load <16 x i8>* %ptr.a, align 2 + %load.b = load <16 x i8>* %ptr.b, align 2 + %cmp = icmp ult <16 x i8> %load.a, %load.b + %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a + store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2 + %index.next = add i64 %index, 16 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE2: test53: +; SSE2: pmaxub + +; AVX1: test53: +; AVX1: vpmaxub + +; AVX2: test53: +; AVX2: vpmaxub +} + +define void @test54(i8* nocapture %a, i8* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i8* %a, i64 %index + %gep.b = getelementptr inbounds i8* %b, i64 %index + %ptr.a = bitcast i8* %gep.a to <16 x i8>* + %ptr.b = bitcast i8* %gep.b to <16 x i8>* + %load.a = load <16 x i8>* %ptr.a, align 2 + %load.b = load <16 x i8>* %ptr.b, align 2 + %cmp = icmp ule <16 x i8> %load.a, %load.b + %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a + store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2 + %index.next = add i64 %index, 16 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE2: test54: +; SSE2: pmaxub + +; AVX1: test54: +; AVX1: vpmaxub + +; AVX2: test54: +; AVX2: vpmaxub +} + +define void @test55(i8* nocapture %a, i8* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i8* %a, i64 %index + %gep.b = getelementptr inbounds i8* %b, i64 %index + %ptr.a = bitcast i8* %gep.a to <16 x i8>* + %ptr.b = bitcast i8* %gep.b to <16 x i8>* + %load.a = load <16 x i8>* %ptr.a, align 2 + %load.b = load <16 x i8>* %ptr.b, align 2 + %cmp = icmp ugt <16 x i8> %load.a, %load.b + %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a + store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2 + %index.next = add i64 %index, 16 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE2: test55: +; SSE2: pminub + +; AVX1: test55: +; AVX1: vpminub + +; AVX2: test55: +; AVX2: vpminub +} + +define void @test56(i8* nocapture %a, i8* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ 
%index.next, %vector.body ] + %gep.a = getelementptr inbounds i8* %a, i64 %index + %gep.b = getelementptr inbounds i8* %b, i64 %index + %ptr.a = bitcast i8* %gep.a to <16 x i8>* + %ptr.b = bitcast i8* %gep.b to <16 x i8>* + %load.a = load <16 x i8>* %ptr.a, align 2 + %load.b = load <16 x i8>* %ptr.b, align 2 + %cmp = icmp uge <16 x i8> %load.a, %load.b + %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a + store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2 + %index.next = add i64 %index, 16 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE2: test56: +; SSE2: pminub + +; AVX1: test56: +; AVX1: vpminub + +; AVX2: test56: +; AVX2: vpminub +} + +define void @test57(i16* nocapture %a, i16* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i16* %a, i64 %index + %gep.b = getelementptr inbounds i16* %b, i64 %index + %ptr.a = bitcast i16* %gep.a to <8 x i16>* + %ptr.b = bitcast i16* %gep.b to <8 x i16>* + %load.a = load <8 x i16>* %ptr.a, align 2 + %load.b = load <8 x i16>* %ptr.b, align 2 + %cmp = icmp slt <8 x i16> %load.a, %load.b + %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a + store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2 + %index.next = add i64 %index, 8 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE2: test57: +; SSE2: pmaxsw + +; AVX1: test57: +; AVX1: vpmaxsw + +; AVX2: test57: +; AVX2: vpmaxsw +} + +define void @test58(i16* nocapture %a, i16* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i16* %a, i64 %index + %gep.b = getelementptr inbounds i16* %b, i64 %index + %ptr.a = bitcast i16* %gep.a to <8 x i16>* + %ptr.b = bitcast i16* %gep.b to <8 x i16>* + %load.a = load <8 x i16>* %ptr.a, align 2 + %load.b = load <8 x i16>* %ptr.b, align 2 + %cmp = icmp sle <8 x i16> %load.a, %load.b + %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a + store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2 + %index.next = add i64 %index, 8 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE2: test58: +; SSE2: pmaxsw + +; AVX1: test58: +; AVX1: vpmaxsw + +; AVX2: test58: +; AVX2: vpmaxsw +} + +define void @test59(i16* nocapture %a, i16* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i16* %a, i64 %index + %gep.b = getelementptr inbounds i16* %b, i64 %index + %ptr.a = bitcast i16* %gep.a to <8 x i16>* + %ptr.b = bitcast i16* %gep.b to <8 x i16>* + %load.a = load <8 x i16>* %ptr.a, align 2 + %load.b = load <8 x i16>* %ptr.b, align 2 + %cmp = icmp sgt <8 x i16> %load.a, %load.b + %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a + store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2 + %index.next = add i64 %index, 8 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE2: test59: +; SSE2: 
pminsw + +; AVX1: test59: +; AVX1: vpminsw + +; AVX2: test59: +; AVX2: vpminsw +} + +define void @test60(i16* nocapture %a, i16* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i16* %a, i64 %index + %gep.b = getelementptr inbounds i16* %b, i64 %index + %ptr.a = bitcast i16* %gep.a to <8 x i16>* + %ptr.b = bitcast i16* %gep.b to <8 x i16>* + %load.a = load <8 x i16>* %ptr.a, align 2 + %load.b = load <8 x i16>* %ptr.b, align 2 + %cmp = icmp sge <8 x i16> %load.a, %load.b + %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a + store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2 + %index.next = add i64 %index, 8 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE2: test60: +; SSE2: pminsw + +; AVX1: test60: +; AVX1: vpminsw + +; AVX2: test60: +; AVX2: vpminsw +} + +define void @test61(i16* nocapture %a, i16* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i16* %a, i64 %index + %gep.b = getelementptr inbounds i16* %b, i64 %index + %ptr.a = bitcast i16* %gep.a to <8 x i16>* + %ptr.b = bitcast i16* %gep.b to <8 x i16>* + %load.a = load <8 x i16>* %ptr.a, align 2 + %load.b = load <8 x i16>* %ptr.b, align 2 + %cmp = icmp ult <8 x i16> %load.a, %load.b + %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a + store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2 + %index.next = add i64 %index, 8 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE4: test61: +; SSE4: pmaxuw + +; AVX1: test61: +; AVX1: vpmaxuw + +; AVX2: test61: +; AVX2: vpmaxuw +} + +define void @test62(i16* nocapture %a, i16* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i16* %a, i64 %index + %gep.b = getelementptr inbounds i16* %b, i64 %index + %ptr.a = bitcast i16* %gep.a to <8 x i16>* + %ptr.b = bitcast i16* %gep.b to <8 x i16>* + %load.a = load <8 x i16>* %ptr.a, align 2 + %load.b = load <8 x i16>* %ptr.b, align 2 + %cmp = icmp ule <8 x i16> %load.a, %load.b + %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a + store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2 + %index.next = add i64 %index, 8 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE4: test62: +; SSE4: pmaxuw + +; AVX1: test62: +; AVX1: vpmaxuw + +; AVX2: test62: +; AVX2: vpmaxuw +} + +define void @test63(i16* nocapture %a, i16* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i16* %a, i64 %index + %gep.b = getelementptr inbounds i16* %b, i64 %index + %ptr.a = bitcast i16* %gep.a to <8 x i16>* + %ptr.b = bitcast i16* %gep.b to <8 x i16>* + %load.a = load <8 x i16>* %ptr.a, align 2 + %load.b = load <8 x i16>* %ptr.b, align 2 + %cmp = icmp ugt <8 x i16> %load.a, %load.b + %sel = select <8 x i1> %cmp, <8 x i16> 
%load.b, <8 x i16> %load.a + store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2 + %index.next = add i64 %index, 8 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE4: test63: +; SSE4: pminuw + +; AVX1: test63: +; AVX1: vpminuw + +; AVX2: test63: +; AVX2: vpminuw +} + +define void @test64(i16* nocapture %a, i16* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i16* %a, i64 %index + %gep.b = getelementptr inbounds i16* %b, i64 %index + %ptr.a = bitcast i16* %gep.a to <8 x i16>* + %ptr.b = bitcast i16* %gep.b to <8 x i16>* + %load.a = load <8 x i16>* %ptr.a, align 2 + %load.b = load <8 x i16>* %ptr.b, align 2 + %cmp = icmp uge <8 x i16> %load.a, %load.b + %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a + store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2 + %index.next = add i64 %index, 8 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE4: test64: +; SSE4: pminuw + +; AVX1: test64: +; AVX1: vpminuw + +; AVX2: test64: +; AVX2: vpminuw +} + +define void @test65(i32* nocapture %a, i32* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i32* %a, i64 %index + %gep.b = getelementptr inbounds i32* %b, i64 %index + %ptr.a = bitcast i32* %gep.a to <4 x i32>* + %ptr.b = bitcast i32* %gep.b to <4 x i32>* + %load.a = load <4 x i32>* %ptr.a, align 2 + %load.b = load <4 x i32>* %ptr.b, align 2 + %cmp = icmp slt <4 x i32> %load.a, %load.b + %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a + store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2 + %index.next = add i64 %index, 4 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE4: test65: +; SSE4: pmaxsd + +; AVX1: test65: +; AVX1: vpmaxsd + +; AVX2: test65: +; AVX2: vpmaxsd +} + +define void @test66(i32* nocapture %a, i32* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i32* %a, i64 %index + %gep.b = getelementptr inbounds i32* %b, i64 %index + %ptr.a = bitcast i32* %gep.a to <4 x i32>* + %ptr.b = bitcast i32* %gep.b to <4 x i32>* + %load.a = load <4 x i32>* %ptr.a, align 2 + %load.b = load <4 x i32>* %ptr.b, align 2 + %cmp = icmp sle <4 x i32> %load.a, %load.b + %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a + store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2 + %index.next = add i64 %index, 4 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE4: test66: +; SSE4: pmaxsd + +; AVX1: test66: +; AVX1: vpmaxsd + +; AVX2: test66: +; AVX2: vpmaxsd +} + +define void @test67(i32* nocapture %a, i32* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i32* %a, i64 %index + %gep.b = getelementptr inbounds i32* 
%b, i64 %index + %ptr.a = bitcast i32* %gep.a to <4 x i32>* + %ptr.b = bitcast i32* %gep.b to <4 x i32>* + %load.a = load <4 x i32>* %ptr.a, align 2 + %load.b = load <4 x i32>* %ptr.b, align 2 + %cmp = icmp sgt <4 x i32> %load.a, %load.b + %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a + store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2 + %index.next = add i64 %index, 4 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE4: test67: +; SSE4: pminsd + +; AVX1: test67: +; AVX1: vpminsd + +; AVX2: test67: +; AVX2: vpminsd +} + +define void @test68(i32* nocapture %a, i32* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i32* %a, i64 %index + %gep.b = getelementptr inbounds i32* %b, i64 %index + %ptr.a = bitcast i32* %gep.a to <4 x i32>* + %ptr.b = bitcast i32* %gep.b to <4 x i32>* + %load.a = load <4 x i32>* %ptr.a, align 2 + %load.b = load <4 x i32>* %ptr.b, align 2 + %cmp = icmp sge <4 x i32> %load.a, %load.b + %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a + store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2 + %index.next = add i64 %index, 4 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE4: test68: +; SSE4: pminsd + +; AVX1: test68: +; AVX1: vpminsd + +; AVX2: test68: +; AVX2: vpminsd +} + +define void @test69(i32* nocapture %a, i32* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i32* %a, i64 %index + %gep.b = getelementptr inbounds i32* %b, i64 %index + %ptr.a = bitcast i32* %gep.a to <4 x i32>* + %ptr.b = bitcast i32* %gep.b to <4 x i32>* + %load.a = load <4 x i32>* %ptr.a, align 2 + %load.b = load <4 x i32>* %ptr.b, align 2 + %cmp = icmp ult <4 x i32> %load.a, %load.b + %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a + store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2 + %index.next = add i64 %index, 4 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE4: test69: +; SSE4: pmaxud + +; AVX1: test69: +; AVX1: vpmaxud + +; AVX2: test69: +; AVX2: vpmaxud +} + +define void @test70(i32* nocapture %a, i32* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i32* %a, i64 %index + %gep.b = getelementptr inbounds i32* %b, i64 %index + %ptr.a = bitcast i32* %gep.a to <4 x i32>* + %ptr.b = bitcast i32* %gep.b to <4 x i32>* + %load.a = load <4 x i32>* %ptr.a, align 2 + %load.b = load <4 x i32>* %ptr.b, align 2 + %cmp = icmp ule <4 x i32> %load.a, %load.b + %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a + store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2 + %index.next = add i64 %index, 4 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE4: test70: +; SSE4: pmaxud + +; AVX1: test70: +; AVX1: vpmaxud + +; AVX2: test70: +; AVX2: vpmaxud +} + +define void @test71(i32* nocapture %a, 
i32* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i32* %a, i64 %index + %gep.b = getelementptr inbounds i32* %b, i64 %index + %ptr.a = bitcast i32* %gep.a to <4 x i32>* + %ptr.b = bitcast i32* %gep.b to <4 x i32>* + %load.a = load <4 x i32>* %ptr.a, align 2 + %load.b = load <4 x i32>* %ptr.b, align 2 + %cmp = icmp ugt <4 x i32> %load.a, %load.b + %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a + store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2 + %index.next = add i64 %index, 4 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE4: test71: +; SSE4: pminud + +; AVX1: test71: +; AVX1: vpminud + +; AVX2: test71: +; AVX2: vpminud +} + +define void @test72(i32* nocapture %a, i32* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i32* %a, i64 %index + %gep.b = getelementptr inbounds i32* %b, i64 %index + %ptr.a = bitcast i32* %gep.a to <4 x i32>* + %ptr.b = bitcast i32* %gep.b to <4 x i32>* + %load.a = load <4 x i32>* %ptr.a, align 2 + %load.b = load <4 x i32>* %ptr.b, align 2 + %cmp = icmp uge <4 x i32> %load.a, %load.b + %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a + store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2 + %index.next = add i64 %index, 4 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; SSE4: test72: +; SSE4: pminud + +; AVX1: test72: +; AVX1: vpminud + +; AVX2: test72: +; AVX2: vpminud +} + +define void @test73(i8* nocapture %a, i8* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i8* %a, i64 %index + %gep.b = getelementptr inbounds i8* %b, i64 %index + %ptr.a = bitcast i8* %gep.a to <32 x i8>* + %ptr.b = bitcast i8* %gep.b to <32 x i8>* + %load.a = load <32 x i8>* %ptr.a, align 2 + %load.b = load <32 x i8>* %ptr.b, align 2 + %cmp = icmp slt <32 x i8> %load.a, %load.b + %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a + store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2 + %index.next = add i64 %index, 32 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test73: +; AVX2: vpmaxsb +} + +define void @test74(i8* nocapture %a, i8* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i8* %a, i64 %index + %gep.b = getelementptr inbounds i8* %b, i64 %index + %ptr.a = bitcast i8* %gep.a to <32 x i8>* + %ptr.b = bitcast i8* %gep.b to <32 x i8>* + %load.a = load <32 x i8>* %ptr.a, align 2 + %load.b = load <32 x i8>* %ptr.b, align 2 + %cmp = icmp sle <32 x i8> %load.a, %load.b + %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a + store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2 + %index.next = add i64 %index, 32 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body 
+ +for.end: ; preds = %vector.body + ret void + +; AVX2: test74: +; AVX2: vpmaxsb +} + +define void @test75(i8* nocapture %a, i8* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i8* %a, i64 %index + %gep.b = getelementptr inbounds i8* %b, i64 %index + %ptr.a = bitcast i8* %gep.a to <32 x i8>* + %ptr.b = bitcast i8* %gep.b to <32 x i8>* + %load.a = load <32 x i8>* %ptr.a, align 2 + %load.b = load <32 x i8>* %ptr.b, align 2 + %cmp = icmp sgt <32 x i8> %load.a, %load.b + %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a + store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2 + %index.next = add i64 %index, 32 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test75: +; AVX2: vpminsb +} + +define void @test76(i8* nocapture %a, i8* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i8* %a, i64 %index + %gep.b = getelementptr inbounds i8* %b, i64 %index + %ptr.a = bitcast i8* %gep.a to <32 x i8>* + %ptr.b = bitcast i8* %gep.b to <32 x i8>* + %load.a = load <32 x i8>* %ptr.a, align 2 + %load.b = load <32 x i8>* %ptr.b, align 2 + %cmp = icmp sge <32 x i8> %load.a, %load.b + %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a + store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2 + %index.next = add i64 %index, 32 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test76: +; AVX2: vpminsb +} + +define void @test77(i8* nocapture %a, i8* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i8* %a, i64 %index + %gep.b = getelementptr inbounds i8* %b, i64 %index + %ptr.a = bitcast i8* %gep.a to <32 x i8>* + %ptr.b = bitcast i8* %gep.b to <32 x i8>* + %load.a = load <32 x i8>* %ptr.a, align 2 + %load.b = load <32 x i8>* %ptr.b, align 2 + %cmp = icmp ult <32 x i8> %load.a, %load.b + %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a + store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2 + %index.next = add i64 %index, 32 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test77: +; AVX2: vpmaxub +} + +define void @test78(i8* nocapture %a, i8* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i8* %a, i64 %index + %gep.b = getelementptr inbounds i8* %b, i64 %index + %ptr.a = bitcast i8* %gep.a to <32 x i8>* + %ptr.b = bitcast i8* %gep.b to <32 x i8>* + %load.a = load <32 x i8>* %ptr.a, align 2 + %load.b = load <32 x i8>* %ptr.b, align 2 + %cmp = icmp ule <32 x i8> %load.a, %load.b + %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a + store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2 + %index.next = add i64 %index, 32 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = 
%vector.body + ret void + +; AVX2: test78: +; AVX2: vpmaxub +} + +define void @test79(i8* nocapture %a, i8* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i8* %a, i64 %index + %gep.b = getelementptr inbounds i8* %b, i64 %index + %ptr.a = bitcast i8* %gep.a to <32 x i8>* + %ptr.b = bitcast i8* %gep.b to <32 x i8>* + %load.a = load <32 x i8>* %ptr.a, align 2 + %load.b = load <32 x i8>* %ptr.b, align 2 + %cmp = icmp ugt <32 x i8> %load.a, %load.b + %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a + store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2 + %index.next = add i64 %index, 32 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test79: +; AVX2: vpminub +} + +define void @test80(i8* nocapture %a, i8* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i8* %a, i64 %index + %gep.b = getelementptr inbounds i8* %b, i64 %index + %ptr.a = bitcast i8* %gep.a to <32 x i8>* + %ptr.b = bitcast i8* %gep.b to <32 x i8>* + %load.a = load <32 x i8>* %ptr.a, align 2 + %load.b = load <32 x i8>* %ptr.b, align 2 + %cmp = icmp uge <32 x i8> %load.a, %load.b + %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a + store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2 + %index.next = add i64 %index, 32 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test80: +; AVX2: vpminub +} + +define void @test81(i16* nocapture %a, i16* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i16* %a, i64 %index + %gep.b = getelementptr inbounds i16* %b, i64 %index + %ptr.a = bitcast i16* %gep.a to <16 x i16>* + %ptr.b = bitcast i16* %gep.b to <16 x i16>* + %load.a = load <16 x i16>* %ptr.a, align 2 + %load.b = load <16 x i16>* %ptr.b, align 2 + %cmp = icmp slt <16 x i16> %load.a, %load.b + %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a + store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 + %index.next = add i64 %index, 16 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test81: +; AVX2: vpmaxsw +} + +define void @test82(i16* nocapture %a, i16* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i16* %a, i64 %index + %gep.b = getelementptr inbounds i16* %b, i64 %index + %ptr.a = bitcast i16* %gep.a to <16 x i16>* + %ptr.b = bitcast i16* %gep.b to <16 x i16>* + %load.a = load <16 x i16>* %ptr.a, align 2 + %load.b = load <16 x i16>* %ptr.b, align 2 + %cmp = icmp sle <16 x i16> %load.a, %load.b + %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a + store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 + %index.next = add i64 %index, 16 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; 
preds = %vector.body + ret void + +; AVX2: test82: +; AVX2: vpmaxsw +} + +define void @test83(i16* nocapture %a, i16* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i16* %a, i64 %index + %gep.b = getelementptr inbounds i16* %b, i64 %index + %ptr.a = bitcast i16* %gep.a to <16 x i16>* + %ptr.b = bitcast i16* %gep.b to <16 x i16>* + %load.a = load <16 x i16>* %ptr.a, align 2 + %load.b = load <16 x i16>* %ptr.b, align 2 + %cmp = icmp sgt <16 x i16> %load.a, %load.b + %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a + store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 + %index.next = add i64 %index, 16 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test83: +; AVX2: vpminsw +} + +define void @test84(i16* nocapture %a, i16* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i16* %a, i64 %index + %gep.b = getelementptr inbounds i16* %b, i64 %index + %ptr.a = bitcast i16* %gep.a to <16 x i16>* + %ptr.b = bitcast i16* %gep.b to <16 x i16>* + %load.a = load <16 x i16>* %ptr.a, align 2 + %load.b = load <16 x i16>* %ptr.b, align 2 + %cmp = icmp sge <16 x i16> %load.a, %load.b + %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a + store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 + %index.next = add i64 %index, 16 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test84: +; AVX2: vpminsw +} + +define void @test85(i16* nocapture %a, i16* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i16* %a, i64 %index + %gep.b = getelementptr inbounds i16* %b, i64 %index + %ptr.a = bitcast i16* %gep.a to <16 x i16>* + %ptr.b = bitcast i16* %gep.b to <16 x i16>* + %load.a = load <16 x i16>* %ptr.a, align 2 + %load.b = load <16 x i16>* %ptr.b, align 2 + %cmp = icmp ult <16 x i16> %load.a, %load.b + %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a + store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 + %index.next = add i64 %index, 16 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test85: +; AVX2: vpmaxuw +} + +define void @test86(i16* nocapture %a, i16* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i16* %a, i64 %index + %gep.b = getelementptr inbounds i16* %b, i64 %index + %ptr.a = bitcast i16* %gep.a to <16 x i16>* + %ptr.b = bitcast i16* %gep.b to <16 x i16>* + %load.a = load <16 x i16>* %ptr.a, align 2 + %load.b = load <16 x i16>* %ptr.b, align 2 + %cmp = icmp ule <16 x i16> %load.a, %load.b + %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a + store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 + %index.next = add i64 %index, 16 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label 
%for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test86: +; AVX2: vpmaxuw +} + +define void @test87(i16* nocapture %a, i16* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i16* %a, i64 %index + %gep.b = getelementptr inbounds i16* %b, i64 %index + %ptr.a = bitcast i16* %gep.a to <16 x i16>* + %ptr.b = bitcast i16* %gep.b to <16 x i16>* + %load.a = load <16 x i16>* %ptr.a, align 2 + %load.b = load <16 x i16>* %ptr.b, align 2 + %cmp = icmp ugt <16 x i16> %load.a, %load.b + %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a + store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 + %index.next = add i64 %index, 16 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test87: +; AVX2: vpminuw +} + +define void @test88(i16* nocapture %a, i16* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i16* %a, i64 %index + %gep.b = getelementptr inbounds i16* %b, i64 %index + %ptr.a = bitcast i16* %gep.a to <16 x i16>* + %ptr.b = bitcast i16* %gep.b to <16 x i16>* + %load.a = load <16 x i16>* %ptr.a, align 2 + %load.b = load <16 x i16>* %ptr.b, align 2 + %cmp = icmp uge <16 x i16> %load.a, %load.b + %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a + store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 + %index.next = add i64 %index, 16 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test88: +; AVX2: vpminuw +} + +define void @test89(i32* nocapture %a, i32* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i32* %a, i64 %index + %gep.b = getelementptr inbounds i32* %b, i64 %index + %ptr.a = bitcast i32* %gep.a to <8 x i32>* + %ptr.b = bitcast i32* %gep.b to <8 x i32>* + %load.a = load <8 x i32>* %ptr.a, align 2 + %load.b = load <8 x i32>* %ptr.b, align 2 + %cmp = icmp slt <8 x i32> %load.a, %load.b + %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a + store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 + %index.next = add i64 %index, 8 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test89: +; AVX2: vpmaxsd +} + +define void @test90(i32* nocapture %a, i32* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i32* %a, i64 %index + %gep.b = getelementptr inbounds i32* %b, i64 %index + %ptr.a = bitcast i32* %gep.a to <8 x i32>* + %ptr.b = bitcast i32* %gep.b to <8 x i32>* + %load.a = load <8 x i32>* %ptr.a, align 2 + %load.b = load <8 x i32>* %ptr.b, align 2 + %cmp = icmp sle <8 x i32> %load.a, %load.b + %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a + store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 + %index.next = add i64 %index, 8 + %loop = icmp eq i64 %index.next, 16384 + br 
i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test90: +; AVX2: vpmaxsd +} + +define void @test91(i32* nocapture %a, i32* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i32* %a, i64 %index + %gep.b = getelementptr inbounds i32* %b, i64 %index + %ptr.a = bitcast i32* %gep.a to <8 x i32>* + %ptr.b = bitcast i32* %gep.b to <8 x i32>* + %load.a = load <8 x i32>* %ptr.a, align 2 + %load.b = load <8 x i32>* %ptr.b, align 2 + %cmp = icmp sgt <8 x i32> %load.a, %load.b + %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a + store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 + %index.next = add i64 %index, 8 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test91: +; AVX2: vpminsd +} + +define void @test92(i32* nocapture %a, i32* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i32* %a, i64 %index + %gep.b = getelementptr inbounds i32* %b, i64 %index + %ptr.a = bitcast i32* %gep.a to <8 x i32>* + %ptr.b = bitcast i32* %gep.b to <8 x i32>* + %load.a = load <8 x i32>* %ptr.a, align 2 + %load.b = load <8 x i32>* %ptr.b, align 2 + %cmp = icmp sge <8 x i32> %load.a, %load.b + %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a + store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 + %index.next = add i64 %index, 8 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test92: +; AVX2: vpminsd +} + +define void @test93(i32* nocapture %a, i32* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i32* %a, i64 %index + %gep.b = getelementptr inbounds i32* %b, i64 %index + %ptr.a = bitcast i32* %gep.a to <8 x i32>* + %ptr.b = bitcast i32* %gep.b to <8 x i32>* + %load.a = load <8 x i32>* %ptr.a, align 2 + %load.b = load <8 x i32>* %ptr.b, align 2 + %cmp = icmp ult <8 x i32> %load.a, %load.b + %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a + store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 + %index.next = add i64 %index, 8 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test93: +; AVX2: vpmaxud +} + +define void @test94(i32* nocapture %a, i32* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i32* %a, i64 %index + %gep.b = getelementptr inbounds i32* %b, i64 %index + %ptr.a = bitcast i32* %gep.a to <8 x i32>* + %ptr.b = bitcast i32* %gep.b to <8 x i32>* + %load.a = load <8 x i32>* %ptr.a, align 2 + %load.b = load <8 x i32>* %ptr.b, align 2 + %cmp = icmp ule <8 x i32> %load.a, %load.b + %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a + store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 + %index.next = add i64 %index, 8 + %loop = icmp eq i64 %index.next, 16384 + br i1 
%loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test94: +; AVX2: vpmaxud +} + +define void @test95(i32* nocapture %a, i32* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i32* %a, i64 %index + %gep.b = getelementptr inbounds i32* %b, i64 %index + %ptr.a = bitcast i32* %gep.a to <8 x i32>* + %ptr.b = bitcast i32* %gep.b to <8 x i32>* + %load.a = load <8 x i32>* %ptr.a, align 2 + %load.b = load <8 x i32>* %ptr.b, align 2 + %cmp = icmp ugt <8 x i32> %load.a, %load.b + %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a + store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 + %index.next = add i64 %index, 8 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test95: +; AVX2: vpminud +} + +define void @test96(i32* nocapture %a, i32* nocapture %b) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %gep.a = getelementptr inbounds i32* %a, i64 %index + %gep.b = getelementptr inbounds i32* %b, i64 %index + %ptr.a = bitcast i32* %gep.a to <8 x i32>* + %ptr.b = bitcast i32* %gep.b to <8 x i32>* + %load.a = load <8 x i32>* %ptr.a, align 2 + %load.b = load <8 x i32>* %ptr.b, align 2 + %cmp = icmp uge <8 x i32> %load.a, %load.b + %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a + store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 + %index.next = add i64 %index, 8 + %loop = icmp eq i64 %index.next, 16384 + br i1 %loop, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; AVX2: test96: +; AVX2: vpminud +} diff --git a/test/CodeGen/X86/vsplit-and.ll b/test/CodeGen/X86/vsplit-and.ll index ee98806c0f8b..3b7fdff84e3c 100644 --- a/test/CodeGen/X86/vsplit-and.ll +++ b/test/CodeGen/X86/vsplit-and.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux -mcpu=penryn | FileCheck %s define void @t0(<2 x i64>* %dst, <2 x i64> %src1, <2 x i64> %src2) nounwind readonly { ; CHECK: t0 diff --git a/test/CodeGen/X86/wide-fma-contraction.ll b/test/CodeGen/X86/wide-fma-contraction.ll new file mode 100644 index 000000000000..d93f33ba0e58 --- /dev/null +++ b/test/CodeGen/X86/wide-fma-contraction.ll @@ -0,0 +1,20 @@ +; RUN: llc -march=x86 -mattr=+fma4 -mtriple=x86_64-apple-darwin < %s | FileCheck %s + +; CHECK: fmafunc +define <16 x float> @fmafunc(<16 x float> %a, <16 x float> %b, <16 x float> %c) { +; CHECK-NOT: vmulps +; CHECK-NOT: vaddps +; CHECK: vfmaddps +; CHECK-NOT: vmulps +; CHECK-NOT: vaddps +; CHECK: vfmaddps +; CHECK-NOT: vmulps +; CHECK-NOT: vaddps + %ret = tail call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c) + ret <16 x float> %ret +} + +declare <16 x float> @llvm.fmuladd.v16f32(<16 x float>, <16 x float>, <16 x float>) nounwind readnone + + + diff --git a/test/CodeGen/X86/win32_sret.ll b/test/CodeGen/X86/win32_sret.ll index 878c6db99286..52b987e2be65 100644 --- a/test/CodeGen/X86/win32_sret.ll +++ b/test/CodeGen/X86/win32_sret.ll @@ -1,28 +1,127 @@ -; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN_X32 -; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X32 +; RUN: llc < %s 
-mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN32 +; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X86 ; RUN: llc < %s -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX -; RUN: llc < %s -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN_X32 -; RUN: llc < %s -O0 -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X32 +; RUN: llc < %s -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN32 +; RUN: llc < %s -O0 -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X86 ; RUN: llc < %s -O0 -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX ; The SysV ABI used by most Unixes and Mingw on x86 specifies that an sret pointer ; is callee-cleanup. However, in MSVC's cdecl calling convention, sret pointer ; arguments are caller-cleanup like normal arguments. -define void @sret1(i8* sret) nounwind { +define void @sret1(i8* sret %x) nounwind { entry: -; WIN_X32: {{ret$}} -; MINGW_X32: ret $4 +; WIN32: sret1 +; WIN32: movb $42, (%eax) +; WIN32-NOT: popl %eax +; WIN32: {{ret$}} + +; MINGW_X86: sret1 +; MINGW_X86: ret $4 + +; LINUX: sret1 ; LINUX: ret $4 + + store i8 42, i8* %x, align 4 ret void } -define void @sret2(i32* sret %x, i32 %y) nounwind { +define void @sret2(i8* sret %x, i8 %y) nounwind { entry: -; WIN_X32: {{ret$}} -; MINGW_X32: ret $4 +; WIN32: sret2 +; WIN32: movb {{.*}}, (%eax) +; WIN32-NOT: popl %eax +; WIN32: {{ret$}} + +; MINGW_X86: sret2 +; MINGW_X86: ret $4 + +; LINUX: sret2 ; LINUX: ret $4 - store i32 %y, i32* %x + + store i8 %y, i8* %x + ret void +} + +define void @sret3(i8* sret %x, i8* %y) nounwind { +entry: +; WIN32: sret3 +; WIN32: movb $42, (%eax) +; WIN32-NOT: movb $13, (%eax) +; WIN32-NOT: popl %eax +; WIN32: {{ret$}} + +; MINGW_X86: sret3 +; MINGW_X86: ret $4 + +; LINUX: sret3 +; LINUX: ret $4 + + store i8 42, i8* %x + store i8 13, i8* %y + ret void +} + +; PR15556 +%struct.S4 = type { i32, i32, i32 } + +define void @sret4(%struct.S4* noalias sret %agg.result) { +entry: +; WIN32: sret4 +; WIN32: movl $42, (%eax) +; WIN32-NOT: popl %eax +; WIN32: {{ret$}} + +; MINGW_X86: sret4 +; MINGW_X86: ret $4 + +; LINUX: sret4 +; LINUX: ret $4 + + %x = getelementptr inbounds %struct.S4* %agg.result, i32 0, i32 0 + store i32 42, i32* %x, align 4 ret void } +%struct.S5 = type { i32 } +%class.C5 = type { i8 } + +define x86_thiscallcc void @"\01?foo@C5@@QAE?AUS5@@XZ"(%struct.S5* noalias sret %agg.result, %class.C5* %this) { +entry: + %this.addr = alloca %class.C5*, align 4 + store %class.C5* %this, %class.C5** %this.addr, align 4 + %this1 = load %class.C5** %this.addr + %x = getelementptr inbounds %struct.S5* %agg.result, i32 0, i32 0 + store i32 42, i32* %x, align 4 + ret void +; WIN32: {{^}}"?foo@C5@@QAE?AUS5@@XZ": + +; The address of the return structure is passed as an implicit parameter. +; In the -O0 build, %eax is spilled at the beginning of the function, hence we +; should match both 4(%esp) and 8(%esp). +; WIN32: {{[48]}}(%esp), %eax +; WIN32: movl $42, (%eax) +; WIN32: ret $4 +} + +define void @call_foo5() { +entry: + %c = alloca %class.C5, align 1 + %s = alloca %struct.S5, align 4 + call x86_thiscallcc void @"\01?foo@C5@@QAE?AUS5@@XZ"(%struct.S5* sret %s, %class.C5* %c) +; WIN32: {{^}}_call_foo5: + +; Load the address of the result and put it onto stack +; (through %ecx in the -O0 build). +; WIN32: leal {{[0-9]+}}(%esp), %eax +; WIN32: movl %eax, (%e{{[sc][px]}}) + +; The this pointer goes to ECX. +; FIXME: for some reason, the below checks fail on the Ubuntu Atom D2700 bot. 
+; FIXME-NEXT: leal {{[0-9]+}}(%esp), %ecx +; FIXME-NEXT: calll "?foo@C5@@QAE?AUS5@@XZ" + +; WIN32: calll "?foo@C5@@QAE?AUS5@@XZ" +; WIN32: ret + ret void +} diff --git a/test/CodeGen/X86/win_ftol2.ll b/test/CodeGen/X86/win_ftol2.ll index 596b4262e6b0..14591248f354 100644 --- a/test/CodeGen/X86/win_ftol2.ll +++ b/test/CodeGen/X86/win_ftol2.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=FTOL +; RUN: llc < %s -mtriple=i686-pc-win32 -mcpu=generic | FileCheck %s -check-prefix=FTOL ; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=COMPILERRT ; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s -check-prefix=COMPILERRT ; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=COMPILERRT @@ -63,9 +63,9 @@ define i64 @double_ui64_2(double %x, double %y, double %z) nounwind { %1 = fdiv double %x, %y %2 = fsub double %x, %z - %3 = fptoui double %1 to i64 - %4 = fptoui double %2 to i64 - %5 = sub i64 %3, %4 + %3 = fptoui double %2 to i64 + %4 = fptoui double %1 to i64 + %5 = sub i64 %4, %3 ret i64 %5 } @@ -121,9 +121,9 @@ define {double, i64} @double_ui64_4(double %x, double %y) nounwind { ; FTOL_2: calll __ftol2 ;; stack is %x - %1 = fptoui double %x to i64 - %2 = fptoui double %y to i64 - %3 = sub i64 %1, %2 + %1 = fptoui double %y to i64 + %2 = fptoui double %x to i64 + %3 = sub i64 %2, %1 %4 = insertvalue {double, i64} undef, double %x, 0 %5 = insertvalue {double, i64} %4, i64 %3, 1 ret {double, i64} %5 diff --git a/test/CodeGen/X86/x86-64-dead-stack-adjust.ll b/test/CodeGen/X86/x86-64-dead-stack-adjust.ll index 902c9d5ae081..9c01f16f24f5 100644 --- a/test/CodeGen/X86/x86-64-dead-stack-adjust.ll +++ b/test/CodeGen/X86/x86-64-dead-stack-adjust.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mcpu=nehalem | not grep rsp -; RUN: llc < %s -mcpu=nehalem | grep cvttsd2siq +; RUN: llc < %s -mcpu=nehalem | grep cvttsd2si target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" target triple = "x86_64-apple-darwin8" diff --git a/test/CodeGen/X86/x86-64-ptr-arg-simple.ll b/test/CodeGen/X86/x86-64-ptr-arg-simple.ll new file mode 100644 index 000000000000..6d466639890b --- /dev/null +++ b/test/CodeGen/X86/x86-64-ptr-arg-simple.ll @@ -0,0 +1,29 @@ +; RUN: llc -mtriple=x86_64-pc-linux < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-pc-linux-gnux32 < %s | FileCheck -check-prefix=X32ABI %s + +; %in is kept in %esi for both ABIs. 
But the pointer will be passed in %edi +; for x32, not %rdi + +; CHECK: movl %esi, (%rdi) +; X32ABI: movl %esi, (%edi) + +define void @foo(i32* nocapture %out, i32 %in) nounwind { +entry: + store i32 %in, i32* %out, align 4 + ret void +} + +; CHECK: bar +; CHECK: movl (%rsi), %eax + +; Similarly here, but for loading +; X32ABI: bar +; X32ABI: movl (%esi), %eax + +define void @bar(i32* nocapture %pOut, i32* nocapture %pIn) nounwind { +entry: + %0 = load i32* %pIn, align 4 + store i32 %0, i32* %pOut, align 4 + ret void +} + diff --git a/test/CodeGen/X86/x86-64-sret-return.ll b/test/CodeGen/X86/x86-64-sret-return.ll index 7b5f189faa0f..bc8a54346580 100644 --- a/test/CodeGen/X86/x86-64-sret-return.ll +++ b/test/CodeGen/X86/x86-64-sret-return.ll @@ -1,11 +1,16 @@ -; RUN: llc < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-apple-darwin8 < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-pc-linux < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-pc-linux-gnux32 < %s | FileCheck -check-prefix=X32ABI %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" -target triple = "x86_64-apple-darwin8" - %struct.foo = type { [4 x i64] } +%struct.foo = type { [4 x i64] } ; CHECK: bar: ; CHECK: movq %rdi, %rax + +; For the x32 ABI, pointers are 32-bit so 32-bit instructions will be used +; X32ABI: bar: +; X32ABI: movl %edi, %eax + define void @bar(%struct.foo* noalias sret %agg.result, %struct.foo* %d) nounwind { entry: %d_addr = alloca %struct.foo* ; <%struct.foo**> [#uses=2] @@ -57,6 +62,11 @@ return: ; preds = %entry ; CHECK: foo: ; CHECK: movq %rdi, %rax + +; For the x32 ABI, pointers are 32-bit so 32-bit instructions will be used +; X32ABI: foo: +; X32ABI: movl %edi, %eax + define void @foo({ i64 }* noalias nocapture sret %agg.result) nounwind { store { i64 } { i64 0 }, { i64 }* %agg.result ret void diff --git a/test/CodeGen/X86/xtest.ll b/test/CodeGen/X86/xtest.ll new file mode 100644 index 000000000000..e85565edcd55 --- /dev/null +++ b/test/CodeGen/X86/xtest.ll @@ -0,0 +1,11 @@ +; RUN: llc < %s -march=x86-64 -mattr=+rtm | FileCheck %s + +declare i32 @llvm.x86.xtest() nounwind + +define i32 @test_xtest() nounwind uwtable { +entry: + %0 = tail call i32 @llvm.x86.xtest() nounwind + ret i32 %0 +; CHECK: test_xtest +; CHECK: xtest +} diff --git a/test/CodeGen/X86/zero-remat.ll b/test/CodeGen/X86/zero-remat.ll index 4242530f7731..5d25a2d74971 100644 --- a/test/CodeGen/X86/zero-remat.ll +++ b/test/CodeGen/X86/zero-remat.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -march=x86-64 | FileCheck %s --check-prefix=CHECK-64 ; RUN: llc < %s -march=x86-64 -o /dev/null -stats -info-output-file - | grep asm-printer | grep 12 ; RUN: llc < %s -march=x86 | FileCheck %s --check-prefix=CHECK-32 diff --git a/test/CodeGen/XCore/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/XCore/2010-04-07-DbgValueOtherTargets.ll deleted file mode 100644 index 80cf3a6d678f..000000000000 --- a/test/CodeGen/XCore/2010-04-07-DbgValueOtherTargets.ll +++ /dev/null @@ -1,28 +0,0 @@ -; RUN: llc -O0 -march=xcore -asm-verbose < %s | FileCheck %s -; Check that DEBUG_VALUE comments come through on a variety of targets. 
- -define i32 @main() nounwind ssp { -entry: -; CHECK: DEBUG_VALUE - call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9 - ret i32 0, !dbg !10 -} - -declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone - -declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone - -!llvm.dbg.sp = !{!0} - -!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] -!4 = metadata !{metadata !5} -!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 0} -!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] -!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] -!9 = metadata !{i32 3, i32 11, metadata !8, null} -!10 = metadata !{i32 4, i32 2, metadata !8, null} - diff --git a/test/CodeGen/XCore/DbgValueOtherTargets.test b/test/CodeGen/XCore/DbgValueOtherTargets.test new file mode 100644 index 000000000000..7c2ecd0312c6 --- /dev/null +++ b/test/CodeGen/XCore/DbgValueOtherTargets.test @@ -0,0 +1 @@ +RUN: llc -O0 -march=xcore -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll diff --git a/test/CodeGen/XCore/aliases.ll b/test/CodeGen/XCore/aliases.ll new file mode 100644 index 000000000000..d83b246a5527 --- /dev/null +++ b/test/CodeGen/XCore/aliases.ll @@ -0,0 +1,32 @@ +; RUN: llc < %s -march=xcore | FileCheck %s +declare void @a_val() nounwind +@b_val = external constant i32, section ".cp.rodata" +@c_val = external global i32 + +@a = alias void ()* @a_val +@b = alias i32* @b_val +@c = alias i32* @c_val + +; CHECK: a_addr: +; CHECK: ldap r11, a +; CHECK: retsp +define void ()* @a_addr() nounwind { +entry: + ret void ()* @a +} + +; CHECK: b_addr: +; CHECK: ldaw r11, cp[b] +; CHECK: retsp +define i32 *@b_addr() nounwind { +entry: + ret i32* @b +} + +; CHECK: c_addr: +; CHECK: ldaw r0, dp[c] +; CHECK: retsp +define i32 *@c_addr() nounwind { +entry: + ret i32* @c +} diff --git a/test/CodeGen/XCore/lit.local.cfg b/test/CodeGen/XCore/lit.local.cfg index f8726af57f79..8756f37fe8a1 100644 --- a/test/CodeGen/XCore/lit.local.cfg +++ b/test/CodeGen/XCore/lit.local.cfg @@ -1,4 +1,4 @@ -config.suffixes = ['.ll', '.c', '.cpp'] +config.suffixes = ['.ll', '.c', '.cpp', '.test'] targets = set(config.root.targets_to_build.split()) if not 'XCore' in targets: |
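Editor's note: the AVX2 tests above (test78 through test96) all exercise one pattern -- an icmp whose result feeds a select over the same two loaded vectors -- which the X86 backend matches to a single packed min/max instruction (vpminub, vpmaxsw, vpmaxsd, vpminud, and so on) instead of a compare-and-blend sequence. The following is only an illustrative sketch of that pattern in isolation, not part of the patch; the RUN line, the -mcpu=core-avx2 choice, and the function name are assumptions, written in the same pre-3.7 IR syntax the tests use.

; Editor's sketch, not part of the patch above.
; RUN: llc < %s -march=x86-64 -mcpu=core-avx2 | FileCheck %s

define <8 x i32> @smax_demo(<8 x i32> %a, <8 x i32> %b) nounwind {
; CHECK: smax_demo
; CHECK: vpmaxsd
  %cmp = icmp sgt <8 x i32> %a, %b                        ; lane-wise signed compare
  %max = select <8 x i1> %cmp, <8 x i32> %a, <8 x i32> %b ; keep the larger element per lane
  ret <8 x i32> %max
}

Selecting %a on sgt gives a signed maximum (vpmaxsd); swapping the select operands, or switching the predicate to ult/ugt, yields the vpminsd/vpminud/vpmaxud forms checked in the tests above.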
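Editor's note: the sret cleanup difference documented in win32_sret.ll above reduces to a two-line function. Under the SysV ABI (and MinGW) the callee pops the hidden sret pointer, so the epilogue is ret $4; under MSVC-style cdecl the sret pointer is caller-cleanup like any other argument, giving a plain ret. The sketch below only restates what the test already checks; the function name, stored value, and check prefixes are illustrative and not part of the patch.

; Editor's sketch, not part of the patch above.
; RUN: llc < %s -mtriple=i386-pc-linux | FileCheck %s -check-prefix=SYSV
; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=MSVC

define void @sret_demo(i32* sret %out) nounwind {
entry:
  store i32 7, i32* %out   ; write the result through the hidden pointer
  ret void
; SYSV: sret_demo
; SYSV: ret $4
; MSVC: sret_demo
; MSVC: {{ret$}}
}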