path: root/test/CodeGen
Diffstat (limited to 'test/CodeGen')
-rw-r--r--test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll5
-rw-r--r--test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll2
-rw-r--r--test/CodeGen/ARM/2009-11-02-NegativeLane.ll3
-rw-r--r--test/CodeGen/ARM/2010-03-18-ldm-rtrn.ll4
-rw-r--r--test/CodeGen/ARM/2010-04-07-DbgValueOtherTargets.ll43
-rw-r--r--test/CodeGen/ARM/2010-05-17-DAGCombineAssert.ll17
-rw-r--r--test/CodeGen/ARM/2010-06-28-DAGCombineUndef.ll10
-rw-r--r--test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll6
-rw-r--r--test/CodeGen/ARM/2010-09-21-OptCmpBug.ll84
-rw-r--r--test/CodeGen/ARM/2010-09-29-mc-asm-header-test.ll13
-rw-r--r--test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll37
-rw-r--r--test/CodeGen/ARM/2010-10-25-ifcvt-ldm.ll31
-rw-r--r--test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll85
-rw-r--r--test/CodeGen/ARM/2010-11-29-PrologueBug.ll28
-rw-r--r--test/CodeGen/ARM/2010-11-30-reloc-movt.ll42
-rw-r--r--test/CodeGen/ARM/2010-12-07-PEIBug.ll40
-rw-r--r--test/CodeGen/ARM/2010-12-08-tpsoft.ll52
-rw-r--r--test/CodeGen/ARM/2010-12-13-reloc-pic.ll100
-rw-r--r--test/CodeGen/ARM/2010-12-15-elf-lcomm.ll35
-rw-r--r--test/CodeGen/ARM/2010-12-17-LocalStackSlotCrash.ll15
-rw-r--r--test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll127
-rw-r--r--test/CodeGen/ARM/2011-02-04-AntidepMultidef.ll128
-rw-r--r--test/CodeGen/ARM/2011-02-07-AntidepClobber.ll89
-rw-r--r--test/CodeGen/ARM/align.ll4
-rw-r--r--test/CodeGen/ARM/arguments.ll4
-rw-r--r--test/CodeGen/ARM/arm-and-tst-peephole.ll112
-rw-r--r--test/CodeGen/ARM/atomic-cmp.ll17
-rw-r--r--test/CodeGen/ARM/bfi.ll32
-rw-r--r--test/CodeGen/ARM/bits.ll17
-rw-r--r--test/CodeGen/ARM/bswap-inline-asm.ll9
-rw-r--r--test/CodeGen/ARM/bx_fold.ll5
-rw-r--r--test/CodeGen/ARM/call-tc.ll96
-rw-r--r--test/CodeGen/ARM/clz.ll6
-rw-r--r--test/CodeGen/ARM/code-placement.ll54
-rw-r--r--test/CodeGen/ARM/constants.ll13
-rw-r--r--test/CodeGen/ARM/crash.ll29
-rw-r--r--test/CodeGen/ARM/div.ll2
-rw-r--r--test/CodeGen/ARM/fabss.ll2
-rw-r--r--test/CodeGen/ARM/fadds.ll2
-rw-r--r--test/CodeGen/ARM/fast-isel-crash.ll21
-rw-r--r--test/CodeGen/ARM/fast-isel-static.ll30
-rw-r--r--test/CodeGen/ARM/fast-isel.ll31
-rw-r--r--test/CodeGen/ARM/fcopysign.ll53
-rw-r--r--test/CodeGen/ARM/fdivs.ll2
-rw-r--r--test/CodeGen/ARM/fmacs.ll55
-rw-r--r--test/CodeGen/ARM/fmscs.ll39
-rw-r--r--test/CodeGen/ARM/fmuls.ll2
-rw-r--r--test/CodeGen/ARM/fnegs.ll20
-rw-r--r--test/CodeGen/ARM/fnmacs.ll31
-rw-r--r--test/CodeGen/ARM/fnmscs.ll64
-rw-r--r--test/CodeGen/ARM/fp.ll2
-rw-r--r--test/CodeGen/ARM/fpcmp-opt.ll1
-rw-r--r--test/CodeGen/ARM/fpcmp_ueq.ll10
-rw-r--r--test/CodeGen/ARM/fpconsts.ll8
-rw-r--r--test/CodeGen/ARM/fpconv.ll2
-rw-r--r--test/CodeGen/ARM/global-merge.ll23
-rw-r--r--test/CodeGen/ARM/hello.ll2
-rw-r--r--test/CodeGen/ARM/ifcvt10.ll43
-rw-r--r--test/CodeGen/ARM/ifcvt11.ll59
-rw-r--r--test/CodeGen/ARM/ifcvt6.ll7
-rw-r--r--test/CodeGen/ARM/ifcvt7.ll10
-rw-r--r--test/CodeGen/ARM/ifcvt8.ll4
-rw-r--r--test/CodeGen/ARM/inlineasm3.ll4
-rw-r--r--test/CodeGen/ARM/ispositive.ll2
-rw-r--r--test/CodeGen/ARM/ldm.ll11
-rw-r--r--test/CodeGen/ARM/ldst-f32-2-i32.ll28
-rw-r--r--test/CodeGen/ARM/load-global.ll50
-rw-r--r--test/CodeGen/ARM/long.ll8
-rw-r--r--test/CodeGen/ARM/long_shift.ll10
-rw-r--r--test/CodeGen/ARM/lsr-code-insertion.ll2
-rw-r--r--test/CodeGen/ARM/lsr-on-unrolled-loops.ll23
-rw-r--r--test/CodeGen/ARM/machine-licm.ll (renamed from test/CodeGen/Thumb/machine-licm.ll)45
-rw-r--r--test/CodeGen/ARM/mul_const.ll2
-rw-r--r--test/CodeGen/ARM/mult-alt-generic-arm.ll323
-rw-r--r--test/CodeGen/ARM/neon_div.ll48
-rw-r--r--test/CodeGen/ARM/pack.ll69
-rw-r--r--test/CodeGen/ARM/phi.ll23
-rw-r--r--test/CodeGen/ARM/prefetch.ll61
-rw-r--r--test/CodeGen/ARM/reg_sequence.ll39
-rw-r--r--test/CodeGen/ARM/remat.ll65
-rw-r--r--test/CodeGen/ARM/rev.ll41
-rw-r--r--test/CodeGen/ARM/select-imm.ll58
-rw-r--r--test/CodeGen/ARM/select.ll4
-rw-r--r--test/CodeGen/ARM/select_xform.ll63
-rw-r--r--test/CodeGen/ARM/shifter_operand.ll72
-rw-r--r--test/CodeGen/ARM/spill-q.ll36
-rw-r--r--test/CodeGen/ARM/stm.ll5
-rw-r--r--test/CodeGen/ARM/str_pre-2.ll5
-rw-r--r--test/CodeGen/ARM/tail-opts.ll9
-rw-r--r--test/CodeGen/ARM/thumb1-varalloc.ll40
-rw-r--r--test/CodeGen/ARM/umulo-32.ll14
-rw-r--r--test/CodeGen/ARM/unaligned_load_store.ll3
-rw-r--r--test/CodeGen/ARM/vbits.ll42
-rw-r--r--test/CodeGen/ARM/vceq.ll11
-rw-r--r--test/CodeGen/ARM/vcge.ll41
-rw-r--r--test/CodeGen/ARM/vcgt.ll28
-rw-r--r--test/CodeGen/ARM/vcombine.ll38
-rw-r--r--test/CodeGen/ARM/vcvt.ll20
-rw-r--r--test/CodeGen/ARM/vdup.ll18
-rw-r--r--test/CodeGen/ARM/vector-DAGCombine.ll107
-rw-r--r--test/CodeGen/ARM/vext.ll59
-rw-r--r--test/CodeGen/ARM/vget_lane.ll37
-rw-r--r--test/CodeGen/ARM/vld1.ll50
-rw-r--r--test/CodeGen/ARM/vld2.ll59
-rw-r--r--test/CodeGen/ARM/vld3.ll48
-rw-r--r--test/CodeGen/ARM/vld4.ll62
-rw-r--r--test/CodeGen/ARM/vlddup.ll212
-rw-r--r--test/CodeGen/ARM/vldlane.ll204
-rw-r--r--test/CodeGen/ARM/vmov.ll70
-rw-r--r--test/CodeGen/ARM/vmul.ll72
-rw-r--r--test/CodeGen/ARM/vrev.ll18
-rw-r--r--test/CodeGen/ARM/vst1.ll41
-rw-r--r--test/CodeGen/ARM/vst2.ll55
-rw-r--r--test/CodeGen/ARM/vst3.ll47
-rw-r--r--test/CodeGen/ARM/vst4.ll58
-rw-r--r--test/CodeGen/ARM/vstlane.ll161
-rw-r--r--test/CodeGen/Alpha/2010-04-07-DbgValueOtherTargets.ll43
-rw-r--r--test/CodeGen/CellSPU/2010-04-07-DbgValueOtherTargets.ll43
-rw-r--r--test/CodeGen/CellSPU/arg_ret.ll3
-rw-r--r--test/CodeGen/CellSPU/div_ops.ll22
-rw-r--r--test/CodeGen/CellSPU/fcmp32.ll25
-rw-r--r--test/CodeGen/CellSPU/immed32.ll15
-rw-r--r--test/CodeGen/CellSPU/loads.ll12
-rw-r--r--test/CodeGen/CellSPU/rotate_ops.ll14
-rw-r--r--test/CodeGen/CellSPU/sext128.ll30
-rw-r--r--test/CodeGen/CellSPU/shift_ops.ll18
-rw-r--r--test/CodeGen/CellSPU/shuffles.ll28
-rw-r--r--test/CodeGen/CellSPU/stores.ll22
-rw-r--r--test/CodeGen/CellSPU/v2f32.ll3
-rw-r--r--test/CodeGen/CellSPU/v2i32.ll19
-rw-r--r--test/CodeGen/Generic/2010-11-04-BigByval.ll11
-rw-r--r--test/CodeGen/Generic/2011-01-06-BigNumberCrash.ll15
-rw-r--r--test/CodeGen/Generic/2011-02-12-shuffle.ll32
-rw-r--r--test/CodeGen/Generic/add-with-overflow-128.ll24
-rw-r--r--test/CodeGen/Generic/crash.ll32
-rw-r--r--test/CodeGen/Generic/overflow.ll220
-rw-r--r--test/CodeGen/MBlaze/2010-04-07-DbgValueOtherTargets.ll43
-rw-r--r--test/CodeGen/MBlaze/brind.ll13
-rw-r--r--test/CodeGen/MBlaze/cc.ll85
-rw-r--r--test/CodeGen/MBlaze/fpu.ll16
-rw-r--r--test/CodeGen/MBlaze/imm.ll24
-rw-r--r--test/CodeGen/MBlaze/intr.ll48
-rw-r--r--test/CodeGen/MBlaze/jumptable.ll4
-rw-r--r--test/CodeGen/MBlaze/loop.ll3
-rw-r--r--test/CodeGen/MBlaze/mul.ll6
-rw-r--r--test/CodeGen/MBlaze/shift.ll26
-rw-r--r--test/CodeGen/MBlaze/svol.ll80
-rw-r--r--test/CodeGen/MSP430/2010-04-07-DbgValueOtherTargets.ll43
-rw-r--r--test/CodeGen/MSP430/mult-alt-generic-msp430.ll323
-rw-r--r--test/CodeGen/Mips/2008-07-15-InternalConstant.ll4
-rw-r--r--test/CodeGen/Mips/2010-04-07-DbgValueOtherTargets.ll43
-rw-r--r--test/CodeGen/Mips/2010-07-20-Select.ll6
-rw-r--r--test/CodeGen/Mips/2010-11-09-CountLeading.ll33
-rw-r--r--test/CodeGen/Mips/2010-11-09-Mul.ll15
-rwxr-xr-xtest/CodeGen/Mips/cmov.ll15
-rw-r--r--test/CodeGen/Mips/madd-msub.ll65
-rw-r--r--test/CodeGen/Mips/o32_cc.ll325
-rw-r--r--test/CodeGen/Mips/rotate.ll40
-rw-r--r--test/CodeGen/PIC16/2009-07-17-PR4566-pic16.ll32
-rw-r--r--test/CodeGen/PIC16/2009-11-20-NewNode.ll36
-rw-r--r--test/CodeGen/PIC16/C16-11.ll40
-rw-r--r--test/CodeGen/PIC16/C16-15.ll45
-rw-r--r--test/CodeGen/PIC16/C16-49.ll15
-rw-r--r--test/CodeGen/PIC16/check_inc_files.ll9
-rw-r--r--test/CodeGen/PIC16/global-in-user-section.ll6
-rw-r--r--test/CodeGen/PIC16/globals.ll18
-rw-r--r--test/CodeGen/PIC16/result_direction.ll13
-rw-r--r--test/CodeGen/PIC16/sext.ll11
-rw-r--r--test/CodeGen/PIC16/test_indf_name.ll12
-rw-r--r--test/CodeGen/PTX/add.ll15
-rw-r--r--test/CodeGen/PTX/dg.exp (renamed from test/CodeGen/PIC16/dg.exp)2
-rw-r--r--test/CodeGen/PTX/exit.ll14
-rw-r--r--test/CodeGen/PTX/ld.ll78
-rw-r--r--test/CodeGen/PTX/mov.ll13
-rw-r--r--test/CodeGen/PTX/options.ll6
-rw-r--r--test/CodeGen/PTX/ret.ll7
-rw-r--r--test/CodeGen/PTX/shl.ll22
-rw-r--r--test/CodeGen/PTX/shr.ll43
-rw-r--r--test/CodeGen/PTX/st.ll71
-rw-r--r--test/CodeGen/PTX/sub.ll15
-rw-r--r--test/CodeGen/PowerPC/2007-03-24-cntlzd.ll2
-rw-r--r--test/CodeGen/PowerPC/2010-04-07-DbgValueOtherTargets.ll43
-rw-r--r--test/CodeGen/PowerPC/2010-10-11-Fast-Varargs.ll16
-rw-r--r--test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll22
-rw-r--r--test/CodeGen/PowerPC/align.ll4
-rw-r--r--test/CodeGen/PowerPC/compare-simm.ll2
-rw-r--r--test/CodeGen/PowerPC/indirectbr.ll4
-rw-r--r--test/CodeGen/PowerPC/mult-alt-generic-powerpc.ll321
-rw-r--r--test/CodeGen/PowerPC/mult-alt-generic-powerpc64.ll321
-rw-r--r--test/CodeGen/PowerPC/rlwimi2.ll2
-rw-r--r--test/CodeGen/PowerPC/stfiwx.ll4
-rw-r--r--test/CodeGen/PowerPC/tango.net.ftp.FtpClient.ll585
-rw-r--r--test/CodeGen/PowerPC/unsafe-math.ll2
-rw-r--r--test/CodeGen/PowerPC/varargs.ll22
-rw-r--r--test/CodeGen/SPARC/2010-04-07-DbgValueOtherTargets.ll43
-rwxr-xr-xtest/CodeGen/SPARC/2011-01-11-CC.ll105
-rw-r--r--test/CodeGen/SPARC/2011-01-11-Call.ll13
-rw-r--r--test/CodeGen/SPARC/2011-01-11-FrameAddr.ll64
-rw-r--r--test/CodeGen/SPARC/2011-01-19-DelaySlot.ll90
-rw-r--r--test/CodeGen/SPARC/2011-01-21-ByValArgs.ll18
-rw-r--r--test/CodeGen/SPARC/2011-01-22-SRet.ll36
-rw-r--r--test/CodeGen/SPARC/basictest.ll24
-rw-r--r--test/CodeGen/SPARC/mult-alt-generic-sparc.ll323
-rw-r--r--test/CodeGen/SPARC/xnor.ll15
-rw-r--r--test/CodeGen/SystemZ/2010-04-07-DbgValueOtherTargets.ll43
-rw-r--r--test/CodeGen/Thumb/2010-04-07-DbgValueOtherTargets.ll43
-rw-r--r--test/CodeGen/Thumb/2010-07-15-debugOrdering.ll2
-rw-r--r--test/CodeGen/Thumb/2011-EpilogueBug.ll17
-rw-r--r--test/CodeGen/Thumb/barrier.ll11
-rw-r--r--test/CodeGen/Thumb/dyn-stackalloc.ll23
-rw-r--r--test/CodeGen/Thumb/large-stack.ll14
-rw-r--r--test/CodeGen/Thumb/long.ll2
-rw-r--r--test/CodeGen/Thumb/select.ll2
-rw-r--r--test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll9
-rw-r--r--test/CodeGen/Thumb2/2009-08-21-PostRAKill4.ll (renamed from test/CodeGen/ARM/2009-08-21-PostRAKill4.ll)0
-rw-r--r--test/CodeGen/Thumb2/2009-09-01-PostRAProlog.ll (renamed from test/CodeGen/ARM/2009-09-01-PostRAProlog.ll)0
-rw-r--r--test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll13
-rw-r--r--test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll5
-rw-r--r--test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll2
-rw-r--r--test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll12
-rw-r--r--test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll34
-rw-r--r--test/CodeGen/Thumb2/2010-12-03-AddSPNarrowing.ll11
-rw-r--r--test/CodeGen/Thumb2/bfi.ll11
-rw-r--r--test/CodeGen/Thumb2/buildvector-crash.ll17
-rw-r--r--test/CodeGen/Thumb2/cortex-fp.ll2
-rw-r--r--test/CodeGen/Thumb2/cross-rc-coalescing-2.ll10
-rw-r--r--test/CodeGen/Thumb2/div.ll2
-rw-r--r--test/CodeGen/Thumb2/large-stack.ll2
-rw-r--r--test/CodeGen/Thumb2/load-global.ll23
-rw-r--r--test/CodeGen/Thumb2/machine-licm-vdup.ll38
-rw-r--r--test/CodeGen/Thumb2/machine-licm.ll62
-rw-r--r--test/CodeGen/Thumb2/thumb2-badreg-operands.ll15
-rw-r--r--test/CodeGen/Thumb2/thumb2-barrier.ll32
-rw-r--r--test/CodeGen/Thumb2/thumb2-ifcvt3.ll1
-rw-r--r--test/CodeGen/Thumb2/thumb2-ldrd.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-mov.ll6
-rw-r--r--test/CodeGen/Thumb2/thumb2-mul.ll18
-rw-r--r--test/CodeGen/Thumb2/thumb2-select_xform.ll4
-rw-r--r--test/CodeGen/Thumb2/thumb2-spill-q.ll36
-rw-r--r--test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll41
-rw-r--r--test/CodeGen/X86/2007-05-15-maskmovq.ll8
-rw-r--r--test/CodeGen/X86/2007-06-15-IntToMMX.ll13
-rw-r--r--test/CodeGen/X86/2007-07-03-GR64ToVR64.ll14
-rw-r--r--test/CodeGen/X86/2007-10-16-fp80_select.ll19
-rw-r--r--test/CodeGen/X86/2008-02-18-TailMergingBug.ll2
-rw-r--r--test/CodeGen/X86/2008-04-08-CoalescerCrash.ll8
-rw-r--r--test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll4
-rw-r--r--test/CodeGen/X86/2008-07-19-movups-spills.ll3
-rw-r--r--test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll6
-rw-r--r--test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll22
-rw-r--r--test/CodeGen/X86/2008-09-17-inline-asm-1.ll16
-rw-r--r--test/CodeGen/X86/2008-10-27-CoalescerBug.ll10
-rw-r--r--test/CodeGen/X86/2008-10-27-StackRealignment.ll4
-rw-r--r--test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll9
-rw-r--r--test/CodeGen/X86/2008-11-29-DivideConstant16bitSigned.ll9
-rw-r--r--test/CodeGen/X86/2009-01-13-DoubleUpdate.ll2
-rw-r--r--test/CodeGen/X86/2009-01-27-NullStrings.ll2
-rw-r--r--test/CodeGen/X86/2009-02-26-MachineLICMBug.ll2
-rw-r--r--test/CodeGen/X86/2009-04-24.ll3
-rw-r--r--test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll9
-rw-r--r--test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll14
-rw-r--r--test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll2
-rw-r--r--test/CodeGen/X86/2009-07-07-SplitICmp.ll2
-rw-r--r--test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll8
-rw-r--r--test/CodeGen/X86/2009-08-06-inlineasm.ll6
-rw-r--r--test/CodeGen/X86/2009-09-10-SpillComments.ll4
-rw-r--r--test/CodeGen/X86/2009-12-11-TLSNoRedZone.ll2
-rw-r--r--test/CodeGen/X86/2010-04-07-DbgValueOtherTargets.ll42
-rw-r--r--test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll64
-rw-r--r--test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll2
-rw-r--r--test/CodeGen/X86/2010-05-25-DotDebugLoc.ll7
-rw-r--r--test/CodeGen/X86/2010-05-26-DotDebugLoc.ll2
-rw-r--r--test/CodeGen/X86/2010-05-28-Crash.ll6
-rw-r--r--test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll8
-rw-r--r--test/CodeGen/X86/2010-07-02-asm-alignstack.ll4
-rw-r--r--test/CodeGen/X86/2010-09-16-EmptyFilename.ll29
-rw-r--r--test/CodeGen/X86/2010-09-16-asmcrash.ll56
-rw-r--r--test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll26
-rw-r--r--test/CodeGen/X86/2010-09-30-CMOV-JumpTable-PHI.ll71
-rw-r--r--test/CodeGen/X86/2010-10-08-cmpxchg8b.ll28
-rw-r--r--test/CodeGen/X86/2010-11-02-DbgParameter.ll35
-rw-r--r--test/CodeGen/X86/2010-11-09-MOVLPS.ll66
-rw-r--r--test/CodeGen/X86/2010-11-18-SelectOfExtload.ll15
-rw-r--r--test/CodeGen/X86/2010-12-02-MC-Set.ll22
-rw-r--r--test/CodeGen/X86/2011-01-07-LegalizeTypesCrash.ll19
-rw-r--r--test/CodeGen/X86/2011-01-10-DagCombineHang.ll15
-rw-r--r--test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll103
-rw-r--r--test/CodeGen/X86/2011-02-04-FastRegallocNoFP.ll14
-rw-r--r--test/CodeGen/X86/3addr-or.ll38
-rw-r--r--test/CodeGen/X86/abi-isel.ll942
-rw-r--r--test/CodeGen/X86/add-of-carry.ll34
-rw-r--r--test/CodeGen/X86/add.ll40
-rw-r--r--test/CodeGen/X86/addr-label-difference.ll2
-rw-r--r--test/CodeGen/X86/alldiv-divdi3.ll17
-rw-r--r--test/CodeGen/X86/andimm8.ll19
-rw-r--r--test/CodeGen/X86/apm.ll26
-rw-r--r--test/CodeGen/X86/atomic_op.ll71
-rw-r--r--test/CodeGen/X86/avx-128.ll2
-rw-r--r--test/CodeGen/X86/avx-intrinsics-x86.ll33
-rw-r--r--test/CodeGen/X86/avx-intrinsics-x86_64.ll2
-rw-r--r--test/CodeGen/X86/bc-extract.ll27
-rw-r--r--test/CodeGen/X86/bit-test-shift.ll13
-rw-r--r--test/CodeGen/X86/bswap-inline-asm.ll11
-rw-r--r--test/CodeGen/X86/byval.ll11
-rw-r--r--test/CodeGen/X86/cmp-test.ll27
-rw-r--r--test/CodeGen/X86/cmp.ll92
-rw-r--r--test/CodeGen/X86/cmp0.ll24
-rw-r--r--test/CodeGen/X86/cmp2.ll18
-rw-r--r--test/CodeGen/X86/commute-two-addr.ll48
-rw-r--r--test/CodeGen/X86/compare-inf.ll16
-rw-r--r--test/CodeGen/X86/complex-asm.ll17
-rw-r--r--test/CodeGen/X86/conditional-indecrement.ll89
-rw-r--r--test/CodeGen/X86/const-select.ll22
-rw-r--r--test/CodeGen/X86/crash.ll58
-rw-r--r--test/CodeGen/X86/critical-edge-split-2.ll29
-rw-r--r--test/CodeGen/X86/critical-edge-split.ll50
-rw-r--r--test/CodeGen/X86/ctpop-combine.ll40
-rw-r--r--test/CodeGen/X86/dagcombine-buildvector.ll2
-rw-r--r--test/CodeGen/X86/dbg-live-in-location.ll84
-rw-r--r--test/CodeGen/X86/dbg-merge-loc-entry.ll71
-rw-r--r--test/CodeGen/X86/dbg-value-inlined-parameter.ll86
-rw-r--r--test/CodeGen/X86/dbg-value-location.ll70
-rw-r--r--test/CodeGen/X86/dbg-value-range.ll56
-rw-r--r--test/CodeGen/X86/div_const.ll7
-rw-r--r--test/CodeGen/X86/divide-by-constant.ll62
-rw-r--r--test/CodeGen/X86/dll-linkage.ll2
-rw-r--r--test/CodeGen/X86/dollar-name.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-avoid-unnecessary-pic-base.ll23
-rw-r--r--test/CodeGen/X86/fast-isel-bc.ll16
-rw-r--r--test/CodeGen/X86/fast-isel-gep.ll17
-rw-r--r--test/CodeGen/X86/fast-isel-mem.ll18
-rw-r--r--test/CodeGen/X86/fltused.ll19
-rw-r--r--test/CodeGen/X86/fp-in-intregs.ll3
-rw-r--r--test/CodeGen/X86/fp-stack-compare.ll3
-rw-r--r--test/CodeGen/X86/ghc-cc.ll4
-rw-r--r--test/CodeGen/X86/global-sections.ll18
-rw-r--r--test/CodeGen/X86/inline-asm-h.ll12
-rw-r--r--test/CodeGen/X86/inline-asm-ptr-cast.ll27
-rw-r--r--test/CodeGen/X86/insertelement-legalize.ll2
-rw-r--r--test/CodeGen/X86/legalize-sub-zero-2.ll41
-rw-r--r--test/CodeGen/X86/legalize-sub-zero.ll35
-rw-r--r--test/CodeGen/X86/legalizedag_vec.ll8
-rw-r--r--test/CodeGen/X86/licm-symbol.ll2
-rw-r--r--test/CodeGen/X86/loop-blocks.ll11
-rw-r--r--test/CodeGen/X86/lsr-reuse.ll15
-rw-r--r--test/CodeGen/X86/machine-cse.ll40
-rw-r--r--test/CodeGen/X86/memcmp.ll12
-rw-r--r--test/CodeGen/X86/memcpy.ll64
-rw-r--r--test/CodeGen/X86/memmove-0.ll9
-rw-r--r--test/CodeGen/X86/memmove-1.ll9
-rw-r--r--test/CodeGen/X86/memmove-2.ll9
-rw-r--r--test/CodeGen/X86/memmove-3.ll9
-rw-r--r--test/CodeGen/X86/memset-2.ll26
-rw-r--r--test/CodeGen/X86/memset64-on-x86-32.ll2
-rw-r--r--test/CodeGen/X86/mingw-alloca.ll4
-rw-r--r--test/CodeGen/X86/misaligned-memset.ll15
-rw-r--r--test/CodeGen/X86/mmx-arg-passing.ll19
-rw-r--r--test/CodeGen/X86/mmx-arg-passing2.ll14
-rw-r--r--test/CodeGen/X86/mmx-arith.ll380
-rw-r--r--test/CodeGen/X86/mmx-bitcast-to-i64.ll37
-rw-r--r--test/CodeGen/X86/mmx-builtins.ll1324
-rw-r--r--test/CodeGen/X86/mmx-insert-element.ll10
-rw-r--r--test/CodeGen/X86/mmx-pinsrw.ll2
-rw-r--r--test/CodeGen/X86/mmx-punpckhdq.ll19
-rw-r--r--test/CodeGen/X86/mmx-shift.ll24
-rw-r--r--test/CodeGen/X86/mmx-shuffle.ll6
-rw-r--r--test/CodeGen/X86/mmx-vzmovl-2.ll24
-rw-r--r--test/CodeGen/X86/mmx-vzmovl.ll4
-rw-r--r--test/CodeGen/X86/movgs.ll53
-rw-r--r--test/CodeGen/X86/mult-alt-generic-i686.ll321
-rw-r--r--test/CodeGen/X86/mult-alt-generic-x86_64.ll321
-rw-r--r--test/CodeGen/X86/mult-alt-x86.ll358
-rw-r--r--test/CodeGen/X86/narrow-shl-load.ll83
-rw-r--r--test/CodeGen/X86/negative-sin.ll4
-rw-r--r--test/CodeGen/X86/non-globl-eh-frame.ll24
-rw-r--r--test/CodeGen/X86/phi-immediate-factoring.ll2
-rw-r--r--test/CodeGen/X86/phys_subreg_coalesce-2.ll2
-rw-r--r--test/CodeGen/X86/pic.ll24
-rw-r--r--test/CodeGen/X86/pic_jumptable.ll2
-rw-r--r--test/CodeGen/X86/popcnt.ll38
-rw-r--r--test/CodeGen/X86/postra-licm.ll2
-rw-r--r--test/CodeGen/X86/pr2659.ll7
-rw-r--r--test/CodeGen/X86/pr3522.ll2
-rw-r--r--test/CodeGen/X86/pr9127.ll12
-rw-r--r--test/CodeGen/X86/prefetch.ll10
-rw-r--r--test/CodeGen/X86/rodata-relocs.ll16
-rw-r--r--test/CodeGen/X86/scalar_widen_div.ll2
-rw-r--r--test/CodeGen/X86/select-aggregate.ll15
-rw-r--r--test/CodeGen/X86/select-zero-one.ll25
-rw-r--r--test/CodeGen/X86/select.ll239
-rw-r--r--test/CodeGen/X86/sext-select.ll23
-rw-r--r--test/CodeGen/X86/shift-folding.ll6
-rw-r--r--test/CodeGen/X86/sibcall-3.ll2
-rw-r--r--test/CodeGen/X86/sibcall-5.ll31
-rw-r--r--test/CodeGen/X86/sibcall.ll30
-rw-r--r--test/CodeGen/X86/sink-hoist.ll29
-rw-r--r--test/CodeGen/X86/split-select.ll7
-rw-r--r--test/CodeGen/X86/sse-align-11.ll3
-rw-r--r--test/CodeGen/X86/sse2.ll30
-rw-r--r--test/CodeGen/X86/sse3.ll17
-rw-r--r--test/CodeGen/X86/sse41.ll4
-rw-r--r--test/CodeGen/X86/stack-align.ll17
-rw-r--r--test/CodeGen/X86/stdcall-notailcall.ll13
-rw-r--r--test/CodeGen/X86/store-narrow.ll14
-rw-r--r--test/CodeGen/X86/store_op_load_fold2.ll2
-rw-r--r--test/CodeGen/X86/switch-bt.ll30
-rw-r--r--test/CodeGen/X86/switch-or.ll22
-rw-r--r--test/CodeGen/X86/tail-opts.ll23
-rw-r--r--test/CodeGen/X86/tailcall-largecode.ll8
-rw-r--r--test/CodeGen/X86/tailcall-ri64.ll24
-rw-r--r--test/CodeGen/X86/tailcall-stackalign.ll2
-rw-r--r--test/CodeGen/X86/tailcallfp2.ll4
-rw-r--r--test/CodeGen/X86/tailcallstack64.ll17
-rw-r--r--test/CodeGen/X86/tls-pic.ll16
-rw-r--r--test/CodeGen/X86/tls9.ll2
-rw-r--r--test/CodeGen/X86/tlv-1.ll (renamed from test/CodeGen/X86/tls-1.ll)16
-rw-r--r--test/CodeGen/X86/tlv-2.ll32
-rw-r--r--test/CodeGen/X86/twoaddr-lea.ll32
-rw-r--r--test/CodeGen/X86/uint64-to-float.ll21
-rw-r--r--test/CodeGen/X86/umul-with-overflow.ll8
-rw-r--r--test/CodeGen/X86/umulo-64.ll28
-rw-r--r--test/CodeGen/X86/unaligned-load.ll2
-rw-r--r--test/CodeGen/X86/unknown-location.ll10
-rw-r--r--test/CodeGen/X86/vec-sign.ll30
-rw-r--r--test/CodeGen/X86/vec-trunc-store.ll2
-rw-r--r--test/CodeGen/X86/vec_cast.ll1
-rw-r--r--test/CodeGen/X86/vec_compare-2.ll2
-rw-r--r--test/CodeGen/X86/vec_ext_inreg.ll1
-rw-r--r--test/CodeGen/X86/vec_insert-5.ll9
-rw-r--r--test/CodeGen/X86/vec_insert-7.ll15
-rw-r--r--test/CodeGen/X86/vec_select.ll12
-rw-r--r--test/CodeGen/X86/vec_set-F.ll6
-rw-r--r--test/CodeGen/X86/vec_shuffle-27.ll29
-rw-r--r--test/CodeGen/X86/vec_shuffle-30.ll2
-rw-r--r--test/CodeGen/X86/vec_shuffle-37.ll10
-rw-r--r--test/CodeGen/X86/vec_zero_cse.ll5
-rw-r--r--test/CodeGen/X86/visibility.ll11
-rw-r--r--test/CodeGen/X86/vshift-1.ll2
-rw-r--r--test/CodeGen/X86/vshift-2.ll2
-rw-r--r--test/CodeGen/X86/vshift-3.ll2
-rw-r--r--test/CodeGen/X86/vshift-4.ll2
-rw-r--r--test/CodeGen/X86/vshift-5.ll2
-rw-r--r--test/CodeGen/X86/vsplit-and.ll2
-rw-r--r--test/CodeGen/X86/widen_arith-1.ll2
-rw-r--r--test/CodeGen/X86/widen_arith-2.ll2
-rw-r--r--test/CodeGen/X86/widen_arith-3.ll2
-rw-r--r--test/CodeGen/X86/widen_arith-4.ll2
-rw-r--r--test/CodeGen/X86/widen_arith-5.ll2
-rw-r--r--test/CodeGen/X86/widen_arith-6.ll2
-rw-r--r--test/CodeGen/X86/widen_cast-1.ll2
-rw-r--r--test/CodeGen/X86/widen_cast-2.ll2
-rw-r--r--test/CodeGen/X86/widen_cast-3.ll2
-rw-r--r--test/CodeGen/X86/widen_cast-4.ll2
-rw-r--r--test/CodeGen/X86/widen_cast-5.ll2
-rw-r--r--test/CodeGen/X86/widen_cast-6.ll2
-rw-r--r--test/CodeGen/X86/widen_conv-1.ll2
-rw-r--r--test/CodeGen/X86/widen_conv-2.ll2
-rw-r--r--test/CodeGen/X86/widen_conv-3.ll2
-rw-r--r--test/CodeGen/X86/widen_conv-4.ll2
-rw-r--r--test/CodeGen/X86/widen_extract-1.ll2
-rw-r--r--test/CodeGen/X86/widen_load-1.ll2
-rw-r--r--test/CodeGen/X86/widen_load-2.ll2
-rw-r--r--test/CodeGen/X86/widen_select-1.ll12
-rw-r--r--test/CodeGen/X86/widen_shuffle-1.ll2
-rw-r--r--test/CodeGen/X86/win64_params.ll11
-rw-r--r--test/CodeGen/X86/win64_vararg.ll20
-rw-r--r--test/CodeGen/X86/win_chkstk.ll15
-rw-r--r--test/CodeGen/X86/x86-64-extend-shift.ll10
-rw-r--r--test/CodeGen/X86/x86_64-mul-by-const.ll9
-rw-r--r--test/CodeGen/X86/zext-extract_subreg.ll60
-rw-r--r--test/CodeGen/XCore/2010-04-07-DbgValueOtherTargets.ll43
-rw-r--r--test/CodeGen/XCore/2011-01-31-DAGCombineBug.ll10
-rw-r--r--test/CodeGen/XCore/ashr.ll8
-rw-r--r--test/CodeGen/XCore/globals.ll6
-rw-r--r--test/CodeGen/XCore/resources.ll111
-rw-r--r--test/CodeGen/XCore/trampoline.ll37
476 files changed, 14320 insertions, 3439 deletions
diff --git a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
index ee63656b26d3..3694aaad5549 100644
--- a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
+++ b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6,+vfp2 | FileCheck %s
@quant_coef = external global [6 x [4 x [4 x i32]]] ; <[6 x [4 x [4 x i32]]]*> [#uses=1]
@dequant_coef = external global [6 x [4 x [4 x i32]]] ; <[6 x [4 x [4 x i32]]]*> [#uses=1]
@@ -8,8 +8,9 @@
define fastcc i32 @dct_luma_sp(i32 %block_x, i32 %block_y, i32* %coeff_cost) {
entry:
; Make sure to use base-updating stores for saving callee-saved registers.
+; CHECK: push
; CHECK-NOT: sub sp
-; CHECK: vstmdb sp!
+; CHECK: push
%predicted_block = alloca [4 x [4 x i32]], align 4 ; <[4 x [4 x i32]]*> [#uses=1]
br label %cond_next489
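
[Editor's note] For readers skimming the CHECK changes above: a "base-updating store" folds the stack-pointer adjustment into the store itself, which is what the new "push" checks expect and what "CHECK-NOT: sub sp" rules out. A minimal sketch of the two prologue shapes, with illustrative register lists that are not taken from the test:

  @ base-updating form (what the updated test expects)
  push    {r4, r5, r6, lr}      @ stmdb sp!, {...}: stores and writes back sp
  vstmdb  sp!, {d8, d9}         @ same idea for callee-saved VFP registers

  @ non-updating form (ruled out by CHECK-NOT: sub sp)
  sub     sp, sp, #16
  stm     sp, {r4, r5, r6, lr}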
diff --git a/test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll b/test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll
index 4aa879dc4092..0fe3b39a622d 100644
--- a/test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll
+++ b/test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll
@@ -5,7 +5,7 @@
define void @foo(%0* noalias nocapture sret %agg.result, double %x.0, double %y.0) nounwind {
; CHECK: foo:
-; CHECK: bl __adddf3
+; CHECK: bl __aeabi_dadd
; CHECK-NOT: strd
; CHECK: mov
%x76 = fmul double %y.0, 0.000000e+00 ; <double> [#uses=1]
diff --git a/test/CodeGen/ARM/2009-11-02-NegativeLane.ll b/test/CodeGen/ARM/2009-11-02-NegativeLane.ll
index 89c9037bd9f6..ca5ae8b62e8b 100644
--- a/test/CodeGen/ARM/2009-11-02-NegativeLane.ll
+++ b/test/CodeGen/ARM/2009-11-02-NegativeLane.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=cortex-a8 < %s | grep vdup.16
+; RUN: llc -mcpu=cortex-a8 < %s | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "armv7-eabi"
@@ -7,6 +7,7 @@ entry:
br i1 undef, label %return, label %bb
bb: ; preds = %bb, %entry
+; CHECK: vld1.16 {d16[], d17[]}
%0 = load i16* undef, align 2
%1 = insertelement <8 x i16> undef, i16 %0, i32 2
%2 = insertelement <8 x i16> %1, i16 undef, i32 3
diff --git a/test/CodeGen/ARM/2010-03-18-ldm-rtrn.ll b/test/CodeGen/ARM/2010-03-18-ldm-rtrn.ll
index 31525eff4461..d9e1a1486a3c 100644
--- a/test/CodeGen/ARM/2010-03-18-ldm-rtrn.ll
+++ b/test/CodeGen/ARM/2010-03-18-ldm-rtrn.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=armv4-unknown-eabi | FileCheck %s
+; RUN: llc < %s -mtriple=armv4-unknown-eabi | FileCheck %s -check-prefix=V4
; RUN: llc < %s -mtriple=armv5-unknown-eabi | FileCheck %s
; RUN: llc < %s -mtriple=armv6-unknown-eabi | FileCheck %s
@@ -7,6 +7,8 @@ entry:
%0 = tail call i32 @foo(i32 %a) nounwind ; <i32> [#uses=1]
%1 = add nsw i32 %0, 3 ; <i32> [#uses=1]
; CHECK: ldmia sp!, {r11, pc}
+; V4: pop
+; V4-NEXT: mov pc, lr
ret i32 %1
}
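
[Editor's note] The new V4 prefix distinguishes the return sequence by architecture version: on armv5 and later the epilogue restores the callee-saved registers and returns in a single load-multiple, while the armv4 output pops into lr and returns with a separate mov. A hedged sketch of the two shapes (the register list is illustrative):

  @ armv5+ (plain CHECK prefix): restore and return in one step
  ldmia   sp!, {r11, pc}

  @ armv4 (V4 prefix): pop, then return separately
  ldmia   sp!, {r11, lr}
  mov     pc, lr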
diff --git a/test/CodeGen/ARM/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/ARM/2010-04-07-DbgValueOtherTargets.ll
index 8a24cfa39785..642268992062 100644
--- a/test/CodeGen/ARM/2010-04-07-DbgValueOtherTargets.ll
+++ b/test/CodeGen/ARM/2010-04-07-DbgValueOtherTargets.ll
@@ -1,33 +1,28 @@
; RUN: llc -O0 -march=arm -asm-verbose < %s | FileCheck %s
; Check that DEBUG_VALUE comments come through on a variety of targets.
-%tart.reflect.ComplexType = type { double, double }
-
-@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
-
-define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+define i32 @main() nounwind ssp {
entry:
; CHECK: DEBUG_VALUE
- tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
- tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
- ret i32 3
+ call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
+ ret i32 0, !dbg !10
}
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
-!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
-!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
-!3 = metadata !{metadata !4, metadata !6, metadata !7}
-!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
-!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
-!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
-!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
-!9 = metadata !{i32 458763, metadata !10} ; [ DW_TAG_lexical_block ]
-!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 0}
+!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!9 = metadata !{i32 3, i32 11, metadata !8, null}
+!10 = metadata !{i32 4, i32 2, metadata !8, null}
+
diff --git a/test/CodeGen/ARM/2010-05-17-DAGCombineAssert.ll b/test/CodeGen/ARM/2010-05-17-DAGCombineAssert.ll
deleted file mode 100644
index 2a4bbd1d8cc6..000000000000
--- a/test/CodeGen/ARM/2010-05-17-DAGCombineAssert.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llc < %s -mtriple=armv7-eabi -mcpu=cortex-a8
-; PR7158
-
-define arm_aapcs_vfpcc i32 @main() nounwind {
-bb.nph55.bb.nph55.split_crit_edge:
- br label %bb3
-
-bb3: ; preds = %bb3, %bb.nph55.bb.nph55.split_crit_edge
- br i1 undef, label %bb.i19, label %bb3
-
-bb.i19: ; preds = %bb.i19, %bb3
- %0 = insertelement <4 x float> undef, float undef, i32 3 ; <<4 x float>> [#uses=3]
- %1 = fmul <4 x float> %0, %0 ; <<4 x float>> [#uses=1]
- %2 = bitcast <4 x float> %1 to <2 x double> ; <<2 x double>> [#uses=0]
- %3 = fmul <4 x float> %0, undef ; <<4 x float>> [#uses=0]
- br label %bb.i19
-}
diff --git a/test/CodeGen/ARM/2010-06-28-DAGCombineUndef.ll b/test/CodeGen/ARM/2010-06-28-DAGCombineUndef.ll
deleted file mode 100644
index ad2810b5bb9a..000000000000
--- a/test/CodeGen/ARM/2010-06-28-DAGCombineUndef.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon
-
-define void @main() nounwind {
-entry:
- store <2 x i64> undef, <2 x i64>* undef, align 16
- %0 = load <16 x i8>* undef, align 16 ; <<16 x i8>> [#uses=1]
- %1 = or <16 x i8> zeroinitializer, %0 ; <<16 x i8>> [#uses=1]
- store <16 x i8> %1, <16 x i8>* undef, align 16
- ret void
-}
diff --git a/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll
index ffc47ebdf196..b9d5600d2ad8 100644
--- a/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll
+++ b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll
@@ -10,9 +10,9 @@ target triple = "thumbv7-apple-darwin10"
; %reg1028 gets allocated %Q0, and if %reg1030 is reloaded for the partial
; redef, it cannot also get %Q0.
-; CHECK: vld1.64 {d0, d1}, [r{{.}}]
-; CHECK-NOT: vld1.64 {d0, d1}
-; CHECK: vmov.f64 d3, d0
+; CHECK: vld1.64 {d16, d17}, [r{{.}}]
+; CHECK-NOT: vld1.64 {d16, d17}
+; CHECK: vmov.f64 d19, d16
define i32 @test(i8* %arg) nounwind {
entry:
diff --git a/test/CodeGen/ARM/2010-09-21-OptCmpBug.ll b/test/CodeGen/ARM/2010-09-21-OptCmpBug.ll
new file mode 100644
index 000000000000..d2820918626a
--- /dev/null
+++ b/test/CodeGen/ARM/2010-09-21-OptCmpBug.ll
@@ -0,0 +1,84 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10
+
+declare noalias i8* @malloc(i32) nounwind
+
+define internal void @gl_DrawPixels(i32 %width, i32 %height, i32 %format, i32 %type, i8* %pixels) nounwind {
+entry:
+ br i1 undef, label %bb3.i, label %bb3
+
+bb3.i: ; preds = %entry
+ unreachable
+
+gl_error.exit: ; preds = %bb22
+ ret void
+
+bb3: ; preds = %entry
+ br i1 false, label %bb5, label %bb4
+
+bb4: ; preds = %bb3
+ br label %bb5
+
+bb5: ; preds = %bb4, %bb3
+ br i1 undef, label %bb19, label %bb22
+
+bb19: ; preds = %bb5
+ switch i32 %type, label %bb3.i6.i [
+ i32 5120, label %bb1.i13
+ i32 5121, label %bb1.i13
+ i32 6656, label %bb9.i.i6
+ ]
+
+bb9.i.i6: ; preds = %bb19
+ br label %bb1.i13
+
+bb3.i6.i: ; preds = %bb19
+ unreachable
+
+bb1.i13: ; preds = %bb9.i.i6, %bb19, %bb19
+ br i1 undef, label %bb3.i17, label %bb2.i16
+
+bb2.i16: ; preds = %bb1.i13
+ unreachable
+
+bb3.i17: ; preds = %bb1.i13
+ br i1 undef, label %bb4.i18, label %bb23.i
+
+bb4.i18: ; preds = %bb3.i17
+ %0 = mul nsw i32 %height, %width
+ %1 = and i32 %0, 7
+ %not..i = icmp ne i32 %1, 0
+ %2 = zext i1 %not..i to i32
+ %storemerge2.i = add i32 0, %2
+ %3 = call noalias i8* @malloc(i32 %storemerge2.i) nounwind
+ br i1 undef, label %bb3.i9, label %bb9.i
+
+bb9.i: ; preds = %bb4.i18
+ br i1 undef, label %bb13.i19, label %bb.i24.i
+
+bb13.i19: ; preds = %bb9.i
+ br i1 undef, label %bb14.i20, label %bb15.i
+
+bb14.i20: ; preds = %bb13.i19
+ unreachable
+
+bb15.i: ; preds = %bb13.i19
+ unreachable
+
+bb.i24.i: ; preds = %bb.i24.i, %bb9.i
+ %storemerge1.i21.i = phi i32 [ %4, %bb.i24.i ], [ 0, %bb9.i ]
+ %4 = add i32 %storemerge1.i21.i, 1
+ %exitcond47.i = icmp eq i32 %4, %storemerge2.i
+ br i1 %exitcond47.i, label %bb22, label %bb.i24.i
+
+bb23.i: ; preds = %bb3.i17
+ unreachable
+
+bb3.i9: ; preds = %bb4.i18
+ unreachable
+
+bb22: ; preds = %bb.i24.i, %bb5
+ br i1 undef, label %gl_error.exit, label %bb23
+
+bb23: ; preds = %bb22
+ ret void
+}
diff --git a/test/CodeGen/ARM/2010-09-29-mc-asm-header-test.ll b/test/CodeGen/ARM/2010-09-29-mc-asm-header-test.ll
new file mode 100644
index 000000000000..bda14bcb1520
--- /dev/null
+++ b/test/CodeGen/ARM/2010-09-29-mc-asm-header-test.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s
+; This tests that MC/asm header conversion is smooth
+;
+; CHECK: .syntax unified
+; CHECK: .eabi_attribute 20, 1
+; CHECK: .eabi_attribute 21, 1
+; CHECK: .eabi_attribute 23, 3
+; CHECK: .eabi_attribute 24, 1
+; CHECK: .eabi_attribute 25, 1
+
+define i32 @f(i64 %z) {
+ ret i32 0
+}
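
[Editor's note] For reference, a hedged mapping of the attribute tags checked above to their names in the ARM EABI build-attributes addenda (the tag names come from the spec, not from this test, and are worth double-checking against the current ABI documents):

  .eabi_attribute 20, 1   @ Tag_ABI_FP_denormal
  .eabi_attribute 21, 1   @ Tag_ABI_FP_exceptions
  .eabi_attribute 23, 3   @ Tag_ABI_FP_number_model
  .eabi_attribute 24, 1   @ Tag_ABI_align_needed (Tag_ABI_align8_needed in older docs)
  .eabi_attribute 25, 1   @ Tag_ABI_align_preserved (Tag_ABI_align8_preserved in older docs)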
diff --git a/test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll b/test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll
new file mode 100644
index 000000000000..ee443febcc1e
--- /dev/null
+++ b/test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll
@@ -0,0 +1,37 @@
+; RUN: llc %s -mtriple=arm-linux-gnueabi -filetype=obj -o - | \
+; RUN: elf-dump --dump-section-data | FileCheck -check-prefix=BASIC %s
+; RUN: llc %s -mtriple=armv7-linux-gnueabi -march=arm -mcpu=cortex-a8 \
+; RUN: -mattr=-neon -mattr=+vfp2 \
+; RUN: -arm-reserve-r9 -filetype=obj -o - | \
+; RUN: elf-dump --dump-section-data | FileCheck -check-prefix=CORTEXA8 %s
+
+
+; This tests that the expected ARM attributes are emitted.
+;
+; BASIC: .ARM.attributes
+; BASIC-NEXT: 0x70000003
+; BASIC-NEXT: 0x00000000
+; BASIC-NEXT: 0x00000000
+; BASIC-NEXT: 0x0000003c
+; BASIC-NEXT: 0x00000020
+; BASIC-NEXT: 0x00000000
+; BASIC-NEXT: 0x00000000
+; BASIC-NEXT: 0x00000001
+; BASIC-NEXT: 0x00000000
+; BASIC-NEXT: '411f0000 00616561 62690001 15000000 06020801 09011401 15011703 18011901'
+
+; CORTEXA8: .ARM.attributes
+; CORTEXA8-NEXT: 0x70000003
+; CORTEXA8-NEXT: 0x00000000
+; CORTEXA8-NEXT: 0x00000000
+; CORTEXA8-NEXT: 0x0000003c
+; CORTEXA8-NEXT: 0x0000002f
+; CORTEXA8-NEXT: 0x00000000
+; CORTEXA8-NEXT: 0x00000000
+; CORTEXA8-NEXT: 0x00000001
+; CORTEXA8-NEXT: 0x00000000
+; CORTEXA8-NEXT: '412e0000 00616561 62690001 24000000 05434f52 5445582d 41380006 0a074108 0109020a 02140115 01170318 011901'
+
+define i32 @f(i64 %z) {
+ ret i32 0
+}
diff --git a/test/CodeGen/ARM/2010-10-25-ifcvt-ldm.ll b/test/CodeGen/ARM/2010-10-25-ifcvt-ldm.ll
new file mode 100644
index 000000000000..163c9b030ec8
--- /dev/null
+++ b/test/CodeGen/ARM/2010-10-25-ifcvt-ldm.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -mtriple=armv6-apple-darwin -mcpu=arm1136jf-s | FileCheck %s
+; Radar 8589805: Counting the number of microcoded operations, such as for an
+; LDM instruction, was causing an assertion failure because the microop count
+; was being treated as an instruction count.
+
+; CHECK: push
+; CHECK: ldmia
+; CHECK: ldmia
+; CHECK: ldmia
+
+define i32 @test(i32 %x) {
+entry:
+ %0 = tail call signext i16 undef(i32* undef)
+ switch i32 undef, label %bb3 [
+ i32 0, label %bb4
+ i32 1, label %bb1
+ i32 2, label %bb2
+ ]
+
+bb1:
+ ret i32 1
+
+bb2:
+ ret i32 2
+
+bb3:
+ ret i32 1
+
+bb4:
+ ret i32 3
+}
diff --git a/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll b/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll
new file mode 100644
index 000000000000..04220949027f
--- /dev/null
+++ b/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll
@@ -0,0 +1,85 @@
+; RUN: llc < %s -verify-machineinstrs -spiller=standard
+; RUN: llc < %s -verify-machineinstrs -spiller=inline
+; PR8612
+;
+; This test has an inline asm with early-clobber arguments.
+; It is big enough that one of the early clobber registers is spilled.
+;
+; All the spillers would get the live ranges wrong when spilling an early
+; clobber, allowing the undef register to be allocated to the same register as
+; the early clobber.
+;
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32"
+target triple = "armv7-eabi"
+
+%0 = type { i32, i32 }
+
+define void @foo(i32* %in) nounwind {
+entry:
+ br label %bb.i
+
+bb.i: ; preds = %bb.i, %entry
+ br i1 undef, label %bb10.preheader.i, label %bb.i
+
+bb10.preheader.i: ; preds = %bb.i
+ br label %bb10.i
+
+bb10.i: ; preds = %bb10.i, %bb10.preheader.i
+ br i1 undef, label %bb27.i, label %bb10.i
+
+bb27.i: ; preds = %bb10.i
+ br label %bb28.i
+
+bb28.i: ; preds = %bb28.i, %bb27.i
+ br i1 undef, label %presymmetry.exit, label %bb28.i
+
+presymmetry.exit: ; preds = %bb28.i
+ %tmp175387 = or i32 undef, 12
+ %scevgep101.i = getelementptr i32* %in, i32 undef
+ %tmp189401 = or i32 undef, 7
+ %scevgep97.i = getelementptr i32* %in, i32 undef
+ %tmp198410 = or i32 undef, 1
+ %scevgep.i48 = getelementptr i32* %in, i32 undef
+ %0 = load i32* %scevgep.i48, align 4
+ %1 = add nsw i32 %0, 0
+ store i32 %1, i32* undef, align 4
+ %asmtmp.i.i33.i.i.i = tail call %0 asm "smull\09$0, $1, $2, $3", "=&r,=&r,%r,r,~{cc}"(i32 undef, i32 1518500250) nounwind
+ %asmresult1.i.i34.i.i.i = extractvalue %0 %asmtmp.i.i33.i.i.i, 1
+ %2 = shl i32 %asmresult1.i.i34.i.i.i, 1
+ %3 = load i32* null, align 4
+ %4 = load i32* undef, align 4
+ %5 = sub nsw i32 %3, %4
+ %6 = load i32* undef, align 4
+ %7 = load i32* null, align 4
+ %8 = sub nsw i32 %6, %7
+ %9 = load i32* %scevgep97.i, align 4
+ %10 = load i32* undef, align 4
+ %11 = sub nsw i32 %9, %10
+ %12 = load i32* null, align 4
+ %13 = load i32* %scevgep101.i, align 4
+ %14 = sub nsw i32 %12, %13
+ %15 = load i32* %scevgep.i48, align 4
+ %16 = load i32* null, align 4
+ %17 = add nsw i32 %16, %15
+ %18 = sub nsw i32 %15, %16
+ %19 = load i32* undef, align 4
+ %20 = add nsw i32 %19, %2
+ %21 = sub nsw i32 %19, %2
+ %22 = add nsw i32 %14, %5
+ %23 = sub nsw i32 %5, %14
+ %24 = add nsw i32 %11, %8
+ %25 = sub nsw i32 %8, %11
+ %26 = add nsw i32 %21, %23
+ store i32 %26, i32* %scevgep.i48, align 4
+ %27 = sub nsw i32 %25, %18
+ store i32 %27, i32* null, align 4
+ %28 = sub nsw i32 %23, %21
+ store i32 %28, i32* undef, align 4
+ %29 = add nsw i32 %18, %25
+ store i32 %29, i32* undef, align 4
+ %30 = add nsw i32 %17, %22
+ store i32 %30, i32* %scevgep101.i, align 4
+ %31 = add nsw i32 %20, %24
+ store i32 %31, i32* null, align 4
+ unreachable
+}
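
[Editor's note] The early-clobber constraint the comments in this test refer to is the "&" modifier in the inline asm constraint string ("=&r" above). A minimal, self-contained illustration in LLVM IR of this era; the function is hypothetical and not part of the test:

  define i32 @early_clobber_demo(i32 %a, i32 %b) nounwind {
  entry:
    ; "=&r": the output is written before the inputs are consumed, so the
    ; register allocator must not give it the same register as %a or %b.
    %r = tail call i32 asm "add $0, $1, $2", "=&r,r,r"(i32 %a, i32 %b) nounwind
    ret i32 %r
  }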
diff --git a/test/CodeGen/ARM/2010-11-29-PrologueBug.ll b/test/CodeGen/ARM/2010-11-29-PrologueBug.ll
new file mode 100644
index 000000000000..8d7541feae94
--- /dev/null
+++ b/test/CodeGen/ARM/2010-11-29-PrologueBug.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB2
+; rdar://8690640
+
+define i32* @t(i32* %x) nounwind {
+entry:
+; ARM: t:
+; ARM: push
+; ARM: mov r7, sp
+; ARM: bl _foo
+; ARM: bl _foo
+; ARM: bl _foo
+; ARM: ldmia sp!, {r7, pc}
+
+; THUMB2: t:
+; THUMB2: push
+; THUMB2: mov r7, sp
+; THUMB2: blx _foo
+; THUMB2: blx _foo
+; THUMB2: blx _foo
+; THUMB2: pop
+ %0 = tail call i32* @foo(i32* %x) nounwind
+ %1 = tail call i32* @foo(i32* %0) nounwind
+ %2 = tail call i32* @foo(i32* %1) nounwind
+ ret i32* %2
+}
+
+declare i32* @foo(i32*)
diff --git a/test/CodeGen/ARM/2010-11-30-reloc-movt.ll b/test/CodeGen/ARM/2010-11-30-reloc-movt.ll
new file mode 100644
index 000000000000..930cd8d41563
--- /dev/null
+++ b/test/CodeGen/ARM/2010-11-30-reloc-movt.ll
@@ -0,0 +1,42 @@
+; RUN: llc %s -mtriple=armv7-linux-gnueabi -filetype=obj -o - | \
+; RUN: elf-dump --dump-section-data | FileCheck -check-prefix=OBJ %s
+
+target triple = "armv7-none-linux-gnueabi"
+
+@a = external global i8
+
+define arm_aapcs_vfpcc i32 @barf() nounwind {
+entry:
+ %0 = tail call arm_aapcs_vfpcc i32 @foo(i8* @a) nounwind
+ ret i32 %0
+; OBJ: '.text'
+; OBJ-NEXT: 'sh_type'
+; OBJ-NEXT: 'sh_flags'
+; OBJ-NEXT: 'sh_addr'
+; OBJ-NEXT: 'sh_offset'
+; OBJ-NEXT: 'sh_size'
+; OBJ-NEXT: 'sh_link'
+; OBJ-NEXT: 'sh_info'
+; OBJ-NEXT: 'sh_addralign'
+; OBJ-NEXT: 'sh_entsize'
+; OBJ-NEXT: '_section_data', '00482de9 000000e3 000040e3 feffffeb 0088bde8'
+
+; OBJ: Relocation 0x00000000
+; OBJ-NEXT: 'r_offset', 0x00000004
+; OBJ-NEXT: 'r_sym', 0x00000007
+; OBJ-NEXT: 'r_type', 0x0000002b
+
+; OBJ: Relocation 0x00000001
+; OBJ-NEXT: 'r_offset', 0x00000008
+; OBJ-NEXT: 'r_sym'
+; OBJ-NEXT: 'r_type', 0x0000002c
+
+; OBJ: # Relocation 0x00000002
+; OBJ-NEXT: 'r_offset', 0x0000000c
+; OBJ-NEXT: 'r_sym', 0x00000008
+; OBJ-NEXT: 'r_type', 0x0000001c
+
+}
+
+declare arm_aapcs_vfpcc i32 @foo(i8*)
+
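[Editor's note] A hedged reading of the three relocations checked above, assuming the usual armv7 movw/movt materialization of @a's address (the instruction operands are illustrative; the r_type values are the standard ELF-for-ARM codes):

  push  {r11, lr}
  movw  r0, :lower16:a     @ r_type 0x2b -> R_ARM_MOVW_ABS_NC
  movt  r0, :upper16:a     @ r_type 0x2c -> R_ARM_MOVT_ABS
  bl    foo                @ r_type 0x1c -> R_ARM_CALL
  pop   {r11, pc}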
diff --git a/test/CodeGen/ARM/2010-12-07-PEIBug.ll b/test/CodeGen/ARM/2010-12-07-PEIBug.ll
new file mode 100644
index 000000000000..c65952be3c64
--- /dev/null
+++ b/test/CodeGen/ARM/2010-12-07-PEIBug.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -mcpu=cortex-a8 | FileCheck %s
+; rdar://8728956
+
+define hidden void @foo() nounwind ssp {
+entry:
+; CHECK: foo:
+; CHECK: push {r7, lr}
+; CHECK-NEXT: mov r7, sp
+; CHECK-NEXT: vpush {d8}
+; CHECK-NEXT: vpush {d10, d11}
+ %tmp40 = load <4 x i8>* undef
+ %tmp41 = extractelement <4 x i8> %tmp40, i32 2
+ %conv42 = zext i8 %tmp41 to i32
+ %conv43 = sitofp i32 %conv42 to float
+ %div44 = fdiv float %conv43, 2.560000e+02
+ %vecinit45 = insertelement <4 x float> undef, float %div44, i32 2
+ %vecinit46 = insertelement <4 x float> %vecinit45, float 1.000000e+00, i32 3
+ store <4 x float> %vecinit46, <4 x float>* undef
+ br i1 undef, label %if.then105, label %if.else109
+
+if.then105: ; preds = %entry
+ br label %if.end114
+
+if.else109: ; preds = %entry
+ br label %if.end114
+
+if.end114: ; preds = %if.else109, %if.then105
+ %call185 = call float @bar()
+ %vecinit186 = insertelement <4 x float> undef, float %call185, i32 1
+ %call189 = call float @bar()
+ %vecinit190 = insertelement <4 x float> %vecinit186, float %call189, i32 2
+ %vecinit191 = insertelement <4 x float> %vecinit190, float 1.000000e+00, i32 3
+ store <4 x float> %vecinit191, <4 x float>* undef
+; CHECK: vpop {d10, d11}
+; CHECK-NEXT: vpop {d8}
+; CHECK-NEXT: pop {r7, pc}
+ ret void
+}
+
+declare hidden float @bar() nounwind readnone ssp
diff --git a/test/CodeGen/ARM/2010-12-08-tpsoft.ll b/test/CodeGen/ARM/2010-12-08-tpsoft.ll
new file mode 100644
index 000000000000..b8ed8199d398
--- /dev/null
+++ b/test/CodeGen/ARM/2010-12-08-tpsoft.ll
@@ -0,0 +1,52 @@
+; RUN: llc %s -mtriple=armv7-linux-gnueabi -o - | \
+; RUN: FileCheck -check-prefix=ELFASM %s
+; RUN: llc %s -mtriple=armv7-linux-gnueabi -filetype=obj -o - | \
+; RUN: elf-dump --dump-section-data | FileCheck -check-prefix=ELFOBJ %s
+
+;; Make sure that bl __aeabi_read_tp is materialized and fixed up correctly
+;; in the obj case.
+
+@i = external thread_local global i32
+@a = external global i8
+@b = external global [10 x i8]
+
+define arm_aapcs_vfpcc i32 @main() nounwind {
+entry:
+ %0 = load i32* @i, align 4
+ switch i32 %0, label %bb2 [
+ i32 12, label %bb
+ i32 13, label %bb1
+ ]
+
+bb: ; preds = %entry
+ %1 = tail call arm_aapcs_vfpcc i32 @foo(i8* @a) nounwind
+ ret i32 %1
+; ELFASM: bl __aeabi_read_tp
+
+
+; ELFOBJ: '.text'
+; ELFOBJ-NEXT: 'sh_type'
+; ELFOBJ-NEXT: 'sh_flags'
+; ELFOBJ-NEXT: 'sh_addr'
+; ELFOBJ-NEXT: 'sh_offset'
+; ELFOBJ-NEXT: 'sh_size'
+; ELFOBJ-NEXT: 'sh_link'
+; ELFOBJ-NEXT: 'sh_info'
+; ELFOBJ-NEXT: 'sh_addralign'
+; ELFOBJ-NEXT: 'sh_entsize'
+;;; BL __aeabi_read_tp is ---+
+;;; V
+; ELFOBJ-NEXT: 00482de9 3c009fe5 00109fe7 feffffeb
+
+
+bb1: ; preds = %entry
+ %2 = tail call arm_aapcs_vfpcc i32 @bar(i32* bitcast ([10 x i8]* @b to i32*)) nounwind
+ ret i32 %2
+
+bb2: ; preds = %entry
+ ret i32 -1
+}
+
+declare arm_aapcs_vfpcc i32 @foo(i8*)
+
+declare arm_aapcs_vfpcc i32 @bar(i32*)
diff --git a/test/CodeGen/ARM/2010-12-13-reloc-pic.ll b/test/CodeGen/ARM/2010-12-13-reloc-pic.ll
new file mode 100644
index 000000000000..d5aefbee197c
--- /dev/null
+++ b/test/CodeGen/ARM/2010-12-13-reloc-pic.ll
@@ -0,0 +1,100 @@
+; RUN: llc %s -mtriple=armv7-linux-gnueabi -relocation-model=pic -filetype=obj -o - | \
+; RUN: elf-dump --dump-section-data | FileCheck -check-prefix=PIC01 %s
+
+;; FIXME: Reduce this test further, or even better,
+;; redo as .s -> .o test once ARM AsmParser is working better
+
+; ModuleID = 'large2.pnacl.bc'
+target triple = "armv7-none-linux-gnueabi"
+
+%struct._Bigint = type { %struct._Bigint*, i32, i32, i32, i32, [1 x i32] }
+%struct.__FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, %struct._reent*, i8*, i32 (%struct._reent*, i8*, i8*, i32)*, i32 (%struct._reent*, i8*, i8*, i32)*, i32 (%struct._reent*, i8*, i32, i32)*, i32 (%struct._reent*, i8*)*, %struct.__sbuf, i8*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i32, %struct._flock_t, %struct._mbstate_t, i32 }
+%struct.__sbuf = type { i8*, i32 }
+%struct.__tm = type { i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+%struct._atexit = type { %struct._atexit*, i32, [32 x void ()*], %struct._on_exit_args* }
+%struct._flock_t = type { i32, i32, i32, i32, i32 }
+%struct._glue = type { %struct._glue*, i32, %struct.__FILE* }
+%struct._mbstate_t = type { i32, %union.anon }
+%struct._misc_reent = type { i8*, %struct._mbstate_t, %struct._mbstate_t, %struct._mbstate_t, [8 x i8], i32, %struct._mbstate_t, %struct._mbstate_t, %struct._mbstate_t, %struct._mbstate_t, %struct._mbstate_t }
+%struct._mprec = type { %struct._Bigint*, i32, %struct._Bigint*, %struct._Bigint** }
+%struct._on_exit_args = type { [32 x i8*], [32 x i8*], i32, i32 }
+%struct._rand48 = type { [3 x i16], [3 x i16], i16, i64 }
+%struct._reent = type { %struct.__FILE*, %struct.__FILE*, %struct.__FILE*, i32, i32, i8*, i32, i32, i8*, %struct._mprec*, void (%struct._reent*)*, i32, i32, i8*, %struct._rand48*, %struct.__tm*, i8*, void (i32)**, %struct._atexit*, %struct._atexit, %struct._glue, %struct.__FILE*, %struct._misc_reent*, i8* }
+%union.anon = type { i32 }
+
+@buf = constant [2 x i8] c"x\00", align 4
+@_impure_ptr = external thread_local global %struct._reent*
+@.str = private constant [22 x i8] c"This should fault...\0A\00", align 4
+@.str1 = private constant [40 x i8] c"We're still running. This is not good.\0A\00", align 4
+
+define i32 @main() nounwind {
+entry:
+ %0 = load %struct._reent** @_impure_ptr, align 4
+ %1 = getelementptr inbounds %struct._reent* %0, i32 0, i32 1
+ %2 = load %struct.__FILE** %1, align 4
+ %3 = bitcast %struct.__FILE* %2 to i8*
+ %4 = tail call i32 @fwrite(i8* getelementptr inbounds ([22 x i8]* @.str, i32 0, i32 0), i32 1, i32 21, i8* %3) nounwind
+ %5 = load %struct._reent** @_impure_ptr, align 4
+ %6 = getelementptr inbounds %struct._reent* %5, i32 0, i32 1
+ %7 = load %struct.__FILE** %6, align 4
+ %8 = tail call i32 @fflush(%struct.__FILE* %7) nounwind
+ store i8 121, i8* getelementptr inbounds ([2 x i8]* @buf, i32 0, i32 0), align 4
+ %9 = load %struct._reent** @_impure_ptr, align 4
+ %10 = getelementptr inbounds %struct._reent* %9, i32 0, i32 1
+ %11 = load %struct.__FILE** %10, align 4
+ %12 = bitcast %struct.__FILE* %11 to i8*
+ %13 = tail call i32 @fwrite(i8* getelementptr inbounds ([40 x i8]* @.str1, i32 0, i32 0), i32 1, i32 39, i8* %12) nounwind
+ ret i32 1
+}
+
+
+; PIC01: Relocation 0x00000000
+; PIC01-NEXT: 'r_offset', 0x0000001c
+; PIC01-NEXT: 'r_sym'
+; PIC01-NEXT: 'r_type', 0x0000001b
+
+
+; PIC01: Relocation 0x00000001
+; PIC01-NEXT: 'r_offset', 0x00000038
+; PIC01-NEXT: 'r_sym'
+; PIC01-NEXT: 'r_type', 0x0000001b
+
+; PIC01: Relocation 0x00000002
+; PIC01-NEXT: 'r_offset', 0x00000044
+; PIC01-NEXT: 'r_sym'
+; PIC01-NEXT: 'r_type', 0x0000001b
+
+; PIC01: Relocation 0x00000003
+; PIC01-NEXT: 'r_offset', 0x00000070
+; PIC01-NEXT: 'r_sym'
+; PIC01-NEXT: 'r_type', 0x0000001b
+
+; PIC01: Relocation 0x00000004
+; PIC01-NEXT: 'r_offset', 0x0000007c
+; PIC01-NEXT: 'r_sym'
+; PIC01-NEXT: 'r_type', 0x00000019
+
+
+; PIC01: Relocation 0x00000005
+; PIC01-NEXT: 'r_offset', 0x00000080
+; PIC01-NEXT: 'r_sym'
+; PIC01-NEXT: 'r_type', 0x00000018
+
+; PIC01: Relocation 0x00000006
+; PIC01-NEXT: 'r_offset', 0x00000084
+; PIC01-NEXT: 'r_sym'
+; PIC01-NEXT: 'r_type', 0x00000068
+
+; PIC01: Relocation 0x00000007
+; PIC01-NEXT: 'r_offset', 0x00000088
+; PIC01-NEXT: 'r_sym'
+; PIC01-NEXT: 'r_type', 0x0000001a
+
+; PIC01: Relocation 0x00000008
+; PIC01-NEXT: 'r_offset', 0x0000008c
+; PIC01-NEXT: 'r_sym'
+; PIC01-NEXT: 'r_type', 0x00000018
+
+declare i32 @fwrite(i8* nocapture, i32, i32, i8* nocapture) nounwind
+
+declare i32 @fflush(%struct.__FILE* nocapture) nounwind
diff --git a/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll b/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll
new file mode 100644
index 000000000000..eaa34e7960fb
--- /dev/null
+++ b/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll
@@ -0,0 +1,35 @@
+; RUN: llc %s -mtriple=armv7-linux-gnueabi -filetype=obj -o - | \
+; RUN: elf-dump --dump-section-data | FileCheck -check-prefix=OBJ %s
+; RUN: llc %s -mtriple=armv7-linux-gnueabi -o - | \
+; RUN: FileCheck -check-prefix=ASM %s
+
+
+@dummy = internal global i32 666
+@array00 = internal global [20 x i32] zeroinitializer
+@sum = internal global i32 55
+@STRIDE = internal global i32 8
+
+; ASM: .type array00,%object @ @array00
+; ASM-NEXT: .lcomm array00,80 @ @array00
+; ASM-NEXT: .type _MergedGlobals,%object @ @_MergedGlobals
+
+
+
+; OBJ: Section 0x00000003
+; OBJ-NEXT: '.bss'
+
+; OBJ: 'array00'
+; OBJ-NEXT: 'st_value', 0x00000000
+; OBJ-NEXT: 'st_size', 0x00000050
+; OBJ-NEXT: 'st_bind', 0x00000000
+; OBJ-NEXT: 'st_type', 0x00000001
+; OBJ-NEXT: 'st_other', 0x00000000
+; OBJ-NEXT: 'st_shndx', 0x00000003
+
+define i32 @main(i32 %argc) nounwind {
+ %1 = load i32* @sum, align 4
+ %2 = getelementptr [20 x i32]* @array00, i32 0, i32 %argc
+ %3 = load i32* %2, align 4
+ %4 = add i32 %1, %3
+ ret i32 %4;
+}
diff --git a/test/CodeGen/ARM/2010-12-17-LocalStackSlotCrash.ll b/test/CodeGen/ARM/2010-12-17-LocalStackSlotCrash.ll
new file mode 100644
index 000000000000..a2f50b587b22
--- /dev/null
+++ b/test/CodeGen/ARM/2010-12-17-LocalStackSlotCrash.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=armv6-apple-darwin10
+; <rdar://problem/8782198>
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:64-n32"
+target triple = "armv6-apple-darwin10"
+
+define void @func() nounwind optsize {
+entry:
+ %buf = alloca [8096 x i8], align 1
+ br label %bb
+
+bb:
+ %p.2 = getelementptr [8096 x i8]* %buf, i32 0, i32 0
+ store i8 undef, i8* %p.2, align 1
+ ret void
+}
diff --git a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
new file mode 100644
index 000000000000..99baad2d38d1
--- /dev/null
+++ b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
@@ -0,0 +1,127 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32"
+target triple = "thumbv7-apple-darwin10"
+
+@x1 = internal global i8 1
+@x2 = internal global i8 1
+@x3 = internal global i8 1
+@x4 = internal global i8 1
+@x5 = global i8 1
+
+; Check debug info output for merged global.
+; DW_AT_location
+; DW_OP_addr
+; DW_OP_plus
+; .long __MergedGlobals
+; DW_OP_constu
+; offset
+
+;CHECK: .byte 7 @ Abbrev [7] 0x1a5:0x13 DW_TAG_variable
+;CHECK-NEXT: .ascii "x2" @ DW_AT_name
+;CHECK-NEXT: .byte 0
+;CHECK-NEXT: .long 93 @ DW_AT_type
+;CHECK-NEXT: .byte 1 @ DW_AT_decl_file
+;CHECK-NEXT: .byte 6 @ DW_AT_decl_line
+;CHECK-NEXT: .byte 8 @ DW_AT_location
+;CHECK-NEXT: .byte 3
+;CHECK-NEXT: .long __MergedGlobals
+;CHECK-NEXT: .byte 16
+;CHECK-NEXT: .byte 1
+;CHECK-NEXT: .byte 34
+
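[Editor's note] A hedged decoding of the location expression those CHECK lines spell out, using the standard DWARF opcode values (DW_OP_addr = 3, DW_OP_constu = 16, DW_OP_plus = 34); the offset of 1 is where x2 is assumed to sit inside __MergedGlobals:

  .byte  8                 @ DW_AT_location: 8-byte expression block
  .byte  3                 @ DW_OP_addr
  .long  __MergedGlobals   @ base of the merged-global blob
  .byte  16                @ DW_OP_constu
  .byte  1                 @ ULEB128 operand: byte offset of x2
  .byte  34                @ DW_OP_plus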
+define zeroext i8 @get1(i8 zeroext %a) nounwind optsize {
+entry:
+ tail call void @llvm.dbg.value(metadata !{i8 %a}, i64 0, metadata !10), !dbg !30
+ %0 = load i8* @x1, align 4, !dbg !30
+ tail call void @llvm.dbg.value(metadata !{i8 %0}, i64 0, metadata !11), !dbg !30
+ store i8 %a, i8* @x1, align 4, !dbg !30
+ ret i8 %0, !dbg !31
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+define zeroext i8 @get2(i8 zeroext %a) nounwind optsize {
+entry:
+ tail call void @llvm.dbg.value(metadata !{i8 %a}, i64 0, metadata !18), !dbg !32
+ %0 = load i8* @x2, align 4, !dbg !32
+ tail call void @llvm.dbg.value(metadata !{i8 %0}, i64 0, metadata !19), !dbg !32
+ store i8 %a, i8* @x2, align 4, !dbg !32
+ ret i8 %0, !dbg !33
+}
+
+define zeroext i8 @get3(i8 zeroext %a) nounwind optsize {
+entry:
+ tail call void @llvm.dbg.value(metadata !{i8 %a}, i64 0, metadata !21), !dbg !34
+ %0 = load i8* @x3, align 4, !dbg !34
+ tail call void @llvm.dbg.value(metadata !{i8 %0}, i64 0, metadata !22), !dbg !34
+ store i8 %a, i8* @x3, align 4, !dbg !34
+ ret i8 %0, !dbg !35
+}
+
+define zeroext i8 @get4(i8 zeroext %a) nounwind optsize {
+entry:
+ tail call void @llvm.dbg.value(metadata !{i8 %a}, i64 0, metadata !24), !dbg !36
+ %0 = load i8* @x4, align 4, !dbg !36
+ tail call void @llvm.dbg.value(metadata !{i8 %0}, i64 0, metadata !25), !dbg !36
+ store i8 %a, i8* @x4, align 4, !dbg !36
+ ret i8 %0, !dbg !37
+}
+
+define zeroext i8 @get5(i8 zeroext %a) nounwind optsize {
+entry:
+ tail call void @llvm.dbg.value(metadata !{i8 %a}, i64 0, metadata !27), !dbg !38
+ %0 = load i8* @x5, align 4, !dbg !38
+ tail call void @llvm.dbg.value(metadata !{i8 %0}, i64 0, metadata !28), !dbg !38
+ store i8 %a, i8* @x5, align 4, !dbg !38
+ ret i8 %0, !dbg !39
+}
+
+!llvm.dbg.sp = !{!0, !6, !7, !8, !9}
+!llvm.dbg.lv.get1 = !{!10, !11}
+!llvm.dbg.gv = !{!13, !14, !15, !16, !17}
+!llvm.dbg.lv.get2 = !{!18, !19}
+!llvm.dbg.lv.get3 = !{!21, !22}
+!llvm.dbg.lv.get4 = !{!24, !25}
+!llvm.dbg.lv.get5 = !{!27, !28}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"get1", metadata !"get1", metadata !"get1", metadata !1, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get1} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"foo.c", metadata !"/tmp/", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 1, metadata !"foo.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2369.8)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5, metadata !5}
+!5 = metadata !{i32 589860, metadata !1, metadata !"_Bool", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 589870, i32 0, metadata !1, metadata !"get2", metadata !"get2", metadata !"get2", metadata !1, i32 7, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get2} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 589870, i32 0, metadata !1, metadata !"get3", metadata !"get3", metadata !"get3", metadata !1, i32 10, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get3} ; [ DW_TAG_subprogram ]
+!8 = metadata !{i32 589870, i32 0, metadata !1, metadata !"get4", metadata !"get4", metadata !"get4", metadata !1, i32 13, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get4} ; [ DW_TAG_subprogram ]
+!9 = metadata !{i32 589870, i32 0, metadata !1, metadata !"get5", metadata !"get5", metadata !"get5", metadata !1, i32 16, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get5} ; [ DW_TAG_subprogram ]
+!10 = metadata !{i32 590081, metadata !0, metadata !"a", metadata !1, i32 4, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!11 = metadata !{i32 590080, metadata !12, metadata !"b", metadata !1, i32 4, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!12 = metadata !{i32 589835, metadata !0, i32 4, i32 0, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!13 = metadata !{i32 589876, i32 0, metadata !1, metadata !"x1", metadata !"x1", metadata !"", metadata !1, i32 3, metadata !5, i1 true, i1 true, i8* @x1} ; [ DW_TAG_variable ]
+!14 = metadata !{i32 589876, i32 0, metadata !1, metadata !"x2", metadata !"x2", metadata !"", metadata !1, i32 6, metadata !5, i1 true, i1 true, i8* @x2} ; [ DW_TAG_variable ]
+!15 = metadata !{i32 589876, i32 0, metadata !1, metadata !"x3", metadata !"x3", metadata !"", metadata !1, i32 9, metadata !5, i1 true, i1 true, i8* @x3} ; [ DW_TAG_variable ]
+!16 = metadata !{i32 589876, i32 0, metadata !1, metadata !"x4", metadata !"x4", metadata !"", metadata !1, i32 12, metadata !5, i1 true, i1 true, i8* @x4} ; [ DW_TAG_variable ]
+!17 = metadata !{i32 589876, i32 0, metadata !1, metadata !"x5", metadata !"x5", metadata !"", metadata !1, i32 15, metadata !5, i1 false, i1 true, i8* @x5} ; [ DW_TAG_variable ]
+!18 = metadata !{i32 590081, metadata !6, metadata !"a", metadata !1, i32 7, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!19 = metadata !{i32 590080, metadata !20, metadata !"b", metadata !1, i32 7, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!20 = metadata !{i32 589835, metadata !6, i32 7, i32 0, metadata !1, i32 1} ; [ DW_TAG_lexical_block ]
+!21 = metadata !{i32 590081, metadata !7, metadata !"a", metadata !1, i32 10, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!22 = metadata !{i32 590080, metadata !23, metadata !"b", metadata !1, i32 10, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!23 = metadata !{i32 589835, metadata !7, i32 10, i32 0, metadata !1, i32 2} ; [ DW_TAG_lexical_block ]
+!24 = metadata !{i32 590081, metadata !8, metadata !"a", metadata !1, i32 13, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!25 = metadata !{i32 590080, metadata !26, metadata !"b", metadata !1, i32 13, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!26 = metadata !{i32 589835, metadata !8, i32 13, i32 0, metadata !1, i32 3} ; [ DW_TAG_lexical_block ]
+!27 = metadata !{i32 590081, metadata !9, metadata !"a", metadata !1, i32 16, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!28 = metadata !{i32 590080, metadata !29, metadata !"b", metadata !1, i32 16, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!29 = metadata !{i32 589835, metadata !9, i32 16, i32 0, metadata !1, i32 4} ; [ DW_TAG_lexical_block ]
+!30 = metadata !{i32 4, i32 0, metadata !0, null}
+!31 = metadata !{i32 4, i32 0, metadata !12, null}
+!32 = metadata !{i32 7, i32 0, metadata !6, null}
+!33 = metadata !{i32 7, i32 0, metadata !20, null}
+!34 = metadata !{i32 10, i32 0, metadata !7, null}
+!35 = metadata !{i32 10, i32 0, metadata !23, null}
+!36 = metadata !{i32 13, i32 0, metadata !8, null}
+!37 = metadata !{i32 13, i32 0, metadata !26, null}
+!38 = metadata !{i32 16, i32 0, metadata !9, null}
+!39 = metadata !{i32 16, i32 0, metadata !29, null}
diff --git a/test/CodeGen/ARM/2011-02-04-AntidepMultidef.ll b/test/CodeGen/ARM/2011-02-04-AntidepMultidef.ll
new file mode 100644
index 000000000000..85a113755bf4
--- /dev/null
+++ b/test/CodeGen/ARM/2011-02-04-AntidepMultidef.ll
@@ -0,0 +1,128 @@
+; RUN: llc < %s -asm-verbose=false -O3 -mtriple=armv6-apple-darwin -relocation-model=pic -mcpu=arm1136jf-s | FileCheck %s
+; rdar://8959122 illegal register operands for UMULL instruction
+; in cfrac nightly test.
+; Armv6 generates a umull that must write to two distinct destination regs.
+
+; ModuleID = 'bugpoint-reduced-simplified.bc'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:64-n32"
+target triple = "armv6-apple-darwin10"
+
+define void @ptoa() nounwind {
+entry:
+ br i1 false, label %bb3, label %bb
+
+bb: ; preds = %entry
+ br label %bb3
+
+bb3: ; preds = %bb, %entry
+ %0 = call noalias i8* @malloc() nounwind
+ br i1 undef, label %bb46, label %bb8
+
+bb8: ; preds = %bb3
+ %1 = getelementptr inbounds i8* %0, i32 0
+ store i8 0, i8* %1, align 1
+ %2 = call i32 @ptou() nounwind
+ ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
+ ; CHECK-NOT: [[REGISTER]],
+ ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
+ ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
+ ; CHECK-NOT: [[REGISTER]],
+ ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
+ %3 = udiv i32 %2, 10
+ %4 = urem i32 %3, 10
+ %5 = icmp ult i32 %4, 10
+ %6 = trunc i32 %4 to i8
+ %7 = or i8 %6, 48
+ %8 = add i8 %6, 87
+ %iftmp.5.0.1 = select i1 %5, i8 %7, i8 %8
+ store i8 %iftmp.5.0.1, i8* undef, align 1
+ ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
+ ; CHECK-NOT: [[REGISTER]],
+ ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
+ ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
+ ; CHECK-NOT: [[REGISTER]],
+ ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
+ %9 = udiv i32 %2, 100
+ %10 = urem i32 %9, 10
+ %11 = icmp ult i32 %10, 10
+ %12 = trunc i32 %10 to i8
+ %13 = or i8 %12, 48
+ %14 = add i8 %12, 87
+ %iftmp.5.0.2 = select i1 %11, i8 %13, i8 %14
+ store i8 %iftmp.5.0.2, i8* undef, align 1
+ ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
+ ; CHECK-NOT: [[REGISTER]],
+ ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
+ ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
+ ; CHECK-NOT: [[REGISTER]],
+ ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
+ %15 = udiv i32 %2, 10000
+ %16 = urem i32 %15, 10
+ %17 = icmp ult i32 %16, 10
+ %18 = trunc i32 %16 to i8
+ %19 = or i8 %18, 48
+ %20 = add i8 %18, 87
+ %iftmp.5.0.4 = select i1 %17, i8 %19, i8 %20
+ store i8 %iftmp.5.0.4, i8* null, align 1
+ ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
+ ; CHECK-NOT: [[REGISTER]],
+ ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
+ ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
+ ; CHECK-NOT: [[REGISTER]],
+ ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
+ %21 = udiv i32 %2, 100000
+ %22 = urem i32 %21, 10
+ %23 = icmp ult i32 %22, 10
+ %iftmp.5.0.5 = select i1 %23, i8 0, i8 undef
+ store i8 %iftmp.5.0.5, i8* undef, align 1
+ ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
+ ; CHECK-NOT: [[REGISTER]],
+ ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
+ ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
+ ; CHECK-NOT: [[REGISTER]],
+ ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
+ %24 = udiv i32 %2, 1000000
+ %25 = urem i32 %24, 10
+ %26 = icmp ult i32 %25, 10
+ %27 = trunc i32 %25 to i8
+ %28 = or i8 %27, 48
+ %29 = add i8 %27, 87
+ %iftmp.5.0.6 = select i1 %26, i8 %28, i8 %29
+ store i8 %iftmp.5.0.6, i8* undef, align 1
+ ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
+ ; CHECK-NOT: [[REGISTER]],
+ ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
+ ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
+ ; CHECK-NOT: [[REGISTER]],
+ ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
+ %30 = udiv i32 %2, 10000000
+ %31 = urem i32 %30, 10
+ %32 = icmp ult i32 %31, 10
+ %33 = trunc i32 %31 to i8
+ %34 = or i8 %33, 48
+ %35 = add i8 %33, 87
+ %iftmp.5.0.7 = select i1 %32, i8 %34, i8 %35
+ store i8 %iftmp.5.0.7, i8* undef, align 1
+ ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
+ ; CHECK-NOT: [[REGISTER]],
+ ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
+ ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
+ ; CHECK-NOT: [[REGISTER]],
+ ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
+ %36 = udiv i32 %2, 100000000
+ %37 = urem i32 %36, 10
+ %38 = icmp ult i32 %37, 10
+ %39 = trunc i32 %37 to i8
+ %40 = or i8 %39, 48
+ %41 = add i8 %39, 87
+ %iftmp.5.0.8 = select i1 %38, i8 %40, i8 %41
+ store i8 %iftmp.5.0.8, i8* null, align 1
+ unreachable
+
+bb46: ; preds = %bb3
+ ret void
+}
+
+declare noalias i8* @malloc() nounwind
+
+declare i32 @ptou()
diff --git a/test/CodeGen/ARM/2011-02-07-AntidepClobber.ll b/test/CodeGen/ARM/2011-02-07-AntidepClobber.ll
new file mode 100644
index 000000000000..f3d788818afc
--- /dev/null
+++ b/test/CodeGen/ARM/2011-02-07-AntidepClobber.ll
@@ -0,0 +1,89 @@
+; RUN: llc < %s -asm-verbose=false -O3 -mtriple=armv5e-none-linux-gnueabi | FileCheck %s
+; PR8986: PostRA antidependence breaker must respect "earlyclobber".
+; armv5e generates a mulv5 that cannot use the same reg for src/dest.
+
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32"
+target triple = "armv5e-none-linux-gnueabi"
+
+define hidden fastcc void @storeAtts() nounwind {
+entry:
+ %.SV116 = alloca i8**
+ br i1 undef, label %meshBB520, label %meshBB464
+
+bb15: ; preds = %meshBB424
+ br i1 undef, label %bb216, label %meshBB396
+
+bb22: ; preds = %meshBB396
+ br label %cBB564
+
+cBB564: ; preds = %cBB564, %bb22
+ br label %cBB564
+
+poolStoreString.exit.thread: ; preds = %meshBB424
+ ret void
+
+bb78: ; preds = %meshBB412
+ unreachable
+
+bb129: ; preds = %meshBB540
+ br i1 undef, label %bb131.loopexit, label %meshBB540
+
+bb131.loopexit: ; preds = %bb129
+ br label %bb131
+
+bb131: ; preds = %bb135, %bb131.loopexit
+ br i1 undef, label %bb134, label %meshBB396
+
+bb134: ; preds = %bb131
+ unreachable
+
+bb135: ; preds = %meshBB396
+ %uriHash.1.phi.load = load i32* undef
+ %.load120 = load i8*** %.SV116
+ %.phi24 = load i8* null
+ %.phi26 = load i8** null
+ store i8 %.phi24, i8* %.phi26, align 1
+ %0 = getelementptr inbounds i8* %.phi26, i32 1
+ store i8* %0, i8** %.load120, align 4
+ ; CHECK: mul [[REGISTER:lr|r[0-9]+]],
+ ; CHECK-NOT: [[REGISTER]],
+ ; CHECK: {{(lr|r[0-9]+)$}}
+ %1 = mul i32 %uriHash.1.phi.load, 1000003
+ %2 = xor i32 0, %1
+ store i32 %2, i32* null
+ %3 = load i8* null, align 1
+ %4 = icmp eq i8 %3, 0
+ store i8* %0, i8** undef
+ br i1 %4, label %meshBB472, label %bb131
+
+bb212: ; preds = %meshBB540
+ unreachable
+
+bb216: ; preds = %bb15
+ ret void
+
+meshBB396: ; preds = %bb131, %bb15
+ br i1 undef, label %bb135, label %bb22
+
+meshBB412: ; preds = %meshBB464
+ br i1 undef, label %meshBB504, label %bb78
+
+meshBB424: ; preds = %meshBB464
+ br i1 undef, label %poolStoreString.exit.thread, label %bb15
+
+meshBB464: ; preds = %entry
+ br i1 undef, label %meshBB424, label %meshBB412
+
+meshBB472: ; preds = %meshBB504, %bb135
+ unreachable
+
+meshBB504: ; preds = %meshBB412
+ br label %meshBB472
+
+meshBB520: ; preds = %entry
+ br label %meshBB540
+
+meshBB540: ; preds = %meshBB520, %bb129
+ br i1 undef, label %bb212, label %bb129
+}
diff --git a/test/CodeGen/ARM/align.ll b/test/CodeGen/ARM/align.ll
index d4d01288f29b..d57c159b85cb 100644
--- a/test/CodeGen/ARM/align.ll
+++ b/test/CodeGen/ARM/align.ll
@@ -22,7 +22,7 @@
@e = global i64 4
;ELF: .align 3
;ELF: e
-;DARWIN: .align 2
+;DARWIN: .align 3
;DARWIN: _e:
@f = global float 5.0
@@ -34,7 +34,7 @@
@g = global double 6.0
;ELF: .align 3
;ELF: g:
-;DARWIN: .align 2
+;DARWIN: .align 3
;DARWIN: _g:
@bar = common global [75 x i8] zeroinitializer, align 128
diff --git a/test/CodeGen/ARM/arguments.ll b/test/CodeGen/ARM/arguments.ll
index bb7853e66ef4..c7fcb9755d9e 100644
--- a/test/CodeGen/ARM/arguments.ll
+++ b/test/CodeGen/ARM/arguments.ll
@@ -13,8 +13,8 @@ define i32 @f1(i32 %a, i64 %b) {
; test that allocating the double to r2/r3 makes r1 unavailable on gnueabi.
define i32 @f2() nounwind optsize {
; ELF: f2:
-; ELF: mov r0, #128
-; ELF: str r0, [sp]
+; ELF: mov [[REGISTER:(r[0-9]+)]], #128
+; ELF: str [[REGISTER]], [sp]
; DARWIN: f2:
; DARWIN: mov r3, #128
entry:
diff --git a/test/CodeGen/ARM/arm-and-tst-peephole.ll b/test/CodeGen/ARM/arm-and-tst-peephole.ll
new file mode 100644
index 000000000000..50c638b73931
--- /dev/null
+++ b/test/CodeGen/ARM/arm-and-tst-peephole.ll
@@ -0,0 +1,112 @@
+; RUN: llc < %s -march=arm | FileCheck -check-prefix=ARM %s
+; RUN: llc < %s -march=thumb | FileCheck -check-prefix=THUMB %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck -check-prefix=T2 %s
+
+; FIXME: The -march=thumb test doesn't change if -disable-peephole is specified.
+
+%struct.Foo = type { i8* }
+
+; ARM: foo
+; THUMB: foo
+; T2: foo
+define %struct.Foo* @foo(%struct.Foo* %this, i32 %acc) nounwind readonly align 2 {
+entry:
+ %scevgep = getelementptr %struct.Foo* %this, i32 1
+ br label %tailrecurse
+
+tailrecurse: ; preds = %sw.bb, %entry
+ %lsr.iv2 = phi %struct.Foo* [ %scevgep3, %sw.bb ], [ %scevgep, %entry ]
+ %lsr.iv = phi i32 [ %lsr.iv.next, %sw.bb ], [ 1, %entry ]
+ %acc.tr = phi i32 [ %or, %sw.bb ], [ %acc, %entry ]
+ %lsr.iv24 = bitcast %struct.Foo* %lsr.iv2 to i8**
+ %scevgep5 = getelementptr i8** %lsr.iv24, i32 -1
+ %tmp2 = load i8** %scevgep5
+ %0 = ptrtoint i8* %tmp2 to i32
+
+; ARM: ands r12, r12, #3
+; ARM-NEXT: beq
+
+; THUMB: movs r5, #3
+; THUMB-NEXT: ands r5, r4
+; THUMB-NEXT: cmp r5, #0
+; THUMB-NEXT: beq
+
+; T2: ands r12, r12, #3
+; T2-NEXT: beq
+
+ %and = and i32 %0, 3
+ %tst = icmp eq i32 %and, 0
+ br i1 %tst, label %sw.bb, label %tailrecurse.switch
+
+tailrecurse.switch: ; preds = %tailrecurse
+ switch i32 %and, label %sw.epilog [
+ i32 1, label %sw.bb
+ i32 3, label %sw.bb6
+ i32 2, label %sw.bb8
+ ]
+
+sw.bb: ; preds = %tailrecurse.switch, %tailrecurse
+ %shl = shl i32 %acc.tr, 1
+ %or = or i32 %and, %shl
+ %lsr.iv.next = add i32 %lsr.iv, 1
+ %scevgep3 = getelementptr %struct.Foo* %lsr.iv2, i32 1
+ br label %tailrecurse
+
+sw.bb6: ; preds = %tailrecurse.switch
+ ret %struct.Foo* %lsr.iv2
+
+sw.bb8: ; preds = %tailrecurse.switch
+ %tmp1 = add i32 %acc.tr, %lsr.iv
+ %add.ptr11 = getelementptr inbounds %struct.Foo* %this, i32 %tmp1
+ ret %struct.Foo* %add.ptr11
+
+sw.epilog: ; preds = %tailrecurse.switch
+ ret %struct.Foo* undef
+}
+
+; Another test that exercises the AND/TST peephole optimization and also
+; generates a predicated ANDS instruction. Check that the predicate is printed
+; after the "S" modifier on the instruction.
+
+%struct.S = type { i8* (i8*)*, [1 x i8] }
+
+; ARM: bar
+; THUMB: bar
+; T2: bar
+define internal zeroext i8 @bar(%struct.S* %x, %struct.S* nocapture %y) nounwind readonly {
+entry:
+ %0 = getelementptr inbounds %struct.S* %x, i32 0, i32 1, i32 0
+ %1 = load i8* %0, align 1
+ %2 = zext i8 %1 to i32
+; ARM: ands
+; THUMB: ands
+; T2: ands
+ %3 = and i32 %2, 112
+ %4 = icmp eq i32 %3, 0
+ br i1 %4, label %return, label %bb
+
+bb: ; preds = %entry
+ %5 = getelementptr inbounds %struct.S* %y, i32 0, i32 1, i32 0
+ %6 = load i8* %5, align 1
+ %7 = zext i8 %6 to i32
+; ARM: andsne
+; THUMB: ands
+; T2: andsne
+ %8 = and i32 %7, 112
+ %9 = icmp eq i32 %8, 0
+ br i1 %9, label %return, label %bb2
+
+bb2: ; preds = %bb
+ %10 = icmp eq i32 %3, 16
+ %11 = icmp eq i32 %8, 16
+ %or.cond = or i1 %10, %11
+ br i1 %or.cond, label %bb4, label %return
+
+bb4: ; preds = %bb2
+ %12 = ptrtoint %struct.S* %x to i32
+ %phitmp = trunc i32 %12 to i8
+ ret i8 %phitmp
+
+return: ; preds = %bb2, %bb, %entry
+ ret i8 1
+}
diff --git a/test/CodeGen/ARM/atomic-cmp.ll b/test/CodeGen/ARM/atomic-cmp.ll
new file mode 100644
index 000000000000..f31aa7bc58e3
--- /dev/null
+++ b/test/CodeGen/ARM/atomic-cmp.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s -check-prefix=ARM
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s -check-prefix=T2
+; rdar://8964854
+
+define i8 @t(i8* %a, i8 %b, i8 %c) nounwind {
+; ARM: t:
+; ARM: ldrexb
+; ARM: strexb
+
+; T2: t:
+; T2: ldrexb
+; T2: strexb
+ %tmp0 = tail call i8 @llvm.atomic.cmp.swap.i8.p0i8(i8* %a, i8 %b, i8 %c)
+ ret i8 %tmp0
+}
+
+declare i8 @llvm.atomic.cmp.swap.i8.p0i8(i8* nocapture, i8, i8) nounwind
diff --git a/test/CodeGen/ARM/bfi.ll b/test/CodeGen/ARM/bfi.ll
index 59e2b43a9172..946db1909fe5 100644
--- a/test/CodeGen/ARM/bfi.ll
+++ b/test/CodeGen/ARM/bfi.ll
@@ -16,10 +16,10 @@ entry:
ret void
}
-define i32 @f2(i32 %A, i32 %B) nounwind readnone optsize {
+define i32 @f2(i32 %A, i32 %B) nounwind {
entry:
; CHECK: f2
-; CHECK: mov r1, r1, lsr #7
+; CHECK: lsr{{.*}}#7
; CHECK: bfi r0, r1, #7, #16
%and = and i32 %A, -8388481 ; <i32> [#uses=1]
%and2 = and i32 %B, 8388480 ; <i32> [#uses=1]
@@ -27,10 +27,10 @@ entry:
ret i32 %or
}
-define i32 @f3(i32 %A, i32 %B) nounwind readnone optsize {
+define i32 @f3(i32 %A, i32 %B) nounwind {
entry:
; CHECK: f3
-; CHECK: mov r2, r0, lsr #7
+; CHECK: lsr{{.*}} #7
; CHECK: mov r0, r1
; CHECK: bfi r0, r2, #7, #16
%and = and i32 %A, 8388480 ; <i32> [#uses=1]
@@ -38,3 +38,27 @@ entry:
%or = or i32 %and2, %and ; <i32> [#uses=1]
ret i32 %or
}
+
+; rdar://8752056
+define i32 @f4(i32 %a) nounwind {
+; CHECK: f4
+; CHECK: movw r1, #3137
+; CHECK: bfi r1, r0, #15, #5
+ %1 = shl i32 %a, 15
+ %ins7 = and i32 %1, 1015808
+ %ins12 = or i32 %ins7, 3137
+ ret i32 %ins12
+}
+
+; rdar://8458663
+define i32 @f5(i32 %a, i32 %b) nounwind {
+entry:
+; CHECK: f5:
+; CHECK-NOT: bfc
+; CHECK: bfi r0, r1, #20, #4
+ %0 = and i32 %a, -15728641
+ %1 = shl i32 %b, 20
+ %2 = and i32 %1, 15728640
+ %3 = or i32 %2, %0
+ ret i32 %3
+}
diff --git a/test/CodeGen/ARM/bits.ll b/test/CodeGen/ARM/bits.ll
index 9e94efe3f9db..ce1b2ad5fad3 100644
--- a/test/CodeGen/ARM/bits.ll
+++ b/test/CodeGen/ARM/bits.ll
@@ -1,36 +1,41 @@
-; RUN: llc < %s -march=arm > %t
-; RUN: grep and %t | count 1
-; RUN: grep orr %t | count 1
-; RUN: grep eor %t | count 1
-; RUN: grep mov.*lsl %t | count 1
-; RUN: grep mov.*asr %t | count 1
+; RUN: llc < %s -march=arm | FileCheck %s
define i32 @f1(i32 %a, i32 %b) {
entry:
+; CHECK: f1
+; CHECK: and r0, r1, r0
%tmp2 = and i32 %b, %a ; <i32> [#uses=1]
ret i32 %tmp2
}
define i32 @f2(i32 %a, i32 %b) {
entry:
+; CHECK: f2
+; CHECK: orr r0, r1, r0
%tmp2 = or i32 %b, %a ; <i32> [#uses=1]
ret i32 %tmp2
}
define i32 @f3(i32 %a, i32 %b) {
entry:
+; CHECK: f3
+; CHECK: eor r0, r1, r0
%tmp2 = xor i32 %b, %a ; <i32> [#uses=1]
ret i32 %tmp2
}
define i32 @f4(i32 %a, i32 %b) {
entry:
+; CHECK: f4
+; CHECK: lsl
%tmp3 = shl i32 %a, %b ; <i32> [#uses=1]
ret i32 %tmp3
}
define i32 @f5(i32 %a, i32 %b) {
entry:
+; CHECK: f5
+; CHECK: asr
%tmp3 = ashr i32 %a, %b ; <i32> [#uses=1]
ret i32 %tmp3
}
diff --git a/test/CodeGen/ARM/bswap-inline-asm.ll b/test/CodeGen/ARM/bswap-inline-asm.ll
new file mode 100644
index 000000000000..472213d5f85f
--- /dev/null
+++ b/test/CodeGen/ARM/bswap-inline-asm.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6 | FileCheck %s
+
+define i32 @t1(i32 %x) nounwind {
+; CHECK: t1:
+; CHECK-NOT: InlineAsm
+; CHECK: rev
+ %asmtmp = tail call i32 asm "rev $0, $1\0A", "=l,l"(i32 %x) nounwind
+ ret i32 %asmtmp
+}
diff --git a/test/CodeGen/ARM/bx_fold.ll b/test/CodeGen/ARM/bx_fold.ll
index 0e3e070a818f..09f1aae0a9f0 100644
--- a/test/CodeGen/ARM/bx_fold.ll
+++ b/test/CodeGen/ARM/bx_fold.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=arm
-; RUN: llc < %s -march=arm | not grep bx
+; RUN: llc < %s -mtriple=armv5t-apple-darwin | FileCheck %s
define void @test(i32 %Ptr, i8* %L) {
entry:
@@ -24,6 +23,8 @@ bb1: ; preds = %bb, %entry
br i1 %bothcond, label %bb, label %bb18
bb18: ; preds = %bb1
+; CHECK-NOT: bx
+; CHECK: ldmia sp!
ret void
}
diff --git a/test/CodeGen/ARM/call-tc.ll b/test/CodeGen/ARM/call-tc.ll
index db5afe3f56cb..a77aba037be5 100644
--- a/test/CodeGen/ARM/call-tc.ll
+++ b/test/CodeGen/ARM/call-tc.ll
@@ -1,8 +1,6 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin -march=arm | FileCheck %s -check-prefix=CHECKV4
-; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin -mattr=+v5t | FileCheck %s -check-prefix=CHECKV5
-; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi\
-; RUN: -relocation-model=pic | FileCheck %s -check-prefix=CHECKELF
-; XFAIL: *
+; RUN: llc < %s -mtriple=armv6-apple-darwin -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKV6
+; RUN: llc < %s -mtriple=armv6-linux-gnueabi -relocation-model=pic -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKELF
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-tail-calls | FileCheck %s -check-prefix=CHECKT2
@t = weak global i32 ()* null ; <i32 ()**> [#uses=1]
@@ -10,40 +8,80 @@ declare void @g(i32, i32, i32, i32)
define void @t1() {
; CHECKELF: t1:
-; CHECKELF: PLT
+; CHECKELF: bl g(PLT)
call void @g( i32 1, i32 2, i32 3, i32 4 )
ret void
}
define void @t2() {
-; CHECKV4: t2:
-; CHECKV4: bx r0 @ TAILCALL
-; CHECKV5: t2:
-; CHECKV5: bx r0 @ TAILCALL
+; CHECKV6: t2:
+; CHECKV6: bx r0 @ TAILCALL
%tmp = load i32 ()** @t ; <i32 ()*> [#uses=1]
%tmp.upgrd.2 = tail call i32 %tmp( ) ; <i32> [#uses=0]
ret void
}
-define i32* @t3(i32, i32, i32*, i32*, i32*) nounwind {
-; CHECKV4: t3:
-; CHECKV4: bx r{{.*}}
-BB0:
- %5 = inttoptr i32 %0 to i32* ; <i32*> [#uses=1]
- %t35 = volatile load i32* %5 ; <i32> [#uses=1]
- %6 = inttoptr i32 %t35 to i32** ; <i32**> [#uses=1]
- %7 = getelementptr i32** %6, i32 86 ; <i32**> [#uses=1]
- %8 = load i32** %7 ; <i32*> [#uses=1]
- %9 = bitcast i32* %8 to i32* (i32, i32*, i32, i32*, i32*, i32*)* ; <i32* (i32, i32*, i32, i32*, i32*, i32*)*> [#uses=1]
- %10 = call i32* %9(i32 %0, i32* null, i32 %1, i32* %2, i32* %3, i32* %4) ; <i32*> [#uses=1]
- ret i32* %10
-}
-
-define void @t4() {
-; CHECKV4: t4:
-; CHECKV4: b _t2 @ TAILCALL
-; CHECKV5: t4:
-; CHECKV5: b _t2 @ TAILCALL
+define void @t3() {
+; CHECKV6: t3:
+; CHECKV6: b _t2 @ TAILCALL
+; CHECKELF: t3:
+; CHECKELF: b t2(PLT) @ TAILCALL
tail call void @t2( ) ; <i32> [#uses=0]
ret void
}
+
+; Sibcall optimization of expanded libcalls. rdar://8707777
+define double @t4(double %a) nounwind readonly ssp {
+entry:
+; CHECKV6: t4:
+; CHECKV6: b _sin @ TAILCALL
+; CHECKELF: t4:
+; CHECKELF: b sin(PLT) @ TAILCALL
+ %0 = tail call double @sin(double %a) nounwind readonly ; <double> [#uses=1]
+ ret double %0
+}
+
+define float @t5(float %a) nounwind readonly ssp {
+entry:
+; CHECKV6: t5:
+; CHECKV6: b _sinf @ TAILCALL
+; CHECKELF: t5:
+; CHECKELF: b sinf(PLT) @ TAILCALL
+ %0 = tail call float @sinf(float %a) nounwind readonly ; <float> [#uses=1]
+ ret float %0
+}
+
+declare float @sinf(float) nounwind readonly
+
+declare double @sin(double) nounwind readonly
+
+define i32 @t6(i32 %a, i32 %b) nounwind readnone {
+entry:
+; CHECKV6: t6:
+; CHECKV6: b ___divsi3 @ TAILCALL
+; CHECKELF: t6:
+; CHECKELF: b __aeabi_idiv(PLT) @ TAILCALL
+ %0 = sdiv i32 %a, %b
+ ret i32 %0
+}
+
+; Make sure the tail call instruction isn't deleted
+; rdar://8309338
+declare void @foo() nounwind
+
+define void @t7() nounwind {
+entry:
+; CHECKT2: t7:
+; CHECKT2: blxeq _foo
+; CHECKT2-NEXT: pop.w
+; CHECKT2-NEXT: b.w _foo
+ br i1 undef, label %bb, label %bb1.lr.ph
+
+bb1.lr.ph:
+ tail call void @foo() nounwind
+ unreachable
+
+bb:
+ tail call void @foo() nounwind
+ ret void
+}
diff --git a/test/CodeGen/ARM/clz.ll b/test/CodeGen/ARM/clz.ll
index d2235c9221ce..e381e0029819 100644
--- a/test/CodeGen/ARM/clz.ll
+++ b/test/CodeGen/ARM/clz.ll
@@ -1,8 +1,10 @@
-; RUN: llc < %s -march=arm -mattr=+v5t | grep clz
+; RUN: llc < %s -march=arm -mattr=+v5t | FileCheck %s
declare i32 @llvm.ctlz.i32(i32)
define i32 @test(i32 %x) {
- %tmp.1 = call i32 @llvm.ctlz.i32( i32 %x ) ; <i32> [#uses=1]
+; CHECK: test
+; CHECK: clz r0, r0
+ %tmp.1 = call i32 @llvm.ctlz.i32( i32 %x )
ret i32 %tmp.1
}
diff --git a/test/CodeGen/ARM/code-placement.ll b/test/CodeGen/ARM/code-placement.ll
index 25c556889fc4..845be8c20ea5 100644
--- a/test/CodeGen/ARM/code-placement.ll
+++ b/test/CodeGen/ARM/code-placement.ll
@@ -1,12 +1,13 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-apple-darwin -cgp-critical-edge-splitting=0 | FileCheck %s
; PHI elimination shouldn't break backedge.
; rdar://8263994
%struct.list_data_s = type { i16, i16 }
%struct.list_head = type { %struct.list_head*, %struct.list_data_s* }
-define arm_apcscc %struct.list_head* @t(%struct.list_head* %list) nounwind {
+define arm_apcscc %struct.list_head* @t1(%struct.list_head* %list) nounwind {
entry:
+; CHECK: t1:
%0 = icmp eq %struct.list_head* %list, null
br i1 %0, label %bb2, label %bb
@@ -27,3 +28,52 @@ bb2:
%next.0.lcssa = phi %struct.list_head* [ null, %entry ], [ %list_addr.05, %bb ]
ret %struct.list_head* %next.0.lcssa
}
+
+; Optimize loop entry and eliminate intra-loop branches.
+; rdar://8117827
+define i32 @t2(i32 %passes, i32* nocapture %src, i32 %size) nounwind readonly {
+entry:
+; CHECK: t2:
+; CHECK: beq LBB1_[[RET:.]]
+ %0 = icmp eq i32 %passes, 0 ; <i1> [#uses=1]
+ br i1 %0, label %bb5, label %bb.nph15
+
+; CHECK: LBB1_[[PREHDR:.]]: @ %bb2.preheader
+bb1: ; preds = %bb2.preheader, %bb1
+; CHECK: LBB1_[[BB1:.]]: @ %bb1
+; CHECK: bne LBB1_[[BB1]]
+ %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %bb2.preheader ] ; <i32> [#uses=2]
+ %sum.08 = phi i32 [ %2, %bb1 ], [ %sum.110, %bb2.preheader ] ; <i32> [#uses=1]
+ %tmp17 = sub i32 %i.07, %indvar ; <i32> [#uses=1]
+ %scevgep = getelementptr i32* %src, i32 %tmp17 ; <i32*> [#uses=1]
+ %1 = load i32* %scevgep, align 4 ; <i32> [#uses=1]
+ %2 = add nsw i32 %1, %sum.08 ; <i32> [#uses=2]
+ %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
+ %exitcond = icmp eq i32 %indvar.next, %size ; <i1> [#uses=1]
+ br i1 %exitcond, label %bb3, label %bb1
+
+bb3: ; preds = %bb1, %bb2.preheader
+; CHECK: LBB1_[[BB3:.]]: @ %bb3
+; CHECK: bne LBB1_[[PREHDR]]
+; CHECK-NOT: b LBB1_
+ %sum.0.lcssa = phi i32 [ %sum.110, %bb2.preheader ], [ %2, %bb1 ] ; <i32> [#uses=2]
+ %3 = add i32 %pass.011, 1 ; <i32> [#uses=2]
+ %exitcond18 = icmp eq i32 %3, %passes ; <i1> [#uses=1]
+ br i1 %exitcond18, label %bb5, label %bb2.preheader
+
+bb.nph15: ; preds = %entry
+ %i.07 = add i32 %size, -1 ; <i32> [#uses=2]
+ %4 = icmp sgt i32 %i.07, -1 ; <i1> [#uses=1]
+ br label %bb2.preheader
+
+bb2.preheader: ; preds = %bb3, %bb.nph15
+ %pass.011 = phi i32 [ 0, %bb.nph15 ], [ %3, %bb3 ] ; <i32> [#uses=1]
+ %sum.110 = phi i32 [ 0, %bb.nph15 ], [ %sum.0.lcssa, %bb3 ] ; <i32> [#uses=2]
+ br i1 %4, label %bb1, label %bb3
+
+; CHECK: LBB1_[[RET]]: @ %bb5
+; CHECK: ldmia sp!
+bb5: ; preds = %bb3, %entry
+ %sum.1.lcssa = phi i32 [ 0, %entry ], [ %sum.0.lcssa, %bb3 ] ; <i32> [#uses=1]
+ ret i32 %sum.1.lcssa
+}
diff --git a/test/CodeGen/ARM/constants.ll b/test/CodeGen/ARM/constants.ll
index ce919361619a..542cf02f2a90 100644
--- a/test/CodeGen/ARM/constants.ll
+++ b/test/CodeGen/ARM/constants.ll
@@ -14,34 +14,33 @@ define i32 @f2() {
define i32 @f3() {
; CHECK: f3
-; CHECK: mov r0{{.*}}256
+; CHECK: mov r0, #1, 24
ret i32 256
}
define i32 @f4() {
; CHECK: f4
-; CHECK: orr{{.*}}256
+; CHECK: orr{{.*}}#1, 24
ret i32 257
}
define i32 @f5() {
; CHECK: f5
-; CHECK: mov r0, {{.*}}-1073741761
+; CHECK: mov r0, #255, 2
ret i32 -1073741761
}
define i32 @f6() {
; CHECK: f6
-; CHECK: mov r0, {{.*}}1008
+; CHECK: mov r0, #63, 28
ret i32 1008
}
define void @f7(i32 %a) {
; CHECK: f7
; CHECK: cmp r0, #1, 16
- %b = icmp ugt i32 %a, 65536 ; <i1> [#uses=1]
+ %b = icmp ugt i32 %a, 65536
br i1 %b, label %r, label %r
-
-r: ; preds = %0, %0
+r:
ret void
}
diff --git a/test/CodeGen/ARM/crash.ll b/test/CodeGen/ARM/crash.ll
new file mode 100644
index 000000000000..4b6876df4a03
--- /dev/null
+++ b/test/CodeGen/ARM/crash.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10
+
+; <rdar://problem/8529919>
+%struct.foo = type { i32, i32 }
+
+define void @func() nounwind {
+entry:
+ %tmp = load i32* undef, align 4
+ br label %bb1
+
+bb1:
+ %tmp1 = and i32 %tmp, 16
+ %tmp2 = icmp eq i32 %tmp1, 0
+ %invok.1.i = select i1 %tmp2, i32 undef, i32 0
+ %tmp119 = add i32 %invok.1.i, 0
+ br i1 undef, label %bb2, label %exit
+
+bb2:
+ %tmp120 = add i32 %tmp119, 0
+ %scevgep810.i = getelementptr %struct.foo* null, i32 %tmp120, i32 1
+ store i32 undef, i32* %scevgep810.i, align 4
+ br i1 undef, label %bb2, label %bb3
+
+bb3:
+ br i1 %tmp2, label %bb2, label %bb2
+
+exit:
+ ret void
+}
diff --git a/test/CodeGen/ARM/div.ll b/test/CodeGen/ARM/div.ll
index 448b437ddf46..3d29e05a0ccf 100644
--- a/test/CodeGen/ARM/div.ll
+++ b/test/CodeGen/ARM/div.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | FileCheck %s -check-prefix=CHECK-ARM
+; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=CHECK-ARM
define i32 @f1(i32 %a, i32 %b) {
entry:
diff --git a/test/CodeGen/ARM/fabss.ll b/test/CodeGen/ARM/fabss.ll
index dfc1e0a957c3..f03282bdab7f 100644
--- a/test/CodeGen/ARM/fabss.ll
+++ b/test/CodeGen/ARM/fabss.ll
@@ -24,4 +24,4 @@ declare float @fabsf(float)
; CORTEXA8: test:
; CORTEXA8: vabs.f32 d1, d1
; CORTEXA9: test:
-; CORTEXA9: vabs.f32 s0, s0
+; CORTEXA9: vabs.f32 s1, s1
diff --git a/test/CodeGen/ARM/fadds.ll b/test/CodeGen/ARM/fadds.ll
index 113f0e29bd15..749690e98d0f 100644
--- a/test/CodeGen/ARM/fadds.ll
+++ b/test/CodeGen/ARM/fadds.ll
@@ -20,4 +20,4 @@ entry:
; CORTEXA8: test:
; CORTEXA8: vadd.f32 d0, d1, d0
; CORTEXA9: test:
-; CORTEXA9: vadd.f32 s0, s0, s1
+; CORTEXA9: vadd.f32 s0, s1, s0
diff --git a/test/CodeGen/ARM/fast-isel-crash.ll b/test/CodeGen/ARM/fast-isel-crash.ll
new file mode 100644
index 000000000000..370c70f174fd
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-crash.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -O0 -mtriple=thumbv7-apple-darwin
+
+%union.anon = type { <16 x i32> }
+
+@__md0 = external global [137 x i8]
+
+define internal void @stretch(<4 x i8> addrspace(1)* %src, <4 x i8> addrspace(1)* %dst, i32 %width, i32 %height, i32 %iLS, i32 %oLS, <2 x float> %c, <4 x float> %param) nounwind {
+entry:
+ ret void
+}
+
+define internal i32 @_Z13get_global_idj(i32 %dim) nounwind ssp {
+entry:
+ ret i32 undef
+}
+
+define void @wrap(i8 addrspace(1)* addrspace(1)* %arglist, i32 addrspace(1)* %gtid) nounwind ssp {
+entry:
+ call void @stretch(<4 x i8> addrspace(1)* undef, <4 x i8> addrspace(1)* undef, i32 undef, i32 undef, i32 undef, i32 undef, <2 x float> undef, <4 x float> undef)
+ ret void
+}
diff --git a/test/CodeGen/ARM/fast-isel-static.ll b/test/CodeGen/ARM/fast-isel-static.ll
new file mode 100644
index 000000000000..8f58480be164
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-static.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -relocation-model=static -arm-long-calls | FileCheck -check-prefix=LONG %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -relocation-model=static | FileCheck -check-prefix=NORM %s
+
+define void @myadd(float* %sum, float* %addend) nounwind {
+entry:
+ %sum.addr = alloca float*, align 4
+ %addend.addr = alloca float*, align 4
+ store float* %sum, float** %sum.addr, align 4
+ store float* %addend, float** %addend.addr, align 4
+ %tmp = load float** %sum.addr, align 4
+ %tmp1 = load float* %tmp
+ %tmp2 = load float** %addend.addr, align 4
+ %tmp3 = load float* %tmp2
+ %add = fadd float %tmp1, %tmp3
+ %tmp4 = load float** %sum.addr, align 4
+ store float %add, float* %tmp4
+ ret void
+}
+
+define i32 @main(i32 %argc, i8** %argv) nounwind {
+entry:
+ %ztot = alloca float, align 4
+ %z = alloca float, align 4
+ store float 0.000000e+00, float* %ztot, align 4
+ store float 1.000000e+00, float* %z, align 4
+; LONG: blx r2
+; NORM: blx _myadd
+ call void @myadd(float* %ztot, float* %z)
+ ret i32 0
+}
diff --git a/test/CodeGen/ARM/fast-isel.ll b/test/CodeGen/ARM/fast-isel.ll
index 3bee84d84de4..dd806ec6f1ae 100644
--- a/test/CodeGen/ARM/fast-isel.ll
+++ b/test/CodeGen/ARM/fast-isel.ll
@@ -1,9 +1,9 @@
-; RUN: llc < %s -O0 -arm-fast-isel -fast-isel-abort -mtriple=armv7-apple-darwin
-; RUN: llc < %s -O0 -arm-fast-isel -fast-isel-abort -mtriple=thumbv7-apple-darwin
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv7-apple-darwin
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=thumbv7-apple-darwin
; Very basic fast-isel functionality.
-define i32 @add(i32 %a, i32 %b) nounwind ssp {
+define i32 @add(i32 %a, i32 %b) nounwind {
entry:
%a.addr = alloca i32, align 4
%b.addr = alloca i32, align 4
@@ -13,27 +13,4 @@ entry:
%tmp1 = load i32* %b.addr
%add = add nsw i32 %tmp, %tmp1
ret i32 %add
-}
-
-define i32* @foo(i32* %p, i32* %q, i32** %z) nounwind {
-entry:
- %r = load i32* %p
- %s = load i32* %q
- %y = load i32** %z
- br label %fast
-
-fast:
- %t0 = add i32 %r, %s
- %t1 = mul i32 %t0, %s
- %t2 = sub i32 %t1, %s
- %t3 = and i32 %t2, %s
- %t4 = xor i32 %t3, 3
- %t5 = xor i32 %t4, %s
- %t6 = add i32 %t5, 2
- %t7 = getelementptr i32* %y, i32 1
- %t8 = getelementptr i32* %t7, i32 %t6
- br label %exit
-
-exit:
- ret i32* %t8
-}
+}
\ No newline at end of file
diff --git a/test/CodeGen/ARM/fcopysign.ll b/test/CodeGen/ARM/fcopysign.ll
index a6d741087a89..1050cd265998 100644
--- a/test/CodeGen/ARM/fcopysign.ll
+++ b/test/CodeGen/ARM/fcopysign.ll
@@ -1,18 +1,45 @@
-; RUN: llc < %s -march=arm | grep bic | count 2
-; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | \
-; RUN: grep vneg | count 2
+; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=SOFT
+; RUN: llc < %s -mtriple=armv7-gnueabi -float-abi=hard -mcpu=cortex-a8 | FileCheck %s -check-prefix=HARD
-define float @test1(float %x, double %y) {
- %tmp = fpext float %x to double
- %tmp2 = tail call double @copysign( double %tmp, double %y )
- %tmp3 = fptrunc double %tmp2 to float
- ret float %tmp3
+; rdar://8984306
+define float @test1(float %x, float %y) nounwind {
+entry:
+; SOFT: test1:
+; SOFT: lsr r1, r1, #31
+; SOFT: bfi r0, r1, #31, #1
+
+; HARD: test1:
+; HARD: vabs.f32 d0, d0
+; HARD: cmp r0, #0
+; HARD: vneglt.f32 s0, s0
+ %0 = tail call float @copysignf(float %x, float %y) nounwind
+ ret float %0
+}
+
+define double @test2(double %x, double %y) nounwind {
+entry:
+; SOFT: test2:
+; SOFT: lsr r2, r3, #31
+; SOFT: bfi r1, r2, #31, #1
+
+; HARD: test2:
+; HARD: vabs.f64 d0, d0
+; HARD: cmp r1, #0
+; HARD: vneglt.f64 d0, d0
+ %0 = tail call double @copysign(double %x, double %y) nounwind
+ ret double %0
}
-define double @test2(double %x, float %y) {
- %tmp = fpext float %y to double
- %tmp2 = tail call double @copysign( double %x, double %tmp )
- ret double %tmp2
+define double @test3(double %x, double %y, double %z) nounwind {
+entry:
+; SOFT: test3:
+; SOFT: vabs.f64
+; SOFT: cmp {{.*}}, #0
+; SOFT: vneglt.f64
+ %0 = fmul double %x, %y
+ %1 = tail call double @copysign(double %0, double %z) nounwind
+ ret double %1
}
-declare double @copysign(double, double)
+declare double @copysign(double, double) nounwind
+declare float @copysignf(float, float) nounwind
diff --git a/test/CodeGen/ARM/fdivs.ll b/test/CodeGen/ARM/fdivs.ll
index 9af1217de1d0..0c3149579297 100644
--- a/test/CodeGen/ARM/fdivs.ll
+++ b/test/CodeGen/ARM/fdivs.ll
@@ -20,4 +20,4 @@ entry:
; CORTEXA8: test:
; CORTEXA8: vdiv.f32 s0, s1, s0
; CORTEXA9: test:
-; CORTEXA9: vdiv.f32 s0, s0, s1
+; CORTEXA9: vdiv.f32 s0, s1, s0
diff --git a/test/CodeGen/ARM/fmacs.ll b/test/CodeGen/ARM/fmacs.ll
index c4ceca9828b0..fb83ef626af6 100644
--- a/test/CodeGen/ARM/fmacs.ll
+++ b/test/CodeGen/ARM/fmacs.ll
@@ -1,24 +1,51 @@
; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
-define float @test(float %acc, float %a, float %b) {
+define float @t1(float %acc, float %a, float %b) {
entry:
+; VFP2: t1:
+; VFP2: vmla.f32
+
+; NEON: t1:
+; NEON: vmla.f32
+
+; A8: t1:
+; A8: vmul.f32
+; A8: vadd.f32
%0 = fmul float %a, %b
%1 = fadd float %acc, %0
ret float %1
}
-; VFP2: test:
-; VFP2: vmla.f32 s2, s1, s0
+define double @t2(double %acc, double %a, double %b) {
+entry:
+; VFP2: t2:
+; VFP2: vmla.f64
+
+; NEON: t2:
+; NEON: vmla.f64
-; NFP1: test:
-; NFP1: vmul.f32 d0, d1, d0
-; NFP0: test:
-; NFP0: vmla.f32 s2, s1, s0
+; A8: t2:
+; A8: vmul.f64
+; A8: vadd.f64
+ %0 = fmul double %a, %b
+ %1 = fadd double %acc, %0
+ ret double %1
+}
-; CORTEXA8: test:
-; CORTEXA8: vmul.f32 d0, d1, d0
-; CORTEXA9: test:
-; CORTEXA9: vmla.f32 s0, s1, s2
+define float @t3(float %acc, float %a, float %b) {
+entry:
+; VFP2: t3:
+; VFP2: vmla.f32
+
+; NEON: t3:
+; NEON: vmla.f32
+
+; A8: t3:
+; A8: vmul.f32
+; A8: vadd.f32
+ %0 = fmul float %a, %b
+ %1 = fadd float %0, %acc
+ ret float %1
+}
diff --git a/test/CodeGen/ARM/fmscs.ll b/test/CodeGen/ARM/fmscs.ll
index 103ce334519b..a182833a7a2c 100644
--- a/test/CodeGen/ARM/fmscs.ll
+++ b/test/CodeGen/ARM/fmscs.ll
@@ -1,24 +1,35 @@
; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
-define float @test(float %acc, float %a, float %b) {
+define float @t1(float %acc, float %a, float %b) {
entry:
+; VFP2: t1:
+; VFP2: vnmls.f32
+
+; NEON: t1:
+; NEON: vnmls.f32
+
+; A8: t1:
+; A8: vmul.f32
+; A8: vsub.f32
%0 = fmul float %a, %b
%1 = fsub float %0, %acc
ret float %1
}
-; VFP2: test:
-; VFP2: vnmls.f32 s2, s1, s0
+define double @t2(double %acc, double %a, double %b) {
+entry:
+; VFP2: t2:
+; VFP2: vnmls.f64
-; NFP1: test:
-; NFP1: vnmls.f32 s2, s1, s0
-; NFP0: test:
-; NFP0: vnmls.f32 s2, s1, s0
+; NEON: t2:
+; NEON: vnmls.f64
-; CORTEXA8: test:
-; CORTEXA8: vnmls.f32 s2, s1, s0
-; CORTEXA9: test:
-; CORTEXA9: vnmls.f32 s0, s1, s2
+; A8: t2:
+; A8: vmul.f64
+; A8: vsub.f64
+ %0 = fmul double %a, %b
+ %1 = fsub double %0, %acc
+ ret double %1
+}
diff --git a/test/CodeGen/ARM/fmuls.ll b/test/CodeGen/ARM/fmuls.ll
index bfafd20c8602..ef4e3e52818e 100644
--- a/test/CodeGen/ARM/fmuls.ll
+++ b/test/CodeGen/ARM/fmuls.ll
@@ -20,4 +20,4 @@ entry:
; CORTEXA8: test:
; CORTEXA8: vmul.f32 d0, d1, d0
; CORTEXA9: test:
-; CORTEXA9: vmul.f32 s0, s0, s1
+; CORTEXA9: vmul.f32 s0, s1, s0
diff --git a/test/CodeGen/ARM/fnegs.ll b/test/CodeGen/ARM/fnegs.ll
index c15005e6e8ab..418b59803d30 100644
--- a/test/CodeGen/ARM/fnegs.ll
+++ b/test/CodeGen/ARM/fnegs.ll
@@ -13,19 +13,19 @@ entry:
ret float %retval
}
; VFP2: test1:
-; VFP2: vneg.f32 s1, s0
+; VFP2: vneg.f32 s{{.*}}, s{{.*}}
; NFP1: test1:
-; NFP1: vneg.f32 d1, d0
+; NFP1: vneg.f32 d{{.*}}, d{{.*}}
; NFP0: test1:
-; NFP0: vneg.f32 s1, s0
+; NFP0: vneg.f32 s{{.*}}, s{{.*}}
; CORTEXA8: test1:
-; CORTEXA8: vneg.f32 d1, d0
+; CORTEXA8: vneg.f32 d{{.*}}, d{{.*}}
; CORTEXA9: test1:
-; CORTEXA9: vneg.f32 s1, s0
+; CORTEXA9: vneg.f32 s{{.*}}, s{{.*}}
define float @test2(float* %a) {
entry:
@@ -37,17 +37,17 @@ entry:
ret float %retval
}
; VFP2: test2:
-; VFP2: vneg.f32 s1, s0
+; VFP2: vneg.f32 s{{.*}}, s{{.*}}
; NFP1: test2:
-; NFP1: vneg.f32 d1, d0
+; NFP1: vneg.f32 d{{.*}}, d{{.*}}
; NFP0: test2:
-; NFP0: vneg.f32 s1, s0
+; NFP0: vneg.f32 s{{.*}}, s{{.*}}
; CORTEXA8: test2:
-; CORTEXA8: vneg.f32 d1, d0
+; CORTEXA8: vneg.f32 d{{.*}}, d{{.*}}
; CORTEXA9: test2:
-; CORTEXA9: vneg.f32 s1, s0
+; CORTEXA9: vneg.f32 s{{.*}}, s{{.*}}
diff --git a/test/CodeGen/ARM/fnmacs.ll b/test/CodeGen/ARM/fnmacs.ll
index 1d1d06a70ea6..1763d46e06c4 100644
--- a/test/CodeGen/ARM/fnmacs.ll
+++ b/test/CodeGen/ARM/fnmacs.ll
@@ -1,20 +1,35 @@
; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=NEONFP
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
-define float @test(float %acc, float %a, float %b) {
+define float @t1(float %acc, float %a, float %b) {
entry:
+; VFP2: t1:
; VFP2: vmls.f32
-; NEON: vmls.f32
-; NEONFP-NOT: vmls
-; NEONFP-NOT: vmov.f32
-; NEONFP: vmul.f32
-; NEONFP: vsub.f32
-; NEONFP: vmov
+; NEON: t1:
+; NEON: vmls.f32
+; A8: t1:
+; A8: vmul.f32
+; A8: vsub.f32
%0 = fmul float %a, %b
%1 = fsub float %acc, %0
ret float %1
}
+define double @t2(double %acc, double %a, double %b) {
+entry:
+; VFP2: t2:
+; VFP2: vmls.f64
+
+; NEON: t2:
+; NEON: vmls.f64
+
+; A8: t2:
+; A8: vmul.f64
+; A8: vsub.f64
+ %0 = fmul double %a, %b
+ %1 = fsub double %acc, %0
+ ret double %1
+}
diff --git a/test/CodeGen/ARM/fnmscs.ll b/test/CodeGen/ARM/fnmscs.ll
index 0b47edd5f1f1..76c806761f75 100644
--- a/test/CodeGen/ARM/fnmscs.ll
+++ b/test/CodeGen/ARM/fnmscs.ll
@@ -1,23 +1,71 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
-define float @test1(float %acc, float %a, float %b) nounwind {
-; CHECK: vnmla.f32 s{{.*}}, s{{.*}}, s{{.*}}
+define float @t1(float %acc, float %a, float %b) nounwind {
entry:
+; VFP2: t1:
+; VFP2: vnmla.f32
+
+; NEON: t1:
+; NEON: vnmla.f32
+
+; A8: t1:
+; A8: vnmul.f32 s0, s{{[01]}}, s{{[01]}}
+; A8: vsub.f32 d0, d0, d1
%0 = fmul float %a, %b
%1 = fsub float -0.0, %0
%2 = fsub float %1, %acc
ret float %2
}
-define float @test2(float %acc, float %a, float %b) nounwind {
-; CHECK: vnmla.f32 s{{.*}}, s{{.*}}, s{{.*}}
+define float @t2(float %acc, float %a, float %b) nounwind {
entry:
+; VFP2: t2:
+; VFP2: vnmla.f32
+
+; NEON: t2:
+; NEON: vnmla.f32
+
+; A8: t2:
+; A8: vnmul.f32 s0, s{{[01]}}, s{{[01]}}
+; A8: vsub.f32 d0, d0, d1
%0 = fmul float %a, %b
%1 = fmul float -1.0, %0
%2 = fsub float %1, %acc
ret float %2
}
+define double @t3(double %acc, double %a, double %b) nounwind {
+entry:
+; VFP2: t3:
+; VFP2: vnmla.f64
+
+; NEON: t3:
+; NEON: vnmla.f64
+
+; A8: t3:
+; A8: vnmul.f64 d16, d1{{[67]}}, d1{{[67]}}
+; A8: vsub.f64 d16, d16, d17
+ %0 = fmul double %a, %b
+ %1 = fsub double -0.0, %0
+ %2 = fsub double %1, %acc
+ ret double %2
+}
+
+define double @t4(double %acc, double %a, double %b) nounwind {
+entry:
+; VFP2: t4:
+; VFP2: vnmla.f64
+
+; NEON: t4:
+; NEON: vnmla.f64
+
+; A8: t4:
+; A8: vnmul.f64 d16, d1{{[67]}}, d1{{[67]}}
+; A8: vsub.f64 d16, d16, d17
+ %0 = fmul double %a, %b
+ %1 = fmul double -1.0, %0
+ %2 = fsub double %1, %acc
+ ret double %2
+}
diff --git a/test/CodeGen/ARM/fp.ll b/test/CodeGen/ARM/fp.ll
index 8fbd45b97579..b6e9c3c22e75 100644
--- a/test/CodeGen/ARM/fp.ll
+++ b/test/CodeGen/ARM/fp.ll
@@ -51,7 +51,7 @@ entry:
define float @h2() {
;CHECK: h2:
-;CHECK: 1065353216
+;CHECK: mov r0, #254, 10
entry:
ret float 1.000000e+00
}
diff --git a/test/CodeGen/ARM/fpcmp-opt.ll b/test/CodeGen/ARM/fpcmp-opt.ll
index 64350591b87f..65b921bdf655 100644
--- a/test/CodeGen/ARM/fpcmp-opt.ll
+++ b/test/CodeGen/ARM/fpcmp-opt.ll
@@ -38,6 +38,7 @@ entry:
; FINITE: t2:
; FINITE-NOT: vldr
; FINITE: ldrd r0, [r0]
+; FINITE-NOT: b LBB
; FINITE: cmp r0, #0
; FINITE: cmpeq r1, #0
; FINITE-NOT: vcmpe.f32
diff --git a/test/CodeGen/ARM/fpcmp_ueq.ll b/test/CodeGen/ARM/fpcmp_ueq.ll
index 67f70e9eb5ed..2e6b3e3167ae 100644
--- a/test/CodeGen/ARM/fpcmp_ueq.ll
+++ b/test/CodeGen/ARM/fpcmp_ueq.ll
@@ -1,8 +1,14 @@
-; RUN: llc < %s -march=arm | grep moveq
-; RUN: llc < %s -march=arm -mattr=+vfp2 | grep movvs
+; RUN: llc < %s -mtriple=arm-apple-darwin | grep moveq
+; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s
define i32 @f7(float %a, float %b) {
entry:
+; CHECK: f7:
+; CHECK: vcmpe.f32
+; CHECK: vmrs apsr_nzcv, fpscr
+; CHECK: movweq
+; CHECK-NOT: vmrs
+; CHECK: movwvs
%tmp = fcmp ueq float %a,%b
%retval = select i1 %tmp, i32 666, i32 42
ret i32 %retval
diff --git a/test/CodeGen/ARM/fpconsts.ll b/test/CodeGen/ARM/fpconsts.ll
index f1d6a16f3edb..638dde9d8a0f 100644
--- a/test/CodeGen/ARM/fpconsts.ll
+++ b/test/CodeGen/ARM/fpconsts.ll
@@ -3,7 +3,7 @@
define float @t1(float %x) nounwind readnone optsize {
entry:
; CHECK: t1:
-; CHECK: vmov.f32 s1, #4.000000e+00
+; CHECK: vmov.f32 s{{.*}}, #4.000000e+00
%0 = fadd float %x, 4.000000e+00
ret float %0
}
@@ -11,7 +11,7 @@ entry:
define double @t2(double %x) nounwind readnone optsize {
entry:
; CHECK: t2:
-; CHECK: vmov.f64 d1, #3.000000e+00
+; CHECK: vmov.f64 d{{.*}}, #3.000000e+00
%0 = fadd double %x, 3.000000e+00
ret double %0
}
@@ -19,7 +19,7 @@ entry:
define double @t3(double %x) nounwind readnone optsize {
entry:
; CHECK: t3:
-; CHECK: vmov.f64 d1, #-1.300000e+01
+; CHECK: vmov.f64 d{{.*}}, #-1.300000e+01
%0 = fmul double %x, -1.300000e+01
ret double %0
}
@@ -27,7 +27,7 @@ entry:
define float @t4(float %x) nounwind readnone optsize {
entry:
; CHECK: t4:
-; CHECK: vmov.f32 s1, #-2.400000e+01
+; CHECK: vmov.f32 s{{.*}}, #-2.400000e+01
%0 = fmul float %x, -2.400000e+01
ret float %0
}
diff --git a/test/CodeGen/ARM/fpconv.ll b/test/CodeGen/ARM/fpconv.ll
index bf197a46cb77..1b4c008bb775 100644
--- a/test/CodeGen/ARM/fpconv.ll
+++ b/test/CodeGen/ARM/fpconv.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s --check-prefix=CHECK-VFP
-; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s
define float @f1(double %x) {
;CHECK-VFP: f1:
diff --git a/test/CodeGen/ARM/global-merge.ll b/test/CodeGen/ARM/global-merge.ll
new file mode 100644
index 000000000000..28bf2214740a
--- /dev/null
+++ b/test/CodeGen/ARM/global-merge.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -mtriple=thumb-apple-darwin | FileCheck %s
+; Test the ARMGlobalMerge pass. Use a Thumb target because it has a small
+; value for the maximum offset (127).
+
+; A local array that exceeds the maximum offset should not be merged.
+; CHECK: g0:
+@g0 = internal global [32 x i32] [ i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2 ]
+
+; CHECK: _MergedGlobals:
+@g1 = internal global i32 1
+@g2 = internal global i32 2
+
+; Make sure that the complete variable fits within the range of the maximum
+; offset. Having the starting offset in range is not sufficient.
+; When this works properly, @g3 is placed in a separate chunk of merged globals.
+; CHECK: _MergedGlobals1:
+@g3 = internal global [30 x i32] [ i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10 ]
+
+; Global variables that can be placed in BSS should be kept together in a
+; separate pool of merged globals.
+; CHECK: _MergedGlobals2
+@g4 = internal global i32 0
+@g5 = internal global i32 0
diff --git a/test/CodeGen/ARM/hello.ll b/test/CodeGen/ARM/hello.ll
index ccdc7bf4c140..bfed7a6630b4 100644
--- a/test/CodeGen/ARM/hello.ll
+++ b/test/CodeGen/ARM/hello.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=arm
; RUN: llc < %s -mtriple=arm-linux-gnueabi | grep mov | count 1
; RUN: llc < %s -mtriple=arm-linux-gnu --disable-fp-elim | \
-; RUN: grep mov | count 3
+; RUN: grep mov | count 2
; RUN: llc < %s -mtriple=arm-apple-darwin | grep mov | count 2
@str = internal constant [12 x i8] c"Hello World\00"
diff --git a/test/CodeGen/ARM/ifcvt10.ll b/test/CodeGen/ARM/ifcvt10.ll
new file mode 100644
index 000000000000..75428ac21655
--- /dev/null
+++ b/test/CodeGen/ARM/ifcvt10.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin -mcpu=cortex-a9 | FileCheck %s
+; rdar://8402126
+; Make sure the if-converter is not predicating vldmia and ldmia. These are
+; micro-coded and would have a long issue latency even if predicated on a
+; false predicate.
+
+define void @t(double %a, double %b, double %c, double %d, i32* nocapture %solutions, double* nocapture %x) nounwind {
+entry:
+; CHECK: t:
+; CHECK: vpop {d8}
+; CHECK-NOT: vpopne
+; CHECK: ldmia sp!, {r7, pc}
+; CHECK: vpop {d8}
+; CHECK: ldmia sp!, {r7, pc}
+ br i1 undef, label %if.else, label %if.then
+
+if.then: ; preds = %entry
+ %mul73 = fmul double undef, 0.000000e+00
+ %sub76 = fsub double %mul73, undef
+ store double %sub76, double* undef, align 4
+ %call88 = tail call double @cos(double 0.000000e+00) nounwind
+ %mul89 = fmul double undef, %call88
+ %sub92 = fsub double %mul89, undef
+ store double %sub92, double* undef, align 4
+ ret void
+
+if.else: ; preds = %entry
+ %tmp101 = tail call double @llvm.pow.f64(double undef, double 0x3FD5555555555555)
+ %add112 = fadd double %tmp101, undef
+ %mul118 = fmul double %add112, undef
+ store double 0.000000e+00, double* %x, align 4
+ ret void
+}
+
+declare double @acos(double)
+
+declare double @sqrt(double) readnone
+
+declare double @cos(double) readnone
+
+declare double @fabs(double)
+
+declare double @llvm.pow.f64(double, double) nounwind readonly
diff --git a/test/CodeGen/ARM/ifcvt11.ll b/test/CodeGen/ARM/ifcvt11.ll
new file mode 100644
index 000000000000..63f8557d555b
--- /dev/null
+++ b/test/CodeGen/ARM/ifcvt11.ll
@@ -0,0 +1,59 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin -mcpu=cortex-a8 | FileCheck %s
+; rdar://8598427
+; Adjust if-converter heuristics to avoid predicating vmrs, which can cause
+; a significant regression.
+
+%struct.xyz_t = type { double, double, double }
+
+define i32 @effie(i32 %tsets, %struct.xyz_t* nocapture %p, i32 %a, i32 %b, i32 %c) nounwind readonly noinline {
+; CHECK: effie:
+entry:
+ %0 = icmp sgt i32 %tsets, 0
+ br i1 %0, label %bb.nph, label %bb6
+
+bb.nph: ; preds = %entry
+ %1 = add nsw i32 %b, %a
+ %2 = add nsw i32 %1, %c
+ br label %bb
+
+bb: ; preds = %bb4, %bb.nph
+; CHECK: vcmpe.f64
+; CHECK: vmrs apsr_nzcv, fpscr
+ %r.19 = phi i32 [ 0, %bb.nph ], [ %r.0, %bb4 ]
+ %n.08 = phi i32 [ 0, %bb.nph ], [ %10, %bb4 ]
+ %scevgep10 = getelementptr inbounds %struct.xyz_t* %p, i32 %n.08, i32 0
+ %scevgep11 = getelementptr %struct.xyz_t* %p, i32 %n.08, i32 1
+ %3 = load double* %scevgep10, align 4
+ %4 = load double* %scevgep11, align 4
+ %5 = fcmp uge double %3, %4
+ br i1 %5, label %bb3, label %bb1
+
+bb1: ; preds = %bb
+; CHECK-NOT: it
+; CHECK-NOT: vcmpemi
+; CHECK-NOT: vmrsmi
+; CHECK: vcmpe.f64
+; CHECK: vmrs apsr_nzcv, fpscr
+ %scevgep12 = getelementptr %struct.xyz_t* %p, i32 %n.08, i32 2
+ %6 = load double* %scevgep12, align 4
+ %7 = fcmp uge double %3, %6
+ br i1 %7, label %bb3, label %bb2
+
+bb2: ; preds = %bb1
+ %8 = add nsw i32 %2, %r.19
+ br label %bb4
+
+bb3: ; preds = %bb1, %bb
+ %9 = add nsw i32 %r.19, 1
+ br label %bb4
+
+bb4: ; preds = %bb3, %bb2
+ %r.0 = phi i32 [ %9, %bb3 ], [ %8, %bb2 ]
+ %10 = add nsw i32 %n.08, 1
+ %exitcond = icmp eq i32 %10, %tsets
+ br i1 %exitcond, label %bb6, label %bb
+
+bb6: ; preds = %bb4, %entry
+ %r.1.lcssa = phi i32 [ 0, %entry ], [ %r.0, %bb4 ]
+ ret i32 %r.1.lcssa
+}
diff --git a/test/CodeGen/ARM/ifcvt6.ll b/test/CodeGen/ARM/ifcvt6.ll
index e2c0ba398c68..5edf32fd1af6 100644
--- a/test/CodeGen/ARM/ifcvt6.ll
+++ b/test/CodeGen/ARM/ifcvt6.ll
@@ -1,10 +1,9 @@
-; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
-; RUN: grep cmpne | count 1
-; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
-; RUN: grep ldmiahi | count 1
+; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
define void @foo(i32 %X, i32 %Y) {
entry:
+; CHECK: cmpne
+; CHECK: ldmiahi sp!
%tmp1 = icmp ult i32 %X, 4 ; <i1> [#uses=1]
%tmp4 = icmp eq i32 %Y, 0 ; <i1> [#uses=1]
%tmp7 = or i1 %tmp4, %tmp1 ; <i1> [#uses=1]
diff --git a/test/CodeGen/ARM/ifcvt7.ll b/test/CodeGen/ARM/ifcvt7.ll
index eb97085ac004..62e13557cfdc 100644
--- a/test/CodeGen/ARM/ifcvt7.ll
+++ b/test/CodeGen/ARM/ifcvt7.ll
@@ -1,14 +1,12 @@
-; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
-; RUN: grep cmpeq | count 1
-; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
-; RUN: grep moveq | count 1
-; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
-; RUN: grep ldmiaeq | count 1
+; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
; FIXME: Need post-ifcvt branch folding to get rid of the extra br at end of BB1.
%struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* }
define fastcc i32 @CountTree(%struct.quad_struct* %tree) {
+; CHECK: cmpeq
+; CHECK: moveq
+; CHECK: ldmiaeq sp!
entry:
br label %tailrecurse
diff --git a/test/CodeGen/ARM/ifcvt8.ll b/test/CodeGen/ARM/ifcvt8.ll
index 1e39060e69f2..5fdfc4ea6805 100644
--- a/test/CodeGen/ARM/ifcvt8.ll
+++ b/test/CodeGen/ARM/ifcvt8.ll
@@ -1,11 +1,11 @@
-; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
-; RUN: grep ldmiane | count 1
+; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
%struct.SString = type { i8*, i32, i32 }
declare void @abort()
define fastcc void @t(%struct.SString* %word, i8 signext %c) {
+; CHECK: ldmiane sp!
entry:
%tmp1 = icmp eq %struct.SString* %word, null ; <i1> [#uses=1]
br i1 %tmp1, label %cond_true, label %cond_false
diff --git a/test/CodeGen/ARM/inlineasm3.ll b/test/CodeGen/ARM/inlineasm3.ll
index 687e138c1b4e..9f77ad1f794c 100644
--- a/test/CodeGen/ARM/inlineasm3.ll
+++ b/test/CodeGen/ARM/inlineasm3.ll
@@ -7,7 +7,7 @@ define void @t() nounwind {
entry:
; CHECK: vmov.I64 q15, #0
; CHECK: vmov.32 d30[0], r0
-; CHECK: vmov q0, q15
+; CHECK: vmov q8, q15
%tmp = alloca %struct.int32x4_t, align 16
call void asm sideeffect "vmov.I64 q15, #0\0Avmov.32 d30[0], $1\0Avmov ${0:q}, q15\0A", "=*w,r,~{d31},~{d30}"(%struct.int32x4_t* %tmp, i32 8192) nounwind
ret void
@@ -18,7 +18,7 @@ entry:
define void @t2() nounwind {
entry:
-; CHECK: vmov d30, d0
+; CHECK: vmov d30, d16
; CHECK: vmov.32 r0, d30[0]
%asmtmp2 = tail call i32 asm sideeffect "vmov d30, $1\0Avmov.32 $0, d30[0]\0A", "=r,w,~{d30}"(<2 x i32> undef) nounwind
ret void
diff --git a/test/CodeGen/ARM/ispositive.ll b/test/CodeGen/ARM/ispositive.ll
index 245ed516f70b..2f1a2cfd7786 100644
--- a/test/CodeGen/ARM/ispositive.ll
+++ b/test/CodeGen/ARM/ispositive.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=arm | FileCheck %s
define i32 @test1(i32 %X) {
-; CHECK: mov r0, r0, lsr #31
+; CHECK: lsr{{.*}}#31
entry:
icmp slt i32 %X, 0 ; <i1>:0 [#uses=1]
zext i1 %0 to i32 ; <i32>:1 [#uses=1]
diff --git a/test/CodeGen/ARM/ldm.ll b/test/CodeGen/ARM/ldm.ll
index 78201a6b341a..2f1b85ebbb04 100644
--- a/test/CodeGen/ARM/ldm.ll
+++ b/test/CodeGen/ARM/ldm.ll
@@ -1,10 +1,13 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=armv4t-apple-darwin | FileCheck %s -check-prefix=V4T
@X = external global [0 x i32] ; <[0 x i32]*> [#uses=5]
define i32 @t1() {
; CHECK: t1:
; CHECK: ldmia
+; V4T: t1:
+; V4T: ldmia
%tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0) ; <i32> [#uses=1]
%tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
%tmp4 = tail call i32 @f1( i32 %tmp, i32 %tmp3 ) ; <i32> [#uses=1]
@@ -14,6 +17,8 @@ define i32 @t1() {
define i32 @t2() {
; CHECK: t2:
; CHECK: ldmia
+; V4T: t2:
+; V4T: ldmia
%tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
%tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
%tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 4) ; <i32> [#uses=1]
@@ -25,6 +30,10 @@ define i32 @t3() {
; CHECK: t3:
; CHECK: ldmib
; CHECK: ldmia sp!
+; V4T: t3:
+; V4T: ldmib
+; V4T: pop
+; V4T-NEXT: bx lr
%tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
%tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
%tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
diff --git a/test/CodeGen/ARM/ldst-f32-2-i32.ll b/test/CodeGen/ARM/ldst-f32-2-i32.ll
new file mode 100644
index 000000000000..2d016f6cd423
--- /dev/null
+++ b/test/CodeGen/ARM/ldst-f32-2-i32.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s
+; Check that the f32 load / store pair is optimized to an i32 load / store.
+; rdar://8944252
+
+define void @t(i32 %width, float* nocapture %src, float* nocapture %dst, i32 %index) nounwind {
+; CHECK: t:
+entry:
+ %src6 = bitcast float* %src to i8*
+ %0 = icmp eq i32 %width, 0
+ br i1 %0, label %return, label %bb
+
+bb:
+; CHECK: ldr [[REGISTER:(r[0-9]+)]], [r1], r3
+; CHECK: str [[REGISTER]], [r2], #4
+ %j.05 = phi i32 [ %2, %bb ], [ 0, %entry ]
+ %tmp = mul i32 %j.05, %index
+ %uglygep = getelementptr i8* %src6, i32 %tmp
+ %src_addr.04 = bitcast i8* %uglygep to float*
+ %dst_addr.03 = getelementptr float* %dst, i32 %j.05
+ %1 = load float* %src_addr.04, align 4
+ store float %1, float* %dst_addr.03, align 4
+ %2 = add i32 %j.05, 1
+ %exitcond = icmp eq i32 %2, %width
+ br i1 %exitcond, label %return, label %bb
+
+return:
+ ret void
+}
diff --git a/test/CodeGen/ARM/load-global.ll b/test/CodeGen/ARM/load-global.ll
new file mode 100644
index 000000000000..15a415df731d
--- /dev/null
+++ b/test/CodeGen/ARM/load-global.ll
@@ -0,0 +1,50 @@
+; RUN: llc < %s -mtriple=armv6-apple-darwin -relocation-model=static | FileCheck %s -check-prefix=STATIC
+; RUN: llc < %s -mtriple=armv6-apple-darwin -relocation-model=dynamic-no-pic | FileCheck %s -check-prefix=DYNAMIC
+; RUN: llc < %s -mtriple=armv6-apple-darwin -relocation-model=pic | FileCheck %s -check-prefix=PIC
+; RUN: llc < %s -mtriple=thumbv6-apple-darwin -relocation-model=pic | FileCheck %s -check-prefix=PIC_T
+; RUN: llc < %s -mtriple=armv7-apple-darwin -relocation-model=pic | FileCheck %s -check-prefix=PIC_V7
+; RUN: llc < %s -mtriple=armv6-linux-gnueabi -relocation-model=pic | FileCheck %s -check-prefix=LINUX
+
+@G = external global i32
+
+define i32 @test1() {
+; STATIC: _test1:
+; STATIC: ldr r0, LCPI0_0
+; STATIC: ldr r0, [r0]
+; STATIC: .long _G
+
+; DYNAMIC: _test1:
+; DYNAMIC: ldr r0, LCPI0_0
+; DYNAMIC: ldr r0, [r0]
+; DYNAMIC: ldr r0, [r0]
+; DYNAMIC: .long L_G$non_lazy_ptr
+
+; PIC: _test1
+; PIC: ldr r0, LCPI0_0
+; PIC: ldr r0, [pc, r0]
+; PIC: ldr r0, [r0]
+; PIC: .long L_G$non_lazy_ptr-(LPC0_0+8)
+
+; PIC_T: _test1
+; PIC_T: ldr.n r0, LCPI0_0
+; PIC_T: add r0, pc
+; PIC_T: ldr r0, [r0]
+; PIC_T: ldr r0, [r0]
+; PIC_T: .long L_G$non_lazy_ptr-(LPC0_0+4)
+
+; PIC_V7: _test1
+; PIC_V7: movw r0, :lower16:(L_G$non_lazy_ptr-(LPC0_0+8))
+; PIC_V7: movt r0, :upper16:(L_G$non_lazy_ptr-(LPC0_0+8))
+; PIC_V7: ldr r0, [pc, r0]
+; PIC_V7: ldr r0, [r0]
+
+; LINUX: test1
+; LINUX: ldr r0, .LCPI0_0
+; LINUX: ldr r1, .LCPI0_1
+; LINUX: add r0, pc, r0
+; LINUX: ldr r0, [r1, r0]
+; LINUX: ldr r0, [r0]
+; LINUX: .long G(GOT)
+ %tmp = load i32* @G
+ ret i32 %tmp
+}
diff --git a/test/CodeGen/ARM/long.ll b/test/CodeGen/ARM/long.ll
index 16ef7cc2cb6c..74f8d783377d 100644
--- a/test/CodeGen/ARM/long.ll
+++ b/test/CodeGen/ARM/long.ll
@@ -14,22 +14,22 @@ entry:
define i64 @f3() {
; CHECK: f3:
-; CHECK: mvn{{.*}}-2147483648
+; CHECK: mvn r0, #2, 2
entry:
ret i64 2147483647
}
define i64 @f4() {
; CHECK: f4:
-; CHECK: -2147483648
+; CHECK: mov r0, #2, 2
entry:
ret i64 2147483648
}
define i64 @f5() {
; CHECK: f5:
-; CHECK: mvn
-; CHECK: mvn{{.*}}-2147483648
+; CHECK: mvn r0, #0
+; CHECK: mvn r1, #2, 2
entry:
ret i64 9223372036854775807
}
diff --git a/test/CodeGen/ARM/long_shift.ll b/test/CodeGen/ARM/long_shift.ll
index 1ec4d15f6672..5e4f5730f8d2 100644
--- a/test/CodeGen/ARM/long_shift.ll
+++ b/test/CodeGen/ARM/long_shift.ll
@@ -2,8 +2,8 @@
define i64 @f0(i64 %A, i64 %B) {
; CHECK: f0
-; CHECK: movs r3, r3, lsr #1
-; CHECK-NEXT: mov r2, r2, rrx
+; CHECK: lsrs r3, r3, #1
+; CHECK-NEXT: rrx r2, r2
; CHECK-NEXT: subs r0, r0, r2
; CHECK-NEXT: sbc r1, r1, r3
%tmp = bitcast i64 %A to i64
@@ -14,7 +14,7 @@ define i64 @f0(i64 %A, i64 %B) {
define i32 @f1(i64 %x, i64 %y) {
; CHECK: f1
-; CHECK: mov r0, r0, lsl r2
+; CHECK: lsl{{.*}}r2
%a = shl i64 %x, %y
%b = trunc i64 %a to i32
ret i32 %b
@@ -22,7 +22,7 @@ define i32 @f1(i64 %x, i64 %y) {
define i32 @f2(i64 %x, i64 %y) {
; CHECK: f2
-; CHECK: mov r0, r0, lsr r2
+; CHECK: lsr{{.*}}r2
; CHECK-NEXT: rsb r3, r2, #32
; CHECK-NEXT: subs r2, r2, #32
; CHECK-NEXT: orr r0, r0, r1, lsl r3
@@ -34,7 +34,7 @@ define i32 @f2(i64 %x, i64 %y) {
define i32 @f3(i64 %x, i64 %y) {
; CHECK: f3
-; CHECK: mov r0, r0, lsr r2
+; CHECK: lsr{{.*}}r2
; CHECK-NEXT: rsb r3, r2, #32
; CHECK-NEXT: subs r2, r2, #32
; CHECK-NEXT: orr r0, r0, r1, lsl r3
diff --git a/test/CodeGen/ARM/lsr-code-insertion.ll b/test/CodeGen/ARM/lsr-code-insertion.ll
index b8c543b1bd18..1bbb96deeefe 100644
--- a/test/CodeGen/ARM/lsr-code-insertion.ll
+++ b/test/CodeGen/ARM/lsr-code-insertion.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -stats |& grep {38.*Number of machine instrs printed}
+; RUN: llc < %s -stats |& grep {39.*Number of machine instrs printed}
; RUN: llc < %s -stats |& not grep {.*Number of re-materialization}
; This test really wants to check that the resultant "cond_true" block only
; has a single store in it, and that cond_true55 only has code to materialize
diff --git a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
index 866be423c2cb..9882690da268 100644
--- a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
+++ b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
@@ -4,14 +4,14 @@
; constant offset addressing, so that each of the following stores
; uses the same register.
-; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-128]
-; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-96]
-; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-64]
-; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-32]
-; CHECK: vstr.32 s{{.*}}, [r{{.*}}]
-; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #32]
-; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #64]
-; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #96]
+; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #-128]
+; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #-96]
+; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #-64]
+; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #-32]
+; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}]
+; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #32]
+; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #64]
+; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #96]
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
@@ -624,12 +624,11 @@ bb23: ; preds = %bb22, %bb20, %bb9,
bb24: ; preds = %bb23
; LSR should use count-down iteration to avoid requiring the trip count
-; in a register, and it shouldn't require any reloads here.
+; in a register.
; CHECK: @ %bb24
-; CHECK-NEXT: @ in Loop: Header=BB1_1 Depth=1
-; CHECK-NEXT: sub{{.*}} [[REGISTER:(r[0-9]+)|(lr)]], #1
-; CHECK-NEXT: bne.w
+; CHECK: subs{{.*}} {{(r[0-9]+)|(lr)}}, #1
+; CHECK: bne.w
%92 = icmp eq i32 %tmp81, %indvar78 ; <i1> [#uses=1]
%indvar.next79 = add i32 %indvar78, 1 ; <i32> [#uses=1]
diff --git a/test/CodeGen/Thumb/machine-licm.ll b/test/CodeGen/ARM/machine-licm.ll
index a87e82c21dd7..8656c5bbd72c 100644
--- a/test/CodeGen/Thumb/machine-licm.ll
+++ b/test/CodeGen/ARM/machine-licm.ll
@@ -1,6 +1,9 @@
-; RUN: llc < %s -mtriple=thumb-apple-darwin -relocation-model=pic -disable-fp-elim | FileCheck %s
+; RUN: llc < %s -mtriple=thumb-apple-darwin -relocation-model=pic -disable-fp-elim | FileCheck %s -check-prefix=THUMB
+; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic -disable-fp-elim | FileCheck %s -check-prefix=ARM
+; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic -disable-fp-elim -mattr=+v6t2 | FileCheck %s -check-prefix=MOVT
; rdar://7353541
; rdar://7354376
+; rdar://8887598
; The generated code is nowhere near ideal. It does not recognize that the two
; constantpool entries being loaded can be merged into one.
@@ -9,19 +12,41 @@
define void @t(i32* nocapture %vals, i32 %c) nounwind {
entry:
-; CHECK: t:
+; ARM: t:
+; ARM: ldr [[REGISTER_1:r[0-9]+]], LCPI0_0
+; Unfortunately, ARM codegen currently does not CSE the ldr from the constantpool.
+; The issue is that the loaded value can be consumed by either an "add pc" or a
+; "ldr [pc]", so it is messy to add the pseudo instructions needed to make sure
+; they are CSE'ed at the same time as the "ldr cp".
+; ARM: ldr r{{[0-9]+}}, LCPI0_1
+; ARM: LPC0_0:
+; ARM: ldr r{{[0-9]+}}, [pc, [[REGISTER_1]]]
+; ARM: ldr r{{[0-9]+}}, [r{{[0-9]+}}]
+
+; MOVT: t:
+; MOVT: movw [[REGISTER_2:r[0-9]+]], :lower16:(L_GV$non_lazy_ptr-(LPC0_0+8))
+; MOVT: movt [[REGISTER_2]], :upper16:(L_GV$non_lazy_ptr-(LPC0_0+8))
+; MOVT: LPC0_0:
+; MOVT: ldr r{{[0-9]+}}, [pc, [[REGISTER_2]]]
+; MOVT: ldr r{{[0-9]+}}, [r{{[0-9]+}}]
+
+; THUMB: t:
%0 = icmp eq i32 %c, 0 ; <i1> [#uses=1]
br i1 %0, label %return, label %bb.nph
bb.nph: ; preds = %entry
-; CHECK: BB#1
-; CHECK: ldr.n r2, LCPI0_0
-; CHECK: add r2, pc
-; CHECK: ldr r{{[0-9]+}}, [r2]
-; CHECK: LBB0_2
-; CHECK: LCPI0_0:
-; CHECK-NOT: LCPI0_1:
-; CHECK: .section
+; ARM: LCPI0_0:
+; ARM: LCPI0_1:
+; ARM: .section
+
+; THUMB: BB#1
+; THUMB: ldr.n r2, LCPI0_0
+; THUMB: add r2, pc
+; THUMB: ldr r{{[0-9]+}}, [r2]
+; THUMB: LBB0_2
+; THUMB: LCPI0_0:
+; THUMB-NOT: LCPI0_1:
+; THUMB: .section
%.pre = load i32* @GV, align 4 ; <i32> [#uses=1]
br label %bb
diff --git a/test/CodeGen/ARM/mul_const.ll b/test/CodeGen/ARM/mul_const.ll
index 8c102464612c..3cb8a8e816f6 100644
--- a/test/CodeGen/ARM/mul_const.ll
+++ b/test/CodeGen/ARM/mul_const.ll
@@ -36,7 +36,7 @@ define i32 @t12288(i32 %v) nounwind readnone {
entry:
; CHECK: t12288:
; CHECK: add r0, r0, r0, lsl #1
-; CHECK: mov r0, r0, lsl #12
+; CHECK: lsl{{.*}}#12
%0 = mul i32 %v, 12288
ret i32 %0
}
diff --git a/test/CodeGen/ARM/mult-alt-generic-arm.ll b/test/CodeGen/ARM/mult-alt-generic-arm.ll
new file mode 100644
index 000000000000..a8104db337f5
--- /dev/null
+++ b/test/CodeGen/ARM/mult-alt-generic-arm.ll
@@ -0,0 +1,323 @@
+; RUN: llc < %s -march=arm
+; ModuleID = 'mult-alt-generic.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32"
+target triple = "arm"
+
+@mout0 = common global i32 0, align 4
+@min1 = common global i32 0, align 4
+@marray = common global [2 x i32] zeroinitializer, align 4
+
+define arm_aapcscc void @single_m() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*m,*m"(i32* @mout0, i32* @min1) nounwind
+ ret void
+}
+
+define arm_aapcscc void @single_o() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %index = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %index, align 4
+ ret void
+}
+
+define arm_aapcscc void @single_V() nounwind {
+entry:
+ ret void
+}
+
+define arm_aapcscc void @single_lt() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,<r"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* %in1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r,r<"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ ret void
+}
+
+define arm_aapcscc void @single_gt() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,>r"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* %in1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r,r>"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ ret void
+}
+
+define arm_aapcscc void @single_r() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,r"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define arm_aapcscc void @single_i() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,i"(i32 1) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define arm_aapcscc void @single_n() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,n"(i32 1) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define arm_aapcscc void @single_E() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r,E"(double 1.000000e+001) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define arm_aapcscc void @single_F() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r,F"(double 1.000000e+000) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define arm_aapcscc void @single_s() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ ret void
+}
+
+define arm_aapcscc void @single_g() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* @min1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ %2 = call i32 asm "foo $1,$0", "=r,imr"(i32 1) nounwind
+ store i32 %2, i32* %out0, align 4
+ ret void
+}
+
+define arm_aapcscc void @single_X() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,X"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* @min1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r,X"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ %2 = call i32 asm "foo $1,$0", "=r,X"(i32 1) nounwind
+ store i32 %2, i32* %out0, align 4
+ %3 = call i32 asm "foo $1,$0", "=r,X"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ store i32 %3, i32* %out0, align 4
+; No lowering support.
+; %4 = call i32 asm "foo $1,$0", "=r,X"(double 1.000000e+001) nounwind
+; store i32 %4, i32* %out0, align 4
+; %5 = call i32 asm "foo $1,$0", "=r,X"(double 1.000000e+000) nounwind
+; store i32 %5, i32* %out0, align 4
+ ret void
+}
+
+define arm_aapcscc void @single_p() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,r"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define arm_aapcscc void @multi_m() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ call void asm "foo $1,$0", "=*m|r,m|r"(i32* @mout0, i32 %tmp) nounwind
+ ret void
+}
+
+define arm_aapcscc void @multi_o() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %index = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %index, align 4
+ ret void
+}
+
+define arm_aapcscc void @multi_V() nounwind {
+entry:
+ ret void
+}
+
+define arm_aapcscc void @multi_lt() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|<r"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* %in1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r|r,r|r<"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ ret void
+}
+
+define arm_aapcscc void @multi_gt() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|>r"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* %in1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r|r,r|r>"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ ret void
+}
+
+define arm_aapcscc void @multi_r() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|m"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define arm_aapcscc void @multi_i() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|i"(i32 1) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define arm_aapcscc void @multi_n() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|n"(i32 1) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define arm_aapcscc void @multi_E() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r|r,r|E"(double 1.000000e+001) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define arm_aapcscc void @multi_F() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r|r,r|F"(double 1.000000e+000) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define arm_aapcscc void @multi_s() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ ret void
+}
+
+define arm_aapcscc void @multi_g() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* @min1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ %2 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 1) nounwind
+ store i32 %2, i32* %out0, align 4
+ ret void
+}
+
+define arm_aapcscc void @multi_X() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* @min1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ %2 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 1) nounwind
+ store i32 %2, i32* %out0, align 4
+ %3 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ store i32 %3, i32* %out0, align 4
+; No lowering support.
+; %4 = call i32 asm "foo $1,$0", "=r|r,r|X"(double 1.000000e+001) nounwind
+; store i32 %4, i32* %out0, align 4
+; %5 = call i32 asm "foo $1,$0", "=r|r,r|X"(double 1.000000e+000) nounwind
+; store i32 %5, i32* %out0, align 4
+ ret void
+}
+
+define arm_aapcscc void @multi_p() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|r"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
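
The constraint strings in this file use GCC-style multiple-alternative syntax: within each operand the alternatives are separated by '|', and the compiler must pick the same alternative position for every operand of the statement. A hedged sketch of a two-alternative statement (the asm text "foo" is a placeholder, exactly as in the test itself):

define arm_aapcscc i32 @pick_alternative(i32 %x) nounwind {
entry:
  ; Alternative 0: output in a register, input in a register  ("=r" with "r").
  ; Alternative 1: output in a register, input as an immediate ("=r" with "i").
  ; %x is not a constant, so alternative 0 is the one that can be satisfied.
  %0 = call i32 asm "foo $1,$0", "=r|r,r|i"(i32 %x) nounwind
  ret i32 %0
}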
diff --git a/test/CodeGen/ARM/neon_div.ll b/test/CodeGen/ARM/neon_div.ll
new file mode 100644
index 000000000000..e33797079093
--- /dev/null
+++ b/test/CodeGen/ARM/neon_div.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @sdivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vrecpe.f32
+;CHECK: vrecpe.f32
+;CHECK: vmovn.i32
+;CHECK: vmovn.i32
+;CHECK: vmovn.i16
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = sdiv <8 x i8> %tmp1, %tmp2
+ ret <8 x i8> %tmp3
+}
+
+define <8 x i8> @udivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vrecpe.f32
+;CHECK: vrecps.f32
+;CHECK: vrecpe.f32
+;CHECK: vrecps.f32
+;CHECK: vmovn.i32
+;CHECK: vmovn.i32
+;CHECK: vqmovun.s16
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = udiv <8 x i8> %tmp1, %tmp2
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @sdivi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vrecpe.f32
+;CHECK: vrecps.f32
+;CHECK: vmovn.i32
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = sdiv <4 x i16> %tmp1, %tmp2
+ ret <4 x i16> %tmp3
+}
+
+define <4 x i16> @udivi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vrecpe.f32
+;CHECK: vrecps.f32
+;CHECK: vrecps.f32
+;CHECK: vmovn.i32
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = udiv <4 x i16> %tmp1, %tmp2
+ ret <4 x i16> %tmp3
+}
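
NEON has no vector integer divide, so the sdiv/udiv operations above are lowered through the single-precision reciprocal pipeline: vrecpe produces an estimate of 1/d, vrecps produces the Newton-Raphson correction factor (2 - d*x), and the quotient is narrowed back with vmovn/vqmovun. A hedged sketch of one refinement step using the corresponding intrinsics (the function name is illustrative, and the intrinsic declarations are assumed to match the in-tree vrecpe/vrecps tests):

define <2 x float> @recip_step(<2 x float> %d) nounwind {
entry:
  ; x0 is an estimate of 1/d
  %x0 = call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %d)
  ; vrecps(d, x0) = 2 - d*x0, so x1 = x0 * (2 - d*x0) is one Newton-Raphson step
  %c = call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %d, <2 x float> %x0)
  %x1 = fmul <2 x float> %x0, %c
  ret <2 x float> %x1
}

declare <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float>) nounwind readnone
declare <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float>, <2 x float>) nounwind readnone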
diff --git a/test/CodeGen/ARM/pack.ll b/test/CodeGen/ARM/pack.ll
index 4905dc28cf48..90151767b919 100644
--- a/test/CodeGen/ARM/pack.ll
+++ b/test/CodeGen/ARM/pack.ll
@@ -3,87 +3,78 @@
; CHECK: test1
; CHECK: pkhbt r0, r0, r1, lsl #16
define i32 @test1(i32 %X, i32 %Y) {
- %tmp1 = and i32 %X, 65535 ; <i32> [#uses=1]
- %tmp4 = shl i32 %Y, 16 ; <i32> [#uses=1]
- %tmp5 = or i32 %tmp4, %tmp1 ; <i32> [#uses=1]
- ret i32 %tmp5
-}
-
-; CHECK: test1a
-; CHECK: pkhbt r0, r0, r1, lsl #16
-define i32 @test1a(i32 %X, i32 %Y) {
- %tmp19 = and i32 %X, 65535 ; <i32> [#uses=1]
- %tmp37 = shl i32 %Y, 16 ; <i32> [#uses=1]
- %tmp5 = or i32 %tmp37, %tmp19 ; <i32> [#uses=1]
+ %tmp1 = and i32 %X, 65535
+ %tmp4 = shl i32 %Y, 16
+ %tmp5 = or i32 %tmp4, %tmp1
ret i32 %tmp5
}
; CHECK: test2
; CHECK: pkhbt r0, r0, r1, lsl #12
define i32 @test2(i32 %X, i32 %Y) {
- %tmp1 = and i32 %X, 65535 ; <i32> [#uses=1]
- %tmp3 = shl i32 %Y, 12 ; <i32> [#uses=1]
- %tmp4 = and i32 %tmp3, -65536 ; <i32> [#uses=1]
- %tmp57 = or i32 %tmp4, %tmp1 ; <i32> [#uses=1]
+ %tmp1 = and i32 %X, 65535
+ %tmp3 = shl i32 %Y, 12
+ %tmp4 = and i32 %tmp3, -65536
+ %tmp57 = or i32 %tmp4, %tmp1
ret i32 %tmp57
}
; CHECK: test3
; CHECK: pkhbt r0, r0, r1, lsl #18
define i32 @test3(i32 %X, i32 %Y) {
- %tmp19 = and i32 %X, 65535 ; <i32> [#uses=1]
- %tmp37 = shl i32 %Y, 18 ; <i32> [#uses=1]
- %tmp5 = or i32 %tmp37, %tmp19 ; <i32> [#uses=1]
+ %tmp19 = and i32 %X, 65535
+ %tmp37 = shl i32 %Y, 18
+ %tmp5 = or i32 %tmp37, %tmp19
ret i32 %tmp5
}
; CHECK: test4
; CHECK: pkhbt r0, r0, r1
define i32 @test4(i32 %X, i32 %Y) {
- %tmp1 = and i32 %X, 65535 ; <i32> [#uses=1]
- %tmp3 = and i32 %Y, -65536 ; <i32> [#uses=1]
- %tmp46 = or i32 %tmp3, %tmp1 ; <i32> [#uses=1]
+ %tmp1 = and i32 %X, 65535
+ %tmp3 = and i32 %Y, -65536
+ %tmp46 = or i32 %tmp3, %tmp1
ret i32 %tmp46
}
; CHECK: test5
; CHECK: pkhtb r0, r0, r1, asr #16
define i32 @test5(i32 %X, i32 %Y) {
- %tmp17 = and i32 %X, -65536 ; <i32> [#uses=1]
- %tmp2 = bitcast i32 %Y to i32 ; <i32> [#uses=1]
- %tmp4 = lshr i32 %tmp2, 16 ; <i32> [#uses=2]
- %tmp5 = or i32 %tmp4, %tmp17 ; <i32> [#uses=1]
+ %tmp17 = and i32 %X, -65536
+ %tmp2 = bitcast i32 %Y to i32
+ %tmp4 = lshr i32 %tmp2, 16
+ %tmp5 = or i32 %tmp4, %tmp17
ret i32 %tmp5
}
; CHECK: test5a
; CHECK: pkhtb r0, r0, r1, asr #16
define i32 @test5a(i32 %X, i32 %Y) {
- %tmp110 = and i32 %X, -65536 ; <i32> [#uses=1]
- %tmp37 = lshr i32 %Y, 16 ; <i32> [#uses=1]
- %tmp39 = bitcast i32 %tmp37 to i32 ; <i32> [#uses=1]
- %tmp5 = or i32 %tmp39, %tmp110 ; <i32> [#uses=1]
+ %tmp110 = and i32 %X, -65536
+ %tmp37 = lshr i32 %Y, 16
+ %tmp39 = bitcast i32 %tmp37 to i32
+ %tmp5 = or i32 %tmp39, %tmp110
ret i32 %tmp5
}
; CHECK: test6
; CHECK: pkhtb r0, r0, r1, asr #12
define i32 @test6(i32 %X, i32 %Y) {
- %tmp1 = and i32 %X, -65536 ; <i32> [#uses=1]
- %tmp37 = lshr i32 %Y, 12 ; <i32> [#uses=1]
- %tmp38 = bitcast i32 %tmp37 to i32 ; <i32> [#uses=1]
- %tmp4 = and i32 %tmp38, 65535 ; <i32> [#uses=1]
- %tmp59 = or i32 %tmp4, %tmp1 ; <i32> [#uses=1]
+ %tmp1 = and i32 %X, -65536
+ %tmp37 = lshr i32 %Y, 12
+ %tmp38 = bitcast i32 %tmp37 to i32
+ %tmp4 = and i32 %tmp38, 65535
+ %tmp59 = or i32 %tmp4, %tmp1
ret i32 %tmp59
}
; CHECK: test7
; CHECK: pkhtb r0, r0, r1, asr #18
define i32 @test7(i32 %X, i32 %Y) {
- %tmp1 = and i32 %X, -65536 ; <i32> [#uses=1]
- %tmp3 = ashr i32 %Y, 18 ; <i32> [#uses=1]
- %tmp4 = and i32 %tmp3, 65535 ; <i32> [#uses=1]
- %tmp57 = or i32 %tmp4, %tmp1 ; <i32> [#uses=1]
+ %tmp1 = and i32 %X, -65536
+ %tmp3 = ashr i32 %Y, 18
+ %tmp4 = and i32 %tmp3, 65535
+ %tmp57 = or i32 %tmp4, %tmp1
ret i32 %tmp57
}
diff --git a/test/CodeGen/ARM/phi.ll b/test/CodeGen/ARM/phi.ll
new file mode 100644
index 000000000000..29e17c095a74
--- /dev/null
+++ b/test/CodeGen/ARM/phi.ll
@@ -0,0 +1,23 @@
+; RUN: llc -march=arm < %s | FileCheck %s
+; <rdar://problem/8686347>
+
+define i32 @test1(i1 %a, i32* %b) {
+; CHECK: test1
+entry:
+ br i1 %a, label %lblock, label %rblock
+
+lblock:
+ %lbranch = getelementptr i32* %b, i32 1
+ br label %end
+
+rblock:
+ %rbranch = getelementptr i32* %b, i32 1
+ br label %end
+
+end:
+; CHECK: ldr r0, [r1, #4]
+ %gep = phi i32* [%lbranch, %lblock], [%rbranch, %rblock]
+ %r = load i32* %gep
+; CHECK-NEXT: bx lr
+ ret i32 %r
+} \ No newline at end of file
diff --git a/test/CodeGen/ARM/prefetch.ll b/test/CodeGen/ARM/prefetch.ll
new file mode 100644
index 000000000000..895b27b749db
--- /dev/null
+++ b/test/CodeGen/ARM/prefetch.ll
@@ -0,0 +1,61 @@
+; RUN: llc < %s -march=thumb -mattr=-thumb2 | not grep pld
+; RUN: llc < %s -march=thumb -mattr=+v7a | FileCheck %s -check-prefix=THUMB2
+; RUN: llc < %s -march=arm -mattr=+v7a,+mp | FileCheck %s -check-prefix=ARM-MP
+; rdar://8601536
+
+define void @t1(i8* %ptr) nounwind {
+entry:
+; ARM-MP: t1:
+; ARM-MP: pldw [r0]
+; ARM-MP: pld [r0]
+
+; THUMB2: t1:
+; THUMB2-NOT: pldw [r0]
+; THUMB2: pld [r0]
+ tail call void @llvm.prefetch( i8* %ptr, i32 1, i32 3 )
+ tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3 )
+ ret void
+}
+
+define void @t2(i8* %ptr) nounwind {
+entry:
+; ARM-MP: t2:
+; ARM-MP: pld [r0, #1023]
+
+; THUMB2: t2:
+; THUMB2: pld [r0, #1023]
+ %tmp = getelementptr i8* %ptr, i32 1023
+ tail call void @llvm.prefetch( i8* %tmp, i32 0, i32 3 )
+ ret void
+}
+
+define void @t3(i32 %base, i32 %offset) nounwind {
+entry:
+; ARM-MP: t3:
+; ARM-MP: pld [r0, r1, lsr #2]
+
+; THUMB2: t3:
+; THUMB2: lsrs r1, r1, #2
+; THUMB2: pld [r0, r1]
+ %tmp1 = lshr i32 %offset, 2
+ %tmp2 = add i32 %base, %tmp1
+ %tmp3 = inttoptr i32 %tmp2 to i8*
+ tail call void @llvm.prefetch( i8* %tmp3, i32 0, i32 3 )
+ ret void
+}
+
+define void @t4(i32 %base, i32 %offset) nounwind {
+entry:
+; ARM-MP: t4:
+; ARM-MP: pld [r0, r1, lsl #2]
+
+; THUMB2: t4:
+; THUMB2: pld [r0, r1, lsl #2]
+ %tmp1 = shl i32 %offset, 2
+ %tmp2 = add i32 %base, %tmp1
+ %tmp3 = inttoptr i32 %tmp2 to i8*
+ tail call void @llvm.prefetch( i8* %tmp3, i32 0, i32 3 )
+ ret void
+}
+
+declare void @llvm.prefetch(i8*, i32, i32) nounwind
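
For context, this three-operand form of llvm.prefetch takes the address, a read/write flag (0 = read, 1 = write), and a locality hint from 0 (no temporal locality) to 3 (high); the write form is what selects pldw, which in turn needs the +mp feature used in the ARM-MP RUN line. A hedged usage sketch reusing the declaration above (the function name is illustrative):

define void @warm_line(i8* %p) nounwind {
entry:
  ; read prefetch, highest locality -> pld [r0]
  tail call void @llvm.prefetch( i8* %p, i32 0, i32 3 )
  ; write prefetch, highest locality -> pldw [r0] when +mp is available
  tail call void @llvm.prefetch( i8* %p, i32 1, i32 3 )
  ret void
}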
diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll
index 2e4f10d8a63d..53214fd4c302 100644
--- a/test/CodeGen/ARM/reg_sequence.ll
+++ b/test/CodeGen/ARM/reg_sequence.ll
@@ -46,8 +46,8 @@ entry:
; CHECK: t2:
; CHECK: vld1.16
; CHECK-NOT: vmov
-; CHECK: vld1.16
; CHECK: vmul.i16
+; CHECK: vld1.16
; CHECK: vmul.i16
; CHECK-NOT: vmov
; CHECK: vst1.16
@@ -75,7 +75,8 @@ define <8 x i8> @t3(i8* %A, i8* %B) nounwind {
; CHECK: t3:
; CHECK: vld3.8
; CHECK: vmul.i8
-; CHECK-NOT: vmov
+; CHECK: vmov r
+; CHECK-NOT: vmov d
; CHECK: vst3.8
%tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2]
%tmp2 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 0 ; <<8 x i8>> [#uses=1]
@@ -122,9 +123,9 @@ return1:
return2:
; CHECK: %return2
; CHECK: vadd.i32
-; CHECK: vmov q1, q3
+; CHECK: vmov q9, q11
; CHECK-NOT: vmov
-; CHECK: vst2.32 {d0, d1, d2, d3}
+; CHECK: vst2.32 {d16, d17, d18, d19}
%tmp100 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0 ; <<4 x i32>> [#uses=1]
%tmp101 = extractvalue %struct.__neon_int32x4x2_t %tmp5, 1 ; <<4 x i32>> [#uses=1]
%tmp102 = add <4 x i32> %tmp100, %tmp101 ; <<4 x i32>> [#uses=1]
@@ -136,9 +137,9 @@ return2:
define <8 x i16> @t5(i16* %A, <8 x i16>* %B) nounwind {
; CHECK: t5:
; CHECK: vldmia
-; CHECK: vmov q1, q0
+; CHECK: vmov q9, q8
; CHECK-NOT: vmov
-; CHECK: vld2.16 {d0[1], d2[1]}, [r0]
+; CHECK: vld2.16 {d16[1], d18[1]}, [r0]
; CHECK-NOT: vmov
; CHECK: vadd.i16
%tmp0 = bitcast i16* %A to i8* ; <i8*> [#uses=1]
@@ -153,8 +154,8 @@ define <8 x i16> @t5(i16* %A, <8 x i16>* %B) nounwind {
define <8 x i8> @t6(i8* %A, <8 x i8>* %B) nounwind {
; CHECK: t6:
; CHECK: vldr.64
-; CHECK: vmov d1, d0
-; CHECK-NEXT: vld2.8 {d0[1], d1[1]}
+; CHECK: vmov d17, d16
+; CHECK-NEXT: vld2.8 {d16[1], d17[1]}
%tmp1 = load <8 x i8>* %B ; <<8 x i8>> [#uses=2]
%tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) ; <%struct.__neon_int8x8x2_t> [#uses=2]
%tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0 ; <<8 x i8>> [#uses=1]
@@ -168,10 +169,10 @@ entry:
; CHECK: t7:
; CHECK: vld2.32
; CHECK: vst2.32
-; CHECK: vld1.32 {d0, d1},
-; CHECK: vmov q1, q0
+; CHECK: vld1.32 {d16, d17},
+; CHECK: vmov q9, q8
; CHECK-NOT: vmov
-; CHECK: vuzp.32 q0, q1
+; CHECK: vuzp.32 q8, q9
; CHECK: vst1.32
%0 = bitcast i32* %iptr to i8* ; <i8*> [#uses=2]
%1 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %0, i32 1) ; <%struct.__neon_int32x4x2_t> [#uses=2]
@@ -188,7 +189,7 @@ entry:
; PR7156
define arm_aapcs_vfpcc i32 @t8() nounwind {
; CHECK: t8:
-; CHECK: vrsqrte.f32 q0, q0
+; CHECK: vrsqrte.f32 q8, q8
bb.nph55.bb.nph55.split_crit_edge:
br label %bb3
@@ -238,10 +239,10 @@ bb14: ; preds = %bb6
define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind {
; CHECK: t9:
; CHECK: vldr.64
-; CHECK-NOT: vmov d{{.*}}, d0
-; CHECK: vmov.i32 d1
-; CHECK-NEXT: vstmia r0, {d0, d1}
-; CHECK-NEXT: vstmia r0, {d0, d1}
+; CHECK-NOT: vmov d{{.*}}, d16
+; CHECK: vmov.i32 d17
+; CHECK-NEXT: vstmia r0, {d16, d17}
+; CHECK-NEXT: vstmia r0, {d16, d17}
%3 = bitcast double 0.000000e+00 to <2 x float> ; <<2 x float>> [#uses=2]
%4 = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
store <4 x float> %4, <4 x float>* undef, align 16
@@ -269,9 +270,9 @@ define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind {
define arm_aapcs_vfpcc i32 @t10() nounwind {
entry:
; CHECK: t10:
-; CHECK: vmov.i32 q1, #0x3F000000
-; CHECK: vmov d0, d1
-; CHECK: vmla.f32 q0, q0, d0[0]
+; CHECK: vmul.f32 q8, q8, d0[0]
+; CHECK: vmov.i32 q9, #0x3F000000
+; CHECK: vadd.f32 q8, q8, q8
%0 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
%1 = insertelement <4 x float> %0, float undef, i32 1 ; <<4 x float>> [#uses=1]
%2 = insertelement <4 x float> %1, float undef, i32 2 ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/ARM/remat.ll b/test/CodeGen/ARM/remat.ll
deleted file mode 100644
index 6b86f1a9f368..000000000000
--- a/test/CodeGen/ARM/remat.ll
+++ /dev/null
@@ -1,65 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 -o /dev/null -stats -info-output-file - | grep "Number of re-materialization"
-
-define i32 @main(i32 %argc, i8** nocapture %argv, double %d1, double %d2) nounwind {
-entry:
- br i1 undef, label %smvp.exit, label %bb.i3
-
-bb.i3: ; preds = %bb.i3, %bb134
- br i1 undef, label %smvp.exit, label %bb.i3
-
-smvp.exit: ; preds = %bb.i3
- %0 = fmul double %d1, 2.400000e-03 ; <double> [#uses=2]
- br i1 undef, label %bb138.preheader, label %bb159
-
-bb138.preheader: ; preds = %smvp.exit
- br label %bb138
-
-bb138: ; preds = %bb138, %bb138.preheader
- br i1 undef, label %bb138, label %bb145.loopexit
-
-bb142: ; preds = %bb.nph218.bb.nph218.split_crit_edge, %phi0.exit
- %1 = fmul double %d1, -1.200000e-03 ; <double> [#uses=1]
- %2 = fadd double %d2, %1 ; <double> [#uses=1]
- %3 = fmul double %2, %d2 ; <double> [#uses=1]
- %4 = fsub double 0.000000e+00, %3 ; <double> [#uses=1]
- br i1 %14, label %phi1.exit, label %bb.i35
-
-bb.i35: ; preds = %bb142
- %5 = call double @sin(double %15) nounwind readonly ; <double> [#uses=1]
- %6 = fmul double %5, 0x4031740AFA84AD8A ; <double> [#uses=1]
- %7 = fsub double 1.000000e+00, undef ; <double> [#uses=1]
- %8 = fdiv double %7, 6.000000e-01 ; <double> [#uses=1]
- br label %phi1.exit
-
-phi1.exit: ; preds = %bb.i35, %bb142
- %.pn = phi double [ %6, %bb.i35 ], [ 0.000000e+00, %bb142 ] ; <double> [#uses=1]
- %9 = phi double [ %8, %bb.i35 ], [ 0.000000e+00, %bb142 ] ; <double> [#uses=1]
- %10 = fmul double %.pn, %9 ; <double> [#uses=1]
- br i1 %14, label %phi0.exit, label %bb.i
-
-bb.i: ; preds = %phi1.exit
- unreachable
-
-phi0.exit: ; preds = %phi1.exit
- %11 = fsub double %4, %10 ; <double> [#uses=1]
- %12 = fadd double 0.000000e+00, %11 ; <double> [#uses=1]
- store double %12, double* undef, align 4
- br label %bb142
-
-bb145.loopexit: ; preds = %bb138
- br i1 undef, label %bb.nph218.bb.nph218.split_crit_edge, label %bb159
-
-bb.nph218.bb.nph218.split_crit_edge: ; preds = %bb145.loopexit
- %13 = fmul double %0, 0x401921FB54442D18 ; <double> [#uses=1]
- %14 = fcmp ugt double %0, 6.000000e-01 ; <i1> [#uses=2]
- %15 = fdiv double %13, 6.000000e-01 ; <double> [#uses=1]
- br label %bb142
-
-bb159: ; preds = %bb145.loopexit, %smvp.exit, %bb134
- unreachable
-
-bb166: ; preds = %bb127
- unreachable
-}
-
-declare double @sin(double) nounwind readonly
diff --git a/test/CodeGen/ARM/rev.ll b/test/CodeGen/ARM/rev.ll
index 1c12268ef86c..687bf8834c9f 100644
--- a/test/CodeGen/ARM/rev.ll
+++ b/test/CodeGen/ARM/rev.ll
@@ -1,27 +1,30 @@
-; RUN: llc < %s -march=arm -mattr=+v6 | grep rev16
-; RUN: llc < %s -march=arm -mattr=+v6 | grep revsh
+; RUN: llc < %s -march=arm -mattr=+v6 | FileCheck %s
define i32 @test1(i32 %X) {
- %tmp1 = lshr i32 %X, 8 ; <i32> [#uses=3]
- %X15 = bitcast i32 %X to i32 ; <i32> [#uses=1]
- %tmp4 = shl i32 %X15, 8 ; <i32> [#uses=2]
- %tmp2 = and i32 %tmp1, 16711680 ; <i32> [#uses=1]
- %tmp5 = and i32 %tmp4, -16777216 ; <i32> [#uses=1]
- %tmp9 = and i32 %tmp1, 255 ; <i32> [#uses=1]
- %tmp13 = and i32 %tmp4, 65280 ; <i32> [#uses=1]
- %tmp6 = or i32 %tmp5, %tmp2 ; <i32> [#uses=1]
- %tmp10 = or i32 %tmp6, %tmp13 ; <i32> [#uses=1]
- %tmp14 = or i32 %tmp10, %tmp9 ; <i32> [#uses=1]
+; CHECK: test1
+; CHECK: rev16 r0, r0
+ %tmp1 = lshr i32 %X, 8
+ %X15 = bitcast i32 %X to i32
+ %tmp4 = shl i32 %X15, 8
+ %tmp2 = and i32 %tmp1, 16711680
+ %tmp5 = and i32 %tmp4, -16777216
+ %tmp9 = and i32 %tmp1, 255
+ %tmp13 = and i32 %tmp4, 65280
+ %tmp6 = or i32 %tmp5, %tmp2
+ %tmp10 = or i32 %tmp6, %tmp13
+ %tmp14 = or i32 %tmp10, %tmp9
ret i32 %tmp14
}
define i32 @test2(i32 %X) {
- %tmp1 = lshr i32 %X, 8 ; <i32> [#uses=1]
- %tmp1.upgrd.1 = trunc i32 %tmp1 to i16 ; <i16> [#uses=1]
- %tmp3 = trunc i32 %X to i16 ; <i16> [#uses=1]
- %tmp2 = and i16 %tmp1.upgrd.1, 255 ; <i16> [#uses=1]
- %tmp4 = shl i16 %tmp3, 8 ; <i16> [#uses=1]
- %tmp5 = or i16 %tmp2, %tmp4 ; <i16> [#uses=1]
- %tmp5.upgrd.2 = sext i16 %tmp5 to i32 ; <i32> [#uses=1]
+; CHECK: test2
+; CHECK: revsh r0, r0
+ %tmp1 = lshr i32 %X, 8
+ %tmp1.upgrd.1 = trunc i32 %tmp1 to i16
+ %tmp3 = trunc i32 %X to i16
+ %tmp2 = and i16 %tmp1.upgrd.1, 255
+ %tmp4 = shl i16 %tmp3, 8
+ %tmp5 = or i16 %tmp2, %tmp4
+ %tmp5.upgrd.2 = sext i16 %tmp5 to i32
ret i32 %tmp5.upgrd.2
}
diff --git a/test/CodeGen/ARM/select-imm.ll b/test/CodeGen/ARM/select-imm.ll
index 6e15fde045fb..578834ec93bc 100644
--- a/test/CodeGen/ARM/select-imm.ll
+++ b/test/CodeGen/ARM/select-imm.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -march=arm | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -march=arm -mattr=+thumb2 | FileCheck %s --check-prefix=T2
+; RUN: llc < %s -march=arm | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -march=arm -mattr=+thumb2 | FileCheck %s --check-prefix=ARMT2
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s --check-prefix=THUMB2
define i32 @t1(i32 %c) nounwind readnone {
entry:
@@ -8,9 +9,13 @@ entry:
; ARM: orr r1, r1, #1, 24
; ARM: movgt r0, #123
-; T2: t1:
-; T2: movw r0, #357
-; T2: movgt r0, #123
+; ARMT2: t1:
+; ARMT2: movw r0, #357
+; ARMT2: movgt r0, #123
+
+; THUMB2: t1:
+; THUMB2: movw r0, #357
+; THUMB2: movgt r0, #123
%0 = icmp sgt i32 %c, 1
%1 = select i1 %0, i32 123, i32 357
@@ -20,13 +25,17 @@ entry:
define i32 @t2(i32 %c) nounwind readnone {
entry:
; ARM: t2:
-; ARM: mov r1, #101
-; ARM: orr r1, r1, #1, 24
-; ARM: movle r0, #123
+; ARM: mov r0, #123
+; ARM: movgt r0, #101
+; ARM: orrgt r0, r0, #1, 24
-; T2: t2:
-; T2: movw r0, #357
-; T2: movle r0, #123
+; ARMT2: t2:
+; ARMT2: mov r0, #123
+; ARMT2: movwgt r0, #357
+
+; THUMB2: t2:
+; THUMB2: mov.w r0, #123
+; THUMB2: movwgt r0, #357
%0 = icmp sgt i32 %c, 1
%1 = select i1 %0, i32 357, i32 123
@@ -39,10 +48,31 @@ entry:
; ARM: mov r0, #0
; ARM: moveq r0, #1
-; T2: t3:
-; T2: mov r0, #0
-; T2: moveq r0, #1
+; ARMT2: t3:
+; ARMT2: mov r0, #0
+; ARMT2: moveq r0, #1
+
+; THUMB2: t3:
+; THUMB2: mov.w r0, #0
+; THUMB2: moveq r0, #1
%0 = icmp eq i32 %a, 160
%1 = zext i1 %0 to i32
ret i32 %1
}
+
+define i32 @t4(i32 %a, i32 %b, i32 %x) nounwind {
+entry:
+; ARM: t4:
+; ARM: ldr
+; ARM: movlt
+
+; ARMT2: t4:
+; ARMT2: movwlt r0, #65365
+; ARMT2: movtlt r0, #65365
+
+; THUMB2: t4:
+; THUMB2: mvnlt.w r0, #11141290
+ %0 = icmp slt i32 %a, %b
+ %1 = select i1 %0, i32 4283826005, i32 %x
+ ret i32 %1
+}
diff --git a/test/CodeGen/ARM/select.ll b/test/CodeGen/ARM/select.ll
index 7413bed5c5b1..1aa0d3904125 100644
--- a/test/CodeGen/ARM/select.ll
+++ b/test/CodeGen/ARM/select.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s
; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s --check-prefix=CHECK-VFP
; RUN: llc < %s -mattr=+neon,+thumb2 -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=CHECK-NEON
@@ -79,9 +79,9 @@ define double @f7(double %a, double %b) {
; CHECK-NEON: movw [[REGISTER_1:r[0-9]+]], #1123
; CHECK-NEON-NEXT: movs [[REGISTER_2:r[0-9]+]], #0
; CHECK-NEON-NEXT: cmp r0, [[REGISTER_1]]
-; CHECK-NEON-NEXT: adr [[REGISTER_3:r[0-9]+]], #LCPI
; CHECK-NEON-NEXT: it eq
; CHECK-NEON-NEXT: moveq [[REGISTER_2]], #4
+; CHECK-NEON-NEXT: adr [[REGISTER_3:r[0-9]+]], #LCPI
; CHECK-NEON-NEXT: ldr
; CHECK-NEON: bx
diff --git a/test/CodeGen/ARM/select_xform.ll b/test/CodeGen/ARM/select_xform.ll
index 7fd91ceea5ad..5dabfc3a82a3 100644
--- a/test/CodeGen/ARM/select_xform.ll
+++ b/test/CodeGen/ARM/select_xform.ll
@@ -1,15 +1,60 @@
-; RUN: llc < %s -march=arm | grep mov | count 2
+; RUN: llc < %s -mtriple=arm-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=ARM
+; RUN: llc < %s -mtriple=thumb-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=T2
+; rdar://8662825
define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind {
- %tmp1 = icmp sgt i32 %c, 10
- %tmp2 = select i1 %tmp1, i32 0, i32 2147483647
- %tmp3 = add i32 %tmp2, %b
- ret i32 %tmp3
+; ARM: t1:
+; ARM: sub r0, r1, #6, 2
+; ARM: movgt r0, r1
+
+; T2: t1:
+; T2: mvn r0, #-2147483648
+; T2: add r0, r1
+; T2: movgt r0, r1
+ %tmp1 = icmp sgt i32 %c, 10
+ %tmp2 = select i1 %tmp1, i32 0, i32 2147483647
+ %tmp3 = add i32 %tmp2, %b
+ ret i32 %tmp3
}
define i32 @t2(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
- %tmp1 = icmp sgt i32 %c, 10
- %tmp2 = select i1 %tmp1, i32 0, i32 10
- %tmp3 = sub i32 %b, %tmp2
- ret i32 %tmp3
+; ARM: t2:
+; ARM: sub r0, r1, #10
+; ARM: movgt r0, r1
+
+; T2: t2:
+; T2: sub.w r0, r1, #10
+; T2: movgt r0, r1
+ %tmp1 = icmp sgt i32 %c, 10
+ %tmp2 = select i1 %tmp1, i32 0, i32 10
+ %tmp3 = sub i32 %b, %tmp2
+ ret i32 %tmp3
+}
+
+define i32 @t3(i32 %a, i32 %b, i32 %x, i32 %y) nounwind {
+; ARM: t3:
+; ARM: mvnlt r2, #0
+; ARM: and r0, r2, r3
+
+; T2: t3:
+; T2: movlt.w r2, #-1
+; T2: and.w r0, r2, r3
+ %cond = icmp slt i32 %a, %b
+ %z = select i1 %cond, i32 -1, i32 %x
+ %s = and i32 %z, %y
+ ret i32 %s
+}
+
+define i32 @t4(i32 %a, i32 %b, i32 %x, i32 %y) nounwind {
+; ARM: t4:
+; ARM: movlt r2, #0
+; ARM: orr r0, r2, r3
+
+; T2: t4:
+; T2: movlt r2, #0
+; T2: orr.w r0, r2, r3
+ %cond = icmp slt i32 %a, %b
+ %z = select i1 %cond, i32 0, i32 %x
+ %s = or i32 %z, %y
+ ret i32 %s
}
diff --git a/test/CodeGen/ARM/shifter_operand.ll b/test/CodeGen/ARM/shifter_operand.ll
index 2bbe9fd2602c..01e3a922f656 100644
--- a/test/CodeGen/ARM/shifter_operand.ll
+++ b/test/CodeGen/ARM/shifter_operand.ll
@@ -1,18 +1,72 @@
-; RUN: llc < %s -march=arm | grep add | grep lsl
-; RUN: llc < %s -march=arm | grep bic | grep asr
+; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
+; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a9 | FileCheck %s -check-prefix=A9
+; rdar://8576755
define i32 @test1(i32 %X, i32 %Y, i8 %sh) {
- %shift.upgrd.1 = zext i8 %sh to i32 ; <i32> [#uses=1]
- %A = shl i32 %Y, %shift.upgrd.1 ; <i32> [#uses=1]
- %B = add i32 %X, %A ; <i32> [#uses=1]
+; A8: test1:
+; A8: add r0, r0, r1, lsl r2
+
+; A9: test1:
+; A9: add r0, r0, r1, lsl r2
+ %shift.upgrd.1 = zext i8 %sh to i32
+ %A = shl i32 %Y, %shift.upgrd.1
+ %B = add i32 %X, %A
ret i32 %B
}
define i32 @test2(i32 %X, i32 %Y, i8 %sh) {
- %shift.upgrd.2 = zext i8 %sh to i32 ; <i32> [#uses=1]
- %A = ashr i32 %Y, %shift.upgrd.2 ; <i32> [#uses=1]
- %B = xor i32 %A, -1 ; <i32> [#uses=1]
- %C = and i32 %X, %B ; <i32> [#uses=1]
+; A8: test2:
+; A8: bic r0, r0, r1, asr r2
+
+; A9: test2:
+; A9: bic r0, r0, r1, asr r2
+ %shift.upgrd.2 = zext i8 %sh to i32
+ %A = ashr i32 %Y, %shift.upgrd.2
+ %B = xor i32 %A, -1
+ %C = and i32 %X, %B
ret i32 %C
}
+
+define i32 @test3(i32 %base, i32 %base2, i32 %offset) {
+entry:
+; A8: test3:
+; A8: ldr r0, [r0, r2, lsl #2]
+; A8: ldr r1, [r1, r2, lsl #2]
+
+; lsl #2 is free
+; A9: test3:
+; A9: ldr r0, [r0, r2, lsl #2]
+; A9: ldr r1, [r1, r2, lsl #2]
+ %tmp1 = shl i32 %offset, 2
+ %tmp2 = add i32 %base, %tmp1
+ %tmp3 = inttoptr i32 %tmp2 to i32*
+ %tmp4 = add i32 %base2, %tmp1
+ %tmp5 = inttoptr i32 %tmp4 to i32*
+ %tmp6 = load i32* %tmp3
+ %tmp7 = load i32* %tmp5
+ %tmp8 = add i32 %tmp7, %tmp6
+ ret i32 %tmp8
+}
+
+declare i8* @malloc(...)
+
+define fastcc void @test4() nounwind {
+entry:
+; A8: test4:
+; A8: ldr r1, [r0, r0, lsl #2]
+; A8: str r1, [r0, r0, lsl #2]
+
+; A9: test4:
+; A9: add r0, r0, r0, lsl #2
+; A9: ldr r1, [r0]
+; A9: str r1, [r0]
+ %0 = tail call i8* (...)* @malloc(i32 undef) nounwind
+ %1 = bitcast i8* %0 to i32*
+ %2 = sext i16 undef to i32
+ %3 = getelementptr inbounds i32* %1, i32 %2
+ %4 = load i32* %3, align 4
+ %5 = add nsw i32 %4, 1
+ store i32 %5, i32* %3, align 4
+ ret void
+}
diff --git a/test/CodeGen/ARM/spill-q.ll b/test/CodeGen/ARM/spill-q.ll
index ae1ba2f73825..bf4e55cb06c4 100644
--- a/test/CodeGen/ARM/spill-q.ll
+++ b/test/CodeGen/ARM/spill-q.ll
@@ -15,11 +15,34 @@ define void @aaa(%quuz* %this, i8* %block) {
; CHECK: vst1.64 {{.*}}sp, :128
; CHECK: vld1.64 {{.*}}sp, :128
entry:
- %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1]
+ %aligned_vec = alloca <4 x float>, align 16
+ %"alloca point" = bitcast i32 0 to i32
+ %vecptr = bitcast <4 x float>* %aligned_vec to i8*
+ %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %vecptr, i32 1) nounwind ; <<4 x float>> [#uses=1]
store float 6.300000e+01, float* undef, align 4
%1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1]
store float 0.000000e+00, float* undef, align 4
%2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1]
+ %ld3 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+ store float 0.000000e+00, float* undef, align 4
+ %ld4 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+ store float 0.000000e+00, float* undef, align 4
+ %ld5 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+ store float 0.000000e+00, float* undef, align 4
+ %ld6 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+ store float 0.000000e+00, float* undef, align 4
+ %ld7 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+ store float 0.000000e+00, float* undef, align 4
+ %ld8 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+ store float 0.000000e+00, float* undef, align 4
+ %ld9 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+ store float 0.000000e+00, float* undef, align 4
+ %ld10 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+ store float 0.000000e+00, float* undef, align 4
+ %ld11 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+ store float 0.000000e+00, float* undef, align 4
+ %ld12 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+ store float 0.000000e+00, float* undef, align 4
%val173 = load <4 x float>* undef ; <<4 x float>> [#uses=1]
br label %bb4
@@ -44,7 +67,16 @@ bb4: ; preds = %bb193, %entry
%18 = fmul <4 x float> %17, %val173 ; <<4 x float>> [#uses=1]
%19 = shufflevector <4 x float> %18, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
%20 = shufflevector <2 x float> %19, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
- %21 = fadd <4 x float> zeroinitializer, %20 ; <<4 x float>> [#uses=2]
+ %tmp1 = fadd <4 x float> %20, %ld3
+ %tmp2 = fadd <4 x float> %tmp1, %ld4
+ %tmp3 = fadd <4 x float> %tmp2, %ld5
+ %tmp4 = fadd <4 x float> %tmp3, %ld6
+ %tmp5 = fadd <4 x float> %tmp4, %ld7
+ %tmp6 = fadd <4 x float> %tmp5, %ld8
+ %tmp7 = fadd <4 x float> %tmp6, %ld9
+ %tmp8 = fadd <4 x float> %tmp7, %ld10
+ %tmp9 = fadd <4 x float> %tmp8, %ld11
+ %21 = fadd <4 x float> %tmp9, %ld12
%22 = fcmp ogt <4 x float> %besterror.0.2264, %21 ; <<4 x i1>> [#uses=0]
%tmp = extractelement <4 x i1> %22, i32 0
br i1 %tmp, label %bb193, label %bb186
diff --git a/test/CodeGen/ARM/stm.ll b/test/CodeGen/ARM/stm.ll
index 22a7ecb4aa28..2f5fadbee28a 100644
--- a/test/CodeGen/ARM/stm.ll
+++ b/test/CodeGen/ARM/stm.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6,+vfp2 | grep stm | count 2
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6,+vfp2 | FileCheck %s
@"\01LC" = internal constant [32 x i8] c"Boolean Not: %d %d %d %d %d %d\0A\00", section "__TEXT,__cstring,cstring_literals" ; <[32 x i8]*> [#uses=1]
@"\01LC1" = internal constant [26 x i8] c"Bitwise Not: %d %d %d %d\0A\00", section "__TEXT,__cstring,cstring_literals" ; <[26 x i8]*> [#uses=1]
@@ -7,6 +7,9 @@ declare i32 @printf(i8* nocapture, ...) nounwind
define i32 @main() nounwind {
entry:
+; CHECK: main
+; CHECK: push
+; CHECK: stmib
%0 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([26 x i8]* @"\01LC1", i32 0, i32 0), i32 -2, i32 -3, i32 2, i32 -6) nounwind ; <i32> [#uses=0]
%1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([32 x i8]* @"\01LC", i32 0, i32 0), i32 0, i32 1, i32 0, i32 1, i32 0, i32 1) nounwind ; <i32> [#uses=0]
ret i32 0
diff --git a/test/CodeGen/ARM/str_pre-2.ll b/test/CodeGen/ARM/str_pre-2.ll
index 553cd64fce94..465c7e676c56 100644
--- a/test/CodeGen/ARM/str_pre-2.ll
+++ b/test/CodeGen/ARM/str_pre-2.ll
@@ -1,10 +1,11 @@
-; RUN: llc < %s -mtriple=arm-linux-gnu | grep {str.*\\!}
-; RUN: llc < %s -mtriple=arm-linux-gnu | grep {ldr.*\\\[.*\], #4}
+; RUN: llc < %s -mtriple=armv6-linux-gnu | FileCheck %s
@b = external global i64*
define i64 @t(i64 %a) nounwind readonly {
entry:
+; CHECK: str lr, [sp, #-4]!
+; CHECK: ldr lr, [sp], #4
%0 = load i64** @b, align 4
%1 = load i64* %0, align 4
%2 = mul i64 %1, %a
diff --git a/test/CodeGen/ARM/tail-opts.ll b/test/CodeGen/ARM/tail-opts.ll
index 17c8baedbfa8..5b3dce386bb7 100644
--- a/test/CodeGen/ARM/tail-opts.ll
+++ b/test/CodeGen/ARM/tail-opts.ll
@@ -17,13 +17,16 @@ declare i8* @choose(i8*, i8*)
; CHECK: tail_duplicate_me:
; CHECK: qux
; CHECK: qux
-; CHECK: ldr r{{.}}, LCPI
+; CHECK: movw r{{[0-9]+}}, :lower16:_GHJK
+; CHECK: movt r{{[0-9]+}}, :upper16:_GHJK
; CHECK: str r
; CHECK-NEXT: bx r
-; CHECK: ldr r{{.}}, LCPI
+; CHECK: movw r{{[0-9]+}}, :lower16:_GHJK
+; CHECK: movt r{{[0-9]+}}, :upper16:_GHJK
; CHECK: str r
; CHECK-NEXT: bx r
-; CHECK: ldr r{{.}}, LCPI
+; CHECK: movw r{{[0-9]+}}, :lower16:_GHJK
+; CHECK: movt r{{[0-9]+}}, :upper16:_GHJK
; CHECK: str r
; CHECK-NEXT: bx r
diff --git a/test/CodeGen/ARM/thumb1-varalloc.ll b/test/CodeGen/ARM/thumb1-varalloc.ll
new file mode 100644
index 000000000000..25093fee225a
--- /dev/null
+++ b/test/CodeGen/ARM/thumb1-varalloc.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s
+; rdar://8819685
+
+@__bar = external hidden global i8*
+@__baz = external hidden global i8*
+
+define i8* @_foo() {
+entry:
+; CHECK: foo:
+
+ %size = alloca i32, align 4
+ %0 = load i8** @__bar, align 4
+ %1 = icmp eq i8* %0, null
+ br i1 %1, label %bb1, label %bb3
+
+bb1:
+ store i32 1026, i32* %size, align 4
+ %2 = alloca [1026 x i8], align 1
+; CHECK: mov r0, sp
+; CHECK: adds r4, r0, r4
+ %3 = getelementptr inbounds [1026 x i8]* %2, i32 0, i32 0
+ %4 = call i32 @_called_func(i8* %3, i32* %size) nounwind
+ %5 = icmp eq i32 %4, 0
+ br i1 %5, label %bb2, label %bb3
+
+bb2:
+ %6 = call i8* @strdup(i8* %3) nounwind
+ store i8* %6, i8** @__baz, align 4
+ br label %bb3
+
+bb3:
+ %.0 = phi i8* [ %0, %entry ], [ %6, %bb2 ], [ %3, %bb1 ]
+; CHECK: subs r4, #5
+; CHECK-NEXT: mov sp, r4
+; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
+ ret i8* %.0
+}
+
+declare noalias i8* @strdup(i8* nocapture) nounwind
+declare i32 @_called_func(i8*, i32*) nounwind \ No newline at end of file
diff --git a/test/CodeGen/ARM/umulo-32.ll b/test/CodeGen/ARM/umulo-32.ll
new file mode 100644
index 000000000000..aa7d28a62349
--- /dev/null
+++ b/test/CodeGen/ARM/umulo-32.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s
+
+%umul.ty = type { i32, i1 }
+
+define i32 @func(i32 %a) nounwind {
+; CHECK: func
+; CHECK: muldi3
+ %tmp0 = tail call %umul.ty @llvm.umul.with.overflow.i32(i32 %a, i32 37)
+ %tmp1 = extractvalue %umul.ty %tmp0, 0
+ %tmp2 = select i1 undef, i32 -1, i32 %tmp1
+ ret i32 %tmp2
+}
+
+declare %umul.ty @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
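
llvm.umul.with.overflow.i32 returns a {result, overflow} pair; Thumb1 has no umull, so the full 64-bit product comes from the muldi3 libcall checked above and the overflow bit is, roughly, a test of its high half. A hedged sketch of the usual consumption pattern, reusing the %umul.ty struct and the declaration above (the function name is illustrative):

define i32 @mul_or_saturate(i32 %a, i32 %b) nounwind {
  %pair = tail call %umul.ty @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
  %prod = extractvalue %umul.ty %pair, 0   ; low 32 bits of the product
  %ovf = extractvalue %umul.ty %pair, 1    ; i1, set when the product wrapped
  ; saturate to -1 (all ones) on overflow instead of using an undef condition
  %res = select i1 %ovf, i32 -1, i32 %prod
  ret i32 %res
}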
diff --git a/test/CodeGen/ARM/unaligned_load_store.ll b/test/CodeGen/ARM/unaligned_load_store.ll
index e2794919d9da..b42e11f2c4ab 100644
--- a/test/CodeGen/ARM/unaligned_load_store.ll
+++ b/test/CodeGen/ARM/unaligned_load_store.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -march=arm | FileCheck %s -check-prefix=GENERIC
+; RUN: llc < %s -march=arm -pre-RA-sched=source | FileCheck %s -check-prefix=GENERIC
; RUN: llc < %s -mtriple=armv6-apple-darwin | FileCheck %s -check-prefix=DARWIN_V6
+; RUN: llc < %s -mtriple=armv6-apple-darwin -arm-strict-align | FileCheck %s -check-prefix=GENERIC
; RUN: llc < %s -mtriple=armv6-linux | FileCheck %s -check-prefix=GENERIC
; rdar://7113725
diff --git a/test/CodeGen/ARM/vbits.ll b/test/CodeGen/ARM/vbits.ll
index 293d22938a76..51f9bdf9718b 100644
--- a/test/CodeGen/ARM/vbits.ll
+++ b/test/CodeGen/ARM/vbits.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -mcpu=cortex-a8 | FileCheck %s
define <8 x i8> @v_andi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: v_andi8:
@@ -505,3 +505,43 @@ define <4 x i32> @vtstQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
%tmp5 = sext <4 x i1> %tmp4 to <4 x i32>
ret <4 x i32> %tmp5
}
+
+define <8 x i8> @v_orrimm(<8 x i8>* %A) nounwind {
+; CHECK: v_orrimm:
+; CHECK-NOT: vmov
+; CHECK-NOT: vmvn
+; CHECK: vorr
+ %tmp1 = load <8 x i8>* %A
+ %tmp3 = or <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1>
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @v_orrimmQ(<16 x i8>* %A) nounwind {
+; CHECK: v_orrimmQ
+; CHECK-NOT: vmov
+; CHECK-NOT: vmvn
+; CHECK: vorr
+ %tmp1 = load <16 x i8>* %A
+ %tmp3 = or <16 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1>
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i8> @v_bicimm(<8 x i8>* %A) nounwind {
+; CHECK: v_bicimm:
+; CHECK-NOT: vmov
+; CHECK-NOT: vmvn
+; CHECK: vbic
+ %tmp1 = load <8 x i8>* %A
+ %tmp3 = and <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0 >
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @v_bicimmQ(<16 x i8>* %A) nounwind {
+; CHECK: v_bicimmQ:
+; CHECK-NOT: vmov
+; CHECK-NOT: vmvn
+; CHECK: vbic
+ %tmp1 = load <16 x i8>* %A
+ %tmp3 = and <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0 >
+ ret <16 x i8> %tmp3
+}
diff --git a/test/CodeGen/ARM/vceq.ll b/test/CodeGen/ARM/vceq.ll
index e4787518e731..051c349a06a4 100644
--- a/test/CodeGen/ARM/vceq.ll
+++ b/test/CodeGen/ARM/vceq.ll
@@ -79,3 +79,14 @@ define <4 x i32> @vceqQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
+
+define <8 x i8> @vceqi8Z(<8 x i8>* %A) nounwind {
+;CHECK: vceqi8Z:
+;CHECK-NOT: vmov
+;CHECK-NOT: vmvn
+;CHECK: vceq.i8
+ %tmp1 = load <8 x i8>* %A
+ %tmp3 = icmp eq <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
+}
diff --git a/test/CodeGen/ARM/vcge.ll b/test/CodeGen/ARM/vcge.ll
index 2c161113c113..bf5f0b9efb2f 100644
--- a/test/CodeGen/ARM/vcge.ll
+++ b/test/CodeGen/ARM/vcge.ll
@@ -160,3 +160,44 @@ define <4 x i32> @vacgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
declare <2 x i32> @llvm.arm.neon.vacged(<2 x float>, <2 x float>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vacgeq(<4 x float>, <4 x float>) nounwind readnone
+
+define <8 x i8> @vcgei8Z(<8 x i8>* %A) nounwind {
+;CHECK: vcgei8Z:
+;CHECK-NOT: vmov
+;CHECK-NOT: vmvn
+;CHECK: vcge.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp3 = icmp sge <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
+}
+
+define <8 x i8> @vclei8Z(<8 x i8>* %A) nounwind {
+;CHECK: vclei8Z:
+;CHECK-NOT: vmov
+;CHECK-NOT: vmvn
+;CHECK: vcle.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp3 = icmp sle <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
+}
+
+; Radar 8782191
+; Floating-point comparisons against zero produce results with integer
+; elements, not floating-point elements.
+define void @test_vclez_fp() nounwind optsize {
+;CHECK: test_vclez_fp
+;CHECK: vcle.f32
+entry:
+ %0 = fcmp ole <4 x float> undef, zeroinitializer
+ %1 = sext <4 x i1> %0 to <4 x i16>
+ %2 = add <4 x i16> %1, zeroinitializer
+ %3 = shufflevector <4 x i16> %2, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %4 = add <8 x i16> %3, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %5 = trunc <8 x i16> %4 to <8 x i8>
+ tail call void @llvm.arm.neon.vst1.v8i8(i8* undef, <8 x i8> %5, i32 1)
+ unreachable
+}
+
+declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32) nounwind
diff --git a/test/CodeGen/ARM/vcgt.ll b/test/CodeGen/ARM/vcgt.ll
index 194093c8418c..c3c4cb356307 100644
--- a/test/CodeGen/ARM/vcgt.ll
+++ b/test/CodeGen/ARM/vcgt.ll
@@ -161,9 +161,9 @@ define <4 x i32> @vacgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
; rdar://7923010
define <4 x i32> @vcgt_zext(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK: vcgt_zext:
-;CHECK: vcgt.f32 q0
-;CHECK: vmov.i32 q1, #0x1
-;CHECK: vand q0, q0, q1
+;CHECK: vmov.i32 q10, #0x1
+;CHECK: vcgt.f32 q8
+;CHECK: vand q8, q8, q10
%tmp1 = load <4 x float>* %A
%tmp2 = load <4 x float>* %B
%tmp3 = fcmp ogt <4 x float> %tmp1, %tmp2
@@ -173,3 +173,25 @@ define <4 x i32> @vcgt_zext(<4 x float>* %A, <4 x float>* %B) nounwind {
declare <2 x i32> @llvm.arm.neon.vacgtd(<2 x float>, <2 x float>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vacgtq(<4 x float>, <4 x float>) nounwind readnone
+
+define <8 x i8> @vcgti8Z(<8 x i8>* %A) nounwind {
+;CHECK: vcgti8Z:
+;CHECK-NOT: vmov
+;CHECK-NOT: vmvn
+;CHECK: vcgt.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp3 = icmp sgt <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
+}
+
+define <8 x i8> @vclti8Z(<8 x i8>* %A) nounwind {
+;CHECK: vclti8Z:
+;CHECK-NOT: vmov
+;CHECK-NOT: vmvn
+;CHECK: vclt.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp3 = icmp slt <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
+}
diff --git a/test/CodeGen/ARM/vcombine.ll b/test/CodeGen/ARM/vcombine.ll
index e6733051f269..527f93b6637c 100644
--- a/test/CodeGen/ARM/vcombine.ll
+++ b/test/CodeGen/ARM/vcombine.ll
@@ -1,6 +1,9 @@
-; RUN: llc < %s -march=arm -mattr=+neon
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <16 x i8> @vcombine8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+; CHECK: vcombine8
+; CHECK: vmov r0, r1, d16
+; CHECK: vmov r2, r3, d17
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -8,6 +11,9 @@ define <16 x i8> @vcombine8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <8 x i16> @vcombine16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+; CHECK: vcombine16
+; CHECK: vmov r0, r1, d16
+; CHECK: vmov r2, r3, d17
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -15,6 +21,9 @@ define <8 x i16> @vcombine16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <4 x i32> @vcombine32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+; CHECK: vcombine32
+; CHECK: vmov r0, r1, d16
+; CHECK: vmov r2, r3, d17
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -22,6 +31,9 @@ define <4 x i32> @vcombine32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <4 x float> @vcombinefloat(<2 x float>* %A, <2 x float>* %B) nounwind {
+; CHECK: vcombinefloat
+; CHECK: vmov r0, r1, d16
+; CHECK: vmov r2, r3, d17
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
%tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -29,8 +41,32 @@ define <4 x float> @vcombinefloat(<2 x float>* %A, <2 x float>* %B) nounwind {
}
define <2 x i64> @vcombine64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+; CHECK: vcombine64
+; CHECK: vmov r0, r1, d16
+; CHECK: vmov r2, r3, d17
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = shufflevector <1 x i64> %tmp1, <1 x i64> %tmp2, <2 x i32> <i32 0, i32 1>
ret <2 x i64> %tmp3
}
+
+; Check for vget_low and vget_high implemented with shufflevector. PR8411.
+; They should not require storing to the stack.
+
+define <4 x i16> @vget_low16(<8 x i16>* %A) nounwind {
+; CHECK: vget_low16
+; CHECK-NOT: vst
+; CHECK: vmov r0, r1, d16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i16> %tmp2
+}
+
+define <8 x i8> @vget_high8(<16 x i8>* %A) nounwind {
+; CHECK: vget_high8
+; CHECK-NOT: vst
+; CHECK: vmov r0, r1, d17
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <8 x i8> %tmp2
+}
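For illustration only (not part of this commit): these shufflevector patterns are roughly what clang emits for the NEON vget_low/vget_high intrinsics. A minimal C sketch, assuming arm_neon.h and an ARM target with NEON:

    #include <arm_neon.h>

    /* Lower half (lanes 0..3) of a 128-bit vector. */
    int16x4_t low_half(int16x8_t v)  { return vget_low_s16(v); }

    /* Upper half (lanes 8..15) of a 128-bit vector. */
    int8x8_t high_half(int8x16_t v)  { return vget_high_s8(v); }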
diff --git a/test/CodeGen/ARM/vcvt.ll b/test/CodeGen/ARM/vcvt.ll
index f4cc5368d9aa..c078f493094b 100644
--- a/test/CodeGen/ARM/vcvt.ll
+++ b/test/CodeGen/ARM/vcvt.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon,+fp16 | FileCheck %s
define <2 x i32> @vcvt_f32tos32(<2 x float>* %A) nounwind {
;CHECK: vcvt_f32tos32:
@@ -138,3 +138,21 @@ declare <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32) nounwi
declare <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone
declare <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone
+define <4 x float> @vcvt_f16tof32(<4 x i16>* %A) nounwind {
+;CHECK: vcvt_f16tof32:
+;CHECK: vcvt.f32.f16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> %tmp1)
+ ret <4 x float> %tmp2
+}
+
+define <4 x i16> @vcvt_f32tof16(<4 x float>* %A) nounwind {
+;CHECK: vcvt_f32tof16:
+;CHECK: vcvt.f16.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+declare <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float>) nounwind readnone
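For illustration only (not part of this commit): the vcvthf2fp/vcvtfp2hf intrinsics correspond to the ACLE half-precision conversions. A minimal C sketch, assuming arm_neon.h and a target with NEON plus the fp16 extension:

    #include <arm_neon.h>

    /* Widen four half-precision lanes to single precision: vcvt.f32.f16 */
    float32x4_t widen(float16x4_t h)  { return vcvt_f32_f16(h); }

    /* Narrow four single-precision lanes to half precision: vcvt.f16.f32 */
    float16x4_t narrow(float32x4_t f) { return vcvt_f16_f32(f); }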
diff --git a/test/CodeGen/ARM/vdup.ll b/test/CodeGen/ARM/vdup.ll
index a545f6c03d5b..e99fac1f1e67 100644
--- a/test/CodeGen/ARM/vdup.ll
+++ b/test/CodeGen/ARM/vdup.ll
@@ -162,24 +162,6 @@ define <4 x float> @v_shuffledupQfloat(float %A) nounwind {
ret <4 x float> %tmp2
}
-define <2 x float> @v_shuffledupfloat2(float* %A) nounwind {
-;CHECK: v_shuffledupfloat2:
-;CHECK: vdup.32
- %tmp0 = load float* %A
- %tmp1 = insertelement <2 x float> undef, float %tmp0, i32 0
- %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
- ret <2 x float> %tmp2
-}
-
-define <4 x float> @v_shuffledupQfloat2(float* %A) nounwind {
-;CHECK: v_shuffledupQfloat2:
-;CHECK: vdup.32
- %tmp0 = load float* %A
- %tmp1 = insertelement <4 x float> undef, float %tmp0, i32 0
- %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
- ret <4 x float> %tmp2
-}
-
define <8 x i8> @vduplane8(<8 x i8>* %A) nounwind {
;CHECK: vduplane8:
;CHECK: vdup.8
diff --git a/test/CodeGen/ARM/vector-DAGCombine.ll b/test/CodeGen/ARM/vector-DAGCombine.ll
new file mode 100644
index 000000000000..3ab0cfcbbc77
--- /dev/null
+++ b/test/CodeGen/ARM/vector-DAGCombine.ll
@@ -0,0 +1,107 @@
+; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
+
+; PR7158
+define i32 @test_pr7158() nounwind {
+bb.nph55.bb.nph55.split_crit_edge:
+ br label %bb3
+
+bb3: ; preds = %bb3, %bb.nph55.bb.nph55.split_crit_edge
+ br i1 undef, label %bb.i19, label %bb3
+
+bb.i19: ; preds = %bb.i19, %bb3
+ %0 = insertelement <4 x float> undef, float undef, i32 3 ; <<4 x float>> [#uses=3]
+ %1 = fmul <4 x float> %0, %0 ; <<4 x float>> [#uses=1]
+ %2 = bitcast <4 x float> %1 to <2 x double> ; <<2 x double>> [#uses=0]
+ %3 = fmul <4 x float> %0, undef ; <<4 x float>> [#uses=0]
+ br label %bb.i19
+}
+
+; Check that the DAG combiner does not arbitrarily modify BUILD_VECTORs
+; after legalization.
+define void @test_illegal_build_vector() nounwind {
+entry:
+ store <2 x i64> undef, <2 x i64>* undef, align 16
+ %0 = load <16 x i8>* undef, align 16 ; <<16 x i8>> [#uses=1]
+ %1 = or <16 x i8> zeroinitializer, %0 ; <<16 x i8>> [#uses=1]
+ store <16 x i8> %1, <16 x i8>* undef, align 16
+ ret void
+}
+
+; Radar 8407927: Make sure that VMOVRRD gets optimized away when the result is
+; converted back to be used as a vector type.
+; CHECK: test_vmovrrd_combine
+define <4 x i32> @test_vmovrrd_combine() nounwind {
+entry:
+ br i1 undef, label %bb1, label %bb2
+
+bb1:
+ %0 = bitcast <2 x i64> zeroinitializer to <2 x double>
+ %1 = extractelement <2 x double> %0, i32 0
+ %2 = bitcast double %1 to i64
+ %3 = insertelement <1 x i64> undef, i64 %2, i32 0
+; CHECK-NOT: vmov s
+; CHECK: vext.8
+ %4 = shufflevector <1 x i64> %3, <1 x i64> undef, <2 x i32> <i32 0, i32 1>
+ %tmp2006.3 = bitcast <2 x i64> %4 to <16 x i8>
+ %5 = shufflevector <16 x i8> %tmp2006.3, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+ %tmp2004.3 = bitcast <16 x i8> %5 to <4 x i32>
+ br i1 undef, label %bb2, label %bb1
+
+bb2:
+ %result = phi <4 x i32> [ undef, %entry ], [ %tmp2004.3, %bb1 ]
+ ret <4 x i32> %result
+}
+
+; Test trying to do a ShiftCombine on illegal types.
+; The vector should be split first.
+define void @lshrIllegalType(<8 x i32>* %A) nounwind {
+ %tmp1 = load <8 x i32>* %A
+ %tmp2 = lshr <8 x i32> %tmp1, < i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+ store <8 x i32> %tmp2, <8 x i32>* %A
+ ret void
+}
+
+; Test folding a binary vector operation with constant BUILD_VECTOR
+; operands with i16 elements.
+define void @test_i16_constant_fold() nounwind optsize {
+entry:
+ %0 = sext <4 x i1> zeroinitializer to <4 x i16>
+ %1 = add <4 x i16> %0, zeroinitializer
+ %2 = shufflevector <4 x i16> %1, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %3 = add <8 x i16> %2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %4 = trunc <8 x i16> %3 to <8 x i8>
+ tail call void @llvm.arm.neon.vst1.v8i8(i8* undef, <8 x i8> %4, i32 1)
+ unreachable
+}
+
+declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32) nounwind
+
+; Test that loads and stores of i64 vector elements are handled as f64 values
+; so they are not split up into i32 values. Radar 8755338.
+define void @i64_buildvector(i64* %ptr, <2 x i64>* %vp) nounwind {
+; CHECK: i64_buildvector
+; CHECK: vldr.64
+ %t0 = load i64* %ptr, align 4
+ %t1 = insertelement <2 x i64> undef, i64 %t0, i32 0
+ store <2 x i64> %t1, <2 x i64>* %vp
+ ret void
+}
+
+define void @i64_insertelement(i64* %ptr, <2 x i64>* %vp) nounwind {
+; CHECK: i64_insertelement
+; CHECK: vldr.64
+ %t0 = load i64* %ptr, align 4
+ %vec = load <2 x i64>* %vp
+ %t1 = insertelement <2 x i64> %vec, i64 %t0, i32 0
+ store <2 x i64> %t1, <2 x i64>* %vp
+ ret void
+}
+
+define void @i64_extractelement(i64* %ptr, <2 x i64>* %vp) nounwind {
+; CHECK: i64_extractelement
+; CHECK: vstr.64
+ %vec = load <2 x i64>* %vp
+ %t1 = extractelement <2 x i64> %vec, i32 0
+ store i64 %t1, i64* %ptr
+ ret void
+}
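For illustration only (not part of this commit): the i64 lane tests above model code such as the NEON lane intrinsics below, where a 64-bit element moves between memory and a Q register and should stay a single f64-sized access. A minimal C sketch, assuming arm_neon.h:

    #include <arm_neon.h>

    /* Insert a 64-bit element loaded from memory; expect a vldr.64, not two i32 loads. */
    int64x2_t set_lane0(const int64_t *p, int64x2_t v) { return vsetq_lane_s64(*p, v, 0); }

    /* Store a 64-bit element extracted from a vector; expect a vstr.64. */
    void get_lane0(int64_t *p, int64x2_t v) { *p = vgetq_lane_s64(v, 0); }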
diff --git a/test/CodeGen/ARM/vext.ll b/test/CodeGen/ARM/vext.ll
index e460a84f6265..55abefef0fa7 100644
--- a/test/CodeGen/ARM/vext.ll
+++ b/test/CodeGen/ARM/vext.ll
@@ -74,3 +74,62 @@ define <16 x i8> @test_vextRq_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind {
ret <16 x i8> %tmp3
}
+; Tests for the ReconstructShuffle function. Indices have to be carefully
+; chosen so that the shuffle reaches the lowering phase as a BUILD_VECTOR.
+
+; One vector needs vext; the other can be handled by extract_subvector.
+; Also checks that interleaving of sources is handled correctly.
+; Essence: a vext is used on %A, and something saner than a stack load/store is used for the final result.
+define <4 x i16> @test_interleaved(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: test_interleaved:
+;CHECK: vext.16
+;CHECK-NOT: vext.16
+;CHECK: vzip.16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <4 x i32> <i32 3, i32 8, i32 5, i32 9>
+ ret <4 x i16> %tmp3
+}
+
+; An undef in the shuffle list should still be optimizable
+define <4 x i16> @test_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: test_undef:
+;CHECK: vzip.16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <4 x i32> <i32 undef, i32 8, i32 5, i32 9>
+ ret <4 x i16> %tmp3
+}
+
+; We should ignore a build_vector with more than two sources.
+; Use an illegal <32 x i16> type to produce such a shuffle after legalizing types.
+; Check for the fallback to stack expansion.
+define <4 x i16> @test_multisource(<32 x i16>* %B) nounwind {
+;CHECK: test_multisource:
+;CHECK: vst1.16
+ %tmp1 = load <32 x i16>* %B
+ %tmp2 = shufflevector <32 x i16> %tmp1, <32 x i16> undef, <4 x i32> <i32 0, i32 8, i32 16, i32 24>
+ ret <4 x i16> %tmp2
+}
+
+; We don't handle shuffles using more than half of a 128-bit vector.
+; Again, check for the fallback to stack expansion.
+define <4 x i16> @test_largespan(<8 x i16>* %B) nounwind {
+;CHECK: test_largespan:
+;CHECK: vst1.16
+ %tmp1 = load <8 x i16>* %B
+ %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ ret <4 x i16> %tmp2
+}
+
+; The actual shuffle code only handles some cases; make sure we check
+; this rather than blindly emitting a VECTOR_SHUFFLE (an infinite
+; lowering loop can result otherwise).
+define <8 x i8> @test_illegal(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: test_illegal:
+;CHECK: vst1.8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <8 x i32> <i32 0, i32 7, i32 5, i32 25, i32 3, i32 2, i32 2, i32 26>
+ ret <8 x i8> %tmp3
+}
diff --git a/test/CodeGen/ARM/vget_lane.ll b/test/CodeGen/ARM/vget_lane.ll
index 05e7f5090952..1fc885d61372 100644
--- a/test/CodeGen/ARM/vget_lane.ll
+++ b/test/CodeGen/ARM/vget_lane.ll
@@ -96,13 +96,14 @@ define i32 @vgetQ_lanei32(<4 x i32>* %A) nounwind {
define arm_aapcs_vfpcc void @test_vget_laneu16() nounwind {
entry:
-; CHECK: vmov.u16 r0, d0[1]
+; CHECK: vmov.u16 r0, d{{.*}}[1]
%arg0_uint16x4_t = alloca <4 x i16> ; <<4 x i16>*> [#uses=1]
%out_uint16_t = alloca i16 ; <i16*> [#uses=1]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
%0 = load <4 x i16>* %arg0_uint16x4_t, align 8 ; <<4 x i16>> [#uses=1]
%1 = extractelement <4 x i16> %0, i32 1 ; <i16> [#uses=1]
- store i16 %1, i16* %out_uint16_t, align 2
+ %2 = add i16 %1, %1
+ store i16 %2, i16* %out_uint16_t, align 2
br label %return
return: ; preds = %entry
@@ -111,13 +112,14 @@ return: ; preds = %entry
define arm_aapcs_vfpcc void @test_vget_laneu8() nounwind {
entry:
-; CHECK: vmov.u8 r0, d0[1]
+; CHECK: vmov.u8 r0, d{{.*}}[1]
%arg0_uint8x8_t = alloca <8 x i8> ; <<8 x i8>*> [#uses=1]
%out_uint8_t = alloca i8 ; <i8*> [#uses=1]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
%0 = load <8 x i8>* %arg0_uint8x8_t, align 8 ; <<8 x i8>> [#uses=1]
%1 = extractelement <8 x i8> %0, i32 1 ; <i8> [#uses=1]
- store i8 %1, i8* %out_uint8_t, align 1
+ %2 = add i8 %1, %1
+ store i8 %2, i8* %out_uint8_t, align 1
br label %return
return: ; preds = %entry
@@ -126,13 +128,14 @@ return: ; preds = %entry
define arm_aapcs_vfpcc void @test_vgetQ_laneu16() nounwind {
entry:
-; CHECK: vmov.u16 r0, d0[1]
+; CHECK: vmov.u16 r0, d{{.*}}[1]
%arg0_uint16x8_t = alloca <8 x i16> ; <<8 x i16>*> [#uses=1]
%out_uint16_t = alloca i16 ; <i16*> [#uses=1]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
%0 = load <8 x i16>* %arg0_uint16x8_t, align 16 ; <<8 x i16>> [#uses=1]
%1 = extractelement <8 x i16> %0, i32 1 ; <i16> [#uses=1]
- store i16 %1, i16* %out_uint16_t, align 2
+ %2 = add i16 %1, %1
+ store i16 %2, i16* %out_uint16_t, align 2
br label %return
return: ; preds = %entry
@@ -141,13 +144,14 @@ return: ; preds = %entry
define arm_aapcs_vfpcc void @test_vgetQ_laneu8() nounwind {
entry:
-; CHECK: vmov.u8 r0, d0[1]
+; CHECK: vmov.u8 r0, d{{.*}}[1]
%arg0_uint8x16_t = alloca <16 x i8> ; <<16 x i8>*> [#uses=1]
%out_uint8_t = alloca i8 ; <i8*> [#uses=1]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
%0 = load <16 x i8>* %arg0_uint8x16_t, align 16 ; <<16 x i8>> [#uses=1]
%1 = extractelement <16 x i8> %0, i32 1 ; <i8> [#uses=1]
- store i8 %1, i8* %out_uint8_t, align 1
+ %2 = add i8 %1, %1
+ store i8 %2, i8* %out_uint8_t, align 1
br label %return
return: ; preds = %entry
@@ -210,3 +214,20 @@ entry:
%0 = insertelement <2 x float> %arg1_float32x2_t, float %arg0_float32_t, i32 1 ; <<2 x float>> [#uses=1]
ret <2 x float> %0
}
+
+; The LLVM extractelement instruction does not require that the lane number
+; be an immediate constant. Make sure a variable lane number is handled.
+
+define i32 @vget_variable_lanes8(<8 x i8>* %A, i32 %B) nounwind {
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = extractelement <8 x i8> %tmp1, i32 %B
+ %tmp3 = sext i8 %tmp2 to i32
+ ret i32 %tmp3
+}
+
+define i32 @vgetQ_variable_lanei32(<4 x i32>* %A, i32 %B) nounwind {
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = add <4 x i32> %tmp1, %tmp1
+ %tmp3 = extractelement <4 x i32> %tmp2, i32 %B
+ ret i32 %tmp3
+}
diff --git a/test/CodeGen/ARM/vld1.ll b/test/CodeGen/ARM/vld1.ll
index 2488e8a0d0cc..c886125a2fb0 100644
--- a/test/CodeGen/ARM/vld1.ll
+++ b/test/CodeGen/ARM/vld1.ll
@@ -2,8 +2,9 @@
define <8 x i8> @vld1i8(i8* %A) nounwind {
;CHECK: vld1i8:
-;CHECK: vld1.8
- %tmp1 = call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %A, i32 1)
+;Check the alignment value. Max for this instruction is 64 bits:
+;CHECK: vld1.8 {d16}, [r0, :64]
+ %tmp1 = call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %A, i32 16)
ret <8 x i8> %tmp1
}
@@ -15,6 +16,18 @@ define <4 x i16> @vld1i16(i16* %A) nounwind {
ret <4 x i16> %tmp1
}
+;Check for a post-increment updating load.
+define <4 x i16> @vld1i16_update(i16** %ptr) nounwind {
+;CHECK: vld1i16_update:
+;CHECK: vld1.16 {d16}, [r1]!
+ %A = load i16** %ptr
+ %tmp0 = bitcast i16* %A to i8*
+ %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %tmp0, i32 1)
+ %tmp2 = getelementptr i16* %A, i32 4
+ store i16* %tmp2, i16** %ptr
+ ret <4 x i16> %tmp1
+}
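For illustration only (not part of this commit): the *_update tests model the usual "load, then bump the pointer" idiom, which the backend can fold into a post-increment VLD1. A minimal C sketch, assuming arm_neon.h:

    #include <arm_neon.h>

    /* Load four i16 lanes and advance the cursor; a candidate for vld1.16 {dN}, [rM]! */
    int16x4_t load_and_advance(const int16_t **p) {
        int16x4_t v = vld1_s16(*p);
        *p += 4;
        return v;
    }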
+
define <2 x i32> @vld1i32(i32* %A) nounwind {
;CHECK: vld1i32:
;CHECK: vld1.32
@@ -23,6 +36,18 @@ define <2 x i32> @vld1i32(i32* %A) nounwind {
ret <2 x i32> %tmp1
}
+;Check for a post-increment updating load with register increment.
+define <2 x i32> @vld1i32_update(i32** %ptr, i32 %inc) nounwind {
+;CHECK: vld1i32_update:
+;CHECK: vld1.32 {d16}, [r2], r1
+ %A = load i32** %ptr
+ %tmp0 = bitcast i32* %A to i8*
+ %tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %tmp0, i32 1)
+ %tmp2 = getelementptr i32* %A, i32 %inc
+ store i32* %tmp2, i32** %ptr
+ ret <2 x i32> %tmp1
+}
+
define <2 x float> @vld1f(float* %A) nounwind {
;CHECK: vld1f:
;CHECK: vld1.32
@@ -41,16 +66,29 @@ define <1 x i64> @vld1i64(i64* %A) nounwind {
define <16 x i8> @vld1Qi8(i8* %A) nounwind {
;CHECK: vld1Qi8:
-;CHECK: vld1.8
- %tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A, i32 1)
+;Check the alignment value. Max for this instruction is 128 bits:
+;CHECK: vld1.8 {d16, d17}, [r0, :64]
+ %tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A, i32 8)
+ ret <16 x i8> %tmp1
+}
+
+;Check for a post-increment updating load.
+define <16 x i8> @vld1Qi8_update(i8** %ptr) nounwind {
+;CHECK: vld1Qi8_update:
+;CHECK: vld1.8 {d16, d17}, [r1, :64]!
+ %A = load i8** %ptr
+ %tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A, i32 8)
+ %tmp2 = getelementptr i8* %A, i32 16
+ store i8* %tmp2, i8** %ptr
ret <16 x i8> %tmp1
}
define <8 x i16> @vld1Qi16(i16* %A) nounwind {
;CHECK: vld1Qi16:
-;CHECK: vld1.16
+;Check the alignment value. Max for this instruction is 128 bits:
+;CHECK: vld1.16 {d16, d17}, [r0, :128]
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %tmp0, i32 1)
+ %tmp1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %tmp0, i32 32)
ret <8 x i16> %tmp1
}
diff --git a/test/CodeGen/ARM/vld2.ll b/test/CodeGen/ARM/vld2.ll
index 811f6e6db96f..29b379465db5 100644
--- a/test/CodeGen/ARM/vld2.ll
+++ b/test/CodeGen/ARM/vld2.ll
@@ -13,8 +13,9 @@
define <8 x i8> @vld2i8(i8* %A) nounwind {
;CHECK: vld2i8:
-;CHECK: vld2.8
- %tmp1 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8* %A, i32 1)
+;Check the alignment value. Max for this instruction is 128 bits:
+;CHECK: vld2.8 {d16, d17}, [r0, :64]
+ %tmp1 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8* %A, i32 8)
%tmp2 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 1
%tmp4 = add <8 x i8> %tmp2, %tmp3
@@ -23,9 +24,10 @@ define <8 x i8> @vld2i8(i8* %A) nounwind {
define <4 x i16> @vld2i16(i16* %A) nounwind {
;CHECK: vld2i16:
-;CHECK: vld2.16
+;Check the alignment value. Max for this instruction is 128 bits:
+;CHECK: vld2.16 {d16, d17}, [r0, :128]
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8* %tmp0, i32 1)
+ %tmp1 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8* %tmp0, i32 32)
%tmp2 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 1
%tmp4 = add <4 x i16> %tmp2, %tmp3
@@ -54,11 +56,27 @@ define <2 x float> @vld2f(float* %A) nounwind {
ret <2 x float> %tmp4
}
+;Check for a post-increment updating load.
+define <2 x float> @vld2f_update(float** %ptr) nounwind {
+;CHECK: vld2f_update:
+;CHECK: vld2.32 {d16, d17}, [r1]!
+ %A = load float** %ptr
+ %tmp0 = bitcast float* %A to i8*
+ %tmp1 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8* %tmp0, i32 1)
+ %tmp2 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 1
+ %tmp4 = fadd <2 x float> %tmp2, %tmp3
+ %tmp5 = getelementptr float* %A, i32 4
+ store float* %tmp5, float** %ptr
+ ret <2 x float> %tmp4
+}
+
define <1 x i64> @vld2i64(i64* %A) nounwind {
;CHECK: vld2i64:
-;CHECK: vld1.64
+;Check the alignment value. Max for this instruction is 128 bits:
+;CHECK: vld1.64 {d16, d17}, [r0, :128]
%tmp0 = bitcast i64* %A to i8*
- %tmp1 = call %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8* %tmp0, i32 1)
+ %tmp1 = call %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8* %tmp0, i32 32)
%tmp2 = extractvalue %struct.__neon_int64x1x2_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int64x1x2_t %tmp1, 1
%tmp4 = add <1 x i64> %tmp2, %tmp3
@@ -67,19 +85,35 @@ define <1 x i64> @vld2i64(i64* %A) nounwind {
define <16 x i8> @vld2Qi8(i8* %A) nounwind {
;CHECK: vld2Qi8:
-;CHECK: vld2.8
- %tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A, i32 1)
+;Check the alignment value. Max for this instruction is 256 bits:
+;CHECK: vld2.8 {d16, d17, d18, d19}, [r0, :64]
+ %tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A, i32 8)
+ %tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1
+ %tmp4 = add <16 x i8> %tmp2, %tmp3
+ ret <16 x i8> %tmp4
+}
+
+;Check for a post-increment updating load with register increment.
+define <16 x i8> @vld2Qi8_update(i8** %ptr, i32 %inc) nounwind {
+;CHECK: vld2Qi8_update:
+;CHECK: vld2.8 {d16, d17, d18, d19}, [r2, :128], r1
+ %A = load i8** %ptr
+ %tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A, i32 16)
%tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1
%tmp4 = add <16 x i8> %tmp2, %tmp3
+ %tmp5 = getelementptr i8* %A, i32 %inc
+ store i8* %tmp5, i8** %ptr
ret <16 x i8> %tmp4
}
define <8 x i16> @vld2Qi16(i16* %A) nounwind {
;CHECK: vld2Qi16:
-;CHECK: vld2.16
+;Check the alignment value. Max for this instruction is 256 bits:
+;CHECK: vld2.16 {d16, d17, d18, d19}, [r0, :128]
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8* %tmp0, i32 1)
+ %tmp1 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8* %tmp0, i32 16)
%tmp2 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 1
%tmp4 = add <8 x i16> %tmp2, %tmp3
@@ -88,9 +122,10 @@ define <8 x i16> @vld2Qi16(i16* %A) nounwind {
define <4 x i32> @vld2Qi32(i32* %A) nounwind {
;CHECK: vld2Qi32:
-;CHECK: vld2.32
+;Check the alignment value. Max for this instruction is 256 bits:
+;CHECK: vld2.32 {d16, d17, d18, d19}, [r0, :256]
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp0, i32 1)
+ %tmp1 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp0, i32 64)
%tmp2 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 1
%tmp4 = add <4 x i32> %tmp2, %tmp3
diff --git a/test/CodeGen/ARM/vld3.ll b/test/CodeGen/ARM/vld3.ll
index 92538c34f5b8..dde530f6df1f 100644
--- a/test/CodeGen/ARM/vld3.ll
+++ b/test/CodeGen/ARM/vld3.ll
@@ -13,8 +13,9 @@
define <8 x i8> @vld3i8(i8* %A) nounwind {
;CHECK: vld3i8:
-;CHECK: vld3.8
- %tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A, i32 1)
+;Check the alignment value. Max for this instruction is 64 bits:
+;CHECK: vld3.8 {d16, d17, d18}, [r0, :64]
+ %tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A, i32 32)
%tmp2 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 2
%tmp4 = add <8 x i8> %tmp2, %tmp3
@@ -32,6 +33,21 @@ define <4 x i16> @vld3i16(i16* %A) nounwind {
ret <4 x i16> %tmp4
}
+;Check for a post-increment updating load with register increment.
+define <4 x i16> @vld3i16_update(i16** %ptr, i32 %inc) nounwind {
+;CHECK: vld3i16_update:
+;CHECK: vld3.16 {d16, d17, d18}, [r2], r1
+ %A = load i16** %ptr
+ %tmp0 = bitcast i16* %A to i8*
+ %tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8* %tmp0, i32 1)
+ %tmp2 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 2
+ %tmp4 = add <4 x i16> %tmp2, %tmp3
+ %tmp5 = getelementptr i16* %A, i32 %inc
+ store i16* %tmp5, i16** %ptr
+ ret <4 x i16> %tmp4
+}
+
define <2 x i32> @vld3i32(i32* %A) nounwind {
;CHECK: vld3i32:
;CHECK: vld3.32
@@ -56,9 +72,10 @@ define <2 x float> @vld3f(float* %A) nounwind {
define <1 x i64> @vld3i64(i64* %A) nounwind {
;CHECK: vld3i64:
-;CHECK: vld1.64
+;Check the alignment value. Max for this instruction is 64 bits:
+;CHECK: vld1.64 {d16, d17, d18}, [r0, :64]
%tmp0 = bitcast i64* %A to i8*
- %tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8* %tmp0, i32 1)
+ %tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8* %tmp0, i32 16)
%tmp2 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 2
%tmp4 = add <1 x i64> %tmp2, %tmp3
@@ -67,9 +84,10 @@ define <1 x i64> @vld3i64(i64* %A) nounwind {
define <16 x i8> @vld3Qi8(i8* %A) nounwind {
;CHECK: vld3Qi8:
-;CHECK: vld3.8
-;CHECK: vld3.8
- %tmp1 = call %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8* %A, i32 1)
+;Check the alignment value. Max for this instruction is 64 bits:
+;CHECK: vld3.8 {d16, d18, d20}, [r0, :64]!
+;CHECK: vld3.8 {d17, d19, d21}, [r0, :64]
+ %tmp1 = call %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8* %A, i32 32)
%tmp2 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 2
%tmp4 = add <16 x i8> %tmp2, %tmp3
@@ -100,6 +118,22 @@ define <4 x i32> @vld3Qi32(i32* %A) nounwind {
ret <4 x i32> %tmp4
}
+;Check for a post-increment updating load.
+define <4 x i32> @vld3Qi32_update(i32** %ptr) nounwind {
+;CHECK: vld3Qi32_update:
+;CHECK: vld3.32 {d16, d18, d20}, [r1]!
+;CHECK: vld3.32 {d17, d19, d21}, [r1]!
+ %A = load i32** %ptr
+ %tmp0 = bitcast i32* %A to i8*
+ %tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8* %tmp0, i32 1)
+ %tmp2 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 2
+ %tmp4 = add <4 x i32> %tmp2, %tmp3
+ %tmp5 = getelementptr i32* %A, i32 12
+ store i32* %tmp5, i32** %ptr
+ ret <4 x i32> %tmp4
+}
+
define <4 x float> @vld3Qf(float* %A) nounwind {
;CHECK: vld3Qf:
;CHECK: vld3.32
diff --git a/test/CodeGen/ARM/vld4.ll b/test/CodeGen/ARM/vld4.ll
index d1bf957ebadc..59a73db3187e 100644
--- a/test/CodeGen/ARM/vld4.ll
+++ b/test/CodeGen/ARM/vld4.ll
@@ -13,19 +13,35 @@
define <8 x i8> @vld4i8(i8* %A) nounwind {
;CHECK: vld4i8:
-;CHECK: vld4.8
- %tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A, i32 1)
+;Check the alignment value. Max for this instruction is 256 bits:
+;CHECK: vld4.8 {d16, d17, d18, d19}, [r0, :64]
+ %tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A, i32 8)
%tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 2
%tmp4 = add <8 x i8> %tmp2, %tmp3
ret <8 x i8> %tmp4
}
+;Check for a post-increment updating load with register increment.
+define <8 x i8> @vld4i8_update(i8** %ptr, i32 %inc) nounwind {
+;CHECK: vld4i8_update:
+;CHECK: vld4.8 {d16, d17, d18, d19}, [r2, :128], r1
+ %A = load i8** %ptr
+ %tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A, i32 16)
+ %tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 2
+ %tmp4 = add <8 x i8> %tmp2, %tmp3
+ %tmp5 = getelementptr i8* %A, i32 %inc
+ store i8* %tmp5, i8** %ptr
+ ret <8 x i8> %tmp4
+}
+
define <4 x i16> @vld4i16(i16* %A) nounwind {
;CHECK: vld4i16:
-;CHECK: vld4.16
+;Check the alignment value. Max for this instruction is 256 bits:
+;CHECK: vld4.16 {d16, d17, d18, d19}, [r0, :128]
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8* %tmp0, i32 1)
+ %tmp1 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8* %tmp0, i32 16)
%tmp2 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 2
%tmp4 = add <4 x i16> %tmp2, %tmp3
@@ -34,9 +50,10 @@ define <4 x i16> @vld4i16(i16* %A) nounwind {
define <2 x i32> @vld4i32(i32* %A) nounwind {
;CHECK: vld4i32:
-;CHECK: vld4.32
+;Check the alignment value. Max for this instruction is 256 bits:
+;CHECK: vld4.32 {d16, d17, d18, d19}, [r0, :256]
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8* %tmp0, i32 1)
+ %tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8* %tmp0, i32 32)
%tmp2 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 2
%tmp4 = add <2 x i32> %tmp2, %tmp3
@@ -56,9 +73,10 @@ define <2 x float> @vld4f(float* %A) nounwind {
define <1 x i64> @vld4i64(i64* %A) nounwind {
;CHECK: vld4i64:
-;CHECK: vld1.64
+;Check the alignment value. Max for this instruction is 256 bits:
+;CHECK: vld1.64 {d16, d17, d18, d19}, [r0, :256]
%tmp0 = bitcast i64* %A to i8*
- %tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8* %tmp0, i32 1)
+ %tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8* %tmp0, i32 64)
%tmp2 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 2
%tmp4 = add <1 x i64> %tmp2, %tmp3
@@ -67,9 +85,10 @@ define <1 x i64> @vld4i64(i64* %A) nounwind {
define <16 x i8> @vld4Qi8(i8* %A) nounwind {
;CHECK: vld4Qi8:
-;CHECK: vld4.8
-;CHECK: vld4.8
- %tmp1 = call %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8* %A, i32 1)
+;Check the alignment value. Max for this instruction is 256 bits:
+;CHECK: vld4.8 {d16, d18, d20, d22}, [r0, :256]!
+;CHECK: vld4.8 {d17, d19, d21, d23}, [r0, :256]
+ %tmp1 = call %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8* %A, i32 64)
%tmp2 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 2
%tmp4 = add <16 x i8> %tmp2, %tmp3
@@ -78,8 +97,9 @@ define <16 x i8> @vld4Qi8(i8* %A) nounwind {
define <8 x i16> @vld4Qi16(i16* %A) nounwind {
;CHECK: vld4Qi16:
-;CHECK: vld4.16
-;CHECK: vld4.16
+;Check that no alignment specifier is used.
+;CHECK: vld4.16 {d16, d18, d20, d22}, [r0]!
+;CHECK: vld4.16 {d17, d19, d21, d23}, [r0]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 0
@@ -88,6 +108,22 @@ define <8 x i16> @vld4Qi16(i16* %A) nounwind {
ret <8 x i16> %tmp4
}
+;Check for a post-increment updating load.
+define <8 x i16> @vld4Qi16_update(i16** %ptr) nounwind {
+;CHECK: vld4Qi16_update:
+;CHECK: vld4.16 {d16, d18, d20, d22}, [r1, :64]!
+;CHECK: vld4.16 {d17, d19, d21, d23}, [r1, :64]!
+ %A = load i16** %ptr
+ %tmp0 = bitcast i16* %A to i8*
+ %tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8* %tmp0, i32 8)
+ %tmp2 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 2
+ %tmp4 = add <8 x i16> %tmp2, %tmp3
+ %tmp5 = getelementptr i16* %A, i32 32
+ store i16* %tmp5, i16** %ptr
+ ret <8 x i16> %tmp4
+}
+
define <4 x i32> @vld4Qi32(i32* %A) nounwind {
;CHECK: vld4Qi32:
;CHECK: vld4.32
diff --git a/test/CodeGen/ARM/vlddup.ll b/test/CodeGen/ARM/vlddup.ll
new file mode 100644
index 000000000000..d0e9ac3ad3c4
--- /dev/null
+++ b/test/CodeGen/ARM/vlddup.ll
@@ -0,0 +1,212 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vld1dupi8(i8* %A) nounwind {
+;CHECK: vld1dupi8:
+;Check the (default) alignment value.
+;CHECK: vld1.8 {d16[]}, [r0]
+ %tmp1 = load i8* %A, align 8
+ %tmp2 = insertelement <8 x i8> undef, i8 %tmp1, i32 0
+ %tmp3 = shufflevector <8 x i8> %tmp2, <8 x i8> undef, <8 x i32> zeroinitializer
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vld1dupi16(i16* %A) nounwind {
+;CHECK: vld1dupi16:
+;Check the alignment value. Max for this instruction is 16 bits:
+;CHECK: vld1.16 {d16[]}, [r0, :16]
+ %tmp1 = load i16* %A, align 8
+ %tmp2 = insertelement <4 x i16> undef, i16 %tmp1, i32 0
+ %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> undef, <4 x i32> zeroinitializer
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vld1dupi32(i32* %A) nounwind {
+;CHECK: vld1dupi32:
+;Check the alignment value. Max for this instruction is 32 bits:
+;CHECK: vld1.32 {d16[]}, [r0, :32]
+ %tmp1 = load i32* %A, align 8
+ %tmp2 = insertelement <2 x i32> undef, i32 %tmp1, i32 0
+ %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> undef, <2 x i32> zeroinitializer
+ ret <2 x i32> %tmp3
+}
+
+define <2 x float> @vld1dupf(float* %A) nounwind {
+;CHECK: vld1dupf:
+;CHECK: vld1.32 {d16[]}, [r0]
+ %tmp0 = load float* %A
+ %tmp1 = insertelement <2 x float> undef, float %tmp0, i32 0
+ %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
+ ret <2 x float> %tmp2
+}
+
+define <16 x i8> @vld1dupQi8(i8* %A) nounwind {
+;CHECK: vld1dupQi8:
+;Check the (default) alignment value.
+;CHECK: vld1.8 {d16[], d17[]}, [r0]
+ %tmp1 = load i8* %A, align 8
+ %tmp2 = insertelement <16 x i8> undef, i8 %tmp1, i32 0
+ %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <16 x i32> zeroinitializer
+ ret <16 x i8> %tmp3
+}
+
+define <4 x float> @vld1dupQf(float* %A) nounwind {
+;CHECK: vld1dupQf:
+;CHECK: vld1.32 {d16[], d17[]}, [r0]
+ %tmp0 = load float* %A
+ %tmp1 = insertelement <4 x float> undef, float %tmp0, i32 0
+ %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
+ ret <4 x float> %tmp2
+}
+
+%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
+%struct.__neon_int4x16x2_t = type { <4 x i16>, <4 x i16> }
+%struct.__neon_int2x32x2_t = type { <2 x i32>, <2 x i32> }
+
+define <8 x i8> @vld2dupi8(i8* %A) nounwind {
+;CHECK: vld2dupi8:
+;Check the (default) alignment value.
+;CHECK: vld2.8 {d16[], d17[]}, [r0]
+ %tmp0 = tail call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
+ %tmp1 = extractvalue %struct.__neon_int8x8x2_t %tmp0, 0
+ %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer
+ %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp0, 1
+ %tmp4 = shufflevector <8 x i8> %tmp3, <8 x i8> undef, <8 x i32> zeroinitializer
+ %tmp5 = add <8 x i8> %tmp2, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @vld2dupi16(i16* %A) nounwind {
+;CHECK: vld2dupi16:
+;Check that a power-of-two alignment smaller than the total size of the memory
+;being loaded is ignored.
+;CHECK: vld2.16 {d16[], d17[]}, [r0]
+ %tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i16* %A, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
+ %tmp1 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 0
+ %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
+ %tmp3 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 1
+ %tmp4 = shufflevector <4 x i16> %tmp3, <4 x i16> undef, <4 x i32> zeroinitializer
+ %tmp5 = add <4 x i16> %tmp2, %tmp4
+ ret <4 x i16> %tmp5
+}
+
+;Check for a post-increment updating load.
+define <4 x i16> @vld2dupi16_update(i16** %ptr) nounwind {
+;CHECK: vld2dupi16_update:
+;CHECK: vld2.16 {d16[], d17[]}, [r1]!
+ %A = load i16** %ptr
+ %tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i16* %A, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
+ %tmp1 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 0
+ %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
+ %tmp3 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 1
+ %tmp4 = shufflevector <4 x i16> %tmp3, <4 x i16> undef, <4 x i32> zeroinitializer
+ %tmp5 = add <4 x i16> %tmp2, %tmp4
+ %tmp6 = getelementptr i16* %A, i32 2
+ store i16* %tmp6, i16** %ptr
+ ret <4 x i16> %tmp5
+}
+
+define <2 x i32> @vld2dupi32(i32* %A) nounwind {
+;CHECK: vld2dupi32:
+;Check the alignment value. Max for this instruction is 64 bits:
+;CHECK: vld2.32 {d16[], d17[]}, [r0, :64]
+ %tmp0 = tail call %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i32* %A, <2 x i32> undef, <2 x i32> undef, i32 0, i32 16)
+ %tmp1 = extractvalue %struct.__neon_int2x32x2_t %tmp0, 0
+ %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
+ %tmp3 = extractvalue %struct.__neon_int2x32x2_t %tmp0, 1
+ %tmp4 = shufflevector <2 x i32> %tmp3, <2 x i32> undef, <2 x i32> zeroinitializer
+ %tmp5 = add <2 x i32> %tmp2, %tmp4
+ ret <2 x i32> %tmp5
+}
+
+declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
+declare %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i16*, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i32*, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
+
+%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
+%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
+
+;Check for a post-increment updating load with register increment.
+define <8 x i8> @vld3dupi8_update(i8** %ptr, i32 %inc) nounwind {
+;CHECK: vld3dupi8_update:
+;CHECK: vld3.8 {d16[], d17[], d18[]}, [r2], r1
+ %A = load i8** %ptr
+ %tmp0 = tail call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 8)
+ %tmp1 = extractvalue %struct.__neon_int8x8x3_t %tmp0, 0
+ %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer
+ %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp0, 1
+ %tmp4 = shufflevector <8 x i8> %tmp3, <8 x i8> undef, <8 x i32> zeroinitializer
+ %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp0, 2
+ %tmp6 = shufflevector <8 x i8> %tmp5, <8 x i8> undef, <8 x i32> zeroinitializer
+ %tmp7 = add <8 x i8> %tmp2, %tmp4
+ %tmp8 = add <8 x i8> %tmp7, %tmp6
+ %tmp9 = getelementptr i8* %A, i32 %inc
+ store i8* %tmp9, i8** %ptr
+ ret <8 x i8> %tmp8
+}
+
+define <4 x i16> @vld3dupi16(i16* %A) nounwind {
+;CHECK: vld3dupi16:
+;Check the (default) alignment value. VLD3 does not support alignment.
+;CHECK: vld3.16 {d16[], d17[], d18[]}, [r0]
+ %tmp0 = tail call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i16* %A, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 8)
+ %tmp1 = extractvalue %struct.__neon_int16x4x3_t %tmp0, 0
+ %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
+ %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp0, 1
+ %tmp4 = shufflevector <4 x i16> %tmp3, <4 x i16> undef, <4 x i32> zeroinitializer
+ %tmp5 = extractvalue %struct.__neon_int16x4x3_t %tmp0, 2
+ %tmp6 = shufflevector <4 x i16> %tmp5, <4 x i16> undef, <4 x i32> zeroinitializer
+ %tmp7 = add <4 x i16> %tmp2, %tmp4
+ %tmp8 = add <4 x i16> %tmp7, %tmp6
+ ret <4 x i16> %tmp8
+}
+
+declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
+declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i16*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
+
+%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
+%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
+
+;Check for a post-increment updating load.
+define <4 x i16> @vld4dupi16_update(i16** %ptr) nounwind {
+;CHECK: vld4dupi16_update:
+;CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [r1]!
+ %A = load i16** %ptr
+ %tmp0 = tail call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i16* %A, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 1)
+ %tmp1 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 0
+ %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
+ %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 1
+ %tmp4 = shufflevector <4 x i16> %tmp3, <4 x i16> undef, <4 x i32> zeroinitializer
+ %tmp5 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 2
+ %tmp6 = shufflevector <4 x i16> %tmp5, <4 x i16> undef, <4 x i32> zeroinitializer
+ %tmp7 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 3
+ %tmp8 = shufflevector <4 x i16> %tmp7, <4 x i16> undef, <4 x i32> zeroinitializer
+ %tmp9 = add <4 x i16> %tmp2, %tmp4
+ %tmp10 = add <4 x i16> %tmp6, %tmp8
+ %tmp11 = add <4 x i16> %tmp9, %tmp10
+ %tmp12 = getelementptr i16* %A, i32 4
+ store i16* %tmp12, i16** %ptr
+ ret <4 x i16> %tmp11
+}
+
+define <2 x i32> @vld4dupi32(i32* %A) nounwind {
+;CHECK: vld4dupi32:
+;Check the alignment value. An 8-byte alignment is allowed here even though
+;it is smaller than the total size of the memory being loaded.
+;CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [r0, :64]
+ %tmp0 = tail call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i32* %A, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 8)
+ %tmp1 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 0
+ %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
+ %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 1
+ %tmp4 = shufflevector <2 x i32> %tmp3, <2 x i32> undef, <2 x i32> zeroinitializer
+ %tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 2
+ %tmp6 = shufflevector <2 x i32> %tmp5, <2 x i32> undef, <2 x i32> zeroinitializer
+ %tmp7 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 3
+ %tmp8 = shufflevector <2 x i32> %tmp7, <2 x i32> undef, <2 x i32> zeroinitializer
+ %tmp9 = add <2 x i32> %tmp2, %tmp4
+ %tmp10 = add <2 x i32> %tmp6, %tmp8
+ %tmp11 = add <2 x i32> %tmp9, %tmp10
+ ret <2 x i32> %tmp11
+}
+
+declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i16*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i32*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
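For illustration only (not part of this commit): the duplicating loads tested above correspond to the vldN_dup NEON intrinsics, which load one element (or one element per interleaved sub-vector) and broadcast it to every lane. A minimal C sketch, assuming arm_neon.h:

    #include <arm_neon.h>

    /* Broadcast one i8 to all eight lanes: vld1.8 {dN[]}, [rM] */
    int8x8_t splat_i8(const int8_t *p) { return vld1_dup_s8(p); }

    /* De-interleaving duplicate load of an i16 pair: vld2.16 {dN[], dM[]}, [rK] */
    int16x4x2_t splat_i16_pair(const int16_t *p) { return vld2_dup_s16(p); }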
diff --git a/test/CodeGen/ARM/vldlane.ll b/test/CodeGen/ARM/vldlane.ll
index 31ee64fa598f..770ed071ac12 100644
--- a/test/CodeGen/ARM/vldlane.ll
+++ b/test/CodeGen/ARM/vldlane.ll
@@ -1,5 +1,80 @@
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+define <8 x i8> @vld1lanei8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vld1lanei8:
+;Check the (default) alignment value.
+;CHECK: vld1.8 {d16[3]}, [r0]
+ %tmp1 = load <8 x i8>* %B
+ %tmp2 = load i8* %A, align 8
+ %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 3
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vld1lanei16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vld1lanei16:
+;Check the alignment value. Max for this instruction is 16 bits:
+;CHECK: vld1.16 {d16[2]}, [r0, :16]
+ %tmp1 = load <4 x i16>* %B
+ %tmp2 = load i16* %A, align 8
+ %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 2
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vld1lanei32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vld1lanei32:
+;Check the alignment value. Max for this instruction is 32 bits:
+;CHECK: vld1.32 {d16[1]}, [r0, :32]
+ %tmp1 = load <2 x i32>* %B
+ %tmp2 = load i32* %A, align 8
+ %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
+ ret <2 x i32> %tmp3
+}
+
+define <2 x float> @vld1lanef(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vld1lanef:
+;CHECK: vld1.32 {d16[1]}, [r0]
+ %tmp1 = load <2 x float>* %B
+ %tmp2 = load float* %A, align 4
+ %tmp3 = insertelement <2 x float> %tmp1, float %tmp2, i32 1
+ ret <2 x float> %tmp3
+}
+
+define <16 x i8> @vld1laneQi8(i8* %A, <16 x i8>* %B) nounwind {
+;CHECK: vld1laneQi8:
+;CHECK: vld1.8 {d17[1]}, [r0]
+ %tmp1 = load <16 x i8>* %B
+ %tmp2 = load i8* %A, align 8
+ %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 9
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vld1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vld1laneQi16:
+;CHECK: vld1.16 {d17[1]}, [r0, :16]
+ %tmp1 = load <8 x i16>* %B
+ %tmp2 = load i16* %A, align 8
+ %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 5
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vld1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vld1laneQi32:
+;CHECK: vld1.32 {d17[1]}, [r0, :32]
+ %tmp1 = load <4 x i32>* %B
+ %tmp2 = load i32* %A, align 8
+ %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 3
+ ret <4 x i32> %tmp3
+}
+
+define <4 x float> @vld1laneQf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vld1laneQf:
+;CHECK: vld1.32 {d16[0]}, [r0]
+ %tmp1 = load <4 x float>* %B
+ %tmp2 = load float* %A
+ %tmp3 = insertelement <4 x float> %tmp1, float %tmp2, i32 0
+ ret <4 x float> %tmp3
+}
+
%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
%struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> }
%struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> }
@@ -11,9 +86,10 @@
define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld2lanei8:
-;CHECK: vld2.8
+;Check the alignment value. Max for this instruction is 16 bits:
+;CHECK: vld2.8 {d16[1], d17[1]}, [r0, :16]
%tmp1 = load <8 x i8>* %B
- %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1)
+ %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4)
%tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1
%tmp5 = add <8 x i8> %tmp3, %tmp4
@@ -22,10 +98,11 @@ define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vld2lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld2lanei16:
-;CHECK: vld2.16
+;Check the alignment value. Max for this instruction is 32 bits:
+;CHECK: vld2.16 {d16[1], d17[1]}, [r0, :32]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>* %B
- %tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1)
+ %tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
%tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 1
%tmp5 = add <4 x i16> %tmp3, %tmp4
@@ -44,6 +121,22 @@ define <2 x i32> @vld2lanei32(i32* %A, <2 x i32>* %B) nounwind {
ret <2 x i32> %tmp5
}
+;Check for a post-increment updating load.
+define <2 x i32> @vld2lanei32_update(i32** %ptr, <2 x i32>* %B) nounwind {
+;CHECK: vld2lanei32_update:
+;CHECK: vld2.32 {d16[1], d17[1]}, [r1]!
+ %A = load i32** %ptr
+ %tmp0 = bitcast i32* %A to i8*
+ %tmp1 = load <2 x i32>* %B
+ %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1
+ %tmp5 = add <2 x i32> %tmp3, %tmp4
+ %tmp6 = getelementptr i32* %A, i32 2
+ store i32* %tmp6, i32** %ptr
+ ret <2 x i32> %tmp5
+}
+
define <2 x float> @vld2lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld2lanef:
;CHECK: vld2.32
@@ -58,10 +151,11 @@ define <2 x float> @vld2lanef(float* %A, <2 x float>* %B) nounwind {
define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld2laneQi16:
-;CHECK: vld2.16
+;Check the (default) alignment.
+;CHECK: vld2.16 {d17[1], d19[1]}, [r0]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>* %B
- %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 1)
+ %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 5, i32 1)
%tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1
%tmp5 = add <8 x i16> %tmp3, %tmp4
@@ -70,10 +164,11 @@ define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld2laneQi32:
-;CHECK: vld2.32
+;Check the alignment value. Max for this instruction is 64 bits:
+;CHECK: vld2.32 {d17[0], d19[0]}, [r0, :64]
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>* %B
- %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1)
+ %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16)
%tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 1
%tmp5 = add <4 x i32> %tmp3, %tmp4
@@ -125,10 +220,11 @@ define <8 x i8> @vld3lanei8(i8* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vld3lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld3lanei16:
-;CHECK: vld3.16
+;Check the (default) alignment value. VLD3 does not support alignment.
+;CHECK: vld3.16 {d16[1], d17[1], d18[1]}, [r0]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>* %B
- %tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1)
+ %tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
%tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 2
@@ -167,10 +263,11 @@ define <2 x float> @vld3lanef(float* %A, <2 x float>* %B) nounwind {
define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld3laneQi16:
-;CHECK: vld3.16
+;Check the (default) alignment value. VLD3 does not support alignment.
+;CHECK: vld3.16 {d16[1], d18[1], d20[1]}, [r0]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>* %B
- %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 1)
+ %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 8)
%tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2
@@ -179,6 +276,24 @@ define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
ret <8 x i16> %tmp7
}
+;Check for a post-increment updating load with register increment.
+define <8 x i16> @vld3laneQi16_update(i16** %ptr, <8 x i16>* %B, i32 %inc) nounwind {
+;CHECK: vld3laneQi16_update:
+;CHECK: vld3.16 {d16[1], d18[1], d20[1]}, [r2], r1
+ %A = load i16** %ptr
+ %tmp0 = bitcast i16* %A to i8*
+ %tmp1 = load <8 x i16>* %B
+ %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 8)
+ %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2
+ %tmp6 = add <8 x i16> %tmp3, %tmp4
+ %tmp7 = add <8 x i16> %tmp5, %tmp6
+ %tmp8 = getelementptr i16* %A, i32 %inc
+ store i16* %tmp8, i16** %ptr
+ ret <8 x i16> %tmp7
+}
+
define <4 x i32> @vld3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld3laneQi32:
;CHECK: vld3.32
@@ -227,9 +342,10 @@ declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x flo
define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld4lanei8:
-;CHECK: vld4.8
+;Check the alignment value. Max for this instruction is 32 bits:
+;CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32]
%tmp1 = load <8 x i8>* %B
- %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1)
+ %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
%tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
@@ -240,12 +356,33 @@ define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind {
ret <8 x i8> %tmp9
}
+;Check for a post-increment updating load.
+define <8 x i8> @vld4lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind {
+;CHECK: vld4lanei8_update:
+;CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32]!
+ %A = load i8** %ptr
+ %tmp1 = load <8 x i8>* %B
+ %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
+ %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
+ %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3
+ %tmp7 = add <8 x i8> %tmp3, %tmp4
+ %tmp8 = add <8 x i8> %tmp5, %tmp6
+ %tmp9 = add <8 x i8> %tmp7, %tmp8
+ %tmp10 = getelementptr i8* %A, i32 4
+ store i8* %tmp10, i8** %ptr
+ ret <8 x i8> %tmp9
+}
+
define <4 x i16> @vld4lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld4lanei16:
-;CHECK: vld4.16
+;Check that a power-of-two alignment smaller than the total size of the memory
+;being loaded is ignored.
+;CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r0]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>* %B
- %tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1)
+ %tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 4)
%tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 2
@@ -258,10 +395,12 @@ define <4 x i16> @vld4lanei16(i16* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vld4lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld4lanei32:
-;CHECK: vld4.32
+;Check the alignment value. An 8-byte alignment is allowed here even though
+;it is smaller than the total size of the memory being loaded.
+;CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :64]
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <2 x i32>* %B
- %tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
+ %tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 8)
%tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 2
@@ -290,10 +429,11 @@ define <2 x float> @vld4lanef(float* %A, <2 x float>* %B) nounwind {
define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld4laneQi16:
-;CHECK: vld4.16
+;Check the alignment value. Max for this instruction is 64 bits:
+;CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [r0, :64]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>* %B
- %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 1)
+ %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 16)
%tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 2
@@ -306,10 +446,11 @@ define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vld4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld4laneQi32:
-;CHECK: vld4.32
+;Check the (default) alignment.
+;CHECK: vld4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0]
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>* %B
- %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1, i32 1)
+ %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1)
%tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 2
@@ -344,3 +485,22 @@ declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8*, <2 x flo
declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind readonly
+
+; Radar 8776599: If one of the operands to a QQQQ REG_SEQUENCE is a register
+; in the QPR_VFP2 regclass, it needs to be copied to a QPR regclass because
+; we don't currently have a QQQQ_VFP2 super-regclass. (The "0" for the low
+; part of %ins67 is supposed to be loaded by a VLDRS instruction in this test.)
+define void @test_qqqq_regsequence_subreg([6 x i64] %b) nounwind {
+;CHECK: test_qqqq_regsequence_subreg
+;CHECK: vld3.16
+ %tmp63 = extractvalue [6 x i64] %b, 5
+ %tmp64 = zext i64 %tmp63 to i128
+ %tmp65 = shl i128 %tmp64, 64
+ %ins67 = or i128 %tmp65, 0
+ %tmp78 = bitcast i128 %ins67 to <8 x i16>
+ %vld3_lane = tail call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* undef, <8 x i16> undef, <8 x i16> undef, <8 x i16> %tmp78, i32 1, i32 2)
+ call void @llvm.trap()
+ unreachable
+}
+
+declare void @llvm.trap() nounwind
diff --git a/test/CodeGen/ARM/vmov.ll b/test/CodeGen/ARM/vmov.ll
index 8cd94576b0c2..a86be32bd203 100644
--- a/test/CodeGen/ARM/vmov.ll
+++ b/test/CodeGen/ARM/vmov.ll
@@ -2,169 +2,169 @@
define <8 x i8> @v_movi8() nounwind {
;CHECK: v_movi8:
-;CHECK: vmov.i8 d0, #0x8
+;CHECK: vmov.i8 d{{.*}}, #0x8
ret <8 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
}
define <4 x i16> @v_movi16a() nounwind {
;CHECK: v_movi16a:
-;CHECK: vmov.i16 d0, #0x10
+;CHECK: vmov.i16 d{{.*}}, #0x10
ret <4 x i16> < i16 16, i16 16, i16 16, i16 16 >
}
define <4 x i16> @v_movi16b() nounwind {
;CHECK: v_movi16b:
-;CHECK: vmov.i16 d0, #0x1000
+;CHECK: vmov.i16 d{{.*}}, #0x1000
ret <4 x i16> < i16 4096, i16 4096, i16 4096, i16 4096 >
}
define <4 x i16> @v_mvni16a() nounwind {
;CHECK: v_mvni16a:
-;CHECK: vmvn.i16 d0, #0x10
+;CHECK: vmvn.i16 d{{.*}}, #0x10
ret <4 x i16> < i16 65519, i16 65519, i16 65519, i16 65519 >
}
define <4 x i16> @v_mvni16b() nounwind {
;CHECK: v_mvni16b:
-;CHECK: vmvn.i16 d0, #0x1000
+;CHECK: vmvn.i16 d{{.*}}, #0x1000
ret <4 x i16> < i16 61439, i16 61439, i16 61439, i16 61439 >
}
define <2 x i32> @v_movi32a() nounwind {
;CHECK: v_movi32a:
-;CHECK: vmov.i32 d0, #0x20
+;CHECK: vmov.i32 d{{.*}}, #0x20
ret <2 x i32> < i32 32, i32 32 >
}
define <2 x i32> @v_movi32b() nounwind {
;CHECK: v_movi32b:
-;CHECK: vmov.i32 d0, #0x2000
+;CHECK: vmov.i32 d{{.*}}, #0x2000
ret <2 x i32> < i32 8192, i32 8192 >
}
define <2 x i32> @v_movi32c() nounwind {
;CHECK: v_movi32c:
-;CHECK: vmov.i32 d0, #0x200000
+;CHECK: vmov.i32 d{{.*}}, #0x200000
ret <2 x i32> < i32 2097152, i32 2097152 >
}
define <2 x i32> @v_movi32d() nounwind {
;CHECK: v_movi32d:
-;CHECK: vmov.i32 d0, #0x20000000
+;CHECK: vmov.i32 d{{.*}}, #0x20000000
ret <2 x i32> < i32 536870912, i32 536870912 >
}
define <2 x i32> @v_movi32e() nounwind {
;CHECK: v_movi32e:
-;CHECK: vmov.i32 d0, #0x20FF
+;CHECK: vmov.i32 d{{.*}}, #0x20FF
ret <2 x i32> < i32 8447, i32 8447 >
}
define <2 x i32> @v_movi32f() nounwind {
;CHECK: v_movi32f:
-;CHECK: vmov.i32 d0, #0x20FFFF
+;CHECK: vmov.i32 d{{.*}}, #0x20FFFF
ret <2 x i32> < i32 2162687, i32 2162687 >
}
define <2 x i32> @v_mvni32a() nounwind {
;CHECK: v_mvni32a:
-;CHECK: vmvn.i32 d0, #0x20
+;CHECK: vmvn.i32 d{{.*}}, #0x20
ret <2 x i32> < i32 4294967263, i32 4294967263 >
}
define <2 x i32> @v_mvni32b() nounwind {
;CHECK: v_mvni32b:
-;CHECK: vmvn.i32 d0, #0x2000
+;CHECK: vmvn.i32 d{{.*}}, #0x2000
ret <2 x i32> < i32 4294959103, i32 4294959103 >
}
define <2 x i32> @v_mvni32c() nounwind {
;CHECK: v_mvni32c:
-;CHECK: vmvn.i32 d0, #0x200000
+;CHECK: vmvn.i32 d{{.*}}, #0x200000
ret <2 x i32> < i32 4292870143, i32 4292870143 >
}
define <2 x i32> @v_mvni32d() nounwind {
;CHECK: v_mvni32d:
-;CHECK: vmvn.i32 d0, #0x20000000
+;CHECK: vmvn.i32 d{{.*}}, #0x20000000
ret <2 x i32> < i32 3758096383, i32 3758096383 >
}
define <2 x i32> @v_mvni32e() nounwind {
;CHECK: v_mvni32e:
-;CHECK: vmvn.i32 d0, #0x20FF
+;CHECK: vmvn.i32 d{{.*}}, #0x20FF
ret <2 x i32> < i32 4294958848, i32 4294958848 >
}
define <2 x i32> @v_mvni32f() nounwind {
;CHECK: v_mvni32f:
-;CHECK: vmvn.i32 d0, #0x20FFFF
+;CHECK: vmvn.i32 d{{.*}}, #0x20FFFF
ret <2 x i32> < i32 4292804608, i32 4292804608 >
}
define <1 x i64> @v_movi64() nounwind {
;CHECK: v_movi64:
-;CHECK: vmov.i64 d0, #0xFF0000FF0000FFFF
+;CHECK: vmov.i64 d{{.*}}, #0xFF0000FF0000FFFF
ret <1 x i64> < i64 18374687574888349695 >
}
define <16 x i8> @v_movQi8() nounwind {
;CHECK: v_movQi8:
-;CHECK: vmov.i8 q0, #0x8
+;CHECK: vmov.i8 q{{.*}}, #0x8
ret <16 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
}
define <8 x i16> @v_movQi16a() nounwind {
;CHECK: v_movQi16a:
-;CHECK: vmov.i16 q0, #0x10
+;CHECK: vmov.i16 q{{.*}}, #0x10
ret <8 x i16> < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 >
}
define <8 x i16> @v_movQi16b() nounwind {
;CHECK: v_movQi16b:
-;CHECK: vmov.i16 q0, #0x1000
+;CHECK: vmov.i16 q{{.*}}, #0x1000
ret <8 x i16> < i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096 >
}
define <4 x i32> @v_movQi32a() nounwind {
;CHECK: v_movQi32a:
-;CHECK: vmov.i32 q0, #0x20
+;CHECK: vmov.i32 q{{.*}}, #0x20
ret <4 x i32> < i32 32, i32 32, i32 32, i32 32 >
}
define <4 x i32> @v_movQi32b() nounwind {
;CHECK: v_movQi32b:
-;CHECK: vmov.i32 q0, #0x2000
+;CHECK: vmov.i32 q{{.*}}, #0x2000
ret <4 x i32> < i32 8192, i32 8192, i32 8192, i32 8192 >
}
define <4 x i32> @v_movQi32c() nounwind {
;CHECK: v_movQi32c:
-;CHECK: vmov.i32 q0, #0x200000
+;CHECK: vmov.i32 q{{.*}}, #0x200000
ret <4 x i32> < i32 2097152, i32 2097152, i32 2097152, i32 2097152 >
}
define <4 x i32> @v_movQi32d() nounwind {
;CHECK: v_movQi32d:
-;CHECK: vmov.i32 q0, #0x20000000
+;CHECK: vmov.i32 q{{.*}}, #0x20000000
ret <4 x i32> < i32 536870912, i32 536870912, i32 536870912, i32 536870912 >
}
define <4 x i32> @v_movQi32e() nounwind {
;CHECK: v_movQi32e:
-;CHECK: vmov.i32 q0, #0x20FF
+;CHECK: vmov.i32 q{{.*}}, #0x20FF
ret <4 x i32> < i32 8447, i32 8447, i32 8447, i32 8447 >
}
define <4 x i32> @v_movQi32f() nounwind {
;CHECK: v_movQi32f:
-;CHECK: vmov.i32 q0, #0x20FFFF
+;CHECK: vmov.i32 q{{.*}}, #0x20FFFF
ret <4 x i32> < i32 2162687, i32 2162687, i32 2162687, i32 2162687 >
}
define <2 x i64> @v_movQi64() nounwind {
;CHECK: v_movQi64:
-;CHECK: vmov.i64 q0, #0xFF0000FF0000FFFF
+;CHECK: vmov.i64 q{{.*}}, #0xFF0000FF0000FFFF
ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 >
}
@@ -173,7 +173,7 @@ define <2 x i64> @v_movQi64() nounwind {
define void @vdupn128(%struct.int8x8_t* noalias nocapture sret %agg.result) nounwind {
entry:
;CHECK: vdupn128:
-;CHECK: vmov.i8 d0, #0x80
+;CHECK: vmov.i8 d{{.*}}, #0x80
%0 = getelementptr inbounds %struct.int8x8_t* %agg.result, i32 0, i32 0 ; <<8 x i8>*> [#uses=1]
store <8 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>, <8 x i8>* %0, align 8
ret void
@@ -182,7 +182,7 @@ entry:
define void @vdupnneg75(%struct.int8x8_t* noalias nocapture sret %agg.result) nounwind {
entry:
;CHECK: vdupnneg75:
-;CHECK: vmov.i8 d0, #0xB5
+;CHECK: vmov.i8 d{{.*}}, #0xB5
%0 = getelementptr inbounds %struct.int8x8_t* %agg.result, i32 0, i32 0 ; <<8 x i8>*> [#uses=1]
store <8 x i8> <i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75>, <8 x i8>* %0, align 8
ret void
@@ -343,3 +343,13 @@ declare <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64>) nounwind readnone
declare <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64>) nounwind readnone
+
+; Truncating vector stores are not supported. The following should not crash.
+; Radar 8598391.
+define void @noTruncStore(<4 x i32>* %a, <4 x i16>* %b) nounwind {
+;CHECK: vmovn
+ %tmp1 = load <4 x i32>* %a, align 16
+ %tmp2 = trunc <4 x i32> %tmp1 to <4 x i16>
+ store <4 x i16> %tmp2, <4 x i16>* %b, align 8
+ ret void
+}
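
; The noTruncStore test above exercises a <4 x i32> to <4 x i16> truncation that
; feeds a store; since there is no truncating vector store, it has to be lowered
; through a vmovn narrowing move. A rough C/NEON sketch of the same operation
; (illustrative only, not part of the patch):
;
;   #include <arm_neon.h>
;
;   void narrow_and_store(const int32_t *a, int16_t *b) {
;       int32x4_t wide = vld1q_s32(a);       /* load <4 x i32> */
;       int16x4_t narrow = vmovn_s32(wide);  /* vmovn.i32: keep the low half of each lane */
;       vst1_s16(b, narrow);                 /* plain <4 x i16> store */
;   }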
diff --git a/test/CodeGen/ARM/vmul.ll b/test/CodeGen/ARM/vmul.ll
index 5383425018f8..ee033caa00d0 100644
--- a/test/CodeGen/ARM/vmul.ll
+++ b/test/CodeGen/ARM/vmul.ll
@@ -267,3 +267,75 @@ entry:
}
declare <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+
+
+; Radar 8687140
+; VMULL needs to recognize BUILD_VECTORs with sign/zero-extended elements.
+
+define <8 x i16> @vmull_extvec_s8(<8 x i8> %arg) nounwind {
+; CHECK: vmull_extvec_s8
+; CHECK: vmull.s8
+ %tmp3 = sext <8 x i8> %arg to <8 x i16>
+ %tmp4 = mul <8 x i16> %tmp3, <i16 -12, i16 -12, i16 -12, i16 -12, i16 -12, i16 -12, i16 -12, i16 -12>
+ ret <8 x i16> %tmp4
+}
+
+define <8 x i16> @vmull_extvec_u8(<8 x i8> %arg) nounwind {
+; CHECK: vmull_extvec_u8
+; CHECK: vmull.u8
+ %tmp3 = zext <8 x i8> %arg to <8 x i16>
+ %tmp4 = mul <8 x i16> %tmp3, <i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12>
+ ret <8 x i16> %tmp4
+}
+
+define <8 x i16> @vmull_noextvec_s8(<8 x i8> %arg) nounwind {
+; Do not use VMULL if the BUILD_VECTOR element values are too big.
+; CHECK: vmull_noextvec_s8
+; CHECK: vmovl.s8
+; CHECK: vmul.i16
+ %tmp3 = sext <8 x i8> %arg to <8 x i16>
+ %tmp4 = mul <8 x i16> %tmp3, <i16 -999, i16 -999, i16 -999, i16 -999, i16 -999, i16 -999, i16 -999, i16 -999>
+ ret <8 x i16> %tmp4
+}
+
+define <8 x i16> @vmull_noextvec_u8(<8 x i8> %arg) nounwind {
+; Do not use VMULL if the BUILD_VECTOR element values are too big.
+; CHECK: vmull_noextvec_u8
+; CHECK: vmovl.u8
+; CHECK: vmul.i16
+ %tmp3 = zext <8 x i8> %arg to <8 x i16>
+ %tmp4 = mul <8 x i16> %tmp3, <i16 999, i16 999, i16 999, i16 999, i16 999, i16 999, i16 999, i16 999>
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vmull_extvec_s16(<4 x i16> %arg) nounwind {
+; CHECK: vmull_extvec_s16
+; CHECK: vmull.s16
+ %tmp3 = sext <4 x i16> %arg to <4 x i32>
+ %tmp4 = mul <4 x i32> %tmp3, <i32 -12, i32 -12, i32 -12, i32 -12>
+ ret <4 x i32> %tmp4
+}
+
+define <4 x i32> @vmull_extvec_u16(<4 x i16> %arg) nounwind {
+; CHECK: vmull_extvec_u16
+; CHECK: vmull.u16
+ %tmp3 = zext <4 x i16> %arg to <4 x i32>
+ %tmp4 = mul <4 x i32> %tmp3, <i32 1234, i32 1234, i32 1234, i32 1234>
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vmull_extvec_s32(<2 x i32> %arg) nounwind {
+; CHECK: vmull_extvec_s32
+; CHECK: vmull.s32
+ %tmp3 = sext <2 x i32> %arg to <2 x i64>
+ %tmp4 = mul <2 x i64> %tmp3, <i64 -1234, i64 -1234>
+ ret <2 x i64> %tmp4
+}
+
+define <2 x i64> @vmull_extvec_u32(<2 x i32> %arg) nounwind {
+; CHECK: vmull_extvec_u32
+; CHECK: vmull.u32
+ %tmp3 = zext <2 x i32> %arg to <2 x i64>
+ %tmp4 = mul <2 x i64> %tmp3, <i64 1234, i64 1234>
+ ret <2 x i64> %tmp4
+}
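
; The vmull_extvec tests all multiply a widened vector by a splatted constant;
; when the constant still fits the narrow element type, the extend and multiply
; can fold into a single vmull, otherwise they stay as vmovl plus vmul. A rough
; C/NEON sketch of the signed i8 case (illustrative, not taken from the patch):
;
;   #include <arm_neon.h>
;
;   int16x8_t widening_mul_s8(int8x8_t a) {
;       /* -12 fits in i8, so vmovl.s8 + vmul.i16 can become a single vmull.s8;
;          a constant like -999 does not fit, as vmull_noextvec_s8 checks. */
;       int16x8_t wide = vmovl_s8(a);
;       return vmulq_n_s16(wide, -12);
;   }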
diff --git a/test/CodeGen/ARM/vrev.ll b/test/CodeGen/ARM/vrev.ll
index e1fe64b02d9d..f0f9e4e339b4 100644
--- a/test/CodeGen/ARM/vrev.ll
+++ b/test/CodeGen/ARM/vrev.ll
@@ -129,3 +129,21 @@ define <8 x i16> @test_vrev32Q16_undef(<8 x i16>* %A) nounwind {
%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 5, i32 4, i32 7, i32 undef>
ret <8 x i16> %tmp2
}
+
+; A vcombine feeding a VREV should not obscure things. Radar 8597007.
+
+define void @test_with_vcombine(<4 x float>* %v) nounwind {
+;CHECK: test_with_vcombine:
+;CHECK-NOT: vext
+;CHECK: vrev64.32
+ %tmp1 = load <4 x float>* %v, align 16
+ %tmp2 = bitcast <4 x float> %tmp1 to <2 x double>
+ %tmp3 = extractelement <2 x double> %tmp2, i32 0
+ %tmp4 = bitcast double %tmp3 to <2 x float>
+ %tmp5 = extractelement <2 x double> %tmp2, i32 1
+ %tmp6 = bitcast double %tmp5 to <2 x float>
+ %tmp7 = fadd <2 x float> %tmp6, %tmp6
+ %tmp8 = shufflevector <2 x float> %tmp4, <2 x float> %tmp7, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ store <4 x float> %tmp8, <4 x float>* %v, align 16
+ ret void
+}
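
; test_with_vcombine splits a <4 x float> into halves, doubles the high half,
; and recombines with the lane-swapping mask <1, 0, 3, 2>; the concat in the
; middle must not keep the shuffle from being matched as vrev64.32. A rough
; C/NEON equivalent (illustrative, not from the patch):
;
;   #include <arm_neon.h>
;
;   void rev_after_combine(float32x4_t *v) {
;       float32x2_t lo = vget_low_f32(*v);
;       float32x2_t hi = vadd_f32(vget_high_f32(*v), vget_high_f32(*v));
;       /* combine + per-64-bit lane swap should still select vrev64.32, not vext */
;       *v = vrev64q_f32(vcombine_f32(lo, hi));
;   }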
diff --git a/test/CodeGen/ARM/vst1.ll b/test/CodeGen/ARM/vst1.ll
index 2b535ada3072..364d44b7116f 100644
--- a/test/CodeGen/ARM/vst1.ll
+++ b/test/CodeGen/ARM/vst1.ll
@@ -2,9 +2,10 @@
define void @vst1i8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vst1i8:
-;CHECK: vst1.8
+;Check the alignment value. Max for this instruction is 64 bits:
+;CHECK: vst1.8 {d16}, [r0, :64]
%tmp1 = load <8 x i8>* %B
- call void @llvm.arm.neon.vst1.v8i8(i8* %A, <8 x i8> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst1.v8i8(i8* %A, <8 x i8> %tmp1, i32 16)
ret void
}
@@ -35,6 +36,19 @@ define void @vst1f(float* %A, <2 x float>* %B) nounwind {
ret void
}
+;Check for a post-increment updating store.
+define void @vst1f_update(float** %ptr, <2 x float>* %B) nounwind {
+;CHECK: vst1f_update:
+;CHECK: vst1.32 {d16}, [r1]!
+ %A = load float** %ptr
+ %tmp0 = bitcast float* %A to i8*
+ %tmp1 = load <2 x float>* %B
+ call void @llvm.arm.neon.vst1.v2f32(i8* %tmp0, <2 x float> %tmp1, i32 1)
+ %tmp2 = getelementptr float* %A, i32 2
+ store float* %tmp2, float** %ptr
+ ret void
+}
+
define void @vst1i64(i64* %A, <1 x i64>* %B) nounwind {
;CHECK: vst1i64:
;CHECK: vst1.64
@@ -46,18 +60,33 @@ define void @vst1i64(i64* %A, <1 x i64>* %B) nounwind {
define void @vst1Qi8(i8* %A, <16 x i8>* %B) nounwind {
;CHECK: vst1Qi8:
-;CHECK: vst1.8
+;Check the alignment value. Max for this instruction is 128 bits:
+;CHECK: vst1.8 {d16, d17}, [r0, :64]
%tmp1 = load <16 x i8>* %B
- call void @llvm.arm.neon.vst1.v16i8(i8* %A, <16 x i8> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst1.v16i8(i8* %A, <16 x i8> %tmp1, i32 8)
ret void
}
define void @vst1Qi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst1Qi16:
-;CHECK: vst1.16
+;Check the alignment value. Max for this instruction is 128 bits:
+;CHECK: vst1.16 {d16, d17}, [r0, :128]
+ %tmp0 = bitcast i16* %A to i8*
+ %tmp1 = load <8 x i16>* %B
+ call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %tmp1, i32 32)
+ ret void
+}
+
+;Check for a post-increment updating store with register increment.
+define void @vst1Qi16_update(i16** %ptr, <8 x i16>* %B, i32 %inc) nounwind {
+;CHECK: vst1Qi16_update:
+;CHECK: vst1.16 {d16, d17}, [r1, :64], r2
+ %A = load i16** %ptr
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>* %B
- call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %tmp1, i32 8)
+ %tmp2 = getelementptr i16* %A, i32 %inc
+ store i16* %tmp2, i16** %ptr
ret void
}
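
; The new *_update tests check that a store followed by a pointer bump is folded
; into a single post-incremented store (the trailing "!" or register operand in
; the expected assembly). A rough C/NEON sketch of the pattern being matched
; (illustrative, not part of the patch):
;
;   #include <arm_neon.h>
;
;   void store_and_advance(float **ptr, float32x2_t v) {
;       vst1_f32(*ptr, v);  /* vst1.32 {dN}, [rM] */
;       *ptr += 2;          /* a fixed 8-byte bump can fold into the post-increment form [rM]! */
;   }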
diff --git a/test/CodeGen/ARM/vst2.ll b/test/CodeGen/ARM/vst2.ll
index aed15fd51c56..915a84b67767 100644
--- a/test/CodeGen/ARM/vst2.ll
+++ b/test/CodeGen/ARM/vst2.ll
@@ -2,18 +2,32 @@
define void @vst2i8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vst2i8:
-;CHECK: vst2.8
+;Check the alignment value. Max for this instruction is 128 bits:
+;CHECK: vst2.8 {d16, d17}, [r0, :64]
%tmp1 = load <8 x i8>* %B
- call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 8)
+ ret void
+}
+
+;Check for a post-increment updating store with register increment.
+define void @vst2i8_update(i8** %ptr, <8 x i8>* %B, i32 %inc) nounwind {
+;CHECK: vst2i8_update:
+;CHECK: vst2.8 {d16, d17}, [r1], r2
+ %A = load i8** %ptr
+ %tmp1 = load <8 x i8>* %B
+ call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 4)
+ %tmp2 = getelementptr i8* %A, i32 %inc
+ store i8* %tmp2, i8** %ptr
ret void
}
define void @vst2i16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst2i16:
-;CHECK: vst2.16
+;Check the alignment value. Max for this instruction is 128 bits:
+;CHECK: vst2.16 {d16, d17}, [r0, :128]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>* %B
- call void @llvm.arm.neon.vst2.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst2.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 32)
ret void
}
@@ -37,36 +51,53 @@ define void @vst2f(float* %A, <2 x float>* %B) nounwind {
define void @vst2i64(i64* %A, <1 x i64>* %B) nounwind {
;CHECK: vst2i64:
-;CHECK: vst1.64
+;Check the alignment value. Max for this instruction is 128 bits:
+;CHECK: vst1.64 {d16, d17}, [r0, :128]
+ %tmp0 = bitcast i64* %A to i8*
+ %tmp1 = load <1 x i64>* %B
+ call void @llvm.arm.neon.vst2.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 32)
+ ret void
+}
+
+;Check for a post-increment updating store.
+define void @vst2i64_update(i64** %ptr, <1 x i64>* %B) nounwind {
+;CHECK: vst2i64_update:
+;CHECK: vst1.64 {d16, d17}, [r1, :64]!
+ %A = load i64** %ptr
%tmp0 = bitcast i64* %A to i8*
%tmp1 = load <1 x i64>* %B
- call void @llvm.arm.neon.vst2.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst2.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 8)
+ %tmp2 = getelementptr i64* %A, i32 2
+ store i64* %tmp2, i64** %ptr
ret void
}
define void @vst2Qi8(i8* %A, <16 x i8>* %B) nounwind {
;CHECK: vst2Qi8:
-;CHECK: vst2.8
+;Check the alignment value. Max for this instruction is 256 bits:
+;CHECK: vst2.8 {d16, d17, d18, d19}, [r0, :64]
%tmp1 = load <16 x i8>* %B
- call void @llvm.arm.neon.vst2.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst2.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 8)
ret void
}
define void @vst2Qi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst2Qi16:
-;CHECK: vst2.16
+;Check the alignment value. Max for this instruction is 256 bits:
+;CHECK: vst2.16 {d16, d17, d18, d19}, [r0, :128]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>* %B
- call void @llvm.arm.neon.vst2.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst2.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 16)
ret void
}
define void @vst2Qi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst2Qi32:
-;CHECK: vst2.32
+;Check the alignment value. Max for this instruction is 256 bits:
+;CHECK: vst2.32 {d16, d17, d18, d19}, [r0, :256]
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>* %B
- call void @llvm.arm.neon.vst2.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst2.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 64)
ret void
}
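
; Throughout these vst/vld tests, the last i32 operand of the NEON load/store
; intrinsic is the pointer alignment in bytes; a large enough value shows up in
; the assembly as an address qualifier such as [r0, :128], capped at the
; per-instruction maximum noted in the comments. A rough C sketch of where such
; an alignment fact would come from at the source level (illustrative, not part
; of the patch; whether the qualifier is emitted is exactly what these tests check):
;
;   #include <arm_neon.h>
;
;   void store_q_aligned(uint8_t *p, uint8x16_t v) {
;       /* promising 16-byte alignment allows the backend to emit
;          vst1.8 {d16, d17}, [r0, :128] instead of the unqualified form */
;       uint8_t *ap = (uint8_t *)__builtin_assume_aligned(p, 16);
;       vst1q_u8(ap, v);
;   }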
diff --git a/test/CodeGen/ARM/vst3.ll b/test/CodeGen/ARM/vst3.ll
index 1feaed5a1044..d262303bc60e 100644
--- a/test/CodeGen/ARM/vst3.ll
+++ b/test/CodeGen/ARM/vst3.ll
@@ -2,9 +2,11 @@
define void @vst3i8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vst3i8:
-;CHECK: vst3.8
+;Check the alignment value. Max for this instruction is 64 bits:
+;This test runs at -O0 so do not check for specific register numbers.
+;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}, :64]
%tmp1 = load <8 x i8>* %B
- call void @llvm.arm.neon.vst3.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst3.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 32)
ret void
}
@@ -26,6 +28,19 @@ define void @vst3i32(i32* %A, <2 x i32>* %B) nounwind {
ret void
}
+;Check for a post-increment updating store.
+define void @vst3i32_update(i32** %ptr, <2 x i32>* %B) nounwind {
+;CHECK: vst3i32_update:
+;CHECK: vst3.32 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}]!
+ %A = load i32** %ptr
+ %tmp0 = bitcast i32* %A to i8*
+ %tmp1 = load <2 x i32>* %B
+ call void @llvm.arm.neon.vst3.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+ %tmp2 = getelementptr i32* %A, i32 6
+ store i32* %tmp2, i32** %ptr
+ ret void
+}
+
define void @vst3f(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst3f:
;CHECK: vst3.32
@@ -37,19 +52,23 @@ define void @vst3f(float* %A, <2 x float>* %B) nounwind {
define void @vst3i64(i64* %A, <1 x i64>* %B) nounwind {
;CHECK: vst3i64:
-;CHECK: vst1.64
+;Check the alignment value. Max for this instruction is 64 bits:
+;This test runs at -O0 so do not check for specific register numbers.
+;CHECK: vst1.64 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}, :64]
%tmp0 = bitcast i64* %A to i8*
%tmp1 = load <1 x i64>* %B
- call void @llvm.arm.neon.vst3.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst3.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 16)
ret void
}
define void @vst3Qi8(i8* %A, <16 x i8>* %B) nounwind {
;CHECK: vst3Qi8:
-;CHECK: vst3.8
-;CHECK: vst3.8
+;Check the alignment value. Max for this instruction is 64 bits:
+;This test runs at -O0 so do not check for specific register numbers.
+;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}, :64]!
+;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}, :64]
%tmp1 = load <16 x i8>* %B
- call void @llvm.arm.neon.vst3.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst3.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 32)
ret void
}
@@ -63,6 +82,20 @@ define void @vst3Qi16(i16* %A, <8 x i16>* %B) nounwind {
ret void
}
+;Check for a post-increment updating store.
+define void @vst3Qi16_update(i16** %ptr, <8 x i16>* %B) nounwind {
+;CHECK: vst3Qi16_update:
+;CHECK: vst3.16 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}]!
+;CHECK: vst3.16 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}]!
+ %A = load i16** %ptr
+ %tmp0 = bitcast i16* %A to i8*
+ %tmp1 = load <8 x i16>* %B
+ call void @llvm.arm.neon.vst3.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
+ %tmp2 = getelementptr i16* %A, i32 24
+ store i16* %tmp2, i16** %ptr
+ ret void
+}
+
define void @vst3Qi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst3Qi32:
;CHECK: vst3.32
diff --git a/test/CodeGen/ARM/vst4.ll b/test/CodeGen/ARM/vst4.ll
index d302f097fc1f..e94acb66bf2e 100644
--- a/test/CodeGen/ARM/vst4.ll
+++ b/test/CodeGen/ARM/vst4.ll
@@ -2,27 +2,42 @@
define void @vst4i8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vst4i8:
-;CHECK: vst4.8
+;Check the alignment value. Max for this instruction is 256 bits:
+;CHECK: vst4.8 {d16, d17, d18, d19}, [r0, :64]
%tmp1 = load <8 x i8>* %B
- call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 8)
+ ret void
+}
+
+;Check for a post-increment updating store with register increment.
+define void @vst4i8_update(i8** %ptr, <8 x i8>* %B, i32 %inc) nounwind {
+;CHECK: vst4i8_update:
+;CHECK: vst4.8 {d16, d17, d18, d19}, [r1, :128], r2
+ %A = load i8** %ptr
+ %tmp1 = load <8 x i8>* %B
+ call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 16)
+ %tmp2 = getelementptr i8* %A, i32 %inc
+ store i8* %tmp2, i8** %ptr
ret void
}
define void @vst4i16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst4i16:
-;CHECK: vst4.16
+;Check the alignment value. Max for this instruction is 256 bits:
+;CHECK: vst4.16 {d16, d17, d18, d19}, [r0, :128]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>* %B
- call void @llvm.arm.neon.vst4.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst4.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 16)
ret void
}
define void @vst4i32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst4i32:
-;CHECK: vst4.32
+;Check the alignment value. Max for this instruction is 256 bits:
+;CHECK: vst4.32 {d16, d17, d18, d19}, [r0, :256]
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <2 x i32>* %B
- call void @llvm.arm.neon.vst4.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst4.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 32)
ret void
}
@@ -37,26 +52,29 @@ define void @vst4f(float* %A, <2 x float>* %B) nounwind {
define void @vst4i64(i64* %A, <1 x i64>* %B) nounwind {
;CHECK: vst4i64:
-;CHECK: vst1.64
+;Check the alignment value. Max for this instruction is 256 bits:
+;CHECK: vst1.64 {d16, d17, d18, d19}, [r0, :256]
%tmp0 = bitcast i64* %A to i8*
%tmp1 = load <1 x i64>* %B
- call void @llvm.arm.neon.vst4.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst4.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 64)
ret void
}
define void @vst4Qi8(i8* %A, <16 x i8>* %B) nounwind {
;CHECK: vst4Qi8:
-;CHECK: vst4.8
-;CHECK: vst4.8
+;Check the alignment value. Max for this instruction is 256 bits:
+;CHECK: vst4.8 {d16, d18, d20, d22}, [r0, :256]!
+;CHECK: vst4.8 {d17, d19, d21, d23}, [r0, :256]
%tmp1 = load <16 x i8>* %B
- call void @llvm.arm.neon.vst4.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst4.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 64)
ret void
}
define void @vst4Qi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst4Qi16:
-;CHECK: vst4.16
-;CHECK: vst4.16
+;Check for no alignment specifier.
+;CHECK: vst4.16 {d16, d18, d20, d22}, [r0]!
+;CHECK: vst4.16 {d17, d19, d21, d23}, [r0]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>* %B
call void @llvm.arm.neon.vst4.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
@@ -83,6 +101,20 @@ define void @vst4Qf(float* %A, <4 x float>* %B) nounwind {
ret void
}
+;Check for a post-increment updating store.
+define void @vst4Qf_update(float** %ptr, <4 x float>* %B) nounwind {
+;CHECK: vst4Qf_update:
+;CHECK: vst4.32 {d16, d18, d20, d22}, [r1]!
+;CHECK: vst4.32 {d17, d19, d21, d23}, [r1]!
+ %A = load float** %ptr
+ %tmp0 = bitcast float* %A to i8*
+ %tmp1 = load <4 x float>* %B
+ call void @llvm.arm.neon.vst4.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
+ %tmp2 = getelementptr float* %A, i32 16
+ store float* %tmp2, float** %ptr
+ ret void
+}
+
declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind
declare void @llvm.arm.neon.vst4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind
declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind
diff --git a/test/CodeGen/ARM/vstlane.ll b/test/CodeGen/ARM/vstlane.ll
index 30ec52ac6420..6cc052bbeb1c 100644
--- a/test/CodeGen/ARM/vstlane.ll
+++ b/test/CodeGen/ARM/vstlane.ll
@@ -1,19 +1,109 @@
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+define void @vst1lanei8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vst1lanei8:
+;Check the (default) alignment.
+;CHECK: vst1.8 {d16[3]}, [r0]
+ %tmp1 = load <8 x i8>* %B
+ %tmp2 = extractelement <8 x i8> %tmp1, i32 3
+ store i8 %tmp2, i8* %A, align 8
+ ret void
+}
+
+define void @vst1lanei16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vst1lanei16:
+;Check the alignment value. Max for this instruction is 16 bits:
+;CHECK: vst1.16 {d16[2]}, [r0, :16]
+ %tmp1 = load <4 x i16>* %B
+ %tmp2 = extractelement <4 x i16> %tmp1, i32 2
+ store i16 %tmp2, i16* %A, align 8
+ ret void
+}
+
+define void @vst1lanei32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vst1lanei32:
+;Check the alignment value. Max for this instruction is 32 bits:
+;CHECK: vst1.32 {d16[1]}, [r0, :32]
+ %tmp1 = load <2 x i32>* %B
+ %tmp2 = extractelement <2 x i32> %tmp1, i32 1
+ store i32 %tmp2, i32* %A, align 8
+ ret void
+}
+
+define void @vst1lanef(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vst1lanef:
+;CHECK: vst1.32 {d16[1]}, [r0]
+ %tmp1 = load <2 x float>* %B
+ %tmp2 = extractelement <2 x float> %tmp1, i32 1
+ store float %tmp2, float* %A
+ ret void
+}
+
+define void @vst1laneQi8(i8* %A, <16 x i8>* %B) nounwind {
+;CHECK: vst1laneQi8:
+;CHECK: vst1.8 {d17[1]}, [r0]
+ %tmp1 = load <16 x i8>* %B
+ %tmp2 = extractelement <16 x i8> %tmp1, i32 9
+ store i8 %tmp2, i8* %A, align 8
+ ret void
+}
+
+define void @vst1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vst1laneQi16:
+;CHECK: vst1.16 {d17[1]}, [r0, :16]
+ %tmp1 = load <8 x i16>* %B
+ %tmp2 = extractelement <8 x i16> %tmp1, i32 5
+ store i16 %tmp2, i16* %A, align 8
+ ret void
+}
+
+define void @vst1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vst1laneQi32:
+;CHECK: vst1.32 {d17[1]}, [r0, :32]
+ %tmp1 = load <4 x i32>* %B
+ %tmp2 = extractelement <4 x i32> %tmp1, i32 3
+ store i32 %tmp2, i32* %A, align 8
+ ret void
+}
+
+define void @vst1laneQf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vst1laneQf:
+;CHECK: vst1.32 {d17[1]}, [r0]
+ %tmp1 = load <4 x float>* %B
+ %tmp2 = extractelement <4 x float> %tmp1, i32 3
+ store float %tmp2, float* %A
+ ret void
+}
+
define void @vst2lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vst2lanei8:
-;CHECK: vst2.8
+;Check the alignment value. Max for this instruction is 16 bits:
+;CHECK: vst2.8 {d16[1], d17[1]}, [r0, :16]
%tmp1 = load <8 x i8>* %B
- call void @llvm.arm.neon.vst2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1)
+ call void @llvm.arm.neon.vst2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4)
ret void
}
define void @vst2lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst2lanei16:
-;CHECK: vst2.16
+;Check the alignment value. Max for this instruction is 32 bits:
+;CHECK: vst2.16 {d16[1], d17[1]}, [r0, :32]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>* %B
- call void @llvm.arm.neon.vst2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1)
+ call void @llvm.arm.neon.vst2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
+ ret void
+}
+
+;Check for a post-increment updating store with register increment.
+define void @vst2lanei16_update(i16** %ptr, <4 x i16>* %B, i32 %inc) nounwind {
+;CHECK: vst2lanei16_update:
+;CHECK: vst2.16 {d16[1], d17[1]}, [r1], r2
+ %A = load i16** %ptr
+ %tmp0 = bitcast i16* %A to i8*
+ %tmp1 = load <4 x i16>* %B
+ call void @llvm.arm.neon.vst2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 2)
+ %tmp2 = getelementptr i16* %A, i32 %inc
+ store i16* %tmp2, i16** %ptr
ret void
}
@@ -37,19 +127,21 @@ define void @vst2lanef(float* %A, <2 x float>* %B) nounwind {
define void @vst2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst2laneQi16:
-;CHECK: vst2.16
+;Check the (default) alignment.
+;CHECK: vst2.16 {d17[1], d19[1]}, [r0]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>* %B
- call void @llvm.arm.neon.vst2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 1)
+ call void @llvm.arm.neon.vst2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 5, i32 1)
ret void
}
define void @vst2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst2laneQi32:
-;CHECK: vst2.32
+;Check the alignment value. Max for this instruction is 64 bits:
+;CHECK: vst2.32 {d17[0], d19[0]}, [r0, :64]
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>* %B
- call void @llvm.arm.neon.vst2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1)
+ call void @llvm.arm.neon.vst2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16)
ret void
}
@@ -81,10 +173,11 @@ define void @vst3lanei8(i8* %A, <8 x i8>* %B) nounwind {
define void @vst3lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst3lanei16:
-;CHECK: vst3.16
+;Check the (default) alignment value. VST3 does not support alignment.
+;CHECK: vst3.16 {d16[1], d17[1], d18[1]}, [r0]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>* %B
- call void @llvm.arm.neon.vst3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1)
+ call void @llvm.arm.neon.vst3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
ret void
}
@@ -108,10 +201,11 @@ define void @vst3lanef(float* %A, <2 x float>* %B) nounwind {
define void @vst3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst3laneQi16:
-;CHECK: vst3.16
+;Check the (default) alignment value. VST3 does not support alignment.
+;CHECK: vst3.16 {d17[2], d19[2], d21[2]}, [r0]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>* %B
- call void @llvm.arm.neon.vst3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 6, i32 1)
+ call void @llvm.arm.neon.vst3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 6, i32 8)
ret void
}
@@ -124,6 +218,19 @@ define void @vst3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
ret void
}
+;Check for a post-increment updating store.
+define void @vst3laneQi32_update(i32** %ptr, <4 x i32>* %B) nounwind {
+;CHECK: vst3laneQi32_update:
+;CHECK: vst3.32 {d16[0], d18[0], d20[0]}, [r1]!
+ %A = load i32** %ptr
+ %tmp0 = bitcast i32* %A to i8*
+ %tmp1 = load <4 x i32>* %B
+ call void @llvm.arm.neon.vst3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 0, i32 1)
+ %tmp2 = getelementptr i32* %A, i32 3
+ store i32* %tmp2, i32** %ptr
+ ret void
+}
+
define void @vst3laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vst3laneQf:
;CHECK: vst3.32
@@ -145,9 +252,22 @@ declare void @llvm.arm.neon.vst3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x f
define void @vst4lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vst4lanei8:
-;CHECK: vst4.8
+;Check the alignment value. Max for this instruction is 32 bits:
+;CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32]
+ %tmp1 = load <8 x i8>* %B
+ call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
+ ret void
+}
+
+;Check for a post-increment updating store.
+define void @vst4lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind {
+;CHECK: vst4lanei8_update:
+;CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32]!
+ %A = load i8** %ptr
%tmp1 = load <8 x i8>* %B
- call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1)
+ call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
+ %tmp2 = getelementptr i8* %A, i32 4
+ store i8* %tmp2, i8** %ptr
ret void
}
@@ -162,10 +282,11 @@ define void @vst4lanei16(i16* %A, <4 x i16>* %B) nounwind {
define void @vst4lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst4lanei32:
-;CHECK: vst4.32
+;Check the alignment value. Max for this instruction is 128 bits:
+;CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :128]
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <2 x i32>* %B
- call void @llvm.arm.neon.vst4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
+ call void @llvm.arm.neon.vst4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 16)
ret void
}
@@ -180,16 +301,18 @@ define void @vst4lanef(float* %A, <2 x float>* %B) nounwind {
define void @vst4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst4laneQi16:
-;CHECK: vst4.16
+;Check the alignment value. Max for this instruction is 64 bits:
+;CHECK: vst4.16 {d17[3], d19[3], d21[3], d23[3]}, [r0, :64]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>* %B
- call void @llvm.arm.neon.vst4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 7, i32 1)
+ call void @llvm.arm.neon.vst4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 7, i32 16)
ret void
}
define void @vst4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst4laneQi32:
-;CHECK: vst4.32
+;Check the (default) alignment.
+;CHECK: vst4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0]
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>* %B
call void @llvm.arm.neon.vst4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1)
diff --git a/test/CodeGen/Alpha/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/Alpha/2010-04-07-DbgValueOtherTargets.ll
index cf3f0b90037d..4590f1245ae9 100644
--- a/test/CodeGen/Alpha/2010-04-07-DbgValueOtherTargets.ll
+++ b/test/CodeGen/Alpha/2010-04-07-DbgValueOtherTargets.ll
@@ -1,33 +1,28 @@
; RUN: llc -O0 -march=alpha -asm-verbose < %s | FileCheck %s
; Check that DEBUG_VALUE comments come through on a variety of targets.
-%tart.reflect.ComplexType = type { double, double }
-
-@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
-
-define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+define i32 @main() nounwind ssp {
entry:
; CHECK: DEBUG_VALUE
- tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
- tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
- ret i32 3
+ call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
+ ret i32 0, !dbg !10
}
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
-!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
-!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
-!3 = metadata !{metadata !4, metadata !6, metadata !7}
-!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
-!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
-!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
-!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
-!9 = metadata !{i32 458763, metadata !10} ; [ DW_TAG_lexical_block ]
-!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 0}
+!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!9 = metadata !{i32 3, i32 11, metadata !8, null}
+!10 = metadata !{i32 4, i32 2, metadata !8, null}
+
diff --git a/test/CodeGen/CellSPU/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/CellSPU/2010-04-07-DbgValueOtherTargets.ll
index 45d53c8c975f..401399face9a 100644
--- a/test/CodeGen/CellSPU/2010-04-07-DbgValueOtherTargets.ll
+++ b/test/CodeGen/CellSPU/2010-04-07-DbgValueOtherTargets.ll
@@ -1,33 +1,28 @@
; RUN: llc -O0 -march=cellspu -asm-verbose < %s | FileCheck %s
; Check that DEBUG_VALUE comments come through on a variety of targets.
-%tart.reflect.ComplexType = type { double, double }
-
-@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
-
-define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+define i32 @main() nounwind ssp {
entry:
; CHECK: DEBUG_VALUE
- tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
- tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
- ret i32 3
+ call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
+ ret i32 0, !dbg !10
}
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
-!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
-!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
-!3 = metadata !{metadata !4, metadata !6, metadata !7}
-!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
-!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
-!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
-!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
-!9 = metadata !{i32 458763, metadata !10} ; [ DW_TAG_lexical_block ]
-!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 0}
+!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!9 = metadata !{i32 3, i32 11, metadata !8, null}
+!10 = metadata !{i32 4, i32 2, metadata !8, null}
+
diff --git a/test/CodeGen/CellSPU/arg_ret.ll b/test/CodeGen/CellSPU/arg_ret.ll
index 743292a58d59..7410b724d6fc 100644
--- a/test/CodeGen/CellSPU/arg_ret.ll
+++ b/test/CodeGen/CellSPU/arg_ret.ll
@@ -26,7 +26,8 @@ define ccc i32 @test_regs_and_stack( %paramstruct %prm, i32 %stackprm )
define ccc %paramstruct @test_return( i32 %param, %paramstruct %prm )
{
-;CHECK: lqd $75, 80($sp)
+;CHECK: lqd {{\$[0-9]+}}, 80($sp)
+;CHECK-NOT: ori {{\$[0-9]+, \$[0-9]+, 0}}
;CHECK: lr $3, $4
ret %paramstruct %prm
}
diff --git a/test/CodeGen/CellSPU/div_ops.ll b/test/CodeGen/CellSPU/div_ops.ll
new file mode 100644
index 000000000000..0c93d83ca76d
--- /dev/null
+++ b/test/CodeGen/CellSPU/div_ops.ll
@@ -0,0 +1,22 @@
+; RUN: llc --march=cellspu %s -o - | FileCheck %s
+
+; Signed division rounds towards zero; rotma does not.
+define i32 @sdivide (i32 %val )
+{
+; CHECK: rotmai
+; CHECK: rotmi
+; CHECK: a
+; CHECK: rotmai
+; CHECK: bi $lr
+ %rv = sdiv i32 %val, 4
+ ret i32 %rv
+}
+
+define i32 @udivide (i32 %val )
+{
+; CHECK: rotmi
+; CHECK: bi $lr
+ %rv = udiv i32 %val, 4
+ ret i32 %rv
+}
+
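; The sdivide test depends on the standard power-of-two division rewrite: an
; arithmetic shift rounds toward negative infinity, so negative dividends get a
; bias added first to recover C's round-toward-zero result, which is why the
; expected sequence is rotmai, rotmi, a, rotmai rather than a single shift.
; A rough C sketch of the rewrite (illustrative, not from the patch; assumes
; arithmetic right shift for signed ints, as GCC/Clang provide):
;
;   #include <stdint.h>
;
;   int32_t sdiv_by_4(int32_t v) {
;       int32_t bias = (v >> 31) & 3;  /* 3 when v is negative, 0 otherwise */
;       return (v + bias) >> 2;        /* equals v / 4 with round-toward-zero */
;   }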
diff --git a/test/CodeGen/CellSPU/fcmp32.ll b/test/CodeGen/CellSPU/fcmp32.ll
index f07fe6fdab28..c14fd7ba4a46 100644
--- a/test/CodeGen/CellSPU/fcmp32.ll
+++ b/test/CodeGen/CellSPU/fcmp32.ll
@@ -1,9 +1,4 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep fceq %t1.s | count 1
-; RUN: grep fcmeq %t1.s | count 1
-
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
+; RUN: llc --march=cellspu %s -o - | FileCheck %s
; Exercise the floating point comparison operators for f32:
@@ -11,13 +6,31 @@ declare double @fabs(double)
declare float @fabsf(float)
define i1 @fcmp_eq(float %arg1, float %arg2) {
+; CHECK: fceq
+; CHECK: bi $lr
%A = fcmp oeq float %arg1, %arg2
ret i1 %A
}
define i1 @fcmp_mag_eq(float %arg1, float %arg2) {
+; CHECK: fcmeq
+; CHECK: bi $lr
%1 = call float @fabsf(float %arg1)
%2 = call float @fabsf(float %arg2)
%3 = fcmp oeq float %1, %2
ret i1 %3
}
+
+define i1 @test_ogt(float %a, float %b) {
+; CHECK: fcgt
+; CHECK: bi $lr
+ %cmp = fcmp ogt float %a, %b
+ ret i1 %cmp
+}
+
+define i1 @test_ugt(float %a, float %b) {
+; CHECK: fcgt
+; CHECK: bi $lr
+ %cmp = fcmp ugt float %a, %b
+ ret i1 %cmp
+}
diff --git a/test/CodeGen/CellSPU/immed32.ll b/test/CodeGen/CellSPU/immed32.ll
index 119f526847ce..8e48f0b52c17 100644
--- a/test/CodeGen/CellSPU/immed32.ll
+++ b/test/CodeGen/CellSPU/immed32.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep ilhu %t1.s | count 8
-; RUN: grep iohl %t1.s | count 6
+; RUN: grep ilhu %t1.s | count 9
+; RUN: grep iohl %t1.s | count 7
; RUN: grep -w il %t1.s | count 3
; RUN: grep 16429 %t1.s | count 1
; RUN: grep 63572 %t1.s | count 1
@@ -12,6 +12,7 @@
; RUN: grep 49077 %t1.s | count 1
; RUN: grep 1267 %t1.s | count 2
; RUN: grep 16309 %t1.s | count 1
+; RUN: cat %t1.s | FileCheck %s
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
target triple = "spu"
@@ -31,6 +32,16 @@ define i32 @test_4() {
ret i32 -512 ;; IL via pattern
}
+define i32 @test_5()
+{
+;CHECK: test_5:
+;CHECK-NOT: ila $3, 40000
+;CHECK: ilhu
+;CHECK: iohl
+;CHECK: bi $lr
+ ret i32 400000
+}
+
;; double float floatval
;; 0x4005bf0a80000000 0x402d|f854 2.718282
define float @float_const_1() {
diff --git a/test/CodeGen/CellSPU/loads.ll b/test/CodeGen/CellSPU/loads.ll
index d40217dacfea..03d7ad1153a1 100644
--- a/test/CodeGen/CellSPU/loads.ll
+++ b/test/CodeGen/CellSPU/loads.ll
@@ -38,3 +38,15 @@ define <4 x float> @load_undef(){
%val = load <4 x float>* undef
ret <4 x float> %val
}
+
+;Check that 'misaligned' loads that may span two memory chunks
+;result in two loads. Do not check for the bit manipulation, as that
+;might change with improved algorithms or scheduling.
+define i32 @load_misaligned( i32* %ptr ){
+;CHECK: load_misaligned
+;CHECK: lqd
+;CHECK: lqd
+;CHECK: bi $lr
+ %rv = load i32* %ptr, align 2
+ ret i32 %rv
+}
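
; load_misaligned reads an i32 through a pointer with only 2-byte alignment; on
; SPU the local store is accessed in aligned 16-byte quadwords, so a value that
; may straddle a quadword boundary needs both quadwords fetched, hence the two
; lqd checks. A rough C sketch of the access being compiled (illustrative, not
; from the patch):
;
;   #include <stdint.h>
;   #include <string.h>
;
;   uint32_t load_underaligned(const uint8_t *p) {
;       uint32_t v;
;       memcpy(&v, p, sizeof v);  /* conceptually: two lqd loads plus a shuffle to extract the word */
;       return v;
;   }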
diff --git a/test/CodeGen/CellSPU/rotate_ops.ll b/test/CodeGen/CellSPU/rotate_ops.ll
index a504c002ae12..e1172089c703 100644
--- a/test/CodeGen/CellSPU/rotate_ops.ll
+++ b/test/CodeGen/CellSPU/rotate_ops.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=cellspu -o %t1.s
-; RUN: grep rot %t1.s | count 85
+; RUN: grep rot %t1.s | count 86
; RUN: grep roth %t1.s | count 8
; RUN: grep roti.*5 %t1.s | count 1
; RUN: grep roti.*27 %t1.s | count 1
@@ -8,6 +8,7 @@
; RUN grep rothi.*,.3 %t1.s | count 1
; RUN: grep andhi %t1.s | count 4
; RUN: grep shlhi %t1.s | count 4
+; RUN: cat %t1.s | FileCheck %s
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
target triple = "spu"
@@ -158,3 +159,14 @@ define i8 @rotri8(i8 %A) {
%D = or i8 %B, %C ; <i8> [#uses=1]
ret i8 %D
}
+
+define <2 x float> @test1(<4 x float> %param )
+{
+; CHECK: test1
+; CHECK: rotqbyi
+ %el = extractelement <4 x float> %param, i32 1
+ %vec1 = insertelement <1 x float> undef, float %el, i32 0
+ %rv = shufflevector <1 x float> %vec1, <1 x float> undef, <2 x i32><i32 0,i32 0>
+; CHECK: bi $lr
+ ret <2 x float> %rv
+}
diff --git a/test/CodeGen/CellSPU/sext128.ll b/test/CodeGen/CellSPU/sext128.ll
index 0c0b3599b110..6ae9aa51202f 100644
--- a/test/CodeGen/CellSPU/sext128.ll
+++ b/test/CodeGen/CellSPU/sext128.ll
@@ -12,8 +12,9 @@ entry:
; CHECK: long 269488144
; CHECK: long 66051
; CHECK: long 67438087
-; CHECK: rotmai
+; CHECK-NOT: rotqmbyi
; CHECK: lqa
+; CHECK: rotmai
; CHECK: shufb
}
@@ -25,8 +26,9 @@ entry:
; CHECK: long 269488144
; CHECK: long 269488144
; CHECK: long 66051
-; CHECK: rotmai
+; CHECK-NOT: rotqmbyi
; CHECK: lqa
+; CHECK: rotmai
; CHECK: shufb
}
@@ -39,9 +41,31 @@ entry:
; CHECK: long 269488144
; CHECK: long 269488144
; CHECK: long 66051
-; CHECK: rotmai
+; CHECK-NOT: rotqmbyi
; CHECK: lqa
+; CHECK: rotmai
; CHECK: shufb
}
declare i32 @myfunc(float)
+
+define i128 @func1(i8 %u) {
+entry:
+; CHECK: xsbh
+; CHECK: xshw
+; CHECK: rotmai
+; CHECK: shufb
+; CHECK: bi $lr
+ %0 = sext i8 %u to i128
+ ret i128 %0
+}
+
+define i128 @func2(i16 %u) {
+entry:
+; CHECK: xshw
+; CHECK: rotmai
+; CHECK: shufb
+; CHECK: bi $lr
+ %0 = sext i16 %u to i128
+ ret i128 %0
+}
diff --git a/test/CodeGen/CellSPU/shift_ops.ll b/test/CodeGen/CellSPU/shift_ops.ll
index 0264fc830ea8..92390abf9465 100644
--- a/test/CodeGen/CellSPU/shift_ops.ll
+++ b/test/CodeGen/CellSPU/shift_ops.ll
@@ -4,17 +4,18 @@
; RUN: grep {shl } %t1.s | count 9
; RUN: grep {shli } %t1.s | count 3
; RUN: grep {xshw } %t1.s | count 5
-; RUN: grep {and } %t1.s | count 5
+; RUN: grep {and } %t1.s | count 14
; RUN: grep {andi } %t1.s | count 2
; RUN: grep {rotmi } %t1.s | count 2
; RUN: grep {rotqmbyi } %t1.s | count 1
; RUN: grep {rotqmbii } %t1.s | count 2
; RUN: grep {rotqmby } %t1.s | count 1
-; RUN: grep {rotqmbi } %t1.s | count 1
+; RUN: grep {rotqmbi } %t1.s | count 2
; RUN: grep {rotqbyi } %t1.s | count 1
; RUN: grep {rotqbii } %t1.s | count 2
; RUN: grep {rotqbybi } %t1.s | count 1
-; RUN: grep {sfi } %t1.s | count 3
+; RUN: grep {sfi } %t1.s | count 4
+; RUN: cat %t1.s | FileCheck %s
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
target triple = "spu"
@@ -281,3 +282,14 @@ define i32 @hi32_i64(i64 %arg) {
%2 = trunc i64 %1 to i32
ret i32 %2
}
+
+; Additional i128 shift tests.
+define i128 @test_lshr_i128( i128 %val ) {
+ ;CHECK: test_lshr_i128
+ ;CHECK: sfi
+ ;CHECK: rotqmbi
+ ;CHECK: rotqmbybi
+ ;CHECK: bi $lr
+ %rv = lshr i128 %val, 64
+ ret i128 %rv
+}
diff --git a/test/CodeGen/CellSPU/shuffles.ll b/test/CodeGen/CellSPU/shuffles.ll
index f37d2ae89b00..c88a258c26c7 100644
--- a/test/CodeGen/CellSPU/shuffles.ll
+++ b/test/CodeGen/CellSPU/shuffles.ll
@@ -1,4 +1,4 @@
-; RUN: llc --march=cellspu < %s | FileCheck %s
+; RUN: llc -O1 --march=cellspu < %s | FileCheck %s
define <4 x float> @shuffle(<4 x float> %param1, <4 x float> %param2) {
; CHECK: cwd {{\$.}}, 0($sp)
@@ -39,3 +39,29 @@ define <4 x float> @test_insert_1(<4 x float> %vparam, float %eltparam) {
ret <4 x float> %rv
}
+define <2 x i32> @test_v2i32(<4 x i32>%vec)
+{
+;CHECK: rotqbyi $3, $3, 4
+;CHECK: bi $lr
+ %rv = shufflevector <4 x i32> %vec, <4 x i32> undef, <2 x i32><i32 1,i32 2>
+ ret <2 x i32> %rv
+}
+
+define <4 x i32> @test_v4i32_rot8(<4 x i32>%vec)
+{
+;CHECK: rotqbyi $3, $3, 8
+;CHECK: bi $lr
+ %rv = shufflevector <4 x i32> %vec, <4 x i32> undef,
+ <4 x i32> <i32 2,i32 3,i32 0, i32 1>
+ ret <4 x i32> %rv
+}
+
+define <4 x i32> @test_v4i32_rot4(<4 x i32>%vec)
+{
+;CHECK: rotqbyi $3, $3, 4
+;CHECK: bi $lr
+ %rv = shufflevector <4 x i32> %vec, <4 x i32> undef,
+ <4 x i32> <i32 1,i32 2,i32 3, i32 0>
+ ret <4 x i32> %rv
+}
+
diff --git a/test/CodeGen/CellSPU/stores.ll b/test/CodeGen/CellSPU/stores.ll
index 05f44f4be046..7e0bf06b4e45 100644
--- a/test/CodeGen/CellSPU/stores.ll
+++ b/test/CodeGen/CellSPU/stores.ll
@@ -14,6 +14,7 @@
; RUN: grep iohl %t1.s | count 8
; RUN: grep shufb %t1.s | count 15
; RUN: grep frds %t1.s | count 1
+; RUN: llc < %s -march=cellspu | FileCheck %s
; ModuleID = 'stores.bc'
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
@@ -149,3 +150,24 @@ entry:
store float %conv, float* %dest
ret float %conv
}
+
+;Check stores that might span two 16-byte memory blocks.
+define void @store_misaligned( i32 %val, i32* %ptr) {
+;CHECK: store_misaligned
+;CHECK: lqd
+;CHECK: lqd
+;CHECK: stqd
+;CHECK: stqd
+;CHECK: bi $lr
+ store i32 %val, i32*%ptr, align 2
+ ret void
+}
+
+define void @store_v8( <8 x float> %val, <8 x float>* %ptr )
+{
+;CHECK: stq
+;CHECK: stq
+;CHECK: bi $lr
+ store <8 x float> %val, <8 x float>* %ptr
+ ret void
+}
diff --git a/test/CodeGen/CellSPU/v2f32.ll b/test/CodeGen/CellSPU/v2f32.ll
index b81c0cdbb299..efd032031002 100644
--- a/test/CodeGen/CellSPU/v2f32.ll
+++ b/test/CodeGen/CellSPU/v2f32.ll
@@ -62,8 +62,7 @@ define %vec @test_insert(){
}
define void @test_unaligned_store() {
-;CHECK: cdd $3, 8($3)
-;CHECK: lqd
+;CHECK: cdd
;CHECK: shufb
;CHECK: stqd
%data = alloca [4 x float], align 16 ; <[4 x float]*> [#uses=1]
diff --git a/test/CodeGen/CellSPU/v2i32.ll b/test/CodeGen/CellSPU/v2i32.ll
index dd51be5a71d2..71d4aba63332 100644
--- a/test/CodeGen/CellSPU/v2i32.ll
+++ b/test/CodeGen/CellSPU/v2i32.ll
@@ -37,9 +37,8 @@ define %vec @test_mul(%vec %param)
}
define <2 x i32> @test_splat(i32 %param ) {
-;TODO insertelement transforms to a PREFSLOT2VEC, that trasforms to the
-; somewhat redundant:
-;CHECK-NOT or $3, $3, $3
+;see svn log for why this is here...
+;CHECK-NOT: or $3, $3, $3
;CHECK: lqa
;CHECK: shufb
%sv = insertelement <1 x i32> undef, i32 %param, i32 0
@@ -62,3 +61,17 @@ define void @test_store( %vec %val, %vec* %ptr)
store %vec %val, %vec* %ptr
ret void
}
+
+;The alignment of <2 x i32> is not *directly* defined in the ABI.
+;It is probably safe to interpret it as an array, thus having 8-byte
+;alignment (according to the ABI). This tests that the size of
+;[2 x <2 x i32>] is 16 bytes, i.e. there is no padding between the
+;two vector elements.
+define <2 x i32>* @test_alignment( [2 x <2 x i32>]* %ptr)
+{
+; CHECK-NOT: ai $3, $3, 16
+; CHECK: ai $3, $3, 8
+; CHECK: bi $lr
+ %rv = getelementptr [2 x <2 x i32>]* %ptr, i32 0, i32 1
+ ret <2 x i32>* %rv
+}
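
; test_alignment pins down the layout the comment describes: two consecutive
; <2 x i32> values are packed, so element 1 of the array sits at offset 8, not
; 16. A rough C sketch of the same property using the GCC/Clang vector extension
; (illustrative, not from the patch):
;
;   #include <stdint.h>
;
;   typedef int32_t v2i32 __attribute__((vector_size(8)));
;
;   v2i32 *second_element(v2i32 (*arr)[2]) {
;       return &(*arr)[1];  /* expected offset: base + 8 (ai $3, $3, 8), with no padding */
;   }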
diff --git a/test/CodeGen/Generic/2010-11-04-BigByval.ll b/test/CodeGen/Generic/2010-11-04-BigByval.ll
new file mode 100644
index 000000000000..df2ca4c18a05
--- /dev/null
+++ b/test/CodeGen/Generic/2010-11-04-BigByval.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s
+; PR7170
+
+%big = type [131072 x i8]
+
+declare void @foo(%big* byval align 1)
+
+define void @bar(%big* byval align 1 %x) {
+ call void @foo(%big* byval align 1 %x)
+ ret void
+}
diff --git a/test/CodeGen/Generic/2011-01-06-BigNumberCrash.ll b/test/CodeGen/Generic/2011-01-06-BigNumberCrash.ll
new file mode 100644
index 000000000000..05fdf4c7449b
--- /dev/null
+++ b/test/CodeGen/Generic/2011-01-06-BigNumberCrash.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s
+; PR8582
+
+define void @uint82() nounwind {
+entry:
+ %tmp3 = select i1 undef, i960 4872657003430991806293355221650511486142000513558154090491761976385142772940676648094983476628187266917101386048750715027104076737938178423519545241493072038894065019132638919037781494702597609951702322267198307200588774905587225212622510286498675097141625012190497682454879271766334636032, i960 0
+ br i1 undef, label %for.body25.for.body25_crit_edge, label %if.end
+
+for.body25.for.body25_crit_edge: ; preds = %entry
+ %ins = or i960 %tmp3, undef
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/Generic/2011-02-12-shuffle.ll b/test/CodeGen/Generic/2011-02-12-shuffle.ll
new file mode 100644
index 000000000000..b4d56d193ca3
--- /dev/null
+++ b/test/CodeGen/Generic/2011-02-12-shuffle.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s
+; PR9165
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i686-pc-win32"
+
+define void @m_387() nounwind {
+entry:
+ br i1 undef, label %if.end, label %UnifiedReturnBlock
+
+if.end: ; preds = %entry
+ %tmp1067 = load <16 x i32> addrspace(1)* null, align 64
+ %tmp1082 = shufflevector <16 x i32> <i32 0, i32 0, i32 0, i32 undef, i32 undef, i32 0, i32 0, i32 undef, i32 0, i32 0, i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef>,
+ <16 x i32> %tmp1067,
+ <16 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 26, i32 5, i32 6, i32 undef, i32 8, i32 9, i32 31, i32 30, i32 12, i32 undef, i32 undef, i32 undef>
+
+ %tmp1100 = shufflevector <16 x i32> %tmp1082,
+ <16 x i32> %tmp1067,
+ <16 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 4, i32 5, i32 6, i32 18, i32 8, i32 9, i32 10, i32 11, i32 12, i32 25, i32 undef, i32 17>
+
+ %tmp1112 = shufflevector <16 x i32> %tmp1100,
+ <16 x i32> %tmp1067,
+ <16 x i32> <i32 0, i32 1, i32 2, i32 24, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 18, i32 15>
+
+ store <16 x i32> %tmp1112, <16 x i32> addrspace(1)* undef, align 64
+
+ ret void
+
+UnifiedReturnBlock: ; preds = %entry
+ ret void
+}
+
diff --git a/test/CodeGen/Generic/add-with-overflow-128.ll b/test/CodeGen/Generic/add-with-overflow-128.ll
index c46c820a7907..33f44d6e4436 100644
--- a/test/CodeGen/Generic/add-with-overflow-128.ll
+++ b/test/CodeGen/Generic/add-with-overflow-128.ll
@@ -3,22 +3,7 @@
@ok = internal constant [4 x i8] c"%d\0A\00"
@no = internal constant [4 x i8] c"no\0A\00"
-define i1 @func1(i128 signext %v1, i128 signext %v2) nounwind {
-entry:
- %t = call {i128, i1} @llvm.sadd.with.overflow.i128(i128 %v1, i128 %v2)
- %sum = extractvalue {i128, i1} %t, 0
- %sum32 = trunc i128 %sum to i32
- %obit = extractvalue {i128, i1} %t, 1
- br i1 %obit, label %overflow, label %normal
-
-normal:
- %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum32 ) nounwind
- ret i1 true
-overflow:
- %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind
- ret i1 false
-}
define i1 @func2(i128 zeroext %v1, i128 zeroext %v2) nounwind {
entry:
@@ -38,5 +23,12 @@ carry:
}
declare i32 @printf(i8*, ...) nounwind
-declare {i128, i1} @llvm.sadd.with.overflow.i128(i128, i128)
+declare {i96, i1} @llvm.sadd.with.overflow.i96(i96, i96)
declare {i128, i1} @llvm.uadd.with.overflow.i128(i128, i128)
+
+define i1 @func1(i96 signext %v1, i96 signext %v2) nounwind {
+entry:
+ %t = call {i96, i1} @llvm.sadd.with.overflow.i96(i96 %v1, i96 %v2)
+ %obit = extractvalue {i96, i1} %t, 1
+ ret i1 %obit
+}
diff --git a/test/CodeGen/Generic/crash.ll b/test/CodeGen/Generic/crash.ll
index 7218565617fc..042739884df7 100644
--- a/test/CodeGen/Generic/crash.ll
+++ b/test/CodeGen/Generic/crash.ll
@@ -6,3 +6,35 @@
@tags = global [1 x %struct.AVCodecTag*] [%struct.AVCodecTag* getelementptr
inbounds ([0 x %struct.AVCodecTag]* @ff_codec_bmp_tags, i32 0, i32 0)]
+
+; rdar://8878965
+
+%struct.CAMERA = type { [3 x double], [3 x double], [3 x double], [3 x double], [3 x double], [3 x double], double, double, i32, double, double, i32, double, i32* }
+
+define void @Parse_Camera(%struct.CAMERA** nocapture %Camera_Ptr) nounwind {
+entry:
+%.pre = load %struct.CAMERA** %Camera_Ptr, align 4
+%0 = getelementptr inbounds %struct.CAMERA* %.pre, i32 0, i32 1, i32 0
+%1 = getelementptr inbounds %struct.CAMERA* %.pre, i32 0, i32 1, i32 2
+br label %bb32
+
+bb32: ; preds = %bb6
+%2 = load double* %0, align 4
+%3 = load double* %1, align 4
+%4 = load double* %0, align 4
+call void @Parse_Vector(double* %0) nounwind
+%5 = call i32 @llvm.objectsize.i32(i8* undef, i1 false)
+%6 = icmp eq i32 %5, -1
+br i1 %6, label %bb34, label %bb33
+
+bb33: ; preds = %bb32
+unreachable
+
+bb34: ; preds = %bb32
+unreachable
+
+}
+
+declare void @Parse_Vector(double*)
+declare i32 @llvm.objectsize.i32(i8*, i1)
+
diff --git a/test/CodeGen/Generic/overflow.ll b/test/CodeGen/Generic/overflow.ll
new file mode 100644
index 000000000000..4196855c4ee7
--- /dev/null
+++ b/test/CodeGen/Generic/overflow.ll
@@ -0,0 +1,220 @@
+; RUN: llc < %s
+; Verify that codegen does not crash on overflow intrinsics.
+
+;; SADD
+
+define zeroext i8 @sadd_i8(i8 signext %a, i8 signext %b) nounwind ssp {
+entry:
+ %sadd = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 %a, i8 %b)
+ %cmp = extractvalue { i8, i1 } %sadd, 1
+ %sadd.result = extractvalue { i8, i1 } %sadd, 0
+ %X = select i1 %cmp, i8 %sadd.result, i8 42
+ ret i8 %X
+}
+
+declare { i8, i1 } @llvm.sadd.with.overflow.i8(i8, i8) nounwind readnone
+
+define zeroext i16 @sadd_i16(i16 signext %a, i16 signext %b) nounwind ssp {
+entry:
+ %sadd = tail call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 %a, i16 %b)
+ %cmp = extractvalue { i16, i1 } %sadd, 1
+ %sadd.result = extractvalue { i16, i1 } %sadd, 0
+ %X = select i1 %cmp, i16 %sadd.result, i16 42
+ ret i16 %X
+}
+
+declare { i16, i1 } @llvm.sadd.with.overflow.i16(i16, i16) nounwind readnone
+
+define zeroext i32 @sadd_i32(i32 signext %a, i32 signext %b) nounwind ssp {
+entry:
+ %sadd = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
+ %cmp = extractvalue { i32, i1 } %sadd, 1
+ %sadd.result = extractvalue { i32, i1 } %sadd, 0
+ %X = select i1 %cmp, i32 %sadd.result, i32 42
+ ret i32 %X
+}
+
+declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
+
+
+;; UADD
+
+define zeroext i8 @uadd_i8(i8 signext %a, i8 signext %b) nounwind ssp {
+entry:
+ %uadd = tail call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 %a, i8 %b)
+ %cmp = extractvalue { i8, i1 } %uadd, 1
+ %uadd.result = extractvalue { i8, i1 } %uadd, 0
+ %X = select i1 %cmp, i8 %uadd.result, i8 42
+ ret i8 %X
+}
+
+declare { i8, i1 } @llvm.uadd.with.overflow.i8(i8, i8) nounwind readnone
+
+define zeroext i16 @uadd_i16(i16 signext %a, i16 signext %b) nounwind ssp {
+entry:
+ %uadd = tail call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 %a, i16 %b)
+ %cmp = extractvalue { i16, i1 } %uadd, 1
+ %uadd.result = extractvalue { i16, i1 } %uadd, 0
+ %X = select i1 %cmp, i16 %uadd.result, i16 42
+ ret i16 %X
+}
+
+declare { i16, i1 } @llvm.uadd.with.overflow.i16(i16, i16) nounwind readnone
+
+define zeroext i32 @uadd_i32(i32 signext %a, i32 signext %b) nounwind ssp {
+entry:
+ %uadd = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
+ %cmp = extractvalue { i32, i1 } %uadd, 1
+ %uadd.result = extractvalue { i32, i1 } %uadd, 0
+ %X = select i1 %cmp, i32 %uadd.result, i32 42
+ ret i32 %X
+}
+
+declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
+
+
+
+;; ssub
+
+define zeroext i8 @ssub_i8(i8 signext %a, i8 signext %b) nounwind ssp {
+entry:
+ %ssub = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %a, i8 %b)
+ %cmp = extractvalue { i8, i1 } %ssub, 1
+ %ssub.result = extractvalue { i8, i1 } %ssub, 0
+ %X = select i1 %cmp, i8 %ssub.result, i8 42
+ ret i8 %X
+}
+
+declare { i8, i1 } @llvm.ssub.with.overflow.i8(i8, i8) nounwind readnone
+
+define zeroext i16 @ssub_i16(i16 signext %a, i16 signext %b) nounwind ssp {
+entry:
+ %ssub = tail call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 %a, i16 %b)
+ %cmp = extractvalue { i16, i1 } %ssub, 1
+ %ssub.result = extractvalue { i16, i1 } %ssub, 0
+ %X = select i1 %cmp, i16 %ssub.result, i16 42
+ ret i16 %X
+}
+
+declare { i16, i1 } @llvm.ssub.with.overflow.i16(i16, i16) nounwind readnone
+
+define zeroext i32 @ssub_i32(i32 signext %a, i32 signext %b) nounwind ssp {
+entry:
+ %ssub = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
+ %cmp = extractvalue { i32, i1 } %ssub, 1
+ %ssub.result = extractvalue { i32, i1 } %ssub, 0
+ %X = select i1 %cmp, i32 %ssub.result, i32 42
+ ret i32 %X
+}
+
+declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone
+
+
+;; usub
+
+define zeroext i8 @usub_i8(i8 signext %a, i8 signext %b) nounwind ssp {
+entry:
+ %usub = tail call { i8, i1 } @llvm.usub.with.overflow.i8(i8 %a, i8 %b)
+ %cmp = extractvalue { i8, i1 } %usub, 1
+ %usub.result = extractvalue { i8, i1 } %usub, 0
+ %X = select i1 %cmp, i8 %usub.result, i8 42
+ ret i8 %X
+}
+
+declare { i8, i1 } @llvm.usub.with.overflow.i8(i8, i8) nounwind readnone
+
+define zeroext i16 @usub_i16(i16 signext %a, i16 signext %b) nounwind ssp {
+entry:
+ %usub = tail call { i16, i1 } @llvm.usub.with.overflow.i16(i16 %a, i16 %b)
+ %cmp = extractvalue { i16, i1 } %usub, 1
+ %usub.result = extractvalue { i16, i1 } %usub, 0
+ %X = select i1 %cmp, i16 %usub.result, i16 42
+ ret i16 %X
+}
+
+declare { i16, i1 } @llvm.usub.with.overflow.i16(i16, i16) nounwind readnone
+
+define zeroext i32 @usub_i32(i32 signext %a, i32 signext %b) nounwind ssp {
+entry:
+ %usub = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
+ %cmp = extractvalue { i32, i1 } %usub, 1
+ %usub.result = extractvalue { i32, i1 } %usub, 0
+ %X = select i1 %cmp, i32 %usub.result, i32 42
+ ret i32 %X
+}
+
+declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone
+
+
+
+;; smul
+
+define zeroext i8 @smul_i8(i8 signext %a, i8 signext %b) nounwind ssp {
+entry:
+ %smul = tail call { i8, i1 } @llvm.smul.with.overflow.i8(i8 %a, i8 %b)
+ %cmp = extractvalue { i8, i1 } %smul, 1
+ %smul.result = extractvalue { i8, i1 } %smul, 0
+ %X = select i1 %cmp, i8 %smul.result, i8 42
+ ret i8 %X
+}
+
+declare { i8, i1 } @llvm.smul.with.overflow.i8(i8, i8) nounwind readnone
+
+define zeroext i16 @smul_i16(i16 signext %a, i16 signext %b) nounwind ssp {
+entry:
+ %smul = tail call { i16, i1 } @llvm.smul.with.overflow.i16(i16 %a, i16 %b)
+ %cmp = extractvalue { i16, i1 } %smul, 1
+ %smul.result = extractvalue { i16, i1 } %smul, 0
+ %X = select i1 %cmp, i16 %smul.result, i16 42
+ ret i16 %X
+}
+
+declare { i16, i1 } @llvm.smul.with.overflow.i16(i16, i16) nounwind readnone
+
+define zeroext i32 @smul_i32(i32 signext %a, i32 signext %b) nounwind ssp {
+entry:
+ %smul = tail call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %a, i32 %b)
+ %cmp = extractvalue { i32, i1 } %smul, 1
+ %smul.result = extractvalue { i32, i1 } %smul, 0
+ %X = select i1 %cmp, i32 %smul.result, i32 42
+ ret i32 %X
+}
+
+declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone
+
+
+;; umul
+
+define zeroext i8 @umul_i8(i8 signext %a, i8 signext %b) nounwind ssp {
+entry:
+ %umul = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 %a, i8 %b)
+ %cmp = extractvalue { i8, i1 } %umul, 1
+ %umul.result = extractvalue { i8, i1 } %umul, 0
+ %X = select i1 %cmp, i8 %umul.result, i8 42
+ ret i8 %X
+}
+
+declare { i8, i1 } @llvm.umul.with.overflow.i8(i8, i8) nounwind readnone
+
+define zeroext i16 @umul_i16(i16 signext %a, i16 signext %b) nounwind ssp {
+entry:
+ %umul = tail call { i16, i1 } @llvm.umul.with.overflow.i16(i16 %a, i16 %b)
+ %cmp = extractvalue { i16, i1 } %umul, 1
+ %umul.result = extractvalue { i16, i1 } %umul, 0
+ %X = select i1 %cmp, i16 %umul.result, i16 42
+ ret i16 %X
+}
+
+declare { i16, i1 } @llvm.umul.with.overflow.i16(i16, i16) nounwind readnone
+
+define zeroext i32 @umul_i32(i32 signext %a, i32 signext %b) nounwind ssp {
+entry:
+ %umul = tail call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
+ %cmp = extractvalue { i32, i1 } %umul, 1
+ %umul.result = extractvalue { i32, i1 } %umul, 0
+ %X = select i1 %cmp, i32 %umul.result, i32 42
+ ret i32 %X
+}
+
+declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
+
diff --git a/test/CodeGen/MBlaze/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/MBlaze/2010-04-07-DbgValueOtherTargets.ll
index 854352a70111..d8970eac9007 100644
--- a/test/CodeGen/MBlaze/2010-04-07-DbgValueOtherTargets.ll
+++ b/test/CodeGen/MBlaze/2010-04-07-DbgValueOtherTargets.ll
@@ -1,33 +1,28 @@
; RUN: llc -O0 -march=mblaze -asm-verbose < %s | FileCheck %s
; Check that DEBUG_VALUE comments come through on a variety of targets.
-%tart.reflect.ComplexType = type { double, double }
-
-@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
-
-define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+define i32 @main() nounwind ssp {
entry:
; CHECK: DEBUG_VALUE
- tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
- tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
- ret i32 3
+ call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
+ ret i32 0, !dbg !10
}
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
-!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
-!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
-!3 = metadata !{metadata !4, metadata !6, metadata !7}
-!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
-!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
-!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
-!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
-!9 = metadata !{i32 458763, metadata !10} ; [ DW_TAG_lexical_block ]
-!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 0}
+!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!9 = metadata !{i32 3, i32 11, metadata !8, null}
+!10 = metadata !{i32 4, i32 2, metadata !8, null}
+
diff --git a/test/CodeGen/MBlaze/brind.ll b/test/CodeGen/MBlaze/brind.ll
index 7798e0f56aac..2229a873827a 100644
--- a/test/CodeGen/MBlaze/brind.ll
+++ b/test/CodeGen/MBlaze/brind.ll
@@ -28,32 +28,31 @@ loop:
label %L3,
label %L4,
label %L5 ]
- ; CHECK: br {{r[0-9]*}}
+ ; CHECK: brad {{r[0-9]*}}
L1:
%tmp.1 = add i32 %a, %b
br label %finish
- ; CHECK: br
+ ; CHECK: brid
L2:
%tmp.2 = sub i32 %a, %b
br label %finish
- ; CHECK: br
+ ; CHECK: brid
L3:
%tmp.3 = mul i32 %a, %b
br label %finish
- ; CHECK: br
+ ; CHECK: brid
L4:
%tmp.4 = sdiv i32 %a, %b
br label %finish
- ; CHECK: br
+ ; CHECK: brid
L5:
%tmp.5 = srem i32 %a, %b
br label %finish
- ; CHECK: br
finish:
%tmp.6 = phi i32 [ %tmp.1, %L1 ],
@@ -69,5 +68,5 @@ finish:
%tmp.8 = urem i32 %tmp.7, 5
br label %loop
- ; CHECK: br
+ ; CHECK: brad {{r[0-9]*}}
}
diff --git a/test/CodeGen/MBlaze/cc.ll b/test/CodeGen/MBlaze/cc.ll
index aaa918ffc343..b1eb22aee9fd 100644
--- a/test/CodeGen/MBlaze/cc.ll
+++ b/test/CodeGen/MBlaze/cc.ll
@@ -12,7 +12,7 @@ declare i32 @printf(i8*, ...)
define void @params0_noret() {
; CHECK: params0_noret:
ret void
- ; CHECK-NOT: {{.* r3, r0, 1}}
+ ; CHECK-NOT: {{.* r3, .*, .*}}
; CHECK-NOT: {{.* r4, .*, .*}}
; CHECK: rtsd
}
@@ -20,81 +20,88 @@ define void @params0_noret() {
define i8 @params0_8bitret() {
; CHECK: params0_8bitret:
ret i8 1
- ; CHECK: {{.* r3, r0, 1}}
+ ; CHECK-NOT: {{.* r3, .*, .*}}
; CHECK-NOT: {{.* r4, .*, .*}}
; CHECK: rtsd
+ ; CHECK: {{.* r3, r0, 1}}
}
define i16 @params0_16bitret() {
; CHECK: params0_16bitret:
ret i16 1
+ ; CHECK: rtsd
; CHECK: {{.* r3, r0, 1}}
; CHECK-NOT: {{.* r4, .*, .*}}
- ; CHECK: rtsd
}
define i32 @params0_32bitret() {
; CHECK: params0_32bitret:
ret i32 1
- ; CHECK: {{.* r3, r0, 1}}
; CHECK-NOT: {{.* r4, .*, .*}}
; CHECK: rtsd
+ ; CHECK: {{.* r3, r0, 1}}
}
define i64 @params0_64bitret() {
; CHECK: params0_64bitret:
ret i64 1
; CHECK: {{.* r3, r0, .*}}
- ; CHECK: {{.* r4, r0, 1}}
; CHECK: rtsd
+ ; CHECK: {{.* r4, r0, 1}}
}
define i32 @params1_32bitret(i32 %a) {
; CHECK: params1_32bitret:
ret i32 %a
- ; CHECK: {{.* r3, r5, r0}}
+ ; CHECK-NOT: {{.* r3, .*, .*}}
; CHECK-NOT: {{.* r4, .*, .*}}
; CHECK: rtsd
+ ; CHECK: {{.* r3, r5, r0}}
}
define i32 @params2_32bitret(i32 %a, i32 %b) {
; CHECK: params2_32bitret:
ret i32 %b
- ; CHECK: {{.* r3, r6, r0}}
+ ; CHECK-NOT: {{.* r3, .*, .*}}
; CHECK-NOT: {{.* r4, .*, .*}}
; CHECK: rtsd
+ ; CHECK: {{.* r3, r6, r0}}
}
define i32 @params3_32bitret(i32 %a, i32 %b, i32 %c) {
; CHECK: params3_32bitret:
ret i32 %c
- ; CHECK: {{.* r3, r7, r0}}
+ ; CHECK-NOT: {{.* r3, .*, .*}}
; CHECK-NOT: {{.* r4, .*, .*}}
; CHECK: rtsd
+ ; CHECK: {{.* r3, r7, r0}}
}
define i32 @params4_32bitret(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK: params4_32bitret:
ret i32 %d
- ; CHECK: {{.* r3, r8, r0}}
+ ; CHECK-NOT: {{.* r3, .*, .*}}
; CHECK-NOT: {{.* r4, .*, .*}}
; CHECK: rtsd
+ ; CHECK: {{.* r3, r8, r0}}
}
define i32 @params5_32bitret(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
; CHECK: params5_32bitret:
ret i32 %e
- ; CHECK: {{.* r3, r9, r0}}
+ ; CHECK-NOT: {{.* r3, .*, .*}}
; CHECK-NOT: {{.* r4, .*, .*}}
; CHECK: rtsd
+ ; CHECK: {{.* r3, r9, r0}}
}
define i32 @params6_32bitret(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f) {
; CHECK: params6_32bitret:
ret i32 %f
- ; CHECK: {{.* r3, r10, r0}}
+ ; CHECK-NOT: {{.* r3, .*, .*}}
; CHECK-NOT: {{.* r4, .*, .*}}
; CHECK: rtsd
+ ; CHECK: {{.* r3, r10, r0}}
}
define i32 @params7_32bitret(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f,
@@ -142,53 +149,29 @@ define void @testing() {
%tmp.1 = call i8 @params0_8bitret()
; CHECK: brlid
call i32 (i8*,...)* @printf(i8* %MSG.1, i8 %tmp.1)
- ; CHECK: {{.* r5, .*, .*}}
- ; CHECK: {{.* r6, r3, r0}}
- ; CHECK-NOT: {{.* r7, .*, .*}}
- ; CHECK: brlid
%tmp.2 = call i16 @params0_16bitret()
; CHECK: brlid
call i32 (i8*,...)* @printf(i8* %MSG.1, i16 %tmp.2)
- ; CHECK: {{.* r5, .*, .*}}
- ; CHECK: {{.* r6, r3, r0}}
- ; CHECK-NOT: {{.* r7, .*, .*}}
- ; CHECK: brlid
%tmp.3 = call i32 @params0_32bitret()
; CHECK: brlid
call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.3)
- ; CHECK: {{.* r5, .*, .*}}
- ; CHECK: {{.* r6, r3, r0}}
- ; CHECK-NOT: {{.* r7, .*, .*}}
- ; CHECK: brlid
%tmp.4 = call i64 @params0_64bitret()
; CHECK: brlid
call i32 (i8*,...)* @printf(i8* %MSG.1, i64 %tmp.4)
- ; CHECK: {{.* r5, .*, .*}}
- ; CHECK: {{.* r6, r3, r0}}
- ; CHECK: {{.* r7, r4, r0}}
- ; CHECK: brlid
%tmp.5 = call i32 @params1_32bitret(i32 1)
; CHECK: {{.* r5, .*, .*}}
; CHECK: brlid
call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.5)
- ; CHECK: {{.* r5, .*, .*}}
- ; CHECK: {{.* r6, r3, r0}}
- ; CHECK-NOT: {{.* r7, .*, .*}}
- ; CHECK: brlid
%tmp.6 = call i32 @params2_32bitret(i32 1, i32 2)
; CHECK: {{.* r5, .*, .*}}
; CHECK: {{.* r6, .*, .*}}
; CHECK: brlid
call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.6)
- ; CHECK: {{.* r5, .*, .*}}
- ; CHECK: {{.* r6, r3, r0}}
- ; CHECK-NOT: {{.* r7, .*, .*}}
- ; CHECK: brlid
%tmp.7 = call i32 @params3_32bitret(i32 1, i32 2, i32 3)
; CHECK: {{.* r5, .*, .*}}
@@ -196,10 +179,6 @@ define void @testing() {
; CHECK: {{.* r7, .*, .*}}
; CHECK: brlid
call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.7)
- ; CHECK: {{.* r5, .*, .*}}
- ; CHECK: {{.* r6, r3, r0}}
- ; CHECK-NOT: {{.* r7, .*, .*}}
- ; CHECK: brlid
%tmp.8 = call i32 @params4_32bitret(i32 1, i32 2, i32 3, i32 4)
; CHECK: {{.* r5, .*, .*}}
@@ -208,10 +187,6 @@ define void @testing() {
; CHECK: {{.* r8, .*, .*}}
; CHECK: brlid
call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.8)
- ; CHECK: {{.* r5, .*, .*}}
- ; CHECK: {{.* r6, r3, r0}}
- ; CHECK-NOT: {{.* r7, .*, .*}}
- ; CHECK: brlid
%tmp.9 = call i32 @params5_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5)
; CHECK: {{.* r5, .*, .*}}
@@ -221,10 +196,6 @@ define void @testing() {
; CHECK: {{.* r9, .*, .*}}
; CHECK: brlid
call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.9)
- ; CHECK: {{.* r5, .*, .*}}
- ; CHECK: {{.* r6, r3, r0}}
- ; CHECK-NOT: {{.* r7, .*, .*}}
- ; CHECK: brlid
%tmp.10 = call i32 @params6_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5,
i32 6)
@@ -236,10 +207,6 @@ define void @testing() {
; CHECK: {{.* r10, .*, .*}}
; CHECK: brlid
call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.10)
- ; CHECK: {{.* r5, .*, .*}}
- ; CHECK: {{.* r6, r3, r0}}
- ; CHECK-NOT: {{.* r7, .*, .*}}
- ; CHECK: brlid
%tmp.11 = call i32 @params7_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5,
i32 6, i32 7)
@@ -252,10 +219,6 @@ define void @testing() {
; CHECK: {{.* r10, .*, .*}}
; CHECK: brlid
call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.11)
- ; CHECK: {{.* r5, .*, .*}}
- ; CHECK: {{.* r6, r3, r0}}
- ; CHECK-NOT: {{.* r7, .*, .*}}
- ; CHECK: brlid
%tmp.12 = call i32 @params8_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5,
i32 6, i32 7, i32 8)
@@ -269,10 +232,6 @@ define void @testing() {
; CHECK: {{.* r10, .*, .*}}
; CHECK: brlid
call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.12)
- ; CHECK: {{.* r5, .*, .*}}
- ; CHECK: {{.* r6, r3, r0}}
- ; CHECK-NOT: {{.* r7, .*, .*}}
- ; CHECK: brlid
%tmp.13 = call i32 @params9_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5,
i32 6, i32 7, i32 8, i32 9)
@@ -287,10 +246,6 @@ define void @testing() {
; CHECK: {{.* r10, .*, .*}}
; CHECK: brlid
call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.13)
- ; CHECK: {{.* r5, .*, .*}}
- ; CHECK: {{.* r6, r3, r0}}
- ; CHECK-NOT: {{.* r7, .*, .*}}
- ; CHECK: brlid
%tmp.14 = call i32 @params10_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5,
i32 6, i32 7, i32 8, i32 9, i32 10)
@@ -306,10 +261,6 @@ define void @testing() {
; CHECK: {{.* r10, .*, .*}}
; CHECK: brlid
call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.14)
- ; CHECK: {{.* r5, .*, .*}}
- ; CHECK: {{.* r6, r3, r0}}
- ; CHECK-NOT: {{.* r7, .*, .*}}
- ; CHECK: brlid
ret void
}
diff --git a/test/CodeGen/MBlaze/fpu.ll b/test/CodeGen/MBlaze/fpu.ll
index 83f4d831241d..2aef4fd64105 100644
--- a/test/CodeGen/MBlaze/fpu.ll
+++ b/test/CodeGen/MBlaze/fpu.ll
@@ -10,14 +10,14 @@ define float @test_add(float %a, float %b) {
; FPU: test_add:
%tmp.1 = fadd float %a, %b
- ; FUN-NOT: fadd
; FUN: brlid
; FPU-NOT: brlid
- ; FPU: fadd
ret float %tmp.1
; FUN: rtsd
; FPU: rtsd
+ ; FUN-NOT: fadd
+ ; FPU-NEXT: fadd
}
define float @test_sub(float %a, float %b) {
@@ -25,14 +25,14 @@ define float @test_sub(float %a, float %b) {
; FPU: test_sub:
%tmp.1 = fsub float %a, %b
- ; FUN-NOT: frsub
; FUN: brlid
; FPU-NOT: brlid
- ; FPU: frsub
ret float %tmp.1
; FUN: rtsd
; FPU: rtsd
+ ; FUN-NOT: frsub
+ ; FPU-NEXT: frsub
}
define float @test_mul(float %a, float %b) {
@@ -40,14 +40,14 @@ define float @test_mul(float %a, float %b) {
; FPU: test_mul:
%tmp.1 = fmul float %a, %b
- ; FUN-NOT: fmul
; FUN: brlid
; FPU-NOT: brlid
- ; FPU: fmul
ret float %tmp.1
; FUN: rtsd
; FPU: rtsd
+ ; FUN-NOT: fmul
+ ; FPU-NEXT: fmul
}
define float @test_div(float %a, float %b) {
@@ -55,12 +55,12 @@ define float @test_div(float %a, float %b) {
; FPU: test_div:
%tmp.1 = fdiv float %a, %b
- ; FUN-NOT: fdiv
; FUN: brlid
; FPU-NOT: brlid
- ; FPU: fdiv
ret float %tmp.1
; FUN: rtsd
; FPU: rtsd
+ ; FUN-NOT: fdiv
+ ; FPU-NEXT: fdiv
}
diff --git a/test/CodeGen/MBlaze/imm.ll b/test/CodeGen/MBlaze/imm.ll
index 85fad175b77f..6effd3e09a24 100644
--- a/test/CodeGen/MBlaze/imm.ll
+++ b/test/CodeGen/MBlaze/imm.ll
@@ -7,22 +7,22 @@
define i8 @retimm_i8() {
; CHECK: retimm_i8:
- ; CHECK: add
- ; CHECK-NEXT: rtsd
+ ; CHECK: rtsd
+ ; CHECK-NEXT: add
; FPU: retimm_i8:
- ; FPU: add
- ; FPU-NEXT: rtsd
+ ; FPU: rtsd
+ ; FPU-NEXT: add
ret i8 123
}
define i16 @retimm_i16() {
; CHECK: retimm_i16:
- ; CHECK: add
- ; CHECK-NEXT: rtsd
+ ; CHECK: rtsd
+ ; CHECK-NEXT: add
; FPU: retimm_i16:
- ; FPU: add
- ; FPU-NEXT: rtsd
- ret i16 38212
+ ; FPU: rtsd
+ ; FPU-NEXT: add
+ ret i16 31212
}
define i32 @retimm_i32() {
@@ -38,12 +38,12 @@ define i32 @retimm_i32() {
define i64 @retimm_i64() {
; CHECK: retimm_i64:
; CHECK: add
- ; CHECK-NEXT: add
; CHECK-NEXT: rtsd
+ ; CHECK-NEXT: add
; FPU: retimm_i64:
; FPU: add
- ; FPU-NEXT: add
; FPU-NEXT: rtsd
+ ; FPU-NEXT: add
ret i64 94581823
}
@@ -53,7 +53,7 @@ define float @retimm_float() {
; CHECK-NEXT: rtsd
; FPU: retimm_float:
; FPU: or
- ; FPU: rtsd
+ ; FPU-NEXT: rtsd
ret float 12.0
}
diff --git a/test/CodeGen/MBlaze/intr.ll b/test/CodeGen/MBlaze/intr.ll
new file mode 100644
index 000000000000..79c6bffd00cb
--- /dev/null
+++ b/test/CodeGen/MBlaze/intr.ll
@@ -0,0 +1,48 @@
+; Ensure that the MBlaze interrupt_handler calling convention (cc73) is handled
+; correctly by the MBlaze backend.
+;
+; RUN: llc < %s -march=mblaze | FileCheck %s
+
+@.str = private constant [28 x i8] c"The interrupt has gone off\0A\00"
+@_interrupt_handler = alias void ()* @myintr
+
+define cc73 void @myintr() nounwind noinline {
+ ; CHECK: myintr:
+ ; CHECK: swi r3, r1
+ ; CHECK: swi r4, r1
+ ; CHECK: swi r5, r1
+ ; CHECK: swi r6, r1
+ ; CHECK: swi r7, r1
+ ; CHECK: swi r8, r1
+ ; CHECK: swi r9, r1
+ ; CHECK: swi r10, r1
+ ; CHECK: swi r11, r1
+ ; CHECK: swi r12, r1
+ ; CHECK: swi r17, r1
+ ; CHECK: swi r18, r1
+ ; CHECK: mfs r11, rmsr
+ ; CHECK: swi r11, r1
+ entry:
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([28 x i8]* @.str, i32 0, i32 0))
+ ret void
+
+ ; CHECK: lwi r11, r1
+ ; CHECK: mts rmsr, r11
+ ; CHECK: lwi r18, r1
+ ; CHECK: lwi r17, r1
+ ; CHECK: lwi r12, r1
+ ; CHECK: lwi r11, r1
+ ; CHECK: lwi r10, r1
+ ; CHECK: lwi r9, r1
+ ; CHECK: lwi r8, r1
+ ; CHECK: lwi r7, r1
+ ; CHECK: lwi r6, r1
+ ; CHECK: lwi r5, r1
+ ; CHECK: lwi r4, r1
+ ; CHECK: lwi r3, r1
+ ; CHECK: rtid r14, 0
+}
+
+ ; CHECK: .globl _interrupt_handler
+ ; CHECK: _interrupt_handler = myintr
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/MBlaze/jumptable.ll b/test/CodeGen/MBlaze/jumptable.ll
index 3f27c12f19a3..299084d0ed23 100644
--- a/test/CodeGen/MBlaze/jumptable.ll
+++ b/test/CodeGen/MBlaze/jumptable.ll
@@ -18,8 +18,8 @@ define i32 @jmptable(i32 %arg)
i32 8, label %L8
i32 9, label %L9 ]
- ; CHECK: lw [[REG:r[0-9]*]]
- ; CHECK: br [[REG]]
+ ; CHECK: lw [[REG:r[0-9]*]]
+ ; CHECK: brad [[REG]]
L0:
%var0 = add i32 %arg, 0
br label %DONE
diff --git a/test/CodeGen/MBlaze/loop.ll b/test/CodeGen/MBlaze/loop.ll
index b473020e6641..8973f75aa1dc 100644
--- a/test/CodeGen/MBlaze/loop.ll
+++ b/test/CodeGen/MBlaze/loop.ll
@@ -27,11 +27,10 @@ loop_inner:
loop_inner_finish:
%inner.5 = add i32 %inner.2, 1
- ; CHECK: addi {{.*, 1}}
-
call i32 (i8*,...)* @printf( i8* getelementptr([19 x i8]* @MSG,i32 0,i32 0),
i32 %inner.0, i32 %inner.1, i32 %inner.2 )
; CHECK: brlid
+ ; CHECK: addik {{.*, 1}}
%inner.6 = icmp eq i32 %inner.5, 100
; CHECK: cmp
diff --git a/test/CodeGen/MBlaze/mul.ll b/test/CodeGen/MBlaze/mul.ll
index 65d3e22a3e74..cefdb8d56f21 100644
--- a/test/CodeGen/MBlaze/mul.ll
+++ b/test/CodeGen/MBlaze/mul.ll
@@ -13,11 +13,11 @@ define i8 @test_i8(i8 %a, i8 %b) {
; FUN-NOT: mul
; FUN: brlid
; MUL-NOT: brlid
- ; MUL: mul
ret i8 %tmp.1
; FUN: rtsd
; MUL: rtsd
+ ; MUL: mul
}
define i16 @test_i16(i16 %a, i16 %b) {
@@ -28,11 +28,11 @@ define i16 @test_i16(i16 %a, i16 %b) {
; FUN-NOT: mul
; FUN: brlid
; MUL-NOT: brlid
- ; MUL: mul
ret i16 %tmp.1
; FUN: rtsd
; MUL: rtsd
+ ; MUL: mul
}
define i32 @test_i32(i32 %a, i32 %b) {
@@ -43,9 +43,9 @@ define i32 @test_i32(i32 %a, i32 %b) {
; FUN-NOT: mul
; FUN: brlid
; MUL-NOT: brlid
- ; MUL: mul
ret i32 %tmp.1
; FUN: rtsd
; MUL: rtsd
+ ; MUL: mul
}
diff --git a/test/CodeGen/MBlaze/shift.ll b/test/CodeGen/MBlaze/shift.ll
index 186115ec192b..99f0519c020c 100644
--- a/test/CodeGen/MBlaze/shift.ll
+++ b/test/CodeGen/MBlaze/shift.ll
@@ -10,17 +10,16 @@ define i8 @test_i8(i8 %a, i8 %b) {
; SHT: test_i8:
%tmp.1 = shl i8 %a, %b
- ; FUN-NOT: bsll
; FUN: andi
; FUN: add
; FUN: bnei
- ; SHT-NOT: andi
; SHT-NOT: bnei
- ; SHT: bsll
ret i8 %tmp.1
; FUN: rtsd
; SHT: rtsd
+ ; FUN-NOT: bsll
+ ; SHT-NEXT: bsll
}
define i8 @testc_i8(i8 %a, i8 %b) {
@@ -28,18 +27,18 @@ define i8 @testc_i8(i8 %a, i8 %b) {
; SHT: testc_i8:
%tmp.1 = shl i8 %a, 5
- ; FUN-NOT: bsll
; FUN: andi
; FUN: add
; FUN: bnei
; SHT-NOT: andi
; SHT-NOT: add
; SHT-NOT: bnei
- ; SHT: bslli
ret i8 %tmp.1
; FUN: rtsd
; SHT: rtsd
+ ; FUN-NOT: bsll
+ ; SHT-NEXT: bslli
}
define i16 @test_i16(i16 %a, i16 %b) {
@@ -47,17 +46,16 @@ define i16 @test_i16(i16 %a, i16 %b) {
; SHT: test_i16:
%tmp.1 = shl i16 %a, %b
- ; FUN-NOT: bsll
; FUN: andi
; FUN: add
; FUN: bnei
- ; SHT-NOT: andi
; SHT-NOT: bnei
- ; SHT: bsll
ret i16 %tmp.1
; FUN: rtsd
; SHT: rtsd
+ ; FUN-NOT: bsll
+ ; SHT-NEXT: bsll
}
define i16 @testc_i16(i16 %a, i16 %b) {
@@ -65,18 +63,18 @@ define i16 @testc_i16(i16 %a, i16 %b) {
; SHT: testc_i16:
%tmp.1 = shl i16 %a, 5
- ; FUN-NOT: bsll
; FUN: andi
; FUN: add
; FUN: bnei
; SHT-NOT: andi
; SHT-NOT: add
; SHT-NOT: bnei
- ; SHT: bslli
ret i16 %tmp.1
; FUN: rtsd
; SHT: rtsd
+ ; FUN-NOT: bsll
+ ; SHT-NEXT: bslli
}
define i32 @test_i32(i32 %a, i32 %b) {
@@ -84,17 +82,17 @@ define i32 @test_i32(i32 %a, i32 %b) {
; SHT: test_i32:
%tmp.1 = shl i32 %a, %b
- ; FUN-NOT: bsll
; FUN: andi
; FUN: add
; FUN: bnei
; SHT-NOT: andi
; SHT-NOT: bnei
- ; SHT: bsll
ret i32 %tmp.1
; FUN: rtsd
; SHT: rtsd
+ ; FUN-NOT: bsll
+ ; SHT-NEXT: bsll
}
define i32 @testc_i32(i32 %a, i32 %b) {
@@ -102,16 +100,16 @@ define i32 @testc_i32(i32 %a, i32 %b) {
; SHT: testc_i32:
%tmp.1 = shl i32 %a, 5
- ; FUN-NOT: bsll
; FUN: andi
; FUN: add
; FUN: bnei
; SHT-NOT: andi
; SHT-NOT: add
; SHT-NOT: bnei
- ; SHT: bslli
ret i32 %tmp.1
; FUN: rtsd
; SHT: rtsd
+ ; FUN-NOT: bsll
+ ; SHT-NEXT: bslli
}
diff --git a/test/CodeGen/MBlaze/svol.ll b/test/CodeGen/MBlaze/svol.ll
new file mode 100644
index 000000000000..c1e96202845a
--- /dev/null
+++ b/test/CodeGen/MBlaze/svol.ll
@@ -0,0 +1,80 @@
+; Ensure that the MBlaze save_volatiles calling convention (cc74) is handled
+; correctly by the MBlaze backend.
+;
+; RUN: llc < %s -march=mblaze | FileCheck %s
+
+@.str = private constant [28 x i8] c"The interrupt has gone off\0A\00"
+
+define cc74 void @mysvol() nounwind noinline {
+ ; CHECK: mysvol:
+ ; CHECK: swi r3, r1
+ ; CHECK: swi r4, r1
+ ; CHECK: swi r5, r1
+ ; CHECK: swi r6, r1
+ ; CHECK: swi r7, r1
+ ; CHECK: swi r8, r1
+ ; CHECK: swi r9, r1
+ ; CHECK: swi r10, r1
+ ; CHECK: swi r11, r1
+ ; CHECK: swi r12, r1
+ ; CHECK: swi r17, r1
+ ; CHECK: swi r18, r1
+ ; CHECK-NOT: mfs r11, rmsr
+ entry:
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([28 x i8]* @.str, i32 0, i32 0))
+ ret void
+
+ ; CHECK-NOT: mts rmsr, r11
+ ; CHECK: lwi r18, r1
+ ; CHECK: lwi r17, r1
+ ; CHECK: lwi r12, r1
+ ; CHECK: lwi r11, r1
+ ; CHECK: lwi r10, r1
+ ; CHECK: lwi r9, r1
+ ; CHECK: lwi r8, r1
+ ; CHECK: lwi r7, r1
+ ; CHECK: lwi r6, r1
+ ; CHECK: lwi r5, r1
+ ; CHECK: lwi r4, r1
+ ; CHECK: lwi r3, r1
+ ; CHECK: rtsd r15, 8
+}
+
+define cc74 void @mysvol2() nounwind noinline {
+ ; CHECK: mysvol2:
+ ; CHECK-NOT: swi r3, r1
+ ; CHECK-NOT: swi r4, r1
+ ; CHECK-NOT: swi r5, r1
+ ; CHECK-NOT: swi r6, r1
+ ; CHECK-NOT: swi r7, r1
+ ; CHECK-NOT: swi r8, r1
+ ; CHECK-NOT: swi r9, r1
+ ; CHECK-NOT: swi r10, r1
+ ; CHECK-NOT: swi r11, r1
+ ; CHECK-NOT: swi r12, r1
+ ; CHECK: swi r17, r1
+ ; CHECK: swi r18, r1
+ ; CHECK-NOT: mfs r11, rmsr
+entry:
+
+ ; CHECK-NOT: mts rmsr, r11
+ ; CHECK: lwi r18, r1
+ ; CHECK: lwi r17, r1
+ ; CHECK-NOT: lwi r12, r1
+ ; CHECK-NOT: lwi r11, r1
+ ; CHECK-NOT: lwi r10, r1
+ ; CHECK-NOT: lwi r9, r1
+ ; CHECK-NOT: lwi r8, r1
+ ; CHECK-NOT: lwi r7, r1
+ ; CHECK-NOT: lwi r6, r1
+ ; CHECK-NOT: lwi r5, r1
+ ; CHECK-NOT: lwi r4, r1
+ ; CHECK-NOT: lwi r3, r1
+ ; CHECK: rtsd r15, 8
+ ret void
+}
+
+ ; CHECK-NOT: .globl _interrupt_handler
+ ; CHECK-NOT: _interrupt_handler = mysvol
+ ; CHECK-NOT: _interrupt_handler = mysvol2
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/MSP430/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/MSP430/2010-04-07-DbgValueOtherTargets.ll
index 8de044cf48ba..9d549da8a93a 100644
--- a/test/CodeGen/MSP430/2010-04-07-DbgValueOtherTargets.ll
+++ b/test/CodeGen/MSP430/2010-04-07-DbgValueOtherTargets.ll
@@ -1,33 +1,28 @@
; RUN: llc -O0 -march=msp430 -asm-verbose < %s | FileCheck %s
; Check that DEBUG_VALUE comments come through on a variety of targets.
-%tart.reflect.ComplexType = type { double, double }
-
-@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
-
-define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+define i32 @main() nounwind ssp {
entry:
; CHECK: DEBUG_VALUE
- tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
- tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
- ret i32 3
+ call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
+ ret i32 0, !dbg !10
}
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
-!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
-!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
-!3 = metadata !{metadata !4, metadata !6, metadata !7}
-!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
-!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
-!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
-!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
-!9 = metadata !{i32 458763, metadata !10} ; [ DW_TAG_lexical_block ]
-!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 0}
+!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!9 = metadata !{i32 3, i32 11, metadata !8, null}
+!10 = metadata !{i32 4, i32 2, metadata !8, null}
+
diff --git a/test/CodeGen/MSP430/mult-alt-generic-msp430.ll b/test/CodeGen/MSP430/mult-alt-generic-msp430.ll
new file mode 100644
index 000000000000..342afed66053
--- /dev/null
+++ b/test/CodeGen/MSP430/mult-alt-generic-msp430.ll
@@ -0,0 +1,323 @@
+; RUN: llc < %s -march=msp430
+; ModuleID = 'mult-alt-generic.c'
+target datalayout = "e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"
+target triple = "msp430"
+
+@mout0 = common global i16 0, align 2
+@min1 = common global i16 0, align 2
+@marray = common global [2 x i16] zeroinitializer, align 2
+
+define void @single_m() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*m,*m"(i16* @mout0, i16* @min1) nounwind
+ ret void
+}
+
+define void @single_o() nounwind {
+entry:
+ %out0 = alloca i16, align 2
+ %index = alloca i16, align 2
+ store i16 0, i16* %out0, align 2
+ store i16 1, i16* %index, align 2
+ ret void
+}
+
+define void @single_V() nounwind {
+entry:
+ ret void
+}
+
+define void @single_lt() nounwind {
+entry:
+ %out0 = alloca i16, align 2
+ %in1 = alloca i16, align 2
+ store i16 0, i16* %out0, align 2
+ store i16 1, i16* %in1, align 2
+ %tmp = load i16* %in1, align 2
+ %0 = call i16 asm "foo $1,$0", "=r,<r"(i16 %tmp) nounwind
+ store i16 %0, i16* %out0, align 2
+ %tmp1 = load i16* %in1, align 2
+ %1 = call i16 asm "foo $1,$0", "=r,r<"(i16 %tmp1) nounwind
+ store i16 %1, i16* %out0, align 2
+ ret void
+}
+
+define void @single_gt() nounwind {
+entry:
+ %out0 = alloca i16, align 2
+ %in1 = alloca i16, align 2
+ store i16 0, i16* %out0, align 2
+ store i16 1, i16* %in1, align 2
+ %tmp = load i16* %in1, align 2
+ %0 = call i16 asm "foo $1,$0", "=r,>r"(i16 %tmp) nounwind
+ store i16 %0, i16* %out0, align 2
+ %tmp1 = load i16* %in1, align 2
+ %1 = call i16 asm "foo $1,$0", "=r,r>"(i16 %tmp1) nounwind
+ store i16 %1, i16* %out0, align 2
+ ret void
+}
+
+define void @single_r() nounwind {
+entry:
+ %out0 = alloca i16, align 2
+ %in1 = alloca i16, align 2
+ store i16 0, i16* %out0, align 2
+ store i16 1, i16* %in1, align 2
+ %tmp = load i16* %in1, align 2
+ %0 = call i16 asm "foo $1,$0", "=r,r"(i16 %tmp) nounwind
+ store i16 %0, i16* %out0, align 2
+ ret void
+}
+
+define void @single_i() nounwind {
+entry:
+ %out0 = alloca i16, align 2
+ store i16 0, i16* %out0, align 2
+ %0 = call i16 asm "foo $1,$0", "=r,i"(i16 1) nounwind
+ store i16 %0, i16* %out0, align 2
+ ret void
+}
+
+define void @single_n() nounwind {
+entry:
+ %out0 = alloca i16, align 2
+ store i16 0, i16* %out0, align 2
+ %0 = call i16 asm "foo $1,$0", "=r,n"(i16 1) nounwind
+ store i16 %0, i16* %out0, align 2
+ ret void
+}
+
+define void @single_E() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r,E"(double 1.000000e+001) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define void @single_F() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r,F"(double 1.000000e+000) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define void @single_s() nounwind {
+entry:
+ %out0 = alloca i16, align 2
+ store i16 0, i16* %out0, align 2
+ ret void
+}
+
+define void @single_g() nounwind {
+entry:
+ %out0 = alloca i16, align 2
+ %in1 = alloca i16, align 2
+ store i16 0, i16* %out0, align 2
+ store i16 1, i16* %in1, align 2
+ %tmp = load i16* %in1, align 2
+ %0 = call i16 asm "foo $1,$0", "=r,imr"(i16 %tmp) nounwind
+ store i16 %0, i16* %out0, align 2
+ %tmp1 = load i16* @min1, align 2
+ %1 = call i16 asm "foo $1,$0", "=r,imr"(i16 %tmp1) nounwind
+ store i16 %1, i16* %out0, align 2
+ %2 = call i16 asm "foo $1,$0", "=r,imr"(i16 1) nounwind
+ store i16 %2, i16* %out0, align 2
+ ret void
+}
+
+define void @single_X() nounwind {
+entry:
+ %out0 = alloca i16, align 2
+ %in1 = alloca i16, align 2
+ store i16 0, i16* %out0, align 2
+ store i16 1, i16* %in1, align 2
+ %tmp = load i16* %in1, align 2
+ %0 = call i16 asm "foo $1,$0", "=r,X"(i16 %tmp) nounwind
+ store i16 %0, i16* %out0, align 2
+ %tmp1 = load i16* @min1, align 2
+ %1 = call i16 asm "foo $1,$0", "=r,X"(i16 %tmp1) nounwind
+ store i16 %1, i16* %out0, align 2
+ %2 = call i16 asm "foo $1,$0", "=r,X"(i16 1) nounwind
+ store i16 %2, i16* %out0, align 2
+ %3 = call i16 asm "foo $1,$0", "=r,X"(i16* getelementptr inbounds ([2 x i16]* @marray, i32 0, i32 0)) nounwind
+ store i16 %3, i16* %out0, align 2
+; No lowering support.
+; %4 = call i16 asm "foo $1,$0", "=r,X"(double 1.000000e+001) nounwind
+; store i16 %4, i16* %out0, align 2
+; %5 = call i16 asm "foo $1,$0", "=r,X"(double 1.000000e+000) nounwind
+; store i16 %5, i16* %out0, align 2
+ ret void
+}
+
+define void @single_p() nounwind {
+entry:
+ %out0 = alloca i16, align 2
+ store i16 0, i16* %out0, align 2
+ %0 = call i16 asm "foo $1,$0", "=r,r"(i16* getelementptr inbounds ([2 x i16]* @marray, i32 0, i32 0)) nounwind
+ store i16 %0, i16* %out0, align 2
+ ret void
+}
+
+define void @multi_m() nounwind {
+entry:
+ %tmp = load i16* @min1, align 2
+ call void asm "foo $1,$0", "=*m|r,m|r"(i16* @mout0, i16 %tmp) nounwind
+ ret void
+}
+
+define void @multi_o() nounwind {
+entry:
+ %out0 = alloca i16, align 2
+ %index = alloca i16, align 2
+ store i16 0, i16* %out0, align 2
+ store i16 1, i16* %index, align 2
+ ret void
+}
+
+define void @multi_V() nounwind {
+entry:
+ ret void
+}
+
+define void @multi_lt() nounwind {
+entry:
+ %out0 = alloca i16, align 2
+ %in1 = alloca i16, align 2
+ store i16 0, i16* %out0, align 2
+ store i16 1, i16* %in1, align 2
+ %tmp = load i16* %in1, align 2
+ %0 = call i16 asm "foo $1,$0", "=r|r,r|<r"(i16 %tmp) nounwind
+ store i16 %0, i16* %out0, align 2
+ %tmp1 = load i16* %in1, align 2
+ %1 = call i16 asm "foo $1,$0", "=r|r,r|r<"(i16 %tmp1) nounwind
+ store i16 %1, i16* %out0, align 2
+ ret void
+}
+
+define void @multi_gt() nounwind {
+entry:
+ %out0 = alloca i16, align 2
+ %in1 = alloca i16, align 2
+ store i16 0, i16* %out0, align 2
+ store i16 1, i16* %in1, align 2
+ %tmp = load i16* %in1, align 2
+ %0 = call i16 asm "foo $1,$0", "=r|r,r|>r"(i16 %tmp) nounwind
+ store i16 %0, i16* %out0, align 2
+ %tmp1 = load i16* %in1, align 2
+ %1 = call i16 asm "foo $1,$0", "=r|r,r|r>"(i16 %tmp1) nounwind
+ store i16 %1, i16* %out0, align 2
+ ret void
+}
+
+define void @multi_r() nounwind {
+entry:
+ %out0 = alloca i16, align 2
+ %in1 = alloca i16, align 2
+ store i16 0, i16* %out0, align 2
+ store i16 1, i16* %in1, align 2
+ %tmp = load i16* %in1, align 2
+ %0 = call i16 asm "foo $1,$0", "=r|r,r|m"(i16 %tmp) nounwind
+ store i16 %0, i16* %out0, align 2
+ ret void
+}
+
+define void @multi_i() nounwind {
+entry:
+ %out0 = alloca i16, align 2
+ store i16 0, i16* %out0, align 2
+ %0 = call i16 asm "foo $1,$0", "=r|r,r|i"(i16 1) nounwind
+ store i16 %0, i16* %out0, align 2
+ ret void
+}
+
+define void @multi_n() nounwind {
+entry:
+ %out0 = alloca i16, align 2
+ store i16 0, i16* %out0, align 2
+ %0 = call i16 asm "foo $1,$0", "=r|r,r|n"(i16 1) nounwind
+ store i16 %0, i16* %out0, align 2
+ ret void
+}
+
+define void @multi_E() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r|r,r|E"(double 1.000000e+001) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define void @multi_F() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r|r,r|F"(double 1.000000e+000) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define void @multi_s() nounwind {
+entry:
+ %out0 = alloca i16, align 2
+ store i16 0, i16* %out0, align 2
+ ret void
+}
+
+define void @multi_g() nounwind {
+entry:
+ %out0 = alloca i16, align 2
+ %in1 = alloca i16, align 2
+ store i16 0, i16* %out0, align 2
+ store i16 1, i16* %in1, align 2
+ %tmp = load i16* %in1, align 2
+ %0 = call i16 asm "foo $1,$0", "=r|r,r|imr"(i16 %tmp) nounwind
+ store i16 %0, i16* %out0, align 2
+ %tmp1 = load i16* @min1, align 2
+ %1 = call i16 asm "foo $1,$0", "=r|r,r|imr"(i16 %tmp1) nounwind
+ store i16 %1, i16* %out0, align 2
+ %2 = call i16 asm "foo $1,$0", "=r|r,r|imr"(i16 1) nounwind
+ store i16 %2, i16* %out0, align 2
+ ret void
+}
+
+define void @multi_X() nounwind {
+entry:
+ %out0 = alloca i16, align 2
+ %in1 = alloca i16, align 2
+ store i16 0, i16* %out0, align 2
+ store i16 1, i16* %in1, align 2
+ %tmp = load i16* %in1, align 2
+ %0 = call i16 asm "foo $1,$0", "=r|r,r|X"(i16 %tmp) nounwind
+ store i16 %0, i16* %out0, align 2
+ %tmp1 = load i16* @min1, align 2
+ %1 = call i16 asm "foo $1,$0", "=r|r,r|X"(i16 %tmp1) nounwind
+ store i16 %1, i16* %out0, align 2
+ %2 = call i16 asm "foo $1,$0", "=r|r,r|X"(i16 1) nounwind
+ store i16 %2, i16* %out0, align 2
+ %3 = call i16 asm "foo $1,$0", "=r|r,r|X"(i16* getelementptr inbounds ([2 x i16]* @marray, i32 0, i32 0)) nounwind
+ store i16 %3, i16* %out0, align 2
+; No lowering support.
+; %4 = call i16 asm "foo $1,$0", "=r|r,r|X"(double 1.000000e+001) nounwind
+; store i16 %4, i16* %out0, align 2
+; %5 = call i16 asm "foo $1,$0", "=r|r,r|X"(double 1.000000e+000) nounwind
+; store i16 %5, i16* %out0, align 2
+ ret void
+}
+
+define void @multi_p() nounwind {
+entry:
+ %out0 = alloca i16, align 2
+ store i16 0, i16* %out0, align 2
+ %0 = call i16 asm "foo $1,$0", "=r|r,r|r"(i16* getelementptr inbounds ([2 x i16]* @marray, i32 0, i32 0)) nounwind
+ store i16 %0, i16* %out0, align 2
+ ret void
+}
diff --git a/test/CodeGen/Mips/2008-07-15-InternalConstant.ll b/test/CodeGen/Mips/2008-07-15-InternalConstant.ll
index bda4a3172f30..c3db6387aff3 100644
--- a/test/CodeGen/Mips/2008-07-15-InternalConstant.ll
+++ b/test/CodeGen/Mips/2008-07-15-InternalConstant.ll
@@ -7,8 +7,8 @@
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "mipsallegrexel-unknown-psp-elf"
-@.str = internal constant [10 x i8] c"AAAAAAAAA\00"
-@i0 = internal constant [5 x i32] [ i32 0, i32 1, i32 2, i32 3, i32 4 ]
+@.str = internal unnamed_addr constant [10 x i8] c"AAAAAAAAA\00"
+@i0 = internal unnamed_addr constant [5 x i32] [ i32 0, i32 1, i32 2, i32 3, i32 4 ]
define i8* @foo() nounwind {
entry:
diff --git a/test/CodeGen/Mips/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/Mips/2010-04-07-DbgValueOtherTargets.ll
index 4161c1d686e6..994e19af4f87 100644
--- a/test/CodeGen/Mips/2010-04-07-DbgValueOtherTargets.ll
+++ b/test/CodeGen/Mips/2010-04-07-DbgValueOtherTargets.ll
@@ -1,33 +1,28 @@
; RUN: llc -O0 -march=mips -asm-verbose < %s | FileCheck %s
; Check that DEBUG_VALUE comments come through on a variety of targets.
-%tart.reflect.ComplexType = type { double, double }
-
-@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
-
-define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+define i32 @main() nounwind ssp {
entry:
; CHECK: DEBUG_VALUE
- tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
- tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
- ret i32 3
+ call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
+ ret i32 0, !dbg !10
}
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
-!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
-!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
-!3 = metadata !{metadata !4, metadata !6, metadata !7}
-!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
-!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
-!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
-!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
-!9 = metadata !{i32 458763, metadata !10} ; [ DW_TAG_lexical_block ]
-!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 0}
+!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!9 = metadata !{i32 3, i32 11, metadata !8, null}
+!10 = metadata !{i32 4, i32 2, metadata !8, null}
+
diff --git a/test/CodeGen/Mips/2010-07-20-Select.ll b/test/CodeGen/Mips/2010-07-20-Select.ll
index 8b7f9a919378..891b5d9e1884 100644
--- a/test/CodeGen/Mips/2010-07-20-Select.ll
+++ b/test/CodeGen/Mips/2010-07-20-Select.ll
@@ -9,12 +9,12 @@ entry:
volatile store i32 0, i32* %c, align 4
%0 = volatile load i32* %a, align 4 ; <i32> [#uses=1]
%1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
-; CHECK: addiu $4, $zero, 3
+; CHECK: addiu $3, $zero, 0
%iftmp.0.0 = select i1 %1, i32 3, i32 0 ; <i32> [#uses=1]
%2 = volatile load i32* %c, align 4 ; <i32> [#uses=1]
%3 = icmp eq i32 %2, 0 ; <i1> [#uses=1]
-; CHECK: addu $4, $zero, $3
-; CHECK: addu $2, $5, $4
+; CHECK: addiu $3, $zero, 3
+; CHECK: addu $2, $5, $3
%iftmp.2.0 = select i1 %3, i32 0, i32 5 ; <i32> [#uses=1]
%4 = add nsw i32 %iftmp.2.0, %iftmp.0.0 ; <i32> [#uses=1]
ret i32 %4
diff --git a/test/CodeGen/Mips/2010-11-09-CountLeading.ll b/test/CodeGen/Mips/2010-11-09-CountLeading.ll
new file mode 100644
index 000000000000..d592fef331af
--- /dev/null
+++ b/test/CodeGen/Mips/2010-11-09-CountLeading.ll
@@ -0,0 +1,33 @@
+; RUN: llc -march=mips -mcpu=4ke < %s | FileCheck %s
+
+; CHECK: clz $2, $4
+define i32 @t1(i32 %X) nounwind readnone {
+entry:
+ %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %X)
+ ret i32 %tmp1
+}
+
+declare i32 @llvm.ctlz.i32(i32) nounwind readnone
+
+; CHECK: clz $2, $4
+define i32 @t2(i32 %X) nounwind readnone {
+entry:
+ %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %X)
+ ret i32 %tmp1
+}
+
+; CHECK: clo $2, $4
+define i32 @t3(i32 %X) nounwind readnone {
+entry:
+ %neg = xor i32 %X, -1
+ %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %neg)
+ ret i32 %tmp1
+}
+
+; CHECK: clo $2, $4
+define i32 @t4(i32 %X) nounwind readnone {
+entry:
+ %neg = xor i32 %X, -1
+ %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %neg)
+ ret i32 %tmp1
+}
diff --git a/test/CodeGen/Mips/2010-11-09-Mul.ll b/test/CodeGen/Mips/2010-11-09-Mul.ll
new file mode 100644
index 000000000000..65a10b5836cc
--- /dev/null
+++ b/test/CodeGen/Mips/2010-11-09-Mul.ll
@@ -0,0 +1,15 @@
+; RUN: llc -march=mips -mcpu=4ke < %s | FileCheck %s
+
+; CHECK: mul $2, $5, $4
+define i32 @mul1(i32 %a, i32 %b) nounwind readnone {
+entry:
+ %mul = mul i32 %b, %a
+ ret i32 %mul
+}
+
+; CHECK: mul $2, $5, $4
+define i32 @mul2(i32 %a, i32 %b) nounwind readnone {
+entry:
+ %mul = mul nsw i32 %b, %a
+ ret i32 %mul
+}
diff --git a/test/CodeGen/Mips/cmov.ll b/test/CodeGen/Mips/cmov.ll
new file mode 100755
index 000000000000..7d3e0252e3c9
--- /dev/null
+++ b/test/CodeGen/Mips/cmov.ll
@@ -0,0 +1,15 @@
+; RUN: llc -march=mips -mcpu=4ke < %s | FileCheck %s
+
+@i1 = global [3 x i32] [i32 1, i32 2, i32 3], align 4
+@i3 = common global i32* null, align 4
+
+; CHECK: lw $3, %got(i3)($gp)
+; CHECK: addiu $5, $gp, %got(i1)
+define i32* @cmov1(i32 %s) nounwind readonly {
+entry:
+ %tobool = icmp ne i32 %s, 0
+ %tmp1 = load i32** @i3, align 4
+ %cond = select i1 %tobool, i32* getelementptr inbounds ([3 x i32]* @i1, i32 0, i32 0), i32* %tmp1
+ ret i32* %cond
+}
+
diff --git a/test/CodeGen/Mips/madd-msub.ll b/test/CodeGen/Mips/madd-msub.ll
new file mode 100644
index 000000000000..4a205b1f3ffb
--- /dev/null
+++ b/test/CodeGen/Mips/madd-msub.ll
@@ -0,0 +1,65 @@
+; RUN: llc -march=mips -mcpu=4ke < %s | FileCheck %s
+
+; CHECK: madd $5, $4
+define i64 @madd1(i32 %a, i32 %b, i32 %c) nounwind readnone {
+entry:
+ %conv = sext i32 %a to i64
+ %conv2 = sext i32 %b to i64
+ %mul = mul nsw i64 %conv2, %conv
+ %conv4 = sext i32 %c to i64
+ %add = add nsw i64 %mul, %conv4
+ ret i64 %add
+}
+
+; CHECK: maddu $5, $4
+define i64 @madd2(i32 %a, i32 %b, i32 %c) nounwind readnone {
+entry:
+ %conv = zext i32 %a to i64
+ %conv2 = zext i32 %b to i64
+ %mul = mul nsw i64 %conv2, %conv
+ %conv4 = zext i32 %c to i64
+ %add = add nsw i64 %mul, %conv4
+ ret i64 %add
+}
+
+; CHECK: madd $5, $4
+define i64 @madd3(i32 %a, i32 %b, i64 %c) nounwind readnone {
+entry:
+ %conv = sext i32 %a to i64
+ %conv2 = sext i32 %b to i64
+ %mul = mul nsw i64 %conv2, %conv
+ %add = add nsw i64 %mul, %c
+ ret i64 %add
+}
+
+; CHECK: msub $5, $4
+define i64 @msub1(i32 %a, i32 %b, i32 %c) nounwind readnone {
+entry:
+ %conv = sext i32 %c to i64
+ %conv2 = sext i32 %a to i64
+ %conv4 = sext i32 %b to i64
+ %mul = mul nsw i64 %conv4, %conv2
+ %sub = sub nsw i64 %conv, %mul
+ ret i64 %sub
+}
+
+; CHECK: msubu $5, $4
+define i64 @msub2(i32 %a, i32 %b, i32 %c) nounwind readnone {
+entry:
+ %conv = zext i32 %c to i64
+ %conv2 = zext i32 %a to i64
+ %conv4 = zext i32 %b to i64
+ %mul = mul nsw i64 %conv4, %conv2
+ %sub = sub nsw i64 %conv, %mul
+ ret i64 %sub
+}
+
+; CHECK: msub $5, $4
+define i64 @msub3(i32 %a, i32 %b, i64 %c) nounwind readnone {
+entry:
+ %conv = sext i32 %a to i64
+ %conv3 = sext i32 %b to i64
+ %mul = mul nsw i64 %conv3, %conv
+ %sub = sub nsw i64 %c, %mul
+ ret i64 %sub
+}
diff --git a/test/CodeGen/Mips/o32_cc.ll b/test/CodeGen/Mips/o32_cc.ll
new file mode 100644
index 000000000000..b6df62be6603
--- /dev/null
+++ b/test/CodeGen/Mips/o32_cc.ll
@@ -0,0 +1,325 @@
+; RUN: llc -march=mips -mcpu=4ke < %s | FileCheck %s
+
+; FIXME: Disabled because it unpredictably fails on certain platforms.
+; REQUIRES: disabled
+
+; $f12, $f14
+; CHECK: ldc1 $f12, %lo
+; CHECK: ldc1 $f14, %lo
+define void @testlowercall0() nounwind {
+entry:
+ tail call void @f0(double 5.000000e+00, double 6.000000e+00) nounwind
+ ret void
+}
+
+declare void @f0(double, double)
+
+; $f12, $f14
+; CHECK: lwc1 $f12, %lo
+; CHECK: lwc1 $f14, %lo
+define void @testlowercall1() nounwind {
+entry:
+ tail call void @f1(float 8.000000e+00, float 9.000000e+00) nounwind
+ ret void
+}
+
+declare void @f1(float, float)
+
+; $f12, $f14
+; CHECK: lwc1 $f12, %lo
+; CHECK: ldc1 $f14, %lo
+define void @testlowercall2() nounwind {
+entry:
+ tail call void @f2(float 8.000000e+00, double 6.000000e+00) nounwind
+ ret void
+}
+
+declare void @f2(float, double)
+
+; $f12, $f14
+; CHECK: ldc1 $f12, %lo
+; CHECK: lwc1 $f14, %lo
+define void @testlowercall3() nounwind {
+entry:
+ tail call void @f3(double 5.000000e+00, float 9.000000e+00) nounwind
+ ret void
+}
+
+declare void @f3(double, float)
+
+; $4, $5, $6, $7
+; CHECK: addiu $4, $zero, 12
+; CHECK: addiu $5, $zero, 13
+; CHECK: addiu $6, $zero, 14
+; CHECK: addiu $7, $zero, 15
+define void @testlowercall4() nounwind {
+entry:
+ tail call void @f4(i32 12, i32 13, i32 14, i32 15) nounwind
+ ret void
+}
+
+declare void @f4(i32, i32, i32, i32)
+
+; $f12, $6, stack
+; CHECK: sw $2, 16($sp)
+; CHECK: sw $zero, 20($sp)
+; CHECK: ldc1 $f12, %lo
+; CHECK: addiu $6, $zero, 23
+define void @testlowercall5() nounwind {
+entry:
+ tail call void @f5(double 1.500000e+01, i32 23, double 1.700000e+01) nounwind
+ ret void
+}
+
+declare void @f5(double, i32, double)
+
+; $f12, $6, $7
+; CHECK: ldc1 $f12, %lo
+; CHECK: addiu $6, $zero, 33
+; CHECK: addiu $7, $zero, 24
+define void @testlowercall6() nounwind {
+entry:
+ tail call void @f6(double 2.500000e+01, i32 33, i32 24) nounwind
+ ret void
+}
+
+declare void @f6(double, i32, i32)
+
+; $f12, $5, $6
+; CHECK: lwc1 $f12, %lo
+; CHECK: addiu $5, $zero, 43
+; CHECK: addiu $6, $zero, 34
+define void @testlowercall7() nounwind {
+entry:
+ tail call void @f7(float 1.800000e+01, i32 43, i32 34) nounwind
+ ret void
+}
+
+declare void @f7(float, i32, i32)
+
+; $4, $5, $6, stack
+; CHECK: sw $2, 16($sp)
+; CHECK: sw $zero, 20($sp)
+; CHECK: addiu $4, $zero, 22
+; CHECK: addiu $5, $zero, 53
+; CHECK: addiu $6, $zero, 44
+define void @testlowercall8() nounwind {
+entry:
+ tail call void @f8(i32 22, i32 53, i32 44, double 4.000000e+00) nounwind
+ ret void
+}
+
+declare void @f8(i32, i32, i32, double)
+
+; $4, $5, $6, $7
+; CHECK: addiu $4, $zero, 32
+; CHECK: addiu $5, $zero, 63
+; CHECK: addiu $6, $zero, 54
+; CHECK: ori $7, $2, 0
+define void @testlowercall9() nounwind {
+entry:
+ tail call void @f9(i32 32, i32 63, i32 54, float 1.100000e+01) nounwind
+ ret void
+}
+
+declare void @f9(i32, i32, i32, float)
+
+; $4, $5, ($6, $7)
+; CHECK: addiu $4, $zero, 42
+; CHECK: addiu $5, $zero, 73
+; CHECK: addiu $6, $zero, 0
+; CHECK: ori $7, $2, 0
+define void @testlowercall10() nounwind {
+entry:
+ tail call void @f10(i32 42, i32 73, double 2.700000e+01) nounwind
+ ret void
+}
+
+declare void @f10(i32, i32, double)
+
+; $4, ($6, $7)
+; CHECK: addiu $4, $zero, 52
+; CHECK: addiu $6, $zero, 0
+; CHECK: ori $7, $2, 0
+define void @testlowercall11() nounwind {
+entry:
+ tail call void @f11(i32 52, double 1.600000e+01) nounwind
+ ret void
+}
+
+declare void @f11(i32, double)
+
+; $f12, $f14, $6, $7
+; CHECK: lwc1 $f12, %lo
+; CHECK: lwc1 $f14, %lo
+; CHECK: ori $6, $4, 0
+; CHECK: ori $7, $5, 0
+define void @testlowercall12() nounwind {
+entry:
+ tail call void @f12(float 2.800000e+01, float 1.900000e+01, float 1.000000e+01, float 2.100000e+01) nounwind
+ ret void
+}
+
+declare void @f12(float, float, float, float)
+
+; $f12, $5, $6, $7
+; CHECK: lwc1 $f12, %lo
+; CHECK: addiu $5, $zero, 83
+; CHECK: ori $6, $3, 0
+; CHECK: addiu $7, $zero, 25
+define void @testlowercall13() nounwind {
+entry:
+ tail call void @f13(float 3.800000e+01, i32 83, float 2.000000e+01, i32 25) nounwind
+ ret void
+}
+
+
+declare void @f13(float, i32, float, i32)
+
+; $f12, $f14, $7
+; CHECK: ldc1 $f12, %lo
+; CHECK: lwc1 $f14, %lo
+; CHECK: ori $7, $4, 0
+define void @testlowercall14() nounwind {
+entry:
+ tail call void @f14(double 3.500000e+01, float 2.900000e+01, float 3.000000e+01) nounwind
+ ret void
+}
+
+declare void @f14(double, float, float)
+
+; $f12, $f14, ($6, $7)
+; CHECK: lwc1 $f12, %lo
+; CHECK: lwc1 $f14, %lo
+; CHECK: addiu $6, $zero, 0
+; CHECK: ori $7, $4, 32768
+define void @testlowercall15() nounwind {
+entry:
+ tail call void @f15(float 4.800000e+01, float 3.900000e+01, double 3.700000e+01) nounwind
+ ret void
+}
+
+declare void @f15(float, float, double)
+
+; $4, $5, $6, $7
+; CHECK: addiu $4, $zero, 62
+; CHECK: ori $5, $2, 0
+; CHECK: addiu $6, $zero, 64
+; CHECK: ori $7, $3, 0
+define void @testlowercall16() nounwind {
+entry:
+ tail call void @f16(i32 62, float 4.900000e+01, i32 64, float 3.100000e+01) nounwind
+ ret void
+}
+
+declare void @f16(i32, float, i32, float)
+
+; $4, $5, $6, $7
+; CHECK: addiu $4, $zero, 72
+; CHECK: ori $5, $2, 0
+; CHECK: addiu $6, $zero, 74
+; CHECK: addiu $7, $zero, 35
+define void @testlowercall17() nounwind {
+entry:
+ tail call void @f17(i32 72, float 5.900000e+01, i32 74, i32 35) nounwind
+ ret void
+}
+
+declare void @f17(i32, float, i32, i32)
+
+; $4, $5, $6, $7
+; CHECK: addiu $4, $zero, 82
+; CHECK: addiu $5, $zero, 93
+; CHECK: ori $6, $2, 0
+; CHECK: addiu $7, $zero, 45
+define void @testlowercall18() nounwind {
+entry:
+ tail call void @f18(i32 82, i32 93, float 4.000000e+01, i32 45) nounwind
+ ret void
+}
+
+declare void @f18(i32, i32, float, i32)
+
+
+; $4, ($6, $7), stack
+; CHECK: sw $2, 16($sp)
+; CHECK: sw $zero, 20($sp)
+; CHECK: addiu $4, $zero, 92
+; CHECK: addiu $6, $zero, 0
+; CHECK: ori $7, $3, 0
+define void @testlowercall20() nounwind {
+entry:
+ tail call void @f20(i32 92, double 2.600000e+01, double 4.700000e+01) nounwind
+ ret void
+}
+
+declare void @f20(i32, double, double)
+
+; $f12, $5
+; CHECK: lwc1 $f12, %lo
+; CHECK: addiu $5, $zero, 103
+define void @testlowercall21() nounwind {
+entry:
+ tail call void @f21(float 5.800000e+01, i32 103) nounwind
+ ret void
+}
+
+declare void @f21(float, i32)
+
+; $f12, $5, ($6, $7)
+; CHECK: lwc1 $f12, %lo
+; CHECK: addiu $5, $zero, 113
+; CHECK: addiu $6, $zero, 0
+; CHECK: ori $7, $3, 32768
+define void @testlowercall22() nounwind {
+entry:
+ tail call void @f22(float 6.800000e+01, i32 113, double 5.700000e+01) nounwind
+ ret void
+}
+
+declare void @f22(float, i32, double)
+
+; $f12, $6
+; CHECK: ldc1 $f12, %lo
+; CHECK: addiu $6, $zero, 123
+define void @testlowercall23() nounwind {
+entry:
+ tail call void @f23(double 4.500000e+01, i32 123) nounwind
+ ret void
+}
+
+declare void @f23(double, i32)
+
+; $f12, $6, stack
+; CHECK: sw $2, 16($sp)
+; CHECK: sw $zero, 20($sp)
+; CHECK: ldc1 $f12, %lo
+; CHECK: addiu $6, $zero, 133
+define void @testlowercall24() nounwind {
+entry:
+ tail call void @f24(double 5.500000e+01, i32 133, double 6.700000e+01) nounwind
+ ret void
+}
+
+declare void @f24(double, i32, double)
+
+; CHECK: lwc1 $f12, %lo
+; lwc1 $f12, %lo
+; CHECK: lwc1 $f14, %lo
+; CHECK: ori $6, $4, 0
+; CHECK: ori $7, $5, 0
+; CHECK: lwc1 $f12, %lo
+; CHECK: addiu $5, $zero, 83
+; CHECK: ori $6, $3, 0
+; CHECK: addiu $7, $zero, 25
+; CHECK: addiu $4, $zero, 82
+; CHECK: addiu $5, $zero, 93
+; CHECK: ori $6, $2, 0
+; CHECK: addiu $7, $zero, 45
+define void @testlowercall25() nounwind {
+entry:
+ tail call void @f12(float 2.800000e+01, float 1.900000e+01, float 1.000000e+01, float 2.100000e+01) nounwind
+ tail call void @f13(float 3.800000e+01, i32 83, float 2.000000e+01, i32 25) nounwind
+ tail call void @f18(i32 82, i32 93, float 4.000000e+01, i32 45) nounwind
+ ret void
+}
diff --git a/test/CodeGen/Mips/rotate.ll b/test/CodeGen/Mips/rotate.ll
new file mode 100644
index 000000000000..e7dc30932144
--- /dev/null
+++ b/test/CodeGen/Mips/rotate.ll
@@ -0,0 +1,40 @@
+; RUN: llc -march=mips -mcpu=4ke < %s | FileCheck %s
+
+; CHECK: rotrv $2, $4, $2
+define i32 @rot0(i32 %a, i32 %b) nounwind readnone {
+entry:
+ %shl = shl i32 %a, %b
+ %sub = sub i32 32, %b
+ %shr = lshr i32 %a, %sub
+ %or = or i32 %shr, %shl
+ ret i32 %or
+}
+
+; CHECK: rotr $2, $4, 22
+define i32 @rot1(i32 %a) nounwind readnone {
+entry:
+ %shl = shl i32 %a, 10
+ %shr = lshr i32 %a, 22
+ %or = or i32 %shl, %shr
+ ret i32 %or
+}
+
+; CHECK: rotrv $2, $4, $5
+define i32 @rot2(i32 %a, i32 %b) nounwind readnone {
+entry:
+ %shr = lshr i32 %a, %b
+ %sub = sub i32 32, %b
+ %shl = shl i32 %a, %sub
+ %or = or i32 %shl, %shr
+ ret i32 %or
+}
+
+; CHECK: rotr $2, $4, 10
+define i32 @rot3(i32 %a) nounwind readnone {
+entry:
+ %shr = lshr i32 %a, 10
+ %shl = shl i32 %a, 22
+ %or = or i32 %shr, %shl
+ ret i32 %or
+}
+
diff --git a/test/CodeGen/PIC16/2009-07-17-PR4566-pic16.ll b/test/CodeGen/PIC16/2009-07-17-PR4566-pic16.ll
deleted file mode 100644
index 5b5e11f2df0c..000000000000
--- a/test/CodeGen/PIC16/2009-07-17-PR4566-pic16.ll
+++ /dev/null
@@ -1,32 +0,0 @@
-; RUN: llc < %s -march=pic16 | FileCheck %s
-; XFAIL: vg_leak
-
-target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8-f32:32:32"
-target triple = "pic16-"
-@i = global i32 -10, align 1 ; <i32*> [#uses=1]
-@j = global i32 -20, align 1 ; <i32*> [#uses=1]
-@pc = global i8* inttoptr (i64 160 to i8*), align 1 ; <i8**> [#uses=3]
-@main.auto.k = internal global i32 0 ; <i32*> [#uses=2]
-
-define void @main() nounwind {
-entry:
- %tmp = load i32* @i ; <i32> [#uses=1]
- %tmp1 = load i32* @j ; <i32> [#uses=1]
- %add = add i32 %tmp, %tmp1 ; <i32> [#uses=1]
- store i32 %add, i32* @main.auto.k
- %tmp2 = load i32* @main.auto.k ; <i32> [#uses=1]
- %add3 = add i32 %tmp2, 32 ; <i32> [#uses=1]
- %conv = trunc i32 %add3 to i8 ; <i8> [#uses=1]
- %tmp4 = load i8** @pc ; <i8*> [#uses=1]
- store i8 %conv, i8* %tmp4
- %tmp5 = load i8** @pc ; <i8*> [#uses=1]
- %tmp6 = load i8* %tmp5 ; <i8> [#uses=1]
- %conv7 = sext i8 %tmp6 to i16 ; <i16> [#uses=1]
- %sub = sub i16 %conv7, 1 ; <i16> [#uses=1]
- %conv8 = trunc i16 %sub to i8 ; <i8> [#uses=1]
- %tmp9 = load i8** @pc ; <i8*> [#uses=1]
- store i8 %conv8, i8* %tmp9
- ret void
-}
-
-; CHECK: movf @i + 0, W
diff --git a/test/CodeGen/PIC16/2009-11-20-NewNode.ll b/test/CodeGen/PIC16/2009-11-20-NewNode.ll
deleted file mode 100644
index d68f0f41c4a5..000000000000
--- a/test/CodeGen/PIC16/2009-11-20-NewNode.ll
+++ /dev/null
@@ -1,36 +0,0 @@
-; RUN: llc -march=pic16 < %s
-; PR5558
-
-define i64 @_strtoll_r(i16 %base) nounwind {
-entry:
- br i1 undef, label %if.then, label %if.end27
-
-if.then: ; preds = %do.end
- br label %if.end27
-
-if.end27: ; preds = %if.then, %do.end
- %cond66 = select i1 undef, i64 -9223372036854775808, i64 9223372036854775807 ; <i64> [#uses=3]
- %conv69 = sext i16 %base to i64 ; <i64> [#uses=1]
- %div = udiv i64 %cond66, %conv69 ; <i64> [#uses=1]
- br label %for.cond
-
-for.cond: ; preds = %if.end116, %if.end27
- br i1 undef, label %if.then152, label %if.then93
-
-if.then93: ; preds = %for.cond
- br i1 undef, label %if.end116, label %if.then152
-
-if.end116: ; preds = %if.then93
- %cmp123 = icmp ugt i64 undef, %div ; <i1> [#uses=1]
- %or.cond = or i1 undef, %cmp123 ; <i1> [#uses=0]
- br label %for.cond
-
-if.then152: ; preds = %if.then93, %for.cond
- br i1 undef, label %if.end182, label %if.then172
-
-if.then172: ; preds = %if.then152
- ret i64 %cond66
-
-if.end182: ; preds = %if.then152
- ret i64 %cond66
-}
diff --git a/test/CodeGen/PIC16/C16-11.ll b/test/CodeGen/PIC16/C16-11.ll
deleted file mode 100644
index 8a5a0ac11f75..000000000000
--- a/test/CodeGen/PIC16/C16-11.ll
+++ /dev/null
@@ -1,40 +0,0 @@
-; RUN: llc < %s -march=pic16
-; XFAIL: *
-; This fails because PIC16 doesn't define a (xor reg, reg) pattern.
-;
-
-@c612.auto.a.b = internal global i1 false ; <i1*> [#uses=2]
-@c612.auto.A.b = internal global i1 false ; <i1*> [#uses=2]
-
-define void @c612() nounwind {
-entry:
- %tmp3.b = load i1* @c612.auto.a.b ; <i1> [#uses=1]
- %tmp3 = zext i1 %tmp3.b to i16 ; <i16> [#uses=1]
- %tmp4.b = load i1* @c612.auto.A.b ; <i1> [#uses=1]
- %tmp4 = select i1 %tmp4.b, i16 2, i16 0 ; <i16> [#uses=1]
- %cmp5 = icmp ne i16 %tmp3, %tmp4 ; <i1> [#uses=1]
- %conv7 = zext i1 %cmp5 to i8 ; <i8> [#uses=1]
- tail call void @expectWrap(i8 %conv7, i8 2)
- ret void
-}
-
-define void @expectWrap(i8 %boolresult, i8 %errCode) nounwind {
-entry:
- %tobool = icmp eq i8 %boolresult, 0 ; <i1> [#uses=1]
- br i1 %tobool, label %if.then, label %if.end
-
-if.then: ; preds = %entry
- tail call void @exit(i16 1)
- unreachable
-
-if.end: ; preds = %entry
- ret void
-}
-
-define i16 @main() nounwind {
-entry:
- tail call void @c612()
- ret i16 0
-}
-
-declare void @exit(i16) noreturn nounwind
diff --git a/test/CodeGen/PIC16/C16-15.ll b/test/CodeGen/PIC16/C16-15.ll
deleted file mode 100644
index 020b0dd6743e..000000000000
--- a/test/CodeGen/PIC16/C16-15.ll
+++ /dev/null
@@ -1,45 +0,0 @@
-; RUN: llc < %s -march=pic16 | grep "extern" | grep "@.lib.unordered.f32" | count 3
-; XFAIL: vg_leak
-
-@pc = global i8* inttoptr (i64 160 to i8*), align 1 ; <i8**> [#uses=2]
-@aa = common global i16 0, align 1 ; <i16*> [#uses=0]
-@c6214.auto.d = internal global float 0.000000e+00, align 4 ; <float*> [#uses=1]
-@c6214.auto.l = internal global float 0.000000e+00, align 4 ; <float*> [#uses=1]
-
-define float @dvalue(float %f) nounwind {
-entry:
- ret float %f
-}
-
-define void @_assert(i16 %line, i16 %result) nounwind {
-entry:
- %add = add i16 %line, %result ; <i16> [#uses=1]
- %conv = trunc i16 %add to i8 ; <i8> [#uses=1]
- %tmp2 = load i8** @pc ; <i8*> [#uses=1]
- store i8 %conv, i8* %tmp2
- ret void
-}
-
-define i16 @main() nounwind {
-entry:
- %retval = alloca i16, align 1 ; <i16*> [#uses=2]
- store i16 0, i16* %retval
- call void @c6214()
- %0 = load i16* %retval ; <i16> [#uses=1]
- ret i16 %0
-}
-
-define internal void @c6214() nounwind {
-entry:
- %call = call float @dvalue(float 0x3FF3C0CA40000000) ; <float> [#uses=3]
- store float %call, float* @c6214.auto.d
- store float %call, float* @c6214.auto.l
- %cmp = fcmp ord float %call, 0.000000e+00 ; <i1> [#uses=1]
- %conv = zext i1 %cmp to i16 ; <i16> [#uses=1]
- call void @_assert(i16 10, i16 %conv)
- %tmp3 = load i8** @pc ; <i8*> [#uses=2]
- %tmp4 = load i8* %tmp3 ; <i8> [#uses=1]
- %sub = add i8 %tmp4, -10 ; <i8> [#uses=1]
- store i8 %sub, i8* %tmp3
- ret void
-}
diff --git a/test/CodeGen/PIC16/C16-49.ll b/test/CodeGen/PIC16/C16-49.ll
deleted file mode 100644
index e59800b9a926..000000000000
--- a/test/CodeGen/PIC16/C16-49.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-;RUN: llvm-as < %s | llc -march=pic16
-
-@aa = global i16 55, align 1 ; <i16*> [#uses=1]
-@bb = global i16 44, align 1 ; <i16*> [#uses=1]
-@PORTD = external global i8 ; <i8*> [#uses=1]
-
-define void @foo() nounwind {
-entry:
- %tmp = volatile load i16* @aa ; <i16> [#uses=1]
- %tmp1 = volatile load i16* @bb ; <i16> [#uses=1]
- %sub = sub i16 %tmp, %tmp1 ; <i16> [#uses=1]
- %conv = trunc i16 %sub to i8 ; <i8> [#uses=1]
- store i8 %conv, i8* @PORTD
- ret void
-}
diff --git a/test/CodeGen/PIC16/check_inc_files.ll b/test/CodeGen/PIC16/check_inc_files.ll
deleted file mode 100644
index 436d41607374..000000000000
--- a/test/CodeGen/PIC16/check_inc_files.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llvm-as < %s | llc -march=pic16 | FileCheck %s
-
-;CHECK: #include p16f1xxx.inc
-;CHECK: #include stdmacros.inc
-
-define void @foo() nounwind {
-entry:
- ret void
-}
diff --git a/test/CodeGen/PIC16/global-in-user-section.ll b/test/CodeGen/PIC16/global-in-user-section.ll
deleted file mode 100644
index 6cdb64864ad5..000000000000
--- a/test/CodeGen/PIC16/global-in-user-section.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=pic16 | FileCheck %s
-; XFAIL: vg_leak
-
-@G1 = common global i16 0, section "usersection", align 1
-; CHECK: usersection UDATA
-; CHECK: @G1 RES 2
diff --git a/test/CodeGen/PIC16/globals.ll b/test/CodeGen/PIC16/globals.ll
deleted file mode 100644
index 3ee2e25265d3..000000000000
--- a/test/CodeGen/PIC16/globals.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s -march=pic16 | FileCheck %s
-; XFAIL: vg_leak
-
-@G1 = global i32 4712, section "Address=412"
-; CHECK: @G1.412..user_section.# IDATA 412
-; CHECK: @G1
-; CHECK: dl 4712
-
-@G2 = global i32 0, section "Address=412"
-; CHECK: @G2.412..user_section.# UDATA 412
-; CHECK: @G2 RES 4
-
-@G3 = addrspace(1) constant i32 4712, section "Address=412"
-; CHECK: @G3.412..user_section.# ROMDATA 412
-; CHECK: @G3
-; CHECK: rom_dl 4712
-
-
diff --git a/test/CodeGen/PIC16/result_direction.ll b/test/CodeGen/PIC16/result_direction.ll
deleted file mode 100644
index 8549e21b3333..000000000000
--- a/test/CodeGen/PIC16/result_direction.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llvm-as < %s | llc -march=pic16 | FileCheck %s
-
-@a = common global i16 0, align 1 ; <i16*> [#uses=2]
-
-define void @foo() nounwind {
-entry:
- %tmp = load i16* @a ; <i16> [#uses=1]
- %add = add nsw i16 %tmp, 1 ; <i16> [#uses=1]
- store i16 %add, i16* @a
-;CHECK: movlw 1
-;CHECK: addwf @a + 0, F
- ret void
-}
diff --git a/test/CodeGen/PIC16/sext.ll b/test/CodeGen/PIC16/sext.ll
deleted file mode 100644
index e51a54287ce6..000000000000
--- a/test/CodeGen/PIC16/sext.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s -march=pic16
-; XFAIL: vg_leak
-
-@main.auto.c = internal global i8 0 ; <i8*> [#uses=1]
-
-define i16 @main() nounwind {
-entry:
- %tmp = load i8* @main.auto.c ; <i8> [#uses=1]
- %conv = sext i8 %tmp to i16 ; <i16> [#uses=1]
- ret i16 %conv
-}
diff --git a/test/CodeGen/PIC16/test_indf_name.ll b/test/CodeGen/PIC16/test_indf_name.ll
deleted file mode 100644
index d52fc1125d7c..000000000000
--- a/test/CodeGen/PIC16/test_indf_name.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llvm-as < %s | llc -march=pic16 | FileCheck %s
-
-@pi = common global i16* null, align 1 ; <i16**> [#uses=1]
-
-define void @foo() nounwind {
-entry:
- %tmp = load i16** @pi ; <i16*> [#uses=1]
- store i16 1, i16* %tmp
-; CHECK: movwi {{[0-1]}}[INDF{{[0-1]}}]
-; CHECK: movwi {{[0-1]}}[INDF{{[0-1]}}]
- ret void
-}
diff --git a/test/CodeGen/PTX/add.ll b/test/CodeGen/PTX/add.ll
new file mode 100644
index 000000000000..1259d03e96c9
--- /dev/null
+++ b/test/CodeGen/PTX/add.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=ptx | FileCheck %s
+
+define ptx_device i32 @t1(i32 %x, i32 %y) {
+; CHECK: add.s32 r0, r1, r2;
+ %z = add i32 %x, %y
+; CHECK: ret;
+ ret i32 %z
+}
+
+define ptx_device i32 @t2(i32 %x) {
+; CHECK: add.s32 r0, r1, 1;
+ %z = add i32 %x, 1
+; CHECK: ret;
+ ret i32 %z
+}
diff --git a/test/CodeGen/PIC16/dg.exp b/test/CodeGen/PTX/dg.exp
index b08b9858e048..2c304b57741e 100644
--- a/test/CodeGen/PIC16/dg.exp
+++ b/test/CodeGen/PTX/dg.exp
@@ -1,5 +1,5 @@
load_lib llvm.exp
-if { [llvm_supports_target PIC16] } {
+if { [llvm_supports_target PTX] } {
RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
}
diff --git a/test/CodeGen/PTX/exit.ll b/test/CodeGen/PTX/exit.ll
new file mode 100644
index 000000000000..4071babb80ce
--- /dev/null
+++ b/test/CodeGen/PTX/exit.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=ptx | FileCheck %s
+
+define ptx_kernel void @t1() {
+; CHECK: exit;
+; CHECK-NOT: ret;
+ ret void
+}
+
+define ptx_kernel void @t2(i32* %p, i32 %x) {
+ store i32 %x, i32* %p
+; CHECK: exit;
+; CHECK-NOT: ret;
+ ret void
+}
diff --git a/test/CodeGen/PTX/ld.ll b/test/CodeGen/PTX/ld.ll
new file mode 100644
index 000000000000..836c4d41045a
--- /dev/null
+++ b/test/CodeGen/PTX/ld.ll
@@ -0,0 +1,78 @@
+; RUN: llc < %s -march=ptx | FileCheck %s
+
+;CHECK: .extern .global .s32 array[];
+@array = external global [10 x i32]
+
+;CHECK: .extern .const .s32 array_constant[];
+@array_constant = external addrspace(1) constant [10 x i32]
+
+;CHECK: .extern .local .s32 array_local[];
+@array_local = external addrspace(2) global [10 x i32]
+
+;CHECK: .extern .shared .s32 array_shared[];
+@array_shared = external addrspace(4) global [10 x i32]
+
+define ptx_device i32 @t1(i32* %p) {
+entry:
+;CHECK: ld.global.s32 r0, [r1];
+ %x = load i32* %p
+ ret i32 %x
+}
+
+define ptx_device i32 @t2(i32* %p) {
+entry:
+;CHECK: ld.global.s32 r0, [r1+4];
+ %i = getelementptr i32* %p, i32 1
+ %x = load i32* %i
+ ret i32 %x
+}
+
+define ptx_device i32 @t3(i32* %p, i32 %q) {
+entry:
+;CHECK: shl.b32 r0, r2, 2;
+;CHECK: add.s32 r0, r1, r0;
+;CHECK: ld.global.s32 r0, [r0];
+ %i = getelementptr i32* %p, i32 %q
+ %x = load i32* %i
+ ret i32 %x
+}
+
+define ptx_device i32 @t4_global() {
+entry:
+;CHECK: ld.global.s32 r0, [array];
+ %i = getelementptr [10 x i32]* @array, i32 0, i32 0
+ %x = load i32* %i
+ ret i32 %x
+}
+
+define ptx_device i32 @t4_const() {
+entry:
+;CHECK: ld.const.s32 r0, [array_constant];
+ %i = getelementptr [10 x i32] addrspace(1)* @array_constant, i32 0, i32 0
+ %x = load i32 addrspace(1)* %i
+ ret i32 %x
+}
+
+define ptx_device i32 @t4_local() {
+entry:
+;CHECK: ld.local.s32 r0, [array_local];
+ %i = getelementptr [10 x i32] addrspace(2)* @array_local, i32 0, i32 0
+ %x = load i32 addrspace(2)* %i
+ ret i32 %x
+}
+
+define ptx_device i32 @t4_shared() {
+entry:
+;CHECK: ld.shared.s32 r0, [array_shared];
+ %i = getelementptr [10 x i32] addrspace(4)* @array_shared, i32 0, i32 0
+ %x = load i32 addrspace(4)* %i
+ ret i32 %x
+}
+
+define ptx_device i32 @t5() {
+entry:
+;CHECK: ld.global.s32 r0, [array+4];
+ %i = getelementptr [10 x i32]* @array, i32 0, i32 1
+ %x = load i32* %i
+ ret i32 %x
+}
diff --git a/test/CodeGen/PTX/mov.ll b/test/CodeGen/PTX/mov.ll
new file mode 100644
index 000000000000..c365e9beb897
--- /dev/null
+++ b/test/CodeGen/PTX/mov.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=ptx | FileCheck %s
+
+define ptx_device i32 @t1() {
+; CHECK: mov.s32 r0, 0;
+; CHECK: ret;
+ ret i32 0
+}
+
+define ptx_device i32 @t2(i32 %x) {
+; CHECK: mov.s32 r0, r1;
+; CHECK: ret;
+ ret i32 %x
+}
diff --git a/test/CodeGen/PTX/options.ll b/test/CodeGen/PTX/options.ll
new file mode 100644
index 000000000000..a14d5c9c27ba
--- /dev/null
+++ b/test/CodeGen/PTX/options.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=ptx -ptx-version=2.0 | grep ".version 2.0"
+; RUN: llc < %s -march=ptx -ptx-target=sm_20 | grep ".target sm_20"
+
+define ptx_device void @t1() {
+ ret void
+}
diff --git a/test/CodeGen/PTX/ret.ll b/test/CodeGen/PTX/ret.ll
new file mode 100644
index 000000000000..d5037f25fd36
--- /dev/null
+++ b/test/CodeGen/PTX/ret.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=ptx | FileCheck %s
+
+define ptx_device void @t1() {
+; CHECK: ret;
+; CHECK-NOT: exit;
+ ret void
+}
diff --git a/test/CodeGen/PTX/shl.ll b/test/CodeGen/PTX/shl.ll
new file mode 100644
index 000000000000..b564b43ab932
--- /dev/null
+++ b/test/CodeGen/PTX/shl.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=ptx | FileCheck %s
+
+define ptx_device i32 @t1(i32 %x, i32 %y) {
+; CHECK: shl.b32 r0, r1, r2
+ %z = shl i32 %x, %y
+; CHECK: ret;
+ ret i32 %z
+}
+
+define ptx_device i32 @t2(i32 %x) {
+; CHECK: shl.b32 r0, r1, 3
+ %z = shl i32 %x, 3
+; CHECK: ret;
+ ret i32 %z
+}
+
+define ptx_device i32 @t3(i32 %x) {
+; CHECK: shl.b32 r0, 3, r1
+ %z = shl i32 3, %x
+; CHECK: ret;
+ ret i32 %z
+}
diff --git a/test/CodeGen/PTX/shr.ll b/test/CodeGen/PTX/shr.ll
new file mode 100644
index 000000000000..3f8ade862b75
--- /dev/null
+++ b/test/CodeGen/PTX/shr.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -march=ptx | FileCheck %s
+
+define ptx_device i32 @t1(i32 %x, i32 %y) {
+; CHECK: shr.u32 r0, r1, r2
+ %z = lshr i32 %x, %y
+; CHECK: ret;
+ ret i32 %z
+}
+
+define ptx_device i32 @t2(i32 %x) {
+; CHECK: shr.u32 r0, r1, 3
+ %z = lshr i32 %x, 3
+; CHECK: ret;
+ ret i32 %z
+}
+
+define ptx_device i32 @t3(i32 %x) {
+; CHECK: shr.u32 r0, 3, r1
+ %z = lshr i32 3, %x
+; CHECK: ret;
+ ret i32 %z
+}
+
+define ptx_device i32 @t4(i32 %x, i32 %y) {
+; CHECK: shr.s32 r0, r1, r2
+ %z = ashr i32 %x, %y
+; CHECK: ret;
+ ret i32 %z
+}
+
+define ptx_device i32 @t5(i32 %x) {
+; CHECK: shr.s32 r0, r1, 3
+ %z = ashr i32 %x, 3
+; CHECK: ret;
+ ret i32 %z
+}
+
+define ptx_device i32 @t6(i32 %x) {
+; CHECK: shr.s32 r0, -3, r1
+ %z = ashr i32 -3, %x
+; CHECK: ret;
+ ret i32 %z
+}
diff --git a/test/CodeGen/PTX/st.ll b/test/CodeGen/PTX/st.ll
new file mode 100644
index 000000000000..2cbacb9ee59c
--- /dev/null
+++ b/test/CodeGen/PTX/st.ll
@@ -0,0 +1,71 @@
+; RUN: llc < %s -march=ptx | FileCheck %s
+
+;CHECK: .extern .global .s32 array[];
+@array = external global [10 x i32]
+
+;CHECK: .extern .const .s32 array_constant[];
+@array_constant = external addrspace(1) constant [10 x i32]
+
+;CHECK: .extern .local .s32 array_local[];
+@array_local = external addrspace(2) global [10 x i32]
+
+;CHECK: .extern .shared .s32 array_shared[];
+@array_shared = external addrspace(4) global [10 x i32]
+
+define ptx_device void @t1(i32* %p, i32 %x) {
+entry:
+;CHECK: st.global.s32 [r1], r2;
+ store i32 %x, i32* %p
+ ret void
+}
+
+define ptx_device void @t2(i32* %p, i32 %x) {
+entry:
+;CHECK: st.global.s32 [r1+4], r2;
+ %i = getelementptr i32* %p, i32 1
+ store i32 %x, i32* %i
+ ret void
+}
+
+define ptx_device void @t3(i32* %p, i32 %q, i32 %x) {
+;CHECK: .reg .s32 r0;
+entry:
+;CHECK: shl.b32 r0, r2, 2;
+;CHECK: add.s32 r0, r1, r0;
+;CHECK: st.global.s32 [r0], r3;
+ %i = getelementptr i32* %p, i32 %q
+ store i32 %x, i32* %i
+ ret void
+}
+
+define ptx_device void @t4_global(i32 %x) {
+entry:
+;CHECK: st.global.s32 [array], r1;
+ %i = getelementptr [10 x i32]* @array, i32 0, i32 0
+ store i32 %x, i32* %i
+ ret void
+}
+
+define ptx_device void @t4_local(i32 %x) {
+entry:
+;CHECK: st.local.s32 [array_local], r1;
+ %i = getelementptr [10 x i32] addrspace(2)* @array_local, i32 0, i32 0
+ store i32 %x, i32 addrspace(2)* %i
+ ret void
+}
+
+define ptx_device void @t4_shared(i32 %x) {
+entry:
+;CHECK: st.shared.s32 [array_shared], r1;
+ %i = getelementptr [10 x i32] addrspace(4)* @array_shared, i32 0, i32 0
+ store i32 %x, i32 addrspace(4)* %i
+ ret void
+}
+
+define ptx_device void @t5(i32 %x) {
+entry:
+;CHECK: st.global.s32 [array+4], r1;
+ %i = getelementptr [10 x i32]* @array, i32 0, i32 1
+ store i32 %x, i32* %i
+ ret void
+}
diff --git a/test/CodeGen/PTX/sub.ll b/test/CodeGen/PTX/sub.ll
new file mode 100644
index 000000000000..aab3fdadad13
--- /dev/null
+++ b/test/CodeGen/PTX/sub.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=ptx | FileCheck %s
+
+define ptx_device i32 @t1(i32 %x, i32 %y) {
+;CHECK: sub.s32 r0, r1, r2;
+ %z = sub i32 %x, %y
+;CHECK: ret;
+ ret i32 %z
+}
+
+define ptx_device i32 @t2(i32 %x) {
+;CHECK: add.s32 r0, r1, -1;
+ %z = sub i32 %x, 1
+;CHECK: ret;
+ ret i32 %z
+}
diff --git a/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll b/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll
index e93395a67ec6..cca9e658ad5f 100644
--- a/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll
+++ b/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=ppc64 -mcpu=g5 | grep cntlzd
-define i32 @_ZNK4llvm5APInt17countLeadingZerosEv(i64 *%t) {
+define i32 @_ZNK4llvm5APInt17countLeadingZerosEv(i64 *%t) nounwind {
%tmp19 = load i64* %t
%tmp22 = tail call i64 @llvm.ctlz.i64( i64 %tmp19 ) ; <i64> [#uses=1]
%tmp23 = trunc i64 %tmp22 to i32
diff --git a/test/CodeGen/PowerPC/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/PowerPC/2010-04-07-DbgValueOtherTargets.ll
index f48f32f8fb17..4a850984a909 100644
--- a/test/CodeGen/PowerPC/2010-04-07-DbgValueOtherTargets.ll
+++ b/test/CodeGen/PowerPC/2010-04-07-DbgValueOtherTargets.ll
@@ -1,33 +1,28 @@
; RUN: llc -O0 -march=ppc32 -asm-verbose < %s | FileCheck %s
; Check that DEBUG_VALUE comments come through on a variety of targets.
-%tart.reflect.ComplexType = type { double, double }
-
-@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
-
-define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+define i32 @main() nounwind ssp {
entry:
; CHECK: DEBUG_VALUE
- tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
- tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
- ret i32 3
+ call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
+ ret i32 0, !dbg !10
}
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
-!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
-!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
-!3 = metadata !{metadata !4, metadata !6, metadata !7}
-!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
-!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
-!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
-!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
-!9 = metadata !{i32 458763, metadata !10} ; [ DW_TAG_lexical_block ]
-!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 0}
+!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!9 = metadata !{i32 3, i32 11, metadata !8, null}
+!10 = metadata !{i32 4, i32 2, metadata !8, null}
+
diff --git a/test/CodeGen/PowerPC/2010-10-11-Fast-Varargs.ll b/test/CodeGen/PowerPC/2010-10-11-Fast-Varargs.ll
new file mode 100644
index 000000000000..da77b2878543
--- /dev/null
+++ b/test/CodeGen/PowerPC/2010-10-11-Fast-Varargs.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -O0
+; PR8357
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32"
+target triple = "powerpc-unknown-freebsd9.0"
+
+; RegAllocFast requires that each physreg only be used once. The varargs
+; lowering code needs to use virtual registers when storing live-in registers on
+; the stack.
+
+define i32 @testing(i32 %x, float %a, ...) nounwind {
+ %1 = alloca i32, align 4
+ %2 = alloca float, align 4
+ store i32 %x, i32* %1, align 4
+ store float %a, float* %2, align 4
+ ret i32 0
+}
diff --git a/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll b/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll
new file mode 100644
index 000000000000..bf3d577a3677
--- /dev/null
+++ b/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll
@@ -0,0 +1,22 @@
+; RUN: llc -disable-fp-elim < %s | FileCheck %s
+; PR8749
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32"
+target triple = "powerpc-apple-darwin9.8"
+
+define i32 @main() nounwind {
+entry:
+; Make sure we're generating references using the red zone
+; CHECK: main:
+; CHECK: stw r3, -12(r1)
+ %retval = alloca i32
+ %0 = alloca i32
+ %"alloca point" = bitcast i32 0 to i32
+ store i32 0, i32* %0, align 4
+ %1 = load i32* %0, align 4
+ store i32 %1, i32* %retval, align 4
+ br label %return
+
+return: ; preds = %entry
+ %retval1 = load i32* %retval
+ ret i32 %retval1
+}
diff --git a/test/CodeGen/PowerPC/align.ll b/test/CodeGen/PowerPC/align.ll
index 109a83726e98..0797ca8d0be8 100644
--- a/test/CodeGen/PowerPC/align.ll
+++ b/test/CodeGen/PowerPC/align.ll
@@ -1,5 +1,6 @@
; RUN: llc < %s -mtriple=powerpc-linux-gnu | FileCheck %s -check-prefix=ELF
; RUN: llc < %s -mtriple=powerpc-apple-darwin9 | FileCheck %s -check-prefix=DARWIN
+; RUN: llc < %s -mtriple=powerpc-apple-darwin8 | FileCheck %s -check-prefix=DARWIN8
@a = global i1 true
; no alignment
@@ -40,3 +41,6 @@
@bar = common global [75 x i8] zeroinitializer, align 128
;ELF: .comm bar,75,128
;DARWIN: .comm _bar,75,7
+
+;; Darwin8 doesn't support aligned comm. Just miscompile this.
+; DARWIN8: .comm _bar,75 ;
diff --git a/test/CodeGen/PowerPC/compare-simm.ll b/test/CodeGen/PowerPC/compare-simm.ll
index 5ba050060fcb..92d1dbe902a1 100644
--- a/test/CodeGen/PowerPC/compare-simm.ll
+++ b/test/CodeGen/PowerPC/compare-simm.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
; RUN: grep {cmpwi cr0, r3, -1}
-define i32 @test(i32 %x) {
+define i32 @test(i32 %x) nounwind {
%c = icmp eq i32 %x, -1
br i1 %c, label %T, label %F
T:
diff --git a/test/CodeGen/PowerPC/indirectbr.ll b/test/CodeGen/PowerPC/indirectbr.ll
index ab8d9dca5dc4..5122ab39d232 100644
--- a/test/CodeGen/PowerPC/indirectbr.ll
+++ b/test/CodeGen/PowerPC/indirectbr.ll
@@ -43,8 +43,8 @@ L2: ; preds = %L3, %bb2
L1: ; preds = %L2, %bb2
%res.3 = phi i32 [ %phitmp, %L2 ], [ 2, %bb2 ] ; <i32> [#uses=1]
-; PIC: addis r4, r4, ha16(Ltmp0-"L0$pb")
-; PIC: li r6, lo16(Ltmp0-"L0$pb")
+; PIC: addis r4, r4, ha16(Ltmp0-L0$pb)
+; PIC: li r6, lo16(Ltmp0-L0$pb)
; PIC: add r4, r4, r6
; PIC: stw r4
; STATIC: li r5, lo16(Ltmp0)
diff --git a/test/CodeGen/PowerPC/mult-alt-generic-powerpc.ll b/test/CodeGen/PowerPC/mult-alt-generic-powerpc.ll
new file mode 100644
index 000000000000..659cdf74d026
--- /dev/null
+++ b/test/CodeGen/PowerPC/mult-alt-generic-powerpc.ll
@@ -0,0 +1,321 @@
+; RUN: llc < %s -march=ppc32
+; ModuleID = 'mult-alt-generic.c'
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32"
+target triple = "powerpc"
+
+@mout0 = common global i32 0, align 4
+@min1 = common global i32 0, align 4
+@marray = common global [2 x i32] zeroinitializer, align 4
+
+define void @single_m() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*m,*m"(i32* @mout0, i32* @min1) nounwind
+ ret void
+}
+
+define void @single_o() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %index = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %index, align 4
+ ret void
+}
+
+define void @single_V() nounwind {
+entry:
+ ret void
+}
+
+define void @single_lt() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,<r"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* %in1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r,r<"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ ret void
+}
+
+define void @single_gt() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,>r"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* %in1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r,r>"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ ret void
+}
+
+define void @single_r() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,r"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @single_i() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,i"(i32 1) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @single_n() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,n"(i32 1) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @single_E() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r,E"(double 1.000000e+001) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define void @single_F() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r,F"(double 1.000000e+000) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define void @single_s() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ ret void
+}
+
+define void @single_g() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* @min1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ %2 = call i32 asm "foo $1,$0", "=r,imr"(i32 1) nounwind
+ store i32 %2, i32* %out0, align 4
+ ret void
+}
+
+define void @single_X() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,X"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* @min1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r,X"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ %2 = call i32 asm "foo $1,$0", "=r,X"(i32 1) nounwind
+ store i32 %2, i32* %out0, align 4
+ %3 = call i32 asm "foo $1,$0", "=r,X"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ store i32 %3, i32* %out0, align 4
+ %4 = call i32 asm "foo $1,$0", "=r,X"(double 1.000000e+001) nounwind
+ store i32 %4, i32* %out0, align 4
+ %5 = call i32 asm "foo $1,$0", "=r,X"(double 1.000000e+000) nounwind
+ store i32 %5, i32* %out0, align 4
+ ret void
+}
+
+define void @single_p() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,r"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_m() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ call void asm "foo $1,$0", "=*m|r,m|r"(i32* @mout0, i32 %tmp) nounwind
+ ret void
+}
+
+define void @multi_o() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %index = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %index, align 4
+ ret void
+}
+
+define void @multi_V() nounwind {
+entry:
+ ret void
+}
+
+define void @multi_lt() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|<r"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* %in1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r|r,r|r<"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_gt() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|>r"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* %in1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r|r,r|r>"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_r() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|m"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_i() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|i"(i32 1) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_n() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|n"(i32 1) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_E() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r|r,r|E"(double 1.000000e+001) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define void @multi_F() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r|r,r|F"(double 1.000000e+000) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define void @multi_s() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_g() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* @min1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ %2 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 1) nounwind
+ store i32 %2, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_X() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* @min1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ %2 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 1) nounwind
+ store i32 %2, i32* %out0, align 4
+ %3 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ store i32 %3, i32* %out0, align 4
+ %4 = call i32 asm "foo $1,$0", "=r|r,r|X"(double 1.000000e+001) nounwind
+ store i32 %4, i32* %out0, align 4
+ %5 = call i32 asm "foo $1,$0", "=r|r,r|X"(double 1.000000e+000) nounwind
+ store i32 %5, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_p() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|r"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/mult-alt-generic-powerpc64.ll b/test/CodeGen/PowerPC/mult-alt-generic-powerpc64.ll
new file mode 100644
index 000000000000..3da06f65db83
--- /dev/null
+++ b/test/CodeGen/PowerPC/mult-alt-generic-powerpc64.ll
@@ -0,0 +1,321 @@
+; RUN: llc < %s -march=ppc64
+; ModuleID = 'mult-alt-generic.c'
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64"
+
+@mout0 = common global i32 0, align 4
+@min1 = common global i32 0, align 4
+@marray = common global [2 x i32] zeroinitializer, align 4
+
+define void @single_m() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*m,*m"(i32* @mout0, i32* @min1) nounwind
+ ret void
+}
+
+define void @single_o() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %index = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %index, align 4
+ ret void
+}
+
+define void @single_V() nounwind {
+entry:
+ ret void
+}
+
+define void @single_lt() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,<r"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* %in1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r,r<"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ ret void
+}
+
+define void @single_gt() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,>r"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* %in1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r,r>"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ ret void
+}
+
+define void @single_r() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,r"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @single_i() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,i"(i32 1) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @single_n() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,n"(i32 1) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @single_E() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r,E"(double 1.000000e+001) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define void @single_F() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r,F"(double 1.000000e+000) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define void @single_s() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ ret void
+}
+
+define void @single_g() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* @min1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ %2 = call i32 asm "foo $1,$0", "=r,imr"(i32 1) nounwind
+ store i32 %2, i32* %out0, align 4
+ ret void
+}
+
+define void @single_X() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,X"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* @min1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r,X"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ %2 = call i32 asm "foo $1,$0", "=r,X"(i32 1) nounwind
+ store i32 %2, i32* %out0, align 4
+ %3 = call i32 asm "foo $1,$0", "=r,X"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ store i32 %3, i32* %out0, align 4
+ %4 = call i32 asm "foo $1,$0", "=r,X"(double 1.000000e+001) nounwind
+ store i32 %4, i32* %out0, align 4
+ %5 = call i32 asm "foo $1,$0", "=r,X"(double 1.000000e+000) nounwind
+ store i32 %5, i32* %out0, align 4
+ ret void
+}
+
+define void @single_p() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,r"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_m() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ call void asm "foo $1,$0", "=*m|r,m|r"(i32* @mout0, i32 %tmp) nounwind
+ ret void
+}
+
+define void @multi_o() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %index = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %index, align 4
+ ret void
+}
+
+define void @multi_V() nounwind {
+entry:
+ ret void
+}
+
+define void @multi_lt() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|<r"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* %in1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r|r,r|r<"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_gt() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|>r"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* %in1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r|r,r|r>"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_r() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|m"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_i() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|i"(i32 1) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_n() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|n"(i32 1) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_E() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r|r,r|E"(double 1.000000e+001) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define void @multi_F() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r|r,r|F"(double 1.000000e+000) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define void @multi_s() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_g() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* @min1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ %2 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 1) nounwind
+ store i32 %2, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_X() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* @min1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ %2 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 1) nounwind
+ store i32 %2, i32* %out0, align 4
+ %3 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ store i32 %3, i32* %out0, align 4
+ %4 = call i32 asm "foo $1,$0", "=r|r,r|X"(double 1.000000e+001) nounwind
+ store i32 %4, i32* %out0, align 4
+ %5 = call i32 asm "foo $1,$0", "=r|r,r|X"(double 1.000000e+000) nounwind
+ store i32 %5, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_p() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|r"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/rlwimi2.ll b/test/CodeGen/PowerPC/rlwimi2.ll
index 59a36555bf86..1bee4e03f1b0 100644
--- a/test/CodeGen/PowerPC/rlwimi2.ll
+++ b/test/CodeGen/PowerPC/rlwimi2.ll
@@ -4,7 +4,7 @@
; RUN: grep srwi %t | count 1
; RUN: not grep slwi %t
-define i16 @test1(i32 %srcA, i32 %srcB, i32 %alpha) {
+define i16 @test1(i32 %srcA, i32 %srcB, i32 %alpha) nounwind {
entry:
%tmp.1 = shl i32 %srcA, 15 ; <i32> [#uses=1]
%tmp.4 = and i32 %tmp.1, 32505856 ; <i32> [#uses=1]
diff --git a/test/CodeGen/PowerPC/stfiwx.ll b/test/CodeGen/PowerPC/stfiwx.ll
index d1c3f5234a26..1ad558c6abc9 100644
--- a/test/CodeGen/PowerPC/stfiwx.ll
+++ b/test/CodeGen/PowerPC/stfiwx.ll
@@ -6,13 +6,13 @@
; RUN: not grep stfiwx %t2
; RUN: grep r1 %t2
-define void @test(float %a, i32* %b) {
+define void @test(float %a, i32* %b) nounwind {
%tmp.2 = fptosi float %a to i32 ; <i32> [#uses=1]
store i32 %tmp.2, i32* %b
ret void
}
-define void @test2(float %a, i32* %b, i32 %i) {
+define void @test2(float %a, i32* %b, i32 %i) nounwind {
%tmp.2 = getelementptr i32* %b, i32 1 ; <i32*> [#uses=1]
%tmp.5 = getelementptr i32* %b, i32 %i ; <i32*> [#uses=1]
%tmp.7 = fptosi float %a to i32 ; <i32> [#uses=3]
diff --git a/test/CodeGen/PowerPC/tango.net.ftp.FtpClient.ll b/test/CodeGen/PowerPC/tango.net.ftp.FtpClient.ll
deleted file mode 100644
index 6f103462664f..000000000000
--- a/test/CodeGen/PowerPC/tango.net.ftp.FtpClient.ll
+++ /dev/null
@@ -1,585 +0,0 @@
-; RN: llc < %s
-; RUN: false
-; XFAIL: *
-; PR4534
-
-; ModuleID = 'tango.net.ftp.FtpClient.bc'
-target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
-target triple = "powerpc-apple-darwin9.6.0"
- %"byte[]" = type { i32, i8* }
-@.str167 = external constant [11 x i8] ; <[11 x i8]*> [#uses=1]
-@.str170 = external constant [11 x i8] ; <[11 x i8]*> [#uses=2]
-@.str171 = external constant [5 x i8] ; <[5 x i8]*> [#uses=1]
-@llvm.used = appending global [1 x i8*] [i8* bitcast (void (%"byte[]")* @foo to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
-
-define fastcc void @foo(%"byte[]" %line_arg) {
-entry:
- %line_arg830 = extractvalue %"byte[]" %line_arg, 0 ; <i32> [#uses=12]
- %line_arg831 = extractvalue %"byte[]" %line_arg, 1 ; <i8*> [#uses=17]
- %t5 = load i8* %line_arg831 ; <i8> [#uses=1]
- br label %forcondi
-
-forcondi: ; preds = %forbodyi, %entry
- %l.0i = phi i32 [ 10, %entry ], [ %t4i, %forbodyi ] ; <i32> [#uses=2]
- %p.0i = phi i8* [ getelementptr ([11 x i8]* @.str167, i32 0, i32 -1), %entry ], [ %t7i, %forbodyi ] ; <i8*> [#uses=1]
- %t4i = add i32 %l.0i, -1 ; <i32> [#uses=1]
- %t5i = icmp eq i32 %l.0i, 0 ; <i1> [#uses=1]
- br i1 %t5i, label %forcond.i, label %forbodyi
-
-forbodyi: ; preds = %forcondi
- %t7i = getelementptr i8* %p.0i, i32 1 ; <i8*> [#uses=2]
- %t8i = load i8* %t7i ; <i8> [#uses=1]
- %t12i = icmp eq i8 %t8i, %t5 ; <i1> [#uses=1]
- br i1 %t12i, label %forcond.i, label %forcondi
-
-forcond.i: ; preds = %forbody.i, %forbodyi, %forcondi
- %storemerge.i = phi i32 [ %t106.i, %forbody.i ], [ 1, %forcondi ], [ 1, %forbodyi ] ; <i32> [#uses=1]
- %t77.i286 = phi i1 [ %phit3, %forbody.i ], [ false, %forcondi ], [ false, %forbodyi ] ; <i1> [#uses=1]
- br i1 %t77.i286, label %forcond.i295, label %forbody.i
-
-forbody.i: ; preds = %forcond.i
- %t106.i = add i32 %storemerge.i, 1 ; <i32> [#uses=2]
- %phit3 = icmp ugt i32 %t106.i, 3 ; <i1> [#uses=1]
- br label %forcond.i
-
-forcond.i295: ; preds = %forbody.i301, %forcond.i
- %storemerge.i292 = phi i32 [ %t106.i325, %forbody.i301 ], [ 4, %forcond.i ] ; <i32> [#uses=1]
- %t77.i293 = phi i1 [ %phit2, %forbody.i301 ], [ false, %forcond.i ] ; <i1> [#uses=1]
- br i1 %t77.i293, label %forcond.i332, label %forbody.i301
-
-forbody.i301: ; preds = %forcond.i295
- %t106.i325 = add i32 %storemerge.i292, 1 ; <i32> [#uses=2]
- %phit2 = icmp ugt i32 %t106.i325, 6 ; <i1> [#uses=1]
- br label %forcond.i295
-
-forcond.i332: ; preds = %forbody.i338, %forcond.i295
- %storemerge.i329 = phi i32 [ %t106.i362, %forbody.i338 ], [ 7, %forcond.i295 ] ; <i32> [#uses=3]
- %t77.i330 = phi i1 [ %phit1, %forbody.i338 ], [ false, %forcond.i295 ] ; <i1> [#uses=1]
- br i1 %t77.i330, label %wcond.i370, label %forbody.i338
-
-forbody.i338: ; preds = %forcond.i332
- %t106.i362 = add i32 %storemerge.i329, 1 ; <i32> [#uses=2]
- %phit1 = icmp ugt i32 %t106.i362, 9 ; <i1> [#uses=1]
- br label %forcond.i332
-
-wcond.i370: ; preds = %wbody.i372, %forcond.i332
- %.frame.0.11 = phi i32 [ %t18.i371.c, %wbody.i372 ], [ %storemerge.i329, %forcond.i332 ] ; <i32> [#uses=2]
- %t3.i368 = phi i32 [ %t18.i371.c, %wbody.i372 ], [ %storemerge.i329, %forcond.i332 ] ; <i32> [#uses=5]
- %t4.i369 = icmp ult i32 %t3.i368, %line_arg830 ; <i1> [#uses=1]
- br i1 %t4.i369, label %andand.i378, label %wcond22.i383
-
-wbody.i372: ; preds = %andand.i378
- %t18.i371.c = add i32 %t3.i368, 1 ; <i32> [#uses=2]
- br label %wcond.i370
-
-andand.i378: ; preds = %wcond.i370
- %t11.i375 = getelementptr i8* %line_arg831, i32 %t3.i368 ; <i8*> [#uses=1]
- %t12.i376 = load i8* %t11.i375 ; <i8> [#uses=1]
- %t14.i377 = icmp eq i8 %t12.i376, 32 ; <i1> [#uses=1]
- br i1 %t14.i377, label %wbody.i372, label %wcond22.i383
-
-wcond22.i383: ; preds = %wbody23.i385, %andand.i378, %wcond.i370
- %.frame.0.10 = phi i32 [ %t50.i384, %wbody23.i385 ], [ %.frame.0.11, %wcond.i370 ], [ %.frame.0.11, %andand.i378 ] ; <i32> [#uses=2]
- %t49.i381 = phi i32 [ %t50.i384, %wbody23.i385 ], [ %t3.i368, %wcond.i370 ], [ %t3.i368, %andand.i378 ] ; <i32> [#uses=5]
- %t32.i382 = icmp ult i32 %t49.i381, %line_arg830 ; <i1> [#uses=1]
- br i1 %t32.i382, label %andand33.i391, label %wcond54.i396
-
-wbody23.i385: ; preds = %andand33.i391
- %t50.i384 = add i32 %t49.i381, 1 ; <i32> [#uses=2]
- br label %wcond22.i383
-
-andand33.i391: ; preds = %wcond22.i383
- %t42.i388 = getelementptr i8* %line_arg831, i32 %t49.i381 ; <i8*> [#uses=1]
- %t43.i389 = load i8* %t42.i388 ; <i8> [#uses=1]
- %t45.i390 = icmp eq i8 %t43.i389, 32 ; <i1> [#uses=1]
- br i1 %t45.i390, label %wcond54.i396, label %wbody23.i385
-
-wcond54.i396: ; preds = %wbody55.i401, %andand33.i391, %wcond22.i383
- %.frame.0.9 = phi i32 [ %t82.i400, %wbody55.i401 ], [ %.frame.0.10, %wcond22.i383 ], [ %.frame.0.10, %andand33.i391 ] ; <i32> [#uses=2]
- %t81.i394 = phi i32 [ %t82.i400, %wbody55.i401 ], [ %t49.i381, %wcond22.i383 ], [ %t49.i381, %andand33.i391 ] ; <i32> [#uses=3]
- %t64.i395 = icmp ult i32 %t81.i394, %line_arg830 ; <i1> [#uses=1]
- br i1 %t64.i395, label %andand65.i407, label %wcond.i716
-
-wbody55.i401: ; preds = %andand65.i407
- %t82.i400 = add i32 %t81.i394, 1 ; <i32> [#uses=2]
- br label %wcond54.i396
-
-andand65.i407: ; preds = %wcond54.i396
- %t74.i404 = getelementptr i8* %line_arg831, i32 %t81.i394 ; <i8*> [#uses=1]
- %t75.i405 = load i8* %t74.i404 ; <i8> [#uses=1]
- %t77.i406 = icmp eq i8 %t75.i405, 32 ; <i1> [#uses=1]
- br i1 %t77.i406, label %wbody55.i401, label %wcond.i716
-
-wcond.i716: ; preds = %wbody.i717, %andand65.i407, %wcond54.i396
- %.frame.0.0 = phi i32 [ %t18.i.c829, %wbody.i717 ], [ %.frame.0.9, %wcond54.i396 ], [ %.frame.0.9, %andand65.i407 ] ; <i32> [#uses=7]
- %t4.i715 = icmp ult i32 %.frame.0.0, %line_arg830 ; <i1> [#uses=1]
- br i1 %t4.i715, label %andand.i721, label %wcond22.i724
-
-wbody.i717: ; preds = %andand.i721
- %t18.i.c829 = add i32 %.frame.0.0, 1 ; <i32> [#uses=1]
- br label %wcond.i716
-
-andand.i721: ; preds = %wcond.i716
- %t11.i718 = getelementptr i8* %line_arg831, i32 %.frame.0.0 ; <i8*> [#uses=1]
- %t12.i719 = load i8* %t11.i718 ; <i8> [#uses=1]
- %t14.i720 = icmp eq i8 %t12.i719, 32 ; <i1> [#uses=1]
- br i1 %t14.i720, label %wbody.i717, label %wcond22.i724
-
-wcond22.i724: ; preds = %wbody23.i726, %andand.i721, %wcond.i716
- %.frame.0.1 = phi i32 [ %t50.i725, %wbody23.i726 ], [ %.frame.0.0, %wcond.i716 ], [ %.frame.0.0, %andand.i721 ] ; <i32> [#uses=2]
- %t49.i722 = phi i32 [ %t50.i725, %wbody23.i726 ], [ %.frame.0.0, %wcond.i716 ], [ %.frame.0.0, %andand.i721 ] ; <i32> [#uses=5]
- %t32.i723 = icmp ult i32 %t49.i722, %line_arg830 ; <i1> [#uses=1]
- br i1 %t32.i723, label %andand33.i731, label %wcond54.i734
-
-wbody23.i726: ; preds = %andand33.i731
- %t50.i725 = add i32 %t49.i722, 1 ; <i32> [#uses=2]
- br label %wcond22.i724
-
-andand33.i731: ; preds = %wcond22.i724
- %t42.i728 = getelementptr i8* %line_arg831, i32 %t49.i722 ; <i8*> [#uses=1]
- %t43.i729 = load i8* %t42.i728 ; <i8> [#uses=1]
- %t45.i730 = icmp eq i8 %t43.i729, 32 ; <i1> [#uses=1]
- br i1 %t45.i730, label %wcond54.i734, label %wbody23.i726
-
-wcond54.i734: ; preds = %wbody55.i736, %andand33.i731, %wcond22.i724
- %.frame.0.2 = phi i32 [ %t82.i735, %wbody55.i736 ], [ %.frame.0.1, %wcond22.i724 ], [ %.frame.0.1, %andand33.i731 ] ; <i32> [#uses=2]
- %t81.i732 = phi i32 [ %t82.i735, %wbody55.i736 ], [ %t49.i722, %wcond22.i724 ], [ %t49.i722, %andand33.i731 ] ; <i32> [#uses=3]
- %t64.i733 = icmp ult i32 %t81.i732, %line_arg830 ; <i1> [#uses=1]
- br i1 %t64.i733, label %andand65.i740, label %wcond.i750
-
-wbody55.i736: ; preds = %andand65.i740
- %t82.i735 = add i32 %t81.i732, 1 ; <i32> [#uses=2]
- br label %wcond54.i734
-
-andand65.i740: ; preds = %wcond54.i734
- %t74.i737 = getelementptr i8* %line_arg831, i32 %t81.i732 ; <i8*> [#uses=1]
- %t75.i738 = load i8* %t74.i737 ; <i8> [#uses=1]
- %t77.i739 = icmp eq i8 %t75.i738, 32 ; <i1> [#uses=1]
- br i1 %t77.i739, label %wbody55.i736, label %wcond.i750
-
-wcond.i750: ; preds = %wbody.i752, %andand65.i740, %wcond54.i734
- %.frame.0.3 = phi i32 [ %t18.i751.c, %wbody.i752 ], [ %.frame.0.2, %wcond54.i734 ], [ %.frame.0.2, %andand65.i740 ] ; <i32> [#uses=11]
- %t4.i749 = icmp ult i32 %.frame.0.3, %line_arg830 ; <i1> [#uses=1]
- br i1 %t4.i749, label %andand.i758, label %wcond22.i761
-
-wbody.i752: ; preds = %andand.i758
- %t18.i751.c = add i32 %.frame.0.3, 1 ; <i32> [#uses=1]
- br label %wcond.i750
-
-andand.i758: ; preds = %wcond.i750
- %t11.i755 = getelementptr i8* %line_arg831, i32 %.frame.0.3 ; <i8*> [#uses=1]
- %t12.i756 = load i8* %t11.i755 ; <i8> [#uses=1]
- %t14.i757 = icmp eq i8 %t12.i756, 32 ; <i1> [#uses=1]
- br i1 %t14.i757, label %wbody.i752, label %wcond22.i761
-
-wcond22.i761: ; preds = %wbody23.i763, %andand.i758, %wcond.i750
- %.frame.0.4 = phi i32 [ %t50.i762, %wbody23.i763 ], [ %.frame.0.3, %wcond.i750 ], [ %.frame.0.3, %andand.i758 ] ; <i32> [#uses=2]
- %t49.i759 = phi i32 [ %t50.i762, %wbody23.i763 ], [ %.frame.0.3, %wcond.i750 ], [ %.frame.0.3, %andand.i758 ] ; <i32> [#uses=7]
- %t32.i760 = icmp ult i32 %t49.i759, %line_arg830 ; <i1> [#uses=1]
- br i1 %t32.i760, label %andand33.i769, label %wcond54.i773
-
-wbody23.i763: ; preds = %andand33.i769
- %t50.i762 = add i32 %t49.i759, 1 ; <i32> [#uses=2]
- br label %wcond22.i761
-
-andand33.i769: ; preds = %wcond22.i761
- %t42.i766 = getelementptr i8* %line_arg831, i32 %t49.i759 ; <i8*> [#uses=1]
- %t43.i767 = load i8* %t42.i766 ; <i8> [#uses=1]
- %t45.i768 = icmp eq i8 %t43.i767, 32 ; <i1> [#uses=1]
- br i1 %t45.i768, label %wcond54.i773, label %wbody23.i763
-
-wcond54.i773: ; preds = %wbody55.i775, %andand33.i769, %wcond22.i761
- %.frame.0.5 = phi i32 [ %t82.i774, %wbody55.i775 ], [ %.frame.0.4, %wcond22.i761 ], [ %.frame.0.4, %andand33.i769 ] ; <i32> [#uses=1]
- %t81.i770 = phi i32 [ %t82.i774, %wbody55.i775 ], [ %t49.i759, %wcond22.i761 ], [ %t49.i759, %andand33.i769 ] ; <i32> [#uses=3]
- %t64.i771 = icmp ult i32 %t81.i770, %line_arg830 ; <i1> [#uses=1]
- br i1 %t64.i771, label %andand65.i780, label %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit786
-
-wbody55.i775: ; preds = %andand65.i780
- %t82.i774 = add i32 %t81.i770, 1 ; <i32> [#uses=2]
- br label %wcond54.i773
-
-andand65.i780: ; preds = %wcond54.i773
- %t74.i777 = getelementptr i8* %line_arg831, i32 %t81.i770 ; <i8*> [#uses=1]
- %t75.i778 = load i8* %t74.i777 ; <i8> [#uses=1]
- %t77.i779 = icmp eq i8 %t75.i778, 32 ; <i1> [#uses=1]
- br i1 %t77.i779, label %wbody55.i775, label %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit786
-
-Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit786: ; preds = %andand65.i780, %wcond54.i773
- %t89.i782 = getelementptr i8* %line_arg831, i32 %.frame.0.3 ; <i8*> [#uses=4]
- %t90.i783 = sub i32 %t49.i759, %.frame.0.3 ; <i32> [#uses=2]
- br label %wcond.i792
-
-wcond.i792: ; preds = %wbody.i794, %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit786
- %.frame.0.6 = phi i32 [ %.frame.0.5, %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit786 ], [ %t18.i793.c, %wbody.i794 ] ; <i32> [#uses=9]
- %t4.i791 = icmp ult i32 %.frame.0.6, %line_arg830 ; <i1> [#uses=1]
- br i1 %t4.i791, label %andand.i800, label %wcond22.i803
-
-wbody.i794: ; preds = %andand.i800
- %t18.i793.c = add i32 %.frame.0.6, 1 ; <i32> [#uses=1]
- br label %wcond.i792
-
-andand.i800: ; preds = %wcond.i792
- %t11.i797 = getelementptr i8* %line_arg831, i32 %.frame.0.6 ; <i8*> [#uses=1]
- %t12.i798 = load i8* %t11.i797 ; <i8> [#uses=1]
- %t14.i799 = icmp eq i8 %t12.i798, 32 ; <i1> [#uses=1]
- br i1 %t14.i799, label %wbody.i794, label %wcond22.i803
-
-wcond22.i803: ; preds = %wbody23.i805, %andand.i800, %wcond.i792
- %t49.i801 = phi i32 [ %t50.i804, %wbody23.i805 ], [ %.frame.0.6, %wcond.i792 ], [ %.frame.0.6, %andand.i800 ] ; <i32> [#uses=7]
- %t32.i802 = icmp ult i32 %t49.i801, %line_arg830 ; <i1> [#uses=1]
- br i1 %t32.i802, label %andand33.i811, label %wcond54.i815
-
-wbody23.i805: ; preds = %andand33.i811
- %t50.i804 = add i32 %t49.i801, 1 ; <i32> [#uses=1]
- br label %wcond22.i803
-
-andand33.i811: ; preds = %wcond22.i803
- %t42.i808 = getelementptr i8* %line_arg831, i32 %t49.i801 ; <i8*> [#uses=1]
- %t43.i809 = load i8* %t42.i808 ; <i8> [#uses=1]
- %t45.i810 = icmp eq i8 %t43.i809, 32 ; <i1> [#uses=1]
- br i1 %t45.i810, label %wcond54.i815, label %wbody23.i805
-
-wcond54.i815: ; preds = %wbody55.i817, %andand33.i811, %wcond22.i803
- %t81.i812 = phi i32 [ %t82.i816, %wbody55.i817 ], [ %t49.i801, %wcond22.i803 ], [ %t49.i801, %andand33.i811 ] ; <i32> [#uses=3]
- %t64.i813 = icmp ult i32 %t81.i812, %line_arg830 ; <i1> [#uses=1]
- br i1 %t64.i813, label %andand65.i822, label %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit828
-
-wbody55.i817: ; preds = %andand65.i822
- %t82.i816 = add i32 %t81.i812, 1 ; <i32> [#uses=1]
- br label %wcond54.i815
-
-andand65.i822: ; preds = %wcond54.i815
- %t74.i819 = getelementptr i8* %line_arg831, i32 %t81.i812 ; <i8*> [#uses=1]
- %t75.i820 = load i8* %t74.i819 ; <i8> [#uses=1]
- %t77.i821 = icmp eq i8 %t75.i820, 32 ; <i1> [#uses=1]
- br i1 %t77.i821, label %wbody55.i817, label %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit828
-
-Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit828: ; preds = %andand65.i822, %wcond54.i815
- %t89.i824 = getelementptr i8* %line_arg831, i32 %.frame.0.6 ; <i8*> [#uses=4]
- %t90.i825 = sub i32 %t49.i801, %.frame.0.6 ; <i32> [#uses=2]
- %t63 = load i8* %t89.i824 ; <i8> [#uses=2]
- br label %forcondi622
-
-forcondi622: ; preds = %forbodyi626, %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit828
- %l.0i618 = phi i32 [ 10, %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit828 ], [ %t4i620, %forbodyi626 ] ; <i32> [#uses=2]
- %p.0i619 = phi i8* [ getelementptr ([11 x i8]* @.str170, i32 0, i32 -1), %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit828 ], [ %t7i623, %forbodyi626 ] ; <i8*> [#uses=1]
- %t4i620 = add i32 %l.0i618, -1 ; <i32> [#uses=1]
- %t5i621 = icmp eq i32 %l.0i618, 0 ; <i1> [#uses=1]
- br i1 %t5i621, label %if65, label %forbodyi626
-
-forbodyi626: ; preds = %forcondi622
- %t7i623 = getelementptr i8* %p.0i619, i32 1 ; <i8*> [#uses=3]
- %t8i624 = load i8* %t7i623 ; <i8> [#uses=1]
- %t12i625 = icmp eq i8 %t8i624, %t63 ; <i1> [#uses=1]
- br i1 %t12i625, label %ifi630, label %forcondi622
-
-ifi630: ; preds = %forbodyi626
- %t15i627 = ptrtoint i8* %t7i623 to i32 ; <i32> [#uses=1]
- %t17i629 = sub i32 %t15i627, ptrtoint ([11 x i8]* @.str170 to i32) ; <i32> [#uses=1]
- %phit636 = icmp eq i32 %t17i629, 10 ; <i1> [#uses=1]
- br i1 %phit636, label %if65, label %e67
-
-if65: ; preds = %ifi630, %forcondi622
- %t4i532 = icmp eq i32 %t49.i759, %.frame.0.3 ; <i1> [#uses=1]
- br i1 %t4i532, label %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i576, label %forcondi539
-
-forcondi539: ; preds = %zi546, %if65
- %sign.1.i533 = phi i1 [ %sign.0.i543, %zi546 ], [ false, %if65 ] ; <i1> [#uses=2]
- %l.0i534 = phi i32 [ %t33i545, %zi546 ], [ %t90.i783, %if65 ] ; <i32> [#uses=3]
- %p.0i535 = phi i8* [ %t30i544, %zi546 ], [ %t89.i782, %if65 ] ; <i8*> [#uses=6]
- %c.0.ini536 = phi i8* [ %t30i544, %zi546 ], [ %t89.i782, %if65 ] ; <i8*> [#uses=1]
- %c.0i537 = load i8* %c.0.ini536 ; <i8> [#uses=2]
- %t8i538 = icmp eq i32 %l.0i534, 0 ; <i1> [#uses=1]
- br i1 %t8i538, label %endfori550, label %forbodyi540
-
-forbodyi540: ; preds = %forcondi539
- switch i8 %c.0i537, label %endfori550 [
- i8 32, label %zi546
- i8 9, label %zi546
- i8 45, label %if20i541
- i8 43, label %if26i542
- ]
-
-if20i541: ; preds = %forbodyi540
- br label %zi546
-
-if26i542: ; preds = %forbodyi540
- br label %zi546
-
-zi546: ; preds = %if26i542, %if20i541, %forbodyi540, %forbodyi540
- %sign.0.i543 = phi i1 [ false, %if26i542 ], [ true, %if20i541 ], [ %sign.1.i533, %forbodyi540 ], [ %sign.1.i533, %forbodyi540 ] ; <i1> [#uses=1]
- %t30i544 = getelementptr i8* %p.0i535, i32 1 ; <i8*> [#uses=2]
- %t33i545 = add i32 %l.0i534, -1 ; <i32> [#uses=1]
- br label %forcondi539
-
-endfori550: ; preds = %forbodyi540, %forcondi539
- %t37i547 = icmp eq i8 %c.0i537, 48 ; <i1> [#uses=1]
- %t39i548 = icmp sgt i32 %l.0i534, 1 ; <i1> [#uses=1]
- %or.condi549 = and i1 %t37i547, %t39i548 ; <i1> [#uses=1]
- br i1 %or.condi549, label %if40i554, label %endif41i564
-
-if40i554: ; preds = %endfori550
- %t43i551 = getelementptr i8* %p.0i535, i32 1 ; <i8*> [#uses=2]
- %t44i552 = load i8* %t43i551 ; <i8> [#uses=1]
- %t45i553 = zext i8 %t44i552 to i32 ; <i32> [#uses=1]
- switch i32 %t45i553, label %endif41i564 [
- i32 120, label %case46i556
- i32 88, label %case46i556
- i32 98, label %case51i558
- i32 66, label %case51i558
- i32 111, label %case56i560
- i32 79, label %case56i560
- ]
-
-case46i556: ; preds = %if40i554, %if40i554
- %t48i555 = getelementptr i8* %p.0i535, i32 2 ; <i8*> [#uses=1]
- br label %endif41i564
-
-case51i558: ; preds = %if40i554, %if40i554
- %t53i557 = getelementptr i8* %p.0i535, i32 2 ; <i8*> [#uses=1]
- br label %endif41i564
-
-case56i560: ; preds = %if40i554, %if40i554
- %t58i559 = getelementptr i8* %p.0i535, i32 2 ; <i8*> [#uses=1]
- br label %endif41i564
-
-endif41i564: ; preds = %case56i560, %case51i558, %case46i556, %if40i554, %endfori550
- %r.0i561 = phi i32 [ 0, %if40i554 ], [ 8, %case56i560 ], [ 2, %case51i558 ], [ 16, %case46i556 ], [ 0, %endfori550 ] ; <i32> [#uses=2]
- %p.2i562 = phi i8* [ %t43i551, %if40i554 ], [ %t58i559, %case56i560 ], [ %t53i557, %case51i558 ], [ %t48i555, %case46i556 ], [ %p.0i535, %endfori550 ] ; <i8*> [#uses=2]
- %t63i563 = icmp eq i32 %r.0i561, 0 ; <i1> [#uses=1]
- br i1 %t63i563, label %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i576, label %if70i568
-
-if70i568: ; preds = %endif41i564
- br label %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i576
-
-Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i576: ; preds = %if70i568, %endif41i564, %if65
- %radix.0.i570 = phi i32 [ 0, %if65 ], [ %r.0i561, %if70i568 ], [ 10, %endif41i564 ] ; <i32> [#uses=2]
- %p.1i571 = phi i8* [ %p.2i562, %if70i568 ], [ %t89.i782, %if65 ], [ %p.2i562, %endif41i564 ] ; <i8*> [#uses=1]
- %t84i572 = ptrtoint i8* %p.1i571 to i32 ; <i32> [#uses=1]
- %t85i573 = ptrtoint i8* %t89.i782 to i32 ; <i32> [#uses=1]
- %t86i574 = sub i32 %t84i572, %t85i573 ; <i32> [#uses=2]
- %t6.i575 = sub i32 %t90.i783, %t86i574 ; <i32> [#uses=1]
- %t59i604 = zext i32 %radix.0.i570 to i64 ; <i64> [#uses=1]
- br label %fcondi581
-
-fcondi581: ; preds = %if55i610, %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i576
- %value.0i577 = phi i64 [ 0, %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i576 ], [ %t65i607, %if55i610 ] ; <i64> [#uses=1]
- %fkey.0i579 = phi i32 [ 0, %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i576 ], [ %t70i609, %if55i610 ] ; <i32> [#uses=3]
- %t3i580 = icmp ult i32 %fkey.0i579, %t6.i575 ; <i1> [#uses=1]
- br i1 %t3i580, label %fbodyi587, label %wcond.i422
-
-fbodyi587: ; preds = %fcondi581
- %t5.s.i582 = add i32 %t86i574, %fkey.0i579 ; <i32> [#uses=1]
- %t89.i782.s = add i32 %.frame.0.3, %t5.s.i582 ; <i32> [#uses=1]
- %t5i583 = getelementptr i8* %line_arg831, i32 %t89.i782.s ; <i8*> [#uses=1]
- %t6i584 = load i8* %t5i583 ; <i8> [#uses=6]
- %t6.off84i585 = add i8 %t6i584, -48 ; <i8> [#uses=1]
- %or.cond.i28.i586 = icmp ugt i8 %t6.off84i585, 9 ; <i1> [#uses=1]
- br i1 %or.cond.i28.i586, label %ei590, label %endifi603
-
-ei590: ; preds = %fbodyi587
- %t6.off83i588 = add i8 %t6i584, -97 ; <i8> [#uses=1]
- %or.cond81i589 = icmp ugt i8 %t6.off83i588, 25 ; <i1> [#uses=1]
- br i1 %or.cond81i589, label %e24i595, label %if22i592
-
-if22i592: ; preds = %ei590
- %t27i591 = add i8 %t6i584, -39 ; <i8> [#uses=1]
- br label %endifi603
-
-e24i595: ; preds = %ei590
- %t6.offi593 = add i8 %t6i584, -65 ; <i8> [#uses=1]
- %or.cond82i594 = icmp ugt i8 %t6.offi593, 25 ; <i1> [#uses=1]
- br i1 %or.cond82i594, label %wcond.i422, label %if39i597
-
-if39i597: ; preds = %e24i595
- %t44.i29.i596 = add i8 %t6i584, -7 ; <i8> [#uses=1]
- br label %endifi603
-
-endifi603: ; preds = %if39i597, %if22i592, %fbodyi587
- %c.0.i30.i598 = phi i8 [ %t27i591, %if22i592 ], [ %t44.i29.i596, %if39i597 ], [ %t6i584, %fbodyi587 ] ; <i8> [#uses=1]
- %t48.i31.i599 = zext i8 %c.0.i30.i598 to i32 ; <i32> [#uses=1]
- %t49i600 = add i32 %t48.i31.i599, 208 ; <i32> [#uses=1]
- %t52i601 = and i32 %t49i600, 255 ; <i32> [#uses=2]
- %t54i602 = icmp ult i32 %t52i601, %radix.0.i570 ; <i1> [#uses=1]
- br i1 %t54i602, label %if55i610, label %wcond.i422
-
-if55i610: ; preds = %endifi603
- %t61i605 = mul i64 %value.0i577, %t59i604 ; <i64> [#uses=1]
- %t64i606 = zext i32 %t52i601 to i64 ; <i64> [#uses=1]
- %t65i607 = add i64 %t61i605, %t64i606 ; <i64> [#uses=1]
- %t70i609 = add i32 %fkey.0i579, 1 ; <i32> [#uses=1]
- br label %fcondi581
-
-e67: ; preds = %ifi630
- %t4i447 = icmp eq i32 %t49.i801, %.frame.0.6 ; <i1> [#uses=1]
- br i1 %t4i447, label %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i491, label %forcondi454
-
-forcondi454: ; preds = %zi461, %e67
- %c.0i452 = phi i8 [ %c.0i452.pre, %zi461 ], [ %t63, %e67 ] ; <i8> [#uses=2]
- %sign.1.i448 = phi i1 [ %sign.0.i458, %zi461 ], [ false, %e67 ] ; <i1> [#uses=2]
- %l.0i449 = phi i32 [ %t33i460, %zi461 ], [ %t90.i825, %e67 ] ; <i32> [#uses=3]
- %p.0i450 = phi i8* [ %t30i459, %zi461 ], [ %t89.i824, %e67 ] ; <i8*> [#uses=5]
- %t8i453 = icmp eq i32 %l.0i449, 0 ; <i1> [#uses=1]
- br i1 %t8i453, label %endfori465, label %forbodyi455
-
-forbodyi455: ; preds = %forcondi454
- switch i8 %c.0i452, label %endfori465 [
- i8 32, label %zi461
- i8 9, label %zi461
- i8 45, label %if20i456
- i8 43, label %if26i457
- ]
-
-if20i456: ; preds = %forbodyi455
- br label %zi461
-
-if26i457: ; preds = %forbodyi455
- br label %zi461
-
-zi461: ; preds = %if26i457, %if20i456, %forbodyi455, %forbodyi455
- %sign.0.i458 = phi i1 [ false, %if26i457 ], [ true, %if20i456 ], [ %sign.1.i448, %forbodyi455 ], [ %sign.1.i448, %forbodyi455 ] ; <i1> [#uses=1]
- %t30i459 = getelementptr i8* %p.0i450, i32 1 ; <i8*> [#uses=2]
- %t33i460 = add i32 %l.0i449, -1 ; <i32> [#uses=1]
- %c.0i452.pre = load i8* %t30i459 ; <i8> [#uses=1]
- br label %forcondi454
-
-endfori465: ; preds = %forbodyi455, %forcondi454
- %t37i462 = icmp eq i8 %c.0i452, 48 ; <i1> [#uses=1]
- %t39i463 = icmp sgt i32 %l.0i449, 1 ; <i1> [#uses=1]
- %or.condi464 = and i1 %t37i462, %t39i463 ; <i1> [#uses=1]
- br i1 %or.condi464, label %if40i469, label %endif41i479
-
-if40i469: ; preds = %endfori465
- %t43i466 = getelementptr i8* %p.0i450, i32 1 ; <i8*> [#uses=2]
- %t44i467 = load i8* %t43i466 ; <i8> [#uses=1]
- %t45i468 = zext i8 %t44i467 to i32 ; <i32> [#uses=1]
- switch i32 %t45i468, label %endif41i479 [
- i32 120, label %case46i471
- i32 111, label %case56i475
- ]
-
-case46i471: ; preds = %if40i469
- %t48i470 = getelementptr i8* %p.0i450, i32 2 ; <i8*> [#uses=1]
- br label %endif41i479
-
-case56i475: ; preds = %if40i469
- %t58i474 = getelementptr i8* %p.0i450, i32 2 ; <i8*> [#uses=1]
- br label %endif41i479
-
-endif41i479: ; preds = %case56i475, %case46i471, %if40i469, %endfori465
- %r.0i476 = phi i32 [ 0, %if40i469 ], [ 8, %case56i475 ], [ 16, %case46i471 ], [ 0, %endfori465 ] ; <i32> [#uses=2]
- %p.2i477 = phi i8* [ %t43i466, %if40i469 ], [ %t58i474, %case56i475 ], [ %t48i470, %case46i471 ], [ %p.0i450, %endfori465 ] ; <i8*> [#uses=2]
- %t63i478 = icmp eq i32 %r.0i476, 0 ; <i1> [#uses=1]
- br i1 %t63i478, label %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i491, label %if70i483
-
-if70i483: ; preds = %endif41i479
- br label %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i491
-
-Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i491: ; preds = %if70i483, %endif41i479, %e67
- %radix.0.i485 = phi i32 [ 0, %e67 ], [ %r.0i476, %if70i483 ], [ 10, %endif41i479 ] ; <i32> [#uses=2]
- %p.1i486 = phi i8* [ %p.2i477, %if70i483 ], [ %t89.i824, %e67 ], [ %p.2i477, %endif41i479 ] ; <i8*> [#uses=1]
- %t84i487 = ptrtoint i8* %p.1i486 to i32 ; <i32> [#uses=1]
- %t85i488 = ptrtoint i8* %t89.i824 to i32 ; <i32> [#uses=1]
- %t86i489 = sub i32 %t84i487, %t85i488 ; <i32> [#uses=2]
- %ttt = sub i32 %t90.i825, %t86i489 ; <i32> [#uses=1]
- %t59i519 = zext i32 %radix.0.i485 to i64 ; <i64> [#uses=1]
- br label %fcondi496
-
-fcondi496: ; preds = %if55i525, %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i491
- %value.0i492 = phi i64 [ 0, %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i491 ], [ %t65i522, %if55i525 ] ; <i64> [#uses=1]
- %fkey.0i494 = phi i32 [ 0, %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i491 ], [ %t70i524, %if55i525 ] ; <i32> [#uses=3]
- %t3i495 = icmp ult i32 %fkey.0i494, %ttt ; <i1> [#uses=1]
- br i1 %t3i495, label %fbodyi502, label %wcond.i422
-
-fbodyi502: ; preds = %fcondi496
- %t5.s.i497 = add i32 %t86i489, %fkey.0i494 ; <i32> [#uses=1]
- %t89.i824.s = add i32 %.frame.0.6, %t5.s.i497 ; <i32> [#uses=1]
- %t5i498 = getelementptr i8* %line_arg831, i32 %t89.i824.s ; <i8*> [#uses=1]
- %t6i499 = load i8* %t5i498 ; <i8> [#uses=6]
- %t6.off84i500 = add i8 %t6i499, -48 ; <i8> [#uses=1]
- %or.cond.i28.i501 = icmp ugt i8 %t6.off84i500, 9 ; <i1> [#uses=1]
- br i1 %or.cond.i28.i501, label %ei505, label %endifi518
-
-ei505: ; preds = %fbodyi502
- %t6.off83i503 = add i8 %t6i499, -97 ; <i8> [#uses=1]
- %or.cond81i504 = icmp ugt i8 %t6.off83i503, 25 ; <i1> [#uses=1]
- br i1 %or.cond81i504, label %e24i510, label %if22i507
-
-if22i507: ; preds = %ei505
- %t27i506 = add i8 %t6i499, -39 ; <i8> [#uses=1]
- br label %endifi518
-
-e24i510: ; preds = %ei505
- %t6.offi508 = add i8 %t6i499, -65 ; <i8> [#uses=1]
- %or.cond82i509 = icmp ugt i8 %t6.offi508, 25 ; <i1> [#uses=1]
- br i1 %or.cond82i509, label %wcond.i422, label %if39i512
-
-if39i512: ; preds = %e24i510
- %t44.i29.i511 = add i8 %t6i499, -7 ; <i8> [#uses=1]
- br label %endifi518
-
-endifi518: ; preds = %if39i512, %if22i507, %fbodyi502
- %c.0.i30.i513 = phi i8 [ %t27i506, %if22i507 ], [ %t44.i29.i511, %if39i512 ], [ %t6i499, %fbodyi502 ] ; <i8> [#uses=1]
- %t48.i31.i514 = zext i8 %c.0.i30.i513 to i32 ; <i32> [#uses=1]
- %t49i515 = add i32 %t48.i31.i514, 208 ; <i32> [#uses=1]
- %t52i516 = and i32 %t49i515, 255 ; <i32> [#uses=2]
- %t54i517 = icmp ult i32 %t52i516, %radix.0.i485 ; <i1> [#uses=1]
- br i1 %t54i517, label %if55i525, label %wcond.i422
-
-if55i525: ; preds = %endifi518
- %t61i520 = mul i64 %value.0i492, %t59i519 ; <i64> [#uses=1]
- %t64i521 = zext i32 %t52i516 to i64 ; <i64> [#uses=1]
- %t65i522 = add i64 %t61i520, %t64i521 ; <i64> [#uses=1]
- %t70i524 = add i32 %fkey.0i494, 1 ; <i32> [#uses=1]
- br label %fcondi496
-
-wcond.i422: ; preds = %e40.i, %endifi518, %e24i510, %fcondi496, %endifi603, %e24i595, %fcondi581
- %sarg60.pn.i = phi i8* [ %p.0.i, %e40.i ], [ undef, %fcondi496 ], [ undef, %e24i510 ], [ undef, %endifi518 ], [ undef, %endifi603 ], [ undef, %e24i595 ], [ undef, %fcondi581 ] ; <i8*> [#uses=3]
- %start_arg.pn.i = phi i32 [ %t49.i443, %e40.i ], [ 0, %fcondi496 ], [ 0, %e24i510 ], [ 0, %endifi518 ], [ 0, %endifi603 ], [ 0, %e24i595 ], [ 0, %fcondi581 ] ; <i32> [#uses=3]
- %extent.0.i = phi i32 [ %t51.i, %e40.i ], [ undef, %fcondi496 ], [ undef, %e24i510 ], [ undef, %endifi518 ], [ undef, %endifi603 ], [ undef, %e24i595 ], [ undef, %fcondi581 ] ; <i32> [#uses=3]
- %p.0.i = getelementptr i8* %sarg60.pn.i, i32 %start_arg.pn.i ; <i8*> [#uses=2]
- %p.0.s63.i = add i32 %start_arg.pn.i, -1 ; <i32> [#uses=1]
- %t2i424 = getelementptr i8* %sarg60.pn.i, i32 %p.0.s63.i ; <i8*> [#uses=1]
- br label %forcondi430
-
-forcondi430: ; preds = %forbodyi434, %wcond.i422
- %l.0i426 = phi i32 [ %extent.0.i, %wcond.i422 ], [ %t4i428, %forbodyi434 ] ; <i32> [#uses=2]
- %p.0i427 = phi i8* [ %t2i424, %wcond.i422 ], [ %t7i431, %forbodyi434 ] ; <i8*> [#uses=1]
- %t4i428 = add i32 %l.0i426, -1 ; <i32> [#uses=1]
- %t5i429 = icmp eq i32 %l.0i426, 0 ; <i1> [#uses=1]
- br i1 %t5i429, label %e.i441, label %forbodyi434
-
-forbodyi434: ; preds = %forcondi430
- %t7i431 = getelementptr i8* %p.0i427, i32 1 ; <i8*> [#uses=3]
- %t8i432 = load i8* %t7i431 ; <i8> [#uses=1]
- %t12i433 = icmp eq i8 %t8i432, 32 ; <i1> [#uses=1]
- br i1 %t12i433, label %ifi438, label %forcondi430
-
-ifi438: ; preds = %forbodyi434
- %t15i435 = ptrtoint i8* %t7i431 to i32 ; <i32> [#uses=1]
- %t16i436 = ptrtoint i8* %p.0.i to i32 ; <i32> [#uses=1]
- %t17i437 = sub i32 %t15i435, %t16i436 ; <i32> [#uses=1]
- br label %e.i441
-
-e.i441: ; preds = %ifi438, %forcondi430
- %t2561.i = phi i32 [ %t17i437, %ifi438 ], [ %extent.0.i, %forcondi430 ] ; <i32> [#uses=2]
- %p.0.s.i = add i32 %start_arg.pn.i, %t2561.i ; <i32> [#uses=1]
- %t32.s.i = add i32 %p.0.s.i, -1 ; <i32> [#uses=1]
- %t2i.i = getelementptr i8* %sarg60.pn.i, i32 %t32.s.i ; <i8*> [#uses=1]
- br label %forbodyi.i
-
-forbodyi.i: ; preds = %forbodyi.i, %e.i441
- %p.0i.i = phi i8* [ %t2i.i, %e.i441 ], [ %t7i.i, %forbodyi.i ] ; <i8*> [#uses=1]
- %s2.0i.i = phi i8* [ getelementptr ([5 x i8]* @.str171, i32 0, i32 0), %e.i441 ], [ %t11i.i, %forbodyi.i ] ; <i8*> [#uses=2]
- %t7i.i = getelementptr i8* %p.0i.i, i32 1 ; <i8*> [#uses=2]
- %t8i.i = load i8* %t7i.i ; <i8> [#uses=1]
- %t11i.i = getelementptr i8* %s2.0i.i, i32 1 ; <i8*> [#uses=1]
- %t12i.i = load i8* %s2.0i.i ; <i8> [#uses=1]
- %t14i.i = icmp eq i8 %t8i.i, %t12i.i ; <i1> [#uses=1]
- br i1 %t14i.i, label %forbodyi.i, label %e40.i
-
-e40.i: ; preds = %forbodyi.i
- %t49.i443 = add i32 %t2561.i, 1 ; <i32> [#uses=2]
- %t51.i = sub i32 %extent.0.i, %t49.i443 ; <i32> [#uses=1]
- br label %wcond.i422
-}
diff --git a/test/CodeGen/PowerPC/unsafe-math.ll b/test/CodeGen/PowerPC/unsafe-math.ll
index ef9791277dcd..b0bdcc28d28e 100644
--- a/test/CodeGen/PowerPC/unsafe-math.ll
+++ b/test/CodeGen/PowerPC/unsafe-math.ll
@@ -2,7 +2,7 @@
; RUN: llc < %s -march=ppc32 -enable-unsafe-fp-math | \
; RUN: grep fmul | count 1
-define double @foo(double %X) {
+define double @foo(double %X) nounwind {
%tmp1 = fmul double %X, 1.23
%tmp2 = fmul double %tmp1, 4.124
ret double %tmp2
diff --git a/test/CodeGen/PowerPC/varargs.ll b/test/CodeGen/PowerPC/varargs.ll
new file mode 100644
index 000000000000..1769be957ac4
--- /dev/null
+++ b/test/CodeGen/PowerPC/varargs.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -mtriple=powerpc-apple-darwin | FileCheck -check-prefix=P32 %s
+; RUN: llc < %s -mtriple=powerpc64-apple-darwin | FileCheck -check-prefix=P64 %s
+
+; PR8327
+define i8* @test1(i8** %foo) nounwind {
+ %A = va_arg i8** %foo, i8*
+ ret i8* %A
+}
+
+; P32: test1:
+; P32: lwz r4, 0(r3)
+; P32: addi r5, r4, 4
+; P32: stw r5, 0(r3)
+; P32: lwz r3, 0(r4)
+; P32: blr
+
+; P64: test1:
+; P64: ld r4, 0(r3)
+; P64: addi r5, r4, 8
+; P64: std r5, 0(r3)
+; P64: ld r3, 0(r4)
+; P64: blr
diff --git a/test/CodeGen/SPARC/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/SPARC/2010-04-07-DbgValueOtherTargets.ll
index f66ee216089d..3b644986f2e4 100644
--- a/test/CodeGen/SPARC/2010-04-07-DbgValueOtherTargets.ll
+++ b/test/CodeGen/SPARC/2010-04-07-DbgValueOtherTargets.ll
@@ -1,33 +1,28 @@
; RUN: llc -O0 -march=sparc -asm-verbose < %s | FileCheck %s
; Check that DEBUG_VALUE comments come through on a variety of targets.
-%tart.reflect.ComplexType = type { double, double }
-
-@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
-
-define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+define i32 @main() nounwind ssp {
entry:
; CHECK: DEBUG_VALUE
- tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
- tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
- ret i32 3
+ call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
+ ret i32 0, !dbg !10
}
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
-!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
-!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
-!3 = metadata !{metadata !4, metadata !6, metadata !7}
-!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
-!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
-!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
-!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
-!9 = metadata !{i32 458763, metadata !10} ; [ DW_TAG_lexical_block ]
-!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 0}
+!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!9 = metadata !{i32 3, i32 11, metadata !8, null}
+!10 = metadata !{i32 4, i32 2, metadata !8, null}
+
diff --git a/test/CodeGen/SPARC/2011-01-11-CC.ll b/test/CodeGen/SPARC/2011-01-11-CC.ll
new file mode 100755
index 000000000000..3ceda958de6e
--- /dev/null
+++ b/test/CodeGen/SPARC/2011-01-11-CC.ll
@@ -0,0 +1,105 @@
+; RUN: llc -march=sparc <%s | FileCheck %s -check-prefix=V8
+; RUN: llc -march=sparc -mattr=v9 <%s | FileCheck %s -check-prefix=V9
+
+
+define i32 @test_addx(i64 %a, i64 %b, i64 %c) nounwind readnone noinline {
+entry:
+; V8: addcc
+; V8-NOT: subcc
+; V8: addx
+; V9: addcc
+; V9-NOT: subcc
+; V9: addx
+; V9: mov{{e|ne}} %icc
+ %0 = add i64 %a, %b
+ %1 = icmp ugt i64 %0, %c
+ %2 = zext i1 %1 to i32
+ ret i32 %2
+}
+
+
+define i32 @test_select_int_icc(i32 %a, i32 %b, i32 %c) nounwind readnone noinline {
+entry:
+; V8: test_select_int_icc
+; V8: subcc
+; V8: {{be|bne}}
+; V9: test_select_int_icc
+; V9: subcc
+; V9-NOT: {{be|bne}}
+; V9: mov{{e|ne}} %icc
+ %0 = icmp eq i32 %a, 0
+ %1 = select i1 %0, i32 %b, i32 %c
+ ret i32 %1
+}
+
+
+define float @test_select_fp_icc(i32 %a, float %f1, float %f2) nounwind readnone noinline {
+entry:
+; V8: test_select_fp_icc
+; V8: subcc
+; V8: {{be|bne}}
+; V9: test_select_fp_icc
+; V9: subcc
+; V9-NOT: {{be|bne}}
+; V9: fmovs{{e|ne}} %icc
+ %0 = icmp eq i32 %a, 0
+ %1 = select i1 %0, float %f1, float %f2
+ ret float %1
+}
+
+define double @test_select_dfp_icc(i32 %a, double %f1, double %f2) nounwind readnone noinline {
+entry:
+; V8: test_select_dfp_icc
+; V8: subcc
+; V8: {{be|bne}}
+; V9: test_select_dfp_icc
+; V9: subcc
+; V9-NOT: {{be|bne}}
+; V9: fmovd{{e|ne}} %icc
+ %0 = icmp eq i32 %a, 0
+ %1 = select i1 %0, double %f1, double %f2
+ ret double %1
+}
+
+define i32 @test_select_int_fcc(float %f, i32 %a, i32 %b) nounwind readnone noinline {
+entry:
+;V8: test_select_int_fcc
+;V8: fcmps
+;V8: {{fbe|fbne}}
+;V9: test_select_int_fcc
+;V9: fcmps
+;V9-NOT: {{fbe|fbne}}
+;V9: mov{{e|ne}} %fcc0
+ %0 = fcmp une float %f, 0.000000e+00
+ %a.b = select i1 %0, i32 %a, i32 %b
+ ret i32 %a.b
+}
+
+
+define float @test_select_fp_fcc(float %f, float %f1, float %f2) nounwind readnone noinline {
+entry:
+;V8: test_select_fp_fcc
+;V8: fcmps
+;V8: {{fbe|fbne}}
+;V9: test_select_fp_fcc
+;V9: fcmps
+;V9-NOT: {{fbe|fbne}}
+;V9: fmovs{{e|ne}} %fcc0
+ %0 = fcmp une float %f, 0.000000e+00
+ %1 = select i1 %0, float %f1, float %f2
+ ret float %1
+}
+
+define double @test_select_dfp_fcc(double %f, double %f1, double %f2) nounwind readnone noinline {
+entry:
+;V8: test_select_dfp_fcc
+;V8: fcmpd
+;V8: {{fbne|fbe}}
+;V9: test_select_dfp_fcc
+;V9: fcmpd
+;V9-NOT: {{fbne|fbe}}
+;V9: fmovd{{e|ne}} %fcc0
+ %0 = fcmp une double %f, 0.000000e+00
+ %1 = select i1 %0, double %f1, double %f2
+ ret double %1
+}
diff --git a/test/CodeGen/SPARC/2011-01-11-Call.ll b/test/CodeGen/SPARC/2011-01-11-Call.ll
new file mode 100644
index 000000000000..7350e9232428
--- /dev/null
+++ b/test/CodeGen/SPARC/2011-01-11-Call.ll
@@ -0,0 +1,13 @@
+; RUN: llc -march=sparc -O0 <%s
+
+define void @test() nounwind {
+entry:
+ %0 = tail call i32 (...)* @foo() nounwind
+ tail call void (...)* @bar() nounwind
+ ret void
+}
+
+declare i32 @foo(...)
+
+declare void @bar(...)
+
diff --git a/test/CodeGen/SPARC/2011-01-11-FrameAddr.ll b/test/CodeGen/SPARC/2011-01-11-FrameAddr.ll
new file mode 100644
index 000000000000..fbf724270566
--- /dev/null
+++ b/test/CodeGen/SPARC/2011-01-11-FrameAddr.ll
@@ -0,0 +1,64 @@
+;RUN: llc -march=sparc < %s | FileCheck %s -check-prefix=V8
+;RUN: llc -march=sparc -mattr=v9 < %s | FileCheck %s -check-prefix=V9
+
+define i8* @frameaddr() nounwind readnone {
+entry:
+;V8: frameaddr
+;V8: or %g0, %fp, {{.+}}
+
+;V9: frameaddr
+;V9: or %g0, %fp, {{.+}}
+ %0 = tail call i8* @llvm.frameaddress(i32 0)
+ ret i8* %0
+}
+
+define i8* @frameaddr2() nounwind readnone {
+entry:
+;V8: frameaddr2
+;V8: ta 3
+;V8: ld [%fp+56], {{.+}}
+;V8: ld [{{.+}}+56], {{.+}}
+;V8: ld [{{.+}}+56], {{.+}}
+
+;V9: frameaddr2
+;V9: flushw
+;V9: ld [%fp+56], {{.+}}
+;V9: ld [{{.+}}+56], {{.+}}
+;V9: ld [{{.+}}+56], {{.+}}
+ %0 = tail call i8* @llvm.frameaddress(i32 3)
+ ret i8* %0
+}
+
+declare i8* @llvm.frameaddress(i32) nounwind readnone
+
+
+
+define i8* @retaddr() nounwind readnone {
+entry:
+;V8: retaddr
+;V8: or %g0, %i7, {{.+}}
+
+;V9: retaddr
+;V9: or %g0, %i7, {{.+}}
+ %0 = tail call i8* @llvm.returnaddress(i32 0)
+ ret i8* %0
+}
+
+define i8* @retaddr2() nounwind readnone {
+entry:
+;V8: retaddr2
+;V8: ta 3
+;V8: ld [%fp+56], {{.+}}
+;V8: ld [{{.+}}+56], {{.+}}
+;V8: ld [{{.+}}+60], {{.+}}
+
+;V9: retaddr2
+;V9: flushw
+;V9: ld [%fp+56], {{.+}}
+;V9: ld [{{.+}}+56], {{.+}}
+;V9: ld [{{.+}}+60], {{.+}}
+ %0 = tail call i8* @llvm.returnaddress(i32 3)
+ ret i8* %0
+}
+
+declare i8* @llvm.returnaddress(i32) nounwind readnone
diff --git a/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll b/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll
new file mode 100644
index 000000000000..bc27e987a179
--- /dev/null
+++ b/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll
@@ -0,0 +1,90 @@
+;RUN: llc -march=sparc < %s | FileCheck %s
+;RUN: llc -march=sparc -O0 < %s | FileCheck %s -check-prefix=UNOPT
+
+
+define i32 @test(i32 %a) nounwind {
+entry:
+; CHECK: test
+; CHECK: call bar
+; CHECK-NOT: nop
+; CHECK: ret
+; CHECK-NEXT: restore
+ %0 = tail call i32 @bar(i32 %a) nounwind
+ ret i32 %0
+}
+
+define i32 @test_jmpl(i32 (i32, i32)* nocapture %f, i32 %a, i32 %b) nounwind {
+entry:
+; CHECK: test_jmpl
+; CHECK: call
+; CHECK-NOT: nop
+; CHECK: ret
+; CHECK-NEXT: restore
+ %0 = tail call i32 %f(i32 %a, i32 %b) nounwind
+ ret i32 %0
+}
+
+define i32 @test_loop(i32 %a, i32 %b) nounwind readnone {
+; CHECK: test_loop
+entry:
+ %0 = icmp sgt i32 %b, 0
+ br i1 %0, label %bb, label %bb5
+
+bb: ; preds = %entry, %bb
+ %a_addr.18 = phi i32 [ %a_addr.0, %bb ], [ %a, %entry ]
+ %1 = phi i32 [ %3, %bb ], [ 0, %entry ]
+ %tmp9 = mul i32 %1, %b
+ %2 = and i32 %1, 1
+ %tmp = xor i32 %2, 1
+ %.pn = shl i32 %tmp9, %tmp
+ %a_addr.0 = add i32 %.pn, %a_addr.18
+ %3 = add nsw i32 %1, 1
+ %exitcond = icmp eq i32 %3, %b
+;CHECK: subcc
+;CHECK: bne
+;CHECK-NOT: nop
+ br i1 %exitcond, label %bb5, label %bb
+
+bb5: ; preds = %bb, %entry
+ %a_addr.1.lcssa = phi i32 [ %a, %entry ], [ %a_addr.0, %bb ]
+;CHECK: ret
+;CHECK-NEXT: restore
+ ret i32 %a_addr.1.lcssa
+}
+
+define i32 @test_inlineasm(i32 %a) nounwind {
+entry:
+;CHECK: test_inlineasm
+;CHECK: sethi
+;CHECK: !NO_APP
+;CHECK-NEXT: subcc
+;CHECK-NEXT: bg
+;CHECK-NEXT: nop
+ tail call void asm sideeffect "sethi 0, %g0", ""() nounwind
+ %0 = icmp slt i32 %a, 0
+ br i1 %0, label %bb, label %bb1
+
+bb: ; preds = %entry
+ %1 = tail call i32 (...)* @foo(i32 %a) nounwind
+ ret i32 %1
+
+bb1: ; preds = %entry
+ %2 = tail call i32 @bar(i32 %a) nounwind
+ ret i32 %2
+}
+
+declare i32 @foo(...)
+
+declare i32 @bar(i32)
+
+
+define i32 @test_implicit_def() nounwind {
+entry:
+;UNOPT: test_implicit_def
+;UNOPT: call func
+;UNOPT-NEXT: nop
+ %0 = tail call i32 @func(i32* undef) nounwind
+ ret i32 0
+}
+
+declare i32 @func(i32*)
diff --git a/test/CodeGen/SPARC/2011-01-21-ByValArgs.ll b/test/CodeGen/SPARC/2011-01-21-ByValArgs.ll
new file mode 100644
index 000000000000..85c16e4684ed
--- /dev/null
+++ b/test/CodeGen/SPARC/2011-01-21-ByValArgs.ll
@@ -0,0 +1,18 @@
+;RUN: llc -march=sparc < %s | FileCheck %s
+
+%struct.foo_t = type { i32, i32, i32 }
+
+@s = internal unnamed_addr global %struct.foo_t { i32 10, i32 20, i32 30 }
+
+define i32 @test() nounwind {
+entry:
+;CHECK: test
+;CHECK: st
+;CHECK: st
+;CHECK: st
+;CHECK: bar
+ %0 = tail call i32 @bar(%struct.foo_t* byval @s) nounwind
+ ret i32 %0
+}
+
+declare i32 @bar(%struct.foo_t* byval)
diff --git a/test/CodeGen/SPARC/2011-01-22-SRet.ll b/test/CodeGen/SPARC/2011-01-22-SRet.ll
new file mode 100644
index 000000000000..2f684b009c96
--- /dev/null
+++ b/test/CodeGen/SPARC/2011-01-22-SRet.ll
@@ -0,0 +1,36 @@
+;RUN: llc -march=sparc < %s | FileCheck %s
+
+%struct.foo_t = type { i32, i32, i32 }
+
+define weak void @make_foo(%struct.foo_t* noalias sret %agg.result, i32 %a, i32 %b, i32 %c) nounwind {
+entry:
+;CHECK: make_foo
+;CHECK: ld [%fp+64], {{.+}}
+;CHECK: or {{.+}}, {{.+}}, %i0
+;CHECK: ret
+ %0 = getelementptr inbounds %struct.foo_t* %agg.result, i32 0, i32 0
+ store i32 %a, i32* %0, align 4
+ %1 = getelementptr inbounds %struct.foo_t* %agg.result, i32 0, i32 1
+ store i32 %b, i32* %1, align 4
+ %2 = getelementptr inbounds %struct.foo_t* %agg.result, i32 0, i32 2
+ store i32 %c, i32* %2, align 4
+ ret void
+}
+
+define i32 @test() nounwind {
+entry:
+;CHECK: test
+;CHECK: st {{.+}}, [%sp+64]
+;CHECK: make_foo
+ %f = alloca %struct.foo_t, align 8
+ call void @make_foo(%struct.foo_t* noalias sret %f, i32 10, i32 20, i32 30) nounwind
+ %0 = getelementptr inbounds %struct.foo_t* %f, i32 0, i32 0
+ %1 = load i32* %0, align 8
+ %2 = getelementptr inbounds %struct.foo_t* %f, i32 0, i32 1
+ %3 = load i32* %2, align 4
+ %4 = getelementptr inbounds %struct.foo_t* %f, i32 0, i32 2
+ %5 = load i32* %4, align 8
+ %6 = add nsw i32 %3, %1
+ %7 = add nsw i32 %6, %5
+ ret i32 %7
+}
diff --git a/test/CodeGen/SPARC/basictest.ll b/test/CodeGen/SPARC/basictest.ll
index 9c2c16a6947c..4352e6246301 100644
--- a/test/CodeGen/SPARC/basictest.ll
+++ b/test/CodeGen/SPARC/basictest.ll
@@ -1,6 +1,26 @@
-; RUN: llc < %s -march=sparc
+; RUN: llc < %s -march=sparc | FileCheck %s
-define i32 @test(i32 %X) {
+define i32 @test0(i32 %X) {
%tmp.1 = add i32 %X, 1
ret i32 %tmp.1
+; CHECK: test0:
+; CHECK: add %i0, 1, %i0
+}
+
+
+;; xnor tests.
+define i32 @test1(i32 %X, i32 %Y) {
+ %A = xor i32 %X, %Y
+ %B = xor i32 %A, -1
+ ret i32 %B
+; CHECK: test1:
+; CHECK: xnor %i0, %i1, %i0
+}
+
+define i32 @test2(i32 %X, i32 %Y) {
+ %A = xor i32 %X, -1
+ %B = xor i32 %A, %Y
+ ret i32 %B
+; CHECK: test2:
+; CHECK: xnor %i0, %i1, %i0
}
diff --git a/test/CodeGen/SPARC/mult-alt-generic-sparc.ll b/test/CodeGen/SPARC/mult-alt-generic-sparc.ll
new file mode 100644
index 000000000000..6013b17d9372
--- /dev/null
+++ b/test/CodeGen/SPARC/mult-alt-generic-sparc.ll
@@ -0,0 +1,323 @@
+; RUN: llc < %s -march=sparc
+; ModuleID = 'mult-alt-generic.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32"
+target triple = "sparc"
+
+@mout0 = common global i32 0, align 4
+@min1 = common global i32 0, align 4
+@marray = common global [2 x i32] zeroinitializer, align 4
+
+define void @single_m() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*m,*m"(i32* @mout0, i32* @min1) nounwind
+ ret void
+}
+
+define void @single_o() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %index = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %index, align 4
+ ret void
+}
+
+define void @single_V() nounwind {
+entry:
+ ret void
+}
+
+define void @single_lt() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,<r"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* %in1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r,r<"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ ret void
+}
+
+define void @single_gt() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,>r"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* %in1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r,r>"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ ret void
+}
+
+define void @single_r() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,r"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @single_i() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,i"(i32 1) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @single_n() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,n"(i32 1) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @single_E() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r,E"(double 1.000000e+001) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define void @single_F() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r,F"(double 1.000000e+000) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define void @single_s() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ ret void
+}
+
+define void @single_g() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* @min1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ %2 = call i32 asm "foo $1,$0", "=r,imr"(i32 1) nounwind
+ store i32 %2, i32* %out0, align 4
+ ret void
+}
+
+define void @single_X() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,X"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* @min1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r,X"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ %2 = call i32 asm "foo $1,$0", "=r,X"(i32 1) nounwind
+ store i32 %2, i32* %out0, align 4
+ %3 = call i32 asm "foo $1,$0", "=r,X"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ store i32 %3, i32* %out0, align 4
+; No lowering support.
+; %4 = call i32 asm "foo $1,$0", "=r,X"(double 1.000000e+001) nounwind
+; store i32 %4, i32* %out0, align 4
+; %5 = call i32 asm "foo $1,$0", "=r,X"(double 1.000000e+000) nounwind
+; store i32 %5, i32* %out0, align 4
+ ret void
+}
+
+define void @single_p() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,r"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_m() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ call void asm "foo $1,$0", "=*m|r,m|r"(i32* @mout0, i32 %tmp) nounwind
+ ret void
+}
+
+define void @multi_o() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %index = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %index, align 4
+ ret void
+}
+
+define void @multi_V() nounwind {
+entry:
+ ret void
+}
+
+define void @multi_lt() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|<r"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* %in1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r|r,r|r<"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_gt() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|>r"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* %in1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r|r,r|r>"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_r() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|m"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_i() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|i"(i32 1) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_n() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|n"(i32 1) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_E() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r|r,r|E"(double 1.000000e+001) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define void @multi_F() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r|r,r|F"(double 1.000000e+000) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define void @multi_s() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_g() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* @min1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ %2 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 1) nounwind
+ store i32 %2, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_X() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* @min1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ %2 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 1) nounwind
+ store i32 %2, i32* %out0, align 4
+ %3 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ store i32 %3, i32* %out0, align 4
+; No lowering support.
+; %4 = call i32 asm "foo $1,$0", "=r|r,r|X"(double 1.000000e+001) nounwind
+; store i32 %4, i32* %out0, align 4
+; %5 = call i32 asm "foo $1,$0", "=r|r,r|X"(double 1.000000e+000) nounwind
+; store i32 %5, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_p() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|r"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
diff --git a/test/CodeGen/SPARC/xnor.ll b/test/CodeGen/SPARC/xnor.ll
deleted file mode 100644
index 6ff66bd6fcc6..000000000000
--- a/test/CodeGen/SPARC/xnor.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=sparc | \
-; RUN: grep xnor | count 2
-
-define i32 @test1(i32 %X, i32 %Y) {
- %A = xor i32 %X, %Y ; <i32> [#uses=1]
- %B = xor i32 %A, -1 ; <i32> [#uses=1]
- ret i32 %B
-}
-
-define i32 @test2(i32 %X, i32 %Y) {
- %A = xor i32 %X, -1 ; <i32> [#uses=1]
- %B = xor i32 %A, %Y ; <i32> [#uses=1]
- ret i32 %B
-}
-
diff --git a/test/CodeGen/SystemZ/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/SystemZ/2010-04-07-DbgValueOtherTargets.ll
index 610aa40197e3..c2877ac55ed1 100644
--- a/test/CodeGen/SystemZ/2010-04-07-DbgValueOtherTargets.ll
+++ b/test/CodeGen/SystemZ/2010-04-07-DbgValueOtherTargets.ll
@@ -1,33 +1,28 @@
; RUN: llc -O0 -march=systemz -asm-verbose < %s | FileCheck %s
; Check that DEBUG_VALUE comments come through on a variety of targets.
-%tart.reflect.ComplexType = type { double, double }
-
-@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
-
-define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+define i32 @main() nounwind ssp {
entry:
; CHECK: DEBUG_VALUE
- tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
- tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
- ret i32 3
+ call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
+ ret i32 0, !dbg !10
}
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
-!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
-!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
-!3 = metadata !{metadata !4, metadata !6, metadata !7}
-!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
-!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
-!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
-!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
-!9 = metadata !{i32 458763, metadata !10} ; [ DW_TAG_lexical_block ]
-!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 0}
+!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!9 = metadata !{i32 3, i32 11, metadata !8, null}
+!10 = metadata !{i32 4, i32 2, metadata !8, null}
+
diff --git a/test/CodeGen/Thumb/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/Thumb/2010-04-07-DbgValueOtherTargets.ll
index 6b6c14f40871..b9039774d42e 100644
--- a/test/CodeGen/Thumb/2010-04-07-DbgValueOtherTargets.ll
+++ b/test/CodeGen/Thumb/2010-04-07-DbgValueOtherTargets.ll
@@ -1,33 +1,28 @@
; RUN: llc -O0 -march=thumb -asm-verbose < %s | FileCheck %s
; Check that DEBUG_VALUE comments come through on a variety of targets.
-%tart.reflect.ComplexType = type { double, double }
-
-@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
-
-define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+define i32 @main() nounwind ssp {
entry:
; CHECK: DEBUG_VALUE
- tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
- tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
- ret i32 3
+ call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
+ ret i32 0, !dbg !10
}
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
-!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
-!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
-!3 = metadata !{metadata !4, metadata !6, metadata !7}
-!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
-!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
-!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
-!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
-!9 = metadata !{i32 458763, metadata !10} ; [ DW_TAG_lexical_block ]
-!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 0}
+!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!9 = metadata !{i32 3, i32 11, metadata !8, null}
+!10 = metadata !{i32 4, i32 2, metadata !8, null}
+
diff --git a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
index 9a6321bb43c4..06c0dfec5bab 100644
--- a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
+++ b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
@@ -10,7 +10,7 @@
define void @_Z19getClosestDiagonal3ii(%0* noalias sret, i32, i32) nounwind {
; CHECK: blx ___muldf3
; CHECK: blx ___muldf3
-; CHECK: beq LBB0_8
+; CHECK: beq LBB0_7
; CHECK: blx ___muldf3
; <label>:3
switch i32 %1, label %4 [
diff --git a/test/CodeGen/Thumb/2011-EpilogueBug.ll b/test/CodeGen/Thumb/2011-EpilogueBug.ll
new file mode 100644
index 000000000000..16789e66cc18
--- /dev/null
+++ b/test/CodeGen/Thumb/2011-EpilogueBug.ll
@@ -0,0 +1,17 @@
+; RUN: llc -mtriple=thumbv6-apple-darwin < %s | FileCheck %s
+; r8869722
+
+%struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* }
+%struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* }
+
+define void @t1(%struct.state* %v) {
+; CHECK: push {r4
+ %tmp6 = load i32* null
+ %tmp8 = alloca float, i32 %tmp6
+ store i32 1, i32* null
+ br label %return
+
+return: ; preds = %0
+; CHECK: mov sp, r4
+ ret void
+}
diff --git a/test/CodeGen/Thumb/barrier.ll b/test/CodeGen/Thumb/barrier.ll
index c611b865f67d..419c3baa3da3 100644
--- a/test/CodeGen/Thumb/barrier.ll
+++ b/test/CodeGen/Thumb/barrier.ll
@@ -1,15 +1,16 @@
; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s -check-prefix=V6
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=-db | FileCheck %s -check-prefix=V6
; RUN: llc < %s -march=thumb -mattr=+v6m | FileCheck %s -check-prefix=V6M
-declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1 )
+declare void @llvm.memory.barrier(i1 , i1 , i1 , i1 , i1)
define void @t1() {
; V6: t1:
; V6: blx {{_*}}sync_synchronize
; V6M: t1:
-; V6M: dsb
- call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 true )
+; V6M: dmb st
+ call void @llvm.memory.barrier(i1 false, i1 false, i1 false, i1 true, i1 true)
ret void
}
@@ -18,7 +19,7 @@ define void @t2() {
; V6: blx {{_*}}sync_synchronize
; V6M: t2:
-; V6M: dmb
- call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 false )
+; V6M: dmb ish
+ call void @llvm.memory.barrier(i1 true, i1 false, i1 false, i1 true, i1 false)
ret void
}
diff --git a/test/CodeGen/Thumb/dyn-stackalloc.ll b/test/CodeGen/Thumb/dyn-stackalloc.ll
index 5c8ad974bc0e..1f31dca0524d 100644
--- a/test/CodeGen/Thumb/dyn-stackalloc.ll
+++ b/test/CodeGen/Thumb/dyn-stackalloc.ll
@@ -1,12 +1,15 @@
-; RUN: llc < %s -march=thumb | not grep {ldr sp}
-; RUN: llc < %s -mtriple=thumb-apple-darwin | \
-; RUN: not grep {sub.*r7}
-; RUN: llc < %s -march=thumb | grep {mov.*r6, sp}
+; RUN: llc < %s -mtriple=thumb-apple-darwin | FileCheck %s
%struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* }
%struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* }
define void @t1(%struct.state* %v) {
+; CHECK: t1:
+; CHECK: push
+; CHECK: add r7, sp, #12
+; CHECK: mov r2, sp
+; CHECK: subs r4, r2, r1
+; CHECK: mov sp, r4
%tmp6 = load i32* null
%tmp8 = alloca float, i32 %tmp6
store i32 1, i32* null
@@ -34,6 +37,18 @@ declare fastcc void @f2(float*, float*, float*, i32)
@str215 = external global [2 x i8]
define void @t2(%struct.comment* %vc, i8* %tag, i8* %contents) {
+; CHECK: t2:
+; CHECK: push
+; CHECK: add r7, sp, #12
+; CHECK: sub sp, #8
+; CHECK: mov r6, sp
+; CHECK: str r2, [r6, #4]
+; CHECK: str r0, [r6]
+; CHECK-NOT: ldr r0, [sp
+; CHECK: ldr r0, [r6, #4]
+; CHECK: mov r0, sp
+; CHECK: subs r5, r0, r1
+; CHECK: mov sp, r5
%tmp1 = call i32 @strlen( i8* %tag )
%tmp3 = call i32 @strlen( i8* %contents )
%tmp4 = add i32 %tmp1, 2
diff --git a/test/CodeGen/Thumb/large-stack.ll b/test/CodeGen/Thumb/large-stack.ll
index b289484f5efb..fbacabaedc35 100644
--- a/test/CodeGen/Thumb/large-stack.ll
+++ b/test/CodeGen/Thumb/large-stack.ll
@@ -10,22 +10,22 @@ define void @test1() {
define void @test2() {
; CHECK: test2:
-; CHECK: ldr r0, LCPI
+; CHECK: ldr.n r0, LCPI
; CHECK: add sp, r0
-; CHECK: mov sp, r7
-; CHECK: sub sp, #4
+; CHECK: subs r4, r7, #4
+; CHECK: mov sp, r4
%tmp = alloca [ 4168 x i8 ] , align 4
ret void
}
define i32 @test3() {
; CHECK: test3:
-; CHECK: ldr r2, LCPI
+; CHECK: ldr.n r2, LCPI
; CHECK: add sp, r2
-; CHECK: ldr r1, LCPI
+; CHECK: ldr.n r1, LCPI
; CHECK: add r1, sp
-; CHECK: mov sp, r7
-; CHECK: sub sp, #4
+; CHECK: subs r4, r7, #4
+; CHECK: mov sp, r4
%retval = alloca i32, align 4
%tmp = alloca i32, align 4
%a = alloca [805306369 x i8], align 16
diff --git a/test/CodeGen/Thumb/long.ll b/test/CodeGen/Thumb/long.ll
index e3ef44a87586..197e19e31b49 100644
--- a/test/CodeGen/Thumb/long.ll
+++ b/test/CodeGen/Thumb/long.ll
@@ -4,7 +4,7 @@
; RUN: grep adc | count 1
; RUN: llc < %s -march=thumb | \
; RUN: grep sbc | count 1
-; RUN: llc < %s -march=thumb | grep __muldi3
+; RUN: llc < %s -mtriple=thumb-apple-darwin | grep __muldi3
define i64 @f1() {
entry:
diff --git a/test/CodeGen/Thumb/select.ll b/test/CodeGen/Thumb/select.ll
index 7a183b0f9e26..780e5fac02b9 100644
--- a/test/CodeGen/Thumb/select.ll
+++ b/test/CodeGen/Thumb/select.ll
@@ -4,7 +4,7 @@
; RUN: llc < %s -march=thumb | grep ble | count 1
; RUN: llc < %s -march=thumb | grep bls | count 1
; RUN: llc < %s -march=thumb | grep bhi | count 1
-; RUN: llc < %s -march=thumb | grep __ltdf2
+; RUN: llc < %s -mtriple=thumb-apple-darwin | grep __ltdf2
define i32 @f1(i32 %a.s) {
entry:
diff --git a/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll b/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll
index f26c6d114b8e..550b3efae998 100644
--- a/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll
+++ b/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll
@@ -5,8 +5,13 @@
define hidden i32 @__gcov_execlp(i8* %path, i8* %arg, ...) nounwind {
entry:
; CHECK: __gcov_execlp:
-; CHECK: mov sp, r7
-; CHECK: sub sp, #4
+; CHECK: sub sp, #8
+; CHECK: push
+; CHECK: add r7, sp, #4
+; CHECK: subs r4, r7, #4
+; CHECK: mov sp, r4
+; CHECK-NOT: mov sp, r7
+; CHECK: add sp, #8
call void @__gcov_flush() nounwind
br i1 undef, label %bb5, label %bb
diff --git a/test/CodeGen/ARM/2009-08-21-PostRAKill4.ll b/test/CodeGen/Thumb2/2009-08-21-PostRAKill4.ll
index 5cfc68d09408..5cfc68d09408 100644
--- a/test/CodeGen/ARM/2009-08-21-PostRAKill4.ll
+++ b/test/CodeGen/Thumb2/2009-08-21-PostRAKill4.ll
diff --git a/test/CodeGen/ARM/2009-09-01-PostRAProlog.ll b/test/CodeGen/Thumb2/2009-09-01-PostRAProlog.ll
index 06a152d56e4d..06a152d56e4d 100644
--- a/test/CodeGen/ARM/2009-09-01-PostRAProlog.ll
+++ b/test/CodeGen/Thumb2/2009-09-01-PostRAProlog.ll
diff --git a/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll b/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll
index 7ee19863de19..458569ec93b5 100644
--- a/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll
+++ b/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll
@@ -1,4 +1,7 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 \
+; RUN: -pre-RA-sched=source | FileCheck -check-prefix=SOURCE %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 \
+; RUN: -pre-RA-sched=list-hybrid | FileCheck -check-prefix=HYBRID %s
; Radar 7459078
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
@@ -10,9 +13,11 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-
%s5 = type { i32 }
; Make sure the cmp is not scheduled before the InlineAsm that clobbers cc.
-; CHECK: InlineAsm End
-; CHECK: cmp
-; CHECK: beq
+; SOURCE: InlineAsm End
+; SOURCE: cmp
+; SOURCE: beq
+; HYBRID: InlineAsm End
+; HYBRID: cbz
define void @test(%s1* %this, i32 %format, i32 %w, i32 %h, i32 %levels, i32* %s, i8* %data, i32* nocapture %rowbytes, void (i8*, i8*)* %release, i8* %info) nounwind {
entry:
%tmp1 = getelementptr inbounds %s1* %this, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0
diff --git a/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll b/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
index 26750065af3f..9ed6a01255f8 100644
--- a/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
+++ b/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
@@ -23,8 +23,9 @@ entry:
%4 = insertelement <2 x double> %2, double %V.0.ph, i32 1 ; <<2 x double>> [#uses=2]
; Constant pool load followed by add.
; Then clobber the loaded register, not the sum.
-; CHECK: vldr.64 [[LDR:d.]]
-; CHECK: vadd.f64 [[ADD:d.]], [[LDR]], [[LDR]]
+; CHECK: vldr.64 [[LDR:d.*]],
+; CHECK: LPC0_0:
+; CHECK: vadd.f64 [[ADD:d.*]], [[LDR]], [[LDR]]
; CHECK: vmov.f64 [[LDR]]
%5 = fadd <2 x double> %3, %3 ; <<2 x double>> [#uses=2]
%6 = fadd <2 x double> %4, %4 ; <<2 x double>> [#uses=2]
diff --git a/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll b/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll
index c5fc5098cd46..f91e1c9febe2 100644
--- a/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll
+++ b/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O3 -relocation-model=pic -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O3 -relocation-model=pic | FileCheck %s
; rdar://8115404
; Tail merging must not split an IT block.
diff --git a/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll b/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll
index abcf13a3e38f..41f7f299555d 100644
--- a/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll
+++ b/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll
@@ -5,6 +5,10 @@
define internal fastcc i32 @Callee(i32 %i) nounwind {
entry:
; CHECK: Callee:
+; CHECK: push
+; CHECK: mov r4, sp
+; CHECK: sub.w r12, r4, #1000
+; CHECK: mov sp, r12
%0 = icmp eq i32 %i, 0 ; <i1> [#uses=1]
br i1 %0, label %bb2, label %bb
@@ -17,9 +21,11 @@ bb: ; preds = %entry
ret i32 %4
bb2: ; preds = %entry
-; Must restore sp from fp here
-; CHECK: mov sp, r7
-; CHECK: sub sp, #8
+; Must restore sp from fp here. Make sure not to leave sp in a temporarily invalid
+; state though. rdar://8465407
+; CHECK-NOT: mov sp, r7
+; CHECK: sub.w r4, r7, #8
+; CHECK: mov sp, r4
; CHECK: pop
ret i32 0
}
diff --git a/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll b/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll
new file mode 100644
index 000000000000..313728c1b56a
--- /dev/null
+++ b/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll
@@ -0,0 +1,34 @@
+; rdar://8465407
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+
+%struct.buf = type opaque
+
+declare void @bar() nounwind optsize
+
+define void @foo() nounwind optsize {
+; CHECK: foo:
+; CHECK: push
+; CHECK: add r7, sp, #4
+; CHECK: sub sp, #4
+entry:
+ %m.i = alloca %struct.buf*, align 4
+ br label %bb
+
+bb:
+ br i1 undef, label %bb3, label %bb2
+
+bb2:
+ call void @bar() nounwind optsize
+ br i1 undef, label %bb, label %bb3
+
+bb3:
+ br i1 undef, label %return, label %bb
+
+return:
+; CHECK: %return
+; 'mov sp, r7' would have left sp in an invalid state
+; CHECK-NOT: mov sp, r7
+; CHECK-NOT: sub sp, #4
+; CHECK: add sp, #4
+ ret void
+}
diff --git a/test/CodeGen/Thumb2/2010-12-03-AddSPNarrowing.ll b/test/CodeGen/Thumb2/2010-12-03-AddSPNarrowing.ll
new file mode 100644
index 000000000000..5b91a5f65aee
--- /dev/null
+++ b/test/CodeGen/Thumb2/2010-12-03-AddSPNarrowing.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+; Radar 8724703: Make sure that a t2ADDrSPi instruction with SP as the
+; destination register is narrowed to tADDspi instead of tADDrSPi.
+
+define void @test() nounwind {
+entry:
+; CHECK: sub.w
+; CHECK: add.w
+ %Buffer.i = alloca [512 x i8], align 4
+ ret void
+}
diff --git a/test/CodeGen/Thumb2/bfi.ll b/test/CodeGen/Thumb2/bfi.ll
index 22473bb35a0a..6fb2fc888d9f 100644
--- a/test/CodeGen/Thumb2/bfi.ll
+++ b/test/CodeGen/Thumb2/bfi.ll
@@ -38,3 +38,14 @@ entry:
%or = or i32 %and2, %and ; <i32> [#uses=1]
ret i32 %or
}
+
+; rdar://8752056
+define i32 @f4(i32 %a) nounwind {
+; CHECK: f4
+; CHECK: movw r1, #3137
+; CHECK: bfi r1, r0, #15, #5
+ %1 = shl i32 %a, 15
+ %ins7 = and i32 %1, 1015808
+ %ins12 = or i32 %ins7, 3137
+ ret i32 %ins12
+}
diff --git a/test/CodeGen/Thumb2/buildvector-crash.ll b/test/CodeGen/Thumb2/buildvector-crash.ll
new file mode 100644
index 000000000000..01ef472d3104
--- /dev/null
+++ b/test/CodeGen/Thumb2/buildvector-crash.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -O3 -mtriple=thumbv7-apple-darwin10 -mcpu=cortex-a8 | FileCheck %s
+; Formerly crashed, 3573915.
+
+define void @RotateStarsFP_Vec() nounwind {
+bb.nph372:
+ br label %bb8
+
+bb8: ; preds = %bb8, %bb.nph372
+ %0 = fadd <4 x float> undef, <float 0xBFEE353F80000000, float 0xBFEE353F80000000, float 0xBFEE353F80000000, float 0xBFEE353F80000000>
+ %1 = fmul <4 x float> %0, undef
+ %2 = fmul <4 x float> %1, undef
+ %3 = fadd <4 x float> undef, %2
+ store <4 x float> %3, <4 x float>* undef, align 4
+ br label %bb8
+; CHECK: RotateStarsFP_Vec:
+; CHECK: vldmia
+}
diff --git a/test/CodeGen/Thumb2/cortex-fp.ll b/test/CodeGen/Thumb2/cortex-fp.ll
index f7ec5a3b577c..d06f8a7beeb0 100644
--- a/test/CodeGen/Thumb2/cortex-fp.ll
+++ b/test/CodeGen/Thumb2/cortex-fp.ll
@@ -19,6 +19,6 @@ entry:
%0 = fmul double %a, %b
; CORTEXM3: blx ___muldf3
; CORTEXM4: blx ___muldf3
-; CORTEXA8: vmul.f64 d0, d1, d0
+; CORTEXA8: vmul.f64 d16, d17, d16
ret double %0
}
diff --git a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
index 583f4057bcd9..b8c8cb122a19 100644
--- a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
+++ b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
@@ -1,15 +1,20 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | grep vmov.f32 | count 1
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | FileCheck %s
define void @fht(float* nocapture %fz, i16 signext %n) nounwind {
+; CHECK: fht:
entry:
br label %bb5
bb5: ; preds = %bb5, %entry
+; CHECK: %bb5
+; CHECK: bne
br i1 undef, label %bb5, label %bb.nph
bb.nph: ; preds = %bb5
br label %bb7
+; Loop preheader
+; CHECK: vmov.f32
bb7: ; preds = %bb9, %bb.nph
%s1.02 = phi float [ undef, %bb.nph ], [ %35, %bb9 ] ; <float> [#uses=3]
%tmp79 = add i32 undef, undef ; <i32> [#uses=1]
@@ -19,6 +24,9 @@ bb7: ; preds = %bb9, %bb.nph
br label %bb8
bb8: ; preds = %bb8, %bb7
+; CHECK: %bb8
+; CHECK-NOT: vmov.f32
+; CHECK: blt
%tmp54 = add i32 0, %tmp53 ; <i32> [#uses=0]
%fi.1 = getelementptr float* %fz, i32 undef ; <float*> [#uses=2]
%tmp80 = add i32 0, %tmp79 ; <i32> [#uses=1]
diff --git a/test/CodeGen/Thumb2/div.ll b/test/CodeGen/Thumb2/div.ll
index e63a115273ff..2c00c70c0db6 100644
--- a/test/CodeGen/Thumb2/div.ll
+++ b/test/CodeGen/Thumb2/div.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 \
+; RUN: llc < %s -mtriple=thumb-apple-darwin -mattr=+thumb2 \
; RUN: | FileCheck %s -check-prefix=CHECK-THUMB
; RUN: llc < %s -march=thumb -mcpu=cortex-m3 -mattr=+thumb2 \
; RUN: | FileCheck %s -check-prefix=CHECK-THUMBV7M
diff --git a/test/CodeGen/Thumb2/large-stack.ll b/test/CodeGen/Thumb2/large-stack.ll
index 97295341858c..68b5d1cc94fb 100644
--- a/test/CodeGen/Thumb2/large-stack.ll
+++ b/test/CodeGen/Thumb2/large-stack.ll
@@ -27,7 +27,7 @@ define i32 @test3() {
; DARWIN: sub.w sp, sp, #805306368
; DARWIN: sub sp, #20
; LINUX: test3:
-; LINUX: stmdb sp!, {r4, r7, r11, lr}
+; LINUX: push.w {r4, r7, r11, lr}
; LINUX: sub.w sp, sp, #805306368
; LINUX: sub sp, #16
%retval = alloca i32, align 4
diff --git a/test/CodeGen/Thumb2/load-global.ll b/test/CodeGen/Thumb2/load-global.ll
deleted file mode 100644
index 46e053ca4ea5..000000000000
--- a/test/CodeGen/Thumb2/load-global.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=static | FileCheck %s -check-prefix=STATIC
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=dynamic-no-pic | FileCheck %s -check-prefix=DYNAMIC
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=pic | FileCheck %s -check-prefix=PIC
-; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -relocation-model=pic | FileCheck %s -check-prefix=LINUX
-
-@G = external global i32
-
-define i32 @test1() {
-; STATIC: _test1:
-; STATIC: .long _G
-
-; DYNAMIC: _test1:
-; DYNAMIC: .long L_G$non_lazy_ptr
-
-; PIC: _test1
-; PIC: add r0, pc
-; PIC: .long L_G$non_lazy_ptr-(LPC0_0+4)
-
-; LINUX: test1
-; LINUX: .long G(GOT)
- %tmp = load i32* @G
- ret i32 %tmp
-}
diff --git a/test/CodeGen/Thumb2/machine-licm-vdup.ll b/test/CodeGen/Thumb2/machine-licm-vdup.ll
deleted file mode 100644
index fde2ee0ab0c9..000000000000
--- a/test/CodeGen/Thumb2/machine-licm-vdup.ll
+++ /dev/null
@@ -1,38 +0,0 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -disable-fp-elim -arm-vdup-splat | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim -arm-vdup-splat | FileCheck %s
-; Modified version of machine-licm.ll with -arm-vdup-splat turned on, 8003375.
-; Eventually this should become the default and be moved into machine-licm.ll.
-; FIXME: the vdup should be hoisted out of the loop, 8248029.
-
-define void @t2(i8* %ptr1, i8* %ptr2) nounwind {
-entry:
-; CHECK: t2:
-; CHECK: mov.w r3, #1065353216
- br i1 undef, label %bb1, label %bb2
-
-bb1:
-; CHECK-NEXT: %bb1
-; CHECK: vdup.32 q1, r3
- %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ]
- %tmp1 = shl i32 %indvar, 2
- %gep1 = getelementptr i8* %ptr1, i32 %tmp1
- %tmp2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %gep1, i32 1)
- %tmp3 = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> %tmp2)
- %gep2 = getelementptr i8* %ptr2, i32 %tmp1
- call void @llvm.arm.neon.vst1.v4f32(i8* %gep2, <4 x float> %tmp3, i32 1)
- %indvar.next = add i32 %indvar, 1
- %cond = icmp eq i32 %indvar.next, 10
- br i1 %cond, label %bb2, label %bb1
-
-bb2:
- ret void
-}
-
-; CHECK-NOT: LCPI1_0:
-; CHECK: .subsections_via_symbols
-
-declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
-
-declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
-
-declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/Thumb2/machine-licm.ll b/test/CodeGen/Thumb2/machine-licm.ll
index b949b2f30506..5e776dd8937c 100644
--- a/test/CodeGen/Thumb2/machine-licm.ll
+++ b/test/CodeGen/Thumb2/machine-licm.ll
@@ -3,9 +3,6 @@
; rdar://7353541
; rdar://7354376
-; The generated code is no where near ideal. It's not recognizing the two
-; constantpool entries being loaded can be merged into one.
-
@GV = external global i32 ; <i32*> [#uses=2]
define void @t1(i32* nocapture %vals, i32 %c) nounwind {
@@ -17,21 +14,21 @@ entry:
bb.nph: ; preds = %entry
; CHECK: BB#1
-; CHECK: ldr.n r2, LCPI0_0
+; CHECK: movw r2, :lower16:L_GV$non_lazy_ptr
+; CHECK: movt r2, :upper16:L_GV$non_lazy_ptr
; CHECK: ldr r2, [r2]
; CHECK: ldr r3, [r2]
; CHECK: LBB0_2
-; CHECK: LCPI0_0:
-; CHECK-NOT: LCPI0_1:
+; CHECK-NOT: LCPI0_0:
; PIC: BB#1
-; PIC: ldr.n r2, LCPI0_0
+; PIC: movw r2, :lower16:(L_GV$non_lazy_ptr-(LPC0_0+4))
+; PIC: movt r2, :upper16:(L_GV$non_lazy_ptr-(LPC0_0+4))
; PIC: add r2, pc
; PIC: ldr r2, [r2]
; PIC: ldr r3, [r2]
; PIC: LBB0_2
-; PIC: LCPI0_0:
-; PIC-NOT: LCPI0_1:
+; PIC-NOT: LCPI0_0:
; PIC: .section
%.pre = load i32* @GV, align 4 ; <i32> [#uses=1]
br label %bb
@@ -55,8 +52,8 @@ return: ; preds = %bb, %entry
define void @t2(i8* %ptr1, i8* %ptr2) nounwind {
entry:
; CHECK: t2:
-; CHECK: adr r{{.}}, #LCPI1_0
-; CHECK: vldmia r3, {d0, d1}
+; CHECK: mov.w r3, #1065353216
+; CHECK: vdup.32 q{{.*}}, r3
br i1 undef, label %bb1, label %bb2
bb1:
@@ -76,11 +73,50 @@ bb2:
ret void
}
-; CHECK: LCPI1_0:
-; CHECK: .section
+; CHECK-NOT: LCPI1_0:
declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
+
+; rdar://8241368
+; isel should not fold immediate into eor's which would have prevented LICM.
+define zeroext i16 @t3(i8 zeroext %data, i16 zeroext %crc) nounwind readnone {
+; CHECK: t3:
+bb.nph:
+; CHECK: bb.nph
+; CHECK: movw {{(r[0-9])|(lr)}}, #32768
+; CHECK: movs {{(r[0-9])|(lr)}}, #8
+; CHECK: movw [[REGISTER:(r[0-9])|(lr)]], #16386
+; CHECK: movw {{(r[0-9])|(lr)}}, #65534
+; CHECK: movt {{(r[0-9])|(lr)}}, #65535
+ br label %bb
+
+bb: ; preds = %bb, %bb.nph
+; CHECK: bb
+; CHECK: eor.w {{(r[0-9])|(lr)}}, {{(r[0-9])|(lr)}}, [[REGISTER]]
+; CHECK: eor.w
+; CHECK-NOT: eor
+; CHECK: and
+ %data_addr.013 = phi i8 [ %data, %bb.nph ], [ %8, %bb ] ; <i8> [#uses=2]
+ %crc_addr.112 = phi i16 [ %crc, %bb.nph ], [ %crc_addr.2, %bb ] ; <i16> [#uses=3]
+ %i.011 = phi i8 [ 0, %bb.nph ], [ %7, %bb ] ; <i8> [#uses=1]
+ %0 = trunc i16 %crc_addr.112 to i8 ; <i8> [#uses=1]
+ %1 = xor i8 %data_addr.013, %0 ; <i8> [#uses=1]
+ %2 = and i8 %1, 1 ; <i8> [#uses=1]
+ %3 = icmp eq i8 %2, 0 ; <i1> [#uses=2]
+ %4 = xor i16 %crc_addr.112, 16386 ; <i16> [#uses=1]
+ %crc_addr.0 = select i1 %3, i16 %crc_addr.112, i16 %4 ; <i16> [#uses=1]
+ %5 = lshr i16 %crc_addr.0, 1 ; <i16> [#uses=2]
+ %6 = or i16 %5, -32768 ; <i16> [#uses=1]
+ %crc_addr.2 = select i1 %3, i16 %5, i16 %6 ; <i16> [#uses=2]
+ %7 = add i8 %i.011, 1 ; <i8> [#uses=2]
+ %8 = lshr i8 %data_addr.013, 1 ; <i8> [#uses=1]
+ %exitcond = icmp eq i8 %7, 8 ; <i1> [#uses=1]
+ br i1 %exitcond, label %bb8, label %bb
+
+bb8: ; preds = %bb
+ ret i16 %crc_addr.2
+}
diff --git a/test/CodeGen/Thumb2/thumb2-badreg-operands.ll b/test/CodeGen/Thumb2/thumb2-badreg-operands.ll
deleted file mode 100644
index 4df06b836fc5..000000000000
--- a/test/CodeGen/Thumb2/thumb2-badreg-operands.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 | FileCheck %s
-
-define void @b(i32 %x) nounwind optsize {
-entry:
-; CHECK: b
-; CHECK: mov r2, sp
-; CHECK: mls r0, r0, r1, r2
-; CHECK: mov sp, r0
- %0 = mul i32 %x, 24 ; <i32> [#uses=1]
- %vla = alloca i8, i32 %0, align 1 ; <i8*> [#uses=1]
- call arm_aapcscc void @a(i8* %vla) nounwind optsize
- ret void
-}
-
-declare void @a(i8*) optsize
diff --git a/test/CodeGen/Thumb2/thumb2-barrier.ll b/test/CodeGen/Thumb2/thumb2-barrier.ll
index a54d09e62919..93ae7c428bdf 100644
--- a/test/CodeGen/Thumb2/thumb2-barrier.ll
+++ b/test/CodeGen/Thumb2/thumb2-barrier.ll
@@ -1,17 +1,31 @@
; RUN: llc < %s -march=thumb -mcpu=cortex-a8 | FileCheck %s
-declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1 )
+declare void @llvm.memory.barrier(i1 , i1 , i1 , i1 , i1)
-define void @t1() {
-; CHECK: t1:
-; CHECK: dsb
- call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 true )
+define void @t_st() {
+; CHECK: t_st:
+; CHECK: dmb st
+ call void @llvm.memory.barrier(i1 false, i1 false, i1 false, i1 true, i1 true)
ret void
}
-define void @t2() {
-; CHECK: t2:
-; CHECK: dmb
- call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 false )
+define void @t_sy() {
+; CHECK: t_sy:
+; CHECK: dmb sy
+ call void @llvm.memory.barrier(i1 true, i1 false, i1 false, i1 true, i1 true)
+ ret void
+}
+
+define void @t_ishst() {
+; CHECK: t_ishst:
+; CHECK: dmb ishst
+ call void @llvm.memory.barrier(i1 false, i1 false, i1 false, i1 true, i1 false)
+ ret void
+}
+
+define void @t_ish() {
+; CHECK: t_ish:
+; CHECK: dmb ish
+ call void @llvm.memory.barrier(i1 true, i1 false, i1 false, i1 true, i1 false)
ret void
}
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt3.ll b/test/CodeGen/Thumb2/thumb2-ifcvt3.ll
index cc2ef140d113..bcf10eff729b 100644
--- a/test/CodeGen/Thumb2/thumb2-ifcvt3.ll
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt3.ll
@@ -23,7 +23,6 @@ bb52: ; preds = %newFuncRoot
; CHECK: movne
; CHECK: moveq
; CHECK: pop
-; CHECK-NEXT: @ BB#1:
%0 = load i64* @posed, align 4 ; <i64> [#uses=3]
%1 = sub i64 %0, %.reload78 ; <i64> [#uses=1]
%2 = ashr i64 %1, 1 ; <i64> [#uses=3]
diff --git a/test/CodeGen/Thumb2/thumb2-ldrd.ll b/test/CodeGen/Thumb2/thumb2-ldrd.ll
index 22d4e88ed17d..a747d5f75697 100644
--- a/test/CodeGen/Thumb2/thumb2-ldrd.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldrd.ll
@@ -4,7 +4,7 @@
define i64 @t(i64 %a) nounwind readonly {
entry:
-;CHECK: ldrd r2, [r2]
+;CHECK: ldrd r2, r3, [r2]
%0 = load i64** @b, align 4
%1 = load i64* %0, align 4
%2 = mul i64 %1, %a
diff --git a/test/CodeGen/Thumb2/thumb2-mov.ll b/test/CodeGen/Thumb2/thumb2-mov.ll
index 1dc3614993bd..adb6dde2c788 100644
--- a/test/CodeGen/Thumb2/thumb2-mov.ll
+++ b/test/CodeGen/Thumb2/thumb2-mov.ll
@@ -53,7 +53,7 @@ define i32 @t2_const_var2_2_ok_1(i32 %lhs) {
define i32 @t2_const_var2_2_ok_2(i32 %lhs) {
;CHECK: t2_const_var2_2_ok_2:
-;CHECK: add.w r0, r0, #-1426063360
+;CHECK: add.w r0, r0, #2868903936
;CHECK: add.w r0, r0, #47616
%ret = add i32 %lhs, 2868951552 ; 0xab00ba00
ret i32 %ret
@@ -61,7 +61,7 @@ define i32 @t2_const_var2_2_ok_2(i32 %lhs) {
define i32 @t2_const_var2_2_ok_3(i32 %lhs) {
;CHECK: t2_const_var2_2_ok_3:
-;CHECK: add.w r0, r0, #-1426019584
+;CHECK: add.w r0, r0, #2868947712
;CHECK: adds r0, #16
%ret = add i32 %lhs, 2868947728 ; 0xab00ab10
ret i32 %ret
@@ -69,7 +69,7 @@ define i32 @t2_const_var2_2_ok_3(i32 %lhs) {
define i32 @t2_const_var2_2_ok_4(i32 %lhs) {
;CHECK: t2_const_var2_2_ok_4:
-;CHECK: add.w r0, r0, #-1426019584
+;CHECK: add.w r0, r0, #2868947712
;CHECK: add.w r0, r0, #1048592
%ret = add i32 %lhs, 2869996304 ; 0xab10ab10
ret i32 %ret
diff --git a/test/CodeGen/Thumb2/thumb2-mul.ll b/test/CodeGen/Thumb2/thumb2-mul.ll
index b1515b514820..8d1de55b4dc6 100644
--- a/test/CodeGen/Thumb2/thumb2-mul.ll
+++ b/test/CodeGen/Thumb2/thumb2-mul.ll
@@ -6,3 +6,21 @@ define i32 @f1(i32 %a, i32 %b, i32 %c) {
%tmp = mul i32 %a, %b
ret i32 %tmp
}
+
+%struct.CMPoint = type { %struct.Point, float, float, [5 x float] }
+%struct.Point = type { float, float }
+
+define %struct.CMPoint* @t1(i32 %i, i32 %j, i32 %n, %struct.CMPoint* %thePoints) nounwind readnone ssp {
+entry:
+; CHECK: t1:
+; CHECK: mla r0, r2, r0, r1
+; CHECK: add.w r0, r0, r0, lsl #3
+; CHECK: add.w r0, r3, r0, lsl #2
+ %mul = mul i32 %n, %i
+ %add = add i32 %mul, %j
+ %0 = ptrtoint %struct.CMPoint* %thePoints to i32
+ %mul5 = mul i32 %add, 36
+ %add6 = add i32 %mul5, %0
+ %1 = inttoptr i32 %add6 to %struct.CMPoint*
+ ret %struct.CMPoint* %1
+}
diff --git a/test/CodeGen/Thumb2/thumb2-select_xform.ll b/test/CodeGen/Thumb2/thumb2-select_xform.ll
index 56cb1f6fb409..ceefabbbfa21 100644
--- a/test/CodeGen/Thumb2/thumb2-select_xform.ll
+++ b/test/CodeGen/Thumb2/thumb2-select_xform.ll
@@ -2,8 +2,8 @@
define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind {
; CHECK: t1
-; CHECK: sub.w r0, r1, #-2147483648
-; CHECK: subs r0, #1
+; CHECK: mvn r0, #-2147483648
+; CHECK: add r0, r1
; CHECK: cmp r2, #10
; CHECK: it gt
; CHECK: movgt r0, r1
diff --git a/test/CodeGen/Thumb2/thumb2-spill-q.ll b/test/CodeGen/Thumb2/thumb2-spill-q.ll
index 4f92c9333806..d9a0617f5a46 100644
--- a/test/CodeGen/Thumb2/thumb2-spill-q.ll
+++ b/test/CodeGen/Thumb2/thumb2-spill-q.ll
@@ -15,11 +15,34 @@ define void @aaa(%quuz* %this, i8* %block) {
; CHECK: vst1.64 {{.*}}[{{.*}}, :128]
; CHECK: vld1.64 {{.*}}[{{.*}}, :128]
entry:
- %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1]
+ %aligned_vec = alloca <4 x float>, align 16
+ %"alloca point" = bitcast i32 0 to i32
+ %vecptr = bitcast <4 x float>* %aligned_vec to i8*
+ %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %vecptr, i32 1) nounwind
store float 6.300000e+01, float* undef, align 4
%1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1]
store float 0.000000e+00, float* undef, align 4
%2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1]
+ %ld3 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+ store float 0.000000e+00, float* undef, align 4
+ %ld4 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+ store float 0.000000e+00, float* undef, align 4
+ %ld5 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+ store float 0.000000e+00, float* undef, align 4
+ %ld6 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+ store float 0.000000e+00, float* undef, align 4
+ %ld7 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+ store float 0.000000e+00, float* undef, align 4
+ %ld8 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+ store float 0.000000e+00, float* undef, align 4
+ %ld9 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+ store float 0.000000e+00, float* undef, align 4
+ %ld10 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+ store float 0.000000e+00, float* undef, align 4
+ %ld11 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+ store float 0.000000e+00, float* undef, align 4
+ %ld12 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+ store float 0.000000e+00, float* undef, align 4
%val173 = load <4 x float>* undef ; <<4 x float>> [#uses=1]
br label %bb4
@@ -44,7 +67,16 @@ bb4: ; preds = %bb193, %entry
%18 = fmul <4 x float> %17, %val173 ; <<4 x float>> [#uses=1]
%19 = shufflevector <4 x float> %18, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
%20 = shufflevector <2 x float> %19, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
- %21 = fadd <4 x float> zeroinitializer, %20 ; <<4 x float>> [#uses=2]
+ %tmp1 = fadd <4 x float> %20, %ld3
+ %tmp2 = fadd <4 x float> %tmp1, %ld4
+ %tmp3 = fadd <4 x float> %tmp2, %ld5
+ %tmp4 = fadd <4 x float> %tmp3, %ld6
+ %tmp5 = fadd <4 x float> %tmp4, %ld7
+ %tmp6 = fadd <4 x float> %tmp5, %ld8
+ %tmp7 = fadd <4 x float> %tmp6, %ld9
+ %tmp8 = fadd <4 x float> %tmp7, %ld10
+ %tmp9 = fadd <4 x float> %tmp8, %ld11
+ %21 = fadd <4 x float> %tmp9, %ld12
%22 = fcmp ogt <4 x float> %besterror.0.2264, %21 ; <<4 x i1>> [#uses=0]
%tmp = extractelement <4 x i1> %22, i32 0
br i1 %tmp, label %bb193, label %bb186
diff --git a/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll b/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll
index c39b82a1fe36..a662dd58df57 100644
--- a/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll
+++ b/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -o - -march=x86 -mattr=+mmx | FileCheck %s
+; There are no MMX instructions here. We use add+adcl for the adds.
define <1 x i64> @unsigned_add3(<1 x i64>* %a, <1 x i64>* %b, i32 %count) nounwind {
entry:
@@ -7,9 +8,8 @@ entry:
bb26: ; preds = %bb26, %entry
-; CHECK: movq ({{.*}},8), %mm
-; CHECK: paddq ({{.*}},8), %mm
-; CHECK: paddq %mm{{[0-7]}}, %mm
+; CHECK: addl %e
+; CHECK: adcl %e
%i.037.0 = phi i32 [ 0, %entry ], [ %tmp25, %bb26 ] ; <i32> [#uses=3]
%sum.035.0 = phi <1 x i64> [ zeroinitializer, %entry ], [ %tmp22, %bb26 ] ; <<1 x i64>> [#uses=1]
@@ -27,3 +27,38 @@ bb31: ; preds = %bb26, %entry
%sum.035.1 = phi <1 x i64> [ zeroinitializer, %entry ], [ %tmp22, %bb26 ] ; <<1 x i64>> [#uses=1]
ret <1 x i64> %sum.035.1
}
+
+
+; This is the original test converted to use MMX intrinsics.
+
+define <1 x i64> @unsigned_add3a(x86_mmx* %a, x86_mmx* %b, i32 %count) nounwind {
+entry:
+ %tmp2943 = bitcast <1 x i64><i64 0> to x86_mmx
+ %tmp2942 = icmp eq i32 %count, 0 ; <i1> [#uses=1]
+ br i1 %tmp2942, label %bb31, label %bb26
+
+bb26: ; preds = %bb26, %entry
+
+; CHECK: movq ({{.*}},8), %mm
+; CHECK: paddq ({{.*}},8), %mm
+; CHECK: paddq %mm{{[0-7]}}, %mm
+
+ %i.037.0 = phi i32 [ 0, %entry ], [ %tmp25, %bb26 ] ; <i32> [#uses=3]
+ %sum.035.0 = phi x86_mmx [ %tmp2943, %entry ], [ %tmp22, %bb26 ] ; <x86_mmx> [#uses=1]
+ %tmp13 = getelementptr x86_mmx* %b, i32 %i.037.0 ; <x86_mmx*> [#uses=1]
+ %tmp14 = load x86_mmx* %tmp13 ; <x86_mmx> [#uses=1]
+ %tmp18 = getelementptr x86_mmx* %a, i32 %i.037.0 ; <x86_mmx*> [#uses=1]
+ %tmp19 = load x86_mmx* %tmp18 ; <x86_mmx> [#uses=1]
+ %tmp21 = call x86_mmx @llvm.x86.mmx.padd.q (x86_mmx %tmp19, x86_mmx %tmp14) ; <x86_mmx> [#uses=1]
+ %tmp22 = call x86_mmx @llvm.x86.mmx.padd.q (x86_mmx %tmp21, x86_mmx %sum.035.0) ; <x86_mmx> [#uses=2]
+ %tmp25 = add i32 %i.037.0, 1 ; <i32> [#uses=2]
+ %tmp29 = icmp ult i32 %tmp25, %count ; <i1> [#uses=1]
+ br i1 %tmp29, label %bb26, label %bb31
+
+bb31: ; preds = %bb26, %entry
+ %sum.035.1 = phi x86_mmx [ %tmp2943, %entry ], [ %tmp22, %bb26 ] ; <x86_mmx> [#uses=1]
+ %t = bitcast x86_mmx %sum.035.1 to <1 x i64>
+ ret <1 x i64> %t
+}
+
+declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx)
diff --git a/test/CodeGen/X86/2007-05-15-maskmovq.ll b/test/CodeGen/X86/2007-05-15-maskmovq.ll
index 2093b8f68744..006cf2e43a2f 100644
--- a/test/CodeGen/X86/2007-05-15-maskmovq.ll
+++ b/test/CodeGen/X86/2007-05-15-maskmovq.ll
@@ -5,10 +5,10 @@ target triple = "i686-apple-darwin8"
define void @test(<1 x i64> %c64, <1 x i64> %mask1, i8* %P) {
entry:
- %tmp4 = bitcast <1 x i64> %mask1 to <8 x i8> ; <<8 x i8>> [#uses=1]
- %tmp6 = bitcast <1 x i64> %c64 to <8 x i8> ; <<8 x i8>> [#uses=1]
- tail call void @llvm.x86.mmx.maskmovq( <8 x i8> %tmp6, <8 x i8> %tmp4, i8* %P )
+ %tmp4 = bitcast <1 x i64> %mask1 to x86_mmx ; <x86_mmx> [#uses=1]
+ %tmp6 = bitcast <1 x i64> %c64 to x86_mmx ; <x86_mmx> [#uses=1]
+ tail call void @llvm.x86.mmx.maskmovq( x86_mmx %tmp4, x86_mmx %tmp6, i8* %P )
ret void
}
-declare void @llvm.x86.mmx.maskmovq(<8 x i8>, <8 x i8>, i8*)
+declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*)
diff --git a/test/CodeGen/X86/2007-06-15-IntToMMX.ll b/test/CodeGen/X86/2007-06-15-IntToMMX.ll
index 6128d8b92d11..660d4fe7b19e 100644
--- a/test/CodeGen/X86/2007-06-15-IntToMMX.ll
+++ b/test/CodeGen/X86/2007-06-15-IntToMMX.ll
@@ -1,17 +1,16 @@
; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep paddusw
-@R = external global <1 x i64> ; <<1 x i64>*> [#uses=1]
+@R = external global x86_mmx ; <x86_mmx*> [#uses=1]
define void @foo(<1 x i64> %A, <1 x i64> %B) {
entry:
- %tmp4 = bitcast <1 x i64> %B to <4 x i16> ; <<4 x i16>> [#uses=1]
- %tmp6 = bitcast <1 x i64> %A to <4 x i16> ; <<4 x i16>> [#uses=1]
- %tmp7 = tail call <4 x i16> @llvm.x86.mmx.paddus.w( <4 x i16> %tmp6, <4 x i16> %tmp4 ) ; <<4 x i16>> [#uses=1]
- %tmp8 = bitcast <4 x i16> %tmp7 to <1 x i64> ; <<1 x i64>> [#uses=1]
- store <1 x i64> %tmp8, <1 x i64>* @R
+ %tmp2 = bitcast <1 x i64> %A to x86_mmx
+ %tmp3 = bitcast <1 x i64> %B to x86_mmx
+ %tmp7 = tail call x86_mmx @llvm.x86.mmx.paddus.w( x86_mmx %tmp2, x86_mmx %tmp3 ) ; <x86_mmx> [#uses=1]
+ store x86_mmx %tmp7, x86_mmx* @R
tail call void @llvm.x86.mmx.emms( )
ret void
}
-declare <4 x i16> @llvm.x86.mmx.paddus.w(<4 x i16>, <4 x i16>)
+declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx)
declare void @llvm.x86.mmx.emms()
diff --git a/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll b/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll
index 2c513f17811a..1c5e6766fd6e 100644
--- a/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll
+++ b/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll
@@ -2,19 +2,17 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx | grep {movd %rdi, %mm1}
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx | grep {paddusw %mm0, %mm1}
-@R = external global <1 x i64> ; <<1 x i64>*> [#uses=1]
+@R = external global x86_mmx ; <x86_mmx*> [#uses=1]
define void @foo(<1 x i64> %A, <1 x i64> %B) nounwind {
entry:
- %tmp4 = bitcast <1 x i64> %B to <4 x i16> ; <<4 x i16>> [#uses=1]
- %tmp6 = bitcast <1 x i64> %A to <4 x i16> ; <<4 x i16>> [#uses=1]
- %tmp7 = tail call <4 x i16> @llvm.x86.mmx.paddus.w( <4 x i16> %tmp6, <4 x i16> %tmp4 ) ; <<4 x i16>> [#uses=1]
- %tmp8 = bitcast <4 x i16> %tmp7 to <1 x i64> ; <<1 x i64>> [#uses=1]
- store <1 x i64> %tmp8, <1 x i64>* @R
+ %tmp4 = bitcast <1 x i64> %B to x86_mmx ; <<4 x i16>> [#uses=1]
+ %tmp6 = bitcast <1 x i64> %A to x86_mmx ; <<4 x i16>> [#uses=1]
+ %tmp7 = tail call x86_mmx @llvm.x86.mmx.paddus.w( x86_mmx %tmp6, x86_mmx %tmp4 ) ; <x86_mmx> [#uses=1]
+ store x86_mmx %tmp7, x86_mmx* @R
tail call void @llvm.x86.mmx.emms( )
ret void
}
-declare <4 x i16> @llvm.x86.mmx.paddus.w(<4 x i16>, <4 x i16>)
-
+declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx)
declare void @llvm.x86.mmx.emms()
diff --git a/test/CodeGen/X86/2007-10-16-fp80_select.ll b/test/CodeGen/X86/2007-10-16-fp80_select.ll
deleted file mode 100644
index 3f9845c3c3ec..000000000000
--- a/test/CodeGen/X86/2007-10-16-fp80_select.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llc < %s -march=x86
-; ModuleID = 'bugpoint-reduced-simplified.bc'
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i686-apple-darwin9"
- %struct.wxPoint2DInt = type { i32, i32 }
-
-define x86_fp80 @_ZNK12wxPoint2DInt14GetVectorAngleEv(%struct.wxPoint2DInt* %this) {
-entry:
- br i1 false, label %cond_true, label %UnifiedReturnBlock
-
-cond_true: ; preds = %entry
- %tmp8 = load i32* null, align 4 ; <i32> [#uses=1]
- %tmp9 = icmp sgt i32 %tmp8, -1 ; <i1> [#uses=1]
- %retval = select i1 %tmp9, x86_fp80 0xK4005B400000000000000, x86_fp80 0xK40078700000000000000 ; <x86_fp80> [#uses=1]
- ret x86_fp80 %retval
-
-UnifiedReturnBlock: ; preds = %entry
- ret x86_fp80 0xK4005B400000000000000
-}
diff --git a/test/CodeGen/X86/2008-02-18-TailMergingBug.ll b/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
index 7463a0eebf34..bdacf5071128 100644
--- a/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
+++ b/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah -stats |& grep {Number of block tails merged} | grep 9
+; RUN: llc < %s -march=x86 -mcpu=yonah -stats |& grep {Number of block tails merged} | grep 16
; PR1909
@.str = internal constant [48 x i8] c"transformed bounds: (%.2f, %.2f), (%.2f, %.2f)\0A\00" ; <[48 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-04-08-CoalescerCrash.ll b/test/CodeGen/X86/2008-04-08-CoalescerCrash.ll
index dc8c097efc50..5089e8c5b69d 100644
--- a/test/CodeGen/X86/2008-04-08-CoalescerCrash.ll
+++ b/test/CodeGen/X86/2008-04-08-CoalescerCrash.ll
@@ -5,15 +5,15 @@ entry:
tail call void asm sideeffect "# top of block", "~{dirflag},~{fpsr},~{flags},~{di},~{si},~{dx},~{cx},~{ax}"( ) nounwind
tail call void asm sideeffect ".file \224443946.c\22", "~{dirflag},~{fpsr},~{flags}"( ) nounwind
tail call void asm sideeffect ".line 8", "~{dirflag},~{fpsr},~{flags}"( ) nounwind
- %tmp1 = tail call <2 x i32> asm sideeffect "movd $1, $0", "=={mm4},{bp},~{dirflag},~{fpsr},~{flags},~{memory}"( i32 undef ) nounwind ; <<2 x i32>> [#uses=1]
+ %tmp1 = tail call x86_mmx asm sideeffect "movd $1, $0", "=={mm4},{bp},~{dirflag},~{fpsr},~{flags},~{memory}"( i32 undef ) nounwind ; <x86_mmx> [#uses=1]
tail call void asm sideeffect ".file \224443946.c\22", "~{dirflag},~{fpsr},~{flags}"( ) nounwind
tail call void asm sideeffect ".line 9", "~{dirflag},~{fpsr},~{flags}"( ) nounwind
- %tmp3 = tail call i32 asm sideeffect "movd $1, $0", "=={bp},{mm3},~{dirflag},~{fpsr},~{flags},~{memory}"( <2 x i32> undef ) nounwind ; <i32> [#uses=1]
+ %tmp3 = tail call i32 asm sideeffect "movd $1, $0", "=={bp},{mm3},~{dirflag},~{fpsr},~{flags},~{memory}"( x86_mmx undef ) nounwind ; <i32> [#uses=1]
tail call void asm sideeffect ".file \224443946.c\22", "~{dirflag},~{fpsr},~{flags}"( ) nounwind
tail call void asm sideeffect ".line 10", "~{dirflag},~{fpsr},~{flags}"( ) nounwind
- tail call void asm sideeffect "movntq $0, 0($1,$2)", "{mm0},{di},{bp},~{dirflag},~{fpsr},~{flags},~{memory}"( <2 x i32> undef, i32 undef, i32 %tmp3 ) nounwind
+ tail call void asm sideeffect "movntq $0, 0($1,$2)", "{mm0},{di},{bp},~{dirflag},~{fpsr},~{flags},~{memory}"( x86_mmx undef, i32 undef, i32 %tmp3 ) nounwind
tail call void asm sideeffect ".file \224443946.c\22", "~{dirflag},~{fpsr},~{flags}"( ) nounwind
tail call void asm sideeffect ".line 11", "~{dirflag},~{fpsr},~{flags}"( ) nounwind
- %tmp8 = tail call i32 asm sideeffect "movd $1, $0", "=={bp},{mm4},~{dirflag},~{fpsr},~{flags},~{memory}"( <2 x i32> %tmp1 ) nounwind ; <i32> [#uses=0]
+ %tmp8 = tail call i32 asm sideeffect "movd $1, $0", "=={bp},{mm4},~{dirflag},~{fpsr},~{flags},~{memory}"( x86_mmx %tmp1 ) nounwind ; <i32> [#uses=0]
ret i32 undef
}
diff --git a/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll b/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
index 500cd1f08cfa..86652826aeac 100644
--- a/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
+++ b/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movsd | count 5
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movl | count 2
+; RUN: llc < %s -mtriple=i686-linux -mattr=+sse2 | grep movsd | count 5
+; RUN: llc < %s -mtriple=i686-linux -mattr=+sse2 | grep movl | count 2
@atomic = global double 0.000000e+00 ; <double*> [#uses=1]
@atomic2 = global double 0.000000e+00 ; <double*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-07-19-movups-spills.ll b/test/CodeGen/X86/2008-07-19-movups-spills.ll
index 98919ee5221a..cf04dcf0f18c 100644
--- a/test/CodeGen/X86/2008-07-19-movups-spills.ll
+++ b/test/CodeGen/X86/2008-07-19-movups-spills.ll
@@ -1,6 +1,7 @@
; RUN: llc < %s -mtriple=i686-pc-linux -realign-stack=1 -mattr=sse2 | grep movaps | count 75
-; RUN: llc < %s -mtriple=i686-pc-linux -realign-stack=0 -mattr=sse2 | grep movaps | count 1
+; RUN: llc < %s -mtriple=i686-pc-linux -realign-stack=0 -mattr=sse2 | grep movaps | count 75
; PR2539
+; PR8969 - make 32-bit linux have a 16-byte aligned stack
external global <4 x float>, align 1 ; <<4 x float>*>:0 [#uses=2]
external global <4 x float>, align 1 ; <<4 x float>*>:1 [#uses=1]
diff --git a/test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll b/test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll
index c76dd7de1256..53402c04511c 100644
--- a/test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll
+++ b/test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll
@@ -17,11 +17,13 @@ entry:
br i1 false, label %bb.nph144.split, label %bb133
bb.nph144.split: ; preds = %entry
- tail call void @llvm.x86.mmx.maskmovq( <8 x i8> zeroinitializer, <8 x i8> zeroinitializer, i8* null ) nounwind
+ %tmp = bitcast <8 x i8> zeroinitializer to x86_mmx
+ %tmp2 = bitcast <8 x i8> zeroinitializer to x86_mmx
+ tail call void @llvm.x86.mmx.maskmovq( x86_mmx %tmp, x86_mmx %tmp2, i8* null ) nounwind
unreachable
bb133: ; preds = %entry
ret void
}
-declare void @llvm.x86.mmx.maskmovq(<8 x i8>, <8 x i8>, i8*) nounwind
+declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*) nounwind
diff --git a/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll b/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll
index 60be0d51e7e7..2dc1deaf1738 100644
--- a/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll
+++ b/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll
@@ -1,6 +1,9 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mattr=+mmx | grep unpcklpd
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mattr=+mmx | grep unpckhpd
; RUN: llc < %s -march=x86 -mattr=+sse2 | grep cvttpd2pi | count 1
; RUN: llc < %s -march=x86 -mattr=+sse2 | grep cvtpi2pd | count 1
-; PR2687
+; originally from PR2687, but things don't work that way any more.
+; there are no MMX instructions here; we use XMM.
define <2 x double> @a(<2 x i32> %x) nounwind {
entry:
@@ -13,3 +16,20 @@ entry:
%y = fptosi <2 x double> %x to <2 x i32>
ret <2 x i32> %y
}
+
+; This is how to get MMX instructions.
+
+define <2 x double> @a2(x86_mmx %x) nounwind {
+entry:
+ %y = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %x)
+ ret <2 x double> %y
+}
+
+define x86_mmx @b2(<2 x double> %x) nounwind {
+entry:
+ %y = tail call x86_mmx @llvm.x86.sse.cvttpd2pi (<2 x double> %x)
+ ret x86_mmx %y
+}
+
+declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx)
+declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>)
diff --git a/test/CodeGen/X86/2008-09-17-inline-asm-1.ll b/test/CodeGen/X86/2008-09-17-inline-asm-1.ll
index 3c64fe45c997..86e50c98bfdb 100644
--- a/test/CodeGen/X86/2008-09-17-inline-asm-1.ll
+++ b/test/CodeGen/X86/2008-09-17-inline-asm-1.ll
@@ -15,14 +15,16 @@
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin8"
-@x = common global i32 0 ; <i32*> [#uses=1]
+@x = common global i32 0
define i32 @aci(i32* %pw) nounwind {
entry:
- %0 = load i32* @x, align 4 ; <i32> [#uses=1]
- %asmtmp = tail call { i32, i32 } asm "movl $0, %eax\0A\090:\0A\09test %eax, %eax\0A\09je 1f\0A\09movl %eax, $2\0A\09incl $2\0A\09lock\0A\09cmpxchgl $2, $0\0A\09jne 0b\0A\091:", "=*m,=&{ax},=&r,*m,~{dirflag},~{fpsr},~{flags},~{memory},~{cc}"(i32* %pw, i32* %pw) nounwind ; <{ i32, i32 }> [#uses=0]
- %asmtmp2 = tail call { i32, i32 } asm "movl $0, %edx\0A\090:\0A\09test %edx, %edx\0A\09je 1f\0A\09movl %edx, $2\0A\09incl $2\0A\09lock\0A\09cmpxchgl $2, $0\0A\09jne 0b\0A\091:", "=*m,=&{dx},=&r,*m,~{dirflag},~{fpsr},~{flags},~{memory},~{cc}"(i32* %pw, i32* %pw) nounwind ; <{ i32, i32 }> [#uses=1]
- %asmresult3 = extractvalue { i32, i32 } %asmtmp2, 0 ; <i32> [#uses=1]
- %1 = add i32 %asmresult3, %0 ; <i32> [#uses=1]
- ret i32 %1
+ %0 = load i32* @x, align 4
+ %asmtmp = tail call { i32, i32 } asm "movl $0, %eax\0A\090:\0A\09test %eax, %eax\0A\09je 1f\0A\09movl %eax, $2\0A\09incl $2\0A\09lock\0A\09cmpxchgl $2, $0\0A\09jne 0b\0A\091:", "=*m,=&{ax},=&r,*m,~{dirflag},~{fpsr},~{flags},~{memory},~{cc}"(i32* %pw, i32* %pw) nounwind
+ %asmtmp2 = tail call { i32, i32 } asm "movl $0, %edx\0A\090:\0A\09test %edx, %edx\0A\09je 1f\0A\09movl %edx, $2\0A\09incl $2\0A\09lock\0A\09cmpxchgl $2, $0\0A\09jne 0b\0A\091:", "=*m,=&{dx},=&r,*m,~{dirflag},~{fpsr},~{flags},~{memory},~{cc}"(i32* %pw, i32* %pw) nounwind
+ %asmresult2 = extractvalue { i32, i32 } %asmtmp, 0
+ %asmresult3 = extractvalue { i32, i32 } %asmtmp2, 0
+ %1 = add i32 %asmresult2, %asmresult3
+ %2 = add i32 %0, %1
+ ret i32 %2
}
diff --git a/test/CodeGen/X86/2008-10-27-CoalescerBug.ll b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
index afeb358da572..9d144a4be0e9 100644
--- a/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
@@ -1,6 +1,9 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -stats |& not grep {Number of register spills}
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -stats |& FileCheck %s
+; Now this test spills one register. But a reload in the loop is cheaper than
+; the divsd so it's a win.
define fastcc void @fourn(double* %data, i32 %isign) nounwind {
+; CHECK: fourn
entry:
br label %bb
@@ -11,6 +14,11 @@ bb: ; preds = %bb, %entry
%1 = icmp sgt i32 %0, 2 ; <i1> [#uses=1]
br i1 %1, label %bb30.loopexit, label %bb
+; CHECK: %bb30.loopexit
+; CHECK: divsd %xmm0
+; CHECK: movsd %xmm0, 16(%esp)
+; CHECK: .align
+; CHECK-NEXT: %bb3
bb3: ; preds = %bb30.loopexit, %bb25, %bb3
%2 = load i32* null, align 4 ; <i32> [#uses=1]
%3 = mul i32 %2, 0 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2008-10-27-StackRealignment.ll b/test/CodeGen/X86/2008-10-27-StackRealignment.ll
index 784bc72f42e9..3d0766cde845 100644
--- a/test/CodeGen/X86/2008-10-27-StackRealignment.ll
+++ b/test/CodeGen/X86/2008-10-27-StackRealignment.ll
@@ -1,8 +1,8 @@
; Linux doesn't support stack realignment for functions with allocas (PR2888).
; Until it does, we shouldn't use movaps to access the stack. On targets with
; sufficiently aligned stack (e.g. darwin) we should.
-
-; RUN: llc < %s -mtriple=i386-pc-linux-gnu -mcpu=yonah | not grep movaps
+; PR8969 - make 32-bit linux have a 16-byte aligned stack
+; RUN: llc < %s -mtriple=i386-pc-linux-gnu -mcpu=yonah | grep movaps | count 2
; RUN: llc < %s -mtriple=i686-apple-darwin9 -mcpu=yonah | grep movaps | count 2
diff --git a/test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll b/test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll
deleted file mode 100644
index 2e114ab5ae88..000000000000
--- a/test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -mtriple=i686-pc-linux-gnu | grep -- -1985 | count 1
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
-target triple = "i686-pc-linux-gnu"
-
-define zeroext i16 @a(i16 zeroext %x) nounwind {
-entry:
- %div = udiv i16 %x, 33 ; <i32> [#uses=1]
- ret i16 %div
-}
diff --git a/test/CodeGen/X86/2008-11-29-DivideConstant16bitSigned.ll b/test/CodeGen/X86/2008-11-29-DivideConstant16bitSigned.ll
deleted file mode 100644
index 7c811afa51d3..000000000000
--- a/test/CodeGen/X86/2008-11-29-DivideConstant16bitSigned.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -mtriple=i686-pc-linux-gnu | grep -- -1985
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
-target triple = "i686-pc-linux-gnu"
-
-define signext i16 @a(i16 signext %x) nounwind {
-entry:
- %div = sdiv i16 %x, 33 ; <i32> [#uses=1]
- ret i16 %div
-}
diff --git a/test/CodeGen/X86/2009-01-13-DoubleUpdate.ll b/test/CodeGen/X86/2009-01-13-DoubleUpdate.ll
index 9c71469b5b20..4feb764bec6b 100644
--- a/test/CodeGen/X86/2009-01-13-DoubleUpdate.ll
+++ b/test/CodeGen/X86/2009-01-13-DoubleUpdate.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -disable-mmx -enable-legalize-types-checking
+; RUN: llc < %s -march=x86 -mattr=+sse2 -enable-legalize-types-checking
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
diff --git a/test/CodeGen/X86/2009-01-27-NullStrings.ll b/test/CodeGen/X86/2009-01-27-NullStrings.ll
index 8684f4a19ca4..8b3094be4b06 100644
--- a/test/CodeGen/X86/2009-01-27-NullStrings.ll
+++ b/test/CodeGen/X86/2009-01-27-NullStrings.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -mtriple=i686-apple-darwin | FileCheck %s
; CHECK: .section __TEXT,__cstring,cstring_literals
-@x = internal constant [1 x i8] zeroinitializer ; <[1 x i8]*> [#uses=1]
+@x = internal unnamed_addr constant [1 x i8] zeroinitializer ; <[1 x i8]*> [#uses=1]
@y = global [1 x i8]* @x
diff --git a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
index bb01e5afceff..0b5b7bdd94d7 100644
--- a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
+++ b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -stats |& grep {6 machine-licm}
+; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -stats |& grep {8 machine-licm}
; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 | FileCheck %s
; rdar://6627786
; rdar://7792037
diff --git a/test/CodeGen/X86/2009-04-24.ll b/test/CodeGen/X86/2009-04-24.ll
index 757042e5be42..dd8823574cde 100644
--- a/test/CodeGen/X86/2009-04-24.ll
+++ b/test/CodeGen/X86/2009-04-24.ll
@@ -1,5 +1,6 @@
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -regalloc=fast -relocation-model=pic > %t2
-; RUN: grep {leaq.*TLSGD.*__tls_get_addr} %t2
+; RUN: grep {leaq.*TLSGD} %t2
+; RUN: grep {__tls_get_addr} %t2
; PR4004
@i = thread_local global i32 15
diff --git a/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll b/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll
index c5982285afe0..98b1e0ed2f42 100644
--- a/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll
+++ b/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll
@@ -1,9 +1,8 @@
-; RUN: llc < %s | grep "subq.*\\\$40, \\\%rsp"
-target triple = "x86_64-pc-mingw64"
+; RUN: llc -mtriple=x86_64-mingw32 < %s | FileCheck %s
+; CHECK-NOT: -{{[1-9][0-9]*}}(%rsp)
define x86_fp80 @a(i64 %x) nounwind readnone {
entry:
- %conv = sitofp i64 %x to x86_fp80 ; <x86_fp80> [#uses=1]
- ret x86_fp80 %conv
+ %conv = sitofp i64 %x to x86_fp80 ; <x86_fp80> [#uses=1]
+ ret x86_fp80 %conv
}
-
diff --git a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
index 810a6f4d6c65..12bd28518762 100644
--- a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
+++ b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
@@ -1,12 +1,10 @@
-; RUN: llc < %s -o %t1
-; RUN: grep "subq.*\\\$72, \\\%rsp" %t1
-; RUN: grep "movaps \\\%xmm8, 32\\\(\\\%rsp\\\)" %t1
-; RUN: grep "movaps \\\%xmm7, 48\\\(\\\%rsp\\\)" %t1
-target triple = "x86_64-pc-mingw64"
+; RUN: llc -mtriple=x86_64-mingw32 < %s | FileCheck %s
+; CHECK: subq $40, %rsp
+; CHECK: movaps %xmm8, (%rsp)
+; CHECK: movaps %xmm7, 16(%rsp)
define i32 @a() nounwind {
entry:
- tail call void asm sideeffect "", "~{xmm7},~{xmm8},~{dirflag},~{fpsr},~{flags}"() nounwind
- ret i32 undef
+ tail call void asm sideeffect "", "~{xmm7},~{xmm8},~{dirflag},~{fpsr},~{flags}"() nounwind
+ ret i32 undef
}
-
diff --git a/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll b/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll
index 336f17e2a325..01852a6eca1f 100644
--- a/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll
+++ b/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx | not grep movl
+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 | not grep movl
define <8 x i8> @a(i8 zeroext %x) nounwind {
%r = insertelement <8 x i8> undef, i8 %x, i32 0
diff --git a/test/CodeGen/X86/2009-07-07-SplitICmp.ll b/test/CodeGen/X86/2009-07-07-SplitICmp.ll
index eb9378b9527b..366985678e54 100644
--- a/test/CodeGen/X86/2009-07-07-SplitICmp.ll
+++ b/test/CodeGen/X86/2009-07-07-SplitICmp.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -disable-mmx
+; RUN: llc < %s -march=x86
define void @test2(<2 x i32> %A, <2 x i32> %B, <2 x i32>* %C) nounwind {
%D = icmp sgt <2 x i32> %A, %B
diff --git a/test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll b/test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll
index b9b09a3f0004..288eef4f6991 100644
--- a/test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll
+++ b/test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll
@@ -1,10 +1,12 @@
; RUN: llc < %s -march=x86-64
; PR4669
-declare <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64>, i32)
+declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32)
define <1 x i64> @test(i64 %t) {
entry:
%t1 = insertelement <1 x i64> undef, i64 %t, i32 0
- %t2 = tail call <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64> %t1, i32 48)
- ret <1 x i64> %t2
+ %t0 = bitcast <1 x i64> %t1 to x86_mmx
+ %t2 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %t0, i32 48)
+ %t3 = bitcast x86_mmx %t2 to <1 x i64>
+ ret <1 x i64> %t3
}
diff --git a/test/CodeGen/X86/2009-08-06-inlineasm.ll b/test/CodeGen/X86/2009-08-06-inlineasm.ll
index de32c2159ce1..f9b5f9e0b1fd 100644
--- a/test/CodeGen/X86/2009-08-06-inlineasm.ll
+++ b/test/CodeGen/X86/2009-08-06-inlineasm.ll
@@ -1,10 +1,12 @@
-; RUN: llc -mtriple=i386-pc-linux-gnu < %s
+; RUN: false
+; XRUN: llc -mtriple=i386-pc-linux-gnu < %s
; PR4668
; XFAIL: *
; FIXME: If the coalescer happens to coalesce %level.1 with the copy to EAX
; (for ret) then this will fail to compile. The fundamental problem is
; once the coalescer fixes a virtual register to physical register we can't
-; evict it.
+; evict it. This started passing again due to the changes for PR8969
+; so I've disabled it with a bigger stick.
define i32 @x(i32 %qscale) nounwind {
entry:
diff --git a/test/CodeGen/X86/2009-09-10-SpillComments.ll b/test/CodeGen/X86/2009-09-10-SpillComments.ll
index f9ca861c558a..adac20336048 100644
--- a/test/CodeGen/X86/2009-09-10-SpillComments.ll
+++ b/test/CodeGen/X86/2009-09-10-SpillComments.ll
@@ -2,9 +2,9 @@
; This test shouldn't require spills.
-; CHECK: subq $8, %rsp
+; CHECK: pushq
; CHECK-NOT: $rsp
-; CHECK: addq $8, %rsp
+; CHECK: popq
%struct..0anon = type { i32 }
%struct.rtvec_def = type { i32, [1 x %struct..0anon] }
diff --git a/test/CodeGen/X86/2009-12-11-TLSNoRedZone.ll b/test/CodeGen/X86/2009-12-11-TLSNoRedZone.ll
index f7ba661c4f75..823e0ca465ef 100644
--- a/test/CodeGen/X86/2009-12-11-TLSNoRedZone.ll
+++ b/test/CodeGen/X86/2009-12-11-TLSNoRedZone.ll
@@ -21,7 +21,7 @@ define void @leaf() nounwind {
; CHECK: leaf:
; CHECK-NOT: -8(%rsp)
; CHECK: leaq link_ptr@TLSGD
-; CHECK: call __tls_get_addr@PLT
+; CHECK: callq __tls_get_addr@PLT
"file foo2.c, line 14, bb1":
%p = alloca %test*, align 8 ; <%test**> [#uses=4]
br label %"file foo2.c, line 14, bb2"
diff --git a/test/CodeGen/X86/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/X86/2010-04-07-DbgValueOtherTargets.ll
index 76cc1a497d3d..42f19b3ad86a 100644
--- a/test/CodeGen/X86/2010-04-07-DbgValueOtherTargets.ll
+++ b/test/CodeGen/X86/2010-04-07-DbgValueOtherTargets.ll
@@ -2,33 +2,27 @@
; RUN: llc -O0 -march=x86-64 -asm-verbose < %s | FileCheck %s
; Check that DEBUG_VALUE comments come through on a variety of targets.
-%tart.reflect.ComplexType = type { double, double }
-
-@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
-
-define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+define i32 @main() nounwind ssp {
entry:
; CHECK: DEBUG_VALUE
- tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
- tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
- ret i32 3
+ call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
+ ret i32 0, !dbg !10
}
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
-!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
-!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
-!3 = metadata !{metadata !4, metadata !6, metadata !7}
-!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
-!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
-!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
-!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
-!9 = metadata !{i32 458763, metadata !10} ; [ DW_TAG_lexical_block ]
-!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 0}
+!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!9 = metadata !{i32 3, i32 11, metadata !8, null}
+!10 = metadata !{i32 4, i32 2, metadata !8, null}
diff --git a/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll b/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll
index 4cd3be35e820..fa3d5fbcdc48 100644
--- a/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll
+++ b/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll
@@ -1,12 +1,12 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | FileCheck %s
+; There are no MMX operations here, so we use XMM or i64.
define void @ti8(double %a, double %b) nounwind {
entry:
%tmp1 = bitcast double %a to <8 x i8>
-; CHECK: movdq2q
%tmp2 = bitcast double %b to <8 x i8>
-; CHECK: movdq2q
%tmp3 = add <8 x i8> %tmp1, %tmp2
+; CHECK: paddb %xmm1, %xmm0
store <8 x i8> %tmp3, <8 x i8>* null
ret void
}
@@ -14,10 +14,9 @@ entry:
define void @ti16(double %a, double %b) nounwind {
entry:
%tmp1 = bitcast double %a to <4 x i16>
-; CHECK: movdq2q
%tmp2 = bitcast double %b to <4 x i16>
-; CHECK: movdq2q
%tmp3 = add <4 x i16> %tmp1, %tmp2
+; CHECK: paddw %xmm1, %xmm0
store <4 x i16> %tmp3, <4 x i16>* null
ret void
}
@@ -25,10 +24,9 @@ entry:
define void @ti32(double %a, double %b) nounwind {
entry:
%tmp1 = bitcast double %a to <2 x i32>
-; CHECK: movdq2q
%tmp2 = bitcast double %b to <2 x i32>
-; CHECK: movdq2q
%tmp3 = add <2 x i32> %tmp1, %tmp2
+; CHECK: paddd %xmm1, %xmm0
store <2 x i32> %tmp3, <2 x i32>* null
ret void
}
@@ -36,10 +34,60 @@ entry:
define void @ti64(double %a, double %b) nounwind {
entry:
%tmp1 = bitcast double %a to <1 x i64>
-; CHECK: movdq2q
%tmp2 = bitcast double %b to <1 x i64>
-; CHECK: movdq2q
%tmp3 = add <1 x i64> %tmp1, %tmp2
+; CHECK: addq %rax, %rcx
store <1 x i64> %tmp3, <1 x i64>* null
ret void
}
+
+; Calls to MMX intrinsics get us MMX instructions.
+
+define void @ti8a(double %a, double %b) nounwind {
+entry:
+ %tmp1 = bitcast double %a to x86_mmx
+; CHECK: movdq2q
+ %tmp2 = bitcast double %b to x86_mmx
+; CHECK: movdq2q
+ %tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %tmp1, x86_mmx %tmp2)
+ store x86_mmx %tmp3, x86_mmx* null
+ ret void
+}
+
+define void @ti16a(double %a, double %b) nounwind {
+entry:
+ %tmp1 = bitcast double %a to x86_mmx
+; CHECK: movdq2q
+ %tmp2 = bitcast double %b to x86_mmx
+; CHECK: movdq2q
+ %tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %tmp1, x86_mmx %tmp2)
+ store x86_mmx %tmp3, x86_mmx* null
+ ret void
+}
+
+define void @ti32a(double %a, double %b) nounwind {
+entry:
+ %tmp1 = bitcast double %a to x86_mmx
+; CHECK: movdq2q
+ %tmp2 = bitcast double %b to x86_mmx
+; CHECK: movdq2q
+ %tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %tmp1, x86_mmx %tmp2)
+ store x86_mmx %tmp3, x86_mmx* null
+ ret void
+}
+
+define void @ti64a(double %a, double %b) nounwind {
+entry:
+ %tmp1 = bitcast double %a to x86_mmx
+; CHECK: movdq2q
+ %tmp2 = bitcast double %b to x86_mmx
+; CHECK: movdq2q
+ %tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %tmp1, x86_mmx %tmp2)
+ store x86_mmx %tmp3, x86_mmx* null
+ ret void
+}
+
+declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx)
diff --git a/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll b/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll
index e20f1d8c79ce..3738f802e95a 100644
--- a/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll
+++ b/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll
@@ -11,7 +11,7 @@ target triple = "i386-apple-darwin10.0.0"
; Verify that %esi gets spilled before the call.
; CHECK: Z4test1SiS
; CHECK: movl %esi,{{.*}}(%ebp)
-; CHECK: call __Z6throwsv
+; CHECK: calll __Z6throwsv
define i8* @_Z4test1SiS_(%struct.S* byval %s1, i32 %n, %struct.S* byval %s2) ssp {
entry:
diff --git a/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
index d2115496f8f4..f9bda7f1007e 100644
--- a/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
+++ b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
@@ -1,5 +1,8 @@
-; RUN: llc -march=x86-64 -O2 < %s | grep debug_loc12
-; Test to check .debug_loc support. This test case emits 13 debug_loc entries.
+; RUN: llc -march=x86-64 -O2 < %s | FileCheck %s
+; Test to check .debug_loc support. This test case emits many debug_loc entries.
+
+; CHECK: Loc expr size
+; CHECK-NEXT: DW_OP_reg
%0 = type { double }
diff --git a/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
index 13f72a99d2b2..60171eb62973 100644
--- a/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
+++ b/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
@@ -61,6 +61,6 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
; CHECK-NEXT: .short 1
; CHECK-NEXT: .byte 85
; CHECK-NEXT: .quad Ltmp3
-; CHECK-NEXT: .quad Lfunc_end
+; CHECK-NEXT: .quad Ltmp6
; CHECK-NEXT: .short 1
; CHECK-NEXT: .byte 83
diff --git a/test/CodeGen/X86/2010-05-28-Crash.ll b/test/CodeGen/X86/2010-05-28-Crash.ll
index 80643d0792ac..ad8546ef8ce8 100644
--- a/test/CodeGen/X86/2010-05-28-Crash.ll
+++ b/test/CodeGen/X86/2010-05-28-Crash.ll
@@ -39,6 +39,6 @@ entry:
!13 = metadata !{i32 7, i32 0, metadata !14, null}
!14 = metadata !{i32 524299, metadata !8, i32 6, i32 0} ; [ DW_TAG_lexical_block ]
-;CHECK: DEBUG_VALUE: bar:x <- EBX+0
-;CHECK-NEXT:Ltmp
-;CHECK-NEXT: DEBUG_VALUE: foo:y <- 1+0
+;CHECK: DEBUG_VALUE: bar:x <- E
+;CHECK: Ltmp
+;CHECK: DEBUG_VALUE: foo:y <- 1+0
diff --git a/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll b/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll
index c6421a247eaa..6db3ce1f42c0 100644
--- a/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll
+++ b/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O1 -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim < %s | FileCheck %s
+; RUN: llc -O1 -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic -disable-fp-elim < %s | FileCheck %s
; <rdar://problem/8124405>
%struct.type = type { %struct.subtype*, i32, i8, i32, i8, i32, i32, i32, i32, i32, i8, i32, i32, i32, i32, i32, [256 x i32], i32, [257 x i32], [257 x i32], i32*, i16*, i8*, i32, i32, i32, i32, i32, [256 x i8], [16 x i8], [256 x i8], [4096 x i8], [16 x i32], [18002 x i8], [18002 x i8], [6 x [258 x i8]], [6 x [258 x i32]], [6 x [258 x i32]], [6 x [258 x i32]], [6 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32*, i32*, i32* }
@@ -21,9 +21,9 @@ bb:
; statement. It can be an ADD or LEA instruction, it's not important which one
; it is.
;
-; CHECK: ## %bb
-; CHECK-NEXT: addq $64036, %rdi
-; CHECK: rep;stosl
+; CHECK: # %bb
+; CHECK: addq $64036, %rdi
+; CHECK: rep;stosl
%tmp5 = bitcast i32* %tmp4 to i8*
call void @llvm.memset.p0i8.i64(i8* %tmp5, i8 0, i64 84, i32 4, i1 false)
diff --git a/test/CodeGen/X86/2010-07-02-asm-alignstack.ll b/test/CodeGen/X86/2010-07-02-asm-alignstack.ll
index cb47d208dd44..0bbb24f6ecdf 100644
--- a/test/CodeGen/X86/2010-07-02-asm-alignstack.ll
+++ b/test/CodeGen/X86/2010-07-02-asm-alignstack.ll
@@ -3,7 +3,7 @@
define void @foo() nounwind ssp {
entry:
; CHECK: foo
-; CHECK: subq $8, %rsp
+; CHECK: pushq
; CHECK: int $3
call void asm sideeffect alignstack "# top of block", "~{dirflag},~{fpsr},~{flags},~{edi},~{esi},~{edx},~{ecx},~{eax}"() nounwind
call void asm sideeffect alignstack ".file \22small.c\22", "~{dirflag},~{fpsr},~{flags}"() nounwind
@@ -18,7 +18,7 @@ return: ; preds = %entry
define void @bar() nounwind ssp {
entry:
; CHECK: bar
-; CHECK-NOT: subq $8, %rsp
+; CHECK-NOT: pushq
; CHECK: int $3
call void asm sideeffect "# top of block", "~{dirflag},~{fpsr},~{flags},~{edi},~{esi},~{edx},~{ecx},~{eax}"() nounwind
call void asm sideeffect ".file \22small.c\22", "~{dirflag},~{fpsr},~{flags}"() nounwind
diff --git a/test/CodeGen/X86/2010-09-16-EmptyFilename.ll b/test/CodeGen/X86/2010-09-16-EmptyFilename.ll
new file mode 100644
index 000000000000..bed8c8a77b9a
--- /dev/null
+++ b/test/CodeGen/X86/2010-09-16-EmptyFilename.ll
@@ -0,0 +1,29 @@
+; RUN: llc -O0 -mtriple=x86_64-apple-darwin10 < %s - | FileCheck %s
+; Radar 8286101
+; CHECK: .file 2 "<stdin>"
+
+define i32 @foo() nounwind ssp {
+entry:
+ ret i32 42, !dbg !8
+}
+
+define i32 @bar() nounwind ssp {
+entry:
+ ret i32 21, !dbg !10
+}
+
+!llvm.dbg.sp = !{!0, !6}
+
+!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 53, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @foo} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 524329, metadata !"", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 524305, i32 0, i32 12, metadata !"bug.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 114084)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 524324, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 524334, i32 0, metadata !7, metadata !"bar", metadata !"bar", metadata !"bar", metadata !7, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @bar} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 524329, metadata !"bug.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!8 = metadata !{i32 53, i32 13, metadata !9, null}
+!9 = metadata !{i32 524299, metadata !0, i32 53, i32 11, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 4, i32 13, metadata !11, null}
+!11 = metadata !{i32 524299, metadata !12, i32 4, i32 13, metadata !7, i32 2} ; [ DW_TAG_lexical_block ]
+!12 = metadata !{i32 524299, metadata !6, i32 4, i32 11, metadata !7, i32 1} ; [ DW_TAG_lexical_block ]
diff --git a/test/CodeGen/X86/2010-09-16-asmcrash.ll b/test/CodeGen/X86/2010-09-16-asmcrash.ll
new file mode 100644
index 000000000000..9bbd6919421f
--- /dev/null
+++ b/test/CodeGen/X86/2010-09-16-asmcrash.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-freebsd8.1 -o /dev/null
+; This formerly crashed, PR 8154.
+
+module asm ".weak sem_close"
+module asm ".equ sem_close, _sem_close"
+module asm ".weak sem_destroy"
+module asm ".equ sem_destroy, _sem_destroy"
+module asm ".weak sem_getvalue"
+module asm ".equ sem_getvalue, _sem_getvalue"
+module asm ".weak sem_init"
+module asm ".equ sem_init, _sem_init"
+module asm ".weak sem_open"
+module asm ".equ sem_open, _sem_open"
+module asm ".weak sem_post"
+module asm ".equ sem_post, _sem_post"
+module asm ".weak sem_timedwait"
+module asm ".equ sem_timedwait, _sem_timedwait"
+module asm ".weak sem_trywait"
+module asm ".equ sem_trywait, _sem_trywait"
+module asm ".weak sem_unlink"
+module asm ".equ sem_unlink, _sem_unlink"
+module asm ".weak sem_wait"
+module asm ".equ sem_wait, _sem_wait"
+
+%struct._sem = type { i32, %struct._usem }
+%struct._usem = type { i32, i32, i32 }
+
+define void @_sem_timedwait(%struct._sem* noalias %sem) nounwind ssp {
+entry:
+ br i1 undef, label %while.cond.preheader, label %sem_check_validity.exit
+
+while.cond.preheader: ; preds = %entry
+ %tmp4 = getelementptr inbounds %struct._sem* %sem, i64 0, i32 1, i32 1
+ br label %while.cond
+
+sem_check_validity.exit: ; preds = %entry
+ ret void
+
+while.cond: ; preds = %while.body, %while.cond.preheader
+ br i1 undef, label %while.body, label %while.end
+
+while.body: ; preds = %while.cond
+ %0 = call i8 asm sideeffect "\09lock ; \09\09\09cmpxchgl $2,$1 ;\09 sete\09$0 ;\09\091:\09\09\09\09# atomic_cmpset_int", "={ax},=*m,r,{ax},*m,~{memory},~{dirflag},~{fpsr},~{flags}"(i32* %tmp4, i32 undef, i32 undef, i32* %tmp4) nounwind, !srcloc !0
+ br i1 undef, label %while.cond, label %return
+
+while.end: ; preds = %while.cond
+ br i1 undef, label %if.end18, label %return
+
+if.end18: ; preds = %while.end
+ unreachable
+
+return: ; preds = %while.end, %while.body
+ ret void
+}
+
+!0 = metadata !{i32 158484}
diff --git a/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll b/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
new file mode 100644
index 000000000000..8fe0309421e5
--- /dev/null
+++ b/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -combiner-alias-analysis -march=x86-64 | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.4"
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+define fastcc i32 @cli_magic_scandesc(i8* %in) nounwind ssp {
+entry:
+ %a = alloca [64 x i8]
+ %b = getelementptr inbounds [64 x i8]* %a, i64 0, i32 0
+ %c = getelementptr inbounds [64 x i8]* %a, i64 0, i32 30
+ %d = load i8* %b, align 8
+ %e = load i8* %c, align 8
+ %f = bitcast [64 x i8]* %a to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %f, i8* %in, i64 64, i32 8, i1 false) nounwind
+ store i8 %d, i8* %b, align 8
+ store i8 %e, i8* %c, align 8
+ ret i32 0
+}
+
+; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip), %rax
+; CHECK: movb 30(%rsp), %dl
+; CHECK: movb (%rsp), %sil
+; CHECK: movb %sil, (%rsp)
+; CHECK: movb %dl, 30(%rsp)
+; CHECK: callq ___stack_chk_fail
diff --git a/test/CodeGen/X86/2010-09-30-CMOV-JumpTable-PHI.ll b/test/CodeGen/X86/2010-09-30-CMOV-JumpTable-PHI.ll
new file mode 100644
index 000000000000..cae81d086ea1
--- /dev/null
+++ b/test/CodeGen/X86/2010-09-30-CMOV-JumpTable-PHI.ll
@@ -0,0 +1,71 @@
+; RUN: llc -verify-machineinstrs -cgp-critical-edge-splitting=0 -mcpu=i386 < %s
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i386-pc-linux-gnu"
+
+; The bb.i basic block gets split while emitting the schedule because
+; -mcpu=i386 doesn't have CMOV.
+;
+; That causes the PHI to be updated incorrectly because the jumptable data structure remembers the original MBB.
+;
+; -cgp-critical-edge-splitting=0 prevents the edge to the PHI from being split.
+
+@.str146 = external constant [4 x i8], align 1
+@.str706 = external constant [4 x i8], align 1
+@.str1189 = external constant [5 x i8], align 1
+
+declare i32 @memcmp(i8* nocapture, i8* nocapture, i32) nounwind readonly
+declare i32 @strlen(i8* nocapture) nounwind readonly
+
+define hidden zeroext i8 @f(i8* %this, i8* %Name.0, i32 %Name.1, i8* noalias %NameLoc, i8* %Operands) nounwind align 2 {
+bb.i:
+ %0 = icmp eq i8 undef, 0
+ %iftmp.285.0 = select i1 %0, i8* getelementptr inbounds ([5 x i8]* @.str1189, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8]* @.str706, i32 0, i32 0)
+ %1 = call i32 @strlen(i8* %iftmp.285.0) nounwind readonly
+ switch i32 %Name.1, label %_ZNK4llvm12StringSwitchINS_9StringRefES1_E7DefaultERKS1_.exit [
+ i32 3, label %bb1.i
+ i32 4, label %bb1.i1237
+ i32 5, label %bb1.i1266
+ i32 6, label %bb1.i1275
+ i32 2, label %bb1.i1434
+ i32 8, label %bb1.i1523
+ i32 7, label %bb1.i1537
+ ]
+
+bb1.i: ; preds = %bb.i
+ unreachable
+
+bb1.i1237: ; preds = %bb.i
+ br i1 undef, label %bb.i1820, label %bb1.i1241
+
+bb1.i1241: ; preds = %bb1.i1237
+ unreachable
+
+bb1.i1266: ; preds = %bb.i
+ unreachable
+
+bb1.i1275: ; preds = %bb.i
+ unreachable
+
+bb1.i1434: ; preds = %bb.i
+ unreachable
+
+bb1.i1523: ; preds = %bb.i
+ unreachable
+
+bb1.i1537: ; preds = %bb.i
+ unreachable
+
+bb.i1820: ; preds = %bb1.i1237
+ br label %_ZNK4llvm12StringSwitchINS_9StringRefES1_E7DefaultERKS1_.exit
+
+_ZNK4llvm12StringSwitchINS_9StringRefES1_E7DefaultERKS1_.exit: ; preds = %bb.i1820, %bb.i
+ %PatchedName.0.0 = phi i8* [ undef, %bb.i1820 ], [ %Name.0, %bb.i ]
+ br i1 undef, label %bb141, label %_ZNK4llvm9StringRef10startswithES0_.exit
+
+_ZNK4llvm9StringRef10startswithES0_.exit: ; preds = %_ZNK4llvm12StringSwitchINS_9StringRefES1_E7DefaultERKS1_.exit
+ %2 = call i32 @memcmp(i8* %PatchedName.0.0, i8* getelementptr inbounds ([4 x i8]* @.str146, i32 0, i32 0), i32 3) nounwind readonly
+ unreachable
+
+bb141: ; preds = %_ZNK4llvm12StringSwitchINS_9StringRefES1_E7DefaultERKS1_.exit
+ unreachable
+}
diff --git a/test/CodeGen/X86/2010-10-08-cmpxchg8b.ll b/test/CodeGen/X86/2010-10-08-cmpxchg8b.ll
new file mode 100644
index 000000000000..40e7f017dc3d
--- /dev/null
+++ b/test/CodeGen/X86/2010-10-08-cmpxchg8b.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin | FileCheck %s
+; PR8297
+;
+; On i386, i64 cmpxchg is lowered during legalize types to extract the
+; 64-bit result into a pair of fixed regs. So creation of the DAG node
+; happens in a different place. See
+; X86TargetLowering::ReplaceNodeResults, case ATOMIC_CMP_SWAP.
+;
+; Neither Atomic-xx.ll nor atomic_op.ll covers this. Those tests were
+; autogenerated from C source before 64-bit variants were supported.
+;
+; Note that this case requires a loop around the cmpxchg to force
+; machine licm to query alias analysis, exposing a bad
+; MachineMemOperand.
+define void @foo(i64* %ptr) nounwind inlinehint {
+entry:
+ br label %loop
+loop:
+; CHECK: lock
+; CHECK-NEXT: cmpxchg8b
+ %r = call i64 @llvm.atomic.cmp.swap.i64.p0i64(i64* %ptr, i64 0, i64 1)
+ %stored1 = icmp eq i64 %r, 0
+ br i1 %stored1, label %loop, label %continue
+continue:
+ ret void
+}
+
+declare i64 @llvm.atomic.cmp.swap.i64.p0i64(i64* nocapture, i64, i64) nounwind
diff --git a/test/CodeGen/X86/2010-11-02-DbgParameter.ll b/test/CodeGen/X86/2010-11-02-DbgParameter.ll
new file mode 100644
index 000000000000..79c0cf35c660
--- /dev/null
+++ b/test/CodeGen/X86/2010-11-02-DbgParameter.ll
@@ -0,0 +1,35 @@
+; RUN: llc -O2 -asm-verbose < %s | FileCheck %s
+; Radar 8616981
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+target triple = "i386-apple-darwin11.0.0"
+
+%struct.bar = type { i32, i32 }
+
+define i32 @foo(%struct.bar* nocapture %i) nounwind readnone optsize noinline ssp {
+; CHECK: TAG_formal_parameter
+entry:
+ tail call void @llvm.dbg.value(metadata !{%struct.bar* %i}, i64 0, metadata !6), !dbg !12
+ ret i32 1, !dbg !13
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.sp = !{!0}
+!llvm.dbg.lv.foo = !{!6}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (%struct.bar*)* @foo} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"one.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"one.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 117922)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 590081, metadata !0, metadata !"i", metadata !1, i32 3, metadata !7, i32 0} ; [ DW_TAG_arg_variable ]
+!7 = metadata !{i32 589839, metadata !1, metadata !"", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
+!8 = metadata !{i32 589843, metadata !1, metadata !"bar", metadata !1, i32 2, i64 64, i64 32, i64 0, i32 0, null, metadata !9, i32 0, null} ; [ DW_TAG_structure_type ]
+!9 = metadata !{metadata !10, metadata !11}
+!10 = metadata !{i32 589837, metadata !1, metadata !"x", metadata !1, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
+!11 = metadata !{i32 589837, metadata !1, metadata !"y", metadata !1, i32 2, i64 32, i64 32, i64 32, i32 0, metadata !5} ; [ DW_TAG_member ]
+!12 = metadata !{i32 3, i32 47, metadata !0, null}
+!13 = metadata !{i32 4, i32 2, metadata !14, null}
+!14 = metadata !{i32 589835, metadata !0, i32 3, i32 50, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
diff --git a/test/CodeGen/X86/2010-11-09-MOVLPS.ll b/test/CodeGen/X86/2010-11-09-MOVLPS.ll
new file mode 100644
index 000000000000..2368f3f69195
--- /dev/null
+++ b/test/CodeGen/X86/2010-11-09-MOVLPS.ll
@@ -0,0 +1,66 @@
+; RUN: llc < %s -march=x86-64 -O0
+; PR8211
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+module asm "\09.ident\09\22GCC: (GNU) 4.5.2 20100914 (prerelease) LLVM: 114628\22"
+
+%0 = type { %"int[]" }
+%float = type float
+%"float[]" = type [4 x float]
+%int = type i32
+%"int[]" = type [4 x i32]
+%"long unsigned int" = type i64
+
+define void @swizzle(i8* %a, %0* %b, %0* %c) nounwind {
+entry:
+ %a_addr = alloca i8*
+ %b_addr = alloca %0*
+ %c_addr = alloca %0*
+ %"alloca point" = bitcast i32 0 to i32
+ store i8* %a, i8** %a_addr
+ store %0* %b, %0** %b_addr
+ store %0* %c, %0** %c_addr
+ %0 = load i8** %a_addr, align 64
+ %1 = load %0** %b_addr, align 64
+ %2 = load %0** %c_addr, align 64
+ %"ssa point" = bitcast i32 0 to i32
+ br label %"2"
+
+"2": ; preds = %entry
+ %3 = bitcast i8* %0 to <2 x i32>*
+ %4 = getelementptr inbounds %0* %1, i32 0, i32 0
+ %5 = bitcast %"int[]"* %4 to <4 x float>*
+ %6 = load <4 x float>* %5, align 16
+ %7 = bitcast <2 x i32>* %3 to <2 x float>*
+ %8 = bitcast <2 x float>* %7 to double*
+ %9 = load double* %8
+ %10 = insertelement <2 x double> undef, double %9, i32 0
+ %11 = insertelement <2 x double> %10, double undef, i32 1
+ %12 = bitcast <2 x double> %11 to <4 x float>
+ %13 = shufflevector <4 x float> %6, <4 x float> %12, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+ %14 = getelementptr inbounds %0* %1, i32 0, i32 0
+ %15 = bitcast %"int[]"* %14 to <4 x float>*
+ store <4 x float> %13, <4 x float>* %15, align 16
+ %16 = bitcast i8* %0 to <2 x i32>*
+ %17 = bitcast <2 x i32>* %16 to i8*
+ %18 = getelementptr i8* %17, i64 8
+ %19 = bitcast i8* %18 to <2 x i32>*
+ %20 = getelementptr inbounds %0* %2, i32 0, i32 0
+ %21 = bitcast %"int[]"* %20 to <4 x float>*
+ %22 = load <4 x float>* %21, align 16
+ %23 = bitcast <2 x i32>* %19 to <2 x float>*
+ %24 = bitcast <2 x float>* %23 to double*
+ %25 = load double* %24
+ %26 = insertelement <2 x double> undef, double %25, i32 0
+ %27 = insertelement <2 x double> %26, double undef, i32 1
+ %28 = bitcast <2 x double> %27 to <4 x float>
+ %29 = shufflevector <4 x float> %22, <4 x float> %28, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+ %30 = getelementptr inbounds %0* %2, i32 0, i32 0
+ %31 = bitcast %"int[]"* %30 to <4 x float>*
+ store <4 x float> %29, <4 x float>* %31, align 16
+ br label %return
+
+return: ; preds = %"2"
+ ret void
+}
diff --git a/test/CodeGen/X86/2010-11-18-SelectOfExtload.ll b/test/CodeGen/X86/2010-11-18-SelectOfExtload.ll
new file mode 100644
index 000000000000..a1074b6b8f3c
--- /dev/null
+++ b/test/CodeGen/X86/2010-11-18-SelectOfExtload.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
+; Both values were being zero extended.
+@u = external global i8
+@s = external global i8
+define i32 @foo(i1 %cond) {
+; CHECK: @foo
+ %u_base = load i8* @u
+ %u_val = zext i8 %u_base to i32
+; CHECK: movzbl
+; CHECK: movsbl
+ %s_base = load i8* @s
+ %s_val = sext i8 %s_base to i32
+ %val = select i1 %cond, i32 %u_val, i32 %s_val
+ ret i32 %val
+}
diff --git a/test/CodeGen/X86/2010-12-02-MC-Set.ll b/test/CodeGen/X86/2010-12-02-MC-Set.ll
new file mode 100644
index 000000000000..31446786ec15
--- /dev/null
+++ b/test/CodeGen/X86/2010-12-02-MC-Set.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -disable-dot-loc -mtriple=x86_64-apple-darwin -O0 | FileCheck %s
+
+
+define void @foo() nounwind ssp {
+entry:
+ ret void, !dbg !5
+}
+
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @foo} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"e.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"e.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 120563)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{null}
+!5 = metadata !{i32 5, i32 1, metadata !6, null}
+!6 = metadata !{i32 589835, metadata !0, i32 3, i32 16, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+
+; CHECK: .subsections_via_symbols
+; CHECK-NEXT: __debug_line
+; CHECK-NEXT: Ltmp
+; CHECK-NEXT: Ltmp{{[0-9]}} = (Ltmp
diff --git a/test/CodeGen/X86/2011-01-07-LegalizeTypesCrash.ll b/test/CodeGen/X86/2011-01-07-LegalizeTypesCrash.ll
new file mode 100644
index 000000000000..b9cf65b1e733
--- /dev/null
+++ b/test/CodeGen/X86/2011-01-07-LegalizeTypesCrash.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -enable-legalize-types-checking
+; PR8582
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i686-pc-win32"
+
+define void @test() nounwind {
+ %i17 = icmp eq <4 x i8> undef, zeroinitializer
+ %cond = extractelement <4 x i1> %i17, i32 0
+ %_comp = select i1 %cond, i8 0, i8 undef
+ %merge = insertelement <4 x i8> undef, i8 %_comp, i32 0
+ %cond3 = extractelement <4 x i1> %i17, i32 1
+ %_comp4 = select i1 %cond3, i8 0, i8 undef
+ %merge5 = insertelement <4 x i8> %merge, i8 %_comp4, i32 1
+ %cond8 = extractelement <4 x i1> %i17, i32 2
+ %_comp9 = select i1 %cond8, i8 0, i8 undef
+ %m387 = insertelement <4 x i8> %merge5, i8 %_comp9, i32 2
+ store <4 x i8> %m387, <4 x i8>* undef
+ ret void
+}
diff --git a/test/CodeGen/X86/2011-01-10-DagCombineHang.ll b/test/CodeGen/X86/2011-01-10-DagCombineHang.ll
new file mode 100644
index 000000000000..bf438b82edf8
--- /dev/null
+++ b/test/CodeGen/X86/2011-01-10-DagCombineHang.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10
+; This formerly got DagCombine into a loop, PR 8916.
+
+define i32 @foo(i64 %x, i64 %y, i64 %z, i32 %a, i32 %b) {
+entry:
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ %t1 = shl i64 %x, 15
+ %t2 = and i64 %t1, 4294934528
+ %t3 = or i64 %t2, %y
+ %t4 = xor i64 %z, %t3
+ %t5 = trunc i64 %t4 to i32
+ %t6 = add i32 %a, %t5
+ %t7 = add i32 %t6, %b
+ ret i32 %t7
+}
diff --git a/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll b/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
new file mode 100644
index 000000000000..973975b658a3
--- /dev/null
+++ b/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
@@ -0,0 +1,103 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+; Check debug info for variable z_s
+;CHECK: .ascii "z_s" ## DW_AT_name
+;CHECK-NEXT: .byte 0
+;CHECK-NEXT: ## DW_AT_decl_file
+;CHECK-NEXT: ## DW_AT_decl_line
+;CHECK-NEXT: ## DW_AT_type
+;CHECK-NEXT: ## DW_AT_location
+
+
+@.str1 = private unnamed_addr constant [14 x i8] c"m=%u, z_s=%d\0A\00"
+@str = internal constant [21 x i8] c"Failing test vector:\00"
+
+define i64 @gcd(i64 %a, i64 %b) nounwind readnone optsize noinline ssp {
+entry:
+ tail call void @llvm.dbg.value(metadata !{i64 %a}, i64 0, metadata !10), !dbg !18
+ tail call void @llvm.dbg.value(metadata !{i64 %b}, i64 0, metadata !11), !dbg !19
+ br label %while.body, !dbg !20
+
+while.body: ; preds = %while.body, %entry
+ %b.addr.0 = phi i64 [ %b, %entry ], [ %rem, %while.body ]
+ %a.addr.0 = phi i64 [ %a, %entry ], [ %b.addr.0, %while.body ]
+ %rem = srem i64 %a.addr.0, %b.addr.0, !dbg !21
+ %cmp = icmp eq i64 %rem, 0, !dbg !23
+ br i1 %cmp, label %if.then, label %while.body, !dbg !23
+
+if.then: ; preds = %while.body
+ tail call void @llvm.dbg.value(metadata !{i64 %rem}, i64 0, metadata !12), !dbg !21
+ ret i64 %b.addr.0, !dbg !23
+}
+
+define i32 @main() nounwind optsize ssp {
+entry:
+ %call = tail call i32 @rand() nounwind optsize, !dbg !24
+ tail call void @llvm.dbg.value(metadata !{i32 %call}, i64 0, metadata !14), !dbg !24
+ %cmp = icmp ugt i32 %call, 21, !dbg !25
+ br i1 %cmp, label %cond.true, label %cond.end, !dbg !25
+
+cond.true: ; preds = %entry
+ %call1 = tail call i32 @rand() nounwind optsize, !dbg !25
+ br label %cond.end, !dbg !25
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i32 [ %call1, %cond.true ], [ %call, %entry ], !dbg !25
+ tail call void @llvm.dbg.value(metadata !{i32 %cond}, i64 0, metadata !17), !dbg !25
+ %conv = sext i32 %cond to i64, !dbg !26
+ %conv5 = zext i32 %call to i64, !dbg !26
+ %call6 = tail call i64 @gcd(i64 %conv, i64 %conv5) optsize, !dbg !26
+ %cmp7 = icmp eq i64 %call6, 0
+ br i1 %cmp7, label %return, label %if.then, !dbg !26
+
+if.then: ; preds = %cond.end
+ %puts = tail call i32 @puts(i8* getelementptr inbounds ([21 x i8]* @str, i64 0, i64 0))
+ %call12 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([14 x i8]* @.str1, i64 0, i64 0), i32 %call, i32 %cond) nounwind optsize, !dbg !26
+ ret i32 1, !dbg !27
+
+return: ; preds = %cond.end
+ ret i32 0, !dbg !27
+}
+
+declare i32 @rand() optsize
+
+declare i32 @printf(i8* nocapture, ...) nounwind optsize
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+declare i32 @puts(i8* nocapture) nounwind
+
+!llvm.dbg.sp = !{!0, !6}
+!llvm.dbg.lv.gcd = !{!10, !11, !12}
+!llvm.dbg.lv.main = !{!14, !17}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"gcd", metadata !"gcd", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i64 (i64, i64)* @gcd} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"rem_small.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"rem_small.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 124117)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"long int", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 25, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, i32 ()* @main} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 590081, metadata !0, metadata !"a", metadata !1, i32 5, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!11 = metadata !{i32 590081, metadata !0, metadata !"b", metadata !1, i32 5, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!12 = metadata !{i32 590080, metadata !13, metadata !"c", metadata !1, i32 6, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!13 = metadata !{i32 589835, metadata !0, i32 5, i32 52, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!14 = metadata !{i32 590080, metadata !15, metadata !"m", metadata !1, i32 26, metadata !16, i32 0} ; [ DW_TAG_auto_variable ]
+!15 = metadata !{i32 589835, metadata !6, i32 25, i32 12, metadata !1, i32 2} ; [ DW_TAG_lexical_block ]
+!16 = metadata !{i32 589860, metadata !2, metadata !"unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!17 = metadata !{i32 590080, metadata !15, metadata !"z_s", metadata !1, i32 27, metadata !9, i32 0} ; [ DW_TAG_auto_variable ]
+!18 = metadata !{i32 5, i32 41, metadata !0, null}
+!19 = metadata !{i32 5, i32 49, metadata !0, null}
+!20 = metadata !{i32 7, i32 5, metadata !13, null}
+!21 = metadata !{i32 8, i32 9, metadata !22, null}
+!22 = metadata !{i32 589835, metadata !13, i32 7, i32 14, metadata !1, i32 1} ; [ DW_TAG_lexical_block ]
+!23 = metadata !{i32 9, i32 9, metadata !22, null}
+!24 = metadata !{i32 26, i32 38, metadata !15, null}
+!25 = metadata !{i32 27, i32 38, metadata !15, null}
+!26 = metadata !{i32 28, i32 9, metadata !15, null}
+!27 = metadata !{i32 30, i32 1, metadata !15, null}
diff --git a/test/CodeGen/X86/2011-02-04-FastRegallocNoFP.ll b/test/CodeGen/X86/2011-02-04-FastRegallocNoFP.ll
new file mode 100644
index 000000000000..cedd6a2a1b8e
--- /dev/null
+++ b/test/CodeGen/X86/2011-02-04-FastRegallocNoFP.ll
@@ -0,0 +1,14 @@
+; RUN: llc -O0 < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare i32 @foo()
+
+define i32 @bar() nounwind {
+; CHECK: bar
+; CHECK-NOT: pop.*ax
+ %call = call i32 @foo()
+ ret i32 %call
+}
+
diff --git a/test/CodeGen/X86/3addr-or.ll b/test/CodeGen/X86/3addr-or.ll
index 30a1f36850de..912bdc215474 100644
--- a/test/CodeGen/X86/3addr-or.ll
+++ b/test/CodeGen/X86/3addr-or.ll
@@ -1,9 +1,9 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
; rdar://7527734
-define i32 @test(i32 %x) nounwind readnone ssp {
+define i32 @test1(i32 %x) nounwind readnone ssp {
entry:
-; CHECK: test:
+; CHECK: test1:
; CHECK: leal 3(%rdi), %eax
%0 = shl i32 %x, 5 ; <i32> [#uses=1]
%1 = or i32 %0, 3 ; <i32> [#uses=1]
@@ -25,3 +25,37 @@ define i64 @test2(i8 %A, i8 %B) nounwind {
%H = or i64 %G, %E ; <i64> [#uses=1]
ret i64 %H
}
+
+;; Test that OR is only emitted as LEA, not as ADD.
+
+define void @test3(i32 %x, i32* %P) nounwind readnone ssp {
+entry:
+; No reason to emit an add here; it should be an or.
+; CHECK: test3:
+; CHECK: orl $3, %edi
+ %0 = shl i32 %x, 5
+ %1 = or i32 %0, 3
+ store i32 %1, i32* %P
+ ret void
+}
+
+define i32 @test4(i32 %a, i32 %b) nounwind readnone ssp {
+entry:
+ %and = and i32 %a, 6
+ %and2 = and i32 %b, 16
+ %or = or i32 %and2, %and
+ ret i32 %or
+; CHECK: test4:
+; CHECK: leal (%rsi,%rdi), %eax
+}
+
+define void @test5(i32 %a, i32 %b, i32* nocapture %P) nounwind ssp {
+entry:
+ %and = and i32 %a, 6
+ %and2 = and i32 %b, 16
+ %or = or i32 %and2, %and
+ store i32 %or, i32* %P, align 4
+ ret void
+; CHECK: test5:
+; CHECK: orl
+}
diff --git a/test/CodeGen/X86/abi-isel.ll b/test/CodeGen/X86/abi-isel.ll
index 23042b6eff3e..5b4d79fa22b9 100644
--- a/test/CodeGen/X86/abi-isel.ll
+++ b/test/CodeGen/X86/abi-isel.ll
@@ -1,16 +1,16 @@
-; RUN: llc < %s -asm-verbose=0 -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small -post-RA-scheduler=false | FileCheck %s -check-prefix=LINUX-32-STATIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small -post-RA-scheduler=false | FileCheck %s -check-prefix=LINUX-32-PIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-STATIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-PIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small -post-RA-scheduler=false | FileCheck %s -check-prefix=LINUX-64-STATIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small -post-RA-scheduler=false | FileCheck %s -check-prefix=LINUX-64-PIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-64-STATIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=LINUX-64-PIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small -post-RA-scheduler=false | FileCheck %s -check-prefix=DARWIN-32-STATIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small -post-RA-scheduler=false | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small -post-RA-scheduler=false | FileCheck %s -check-prefix=DARWIN-32-PIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-32-STATIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-PIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small -post-RA-scheduler=false | FileCheck %s -check-prefix=DARWIN-64-STATIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small -post-RA-scheduler=false | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small -post-RA-scheduler=false | FileCheck %s -check-prefix=DARWIN-64-PIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-64-STATIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-PIC
@src = external global [131072 x i32]
@dst = external global [131072 x i32]
@@ -72,7 +72,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _foo00:
-; DARWIN-32-PIC: call L0$pb
+; DARWIN-32-PIC: calll L0$pb
; DARWIN-32-PIC-NEXT: L0$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L0$pb(%eax), %ecx
@@ -144,7 +144,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _fxo00:
-; DARWIN-32-PIC: call L1$pb
+; DARWIN-32-PIC: calll L1$pb
; DARWIN-32-PIC-NEXT: L1$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L1$pb(%eax), %ecx
@@ -208,7 +208,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _foo01:
-; DARWIN-32-PIC: call L2$pb
+; DARWIN-32-PIC: calll L2$pb
; DARWIN-32-PIC-NEXT: L2$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L2$pb(%eax), %ecx
@@ -268,7 +268,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _fxo01:
-; DARWIN-32-PIC: call L3$pb
+; DARWIN-32-PIC: calll L3$pb
; DARWIN-32-PIC-NEXT: L3$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L3$pb(%eax), %ecx
@@ -342,7 +342,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _foo02:
-; DARWIN-32-PIC: call L4$pb
+; DARWIN-32-PIC: calll L4$pb
; DARWIN-32-PIC-NEXT: L4$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L4$pb(%eax), %ecx
@@ -424,7 +424,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _fxo02:
-; DARWIN-32-PIC: call L5$pb
+; DARWIN-32-PIC: calll L5$pb
; DARWIN-32-PIC-NEXT: L5$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L5$pb(%eax), %ecx
@@ -497,7 +497,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _foo03:
-; DARWIN-32-PIC: call L6$pb
+; DARWIN-32-PIC: calll L6$pb
; DARWIN-32-PIC-NEXT: L6$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl _dsrc-L6$pb(%eax), %ecx
@@ -551,7 +551,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _foo04:
-; DARWIN-32-PIC: call L7$pb
+; DARWIN-32-PIC: calll L7$pb
; DARWIN-32-PIC-NEXT: L7$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _ddst-L7$pb(%eax), %ecx
@@ -619,7 +619,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _foo05:
-; DARWIN-32-PIC: call L8$pb
+; DARWIN-32-PIC: calll L8$pb
; DARWIN-32-PIC-NEXT: L8$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl _dsrc-L8$pb(%eax), %ecx
@@ -682,7 +682,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _foo06:
-; DARWIN-32-PIC: call L9$pb
+; DARWIN-32-PIC: calll L9$pb
; DARWIN-32-PIC-NEXT: L9$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl _lsrc-L9$pb(%eax), %ecx
@@ -735,7 +735,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _foo07:
-; DARWIN-32-PIC: call L10$pb
+; DARWIN-32-PIC: calll L10$pb
; DARWIN-32-PIC-NEXT: L10$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _ldst-L10$pb(%eax), %ecx
@@ -801,7 +801,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _foo08:
-; DARWIN-32-PIC: call L11$pb
+; DARWIN-32-PIC: calll L11$pb
; DARWIN-32-PIC-NEXT: L11$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl _lsrc-L11$pb(%eax), %ecx
@@ -868,7 +868,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qux00:
-; DARWIN-32-PIC: call L12$pb
+; DARWIN-32-PIC: calll L12$pb
; DARWIN-32-PIC-NEXT: L12$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L12$pb(%eax), %ecx
@@ -939,7 +939,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qxx00:
-; DARWIN-32-PIC: call L13$pb
+; DARWIN-32-PIC: calll L13$pb
; DARWIN-32-PIC-NEXT: L13$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L13$pb(%eax), %ecx
@@ -1005,7 +1005,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qux01:
-; DARWIN-32-PIC: call L14$pb
+; DARWIN-32-PIC: calll L14$pb
; DARWIN-32-PIC-NEXT: L14$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L14$pb(%eax), %ecx
@@ -1071,7 +1071,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qxx01:
-; DARWIN-32-PIC: call L15$pb
+; DARWIN-32-PIC: calll L15$pb
; DARWIN-32-PIC-NEXT: L15$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L15$pb(%eax), %ecx
@@ -1150,7 +1150,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qux02:
-; DARWIN-32-PIC: call L16$pb
+; DARWIN-32-PIC: calll L16$pb
; DARWIN-32-PIC-NEXT: L16$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L16$pb(%eax), %ecx
@@ -1233,7 +1233,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qxx02:
-; DARWIN-32-PIC: call L17$pb
+; DARWIN-32-PIC: calll L17$pb
; DARWIN-32-PIC-NEXT: L17$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L17$pb(%eax), %ecx
@@ -1306,7 +1306,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qux03:
-; DARWIN-32-PIC: call L18$pb
+; DARWIN-32-PIC: calll L18$pb
; DARWIN-32-PIC-NEXT: L18$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl (_dsrc-L18$pb)+64(%eax), %ecx
@@ -1361,7 +1361,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qux04:
-; DARWIN-32-PIC: call L19$pb
+; DARWIN-32-PIC: calll L19$pb
; DARWIN-32-PIC-NEXT: L19$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal (_ddst-L19$pb)+64(%eax), %ecx
@@ -1430,7 +1430,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qux05:
-; DARWIN-32-PIC: call L20$pb
+; DARWIN-32-PIC: calll L20$pb
; DARWIN-32-PIC-NEXT: L20$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl (_dsrc-L20$pb)+64(%eax), %ecx
@@ -1493,7 +1493,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qux06:
-; DARWIN-32-PIC: call L21$pb
+; DARWIN-32-PIC: calll L21$pb
; DARWIN-32-PIC-NEXT: L21$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl (_lsrc-L21$pb)+64(%eax), %ecx
@@ -1546,7 +1546,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qux07:
-; DARWIN-32-PIC: call L22$pb
+; DARWIN-32-PIC: calll L22$pb
; DARWIN-32-PIC-NEXT: L22$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal (_ldst-L22$pb)+64(%eax), %ecx
@@ -1613,7 +1613,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qux08:
-; DARWIN-32-PIC: call L23$pb
+; DARWIN-32-PIC: calll L23$pb
; DARWIN-32-PIC-NEXT: L23$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl (_lsrc-L23$pb)+64(%eax), %ecx
@@ -1686,7 +1686,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ind00:
-; DARWIN-32-PIC: call L24$pb
+; DARWIN-32-PIC: calll L24$pb
; DARWIN-32-PIC-NEXT: L24$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -1764,7 +1764,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ixd00:
-; DARWIN-32-PIC: call L25$pb
+; DARWIN-32-PIC: calll L25$pb
; DARWIN-32-PIC-NEXT: L25$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -1840,7 +1840,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ind01:
-; DARWIN-32-PIC: call L26$pb
+; DARWIN-32-PIC: calll L26$pb
; DARWIN-32-PIC-NEXT: L26$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -1916,7 +1916,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ixd01:
-; DARWIN-32-PIC: call L27$pb
+; DARWIN-32-PIC: calll L27$pb
; DARWIN-32-PIC-NEXT: L27$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -2001,7 +2001,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ind02:
-; DARWIN-32-PIC: call L28$pb
+; DARWIN-32-PIC: calll L28$pb
; DARWIN-32-PIC-NEXT: L28$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -2090,7 +2090,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ixd02:
-; DARWIN-32-PIC: call L29$pb
+; DARWIN-32-PIC: calll L29$pb
; DARWIN-32-PIC-NEXT: L29$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -2170,7 +2170,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ind03:
-; DARWIN-32-PIC: call L30$pb
+; DARWIN-32-PIC: calll L30$pb
; DARWIN-32-PIC-NEXT: L30$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -2242,7 +2242,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ind04:
-; DARWIN-32-PIC: call L31$pb
+; DARWIN-32-PIC: calll L31$pb
; DARWIN-32-PIC-NEXT: L31$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -2320,7 +2320,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ind05:
-; DARWIN-32-PIC: call L32$pb
+; DARWIN-32-PIC: calll L32$pb
; DARWIN-32-PIC-NEXT: L32$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -2395,7 +2395,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ind06:
-; DARWIN-32-PIC: call L33$pb
+; DARWIN-32-PIC: calll L33$pb
; DARWIN-32-PIC-NEXT: L33$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -2466,7 +2466,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ind07:
-; DARWIN-32-PIC: call L34$pb
+; DARWIN-32-PIC: calll L34$pb
; DARWIN-32-PIC-NEXT: L34$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -2543,7 +2543,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ind08:
-; DARWIN-32-PIC: call L35$pb
+; DARWIN-32-PIC: calll L35$pb
; DARWIN-32-PIC-NEXT: L35$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -2621,7 +2621,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _off00:
-; DARWIN-32-PIC: call L36$pb
+; DARWIN-32-PIC: calll L36$pb
; DARWIN-32-PIC-NEXT: L36$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -2700,7 +2700,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _oxf00:
-; DARWIN-32-PIC: call L37$pb
+; DARWIN-32-PIC: calll L37$pb
; DARWIN-32-PIC-NEXT: L37$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -2777,7 +2777,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _off01:
-; DARWIN-32-PIC: call L38$pb
+; DARWIN-32-PIC: calll L38$pb
; DARWIN-32-PIC-NEXT: L38$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -2854,7 +2854,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _oxf01:
-; DARWIN-32-PIC: call L39$pb
+; DARWIN-32-PIC: calll L39$pb
; DARWIN-32-PIC-NEXT: L39$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -2940,7 +2940,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _off02:
-; DARWIN-32-PIC: call L40$pb
+; DARWIN-32-PIC: calll L40$pb
; DARWIN-32-PIC-NEXT: L40$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -3030,7 +3030,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _oxf02:
-; DARWIN-32-PIC: call L41$pb
+; DARWIN-32-PIC: calll L41$pb
; DARWIN-32-PIC-NEXT: L41$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -3111,7 +3111,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _off03:
-; DARWIN-32-PIC: call L42$pb
+; DARWIN-32-PIC: calll L42$pb
; DARWIN-32-PIC-NEXT: L42$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -3184,7 +3184,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _off04:
-; DARWIN-32-PIC: call L43$pb
+; DARWIN-32-PIC: calll L43$pb
; DARWIN-32-PIC-NEXT: L43$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -3263,7 +3263,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _off05:
-; DARWIN-32-PIC: call L44$pb
+; DARWIN-32-PIC: calll L44$pb
; DARWIN-32-PIC-NEXT: L44$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -3339,7 +3339,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _off06:
-; DARWIN-32-PIC: call L45$pb
+; DARWIN-32-PIC: calll L45$pb
; DARWIN-32-PIC-NEXT: L45$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -3411,7 +3411,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _off07:
-; DARWIN-32-PIC: call L46$pb
+; DARWIN-32-PIC: calll L46$pb
; DARWIN-32-PIC-NEXT: L46$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -3489,7 +3489,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _off08:
-; DARWIN-32-PIC: call L47$pb
+; DARWIN-32-PIC: calll L47$pb
; DARWIN-32-PIC-NEXT: L47$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -3560,7 +3560,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _moo00:
-; DARWIN-32-PIC: call L48$pb
+; DARWIN-32-PIC: calll L48$pb
; DARWIN-32-PIC-NEXT: L48$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L48$pb(%eax), %ecx
@@ -3626,7 +3626,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _moo01:
-; DARWIN-32-PIC: call L49$pb
+; DARWIN-32-PIC: calll L49$pb
; DARWIN-32-PIC-NEXT: L49$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl $262144, %ecx
@@ -3705,7 +3705,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _moo02:
-; DARWIN-32-PIC: call L50$pb
+; DARWIN-32-PIC: calll L50$pb
; DARWIN-32-PIC-NEXT: L50$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L50$pb(%eax), %ecx
@@ -3778,7 +3778,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _moo03:
-; DARWIN-32-PIC: call L51$pb
+; DARWIN-32-PIC: calll L51$pb
; DARWIN-32-PIC-NEXT: L51$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl (_dsrc-L51$pb)+262144(%eax), %ecx
@@ -3833,7 +3833,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _moo04:
-; DARWIN-32-PIC: call L52$pb
+; DARWIN-32-PIC: calll L52$pb
; DARWIN-32-PIC-NEXT: L52$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal (_ddst-L52$pb)+262144(%eax), %ecx
@@ -3902,7 +3902,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _moo05:
-; DARWIN-32-PIC: call L53$pb
+; DARWIN-32-PIC: calll L53$pb
; DARWIN-32-PIC-NEXT: L53$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl (_dsrc-L53$pb)+262144(%eax), %ecx
@@ -3965,7 +3965,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _moo06:
-; DARWIN-32-PIC: call L54$pb
+; DARWIN-32-PIC: calll L54$pb
; DARWIN-32-PIC-NEXT: L54$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl (_lsrc-L54$pb)+262144(%eax), %ecx
@@ -4018,7 +4018,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _moo07:
-; DARWIN-32-PIC: call L55$pb
+; DARWIN-32-PIC: calll L55$pb
; DARWIN-32-PIC-NEXT: L55$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal (_ldst-L55$pb)+262144(%eax), %ecx
@@ -4085,7 +4085,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _moo08:
-; DARWIN-32-PIC: call L56$pb
+; DARWIN-32-PIC: calll L56$pb
; DARWIN-32-PIC-NEXT: L56$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl (_lsrc-L56$pb)+262144(%eax), %ecx
@@ -4159,7 +4159,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _big00:
-; DARWIN-32-PIC: call L57$pb
+; DARWIN-32-PIC: calll L57$pb
; DARWIN-32-PIC-NEXT: L57$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -4236,7 +4236,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _big01:
-; DARWIN-32-PIC: call L58$pb
+; DARWIN-32-PIC: calll L58$pb
; DARWIN-32-PIC-NEXT: L58$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -4322,7 +4322,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _big02:
-; DARWIN-32-PIC: call L59$pb
+; DARWIN-32-PIC: calll L59$pb
; DARWIN-32-PIC-NEXT: L59$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -4403,7 +4403,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _big03:
-; DARWIN-32-PIC: call L60$pb
+; DARWIN-32-PIC: calll L60$pb
; DARWIN-32-PIC-NEXT: L60$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -4476,7 +4476,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _big04:
-; DARWIN-32-PIC: call L61$pb
+; DARWIN-32-PIC: calll L61$pb
; DARWIN-32-PIC-NEXT: L61$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -4555,7 +4555,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _big05:
-; DARWIN-32-PIC: call L62$pb
+; DARWIN-32-PIC: calll L62$pb
; DARWIN-32-PIC-NEXT: L62$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -4631,7 +4631,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _big06:
-; DARWIN-32-PIC: call L63$pb
+; DARWIN-32-PIC: calll L63$pb
; DARWIN-32-PIC-NEXT: L63$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -4703,7 +4703,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _big07:
-; DARWIN-32-PIC: call L64$pb
+; DARWIN-32-PIC: calll L64$pb
; DARWIN-32-PIC-NEXT: L64$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -4781,7 +4781,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _big08:
-; DARWIN-32-PIC: call L65$pb
+; DARWIN-32-PIC: calll L65$pb
; DARWIN-32-PIC-NEXT: L65$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -4840,7 +4840,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bar00:
-; DARWIN-32-PIC: call L66$pb
+; DARWIN-32-PIC: calll L66$pb
; DARWIN-32-PIC-NEXT: L66$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L66$pb(%eax), %eax
@@ -4887,7 +4887,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bxr00:
-; DARWIN-32-PIC: call L67$pb
+; DARWIN-32-PIC: calll L67$pb
; DARWIN-32-PIC-NEXT: L67$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L67$pb(%eax), %eax
@@ -4934,7 +4934,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bar01:
-; DARWIN-32-PIC: call L68$pb
+; DARWIN-32-PIC: calll L68$pb
; DARWIN-32-PIC-NEXT: L68$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L68$pb(%eax), %eax
@@ -4981,7 +4981,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bxr01:
-; DARWIN-32-PIC: call L69$pb
+; DARWIN-32-PIC: calll L69$pb
; DARWIN-32-PIC-NEXT: L69$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L69$pb(%eax), %eax
@@ -5028,7 +5028,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bar02:
-; DARWIN-32-PIC: call L70$pb
+; DARWIN-32-PIC: calll L70$pb
; DARWIN-32-PIC-NEXT: L70$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L70$pb(%eax), %eax
@@ -5075,7 +5075,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bar03:
-; DARWIN-32-PIC: call L71$pb
+; DARWIN-32-PIC: calll L71$pb
; DARWIN-32-PIC-NEXT: L71$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _dsrc-L71$pb(%eax), %eax
@@ -5122,7 +5122,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bar04:
-; DARWIN-32-PIC: call L72$pb
+; DARWIN-32-PIC: calll L72$pb
; DARWIN-32-PIC-NEXT: L72$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _ddst-L72$pb(%eax), %eax
@@ -5169,7 +5169,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bar05:
-; DARWIN-32-PIC: call L73$pb
+; DARWIN-32-PIC: calll L73$pb
; DARWIN-32-PIC-NEXT: L73$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _dptr-L73$pb(%eax), %eax
@@ -5216,7 +5216,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bar06:
-; DARWIN-32-PIC: call L74$pb
+; DARWIN-32-PIC: calll L74$pb
; DARWIN-32-PIC-NEXT: L74$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _lsrc-L74$pb(%eax), %eax
@@ -5263,7 +5263,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bar07:
-; DARWIN-32-PIC: call L75$pb
+; DARWIN-32-PIC: calll L75$pb
; DARWIN-32-PIC-NEXT: L75$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _ldst-L75$pb(%eax), %eax
@@ -5310,7 +5310,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bar08:
-; DARWIN-32-PIC: call L76$pb
+; DARWIN-32-PIC: calll L76$pb
; DARWIN-32-PIC-NEXT: L76$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _lptr-L76$pb(%eax), %eax
@@ -5357,7 +5357,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _har00:
-; DARWIN-32-PIC: call L77$pb
+; DARWIN-32-PIC: calll L77$pb
; DARWIN-32-PIC-NEXT: L77$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L77$pb(%eax), %eax
@@ -5404,7 +5404,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _hxr00:
-; DARWIN-32-PIC: call L78$pb
+; DARWIN-32-PIC: calll L78$pb
; DARWIN-32-PIC-NEXT: L78$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L78$pb(%eax), %eax
@@ -5451,7 +5451,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _har01:
-; DARWIN-32-PIC: call L79$pb
+; DARWIN-32-PIC: calll L79$pb
; DARWIN-32-PIC-NEXT: L79$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L79$pb(%eax), %eax
@@ -5498,7 +5498,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _hxr01:
-; DARWIN-32-PIC: call L80$pb
+; DARWIN-32-PIC: calll L80$pb
; DARWIN-32-PIC-NEXT: L80$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L80$pb(%eax), %eax
@@ -5549,7 +5549,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _har02:
-; DARWIN-32-PIC: call L81$pb
+; DARWIN-32-PIC: calll L81$pb
; DARWIN-32-PIC-NEXT: L81$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L81$pb(%eax), %eax
@@ -5600,7 +5600,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _har03:
-; DARWIN-32-PIC: call L82$pb
+; DARWIN-32-PIC: calll L82$pb
; DARWIN-32-PIC-NEXT: L82$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _dsrc-L82$pb(%eax), %eax
@@ -5647,7 +5647,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _har04:
-; DARWIN-32-PIC: call L83$pb
+; DARWIN-32-PIC: calll L83$pb
; DARWIN-32-PIC-NEXT: L83$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _ddst-L83$pb(%eax), %eax
@@ -5697,7 +5697,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _har05:
-; DARWIN-32-PIC: call L84$pb
+; DARWIN-32-PIC: calll L84$pb
; DARWIN-32-PIC-NEXT: L84$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl _dptr-L84$pb(%eax), %eax
@@ -5744,7 +5744,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _har06:
-; DARWIN-32-PIC: call L85$pb
+; DARWIN-32-PIC: calll L85$pb
; DARWIN-32-PIC-NEXT: L85$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _lsrc-L85$pb(%eax), %eax
@@ -5791,7 +5791,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _har07:
-; DARWIN-32-PIC: call L86$pb
+; DARWIN-32-PIC: calll L86$pb
; DARWIN-32-PIC-NEXT: L86$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _ldst-L86$pb(%eax), %eax
@@ -5840,7 +5840,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _har08:
-; DARWIN-32-PIC: call L87$pb
+; DARWIN-32-PIC: calll L87$pb
; DARWIN-32-PIC-NEXT: L87$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl _lptr-L87$pb(%eax), %eax
@@ -5889,7 +5889,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bat00:
-; DARWIN-32-PIC: call L88$pb
+; DARWIN-32-PIC: calll L88$pb
; DARWIN-32-PIC-NEXT: L88$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L88$pb(%eax), %eax
@@ -5942,7 +5942,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bxt00:
-; DARWIN-32-PIC: call L89$pb
+; DARWIN-32-PIC: calll L89$pb
; DARWIN-32-PIC-NEXT: L89$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L89$pb(%eax), %eax
@@ -5995,7 +5995,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bat01:
-; DARWIN-32-PIC: call L90$pb
+; DARWIN-32-PIC: calll L90$pb
; DARWIN-32-PIC-NEXT: L90$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L90$pb(%eax), %eax
@@ -6048,7 +6048,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bxt01:
-; DARWIN-32-PIC: call L91$pb
+; DARWIN-32-PIC: calll L91$pb
; DARWIN-32-PIC-NEXT: L91$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L91$pb(%eax), %eax
@@ -6110,7 +6110,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bat02:
-; DARWIN-32-PIC: call L92$pb
+; DARWIN-32-PIC: calll L92$pb
; DARWIN-32-PIC-NEXT: L92$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L92$pb(%eax), %eax
@@ -6166,7 +6166,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bat03:
-; DARWIN-32-PIC: call L93$pb
+; DARWIN-32-PIC: calll L93$pb
; DARWIN-32-PIC-NEXT: L93$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal (_dsrc-L93$pb)+64(%eax), %eax
@@ -6214,7 +6214,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bat04:
-; DARWIN-32-PIC: call L94$pb
+; DARWIN-32-PIC: calll L94$pb
; DARWIN-32-PIC-NEXT: L94$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal (_ddst-L94$pb)+64(%eax), %eax
@@ -6271,7 +6271,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bat05:
-; DARWIN-32-PIC: call L95$pb
+; DARWIN-32-PIC: calll L95$pb
; DARWIN-32-PIC-NEXT: L95$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl _dptr-L95$pb(%eax), %eax
@@ -6322,7 +6322,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bat06:
-; DARWIN-32-PIC: call L96$pb
+; DARWIN-32-PIC: calll L96$pb
; DARWIN-32-PIC-NEXT: L96$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal (_lsrc-L96$pb)+64(%eax), %eax
@@ -6369,7 +6369,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bat07:
-; DARWIN-32-PIC: call L97$pb
+; DARWIN-32-PIC: calll L97$pb
; DARWIN-32-PIC-NEXT: L97$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal (_ldst-L97$pb)+64(%eax), %eax
@@ -6425,7 +6425,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bat08:
-; DARWIN-32-PIC: call L98$pb
+; DARWIN-32-PIC: calll L98$pb
; DARWIN-32-PIC-NEXT: L98$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl _lptr-L98$pb(%eax), %eax
@@ -6478,7 +6478,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bam00:
-; DARWIN-32-PIC: call L99$pb
+; DARWIN-32-PIC: calll L99$pb
; DARWIN-32-PIC-NEXT: L99$pb:
; DARWIN-32-PIC-NEXT: popl %ecx
; DARWIN-32-PIC-NEXT: movl $262144, %eax
@@ -6531,7 +6531,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bam01:
-; DARWIN-32-PIC: call L100$pb
+; DARWIN-32-PIC: calll L100$pb
; DARWIN-32-PIC-NEXT: L100$pb:
; DARWIN-32-PIC-NEXT: popl %ecx
; DARWIN-32-PIC-NEXT: movl $262144, %eax
@@ -6584,7 +6584,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bxm01:
-; DARWIN-32-PIC: call L101$pb
+; DARWIN-32-PIC: calll L101$pb
; DARWIN-32-PIC-NEXT: L101$pb:
; DARWIN-32-PIC-NEXT: popl %ecx
; DARWIN-32-PIC-NEXT: movl $262144, %eax
@@ -6646,7 +6646,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bam02:
-; DARWIN-32-PIC: call L102$pb
+; DARWIN-32-PIC: calll L102$pb
; DARWIN-32-PIC-NEXT: L102$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L102$pb(%eax), %ecx
@@ -6702,7 +6702,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bam03:
-; DARWIN-32-PIC: call L103$pb
+; DARWIN-32-PIC: calll L103$pb
; DARWIN-32-PIC-NEXT: L103$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal (_dsrc-L103$pb)+262144(%eax), %eax
@@ -6750,7 +6750,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bam04:
-; DARWIN-32-PIC: call L104$pb
+; DARWIN-32-PIC: calll L104$pb
; DARWIN-32-PIC-NEXT: L104$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal (_ddst-L104$pb)+262144(%eax), %eax
@@ -6807,7 +6807,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bam05:
-; DARWIN-32-PIC: call L105$pb
+; DARWIN-32-PIC: calll L105$pb
; DARWIN-32-PIC-NEXT: L105$pb:
; DARWIN-32-PIC-NEXT: popl %ecx
; DARWIN-32-PIC-NEXT: movl $262144, %eax
@@ -6858,7 +6858,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bam06:
-; DARWIN-32-PIC: call L106$pb
+; DARWIN-32-PIC: calll L106$pb
; DARWIN-32-PIC-NEXT: L106$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal (_lsrc-L106$pb)+262144(%eax), %eax
@@ -6905,7 +6905,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bam07:
-; DARWIN-32-PIC: call L107$pb
+; DARWIN-32-PIC: calll L107$pb
; DARWIN-32-PIC-NEXT: L107$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal (_ldst-L107$pb)+262144(%eax), %eax
@@ -6961,7 +6961,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bam08:
-; DARWIN-32-PIC: call L108$pb
+; DARWIN-32-PIC: calll L108$pb
; DARWIN-32-PIC-NEXT: L108$pb:
; DARWIN-32-PIC-NEXT: popl %ecx
; DARWIN-32-PIC-NEXT: movl $262144, %eax
@@ -7021,7 +7021,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cat00:
-; DARWIN-32-PIC: call L109$pb
+; DARWIN-32-PIC: calll L109$pb
; DARWIN-32-PIC-NEXT: L109$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7082,7 +7082,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cxt00:
-; DARWIN-32-PIC: call L110$pb
+; DARWIN-32-PIC: calll L110$pb
; DARWIN-32-PIC-NEXT: L110$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7143,7 +7143,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cat01:
-; DARWIN-32-PIC: call L111$pb
+; DARWIN-32-PIC: calll L111$pb
; DARWIN-32-PIC-NEXT: L111$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7204,7 +7204,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cxt01:
-; DARWIN-32-PIC: call L112$pb
+; DARWIN-32-PIC: calll L112$pb
; DARWIN-32-PIC-NEXT: L112$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7272,7 +7272,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cat02:
-; DARWIN-32-PIC: call L113$pb
+; DARWIN-32-PIC: calll L113$pb
; DARWIN-32-PIC-NEXT: L113$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L113$pb(%eax), %eax
@@ -7336,7 +7336,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cat03:
-; DARWIN-32-PIC: call L114$pb
+; DARWIN-32-PIC: calll L114$pb
; DARWIN-32-PIC-NEXT: L114$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7395,7 +7395,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cat04:
-; DARWIN-32-PIC: call L115$pb
+; DARWIN-32-PIC: calll L115$pb
; DARWIN-32-PIC-NEXT: L115$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7461,7 +7461,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cat05:
-; DARWIN-32-PIC: call L116$pb
+; DARWIN-32-PIC: calll L116$pb
; DARWIN-32-PIC-NEXT: L116$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7521,7 +7521,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cat06:
-; DARWIN-32-PIC: call L117$pb
+; DARWIN-32-PIC: calll L117$pb
; DARWIN-32-PIC-NEXT: L117$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7580,7 +7580,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cat07:
-; DARWIN-32-PIC: call L118$pb
+; DARWIN-32-PIC: calll L118$pb
; DARWIN-32-PIC-NEXT: L118$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7645,7 +7645,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cat08:
-; DARWIN-32-PIC: call L119$pb
+; DARWIN-32-PIC: calll L119$pb
; DARWIN-32-PIC-NEXT: L119$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7706,7 +7706,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cam00:
-; DARWIN-32-PIC: call L120$pb
+; DARWIN-32-PIC: calll L120$pb
; DARWIN-32-PIC-NEXT: L120$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7767,7 +7767,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cxm00:
-; DARWIN-32-PIC: call L121$pb
+; DARWIN-32-PIC: calll L121$pb
; DARWIN-32-PIC-NEXT: L121$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7828,7 +7828,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cam01:
-; DARWIN-32-PIC: call L122$pb
+; DARWIN-32-PIC: calll L122$pb
; DARWIN-32-PIC-NEXT: L122$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7889,7 +7889,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cxm01:
-; DARWIN-32-PIC: call L123$pb
+; DARWIN-32-PIC: calll L123$pb
; DARWIN-32-PIC-NEXT: L123$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7957,7 +7957,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cam02:
-; DARWIN-32-PIC: call L124$pb
+; DARWIN-32-PIC: calll L124$pb
; DARWIN-32-PIC-NEXT: L124$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L124$pb(%eax), %eax
@@ -8021,7 +8021,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cam03:
-; DARWIN-32-PIC: call L125$pb
+; DARWIN-32-PIC: calll L125$pb
; DARWIN-32-PIC-NEXT: L125$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -8080,7 +8080,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cam04:
-; DARWIN-32-PIC: call L126$pb
+; DARWIN-32-PIC: calll L126$pb
; DARWIN-32-PIC-NEXT: L126$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -8146,7 +8146,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cam05:
-; DARWIN-32-PIC: call L127$pb
+; DARWIN-32-PIC: calll L127$pb
; DARWIN-32-PIC-NEXT: L127$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -8206,7 +8206,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cam06:
-; DARWIN-32-PIC: call L128$pb
+; DARWIN-32-PIC: calll L128$pb
; DARWIN-32-PIC-NEXT: L128$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -8265,7 +8265,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cam07:
-; DARWIN-32-PIC: call L129$pb
+; DARWIN-32-PIC: calll L129$pb
; DARWIN-32-PIC-NEXT: L129$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -8330,7 +8330,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cam08:
-; DARWIN-32-PIC: call L130$pb
+; DARWIN-32-PIC: calll L130$pb
; DARWIN-32-PIC-NEXT: L130$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -8375,31 +8375,32 @@ entry:
; LINUX-64-STATIC: ret
; LINUX-32-STATIC: lcallee:
-; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call x
-; LINUX-32-STATIC-NEXT: call x
-; LINUX-32-STATIC-NEXT: call x
-; LINUX-32-STATIC-NEXT: call x
-; LINUX-32-STATIC-NEXT: call x
-; LINUX-32-STATIC-NEXT: call x
-; LINUX-32-STATIC-NEXT: call x
-; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC: subl
+; LINUX-32-STATIC-NEXT: calll x
+; LINUX-32-STATIC-NEXT: calll x
+; LINUX-32-STATIC-NEXT: calll x
+; LINUX-32-STATIC-NEXT: calll x
+; LINUX-32-STATIC-NEXT: calll x
+; LINUX-32-STATIC-NEXT: calll x
+; LINUX-32-STATIC-NEXT: calll x
+; LINUX-32-STATIC-NEXT: addl
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: lcallee:
-; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call x
-; LINUX-32-PIC-NEXT: call x
-; LINUX-32-PIC-NEXT: call x
-; LINUX-32-PIC-NEXT: call x
-; LINUX-32-PIC-NEXT: call x
-; LINUX-32-PIC-NEXT: call x
-; LINUX-32-PIC-NEXT: call x
-; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC: subl
+; LINUX-32-PIC-NEXT: calll x
+; LINUX-32-PIC-NEXT: calll x
+; LINUX-32-PIC-NEXT: calll x
+; LINUX-32-PIC-NEXT: calll x
+; LINUX-32-PIC-NEXT: calll x
+; LINUX-32-PIC-NEXT: calll x
+; LINUX-32-PIC-NEXT: calll x
+; LINUX-32-PIC-NEXT: addl
+
; LINUX-32-PIC-NEXT: ret
; LINUX-64-PIC: lcallee:
-; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC: pushq
; LINUX-64-PIC-NEXT: callq x@PLT
; LINUX-64-PIC-NEXT: callq x@PLT
; LINUX-64-PIC-NEXT: callq x@PLT
@@ -8407,47 +8408,47 @@ entry:
; LINUX-64-PIC-NEXT: callq x@PLT
; LINUX-64-PIC-NEXT: callq x@PLT
; LINUX-64-PIC-NEXT: callq x@PLT
-; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: popq
; LINUX-64-PIC-NEXT: ret
; DARWIN-32-STATIC: _lcallee:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call _x
-; DARWIN-32-STATIC-NEXT: call _x
-; DARWIN-32-STATIC-NEXT: call _x
-; DARWIN-32-STATIC-NEXT: call _x
-; DARWIN-32-STATIC-NEXT: call _x
-; DARWIN-32-STATIC-NEXT: call _x
-; DARWIN-32-STATIC-NEXT: call _x
+; DARWIN-32-STATIC-NEXT: calll _x
+; DARWIN-32-STATIC-NEXT: calll _x
+; DARWIN-32-STATIC-NEXT: calll _x
+; DARWIN-32-STATIC-NEXT: calll _x
+; DARWIN-32-STATIC-NEXT: calll _x
+; DARWIN-32-STATIC-NEXT: calll _x
+; DARWIN-32-STATIC-NEXT: calll _x
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
; DARWIN-32-DYNAMIC: _lcallee:
; DARWIN-32-DYNAMIC: subl $12, %esp
-; DARWIN-32-DYNAMIC-NEXT: call L_x$stub
-; DARWIN-32-DYNAMIC-NEXT: call L_x$stub
-; DARWIN-32-DYNAMIC-NEXT: call L_x$stub
-; DARWIN-32-DYNAMIC-NEXT: call L_x$stub
-; DARWIN-32-DYNAMIC-NEXT: call L_x$stub
-; DARWIN-32-DYNAMIC-NEXT: call L_x$stub
-; DARWIN-32-DYNAMIC-NEXT: call L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_x$stub
; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _lcallee:
; DARWIN-32-PIC: subl $12, %esp
-; DARWIN-32-PIC-NEXT: call L_x$stub
-; DARWIN-32-PIC-NEXT: call L_x$stub
-; DARWIN-32-PIC-NEXT: call L_x$stub
-; DARWIN-32-PIC-NEXT: call L_x$stub
-; DARWIN-32-PIC-NEXT: call L_x$stub
-; DARWIN-32-PIC-NEXT: call L_x$stub
-; DARWIN-32-PIC-NEXT: call L_x$stub
+; DARWIN-32-PIC-NEXT: calll L_x$stub
+; DARWIN-32-PIC-NEXT: calll L_x$stub
+; DARWIN-32-PIC-NEXT: calll L_x$stub
+; DARWIN-32-PIC-NEXT: calll L_x$stub
+; DARWIN-32-PIC-NEXT: calll L_x$stub
+; DARWIN-32-PIC-NEXT: calll L_x$stub
+; DARWIN-32-PIC-NEXT: calll L_x$stub
; DARWIN-32-PIC-NEXT: addl $12, %esp
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _lcallee:
-; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC: pushq
; DARWIN-64-STATIC-NEXT: callq _x
; DARWIN-64-STATIC-NEXT: callq _x
; DARWIN-64-STATIC-NEXT: callq _x
@@ -8455,11 +8456,11 @@ entry:
; DARWIN-64-STATIC-NEXT: callq _x
; DARWIN-64-STATIC-NEXT: callq _x
; DARWIN-64-STATIC-NEXT: callq _x
-; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: popq
; DARWIN-64-STATIC-NEXT: ret
; DARWIN-64-DYNAMIC: _lcallee:
-; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC: pushq
; DARWIN-64-DYNAMIC-NEXT: callq _x
; DARWIN-64-DYNAMIC-NEXT: callq _x
; DARWIN-64-DYNAMIC-NEXT: callq _x
@@ -8467,11 +8468,11 @@ entry:
; DARWIN-64-DYNAMIC-NEXT: callq _x
; DARWIN-64-DYNAMIC-NEXT: callq _x
; DARWIN-64-DYNAMIC-NEXT: callq _x
-; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: popq
; DARWIN-64-DYNAMIC-NEXT: ret
; DARWIN-64-PIC: _lcallee:
-; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC: pushq
; DARWIN-64-PIC-NEXT: callq _x
; DARWIN-64-PIC-NEXT: callq _x
; DARWIN-64-PIC-NEXT: callq _x
@@ -8479,7 +8480,7 @@ entry:
; DARWIN-64-PIC-NEXT: callq _x
; DARWIN-64-PIC-NEXT: callq _x
; DARWIN-64-PIC-NEXT: callq _x
-; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: popq
; DARWIN-64-PIC-NEXT: ret
}
@@ -8506,31 +8507,32 @@ entry:
; LINUX-64-STATIC: ret
; LINUX-32-STATIC: dcallee:
-; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call y
-; LINUX-32-STATIC-NEXT: call y
-; LINUX-32-STATIC-NEXT: call y
-; LINUX-32-STATIC-NEXT: call y
-; LINUX-32-STATIC-NEXT: call y
-; LINUX-32-STATIC-NEXT: call y
-; LINUX-32-STATIC-NEXT: call y
-; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC: subl
+; LINUX-32-STATIC-NEXT: calll y
+; LINUX-32-STATIC-NEXT: calll y
+; LINUX-32-STATIC-NEXT: calll y
+; LINUX-32-STATIC-NEXT: calll y
+; LINUX-32-STATIC-NEXT: calll y
+; LINUX-32-STATIC-NEXT: calll y
+; LINUX-32-STATIC-NEXT: calll y
+; LINUX-32-STATIC-NEXT: addl
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: dcallee:
-; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call y
-; LINUX-32-PIC-NEXT: call y
-; LINUX-32-PIC-NEXT: call y
-; LINUX-32-PIC-NEXT: call y
-; LINUX-32-PIC-NEXT: call y
-; LINUX-32-PIC-NEXT: call y
-; LINUX-32-PIC-NEXT: call y
-; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC: subl
+; LINUX-32-PIC-NEXT: calll y
+; LINUX-32-PIC-NEXT: calll y
+; LINUX-32-PIC-NEXT: calll y
+; LINUX-32-PIC-NEXT: calll y
+; LINUX-32-PIC-NEXT: calll y
+; LINUX-32-PIC-NEXT: calll y
+; LINUX-32-PIC-NEXT: calll y
+; LINUX-32-PIC-NEXT: addl
+
; LINUX-32-PIC-NEXT: ret
; LINUX-64-PIC: dcallee:
-; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC: pushq
; LINUX-64-PIC-NEXT: callq y@PLT
; LINUX-64-PIC-NEXT: callq y@PLT
; LINUX-64-PIC-NEXT: callq y@PLT
@@ -8538,47 +8540,47 @@ entry:
; LINUX-64-PIC-NEXT: callq y@PLT
; LINUX-64-PIC-NEXT: callq y@PLT
; LINUX-64-PIC-NEXT: callq y@PLT
-; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: popq
; LINUX-64-PIC-NEXT: ret
; DARWIN-32-STATIC: _dcallee:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call _y
-; DARWIN-32-STATIC-NEXT: call _y
-; DARWIN-32-STATIC-NEXT: call _y
-; DARWIN-32-STATIC-NEXT: call _y
-; DARWIN-32-STATIC-NEXT: call _y
-; DARWIN-32-STATIC-NEXT: call _y
-; DARWIN-32-STATIC-NEXT: call _y
+; DARWIN-32-STATIC-NEXT: calll _y
+; DARWIN-32-STATIC-NEXT: calll _y
+; DARWIN-32-STATIC-NEXT: calll _y
+; DARWIN-32-STATIC-NEXT: calll _y
+; DARWIN-32-STATIC-NEXT: calll _y
+; DARWIN-32-STATIC-NEXT: calll _y
+; DARWIN-32-STATIC-NEXT: calll _y
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
; DARWIN-32-DYNAMIC: _dcallee:
; DARWIN-32-DYNAMIC: subl $12, %esp
-; DARWIN-32-DYNAMIC-NEXT: call L_y$stub
-; DARWIN-32-DYNAMIC-NEXT: call L_y$stub
-; DARWIN-32-DYNAMIC-NEXT: call L_y$stub
-; DARWIN-32-DYNAMIC-NEXT: call L_y$stub
-; DARWIN-32-DYNAMIC-NEXT: call L_y$stub
-; DARWIN-32-DYNAMIC-NEXT: call L_y$stub
-; DARWIN-32-DYNAMIC-NEXT: call L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_y$stub
; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _dcallee:
; DARWIN-32-PIC: subl $12, %esp
-; DARWIN-32-PIC-NEXT: call L_y$stub
-; DARWIN-32-PIC-NEXT: call L_y$stub
-; DARWIN-32-PIC-NEXT: call L_y$stub
-; DARWIN-32-PIC-NEXT: call L_y$stub
-; DARWIN-32-PIC-NEXT: call L_y$stub
-; DARWIN-32-PIC-NEXT: call L_y$stub
-; DARWIN-32-PIC-NEXT: call L_y$stub
+; DARWIN-32-PIC-NEXT: calll L_y$stub
+; DARWIN-32-PIC-NEXT: calll L_y$stub
+; DARWIN-32-PIC-NEXT: calll L_y$stub
+; DARWIN-32-PIC-NEXT: calll L_y$stub
+; DARWIN-32-PIC-NEXT: calll L_y$stub
+; DARWIN-32-PIC-NEXT: calll L_y$stub
+; DARWIN-32-PIC-NEXT: calll L_y$stub
; DARWIN-32-PIC-NEXT: addl $12, %esp
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _dcallee:
-; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC: pushq
; DARWIN-64-STATIC-NEXT: callq _y
; DARWIN-64-STATIC-NEXT: callq _y
; DARWIN-64-STATIC-NEXT: callq _y
@@ -8586,11 +8588,11 @@ entry:
; DARWIN-64-STATIC-NEXT: callq _y
; DARWIN-64-STATIC-NEXT: callq _y
; DARWIN-64-STATIC-NEXT: callq _y
-; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: popq
; DARWIN-64-STATIC-NEXT: ret
; DARWIN-64-DYNAMIC: _dcallee:
-; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC: pushq
; DARWIN-64-DYNAMIC-NEXT: callq _y
; DARWIN-64-DYNAMIC-NEXT: callq _y
; DARWIN-64-DYNAMIC-NEXT: callq _y
@@ -8598,11 +8600,11 @@ entry:
; DARWIN-64-DYNAMIC-NEXT: callq _y
; DARWIN-64-DYNAMIC-NEXT: callq _y
; DARWIN-64-DYNAMIC-NEXT: callq _y
-; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: popq
; DARWIN-64-DYNAMIC-NEXT: ret
; DARWIN-64-PIC: _dcallee:
-; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC: pushq
; DARWIN-64-PIC-NEXT: callq _y
; DARWIN-64-PIC-NEXT: callq _y
; DARWIN-64-PIC-NEXT: callq _y
@@ -8610,7 +8612,7 @@ entry:
; DARWIN-64-PIC-NEXT: callq _y
; DARWIN-64-PIC-NEXT: callq _y
; DARWIN-64-PIC-NEXT: callq _y
-; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: popq
; DARWIN-64-PIC-NEXT: ret
}
@@ -8644,7 +8646,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _address:
-; DARWIN-32-PIC: call L133$pb
+; DARWIN-32-PIC: calll L133$pb
; DARWIN-32-PIC-NEXT: L133$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_callee$non_lazy_ptr-L133$pb(%eax), %eax
@@ -8693,7 +8695,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _laddress:
-; DARWIN-32-PIC: call L134$pb
+; DARWIN-32-PIC: calll L134$pb
; DARWIN-32-PIC-NEXT: L134$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _lcallee-L134$pb(%eax), %eax
@@ -8740,7 +8742,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _daddress:
-; DARWIN-32-PIC: call L135$pb
+; DARWIN-32-PIC: calll L135$pb
; DARWIN-32-PIC-NEXT: L135$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _dcallee-L135$pb(%eax), %eax
@@ -8770,66 +8772,67 @@ entry:
; LINUX-64-STATIC: ret
; LINUX-32-STATIC: caller:
-; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call callee
-; LINUX-32-STATIC-NEXT: call callee
-; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC: subl
+; LINUX-32-STATIC-NEXT: calll callee
+; LINUX-32-STATIC-NEXT: calll callee
+; LINUX-32-STATIC-NEXT: addl
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: caller:
-; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call callee
-; LINUX-32-PIC-NEXT: call callee
-; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC: subl
+; LINUX-32-PIC-NEXT: calll callee
+; LINUX-32-PIC-NEXT: calll callee
+; LINUX-32-PIC-NEXT: addl
+
; LINUX-32-PIC-NEXT: ret
; LINUX-64-PIC: caller:
-; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC: pushq
; LINUX-64-PIC-NEXT: callq callee@PLT
; LINUX-64-PIC-NEXT: callq callee@PLT
-; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: popq
; LINUX-64-PIC-NEXT: ret
; DARWIN-32-STATIC: _caller:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call _callee
-; DARWIN-32-STATIC-NEXT: call _callee
+; DARWIN-32-STATIC-NEXT: calll _callee
+; DARWIN-32-STATIC-NEXT: calll _callee
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
; DARWIN-32-DYNAMIC: _caller:
; DARWIN-32-DYNAMIC: subl $12, %esp
-; DARWIN-32-DYNAMIC-NEXT: call L_callee$stub
-; DARWIN-32-DYNAMIC-NEXT: call L_callee$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_callee$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_callee$stub
; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _caller:
; DARWIN-32-PIC: subl $12, %esp
-; DARWIN-32-PIC-NEXT: call L_callee$stub
-; DARWIN-32-PIC-NEXT: call L_callee$stub
+; DARWIN-32-PIC-NEXT: calll L_callee$stub
+; DARWIN-32-PIC-NEXT: calll L_callee$stub
; DARWIN-32-PIC-NEXT: addl $12, %esp
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _caller:
-; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC: pushq
; DARWIN-64-STATIC-NEXT: callq _callee
; DARWIN-64-STATIC-NEXT: callq _callee
-; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: popq
; DARWIN-64-STATIC-NEXT: ret
; DARWIN-64-DYNAMIC: _caller:
-; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC: pushq
; DARWIN-64-DYNAMIC-NEXT: callq _callee
; DARWIN-64-DYNAMIC-NEXT: callq _callee
-; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: popq
; DARWIN-64-DYNAMIC-NEXT: ret
; DARWIN-64-PIC: _caller:
-; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC: pushq
; DARWIN-64-PIC-NEXT: callq _callee
; DARWIN-64-PIC-NEXT: callq _callee
-; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: popq
; DARWIN-64-PIC-NEXT: ret
}
@@ -8844,66 +8847,67 @@ entry:
; LINUX-64-STATIC: ret
; LINUX-32-STATIC: dcaller:
-; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call dcallee
-; LINUX-32-STATIC-NEXT: call dcallee
-; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC: subl
+; LINUX-32-STATIC-NEXT: calll dcallee
+; LINUX-32-STATIC-NEXT: calll dcallee
+; LINUX-32-STATIC-NEXT: addl
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: dcaller:
-; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call dcallee
-; LINUX-32-PIC-NEXT: call dcallee
-; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC: subl
+; LINUX-32-PIC-NEXT: calll dcallee
+; LINUX-32-PIC-NEXT: calll dcallee
+; LINUX-32-PIC-NEXT: addl
+
; LINUX-32-PIC-NEXT: ret
; LINUX-64-PIC: dcaller:
-; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC: pushq
; LINUX-64-PIC-NEXT: callq dcallee
; LINUX-64-PIC-NEXT: callq dcallee
-; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: popq
; LINUX-64-PIC-NEXT: ret
; DARWIN-32-STATIC: _dcaller:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call _dcallee
-; DARWIN-32-STATIC-NEXT: call _dcallee
+; DARWIN-32-STATIC-NEXT: calll _dcallee
+; DARWIN-32-STATIC-NEXT: calll _dcallee
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
; DARWIN-32-DYNAMIC: _dcaller:
; DARWIN-32-DYNAMIC: subl $12, %esp
-; DARWIN-32-DYNAMIC-NEXT: call _dcallee
-; DARWIN-32-DYNAMIC-NEXT: call _dcallee
+; DARWIN-32-DYNAMIC-NEXT: calll _dcallee
+; DARWIN-32-DYNAMIC-NEXT: calll _dcallee
; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _dcaller:
; DARWIN-32-PIC: subl $12, %esp
-; DARWIN-32-PIC-NEXT: call _dcallee
-; DARWIN-32-PIC-NEXT: call _dcallee
+; DARWIN-32-PIC-NEXT: calll _dcallee
+; DARWIN-32-PIC-NEXT: calll _dcallee
; DARWIN-32-PIC-NEXT: addl $12, %esp
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _dcaller:
-; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC: pushq
; DARWIN-64-STATIC-NEXT: callq _dcallee
; DARWIN-64-STATIC-NEXT: callq _dcallee
-; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: popq
; DARWIN-64-STATIC-NEXT: ret
; DARWIN-64-DYNAMIC: _dcaller:
-; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC: pushq
; DARWIN-64-DYNAMIC-NEXT: callq _dcallee
; DARWIN-64-DYNAMIC-NEXT: callq _dcallee
-; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: popq
; DARWIN-64-DYNAMIC-NEXT: ret
; DARWIN-64-PIC: _dcaller:
-; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC: pushq
; DARWIN-64-PIC-NEXT: callq _dcallee
; DARWIN-64-PIC-NEXT: callq _dcallee
-; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: popq
; DARWIN-64-PIC-NEXT: ret
}
@@ -8918,66 +8922,67 @@ entry:
; LINUX-64-STATIC: ret
; LINUX-32-STATIC: lcaller:
-; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call lcallee
-; LINUX-32-STATIC-NEXT: call lcallee
-; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC: subl
+; LINUX-32-STATIC-NEXT: calll lcallee
+; LINUX-32-STATIC-NEXT: calll lcallee
+; LINUX-32-STATIC-NEXT: addl
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: lcaller:
-; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call lcallee
-; LINUX-32-PIC-NEXT: call lcallee
-; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC: subl
+; LINUX-32-PIC-NEXT: calll lcallee
+; LINUX-32-PIC-NEXT: calll lcallee
+; LINUX-32-PIC-NEXT: addl
+
; LINUX-32-PIC-NEXT: ret
; LINUX-64-PIC: lcaller:
-; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC: pushq
; LINUX-64-PIC-NEXT: callq lcallee@PLT
; LINUX-64-PIC-NEXT: callq lcallee@PLT
-; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: popq
; LINUX-64-PIC-NEXT: ret
; DARWIN-32-STATIC: _lcaller:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call _lcallee
-; DARWIN-32-STATIC-NEXT: call _lcallee
+; DARWIN-32-STATIC-NEXT: calll _lcallee
+; DARWIN-32-STATIC-NEXT: calll _lcallee
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
; DARWIN-32-DYNAMIC: _lcaller:
; DARWIN-32-DYNAMIC: subl $12, %esp
-; DARWIN-32-DYNAMIC-NEXT: call _lcallee
-; DARWIN-32-DYNAMIC-NEXT: call _lcallee
+; DARWIN-32-DYNAMIC-NEXT: calll _lcallee
+; DARWIN-32-DYNAMIC-NEXT: calll _lcallee
; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _lcaller:
; DARWIN-32-PIC: subl $12, %esp
-; DARWIN-32-PIC-NEXT: call _lcallee
-; DARWIN-32-PIC-NEXT: call _lcallee
+; DARWIN-32-PIC-NEXT: calll _lcallee
+; DARWIN-32-PIC-NEXT: calll _lcallee
; DARWIN-32-PIC-NEXT: addl $12, %esp
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _lcaller:
-; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC: pushq
; DARWIN-64-STATIC-NEXT: callq _lcallee
; DARWIN-64-STATIC-NEXT: callq _lcallee
-; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: popq
; DARWIN-64-STATIC-NEXT: ret
; DARWIN-64-DYNAMIC: _lcaller:
-; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC: pushq
; DARWIN-64-DYNAMIC-NEXT: callq _lcallee
; DARWIN-64-DYNAMIC-NEXT: callq _lcallee
-; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: popq
; DARWIN-64-DYNAMIC-NEXT: ret
; DARWIN-64-PIC: _lcaller:
-; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC: pushq
; DARWIN-64-PIC-NEXT: callq _lcallee
; DARWIN-64-PIC-NEXT: callq _lcallee
-; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: popq
; DARWIN-64-PIC-NEXT: ret
}
@@ -8990,57 +8995,58 @@ entry:
; LINUX-64-STATIC: ret
; LINUX-32-STATIC: tailcaller:
-; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call callee
-; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC: subl
+; LINUX-32-STATIC-NEXT: calll callee
+; LINUX-32-STATIC-NEXT: addl
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: tailcaller:
-; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call callee
-; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC: subl
+; LINUX-32-PIC-NEXT: calll callee
+; LINUX-32-PIC-NEXT: addl
+
; LINUX-32-PIC-NEXT: ret
; LINUX-64-PIC: tailcaller:
-; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC: pushq
; LINUX-64-PIC-NEXT: callq callee@PLT
-; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: popq
; LINUX-64-PIC-NEXT: ret
; DARWIN-32-STATIC: _tailcaller:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call _callee
+; DARWIN-32-STATIC-NEXT: calll _callee
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
; DARWIN-32-DYNAMIC: _tailcaller:
; DARWIN-32-DYNAMIC: subl $12, %esp
-; DARWIN-32-DYNAMIC-NEXT: call L_callee$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_callee$stub
; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _tailcaller:
; DARWIN-32-PIC: subl $12, %esp
-; DARWIN-32-PIC-NEXT: call L_callee$stub
+; DARWIN-32-PIC-NEXT: calll L_callee$stub
; DARWIN-32-PIC-NEXT: addl $12, %esp
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _tailcaller:
-; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC: pushq
; DARWIN-64-STATIC-NEXT: callq _callee
-; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: popq
; DARWIN-64-STATIC-NEXT: ret
; DARWIN-64-DYNAMIC: _tailcaller:
-; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC: pushq
; DARWIN-64-DYNAMIC-NEXT: callq _callee
-; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: popq
; DARWIN-64-DYNAMIC-NEXT: ret
; DARWIN-64-PIC: _tailcaller:
-; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC: pushq
; DARWIN-64-PIC-NEXT: callq _callee
-; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: popq
; DARWIN-64-PIC-NEXT: ret
}
@@ -9053,57 +9059,58 @@ entry:
; LINUX-64-STATIC: ret
; LINUX-32-STATIC: dtailcaller:
-; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call dcallee
-; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC: subl
+; LINUX-32-STATIC-NEXT: calll dcallee
+; LINUX-32-STATIC-NEXT: addl
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: dtailcaller:
-; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call dcallee
-; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC: subl
+; LINUX-32-PIC-NEXT: calll dcallee
+; LINUX-32-PIC-NEXT: addl
+
; LINUX-32-PIC-NEXT: ret
; LINUX-64-PIC: dtailcaller:
-; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC: pushq
; LINUX-64-PIC-NEXT: callq dcallee
-; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: popq
; LINUX-64-PIC-NEXT: ret
; DARWIN-32-STATIC: _dtailcaller:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call _dcallee
+; DARWIN-32-STATIC-NEXT: calll _dcallee
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
; DARWIN-32-DYNAMIC: _dtailcaller:
; DARWIN-32-DYNAMIC: subl $12, %esp
-; DARWIN-32-DYNAMIC-NEXT: call _dcallee
+; DARWIN-32-DYNAMIC-NEXT: calll _dcallee
; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _dtailcaller:
; DARWIN-32-PIC: subl $12, %esp
-; DARWIN-32-PIC-NEXT: call _dcallee
+; DARWIN-32-PIC-NEXT: calll _dcallee
; DARWIN-32-PIC-NEXT: addl $12, %esp
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _dtailcaller:
-; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC: pushq
; DARWIN-64-STATIC-NEXT: callq _dcallee
-; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: popq
; DARWIN-64-STATIC-NEXT: ret
; DARWIN-64-DYNAMIC: _dtailcaller:
-; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC: pushq
; DARWIN-64-DYNAMIC-NEXT: callq _dcallee
-; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: popq
; DARWIN-64-DYNAMIC-NEXT: ret
; DARWIN-64-PIC: _dtailcaller:
-; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC: pushq
; DARWIN-64-PIC-NEXT: callq _dcallee
-; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: popq
; DARWIN-64-PIC-NEXT: ret
}
@@ -9116,57 +9123,58 @@ entry:
; LINUX-64-STATIC: ret
; LINUX-32-STATIC: ltailcaller:
-; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call lcallee
-; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC: subl
+; LINUX-32-STATIC-NEXT: calll lcallee
+; LINUX-32-STATIC-NEXT: addl
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: ltailcaller:
-; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call lcallee
-; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC: subl
+; LINUX-32-PIC-NEXT: calll lcallee
+; LINUX-32-PIC-NEXT: addl
+
; LINUX-32-PIC-NEXT: ret
; LINUX-64-PIC: ltailcaller:
-; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC: pushq
; LINUX-64-PIC-NEXT: callq lcallee@PLT
-; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: popq
; LINUX-64-PIC-NEXT: ret
; DARWIN-32-STATIC: _ltailcaller:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call _lcallee
+; DARWIN-32-STATIC-NEXT: calll _lcallee
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
; DARWIN-32-DYNAMIC: _ltailcaller:
; DARWIN-32-DYNAMIC: subl $12, %esp
-; DARWIN-32-DYNAMIC-NEXT: call _lcallee
+; DARWIN-32-DYNAMIC-NEXT: calll _lcallee
; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ltailcaller:
; DARWIN-32-PIC: subl $12, %esp
-; DARWIN-32-PIC-NEXT: call _lcallee
+; DARWIN-32-PIC-NEXT: calll _lcallee
; DARWIN-32-PIC-NEXT: addl $12, %esp
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _ltailcaller:
-; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC: pushq
; DARWIN-64-STATIC-NEXT: callq _lcallee
-; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: popq
; DARWIN-64-STATIC-NEXT: ret
; DARWIN-64-DYNAMIC: _ltailcaller:
-; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC: pushq
; DARWIN-64-DYNAMIC-NEXT: callq _lcallee
-; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: popq
; DARWIN-64-DYNAMIC-NEXT: ret
; DARWIN-64-PIC: _ltailcaller:
-; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC: pushq
; DARWIN-64-PIC-NEXT: callq _lcallee
-; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: popq
; DARWIN-64-PIC-NEXT: ret
}
@@ -9183,17 +9191,18 @@ entry:
; LINUX-64-STATIC: ret
; LINUX-32-STATIC: icaller:
-; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call *ifunc
-; LINUX-32-STATIC-NEXT: call *ifunc
-; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC: subl
+; LINUX-32-STATIC-NEXT: calll *ifunc
+; LINUX-32-STATIC-NEXT: calll *ifunc
+; LINUX-32-STATIC-NEXT: addl
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: icaller:
-; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call *ifunc
-; LINUX-32-PIC-NEXT: call *ifunc
-; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC: subl
+; LINUX-32-PIC-NEXT: calll *ifunc
+; LINUX-32-PIC-NEXT: calll *ifunc
+; LINUX-32-PIC-NEXT: addl
+
; LINUX-32-PIC-NEXT: ret
; LINUX-64-PIC: icaller:
@@ -9206,8 +9215,8 @@ entry:
; DARWIN-32-STATIC: _icaller:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call *_ifunc
-; DARWIN-32-STATIC-NEXT: call *_ifunc
+; DARWIN-32-STATIC-NEXT: calll *_ifunc
+; DARWIN-32-STATIC-NEXT: calll *_ifunc
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
@@ -9215,8 +9224,8 @@ entry:
; DARWIN-32-DYNAMIC: pushl %esi
; DARWIN-32-DYNAMIC-NEXT: subl $8, %esp
; DARWIN-32-DYNAMIC-NEXT: movl L_ifunc$non_lazy_ptr, %esi
-; DARWIN-32-DYNAMIC-NEXT: call *(%esi)
-; DARWIN-32-DYNAMIC-NEXT: call *(%esi)
+; DARWIN-32-DYNAMIC-NEXT: calll *(%esi)
+; DARWIN-32-DYNAMIC-NEXT: calll *(%esi)
; DARWIN-32-DYNAMIC-NEXT: addl $8, %esp
; DARWIN-32-DYNAMIC-NEXT: popl %esi
; DARWIN-32-DYNAMIC-NEXT: ret
@@ -9224,12 +9233,12 @@ entry:
; DARWIN-32-PIC: _icaller:
; DARWIN-32-PIC: pushl %esi
; DARWIN-32-PIC-NEXT: subl $8, %esp
-; DARWIN-32-PIC-NEXT: call L142$pb
+; DARWIN-32-PIC-NEXT: calll L142$pb
; DARWIN-32-PIC-NEXT: L142$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_ifunc$non_lazy_ptr-L142$pb(%eax), %esi
-; DARWIN-32-PIC-NEXT: call *(%esi)
-; DARWIN-32-PIC-NEXT: call *(%esi)
+; DARWIN-32-PIC-NEXT: calll *(%esi)
+; DARWIN-32-PIC-NEXT: calll *(%esi)
; DARWIN-32-PIC-NEXT: addl $8, %esp
; DARWIN-32-PIC-NEXT: popl %esi
; DARWIN-32-PIC-NEXT: ret
@@ -9272,17 +9281,18 @@ entry:
; LINUX-64-STATIC: ret
; LINUX-32-STATIC: dicaller:
-; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call *difunc
-; LINUX-32-STATIC-NEXT: call *difunc
-; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC: subl
+; LINUX-32-STATIC-NEXT: calll *difunc
+; LINUX-32-STATIC-NEXT: calll *difunc
+; LINUX-32-STATIC-NEXT: addl
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: dicaller:
-; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call *difunc
-; LINUX-32-PIC-NEXT: call *difunc
-; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC: subl
+; LINUX-32-PIC-NEXT: calll *difunc
+; LINUX-32-PIC-NEXT: calll *difunc
+; LINUX-32-PIC-NEXT: addl
+
; LINUX-32-PIC-NEXT: ret
; LINUX-64-PIC: dicaller:
@@ -9295,49 +9305,49 @@ entry:
; DARWIN-32-STATIC: _dicaller:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call *_difunc
-; DARWIN-32-STATIC-NEXT: call *_difunc
+; DARWIN-32-STATIC-NEXT: calll *_difunc
+; DARWIN-32-STATIC-NEXT: calll *_difunc
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
; DARWIN-32-DYNAMIC: _dicaller:
; DARWIN-32-DYNAMIC: subl $12, %esp
-; DARWIN-32-DYNAMIC-NEXT: call *_difunc
-; DARWIN-32-DYNAMIC-NEXT: call *_difunc
+; DARWIN-32-DYNAMIC-NEXT: calll *_difunc
+; DARWIN-32-DYNAMIC-NEXT: calll *_difunc
; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _dicaller:
; DARWIN-32-PIC: pushl %esi
; DARWIN-32-PIC-NEXT: subl $8, %esp
-; DARWIN-32-PIC-NEXT: call L143$pb
+; DARWIN-32-PIC-NEXT: calll L143$pb
; DARWIN-32-PIC-NEXT: L143$pb:
; DARWIN-32-PIC-NEXT: popl %esi
-; DARWIN-32-PIC-NEXT: call *_difunc-L143$pb(%esi)
-; DARWIN-32-PIC-NEXT: call *_difunc-L143$pb(%esi)
+; DARWIN-32-PIC-NEXT: calll *_difunc-L143$pb(%esi)
+; DARWIN-32-PIC-NEXT: calll *_difunc-L143$pb(%esi)
; DARWIN-32-PIC-NEXT: addl $8, %esp
; DARWIN-32-PIC-NEXT: popl %esi
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _dicaller:
-; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC: pushq
; DARWIN-64-STATIC-NEXT: callq *_difunc(%rip)
; DARWIN-64-STATIC-NEXT: callq *_difunc(%rip)
-; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: popq
; DARWIN-64-STATIC-NEXT: ret
; DARWIN-64-DYNAMIC: _dicaller:
-; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC: pushq
; DARWIN-64-DYNAMIC-NEXT: callq *_difunc(%rip)
; DARWIN-64-DYNAMIC-NEXT: callq *_difunc(%rip)
-; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: popq
; DARWIN-64-DYNAMIC-NEXT: ret
; DARWIN-64-PIC: _dicaller:
-; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC: pushq
; DARWIN-64-PIC-NEXT: callq *_difunc(%rip)
; DARWIN-64-PIC-NEXT: callq *_difunc(%rip)
-; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: popq
; DARWIN-64-PIC-NEXT: ret
}
@@ -9354,71 +9364,72 @@ entry:
; LINUX-64-STATIC: ret
; LINUX-32-STATIC: licaller:
-; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call *lifunc
-; LINUX-32-STATIC-NEXT: call *lifunc
-; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC: subl
+; LINUX-32-STATIC-NEXT: calll *lifunc
+; LINUX-32-STATIC-NEXT: calll *lifunc
+; LINUX-32-STATIC-NEXT: addl
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: licaller:
-; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call *lifunc
-; LINUX-32-PIC-NEXT: call *lifunc
-; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC: subl
+; LINUX-32-PIC-NEXT: calll *lifunc
+; LINUX-32-PIC-NEXT: calll *lifunc
+; LINUX-32-PIC-NEXT: addl
+
; LINUX-32-PIC-NEXT: ret
; LINUX-64-PIC: licaller:
-; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC: pushq
; LINUX-64-PIC-NEXT: callq *lifunc(%rip)
; LINUX-64-PIC-NEXT: callq *lifunc(%rip)
-; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: popq
; LINUX-64-PIC-NEXT: ret
; DARWIN-32-STATIC: _licaller:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call *_lifunc
-; DARWIN-32-STATIC-NEXT: call *_lifunc
+; DARWIN-32-STATIC-NEXT: calll *_lifunc
+; DARWIN-32-STATIC-NEXT: calll *_lifunc
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
; DARWIN-32-DYNAMIC: _licaller:
; DARWIN-32-DYNAMIC: subl $12, %esp
-; DARWIN-32-DYNAMIC-NEXT: call *_lifunc
-; DARWIN-32-DYNAMIC-NEXT: call *_lifunc
+; DARWIN-32-DYNAMIC-NEXT: calll *_lifunc
+; DARWIN-32-DYNAMIC-NEXT: calll *_lifunc
; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _licaller:
; DARWIN-32-PIC: pushl %esi
; DARWIN-32-PIC-NEXT: subl $8, %esp
-; DARWIN-32-PIC-NEXT: call L144$pb
+; DARWIN-32-PIC-NEXT: calll L144$pb
; DARWIN-32-PIC-NEXT: L144$pb:
; DARWIN-32-PIC-NEXT: popl %esi
-; DARWIN-32-PIC-NEXT: call *_lifunc-L144$pb(%esi)
-; DARWIN-32-PIC-NEXT: call *_lifunc-L144$pb(%esi)
+; DARWIN-32-PIC-NEXT: calll *_lifunc-L144$pb(%esi)
+; DARWIN-32-PIC-NEXT: calll *_lifunc-L144$pb(%esi)
; DARWIN-32-PIC-NEXT: addl $8, %esp
; DARWIN-32-PIC-NEXT: popl %esi
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _licaller:
-; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC: pushq
; DARWIN-64-STATIC-NEXT: callq *_lifunc(%rip)
; DARWIN-64-STATIC-NEXT: callq *_lifunc(%rip)
-; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: popq
; DARWIN-64-STATIC-NEXT: ret
; DARWIN-64-DYNAMIC: _licaller:
-; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC: pushq
; DARWIN-64-DYNAMIC-NEXT: callq *_lifunc(%rip)
; DARWIN-64-DYNAMIC-NEXT: callq *_lifunc(%rip)
-; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: popq
; DARWIN-64-DYNAMIC-NEXT: ret
; DARWIN-64-PIC: _licaller:
-; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC: pushq
; DARWIN-64-PIC-NEXT: callq *_lifunc(%rip)
; DARWIN-64-PIC-NEXT: callq *_lifunc(%rip)
-; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: popq
; DARWIN-64-PIC-NEXT: ret
}
@@ -9435,17 +9446,18 @@ entry:
; LINUX-64-STATIC: ret
; LINUX-32-STATIC: itailcaller:
-; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call *ifunc
-; LINUX-32-STATIC-NEXT: call *ifunc
-; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC: subl
+; LINUX-32-STATIC-NEXT: calll *ifunc
+; LINUX-32-STATIC-NEXT: calll *ifunc
+; LINUX-32-STATIC-NEXT: addl
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: itailcaller:
-; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call *ifunc
-; LINUX-32-PIC-NEXT: call *ifunc
-; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC: subl
+; LINUX-32-PIC-NEXT: calll *ifunc
+; LINUX-32-PIC-NEXT: calll *ifunc
+; LINUX-32-PIC-NEXT: addl
+
; LINUX-32-PIC-NEXT: ret
; LINUX-64-PIC: itailcaller:
@@ -9458,8 +9470,8 @@ entry:
; DARWIN-32-STATIC: _itailcaller:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call *_ifunc
-; DARWIN-32-STATIC-NEXT: call *_ifunc
+; DARWIN-32-STATIC-NEXT: calll *_ifunc
+; DARWIN-32-STATIC-NEXT: calll *_ifunc
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
@@ -9467,8 +9479,8 @@ entry:
; DARWIN-32-DYNAMIC: pushl %esi
; DARWIN-32-DYNAMIC-NEXT: subl $8, %esp
; DARWIN-32-DYNAMIC-NEXT: movl L_ifunc$non_lazy_ptr, %esi
-; DARWIN-32-DYNAMIC-NEXT: call *(%esi)
-; DARWIN-32-DYNAMIC-NEXT: call *(%esi)
+; DARWIN-32-DYNAMIC-NEXT: calll *(%esi)
+; DARWIN-32-DYNAMIC-NEXT: calll *(%esi)
; DARWIN-32-DYNAMIC-NEXT: addl $8, %esp
; DARWIN-32-DYNAMIC-NEXT: popl %esi
; DARWIN-32-DYNAMIC-NEXT: ret
@@ -9476,12 +9488,12 @@ entry:
; DARWIN-32-PIC: _itailcaller:
; DARWIN-32-PIC: pushl %esi
; DARWIN-32-PIC-NEXT: subl $8, %esp
-; DARWIN-32-PIC-NEXT: call L145$pb
+; DARWIN-32-PIC-NEXT: calll L145$pb
; DARWIN-32-PIC-NEXT: L145$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_ifunc$non_lazy_ptr-L145$pb(%eax), %esi
-; DARWIN-32-PIC-NEXT: call *(%esi)
-; DARWIN-32-PIC-NEXT: call *(%esi)
+; DARWIN-32-PIC-NEXT: calll *(%esi)
+; DARWIN-32-PIC-NEXT: calll *(%esi)
; DARWIN-32-PIC-NEXT: addl $8, %esp
; DARWIN-32-PIC-NEXT: popl %esi
; DARWIN-32-PIC-NEXT: ret
@@ -9521,60 +9533,61 @@ entry:
; LINUX-64-STATIC: ret
; LINUX-32-STATIC: ditailcaller:
-; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call *difunc
-; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC: subl
+; LINUX-32-STATIC-NEXT: calll *difunc
+; LINUX-32-STATIC-NEXT: addl
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: ditailcaller:
-; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call *difunc
-; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC: subl
+; LINUX-32-PIC-NEXT: calll *difunc
+; LINUX-32-PIC-NEXT: addl
+
; LINUX-32-PIC-NEXT: ret
; LINUX-64-PIC: ditailcaller:
-; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC: pushq
; LINUX-64-PIC-NEXT: movq difunc@GOTPCREL(%rip), %rax
; LINUX-64-PIC-NEXT: callq *(%rax)
-; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: popq
; LINUX-64-PIC-NEXT: ret
; DARWIN-32-STATIC: _ditailcaller:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call *_difunc
+; DARWIN-32-STATIC-NEXT: calll *_difunc
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
; DARWIN-32-DYNAMIC: _ditailcaller:
; DARWIN-32-DYNAMIC: subl $12, %esp
-; DARWIN-32-DYNAMIC-NEXT: call *_difunc
+; DARWIN-32-DYNAMIC-NEXT: calll *_difunc
; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ditailcaller:
; DARWIN-32-PIC: subl $12, %esp
-; DARWIN-32-PIC-NEXT: call L146$pb
+; DARWIN-32-PIC-NEXT: calll L146$pb
; DARWIN-32-PIC-NEXT: L146$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: call *_difunc-L146$pb(%eax)
+; DARWIN-32-PIC-NEXT: calll *_difunc-L146$pb(%eax)
; DARWIN-32-PIC-NEXT: addl $12, %esp
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _ditailcaller:
-; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC: pushq
; DARWIN-64-STATIC-NEXT: callq *_difunc(%rip)
-; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: popq
; DARWIN-64-STATIC-NEXT: ret
; DARWIN-64-DYNAMIC: _ditailcaller:
-; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC: pushq
; DARWIN-64-DYNAMIC-NEXT: callq *_difunc(%rip)
-; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: popq
; DARWIN-64-DYNAMIC-NEXT: ret
; DARWIN-64-PIC: _ditailcaller:
; DARWIN-64-PIC: callq *_difunc(%rip)
-; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: popq
; DARWIN-64-PIC-NEXT: ret
}
@@ -9588,59 +9601,60 @@ entry:
; LINUX-64-STATIC: ret
; LINUX-32-STATIC: litailcaller:
-; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call *lifunc
-; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC: subl
+; LINUX-32-STATIC-NEXT: calll *lifunc
+; LINUX-32-STATIC-NEXT: addl
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: litailcaller:
-; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call *lifunc
-; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC: subl
+; LINUX-32-PIC-NEXT: calll *lifunc
+; LINUX-32-PIC-NEXT: addl
+
; LINUX-32-PIC-NEXT: ret
; LINUX-64-PIC: litailcaller:
-; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC: pushq
; LINUX-64-PIC-NEXT: callq *lifunc(%rip)
-; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: popq
; LINUX-64-PIC-NEXT: ret
; DARWIN-32-STATIC: _litailcaller:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call *_lifunc
+; DARWIN-32-STATIC-NEXT: calll *_lifunc
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
; DARWIN-32-DYNAMIC: _litailcaller:
; DARWIN-32-DYNAMIC: subl $12, %esp
-; DARWIN-32-DYNAMIC-NEXT: call *_lifunc
+; DARWIN-32-DYNAMIC-NEXT: calll *_lifunc
; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _litailcaller:
; DARWIN-32-PIC: subl $12, %esp
-; DARWIN-32-PIC-NEXT: call L147$pb
+; DARWIN-32-PIC-NEXT: calll L147$pb
; DARWIN-32-PIC-NEXT: L147$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: call *_lifunc-L147$pb(%eax)
+; DARWIN-32-PIC-NEXT: calll *_lifunc-L147$pb(%eax)
; DARWIN-32-PIC-NEXT: addl $12, %esp
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _litailcaller:
-; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC: pushq
; DARWIN-64-STATIC-NEXT: callq *_lifunc(%rip)
-; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: popq
; DARWIN-64-STATIC-NEXT: ret
; DARWIN-64-DYNAMIC: _litailcaller:
-; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC: pushq
; DARWIN-64-DYNAMIC-NEXT: callq *_lifunc(%rip)
-; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: popq
; DARWIN-64-DYNAMIC-NEXT: ret
; DARWIN-64-PIC: _litailcaller:
-; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC: pushq
; DARWIN-64-PIC-NEXT: callq *_lifunc(%rip)
-; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: popq
; DARWIN-64-PIC-NEXT: ret
}
diff --git a/test/CodeGen/X86/add-of-carry.ll b/test/CodeGen/X86/add-of-carry.ll
new file mode 100644
index 000000000000..f924ec8132ee
--- /dev/null
+++ b/test/CodeGen/X86/add-of-carry.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
+; <rdar://problem/8449754>
+
+define i32 @test1(i32 %sum, i32 %x) nounwind readnone ssp {
+entry:
+; CHECK: test1:
+; CHECK: sbbl %ecx, %ecx
+; CHECK-NOT: addl
+; CHECK: subl %ecx, %eax
+ %add4 = add i32 %x, %sum
+ %cmp = icmp ult i32 %add4, %x
+ %inc = zext i1 %cmp to i32
+ %z.0 = add i32 %add4, %inc
+ ret i32 %z.0
+}
+
+; Instcombine transforms test1 into test2:
+; CHECK: test2:
+; CHECK: movl
+; CHECK-NEXT: addl
+; CHECK-NEXT: sbbl
+; CHECK-NEXT: subl
+; CHECK-NEXT: ret
+define i32 @test2(i32 %sum, i32 %x) nounwind readnone ssp {
+entry:
+ %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %sum)
+ %0 = extractvalue { i32, i1 } %uadd, 0
+ %cmp = extractvalue { i32, i1 } %uadd, 1
+ %inc = zext i1 %cmp to i32
+ %z.0 = add i32 %0, %inc
+ ret i32 %z.0
+}
+
+declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
diff --git a/test/CodeGen/X86/add.ll b/test/CodeGen/X86/add.ll
index 3991a6849f3e..3ec5358affb3 100644
--- a/test/CodeGen/X86/add.ll
+++ b/test/CodeGen/X86/add.ll
@@ -92,3 +92,43 @@ define i64 @test6(i64 %A, i32 %B) nounwind {
; X64: ret
}
+define {i32, i1} @test7(i32 %v1, i32 %v2) nounwind {
+ %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
+ ret {i32, i1} %t
+}
+
+; X64: test7:
+; X64: addl %esi, %eax
+; X64-NEXT: setb %dl
+; X64-NEXT: ret
+
+; PR5443
+define {i64, i1} @test8(i64 %left, i64 %right) nounwind {
+entry:
+ %extleft = zext i64 %left to i65
+ %extright = zext i64 %right to i65
+ %sum = add i65 %extleft, %extright
+ %res.0 = trunc i65 %sum to i64
+ %overflow = and i65 %sum, -18446744073709551616
+ %res.1 = icmp ne i65 %overflow, 0
+ %final0 = insertvalue {i64, i1} undef, i64 %res.0, 0
+ %final1 = insertvalue {i64, i1} %final0, i1 %res.1, 1
+ ret {i64, i1} %final1
+}
+
+; X64: test8:
+; X64: addq
+; X64-NEXT: sbbq
+; X64-NEXT: testb
+
+define i32 @test9(i32 %x, i32 %y) nounwind readnone {
+ %cmp = icmp eq i32 %x, 10
+ %sub = sext i1 %cmp to i32
+ %cond = add i32 %sub, %y
+ ret i32 %cond
+; X64: test9:
+; X64: cmpl $10
+; X64: sete
+; X64: subl
+; X64: ret
+}
diff --git a/test/CodeGen/X86/addr-label-difference.ll b/test/CodeGen/X86/addr-label-difference.ll
index be0908aa1a9d..49abd8a92e64 100644
--- a/test/CodeGen/X86/addr-label-difference.ll
+++ b/test/CodeGen/X86/addr-label-difference.ll
@@ -5,7 +5,7 @@ target triple = "i386-apple-darwin10.0"
; This array should go into the __TEXT,__const section, not into the
; __DATA,__const section, because the elements don't need relocations.
-@test.array = internal constant [3 x i32] [i32 sub (i32 ptrtoint (i8* blockaddress(@test, %foo) to i32), i32 ptrtoint (i8* blockaddress(@test, %foo) to i32)), i32 sub (i32 ptrtoint (i8* blockaddress(@test, %bar) to i32), i32 ptrtoint (i8* blockaddress(@test, %foo) to i32)), i32 sub (i32 ptrtoint (i8* blockaddress(@test, %hack) to i32), i32 ptrtoint (i8* blockaddress(@test, %foo) to i32))] ; <[3 x i32]*> [#uses=1]
+@test.array = internal unnamed_addr constant [3 x i32] [i32 sub (i32 ptrtoint (i8* blockaddress(@test, %foo) to i32), i32 ptrtoint (i8* blockaddress(@test, %foo) to i32)), i32 sub (i32 ptrtoint (i8* blockaddress(@test, %bar) to i32), i32 ptrtoint (i8* blockaddress(@test, %foo) to i32)), i32 sub (i32 ptrtoint (i8* blockaddress(@test, %hack) to i32), i32 ptrtoint (i8* blockaddress(@test, %foo) to i32))] ; <[3 x i32]*> [#uses=1]
define void @test(i32 %i) nounwind ssp {
entry:
diff --git a/test/CodeGen/X86/alldiv-divdi3.ll b/test/CodeGen/X86/alldiv-divdi3.ll
new file mode 100644
index 000000000000..86aa1fde1957
--- /dev/null
+++ b/test/CodeGen/X86/alldiv-divdi3.ll
@@ -0,0 +1,17 @@
+; Test that, for a 64-bit signed div, a libcall to alldiv is made on Windows
+; unless we have libgcc.
+
+; RUN: llc < %s -mtriple i386-pc-win32 | FileCheck %s
+; RUN: llc < %s -mtriple i386-pc-cygwin | FileCheck %s -check-prefix USEDIVDI
+; RUN: llc < %s -mtriple i386-pc-mingw32 | FileCheck %s -check-prefix USEDIVDI
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind readonly {
+entry:
+ %conv4 = sext i32 %argc to i64
+ %div = sdiv i64 84, %conv4
+ %conv7 = trunc i64 %div to i32
+ ret i32 %conv7
+}
+
+; CHECK: alldiv
+; USEDIVDI: divdi3
diff --git a/test/CodeGen/X86/andimm8.ll b/test/CodeGen/X86/andimm8.ll
new file mode 100644
index 000000000000..640237d0b504
--- /dev/null
+++ b/test/CodeGen/X86/andimm8.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-linux-gnu -show-mc-encoding | FileCheck %s
+
+; PR8365
+; CHECK: andl $-64, %edi # encoding: [0x83,0xe7,0xc0]
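+; (Why an imm8 encoding is possible here: 4294967232 is 0xFFFFFFC0, and the
+; zext keeps the upper 32 bits of %t1 zero, so the and reduces to a 32-bit
+; andl whose mask is the sign-extended imm8 -64, hence the 0x83 opcode above.)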
+
+define i64 @bra(i32 %zed) nounwind {
+ %t1 = zext i32 %zed to i64
+ %t2 = and i64 %t1, 4294967232
+ ret i64 %t2
+}
+
+; CHECK: orq $2, %rdi # encoding: [0x48,0x83,0xcf,0x02]
+
+define void @foo(i64 %zed, i64* %x) nounwind {
+ %t1 = and i64 %zed, -4
+ %t2 = or i64 %t1, 2
+ store i64 %t2, i64* %x, align 8
+ ret void
+}
diff --git a/test/CodeGen/X86/apm.ll b/test/CodeGen/X86/apm.ll
new file mode 100644
index 000000000000..d0c64f243386
--- /dev/null
+++ b/test/CodeGen/X86/apm.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -o - -march=x86-64 | FileCheck %s
+; PR8573
+
+; CHECK: foo:
+; CHECK: leaq (%rdi), %rax
+; CHECK-NEXT: movl %esi, %ecx
+; CHECK-NEXT: monitor
+define void @foo(i8* %P, i32 %E, i32 %H) nounwind {
+entry:
+ tail call void @llvm.x86.sse3.monitor(i8* %P, i32 %E, i32 %H)
+ ret void
+}
+
+declare void @llvm.x86.sse3.monitor(i8*, i32, i32) nounwind
+
+; CHECK: bar:
+; CHECK: movl %edi, %ecx
+; CHECK-NEXT: movl %esi, %eax
+; CHECK-NEXT: mwait
+define void @bar(i32 %E, i32 %H) nounwind {
+entry:
+ tail call void @llvm.x86.sse3.mwait(i32 %E, i32 %H)
+ ret void
+}
+
+declare void @llvm.x86.sse3.mwait(i32, i32) nounwind
diff --git a/test/CodeGen/X86/atomic_op.ll b/test/CodeGen/X86/atomic_op.ll
index 3ef1887083d0..f3ade93c8a30 100644
--- a/test/CodeGen/X86/atomic_op.ll
+++ b/test/CodeGen/X86/atomic_op.ll
@@ -1,16 +1,8 @@
-; RUN: llc < %s -march=x86 -o %t1
-; RUN: grep "lock" %t1 | count 17
-; RUN: grep "xaddl" %t1 | count 4
-; RUN: grep "cmpxchgl" %t1 | count 13
-; RUN: grep "xchgl" %t1 | count 14
-; RUN: grep "cmova" %t1 | count 2
-; RUN: grep "cmovb" %t1 | count 2
-; RUN: grep "cmovg" %t1 | count 2
-; RUN: grep "cmovl" %t1 | count 2
+; RUN: llc < %s -march=x86 | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-define void @main(i32 %argc, i8** %argv) {
+define void @func(i32 %argc, i8** %argv) nounwind {
entry:
%argc.addr = alloca i32 ; <i32*> [#uses=1]
%argv.addr = alloca i8** ; <i8***> [#uses=1]
@@ -29,48 +21,105 @@ entry:
store i32 3855, i32* %ort
store i32 3855, i32* %xort
store i32 4, i32* %temp
- %tmp = load i32* %temp ; <i32> [#uses=1]
+ %tmp = load i32* %temp
+ ; CHECK: lock
+ ; CHECK: xaddl
call i32 @llvm.atomic.load.add.i32.p0i32( i32* %val1, i32 %tmp ) ; <i32>:0 [#uses=1]
store i32 %0, i32* %old
+ ; CHECK: lock
+ ; CHECK: xaddl
call i32 @llvm.atomic.load.sub.i32.p0i32( i32* %val2, i32 30 ) ; <i32>:1 [#uses=1]
store i32 %1, i32* %old
+ ; CHECK: lock
+ ; CHECK: xaddl
call i32 @llvm.atomic.load.add.i32.p0i32( i32* %val2, i32 1 ) ; <i32>:2 [#uses=1]
store i32 %2, i32* %old
+ ; CHECK: lock
+ ; CHECK: xaddl
call i32 @llvm.atomic.load.sub.i32.p0i32( i32* %val2, i32 1 ) ; <i32>:3 [#uses=1]
store i32 %3, i32* %old
+ ; CHECK: andl
+ ; CHECK: lock
+ ; CHECK: cmpxchgl
call i32 @llvm.atomic.load.and.i32.p0i32( i32* %andt, i32 4080 ) ; <i32>:4 [#uses=1]
store i32 %4, i32* %old
+ ; CHECK: orl
+ ; CHECK: lock
+ ; CHECK: cmpxchgl
call i32 @llvm.atomic.load.or.i32.p0i32( i32* %ort, i32 4080 ) ; <i32>:5 [#uses=1]
store i32 %5, i32* %old
+ ; CHECK: xorl
+ ; CHECK: lock
+ ; CHECK: cmpxchgl
call i32 @llvm.atomic.load.xor.i32.p0i32( i32* %xort, i32 4080 ) ; <i32>:6 [#uses=1]
store i32 %6, i32* %old
+ ; CHECK: cmov
+ ; CHECK: lock
+ ; CHECK: cmpxchgl
call i32 @llvm.atomic.load.min.i32.p0i32( i32* %val2, i32 16 ) ; <i32>:7 [#uses=1]
store i32 %7, i32* %old
%neg = sub i32 0, 1 ; <i32> [#uses=1]
+ ; CHECK: cmov
+ ; CHECK: lock
+ ; CHECK: cmpxchgl
call i32 @llvm.atomic.load.min.i32.p0i32( i32* %val2, i32 %neg ) ; <i32>:8 [#uses=1]
store i32 %8, i32* %old
+ ; CHECK: cmov
+ ; CHECK: lock
+ ; CHECK: cmpxchgl
call i32 @llvm.atomic.load.max.i32.p0i32( i32* %val2, i32 1 ) ; <i32>:9 [#uses=1]
store i32 %9, i32* %old
+ ; CHECK: cmov
+ ; CHECK: lock
+ ; CHECK: cmpxchgl
call i32 @llvm.atomic.load.max.i32.p0i32( i32* %val2, i32 0 ) ; <i32>:10 [#uses=1]
store i32 %10, i32* %old
+ ; CHECK: cmov
+ ; CHECK: lock
+ ; CHECK: cmpxchgl
call i32 @llvm.atomic.load.umax.i32.p0i32( i32* %val2, i32 65535 ) ; <i32>:11 [#uses=1]
store i32 %11, i32* %old
+ ; CHECK: cmov
+ ; CHECK: lock
+ ; CHECK: cmpxchgl
call i32 @llvm.atomic.load.umax.i32.p0i32( i32* %val2, i32 10 ) ; <i32>:12 [#uses=1]
store i32 %12, i32* %old
+ ; CHECK: cmov
+ ; CHECK: lock
+ ; CHECK: cmpxchgl
call i32 @llvm.atomic.load.umin.i32.p0i32( i32* %val2, i32 1 ) ; <i32>:13 [#uses=1]
store i32 %13, i32* %old
+ ; CHECK: cmov
+ ; CHECK: lock
+ ; CHECK: cmpxchgl
call i32 @llvm.atomic.load.umin.i32.p0i32( i32* %val2, i32 10 ) ; <i32>:14 [#uses=1]
store i32 %14, i32* %old
+ ; CHECK: xchgl %{{.*}}, {{.*}}(%esp)
call i32 @llvm.atomic.swap.i32.p0i32( i32* %val2, i32 1976 ) ; <i32>:15 [#uses=1]
store i32 %15, i32* %old
%neg1 = sub i32 0, 10 ; <i32> [#uses=1]
+ ; CHECK: lock
+ ; CHECK: cmpxchgl
call i32 @llvm.atomic.cmp.swap.i32.p0i32( i32* %val2, i32 %neg1, i32 1 ) ; <i32>:16 [#uses=1]
store i32 %16, i32* %old
+ ; CHECK: lock
+ ; CHECK: cmpxchgl
call i32 @llvm.atomic.cmp.swap.i32.p0i32( i32* %val2, i32 1976, i32 1 ) ; <i32>:17 [#uses=1]
store i32 %17, i32* %old
ret void
}
+define void @test2(i32 addrspace(256)* nocapture %P) nounwind {
+entry:
+; CHECK: lock
+; CHECK: cmpxchgl %{{.*}}, %gs:(%{{.*}})
+
+ %0 = tail call i32 @llvm.atomic.cmp.swap.i32.p256i32(i32 addrspace(256)* %P, i32 0, i32 1)
+ ret void
+}
+
+declare i32 @llvm.atomic.cmp.swap.i32.p256i32(i32 addrspace(256)* nocapture, i32, i32) nounwind
+
declare i32 @llvm.atomic.load.add.i32.p0i32(i32*, i32) nounwind
declare i32 @llvm.atomic.load.sub.i32.p0i32(i32*, i32) nounwind
diff --git a/test/CodeGen/X86/avx-128.ll b/test/CodeGen/X86/avx-128.ll
index a72160be719a..2bd3b5dfedd6 100644
--- a/test/CodeGen/X86/avx-128.ll
+++ b/test/CodeGen/X86/avx-128.ll
@@ -4,7 +4,7 @@
define void @zero() nounwind ssp {
entry:
- ; CHECK: vpxor
+ ; CHECK: vxorps
; CHECK: vmovaps
store <4 x float> zeroinitializer, <4 x float>* @z, align 16
ret void
diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll
index 9de90237d146..6c32396a4177 100644
--- a/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -114,8 +114,8 @@ declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readno
define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: vcomisd
- ; CHECK: setb
- ; CHECK: movzbl
+ ; CHECK: sbbl %eax, %eax
+ ; CHECK: andl $1, %eax
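+ ; (The sbbl/andl pair above computes the comparison result without setcc:
+ ; sbbl %eax, %eax yields 0 or -1 from the carry flag set by vcomisd, and
+ ; andl $1 narrows that to the 0/1 return value of the "less than" test.)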
%res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -230,7 +230,7 @@ declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
- ; CHECK: vcvttss2si
+ ; CHECK: vcvttsd2si
%res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ; <i32> [#uses=1]
ret i32 %res
}
@@ -825,8 +825,7 @@ declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readn
define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: vucomisd
- ; CHECK: setb
- ; CHECK: movzbl
+ ; CHECK: sbbl
%res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1183,8 +1182,7 @@ declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
define i32 @test_x86_sse41_ptestc(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vptest
- ; CHECK: setb
- ; CHECK: movzbl
+ ; CHECK: sbbl
%res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1455,8 +1453,7 @@ declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone
define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vcomiss
- ; CHECK: setb
- ; CHECK: movzbl
+ ; CHECK: sbb
%res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1697,8 +1694,7 @@ declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone
define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vucomiss
- ; CHECK: setb
- ; CHECK: movzbl
+ ; CHECK: sbbl
%res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -2173,8 +2169,7 @@ declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind
define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK: vptest
- ; CHECK: setb
- ; CHECK: movzbl
+ ; CHECK: sbbl
%res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -2451,8 +2446,7 @@ declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) noun
define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: vtestpd
- ; CHECK: setb
- ; CHECK: movzbl
+ ; CHECK: sbbl
%res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -2461,8 +2455,7 @@ declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnon
define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK: vtestpd
- ; CHECK: setb
- ; CHECK: movzbl
+ ; CHECK: sbbl
%res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -2471,8 +2464,7 @@ declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind rea
define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vtestps
- ; CHECK: setb
- ; CHECK: movzbl
+ ; CHECK: sbbl
%res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -2481,8 +2473,7 @@ declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone
define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK: vtestps
- ; CHECK: setb
- ; CHECK: movzbl
+ ; CHECK: sbbl
%res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
}
diff --git a/test/CodeGen/X86/avx-intrinsics-x86_64.ll b/test/CodeGen/X86/avx-intrinsics-x86_64.ll
index b1867105ce85..5a466fc3250f 100644
--- a/test/CodeGen/X86/avx-intrinsics-x86_64.ll
+++ b/test/CodeGen/X86/avx-intrinsics-x86_64.ll
@@ -17,7 +17,7 @@ declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readn
define i64 @test_x86_sse2_cvttsd2si64(<2 x double> %a0) {
- ; CHECK: vcvttss2si
+ ; CHECK: vcvttsd2si
%res = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
ret i64 %res
}
diff --git a/test/CodeGen/X86/bc-extract.ll b/test/CodeGen/X86/bc-extract.ll
new file mode 100644
index 000000000000..ac972a8e2e5b
--- /dev/null
+++ b/test/CodeGen/X86/bc-extract.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -march=x86-64 -mattr=+sse42 | FileCheck %s
+
+
+define float @extractFloat1() nounwind {
+entry:
+ ; CHECK: 1065353216
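+ ; (1065353216 is 0x3F800000, the IEEE-754 bit pattern of 1.0f; on this
+ ; little-endian target, element 0 of the bitcast below picks up the low
+ ; 32 bits of the double constant.)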
+ %tmp0 = bitcast <1 x double> <double 0x000000003F800000> to <2 x float>
+ %tmp1 = extractelement <2 x float> %tmp0, i32 0
+ ret float %tmp1
+}
+
+define float @extractFloat2() nounwind {
+entry:
+ ; CHECK: pxor %xmm0, %xmm0
+ %tmp4 = bitcast <1 x double> <double 0x000000003F800000> to <2 x float>
+ %tmp5 = extractelement <2 x float> %tmp4, i32 1
+ ret float %tmp5
+}
+
+define i32 @extractInt2() nounwind {
+entry:
+ ; CHECK: xorl %eax, %eax
+ %tmp4 = bitcast <1 x i64> <i64 256> to <2 x i32>
+ %tmp5 = extractelement <2 x i32> %tmp4, i32 1
+ ret i32 %tmp5
+}
+
diff --git a/test/CodeGen/X86/bit-test-shift.ll b/test/CodeGen/X86/bit-test-shift.ll
new file mode 100644
index 000000000000..7497613f2565
--- /dev/null
+++ b/test/CodeGen/X86/bit-test-shift.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
+; <rdar://problem/8285015>
+
+define i32 @x(i32 %t) nounwind readnone ssp {
+entry:
+; CHECK: shll $23, %eax
+; CHECK: sarl $31, %eax
+; CHECK: andl $-26, %eax
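+; (Sketch of the arithmetic these checks expect: shll $23 moves bit 8, the
+; 256 bit of %t, into the sign bit; sarl $31 smears it into 0 or -1; andl $-26
+; then yields the selected 0 or -26.)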
+ %and = and i32 %t, 256
+ %tobool = icmp eq i32 %and, 0
+ %retval.0 = select i1 %tobool, i32 0, i32 -26
+ ret i32 %retval.0
+}
diff --git a/test/CodeGen/X86/bswap-inline-asm.ll b/test/CodeGen/X86/bswap-inline-asm.ll
index 2b7019371a17..3bb9124633d6 100644
--- a/test/CodeGen/X86/bswap-inline-asm.ll
+++ b/test/CodeGen/X86/bswap-inline-asm.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86-64 > %t
-; RUN: not grep APP %t
+; RUN: llc < %s -mtriple=x86_64-apple-darwin > %t
+; RUN: not grep InlineAsm %t
; RUN: FileCheck %s < %t
; CHECK: foo:
@@ -65,6 +65,13 @@ define i32 @t32(i32 %x) nounwind {
ret i32 %asmtmp
}
+; CHECK: u32:
+; CHECK: bswapl
+define i32 @u32(i32 %x) nounwind {
+ %asmtmp = tail call i32 asm "rorw $$8, ${0:w};rorl $$16, $0;rorw $$8, ${0:w}", "=r,0,~{cc},~{dirflag},~{flags},~{fpsr}"(i32 %x) nounwind
+ ret i32 %asmtmp
+}
+
; CHECK: s64:
; CHECK: bswapq
define i64 @s64(i64 %x) nounwind {
diff --git a/test/CodeGen/X86/byval.ll b/test/CodeGen/X86/byval.ll
index af36e1bb8cb4..ac0bc094e56e 100644
--- a/test/CodeGen/X86/byval.ll
+++ b/test/CodeGen/X86/byval.ll
@@ -1,7 +1,10 @@
-; RUN: llc < %s -march=x86-64 | grep {movq 8(%rsp), %rax}
-; RUN: llc < %s -march=x86 > %t
-; RUN: grep {movl 8(%esp), %edx} %t
-; RUN: grep {movl 4(%esp), %eax} %t
+; RUN: llc < %s -march=x86-64 | FileCheck -check-prefix=X86-64 %s
+; RUN: llc < %s -march=x86 | FileCheck -check-prefix=X86 %s
+
+; X86: movl 4(%esp), %eax
+; X86: movl 8(%esp), %edx
+
+; X86-64: movq 8(%rsp), %rax
%struct.s = type { i64, i64, i64 }
diff --git a/test/CodeGen/X86/cmp-test.ll b/test/CodeGen/X86/cmp-test.ll
deleted file mode 100644
index 898c09b82f5e..000000000000
--- a/test/CodeGen/X86/cmp-test.ll
+++ /dev/null
@@ -1,27 +0,0 @@
-; RUN: llc < %s -march=x86 | grep cmp | count 1
-; RUN: llc < %s -march=x86 | grep test | count 1
-
-define i32 @f1(i32 %X, i32* %y) {
- %tmp = load i32* %y ; <i32> [#uses=1]
- %tmp.upgrd.1 = icmp eq i32 %tmp, 0 ; <i1> [#uses=1]
- br i1 %tmp.upgrd.1, label %ReturnBlock, label %cond_true
-
-cond_true: ; preds = %0
- ret i32 1
-
-ReturnBlock: ; preds = %0
- ret i32 0
-}
-
-define i32 @f2(i32 %X, i32* %y) {
- %tmp = load i32* %y ; <i32> [#uses=1]
- %tmp1 = shl i32 %tmp, 3 ; <i32> [#uses=1]
- %tmp1.upgrd.2 = icmp eq i32 %tmp1, 0 ; <i1> [#uses=1]
- br i1 %tmp1.upgrd.2, label %ReturnBlock, label %cond_true
-
-cond_true: ; preds = %0
- ret i32 1
-
-ReturnBlock: ; preds = %0
- ret i32 0
-}
diff --git a/test/CodeGen/X86/cmp.ll b/test/CodeGen/X86/cmp.ll
new file mode 100644
index 000000000000..ef5e353e9f9f
--- /dev/null
+++ b/test/CodeGen/X86/cmp.ll
@@ -0,0 +1,92 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -show-mc-encoding | FileCheck %s
+
+define i32 @test1(i32 %X, i32* %y) nounwind {
+ %tmp = load i32* %y ; <i32> [#uses=1]
+ %tmp.upgrd.1 = icmp eq i32 %tmp, 0 ; <i1> [#uses=1]
+ br i1 %tmp.upgrd.1, label %ReturnBlock, label %cond_true
+
+cond_true: ; preds = %0
+ ret i32 1
+
+ReturnBlock: ; preds = %0
+ ret i32 0
+; CHECK: test1:
+; CHECK: cmpl $0, (%rsi)
+}
+
+define i32 @test2(i32 %X, i32* %y) nounwind {
+ %tmp = load i32* %y ; <i32> [#uses=1]
+ %tmp1 = shl i32 %tmp, 3 ; <i32> [#uses=1]
+ %tmp1.upgrd.2 = icmp eq i32 %tmp1, 0 ; <i1> [#uses=1]
+ br i1 %tmp1.upgrd.2, label %ReturnBlock, label %cond_true
+
+cond_true: ; preds = %0
+ ret i32 1
+
+ReturnBlock: ; preds = %0
+ ret i32 0
+; CHECK: test2:
+; CHECK: movl (%rsi), %eax
+; CHECK: shll $3, %eax
+; CHECK: testl %eax, %eax
+}
+
+define i64 @test3(i64 %x) nounwind {
+ %t = icmp eq i64 %x, 0
+ %r = zext i1 %t to i64
+ ret i64 %r
+; CHECK: test3:
+; CHECK: testq %rdi, %rdi
+; CHECK: sete %al
+; CHECK: movzbl %al, %eax
+; CHECK: ret
+}
+
+define i64 @test4(i64 %x) nounwind {
+ %t = icmp slt i64 %x, 1
+ %r = zext i1 %t to i64
+ ret i64 %r
+; CHECK: test4:
+; CHECK: testq %rdi, %rdi
+; CHECK: setle %al
+; CHECK: movzbl %al, %eax
+; CHECK: ret
+}
+
+
+define i32 @test5(double %A) nounwind {
+ entry:
+ %tmp2 = fcmp ogt double %A, 1.500000e+02; <i1> [#uses=1]
+ %tmp5 = fcmp ult double %A, 7.500000e+01; <i1> [#uses=1]
+ %bothcond = or i1 %tmp2, %tmp5; <i1> [#uses=1]
+ br i1 %bothcond, label %bb8, label %bb12
+
+ bb8:; preds = %entry
+ %tmp9 = tail call i32 (...)* @foo( ) nounwind ; <i32> [#uses=1]
+ ret i32 %tmp9
+
+ bb12:; preds = %entry
+ ret i32 32
+; CHECK: test5:
+; CHECK: ucomisd LCPI4_0(%rip), %xmm0
+; CHECK: ucomisd LCPI4_1(%rip), %xmm0
+}
+
+declare i32 @foo(...)
+
+define i32 @test6() nounwind align 2 {
+ %A = alloca {i64, i64}, align 8
+ %B = getelementptr inbounds {i64, i64}* %A, i64 0, i32 1
+ %C = load i64* %B
+ %D = icmp eq i64 %C, 0
+ br i1 %D, label %T, label %F
+T:
+ ret i32 1
+
+F:
+ ret i32 0
+; CHECK: test6:
+; CHECK: cmpq $0, -8(%rsp)
+; CHECK: encoding: [0x48,0x83,0x7c,0x24,0xf8,0x00]
+}
+
diff --git a/test/CodeGen/X86/cmp0.ll b/test/CodeGen/X86/cmp0.ll
deleted file mode 100644
index 4878448800cc..000000000000
--- a/test/CodeGen/X86/cmp0.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
-
-define i64 @test0(i64 %x) nounwind {
- %t = icmp eq i64 %x, 0
- %r = zext i1 %t to i64
- ret i64 %r
-; CHECK: test0:
-; CHECK: testq %rdi, %rdi
-; CHECK: sete %al
-; CHECK: movzbl %al, %eax
-; CHECK: ret
-}
-
-define i64 @test1(i64 %x) nounwind {
- %t = icmp slt i64 %x, 1
- %r = zext i1 %t to i64
- ret i64 %r
-; CHECK: test1:
-; CHECK: testq %rdi, %rdi
-; CHECK: setle %al
-; CHECK: movzbl %al, %eax
-; CHECK: ret
-}
-
diff --git a/test/CodeGen/X86/cmp2.ll b/test/CodeGen/X86/cmp2.ll
deleted file mode 100644
index 9a8e00c8bca0..000000000000
--- a/test/CodeGen/X86/cmp2.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep ucomisd | grep CPI | count 2
-
-define i32 @test(double %A) nounwind {
- entry:
- %tmp2 = fcmp ogt double %A, 1.500000e+02; <i1> [#uses=1]
- %tmp5 = fcmp ult double %A, 7.500000e+01; <i1> [#uses=1]
- %bothcond = or i1 %tmp2, %tmp5; <i1> [#uses=1]
- br i1 %bothcond, label %bb8, label %bb12
-
- bb8:; preds = %entry
- %tmp9 = tail call i32 (...)* @foo( ) nounwind ; <i32> [#uses=1]
- ret i32 %tmp9
-
- bb12:; preds = %entry
- ret i32 32
-}
-
-declare i32 @foo(...)
diff --git a/test/CodeGen/X86/commute-two-addr.ll b/test/CodeGen/X86/commute-two-addr.ll
index 56ea26b658d8..89b436e75c9e 100644
--- a/test/CodeGen/X86/commute-two-addr.ll
+++ b/test/CodeGen/X86/commute-two-addr.ll
@@ -2,24 +2,62 @@
; insertion of register-register copies.
; Make sure there are only 3 mov's for each testcase
-; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
-; RUN: grep {\\\<mov\\\>} | count 6
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu | FileCheck %s -check-prefix=LINUX
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s -check-prefix=DARWIN
-target triple = "i686-pc-linux-gnu"
@G = external global i32 ; <i32*> [#uses=2]
declare void @ext(i32)
-define i32 @add_test(i32 %X, i32 %Y) {
+define i32 @t1(i32 %X, i32 %Y) nounwind {
+; LINUX: t1:
+; LINUX: movl 4(%esp), %eax
+; LINUX: movl 8(%esp), %ecx
+; LINUX: addl %eax, %ecx
+; LINUX: movl %ecx, G
%Z = add i32 %X, %Y ; <i32> [#uses=1]
store i32 %Z, i32* @G
ret i32 %X
}
-define i32 @xor_test(i32 %X, i32 %Y) {
+define i32 @t2(i32 %X, i32 %Y) nounwind {
+; LINUX: t2:
+; LINUX: movl 4(%esp), %eax
+; LINUX: movl 8(%esp), %ecx
+; LINUX: xorl %eax, %ecx
+; LINUX: movl %ecx, G
%Z = xor i32 %X, %Y ; <i32> [#uses=1]
store i32 %Z, i32* @G
ret i32 %X
}
+; rdar://8762995
+%0 = type { i64, i32 }
+
+define %0 @t3(i32 %lb, i8 zeroext %has_lb, i8 zeroext %lb_inclusive, i32 %ub, i8 zeroext %has_ub, i8 zeroext %ub_inclusive) nounwind {
+entry:
+; DARWIN: t3:
+; DARWIN: shlq $32, %rcx
+; DARWIN-NOT: leaq
+; DARWIN: orq %rcx, %rax
+; DARWIN-NOT: mov
+; DARWIN: shll $16
+ %tmp21 = zext i32 %lb to i64
+ %tmp23 = zext i32 %ub to i64
+ %tmp24 = shl i64 %tmp23, 32
+ %ins26 = or i64 %tmp24, %tmp21
+ %tmp28 = zext i8 %has_lb to i32
+ %tmp33 = zext i8 %has_ub to i32
+ %tmp34 = shl i32 %tmp33, 8
+ %tmp38 = zext i8 %lb_inclusive to i32
+ %tmp39 = shl i32 %tmp38, 16
+ %tmp43 = zext i8 %ub_inclusive to i32
+ %tmp44 = shl i32 %tmp43, 24
+ %ins31 = or i32 %tmp39, %tmp28
+ %ins36 = or i32 %ins31, %tmp34
+ %ins46 = or i32 %ins36, %tmp44
+ %tmp16 = insertvalue %0 undef, i64 %ins26, 0
+ %tmp19 = insertvalue %0 %tmp16, i32 %ins46, 1
+ ret %0 %tmp19
+}
diff --git a/test/CodeGen/X86/compare-inf.ll b/test/CodeGen/X86/compare-inf.ll
index 2be90c9764c2..9aa44a30af57 100644
--- a/test/CodeGen/X86/compare-inf.ll
+++ b/test/CodeGen/X86/compare-inf.ll
@@ -5,7 +5,7 @@
; CHECK: oeq_inff:
; CHECK: ucomiss
-; CHECK: jae
+; CHECK: jb
define float @oeq_inff(float %x, float %y) nounwind readonly {
%t0 = fcmp oeq float %x, 0x7FF0000000000000
%t1 = select i1 %t0, float 1.0, float %y
@@ -14,7 +14,7 @@ define float @oeq_inff(float %x, float %y) nounwind readonly {
; CHECK: oeq_inf:
; CHECK: ucomisd
-; CHECK: jae
+; CHECK: jb
define double @oeq_inf(double %x, double %y) nounwind readonly {
%t0 = fcmp oeq double %x, 0x7FF0000000000000
%t1 = select i1 %t0, double 1.0, double %y
@@ -23,7 +23,7 @@ define double @oeq_inf(double %x, double %y) nounwind readonly {
; CHECK: une_inff:
; CHECK: ucomiss
-; CHECK: jb
+; CHECK: jae
define float @une_inff(float %x, float %y) nounwind readonly {
%t0 = fcmp une float %x, 0x7FF0000000000000
%t1 = select i1 %t0, float 1.0, float %y
@@ -32,7 +32,7 @@ define float @une_inff(float %x, float %y) nounwind readonly {
; CHECK: une_inf:
; CHECK: ucomisd
-; CHECK: jb
+; CHECK: jae
define double @une_inf(double %x, double %y) nounwind readonly {
%t0 = fcmp une double %x, 0x7FF0000000000000
%t1 = select i1 %t0, double 1.0, double %y
@@ -41,7 +41,7 @@ define double @une_inf(double %x, double %y) nounwind readonly {
; CHECK: oeq_neg_inff:
; CHECK: ucomiss
-; CHECK: jae
+; CHECK: jb
define float @oeq_neg_inff(float %x, float %y) nounwind readonly {
%t0 = fcmp oeq float %x, 0xFFF0000000000000
%t1 = select i1 %t0, float 1.0, float %y
@@ -50,7 +50,7 @@ define float @oeq_neg_inff(float %x, float %y) nounwind readonly {
; CHECK: oeq_neg_inf:
; CHECK: ucomisd
-; CHECK: jae
+; CHECK: jb
define double @oeq_neg_inf(double %x, double %y) nounwind readonly {
%t0 = fcmp oeq double %x, 0xFFF0000000000000
%t1 = select i1 %t0, double 1.0, double %y
@@ -59,7 +59,7 @@ define double @oeq_neg_inf(double %x, double %y) nounwind readonly {
; CHECK: une_neg_inff:
; CHECK: ucomiss
-; CHECK: jb
+; CHECK: jae
define float @une_neg_inff(float %x, float %y) nounwind readonly {
%t0 = fcmp une float %x, 0xFFF0000000000000
%t1 = select i1 %t0, float 1.0, float %y
@@ -68,7 +68,7 @@ define float @une_neg_inff(float %x, float %y) nounwind readonly {
; CHECK: une_neg_inf:
; CHECK: ucomisd
-; CHECK: jb
+; CHECK: jae
define double @une_neg_inf(double %x, double %y) nounwind readonly {
%t0 = fcmp une double %x, 0xFFF0000000000000
%t1 = select i1 %t0, double 1.0, double %y
diff --git a/test/CodeGen/X86/complex-asm.ll b/test/CodeGen/X86/complex-asm.ll
new file mode 100644
index 000000000000..49878b982db3
--- /dev/null
+++ b/test/CodeGen/X86/complex-asm.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin
+; This formerly crashed.
+
+%0 = type { i64, i64 }
+
+define %0 @f() nounwind ssp {
+entry:
+ %v = alloca %0, align 8
+ call void asm sideeffect "", "=*r,r,r,0,~{dirflag},~{fpsr},~{flags}"(%0* %v, i32 0, i32 1, i128 undef) nounwind
+ %0 = getelementptr inbounds %0* %v, i64 0, i32 0
+ %1 = load i64* %0, align 8
+ %2 = getelementptr inbounds %0* %v, i64 0, i32 1
+ %3 = load i64* %2, align 8
+ %mrv4 = insertvalue %0 undef, i64 %1, 0
+ %mrv5 = insertvalue %0 %mrv4, i64 %3, 1
+ ret %0 %mrv5
+}
diff --git a/test/CodeGen/X86/conditional-indecrement.ll b/test/CodeGen/X86/conditional-indecrement.ll
new file mode 100644
index 000000000000..a3a0c39905aa
--- /dev/null
+++ b/test/CodeGen/X86/conditional-indecrement.ll
@@ -0,0 +1,89 @@
+; RUN: llc -march=x86 < %s | FileCheck %s
+
+define i32 @test1(i32 %a, i32 %b) nounwind readnone {
+ %not.cmp = icmp ne i32 %a, 0
+ %inc = zext i1 %not.cmp to i32
+ %retval.0 = add i32 %inc, %b
+ ret i32 %retval.0
+; CHECK: test1:
+; CHECK: cmpl $1
+; CHECK: sbbl $-1
+; CHECK: ret
+}
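+; (In test1 above, cmpl $1 sets the carry flag exactly when %a is 0, so
+; sbbl $-1 against %b computes %b + 1 - CF, i.e. %b plus the (%a != 0)
+; increment.)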
+
+define i32 @test2(i32 %a, i32 %b) nounwind readnone {
+ %cmp = icmp eq i32 %a, 0
+ %inc = zext i1 %cmp to i32
+ %retval.0 = add i32 %inc, %b
+ ret i32 %retval.0
+; CHECK: test2:
+; CHECK: cmpl $1
+; CHECK: adcl $0
+; CHECK: ret
+}
+
+define i32 @test3(i32 %a, i32 %b) nounwind readnone {
+ %cmp = icmp eq i32 %a, 0
+ %inc = zext i1 %cmp to i32
+ %retval.0 = add i32 %inc, %b
+ ret i32 %retval.0
+; CHECK: test3:
+; CHECK: cmpl $1
+; CHECK: adcl $0
+; CHECK: ret
+}
+
+define i32 @test4(i32 %a, i32 %b) nounwind readnone {
+ %not.cmp = icmp ne i32 %a, 0
+ %inc = zext i1 %not.cmp to i32
+ %retval.0 = add i32 %inc, %b
+ ret i32 %retval.0
+; CHECK: test4:
+; CHECK: cmpl $1
+; CHECK: sbbl $-1
+; CHECK: ret
+}
+
+define i32 @test5(i32 %a, i32 %b) nounwind readnone {
+ %not.cmp = icmp ne i32 %a, 0
+ %inc = zext i1 %not.cmp to i32
+ %retval.0 = sub i32 %b, %inc
+ ret i32 %retval.0
+; CHECK: test5:
+; CHECK: cmpl $1
+; CHECK: adcl $-1
+; CHECK: ret
+}
+
+define i32 @test6(i32 %a, i32 %b) nounwind readnone {
+ %cmp = icmp eq i32 %a, 0
+ %inc = zext i1 %cmp to i32
+ %retval.0 = sub i32 %b, %inc
+ ret i32 %retval.0
+; CHECK: test6:
+; CHECK: cmpl $1
+; CHECK: sbbl $0
+; CHECK: ret
+}
+
+define i32 @test7(i32 %a, i32 %b) nounwind readnone {
+ %cmp = icmp eq i32 %a, 0
+ %inc = zext i1 %cmp to i32
+ %retval.0 = sub i32 %b, %inc
+ ret i32 %retval.0
+; CHECK: test7:
+; CHECK: cmpl $1
+; CHECK: sbbl $0
+; CHECK: ret
+}
+
+define i32 @test8(i32 %a, i32 %b) nounwind readnone {
+ %not.cmp = icmp ne i32 %a, 0
+ %inc = zext i1 %not.cmp to i32
+ %retval.0 = sub i32 %b, %inc
+ ret i32 %retval.0
+; CHECK: test8:
+; CHECK: cmpl $1
+; CHECK: adcl $-1
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/const-select.ll b/test/CodeGen/X86/const-select.ll
deleted file mode 100644
index 665984ce28ed..000000000000
--- a/test/CodeGen/X86/const-select.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i386-apple-darwin7"
-
-; RUN: llc < %s | grep {LCPI0_0(,%eax,4)}
-define float @f(i32 %x) nounwind readnone {
-entry:
- %0 = icmp eq i32 %x, 0 ; <i1> [#uses=1]
- %iftmp.0.0 = select i1 %0, float 4.200000e+01, float 2.300000e+01 ; <float> [#uses=1]
- ret float %iftmp.0.0
-}
-
-; RUN: llc < %s | grep {movsbl.*(%e.x,%e.x,4), %eax}
-define signext i8 @test(i8* nocapture %P, double %F) nounwind readonly {
-entry:
- %0 = fcmp olt double %F, 4.200000e+01 ; <i1> [#uses=1]
- %iftmp.0.0 = select i1 %0, i32 4, i32 0 ; <i32> [#uses=1]
- %1 = getelementptr i8* %P, i32 %iftmp.0.0 ; <i8*> [#uses=1]
- %2 = load i8* %1, align 1 ; <i8> [#uses=1]
- ret i8 %2
-}
-
diff --git a/test/CodeGen/X86/crash.ll b/test/CodeGen/X86/crash.ll
index a14a48baa355..2d8e63e31342 100644
--- a/test/CodeGen/X86/crash.ll
+++ b/test/CodeGen/X86/crash.ll
@@ -141,3 +141,61 @@ entry:
call void asm sideeffect "outb $0, ${1:w}", "{ax},N{dx},~{dirflag},~{fpsr},~{flags}"(i8 %conv4.i, i32 1017) nounwind
unreachable
}
+
+; Crash trying to form conditional increment with fp value.
+; PR8981
+define i32 @test9(double %X) ssp align 2 {
+entry:
+ %0 = fcmp one double %X, 0.000000e+00
+ %cond = select i1 %0, i32 1, i32 2
+ ret i32 %cond
+}
+
+
+; PR8514 - Crash in match address due to "heroics" turning and-of-shift into
+; shift of and.
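+; (For reference, the identity behind such a rewrite: (x << n) & C equals
+; (x & (C >> n)) << n when the low n bits of C are clear; here n is 2 and
+; C is 262140, i.e. 0x3FFFC.)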
+%struct.S0 = type { i8, [2 x i8], i8 }
+
+define void @func_59(i32 %p_63) noreturn nounwind {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.inc44, %entry
+ %p_63.addr.1 = phi i32 [ %p_63, %entry ], [ 0, %for.inc44 ]
+ %l_74.0 = phi i32 [ 0, %entry ], [ %add46, %for.inc44 ]
+ br i1 undef, label %for.inc44, label %bb.nph81
+
+bb.nph81: ; preds = %for.body
+ %tmp98 = add i32 %p_63.addr.1, 0
+ br label %for.body22
+
+for.body22: ; preds = %for.body22, %bb.nph81
+ %l_75.077 = phi i64 [ %ins, %for.body22 ], [ undef, %bb.nph81 ]
+ %tmp110 = trunc i64 %l_75.077 to i32
+ %tmp111 = and i32 %tmp110, 65535
+ %arrayidx32.0 = getelementptr [9 x [5 x [2 x %struct.S0]]]* undef, i32 0, i32 %l_74.0, i32 %tmp98, i32 %tmp111, i32 0
+ store i8 1, i8* %arrayidx32.0, align 4
+ %tmp106 = shl i32 %tmp110, 2
+ %tmp107 = and i32 %tmp106, 262140
+ %scevgep99.sum114 = or i32 %tmp107, 1
+ %arrayidx32.1.1 = getelementptr [9 x [5 x [2 x %struct.S0]]]* undef, i32 0, i32 %l_74.0, i32 %tmp98, i32 0, i32 1, i32 %scevgep99.sum114
+ store i8 0, i8* %arrayidx32.1.1, align 1
+ %ins = or i64 undef, undef
+ br label %for.body22
+
+for.inc44: ; preds = %for.body
+ %add46 = add i32 %l_74.0, 1
+ br label %for.body
+}
+
+; PR9028
+define void @f(i64 %A) nounwind {
+entry:
+ %0 = zext i64 %A to i160
+ %1 = shl i160 %0, 64
+ %2 = zext i160 %1 to i576
+ %3 = zext i96 undef to i576
+ %4 = or i576 %3, %2
+ store i576 %4, i576* undef, align 8
+ ret void
+}
diff --git a/test/CodeGen/X86/critical-edge-split-2.ll b/test/CodeGen/X86/critical-edge-split-2.ll
new file mode 100644
index 000000000000..70301cd9bcc4
--- /dev/null
+++ b/test/CodeGen/X86/critical-edge-split-2.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+%0 = type <{ %1, %1 }>
+%1 = type { i8, i8, i8, i8 }
+
+@g_2 = global %0 zeroinitializer
+@g_4 = global %1 zeroinitializer, align 4
+
+
+; PR8642
+define i16 @test1(i1 zeroext %C, i8** nocapture %argv) nounwind ssp {
+entry:
+ br i1 %C, label %cond.end.i, label %cond.false.i
+
+cond.false.i: ; preds = %entry
+ br label %cond.end.i
+
+cond.end.i: ; preds = %entry
+ %call1 = phi i16 [ trunc (i32 srem (i32 1, i32 zext (i1 icmp eq (%1* bitcast (i8* getelementptr inbounds (%0* @g_2, i64 0, i32 1, i32 0) to %1*), %1* @g_4) to i32)) to i16), %cond.false.i ], [ 1, %entry ]
+ ret i16 %call1
+}
+
+; CHECK: test1:
+; CHECK: testb %dil, %dil
+; CHECK: jne LBB0_2
+; CHECK: divl
+; CHECK: LBB0_2:
diff --git a/test/CodeGen/X86/critical-edge-split.ll b/test/CodeGen/X86/critical-edge-split.ll
deleted file mode 100644
index 96fef0fbfc61..000000000000
--- a/test/CodeGen/X86/critical-edge-split.ll
+++ /dev/null
@@ -1,50 +0,0 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -o /dev/null -stats -info-output-file - | grep asm-printer | grep 29
-
- %CC = type { %Register }
- %II = type { %"struct.XX::II::$_74" }
- %JITFunction = type %YYValue* (%CC*, %YYValue**)
- %YYValue = type { i32 (...)** }
- %Register = type { %"struct.XX::ByteCodeFeatures" }
- %"struct.XX::ByteCodeFeatures" = type { i32 }
- %"struct.XX::II::$_74" = type { i8* }
-@llvm.used = appending global [1 x i8*] [ i8* bitcast (%JITFunction* @loop to i8*) ], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
-
-define %YYValue* @loop(%CC*, %YYValue**) nounwind {
-; <label>:2
- %3 = getelementptr %CC* %0, i32 -9 ; <%CC*> [#uses=1]
- %4 = bitcast %CC* %3 to %YYValue** ; <%YYValue**> [#uses=2]
- %5 = load %YYValue** %4 ; <%YYValue*> [#uses=3]
- %unique_1.i = ptrtoint %YYValue* %5 to i1 ; <i1> [#uses=1]
- br i1 %unique_1.i, label %loop, label %11
-
-loop: ; preds = %6, %2
- %.1 = phi %YYValue* [ inttoptr (i32 1 to %YYValue*), %2 ], [ %intAddValue, %6 ] ; <%YYValue*> [#uses=3]
- %immediateCmp = icmp slt %YYValue* %.1, %5 ; <i1> [#uses=1]
- br i1 %immediateCmp, label %6, label %8
-
-; <label>:6 ; preds = %loop
- %lhsInt = ptrtoint %YYValue* %.1 to i32 ; <i32> [#uses=1]
- %7 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %lhsInt, i32 2) ; <{ i32, i1 }> [#uses=2]
- %intAdd = extractvalue { i32, i1 } %7, 0 ; <i32> [#uses=1]
- %intAddValue = inttoptr i32 %intAdd to %YYValue* ; <%YYValue*> [#uses=1]
- %intAddOverflow = extractvalue { i32, i1 } %7, 1 ; <i1> [#uses=1]
- br i1 %intAddOverflow, label %.loopexit, label %loop
-
-; <label>:8 ; preds = %loop
- ret %YYValue* inttoptr (i32 10 to %YYValue*)
-
-.loopexit: ; preds = %6
- %9 = bitcast %CC* %0 to %YYValue** ; <%YYValue**> [#uses=1]
- store %YYValue* %.1, %YYValue** %9
- store %YYValue* %5, %YYValue** %4
- %10 = call fastcc %YYValue* @foobar(%II* inttoptr (i32 3431104 to %II*), %CC* %0, %YYValue** %1) ; <%YYValue*> [#uses=1]
- ret %YYValue* %10
-
-; <label>:11 ; preds = %2
- %12 = call fastcc %YYValue* @foobar(%II* inttoptr (i32 3431080 to %II*), %CC* %0, %YYValue** %1) ; <%YYValue*> [#uses=1]
- ret %YYValue* %12
-}
-
-declare fastcc %YYValue* @foobar(%II*, %CC*, %YYValue**) nounwind
-
-declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind
diff --git a/test/CodeGen/X86/ctpop-combine.ll b/test/CodeGen/X86/ctpop-combine.ll
new file mode 100644
index 000000000000..c957d385a24a
--- /dev/null
+++ b/test/CodeGen/X86/ctpop-combine.ll
@@ -0,0 +1,40 @@
+; RUN: llc -march=x86-64 < %s | FileCheck %s
+
+declare i64 @llvm.ctpop.i64(i64) nounwind readnone
+
+define i32 @test1(i64 %x) nounwind readnone {
+ %count = tail call i64 @llvm.ctpop.i64(i64 %x)
+ %cast = trunc i64 %count to i32
+ %cmp = icmp ugt i32 %cast, 1
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+; CHECK: test1:
+; CHECK: leaq -1(%rdi)
+; CHECK-NEXT: testq
+; CHECK-NEXT: setne
+; CHECK: ret
+}
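+; (The leaq/testq/setne sequence checked in test1 computes (x & (x - 1)) != 0,
+; which is equivalent to popcount(x) > 1.)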
+
+
+define i32 @test2(i64 %x) nounwind readnone {
+ %count = tail call i64 @llvm.ctpop.i64(i64 %x)
+ %cmp = icmp ult i64 %count, 2
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+; CHECK: test2:
+; CHECK: leaq -1(%rdi)
+; CHECK-NEXT: testq
+; CHECK-NEXT: sete
+; CHECK: ret
+}
+
+define i32 @test3(i64 %x) nounwind readnone {
+ %count = tail call i64 @llvm.ctpop.i64(i64 %x)
+ %cast = trunc i64 %count to i6 ; Too small for 0-64
+ %cmp = icmp ult i6 %cast, 2
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+; CHECK: test3:
+; CHECK: cmpb $2
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/dagcombine-buildvector.ll b/test/CodeGen/X86/dagcombine-buildvector.ll
index 5cc6eaa405ad..dae91d5ccdd6 100644
--- a/test/CodeGen/X86/dagcombine-buildvector.ll
+++ b/test/CodeGen/X86/dagcombine-buildvector.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=penryn -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=penryn | FileCheck %s
; Shows a dag combine bug that will generate an illegal build vector
; with v2i64 build_vector i32, i32.
diff --git a/test/CodeGen/X86/dbg-live-in-location.ll b/test/CodeGen/X86/dbg-live-in-location.ll
new file mode 100644
index 000000000000..9b1464d415f9
--- /dev/null
+++ b/test/CodeGen/X86/dbg-live-in-location.ll
@@ -0,0 +1,84 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+@str = internal constant [3 x i8] c"Hi\00"
+
+define void @foo() nounwind ssp {
+entry:
+ %puts = tail call i32 @puts(i8* getelementptr inbounds ([3 x i8]* @str, i64 0, i64 0))
+ ret void, !dbg !17
+}
+
+; CHECK: arg.c:5:14
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp {
+entry:
+ tail call void @llvm.dbg.value(metadata !{i32 %argc}, i64 0, metadata !9), !dbg !19
+ tail call void @llvm.dbg.value(metadata !{i8** %argv}, i64 0, metadata !10), !dbg !20
+ %cmp = icmp sgt i32 %argc, 1, !dbg !21
+ br i1 %cmp, label %cond.end, label %for.body.lr.ph, !dbg !21
+
+cond.end: ; preds = %entry
+ %arrayidx = getelementptr inbounds i8** %argv, i64 1, !dbg !21
+ %tmp2 = load i8** %arrayidx, align 8, !dbg !21, !tbaa !22
+ %call = tail call i32 (...)* @atoi(i8* %tmp2) nounwind, !dbg !21
+ tail call void @llvm.dbg.value(metadata !{i32 %call}, i64 0, metadata !16), !dbg !21
+ tail call void @llvm.dbg.value(metadata !25, i64 0, metadata !14), !dbg !26
+ %cmp57 = icmp sgt i32 %call, 0, !dbg !26
+ br i1 %cmp57, label %for.body.lr.ph, label %for.end, !dbg !26
+
+for.body.lr.ph: ; preds = %entry, %cond.end
+ %cond10 = phi i32 [ %call, %cond.end ], [ 300, %entry ]
+ br label %for.body
+
+for.body: ; preds = %for.body, %for.body.lr.ph
+ %i.08 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+ %puts.i = tail call i32 @puts(i8* getelementptr inbounds ([3 x i8]* @str, i64 0, i64 0)) nounwind
+ %inc = add nsw i32 %i.08, 1, !dbg !27
+ %exitcond = icmp eq i32 %inc, %cond10
+ br i1 %exitcond, label %for.end, label %for.body, !dbg !26
+
+for.end: ; preds = %for.body, %cond.end
+ ret i32 0, !dbg !29
+}
+
+declare i32 @atoi(...)
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+declare i32 @puts(i8* nocapture) nounwind
+
+!llvm.dbg.sp = !{!0, !5}
+!llvm.dbg.lv.main = !{!9, !10, !14, !16}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, void ()* @foo} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"arg.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"arg.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 124504)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{null}
+!5 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 6, metadata !6, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**)* @main} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !7, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!7 = metadata !{metadata !8}
+!8 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 590081, metadata !5, metadata !"argc", metadata !1, i32 5, metadata !8, i32 0} ; [ DW_TAG_arg_variable ]
+!10 = metadata !{i32 590081, metadata !5, metadata !"argv", metadata !1, i32 5, metadata !11, i32 0} ; [ DW_TAG_arg_variable ]
+!11 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ]
+!12 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !13} ; [ DW_TAG_pointer_type ]
+!13 = metadata !{i32 589860, metadata !2, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!14 = metadata !{i32 590080, metadata !15, metadata !"i", metadata !1, i32 7, metadata !8, i32 0} ; [ DW_TAG_auto_variable ]
+!15 = metadata !{i32 589835, metadata !5, i32 6, i32 1, metadata !1, i32 1} ; [ DW_TAG_lexical_block ]
+!16 = metadata !{i32 590080, metadata !15, metadata !"iterations", metadata !1, i32 8, metadata !8, i32 0} ; [ DW_TAG_auto_variable ]
+!17 = metadata !{i32 4, i32 1, metadata !18, null}
+!18 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!19 = metadata !{i32 5, i32 14, metadata !5, null}
+!20 = metadata !{i32 5, i32 26, metadata !5, null}
+!21 = metadata !{i32 8, i32 51, metadata !15, null}
+!22 = metadata !{metadata !"any pointer", metadata !23}
+!23 = metadata !{metadata !"omnipotent char", metadata !24}
+!24 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!25 = metadata !{i32 0}
+!26 = metadata !{i32 9, i32 2, metadata !15, null}
+!27 = metadata !{i32 9, i32 30, metadata !28, null}
+!28 = metadata !{i32 589835, metadata !15, i32 9, i32 2, metadata !1, i32 2} ; [ DW_TAG_lexical_block ]
+!29 = metadata !{i32 12, i32 9, metadata !15, null}
diff --git a/test/CodeGen/X86/dbg-merge-loc-entry.ll b/test/CodeGen/X86/dbg-merge-loc-entry.ll
new file mode 100644
index 000000000000..83df1478cf18
--- /dev/null
+++ b/test/CodeGen/X86/dbg-merge-loc-entry.ll
@@ -0,0 +1,71 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin8"
+
+;CHECK: Ldebug_loc0:
+;CHECK-NEXT: .quad Lfunc_begin0
+;CHECK-NEXT: .quad Lfunc_end0
+;CHECK-NEXT: .short 1 ## Loc expr size
+;CHECK-NEXT: .byte 85 ## DW_OP_reg5
+;CHECK-NEXT: .quad 0
+;CHECK-NEXT: .quad 0
+
+%0 = type { i64, i1 }
+
+@__clz_tab = external constant [256 x i8]
+
+define hidden i128 @__divti3(i128 %u, i128 %v) nounwind readnone {
+entry:
+ tail call void @llvm.dbg.value(metadata !{i128 %u}, i64 0, metadata !14), !dbg !15
+ tail call void @llvm.dbg.value(metadata !16, i64 0, metadata !17), !dbg !21
+ br i1 undef, label %bb2, label %bb4, !dbg !22
+
+bb2: ; preds = %entry
+ br label %bb4, !dbg !23
+
+bb4: ; preds = %bb2, %entry
+ br i1 undef, label %__udivmodti4.exit, label %bb82.i, !dbg !24
+
+bb82.i: ; preds = %bb4
+ unreachable
+
+__udivmodti4.exit: ; preds = %bb4
+ ret i128 undef, !dbg !27
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+declare %0 @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
+
+!llvm.dbg.sp = !{!0, !9}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"__udivmodti4", metadata !"__udivmodti4", metadata !"", metadata !1, i32 879, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"foobar.c", metadata !"/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 1, metadata !"foobar.c", metadata !"/tmp", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5, metadata !5, metadata !5, metadata !8}
+!5 = metadata !{i32 589846, metadata !6, metadata !"UTItype", metadata !6, i32 166, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ]
+!6 = metadata !{i32 589865, metadata !"foobar.h", metadata !"/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 589860, metadata !1, metadata !"", metadata !1, i32 0, i64 128, i64 128, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!8 = metadata !{i32 589839, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_pointer_type ]
+!9 = metadata !{i32 589870, i32 0, metadata !1, metadata !"__divti3", metadata !"__divti3", metadata !"__divti3", metadata !1, i32 1094, metadata !10, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i128 (i128, i128)* @__divti3} ; [ DW_TAG_subprogram ]
+!10 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!11 = metadata !{metadata !12, metadata !12, metadata !12}
+!12 = metadata !{i32 589846, metadata !6, metadata !"TItype", metadata !6, i32 160, i64 0, i64 0, i64 0, i32 0, metadata !13} ; [ DW_TAG_typedef ]
+!13 = metadata !{i32 589860, metadata !1, metadata !"", metadata !1, i32 0, i64 128, i64 128, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{i32 590081, metadata !9, metadata !"u", metadata !1, i32 1093, metadata !12, i32 0} ; [ DW_TAG_arg_variable ]
+!15 = metadata !{i32 1093, i32 0, metadata !9, null}
+!16 = metadata !{i64 0}
+!17 = metadata !{i32 590080, metadata !18, metadata !"c", metadata !1, i32 1095, metadata !19, i32 0} ; [ DW_TAG_auto_variable ]
+!18 = metadata !{i32 589835, metadata !9, i32 1094, i32 0, metadata !1, i32 13} ; [ DW_TAG_lexical_block ]
+!19 = metadata !{i32 589846, metadata !6, metadata !"word_type", metadata !6, i32 424, i64 0, i64 0, i64 0, i32 0, metadata !20} ; [ DW_TAG_typedef ]
+!20 = metadata !{i32 589860, metadata !1, metadata !"long int", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!21 = metadata !{i32 1095, i32 0, metadata !18, null}
+!22 = metadata !{i32 1103, i32 0, metadata !18, null}
+!23 = metadata !{i32 1104, i32 0, metadata !18, null}
+!24 = metadata !{i32 1003, i32 0, metadata !25, metadata !26}
+!25 = metadata !{i32 589835, metadata !0, i32 879, i32 0, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!26 = metadata !{i32 1107, i32 0, metadata !18, null}
+!27 = metadata !{i32 1111, i32 0, metadata !18, null}
diff --git a/test/CodeGen/X86/dbg-value-inlined-parameter.ll b/test/CodeGen/X86/dbg-value-inlined-parameter.ll
new file mode 100644
index 000000000000..89bbf34a1286
--- /dev/null
+++ b/test/CodeGen/X86/dbg-value-inlined-parameter.ll
@@ -0,0 +1,86 @@
+; RUN: llc -mtriple=x86_64-apple-darwin < %s | FileCheck %s
+
+;CHECK: DW_TAG_inlined_subroutine
+;CHECK-NEXT: DW_AT_abstract_origin
+;CHECK-NEXT: DW_AT_low_pc
+;CHECK-NEXT: DW_AT_high_pc
+;CHECK-NEXT: DW_AT_call_file
+;CHECK-NEXT: DW_AT_call_line
+;CHECK-NEXT: DW_TAG_formal_parameter
+;CHECK-NEXT: .ascii "sp" ## DW_AT_name
+
+%struct.S1 = type { float*, i32 }
+
+@p = common global %struct.S1 zeroinitializer, align 8
+
+define i32 @foo(%struct.S1* nocapture %sp, i32 %nums) nounwind optsize ssp {
+entry:
+ tail call void @llvm.dbg.value(metadata !{%struct.S1* %sp}, i64 0, metadata !9), !dbg !20
+ tail call void @llvm.dbg.value(metadata !{i32 %nums}, i64 0, metadata !18), !dbg !21
+ %tmp2 = getelementptr inbounds %struct.S1* %sp, i64 0, i32 1, !dbg !22
+ store i32 %nums, i32* %tmp2, align 4, !dbg !22, !tbaa !24
+ %call = tail call float* @bar(i32 %nums) nounwind optsize, !dbg !27
+ %tmp5 = getelementptr inbounds %struct.S1* %sp, i64 0, i32 0, !dbg !27
+ store float* %call, float** %tmp5, align 8, !dbg !27, !tbaa !28
+ %cmp = icmp ne float* %call, null, !dbg !29
+ %cond = zext i1 %cmp to i32, !dbg !29
+ ret i32 %cond, !dbg !29
+}
+
+declare float* @bar(i32) optsize
+
+define void @foobar() nounwind optsize ssp {
+entry:
+ tail call void @llvm.dbg.value(metadata !30, i64 0, metadata !9) nounwind, !dbg !31
+ tail call void @llvm.dbg.value(metadata !34, i64 0, metadata !18) nounwind, !dbg !35
+ store i32 1, i32* getelementptr inbounds (%struct.S1* @p, i64 0, i32 1), align 8, !dbg !36, !tbaa !24
+ %call.i = tail call float* @bar(i32 1) nounwind optsize, !dbg !37
+ store float* %call.i, float** getelementptr inbounds (%struct.S1* @p, i64 0, i32 0), align 8, !dbg !37, !tbaa !28
+ ret void, !dbg !38
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.sp = !{!0, !6}
+!llvm.dbg.lv.foo = !{!9, !18}
+!llvm.dbg.gv = !{!19}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 8, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (%struct.S1*, i32)* @foo} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"nm2.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"nm2.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 125693)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foobar", metadata !"foobar", metadata !"", metadata !1, i32 15, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, void ()* @foobar} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{null}
+!9 = metadata !{i32 590081, metadata !0, metadata !"sp", metadata !1, i32 7, metadata !10, i32 0} ; [ DW_TAG_arg_variable ]
+!10 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
+!11 = metadata !{i32 589846, metadata !2, metadata !"S1", metadata !1, i32 4, i64 0, i64 0, i64 0, i32 0, metadata !12} ; [ DW_TAG_typedef ]
+!12 = metadata !{i32 589843, metadata !2, metadata !"S1", metadata !1, i32 1, i64 128, i64 64, i32 0, i32 0, i32 0, metadata !13, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!13 = metadata !{metadata !14, metadata !17}
+!14 = metadata !{i32 589837, metadata !1, metadata !"m", metadata !1, i32 2, i64 64, i64 64, i64 0, i32 0, metadata !15} ; [ DW_TAG_member ]
+!15 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ]
+!16 = metadata !{i32 589860, metadata !2, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!17 = metadata !{i32 589837, metadata !1, metadata !"nums", metadata !1, i32 3, i64 32, i64 32, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
+!18 = metadata !{i32 590081, metadata !0, metadata !"nums", metadata !1, i32 7, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!19 = metadata !{i32 589876, i32 0, metadata !2, metadata !"p", metadata !"p", metadata !"", metadata !1, i32 14, metadata !11, i32 0, i32 1, %struct.S1* @p} ; [ DW_TAG_variable ]
+!20 = metadata !{i32 7, i32 13, metadata !0, null}
+!21 = metadata !{i32 7, i32 21, metadata !0, null}
+!22 = metadata !{i32 9, i32 3, metadata !23, null}
+!23 = metadata !{i32 589835, metadata !0, i32 8, i32 1, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!24 = metadata !{metadata !"int", metadata !25}
+!25 = metadata !{metadata !"omnipotent char", metadata !26}
+!26 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!27 = metadata !{i32 10, i32 3, metadata !23, null}
+!28 = metadata !{metadata !"any pointer", metadata !25}
+!29 = metadata !{i32 11, i32 3, metadata !23, null}
+!30 = metadata !{%struct.S1* @p}
+!31 = metadata !{i32 7, i32 13, metadata !0, metadata !32}
+!32 = metadata !{i32 16, i32 3, metadata !33, null}
+!33 = metadata !{i32 589835, metadata !6, i32 15, i32 15, metadata !1, i32 1} ; [ DW_TAG_lexical_block ]
+!34 = metadata !{i32 1}
+!35 = metadata !{i32 7, i32 21, metadata !0, metadata !32}
+!36 = metadata !{i32 9, i32 3, metadata !23, metadata !32}
+!37 = metadata !{i32 10, i32 3, metadata !23, metadata !32}
+!38 = metadata !{i32 17, i32 1, metadata !33, null}
diff --git a/test/CodeGen/X86/dbg-value-location.ll b/test/CodeGen/X86/dbg-value-location.ll
new file mode 100644
index 000000000000..2449046c65fb
--- /dev/null
+++ b/test/CodeGen/X86/dbg-value-location.ll
@@ -0,0 +1,70 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+;Radar 8950491
+
+;CHECK: .ascii "var" ## DW_AT_name
+;CHECK-NEXT: .byte 0
+;CHECK-NEXT: .byte 2 ## DW_AT_decl_file
+;CHECK-NEXT: .short 19509 ## DW_AT_decl_line
+;CHECK-NEXT: .long 68 ## DW_AT_type
+;CHECK-NEXT: .byte 1 ## DW_AT_location
+
+@dfm = external global i32, align 4
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+define i32 @foo(i32 %dev, i64 %cmd, i8* %data, i32 %data2) nounwind optsize ssp {
+entry:
+ call void @llvm.dbg.value(metadata !{i32 %dev}, i64 0, metadata !12), !dbg !13
+ %tmp.i = load i32* @dfm, align 4, !dbg !14
+ %cmp.i = icmp eq i32 %tmp.i, 0, !dbg !14
+ br i1 %cmp.i, label %if.else, label %if.end.i, !dbg !14
+
+if.end.i: ; preds = %entry
+ switch i64 %cmd, label %if.then [
+ i64 2147772420, label %bb.i
+ i64 536897538, label %bb116.i
+ ], !dbg !22
+
+bb.i: ; preds = %if.end.i
+ unreachable
+
+bb116.i: ; preds = %if.end.i
+ unreachable
+
+if.then: ; preds = %if.end.i
+ ret i32 undef, !dbg !23
+
+if.else: ; preds = %entry
+ ret i32 0
+}
+
+declare hidden fastcc i32 @bar(i32, i32* nocapture) nounwind optsize ssp
+declare hidden fastcc i32 @bar2(i32) nounwind optsize ssp
+declare hidden fastcc i32 @bar3(i32) nounwind optsize ssp
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.sp = !{!0, !6, !7, !8}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 19510, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i64, i8*, i32)* @foo} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"/tmp/f.c", metadata !"/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"f.i", metadata !"/tmp", metadata !"clang version 2.9 (trunk 124753)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 589870, i32 0, metadata !1, metadata !"bar3", metadata !"bar3", metadata !"", metadata !1, i32 14827, metadata !3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @bar3} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 589870, i32 0, metadata !1, metadata !"bar2", metadata !"bar2", metadata !"", metadata !1, i32 15397, metadata !3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @bar2} ; [ DW_TAG_subprogram ]
+!8 = metadata !{i32 589870, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"", metadata !1, i32 12382, metadata !9, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i32*)* @bar} ; [ DW_TAG_subprogram ]
+!9 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !10, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!10 = metadata !{metadata !11}
+!11 = metadata !{i32 589860, metadata !2, metadata !"unsigned char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
+!12 = metadata !{i32 590081, metadata !0, metadata !"var", metadata !1, i32 19509, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!13 = metadata !{i32 19509, i32 20, metadata !0, null}
+!14 = metadata !{i32 18091, i32 2, metadata !15, metadata !17}
+!15 = metadata !{i32 589835, metadata !16, i32 18086, i32 1, metadata !1, i32 748} ; [ DW_TAG_lexical_block ]
+!16 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo_bar", metadata !"foo_bar", metadata !"", metadata !1, i32 18086, metadata !3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null} ; [ DW_TAG_subprogram ]
+!17 = metadata !{i32 19514, i32 2, metadata !18, null}
+!18 = metadata !{i32 589835, metadata !0, i32 19510, i32 1, metadata !1, i32 99} ; [ DW_TAG_lexical_block ]
+!22 = metadata !{i32 18094, i32 2, metadata !15, metadata !17}
+!23 = metadata !{i32 19524, i32 1, metadata !18, null}
diff --git a/test/CodeGen/X86/dbg-value-range.ll b/test/CodeGen/X86/dbg-value-range.ll
new file mode 100644
index 000000000000..2985224d9dbd
--- /dev/null
+++ b/test/CodeGen/X86/dbg-value-range.ll
@@ -0,0 +1,56 @@
+; RUN: llc -mtriple=x86_64-apple-darwin < %s | FileCheck %s
+
+%struct.a = type { i32 }
+
+define i32 @bar(%struct.a* nocapture %b) nounwind ssp {
+entry:
+ tail call void @llvm.dbg.value(metadata !{%struct.a* %b}, i64 0, metadata !6), !dbg !13
+ %tmp1 = getelementptr inbounds %struct.a* %b, i64 0, i32 0, !dbg !14
+ %tmp2 = load i32* %tmp1, align 4, !dbg !14, !tbaa !15
+ tail call void @llvm.dbg.value(metadata !{i32 %tmp2}, i64 0, metadata !11), !dbg !14
+ %call = tail call i32 (...)* @foo(i32 %tmp2) nounwind , !dbg !18
+ %add = add nsw i32 %tmp2, 1, !dbg !19
+ ret i32 %add, !dbg !19
+}
+
+declare i32 @foo(...)
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.sp = !{!0}
+!llvm.dbg.lv.bar = !{!6, !11}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (%struct.a*)* @bar} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"bar.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"bar.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 122997)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 590081, metadata !0, metadata !"b", metadata !1, i32 5, metadata !7, i32 0} ; [ DW_TAG_arg_variable ]
+!7 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
+!8 = metadata !{i32 589843, metadata !2, metadata !"a", metadata !1, i32 1, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !9, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!9 = metadata !{metadata !10}
+!10 = metadata !{i32 589837, metadata !1, metadata !"c", metadata !1, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
+!11 = metadata !{i32 590080, metadata !12, metadata !"x", metadata !1, i32 6, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!12 = metadata !{i32 589835, metadata !0, i32 5, i32 22, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!13 = metadata !{i32 5, i32 19, metadata !0, null}
+!14 = metadata !{i32 6, i32 14, metadata !12, null}
+!15 = metadata !{metadata !"int", metadata !16}
+!16 = metadata !{metadata !"omnipotent char", metadata !17}
+!17 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!18 = metadata !{i32 7, i32 2, metadata !12, null}
+!19 = metadata !{i32 8, i32 2, metadata !12, null}
+
+; check that variable bar:b value range is appropriately truncated in debug info. Here Ltmp5 is the end of
+; the location range.
+
+;CHECK:Ltmp6
+;CHECK-NEXT: DEBUG_VALUE: bar:b <- undef
+
+;CHECK:Ldebug_loc0:
+;CHECK-NEXT: .quad Ltmp
+;CHECK-NEXT: .quad Ltmp6
+;CHECK-NEXT: .short 1
+;CHECK-NEXT: .byte 85
+;CHECK-NEXT: .quad 0
+;CHECK-NEXT: .quad 0
diff --git a/test/CodeGen/X86/div_const.ll b/test/CodeGen/X86/div_const.ll
deleted file mode 100644
index f0ada41338b2..000000000000
--- a/test/CodeGen/X86/div_const.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=x86 | grep 365384439
-
-define i32 @f9188_mul365384439_shift27(i32 %A) {
- %tmp1 = udiv i32 %A, 1577682821 ; <i32> [#uses=1]
- ret i32 %tmp1
-}
-
diff --git a/test/CodeGen/X86/divide-by-constant.ll b/test/CodeGen/X86/divide-by-constant.ll
new file mode 100644
index 000000000000..7ceb972f61bb
--- /dev/null
+++ b/test/CodeGen/X86/divide-by-constant.ll
@@ -0,0 +1,62 @@
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -asm-verbose=0 | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i686-pc-linux-gnu"
+
+define zeroext i16 @test1(i16 zeroext %x) nounwind {
+entry:
+ %div = udiv i16 %x, 33
+ ret i16 %div
+; CHECK: test1:
+; CHECK: imull $63551, %eax, %eax
+; CHECK-NEXT: shrl $21, %eax
+; CHECK-NEXT: ret
+}
+
+define zeroext i16 @test2(i8 signext %x, i16 zeroext %c) nounwind readnone ssp noredzone {
+entry:
+ %div = udiv i16 %c, 3
+ ret i16 %div
+
+; CHECK: test2:
+; CHECK: imull $43691, %eax, %eax
+; CHECK-NEXT: shrl $17, %eax
+; CHECK-NEXT: ret
+}
+
+define zeroext i8 @test3(i8 zeroext %x, i8 zeroext %c) nounwind readnone ssp noredzone {
+entry:
+ %div = udiv i8 %c, 3
+ ret i8 %div
+
+; CHECK: test3:
+; CHECK: movzbl 8(%esp), %eax
+; CHECK-NEXT: imull $171, %eax, %eax
+; CHECK-NEXT: shrl $9, %eax
+; CHECK-NEXT: ret
+}
+
+define signext i16 @test4(i16 signext %x) nounwind {
+entry:
+ %div = sdiv i16 %x, 33 ; <i32> [#uses=1]
+ ret i16 %div
+; CHECK: test4:
+; CHECK: imull $-1985, %ecx, %ecx
+}
+
+define i32 @test5(i32 %A) nounwind {
+ %tmp1 = udiv i32 %A, 1577682821 ; <i32> [#uses=1]
+ ret i32 %tmp1
+; CHECK: test5:
+; CHECK: movl $365384439, %eax
+; CHECK: mull 4(%esp)
+}
+
+define signext i16 @test6(i16 signext %x) nounwind {
+entry:
+ %div = sdiv i16 %x, 10
+ ret i16 %div
+; CHECK: test6:
+; CHECK: imull $26215, %eax, %eax
+; CHECK: shrl $31, %ecx
+; CHECK: sarl $18, %eax
+}
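The multiplier/shift pairs checked above are the usual round-up reciprocal constants for division by a constant; as a quick sanity check of the arithmetic, assuming the standard magic-number derivation rather than anything stated in the patch: 63551 = ceil(2^21 / 33), and 65535 * 63551 < 2^32, so (x * 63551) >> 21 equals x udiv 33 for every i16 x, matching test1. Likewise 43691 = ceil(2^17 / 3) for test2, and 26215 = ceil(2^18 / 10) for the signed test6, where the extra shrl/sarl pair folds in the sign correction.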
diff --git a/test/CodeGen/X86/dll-linkage.ll b/test/CodeGen/X86/dll-linkage.ll
index c634c7e1fd42..913617585206 100644
--- a/test/CodeGen/X86/dll-linkage.ll
+++ b/test/CodeGen/X86/dll-linkage.ll
@@ -3,7 +3,7 @@
declare dllimport void @foo()
define void @bar() nounwind {
-; CHECK: call *__imp__foo
+; CHECK: calll *__imp__foo
call void @foo()
ret void
}
diff --git a/test/CodeGen/X86/dollar-name.ll b/test/CodeGen/X86/dollar-name.ll
index 3b263194a5a8..2ecd72909cb1 100644
--- a/test/CodeGen/X86/dollar-name.ll
+++ b/test/CodeGen/X86/dollar-name.ll
@@ -7,7 +7,7 @@
define i32 @"$foo"() nounwind {
; CHECK: movl ($bar),
; CHECK: addl ($qux),
-; CHECK: call ($hen)
+; CHECK: calll ($hen)
%m = load i32* @"$bar"
%n = load i32* @"$qux"
%t = add i32 %m, %n
diff --git a/test/CodeGen/X86/fast-isel-avoid-unnecessary-pic-base.ll b/test/CodeGen/X86/fast-isel-avoid-unnecessary-pic-base.ll
new file mode 100644
index 000000000000..9233d3f7c1a0
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-avoid-unnecessary-pic-base.ll
@@ -0,0 +1,23 @@
+; RUN: llc -O0 -relocation-model=pic < %s | not grep call
+; rdar://8396318
+
+; Don't emit a PIC base register if no addresses are needed.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+target triple = "i386-apple-darwin11.0.0"
+
+define i32 @foo(i32 %x, i32 %y, i32 %z) nounwind ssp {
+entry:
+ %x.addr = alloca i32, align 4
+ %y.addr = alloca i32, align 4
+ %z.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ store i32 %y, i32* %y.addr, align 4
+ store i32 %z, i32* %z.addr, align 4
+ %tmp = load i32* %x.addr, align 4
+ %tmp1 = load i32* %y.addr, align 4
+ %add = add nsw i32 %tmp, %tmp1
+ %tmp2 = load i32* %z.addr, align 4
+ %add3 = add nsw i32 %add, %tmp2
+ ret i32 %add3
+}
diff --git a/test/CodeGen/X86/fast-isel-bc.ll b/test/CodeGen/X86/fast-isel-bc.ll
index 8d7dc8f9a7f8..4abc3b5b3c85 100644
--- a/test/CodeGen/X86/fast-isel-bc.ll
+++ b/test/CodeGen/X86/fast-isel-bc.ll
@@ -1,19 +1,23 @@
-; RUN: llc < %s -O0 -regalloc=linearscan -march=x86-64 -mattr=+mmx | FileCheck %s
+; RUN: llc < %s -O0 -regalloc=linearscan -march=x86-64 -mattr=+mmx,+sse2 | FileCheck %s
; PR4684
target datalayout =
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin9.8"
-declare void @func2(<1 x i64>)
+declare void @func2(x86_mmx)
define void @func1() nounwind {
; This isn't spectacular, but it's MMX code at -O0...
-; CHECK: movl $2, %eax
-; CHECK: movd %rax, %mm0
-; CHECK: movd %mm0, %rdi
+; CHECK: movq2dq %mm0, %xmm0
+; For now, handling of x86_mmx parameters in fast Isel is unimplemented,
+; so we get pretty poor code. The sequence below would be preferable.
+; CHEK: movl $2, %eax
+; CHEK: movd %rax, %mm0
+; CHEK: movd %mm0, %rdi
- call void @func2(<1 x i64> <i64 2>)
+ %tmp0 = bitcast <2 x i32><i32 0, i32 2> to x86_mmx
+ call void @func2(x86_mmx %tmp0)
ret void
}
diff --git a/test/CodeGen/X86/fast-isel-gep.ll b/test/CodeGen/X86/fast-isel-gep.ll
index 577dd7223a4d..622a1ff831d0 100644
--- a/test/CodeGen/X86/fast-isel-gep.ll
+++ b/test/CodeGen/X86/fast-isel-gep.ll
@@ -70,3 +70,20 @@ entry:
; X64: test4:
; X64: 128(%r{{.*}},%r{{.*}},8)
}
+
+; PR8961 - Make sure the sext for the GEP addressing comes before the load that
+; is folded.
+define i64 @test5(i8* %A, i32 %I, i64 %B) nounwind {
+ %v8 = getelementptr i8* %A, i32 %I
+ %v9 = bitcast i8* %v8 to i64*
+ %v10 = load i64* %v9
+ %v11 = add i64 %B, %v10
+ ret i64 %v11
+; X64: test5:
+; X64: movslq %esi, %rax
+; X64-NEXT: movq (%rdi,%rax), %rax
+; X64-NEXT: addq %rdx, %rax
+; X64-NEXT: ret
+}
+
+
diff --git a/test/CodeGen/X86/fast-isel-mem.ll b/test/CodeGen/X86/fast-isel-mem.ll
index 35ec1e7115b2..8db1936bc20e 100644
--- a/test/CodeGen/X86/fast-isel-mem.ll
+++ b/test/CodeGen/X86/fast-isel-mem.ll
@@ -1,10 +1,8 @@
-; RUN: llc < %s -fast-isel -mtriple=i386-apple-darwin | \
-; RUN: grep lazy_ptr, | count 2
-; RUN: llc < %s -fast-isel -march=x86 -relocation-model=static | \
-; RUN: grep lea
+; RUN: llc < %s -fast-isel -mtriple=i386-apple-darwin | FileCheck %s
@src = external global i32
+; rdar://6653118
define i32 @loadgv() nounwind {
entry:
%0 = load i32* @src, align 4
@@ -12,6 +10,14 @@ entry:
%2 = add i32 %0, %1
store i32 %2, i32* @src
ret i32 %2
+; This should fold one of the loads into the add.
+; CHECK: loadgv:
+; CHECK: movl L_src$non_lazy_ptr, %ecx
+; CHECK: movl (%ecx), %eax
+; CHECK: addl (%ecx), %eax
+; CHECK: movl %eax, (%ecx)
+; CHECK: ret
+
}
%stuff = type { i32 (...)** }
@@ -21,4 +27,8 @@ define void @t(%stuff* %this) nounwind {
entry:
store i32 (...)** getelementptr ([4 x i32 (...)*]* @LotsStuff, i32 0, i32 2), i32 (...)*** null, align 4
ret void
+; CHECK: _t:
+; CHECK: movl $0, %eax
+; CHECK: movl L_LotsStuff$non_lazy_ptr, %ecx
+
}
diff --git a/test/CodeGen/X86/fltused.ll b/test/CodeGen/X86/fltused.ll
new file mode 100644
index 000000000000..2ffcb966782a
--- /dev/null
+++ b/test/CodeGen/X86/fltused.ll
@@ -0,0 +1,19 @@
+; The purpose of this test is to verify that the fltused symbol is emitted when
+; any function is called with floating point arguments on Windows, and that it
+; is not emitted otherwise.
+
+; RUN: llc < %s -mtriple i686-pc-win32 | FileCheck %s --check-prefix WIN32
+; RUN: llc < %s -mtriple x86_64-pc-win32 | FileCheck %s --check-prefix WIN64
+
+@.str = private constant [4 x i8] c"%f\0A\00"
+
+define i32 @main() nounwind {
+entry:
+ %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), double 1.000000e+000) nounwind
+ ret i32 0
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
+; WIN32: .globl __fltused
+; WIN64: .globl _fltused
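The comment at the top of this file also promises the negative case (no fltused symbol when no floating point arguments are involved), which the test above does not exercise. A minimal sketch of how that half might look as a separate test file; the RUN line mirrors the positive test and the exact check placement is an assumption, not part of this patch:
; RUN: llc < %s -mtriple i686-pc-win32 | FileCheck %s --check-prefix WIN32
define i32 @main() nounwind {
entry:
  ret i32 0
}
; WIN32-NOT: _fltused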
diff --git a/test/CodeGen/X86/fp-in-intregs.ll b/test/CodeGen/X86/fp-in-intregs.ll
index 08ea77d75f26..6966cf049789 100644
--- a/test/CodeGen/X86/fp-in-intregs.ll
+++ b/test/CodeGen/X86/fp-in-intregs.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah | not egrep {\(\(xor\|and\)ps\|movd\)}
+; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s
+; CHECK-NOT: {{((xor|and)ps|movd)}}
; These operations should be done in integer registers, eliminating constant
; pool loads, movd's etc.
diff --git a/test/CodeGen/X86/fp-stack-compare.ll b/test/CodeGen/X86/fp-stack-compare.ll
index 4bdf4590b07c..b216914d2391 100644
--- a/test/CodeGen/X86/fp-stack-compare.ll
+++ b/test/CodeGen/X86/fp-stack-compare.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=i386 | \
-; RUN: grep {fucomi.*st.\[12\]}
+; RUN: llc < %s -march=x86 -mcpu=i386 | grep {fucompi.*st.\[12\]}
; PR1012
define float @foo(float* %col.2.0) {
diff --git a/test/CodeGen/X86/ghc-cc.ll b/test/CodeGen/X86/ghc-cc.ll
index 9393cf5a7383..0e65cfdbae30 100644
--- a/test/CodeGen/X86/ghc-cc.ll
+++ b/test/CodeGen/X86/ghc-cc.ll
@@ -11,9 +11,9 @@ define void @zap(i32 %a, i32 %b) nounwind {
entry:
; CHECK: movl {{[0-9]*}}(%esp), %ebx
; CHECK-NEXT: movl {{[0-9]*}}(%esp), %ebp
- ; CHECK-NEXT: call addtwo
+ ; CHECK-NEXT: calll addtwo
%0 = call cc 10 i32 @addtwo(i32 %a, i32 %b)
- ; CHECK: call foo
+ ; CHECK: calll foo
call void @foo() nounwind
ret void
}
diff --git a/test/CodeGen/X86/global-sections.ll b/test/CodeGen/X86/global-sections.ll
index 6d211913b015..39a69e17a100 100644
--- a/test/CodeGen/X86/global-sections.ll
+++ b/test/CodeGen/X86/global-sections.ll
@@ -15,7 +15,7 @@
; const int G2 __attribute__((weak)) = 42;
-@G2 = weak_odr constant i32 42
+@G2 = weak_odr unnamed_addr constant i32 42
; TODO: linux drops this into .rodata, we drop it into ".gnu.linkonce.r.G2"
@@ -26,7 +26,7 @@
; int * const G3 = &G1;
-@G3 = constant i32* @G1
+@G3 = unnamed_addr constant i32* @G1
; DARWIN: .section __DATA,__const
; DARWIN: .globl _G3
@@ -41,7 +41,7 @@
; _Complex long long const G4 = 34;
-@G4 = constant {i64,i64} { i64 34, i64 0 }
+@G4 = unnamed_addr constant {i64,i64} { i64 34, i64 0 }
; DARWIN: .section __TEXT,__const
; DARWIN: _G4:
@@ -66,7 +66,7 @@
@"foo bar" = linkonce global i32 42
; LINUX: .type foo_20_bar,@object
-; LINUX:.section .gnu.linkonce.d.foo_20_bar,"aw",@progbits
+; LINUX: .section .data.foo_20_bar,"aGw",@progbits,foo_20_bar,comdat
; LINUX: .weak foo_20_bar
; LINUX: foo_20_bar:
@@ -76,10 +76,10 @@
; DARWIN: "_foo bar":
; PR4650
-@G6 = weak_odr constant [1 x i8] c"\01"
+@G6 = weak_odr unnamed_addr constant [1 x i8] c"\01"
; LINUX: .type G6,@object
-; LINUX: .section .gnu.linkonce.r.G6,"a",@progbits
+; LINUX: .section .rodata.G6,"aG",@progbits,G6,comdat
; LINUX: .weak G6
; LINUX: G6:
; LINUX: .byte 1
@@ -92,7 +92,7 @@
; DARWIN: .byte 1
-@G7 = constant [10 x i8] c"abcdefghi\00"
+@G7 = unnamed_addr constant [10 x i8] c"abcdefghi\00"
; DARWIN: __TEXT,__cstring,cstring_literals
; DARWIN: .globl _G7
@@ -108,7 +108,7 @@
; LINUX-SECTIONS: .globl G7
-@G8 = constant [4 x i16] [ i16 1, i16 2, i16 3, i16 0 ]
+@G8 = unnamed_addr constant [4 x i16] [ i16 1, i16 2, i16 3, i16 0 ]
; DARWIN: .section __TEXT,__const
; DARWIN: .globl _G8
@@ -118,7 +118,7 @@
; LINUX: .globl G8
; LINUX:G8:
-@G9 = constant [4 x i32] [ i32 1, i32 2, i32 3, i32 0 ]
+@G9 = unnamed_addr constant [4 x i32] [ i32 1, i32 2, i32 3, i32 0 ]
; DARWIN: .globl _G9
; DARWIN: _G9:
diff --git a/test/CodeGen/X86/inline-asm-h.ll b/test/CodeGen/X86/inline-asm-h.ll
new file mode 100644
index 000000000000..53cf419bd11a
--- /dev/null
+++ b/test/CodeGen/X86/inline-asm-h.ll
@@ -0,0 +1,12 @@
+; RUN: llc -mtriple=x86_64-pc-linux-gnu < %s | FileCheck %s
+
+@foobar = common global i32 0, align 4
+
+define void @zed() nounwind {
+entry:
+ call void asm "movq %mm2,${0:H}", "=*m,~{dirflag},~{fpsr},~{flags}"(i32* @foobar) nounwind
+ ret void
+}
+
+; CHECK: zed
+; CHECK: movq %mm2,foobar+8(%rip)
diff --git a/test/CodeGen/X86/inline-asm-ptr-cast.ll b/test/CodeGen/X86/inline-asm-ptr-cast.ll
new file mode 100644
index 000000000000..50e302101814
--- /dev/null
+++ b/test/CodeGen/X86/inline-asm-ptr-cast.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu <%s
+; ModuleID = 'bug.c'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@func.flagmask = internal constant i64 1, align 8
+
+define void @func() nounwind {
+entry:
+ %src = alloca i32, align 4
+ %dst = alloca i32, align 4
+ %flags = alloca i64, align 8
+ %newflags = alloca i64, align 8
+ store i32 0, i32* %src, align 4
+ store i32 0, i32* %dst, align 4
+ store i64 1, i64* %flags, align 8
+ store i64 -1, i64* %newflags, align 8
+ %0 = bitcast i32* %dst to i8*
+ %tmp = load i64* %flags, align 8
+ %and = and i64 %tmp, 1
+ %1 = bitcast i32* %src to i8*
+ %tmp1 = load i8* %1
+ %2 = bitcast i32* %dst to i8*
+ %tmp2 = load i8* %2
+ call void asm "pushfq \0Aandq $2, (%rsp) \0Aorq $3, (%rsp) \0Apopfq \0Aaddb $4, $1 \0Apushfq \0Apopq $0 \0A", "=*&rm,=*&rm,i,r,r,1,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %newflags, i8* %0, i64 -2, i64 %and, i8 %tmp1, i8 %tmp2) nounwind
+ ret void
+}
diff --git a/test/CodeGen/X86/insertelement-legalize.ll b/test/CodeGen/X86/insertelement-legalize.ll
index 18aade2bb302..3805cbbaaaf8 100644
--- a/test/CodeGen/X86/insertelement-legalize.ll
+++ b/test/CodeGen/X86/insertelement-legalize.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -disable-mmx
+; RUN: llc < %s -march=x86
; Test to check that we properly legalize an insert vector element
define void @test(<2 x i64> %val, <2 x i64>* %dst, i64 %x) nounwind {
diff --git a/test/CodeGen/X86/legalize-sub-zero-2.ll b/test/CodeGen/X86/legalize-sub-zero-2.ll
new file mode 100644
index 000000000000..f02ca715aeeb
--- /dev/null
+++ b/test/CodeGen/X86/legalize-sub-zero-2.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin
+
+define fastcc void @foo(i32 %type) nounwind optsize {
+entry:
+ switch i32 %type, label %bb26 [
+ i32 33634, label %bb11
+ i32 5121, label %bb27
+ ]
+
+bb11: ; preds = %entry
+ br label %bb27
+
+bb26: ; preds = %entry
+ unreachable
+
+bb27: ; preds = %bb11, %entry
+ %srcpb.0 = phi i32 [ 1, %bb11 ], [ 0, %entry ]
+ br i1 undef, label %bb348, label %bb30.lr.ph
+
+bb30.lr.ph: ; preds = %bb27
+ %.sum743 = shl i32 %srcpb.0, 1
+ %0 = mul i32 %srcpb.0, -2
+ %.sum745 = add i32 %.sum743, %0
+ br i1 undef, label %bb70, label %bb71
+
+bb70: ; preds = %bb30.lr.ph
+ unreachable
+
+bb71: ; preds = %bb30.lr.ph
+ br i1 undef, label %bb92, label %bb80
+
+bb80: ; preds = %bb71
+ unreachable
+
+bb92: ; preds = %bb71
+ %1 = getelementptr inbounds i8* undef, i32 %.sum745
+ unreachable
+
+bb348: ; preds = %bb27
+ ret void
+}
diff --git a/test/CodeGen/X86/legalize-sub-zero.ll b/test/CodeGen/X86/legalize-sub-zero.ll
new file mode 100644
index 000000000000..ee76d468e811
--- /dev/null
+++ b/test/CodeGen/X86/legalize-sub-zero.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -mtriple=i686-pc-win32
+
+;target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+;target triple = "i686-pc-win32"
+
+define void @test() nounwind {
+ %1 = fdiv <3 x double> zeroinitializer, undef
+ %2 = fdiv <2 x double> zeroinitializer, undef
+ %3 = shufflevector <2 x double> %2, <2 x double> undef, <3 x i32> <i32 0, i32
+1, i32 undef>
+ %4 = insertelement <3 x double> %3, double undef, i32 2
+ %5 = bitcast <3 x double> %1 to <3 x i64>
+ %6 = bitcast <3 x double> %4 to <3 x i64>
+ %7 = sub <3 x i64> %5, %6
+ %8 = shufflevector <3 x i64> %7, <3 x i64> undef, <2 x i32> <i32 0, i32 1>
+ %9 = xor <2 x i64> %8, zeroinitializer
+ %10 = add nsw <2 x i64> %9, zeroinitializer
+ %11 = shufflevector <2 x i64> %10, <2 x i64> undef, <3 x i32> <i32 0, i32 1,
+i32 undef>
+ %12 = insertelement <3 x i64> %11, i64 0, i32 2
+ %13 = shufflevector <3 x i64> %12, <3 x i64> undef, <4 x i32> <i32 0, i32 1,
+i32 2, i32 3>
+ %14 = shufflevector <4 x i64> %13, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
+ %15 = bitcast <2 x i64> %14 to <4 x i32>
+ %16 = shufflevector <4 x i32> %15, <4 x i32> undef, <4 x i32> <i32 0, i32 2,
+i32 0, i32 2>
+ %17 = bitcast <4 x i32> %16 to <2 x i64>
+ %18 = shufflevector <2 x i64> %17, <2 x i64> undef, <2 x i32> <i32 0, i32 2>
+ %19 = bitcast <2 x i64> %18 to <4 x i32>
+ %20 = shufflevector <4 x i32> %19, <4 x i32> undef, <3 x i32> <i32 0, i32 1,
+i32 2>
+ %21 = or <3 x i32> %20, zeroinitializer
+ store <3 x i32> %21, <3 x i32> addrspace(1)* undef, align 16
+ ret void
+}
diff --git a/test/CodeGen/X86/legalizedag_vec.ll b/test/CodeGen/X86/legalizedag_vec.ll
index 574b46acea60..dff693120fb6 100644
--- a/test/CodeGen/X86/legalizedag_vec.ll
+++ b/test/CodeGen/X86/legalizedag_vec.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=sse2 -disable-mmx -o %t
-; RUN: grep {call.*divdi3} %t | count 2
+; RUN: llc < %s -march=x86 -mattr=sse2 | FileCheck %s
; Test case for r63760 where we generate a legalization assert that an illegal
@@ -12,4 +11,7 @@
define <2 x i64> @test_long_div(<2 x i64> %num, <2 x i64> %div) {
%div.r = sdiv <2 x i64> %num, %div
ret <2 x i64> %div.r
-}
+}
+
+; CHECK: call{{.*(divdi3|alldiv)}}
+; CHECK: call{{.*(divdi3|alldiv)}}
diff --git a/test/CodeGen/X86/licm-symbol.ll b/test/CodeGen/X86/licm-symbol.ll
index 08306c2950e2..c3d1938e9dbd 100644
--- a/test/CodeGen/X86/licm-symbol.ll
+++ b/test/CodeGen/X86/licm-symbol.ll
@@ -3,7 +3,7 @@
; MachineLICM should be able to hoist the sF reference out of the loop.
; CHECK: pushl %esi
-; CHECK: subl $4, %esp
+; CHECK: pushl
; CHECK: movl $176, %esi
; CHECK: addl L___sF$non_lazy_ptr, %esi
; CHECK: .align 4, 0x90
diff --git a/test/CodeGen/X86/loop-blocks.ll b/test/CodeGen/X86/loop-blocks.ll
index 354d08206972..faba63007127 100644
--- a/test/CodeGen/X86/loop-blocks.ll
+++ b/test/CodeGen/X86/loop-blocks.ll
@@ -70,6 +70,7 @@ exit:
; Same as slightly_more_involved, but block_a is now a CFG diamond with
; fallthrough edges which should be preserved.
+; "callq block_a_merge_func" is tail duped.
; CHECK: yet_more_involved:
; CHECK: jmp .LBB2_1
@@ -78,12 +79,12 @@ exit:
; CHECK-NEXT: callq bar99
; CHECK-NEXT: callq get
; CHECK-NEXT: cmpl $2999, %eax
-; CHECK-NEXT: jg .LBB2_6
-; CHECK-NEXT: callq block_a_true_func
-; CHECK-NEXT: jmp .LBB2_7
-; CHECK-NEXT: .LBB2_6:
+; CHECK-NEXT: jle .LBB2_5
; CHECK-NEXT: callq block_a_false_func
-; CHECK-NEXT: .LBB2_7:
+; CHECK-NEXT: callq block_a_merge_func
+; CHECK-NEXT: jmp .LBB2_1
+; CHECK-NEXT: .LBB2_5:
+; CHECK-NEXT: callq block_a_true_func
; CHECK-NEXT: callq block_a_merge_func
; CHECK-NEXT: .LBB2_1:
; CHECK-NEXT: callq body
diff --git a/test/CodeGen/X86/lsr-reuse.ll b/test/CodeGen/X86/lsr-reuse.ll
index d2ff58be1055..2a9762928329 100644
--- a/test/CodeGen/X86/lsr-reuse.ll
+++ b/test/CodeGen/X86/lsr-reuse.ll
@@ -353,11 +353,11 @@ return:
; CHECK: count_me_3:
; CHECK: call
-; CHECK: movsd (%r15,%r13,8), %xmm0
-; CHECK: mulsd (%r14,%r13,8), %xmm0
-; CHECK: movsd %xmm0, (%r12,%r13,8)
-; CHECK: incq %r13
-; CHECK: cmpq %r13, %rbx
+; CHECK: movsd (%r{{[^,]*}},%r{{[^,]*}},8), %xmm0
+; CHECK: mulsd (%r{{[^,]*}},%r{{[^,]*}},8), %xmm0
+; CHECK: movsd %xmm0, (%r{{[^,]*}},%r{{[^,]*}},8)
+; CHECK: incq %r{{.*}}
+; CHECK: cmpq %r{{.*}}, %r{{.*}}
; CHECK: jne
declare void @use(i64)
@@ -389,7 +389,7 @@ return:
; rdar://7657764
; CHECK: asd:
-; CHECK: BB9_5:
+; CHECK: BB9_4:
; CHECK-NEXT: addl (%r{{[^,]*}},%rdi,4), %e
; CHECK-NEXT: incq %rdi
; CHECK-NEXT: cmpq %rdi, %r{{[^,]*}}
@@ -464,7 +464,7 @@ bb5: ; preds = %bb3, %entry
; And the one at %bb68, where we want to be sure to use superhero mode:
-; CHECK: BB10_9:
+; CHECK: BB10_7:
; CHECK-NEXT: movaps 48(%r{{[^,]*}}), %xmm{{.*}}
; CHECK-NEXT: mulps %xmm{{.*}}, %xmm{{.*}}
; CHECK-NEXT: movaps 32(%r{{[^,]*}}), %xmm{{.*}}
@@ -484,7 +484,6 @@ bb5: ; preds = %bb3, %entry
; CHECK-NEXT: addq $64, %r{{.*}}
; CHECK-NEXT: addq $64, %r{{.*}}
; CHECK-NEXT: addq $-16, %r{{.*}}
-; CHECK-NEXT: BB10_10:
; CHECK-NEXT: cmpq $15, %r{{.*}}
; CHECK-NEXT: jg
diff --git a/test/CodeGen/X86/machine-cse.ll b/test/CodeGen/X86/machine-cse.ll
index a8afdc84c51f..e284776ed02d 100644
--- a/test/CodeGen/X86/machine-cse.ll
+++ b/test/CodeGen/X86/machine-cse.ll
@@ -37,3 +37,43 @@ bb3:
declare void @bar(i32*)
declare fastcc i8* @foo(%struct.s2*) nounwind
+
+; rdar://8773371
+
+declare void @printf(...) nounwind
+
+define void @commute(i32 %test_case, i32 %scale) nounwind ssp {
+; CHECK: commute:
+entry:
+ switch i32 %test_case, label %sw.bb307 [
+ i32 1, label %sw.bb
+ i32 2, label %sw.bb
+ i32 3, label %sw.bb
+ ]
+
+sw.bb: ; preds = %entry, %entry, %entry
+ %mul = mul nsw i32 %test_case, 3
+ %mul20 = mul nsw i32 %mul, %scale
+ br i1 undef, label %if.end34, label %sw.bb307
+
+if.end34: ; preds = %sw.bb
+; CHECK: %if.end34
+; CHECK: imull
+; CHECK: leal
+; CHECK-NOT: imull
+ tail call void (...)* @printf(i32 %test_case, i32 %mul20) nounwind
+ %tmp = mul i32 %scale, %test_case
+ %tmp752 = mul i32 %tmp, 3
+ %tmp753 = zext i32 %tmp752 to i64
+ br label %bb.nph743.us
+
+for.body53.us: ; preds = %bb.nph743.us, %for.body53.us
+ %exitcond = icmp eq i64 undef, %tmp753
+ br i1 %exitcond, label %bb.nph743.us, label %for.body53.us
+
+bb.nph743.us: ; preds = %for.body53.us, %if.end34
+ br label %for.body53.us
+
+sw.bb307: ; preds = %sw.bb, %entry
+ ret void
+}
diff --git a/test/CodeGen/X86/memcmp.ll b/test/CodeGen/X86/memcmp.ll
index b90d2e211878..36be1f308ccd 100644
--- a/test/CodeGen/X86/memcmp.ll
+++ b/test/CodeGen/X86/memcmp.ll
@@ -20,8 +20,8 @@ bb: ; preds = %entry
return: ; preds = %entry
ret void
; CHECK: memcmp2:
-; CHECK: movw (%rsi), %ax
-; CHECK: cmpw %ax, (%rdi)
+; CHECK: movw (%rdi), %ax
+; CHECK: cmpw (%rsi), %ax
}
define void @memcmp2a(i8* %X, i32* nocapture %P) nounwind {
@@ -54,8 +54,8 @@ bb: ; preds = %entry
return: ; preds = %entry
ret void
; CHECK: memcmp4:
-; CHECK: movl (%rsi), %eax
-; CHECK: cmpl %eax, (%rdi)
+; CHECK: movl (%rdi), %eax
+; CHECK: cmpl (%rsi), %eax
}
define void @memcmp4a(i8* %X, i32* nocapture %P) nounwind {
@@ -87,8 +87,8 @@ bb: ; preds = %entry
return: ; preds = %entry
ret void
; CHECK: memcmp8:
-; CHECK: movq (%rsi), %rax
-; CHECK: cmpq %rax, (%rdi)
+; CHECK: movq (%rdi), %rax
+; CHECK: cmpq (%rsi), %rax
}
define void @memcmp8a(i8* %X, i32* nocapture %P) nounwind {
diff --git a/test/CodeGen/X86/memcpy.ll b/test/CodeGen/X86/memcpy.ll
index 7bc31bec163d..72342cbacb4f 100644
--- a/test/CodeGen/X86/memcpy.ll
+++ b/test/CodeGen/X86/memcpy.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s -check-prefix=LINUX
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s -check-prefix=DARWIN
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
@@ -9,8 +10,8 @@ entry:
tail call void @llvm.memcpy.p0i8.p0i8.i64( i8* %a, i8* %b, i64 %n, i32 1, i1 0 )
ret i8* %a
-; CHECK: test1:
-; CHECK: memcpy
+; LINUX: test1:
+; LINUX: memcpy
}
; Variable memcpy's should lower to calls.
@@ -21,18 +22,41 @@ entry:
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp14, i8* %tmp25, i64 %n, i32 8, i1 0 )
ret i8* %tmp14
-; CHECK: test2:
-; CHECK: memcpy
+; LINUX: test2:
+; LINUX: memcpy
}
; Large constant memcpy's should lower to a call when optimizing for size.
; PR6623
+
+; On the other hand, Darwin's definition of -Os is optimizing for size without
+; hurting performance, so it should just ignore optsize when expanding memcpy.
+; rdar://8821501
define void @test3(i8* nocapture %A, i8* nocapture %B) nounwind optsize noredzone {
entry:
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 64, i32 1, i1 false)
ret void
-; CHECK: test3:
-; CHECK: memcpy
+; LINUX: test3:
+; LINUX: memcpy
+
+; DARWIN: test3:
+; DARWIN-NOT: memcpy
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
}
; Large constant memcpy's should be inlined when not optimizing for size.
@@ -40,18 +64,18 @@ define void @test4(i8* nocapture %A, i8* nocapture %B) nounwind noredzone {
entry:
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 64, i32 1, i1 false)
ret void
-; CHECK: test4:
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
+; LINUX: test4:
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
}
diff --git a/test/CodeGen/X86/memmove-0.ll b/test/CodeGen/X86/memmove-0.ll
deleted file mode 100644
index d4050689f594..000000000000
--- a/test/CodeGen/X86/memmove-0.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | grep {call memcpy}
-
-declare void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 %a)
-
-define void @foo(i8* noalias %d, i8* noalias %s, i64 %l)
-{
- call void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 1)
- ret void
-}
diff --git a/test/CodeGen/X86/memmove-1.ll b/test/CodeGen/X86/memmove-1.ll
deleted file mode 100644
index 2057be88174d..000000000000
--- a/test/CodeGen/X86/memmove-1.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | grep {call memmove}
-
-declare void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 %a)
-
-define void @foo(i8* %d, i8* %s, i64 %l)
-{
- call void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 1)
- ret void
-}
diff --git a/test/CodeGen/X86/memmove-2.ll b/test/CodeGen/X86/memmove-2.ll
deleted file mode 100644
index 68a9f4dfb9cb..000000000000
--- a/test/CodeGen/X86/memmove-2.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | not grep call
-
-declare void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 %a)
-
-define void @foo(i8* noalias %d, i8* noalias %s)
-{
- call void @llvm.memmove.i64(i8* %d, i8* %s, i64 32, i32 1)
- ret void
-}
diff --git a/test/CodeGen/X86/memmove-3.ll b/test/CodeGen/X86/memmove-3.ll
deleted file mode 100644
index d8a419c07457..000000000000
--- a/test/CodeGen/X86/memmove-3.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | grep {call memmove}
-
-declare void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 %a)
-
-define void @foo(i8* %d, i8* %s)
-{
- call void @llvm.memmove.i64(i8* %d, i8* %s, i64 32, i32 1)
- ret void
-}
diff --git a/test/CodeGen/X86/memset-2.ll b/test/CodeGen/X86/memset-2.ll
index 0e1559548e2b..993583b4a49b 100644
--- a/test/CodeGen/X86/memset-2.ll
+++ b/test/CodeGen/X86/memset-2.ll
@@ -1,11 +1,11 @@
-; RUN: llc -mtriple=i386-apple-darwin < %s | FileCheck %s
+; RUN: llc -mtriple=i386-apple-darwin -mcpu=yonah < %s | FileCheck %s
declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind
define fastcc void @t1() nounwind {
entry:
; CHECK: t1:
-; CHECK: call _memset
+; CHECK: calll _memset
call void @llvm.memset.i32( i8* null, i8 0, i32 188, i32 1 ) nounwind
unreachable
}
@@ -13,7 +13,27 @@ entry:
define fastcc void @t2(i8 signext %c) nounwind {
entry:
; CHECK: t2:
-; CHECK: call _memset
+; CHECK: calll _memset
call void @llvm.memset.i32( i8* undef, i8 %c, i32 76, i32 1 ) nounwind
unreachable
}
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+
+define void @t3(i8* nocapture %s, i8 %a) nounwind {
+entry:
+ tail call void @llvm.memset.p0i8.i32(i8* %s, i8 %a, i32 8, i32 1, i1 false)
+ ret void
+; CHECK: t3:
+; CHECK: imull $16843009
+}
+
+define void @t4(i8* nocapture %s, i8 %a) nounwind {
+entry:
+ tail call void @llvm.memset.p0i8.i32(i8* %s, i8 %a, i32 15, i32 1, i1 false)
+ ret void
+; CHECK: t4:
+; CHECK: imull $16843009
+; CHECK-NOT: imul
+; CHECK: ret
+}
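For reference, the constant in the t3/t4 checks above is presumably the byte-splat multiplier: 16843009 = 0x01010101, so multiplying the zero-extended i8 value by it replicates the byte into every lane of a 32-bit word (for example 0xAB * 0x01010101 = 0xABABABAB), which lets a small variable-value memset be expanded inline with an imull instead of a call to _memset.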
diff --git a/test/CodeGen/X86/memset64-on-x86-32.ll b/test/CodeGen/X86/memset64-on-x86-32.ll
index c0cd271d985e..3f069b4a1aa8 100644
--- a/test/CodeGen/X86/memset64-on-x86-32.ll
+++ b/test/CodeGen/X86/memset64-on-x86-32.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=nehalem | grep movaps | count 5
+; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=nehalem | grep movups | count 5
; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=core2 | grep movl | count 20
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | grep movq | count 10
diff --git a/test/CodeGen/X86/mingw-alloca.ll b/test/CodeGen/X86/mingw-alloca.ll
index 7dcd84d8a157..ded4b73d0931 100644
--- a/test/CodeGen/X86/mingw-alloca.ll
+++ b/test/CodeGen/X86/mingw-alloca.ll
@@ -6,7 +6,7 @@ target triple = "i386-pc-mingw32"
define void @foo1(i32 %N) nounwind {
entry:
; CHECK: _foo1:
-; CHECK: call __alloca
+; CHECK: calll __alloca
%tmp14 = alloca i32, i32 %N ; <i32*> [#uses=1]
call void @bar1( i32* %tmp14 )
ret void
@@ -19,7 +19,7 @@ entry:
; CHECK: _foo2:
; CHECK: andl $-16, %esp
; CHECK: pushl %eax
-; CHECK: call __alloca
+; CHECK: calll __alloca
; CHECK: movl 8028(%esp), %eax
%A2 = alloca [2000 x i32], align 16 ; <[2000 x i32]*> [#uses=1]
%A2.sub = getelementptr [2000 x i32]* %A2, i32 0, i32 0 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/misaligned-memset.ll b/test/CodeGen/X86/misaligned-memset.ll
new file mode 100644
index 000000000000..21f8bf2bf29e
--- /dev/null
+++ b/test/CodeGen/X86/misaligned-memset.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=nehalem < %s | FileCheck %s
+
+@a = common global [3 x i64] zeroinitializer, align 16
+
+define i32 @main() nounwind ssp {
+; CHECK: movups
+entry:
+ %retval = alloca i32, align 4
+ store i32 0, i32* %retval
+ call void @llvm.memset.p0i8.i64(i8* bitcast (i64* getelementptr inbounds ([3 x i64]* @a, i32 0, i64 1) to i8*), i8 0, i64 16, i32 1, i1 false)
+ %0 = load i32* %retval
+ ret i32 %0
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/CodeGen/X86/mmx-arg-passing.ll b/test/CodeGen/X86/mmx-arg-passing.ll
index 426e98e019bc..b348512b5798 100644
--- a/test/CodeGen/X86/mmx-arg-passing.ll
+++ b/test/CodeGen/X86/mmx-arg-passing.ll
@@ -1,24 +1,27 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+mmx | grep mm0 | count 3
-; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+mmx | grep esp | count 1
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+mmx | grep mm0 | count 1
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+mmx | grep esp | count 2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep xmm0
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep rdi
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | not grep movups
;
; On Darwin x86-32, v8i8, v4i16, v2i32 values are passed in MM[0-2].
-; On Darwin x86-32, v1i64 values are passed in memory.
+; On Darwin x86-32, v1i64 values are passed in memory. In this example, they
+; are never moved into an MM register at all.
; On Darwin x86-64, v8i8, v4i16, v2i32 values are passed in XMM[0-7].
; On Darwin x86-64, v1i64 values are passed in 64-bit GPRs.
-@u1 = external global <8 x i8>
+@u1 = external global x86_mmx
-define void @t1(<8 x i8> %v1) nounwind {
- store <8 x i8> %v1, <8 x i8>* @u1, align 8
+define void @t1(x86_mmx %v1) nounwind {
+ store x86_mmx %v1, x86_mmx* @u1, align 8
ret void
}
-@u2 = external global <1 x i64>
+@u2 = external global x86_mmx
define void @t2(<1 x i64> %v1) nounwind {
- store <1 x i64> %v1, <1 x i64>* @u2, align 8
+ %tmp = bitcast <1 x i64> %v1 to x86_mmx
+ store x86_mmx %tmp, x86_mmx* @u2, align 8
ret void
}
+
diff --git a/test/CodeGen/X86/mmx-arg-passing2.ll b/test/CodeGen/X86/mmx-arg-passing2.ll
index c42af082364c..c132d311b94b 100644
--- a/test/CodeGen/X86/mmx-arg-passing2.ll
+++ b/test/CodeGen/X86/mmx-arg-passing2.ll
@@ -1,17 +1,21 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep movq2dq | count 1
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep movdq2q | count 2
+; Since the add is not an MMX add, we don't have a movq2dq any more.
@g_v8qi = external global <8 x i8>
define void @t1() nounwind {
%tmp3 = load <8 x i8>* @g_v8qi, align 8
- %tmp4 = tail call i32 (...)* @pass_v8qi( <8 x i8> %tmp3 ) nounwind
+ %tmp3a = bitcast <8 x i8> %tmp3 to x86_mmx
+ %tmp4 = tail call i32 (...)* @pass_v8qi( x86_mmx %tmp3a ) nounwind
ret void
}
-define void @t2(<8 x i8> %v1, <8 x i8> %v2) nounwind {
- %tmp3 = add <8 x i8> %v1, %v2
- %tmp4 = tail call i32 (...)* @pass_v8qi( <8 x i8> %tmp3 ) nounwind
+define void @t2(x86_mmx %v1, x86_mmx %v2) nounwind {
+ %v1a = bitcast x86_mmx %v1 to <8 x i8>
+ %v2b = bitcast x86_mmx %v2 to <8 x i8>
+ %tmp3 = add <8 x i8> %v1a, %v2b
+ %tmp3a = bitcast <8 x i8> %tmp3 to x86_mmx
+ %tmp4 = tail call i32 (...)* @pass_v8qi( x86_mmx %tmp3a ) nounwind
ret void
}
diff --git a/test/CodeGen/X86/mmx-arith.ll b/test/CodeGen/X86/mmx-arith.ll
index e4dfdbfe1bb1..681748732401 100644
--- a/test/CodeGen/X86/mmx-arith.ll
+++ b/test/CodeGen/X86/mmx-arith.ll
@@ -1,131 +1,309 @@
; RUN: llc < %s -march=x86 -mattr=+mmx
;; A basic sanity check to make sure that MMX arithmetic actually compiles.
+;; First is a straight translation of the original with bitcasts as needed.
-define void @foo(<8 x i8>* %A, <8 x i8>* %B) {
+define void @foo(x86_mmx* %A, x86_mmx* %B) {
entry:
- %tmp1 = load <8 x i8>* %A ; <<8 x i8>> [#uses=1]
- %tmp3 = load <8 x i8>* %B ; <<8 x i8>> [#uses=1]
- %tmp4 = add <8 x i8> %tmp1, %tmp3 ; <<8 x i8>> [#uses=2]
- store <8 x i8> %tmp4, <8 x i8>* %A
- %tmp7 = load <8 x i8>* %B ; <<8 x i8>> [#uses=1]
- %tmp12 = tail call <8 x i8> @llvm.x86.mmx.padds.b( <8 x i8> %tmp4, <8 x i8> %tmp7 ) ; <<8 x i8>> [#uses=2]
- store <8 x i8> %tmp12, <8 x i8>* %A
- %tmp16 = load <8 x i8>* %B ; <<8 x i8>> [#uses=1]
- %tmp21 = tail call <8 x i8> @llvm.x86.mmx.paddus.b( <8 x i8> %tmp12, <8 x i8> %tmp16 ) ; <<8 x i8>> [#uses=2]
- store <8 x i8> %tmp21, <8 x i8>* %A
- %tmp27 = load <8 x i8>* %B ; <<8 x i8>> [#uses=1]
- %tmp28 = sub <8 x i8> %tmp21, %tmp27 ; <<8 x i8>> [#uses=2]
- store <8 x i8> %tmp28, <8 x i8>* %A
- %tmp31 = load <8 x i8>* %B ; <<8 x i8>> [#uses=1]
- %tmp36 = tail call <8 x i8> @llvm.x86.mmx.psubs.b( <8 x i8> %tmp28, <8 x i8> %tmp31 ) ; <<8 x i8>> [#uses=2]
- store <8 x i8> %tmp36, <8 x i8>* %A
- %tmp40 = load <8 x i8>* %B ; <<8 x i8>> [#uses=1]
- %tmp45 = tail call <8 x i8> @llvm.x86.mmx.psubus.b( <8 x i8> %tmp36, <8 x i8> %tmp40 ) ; <<8 x i8>> [#uses=2]
- store <8 x i8> %tmp45, <8 x i8>* %A
- %tmp51 = load <8 x i8>* %B ; <<8 x i8>> [#uses=1]
- %tmp52 = mul <8 x i8> %tmp45, %tmp51 ; <<8 x i8>> [#uses=2]
- store <8 x i8> %tmp52, <8 x i8>* %A
- %tmp57 = load <8 x i8>* %B ; <<8 x i8>> [#uses=1]
- %tmp58 = and <8 x i8> %tmp52, %tmp57 ; <<8 x i8>> [#uses=2]
- store <8 x i8> %tmp58, <8 x i8>* %A
- %tmp63 = load <8 x i8>* %B ; <<8 x i8>> [#uses=1]
- %tmp64 = or <8 x i8> %tmp58, %tmp63 ; <<8 x i8>> [#uses=2]
- store <8 x i8> %tmp64, <8 x i8>* %A
- %tmp69 = load <8 x i8>* %B ; <<8 x i8>> [#uses=1]
- %tmp70 = xor <8 x i8> %tmp64, %tmp69 ; <<8 x i8>> [#uses=1]
- store <8 x i8> %tmp70, <8 x i8>* %A
+ %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1]
+ %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp1a = bitcast x86_mmx %tmp1 to <8 x i8>
+ %tmp3a = bitcast x86_mmx %tmp3 to <8 x i8>
+ %tmp4 = add <8 x i8> %tmp1a, %tmp3a ; <<8 x i8>> [#uses=2]
+ %tmp4a = bitcast <8 x i8> %tmp4 to x86_mmx
+ store x86_mmx %tmp4a, x86_mmx* %A
+ %tmp7 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.b( x86_mmx %tmp4a, x86_mmx %tmp7 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp12, x86_mmx* %A
+ %tmp16 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.b( x86_mmx %tmp12, x86_mmx %tmp16 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp21, x86_mmx* %A
+ %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp21a = bitcast x86_mmx %tmp21 to <8 x i8>
+ %tmp27a = bitcast x86_mmx %tmp27 to <8 x i8>
+ %tmp28 = sub <8 x i8> %tmp21a, %tmp27a ; <<8 x i8>> [#uses=2]
+ %tmp28a = bitcast <8 x i8> %tmp28 to x86_mmx
+ store x86_mmx %tmp28a, x86_mmx* %A
+ %tmp31 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.b( x86_mmx %tmp28a, x86_mmx %tmp31 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp36, x86_mmx* %A
+ %tmp40 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.b( x86_mmx %tmp36, x86_mmx %tmp40 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp45, x86_mmx* %A
+ %tmp51 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp45a = bitcast x86_mmx %tmp45 to <8 x i8>
+ %tmp51a = bitcast x86_mmx %tmp51 to <8 x i8>
+ %tmp52 = mul <8 x i8> %tmp45a, %tmp51a ; <<8 x i8>> [#uses=2]
+ %tmp52a = bitcast <8 x i8> %tmp52 to x86_mmx
+ store x86_mmx %tmp52a, x86_mmx* %A
+ %tmp57 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp57a = bitcast x86_mmx %tmp57 to <8 x i8>
+ %tmp58 = and <8 x i8> %tmp52, %tmp57a ; <<8 x i8>> [#uses=2]
+ %tmp58a = bitcast <8 x i8> %tmp58 to x86_mmx
+ store x86_mmx %tmp58a, x86_mmx* %A
+ %tmp63 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp63a = bitcast x86_mmx %tmp63 to <8 x i8>
+ %tmp64 = or <8 x i8> %tmp58, %tmp63a ; <<8 x i8>> [#uses=2]
+ %tmp64a = bitcast <8 x i8> %tmp64 to x86_mmx
+ store x86_mmx %tmp64a, x86_mmx* %A
+ %tmp69 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp69a = bitcast x86_mmx %tmp69 to <8 x i8>
+ %tmp64b = bitcast x86_mmx %tmp64a to <8 x i8>
+ %tmp70 = xor <8 x i8> %tmp64b, %tmp69a ; <<8 x i8>> [#uses=1]
+ %tmp70a = bitcast <8 x i8> %tmp70 to x86_mmx
+ store x86_mmx %tmp70a, x86_mmx* %A
tail call void @llvm.x86.mmx.emms( )
ret void
}
-define void @baz(<2 x i32>* %A, <2 x i32>* %B) {
+define void @baz(x86_mmx* %A, x86_mmx* %B) {
entry:
- %tmp1 = load <2 x i32>* %A ; <<2 x i32>> [#uses=1]
- %tmp3 = load <2 x i32>* %B ; <<2 x i32>> [#uses=1]
- %tmp4 = add <2 x i32> %tmp1, %tmp3 ; <<2 x i32>> [#uses=2]
- store <2 x i32> %tmp4, <2 x i32>* %A
- %tmp9 = load <2 x i32>* %B ; <<2 x i32>> [#uses=1]
- %tmp10 = sub <2 x i32> %tmp4, %tmp9 ; <<2 x i32>> [#uses=2]
- store <2 x i32> %tmp10, <2 x i32>* %A
- %tmp15 = load <2 x i32>* %B ; <<2 x i32>> [#uses=1]
- %tmp16 = mul <2 x i32> %tmp10, %tmp15 ; <<2 x i32>> [#uses=2]
- store <2 x i32> %tmp16, <2 x i32>* %A
- %tmp21 = load <2 x i32>* %B ; <<2 x i32>> [#uses=1]
- %tmp22 = and <2 x i32> %tmp16, %tmp21 ; <<2 x i32>> [#uses=2]
- store <2 x i32> %tmp22, <2 x i32>* %A
- %tmp27 = load <2 x i32>* %B ; <<2 x i32>> [#uses=1]
- %tmp28 = or <2 x i32> %tmp22, %tmp27 ; <<2 x i32>> [#uses=2]
- store <2 x i32> %tmp28, <2 x i32>* %A
- %tmp33 = load <2 x i32>* %B ; <<2 x i32>> [#uses=1]
- %tmp34 = xor <2 x i32> %tmp28, %tmp33 ; <<2 x i32>> [#uses=1]
- store <2 x i32> %tmp34, <2 x i32>* %A
+ %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1]
+ %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp1a = bitcast x86_mmx %tmp1 to <2 x i32>
+ %tmp3a = bitcast x86_mmx %tmp3 to <2 x i32>
+ %tmp4 = add <2 x i32> %tmp1a, %tmp3a ; <<2 x i32>> [#uses=2]
+ %tmp4a = bitcast <2 x i32> %tmp4 to x86_mmx
+ store x86_mmx %tmp4a, x86_mmx* %A
+ %tmp9 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp9a = bitcast x86_mmx %tmp9 to <2 x i32>
+ %tmp10 = sub <2 x i32> %tmp4, %tmp9a ; <<2 x i32>> [#uses=2]
+ %tmp10a = bitcast <2 x i32> %tmp4 to x86_mmx
+ store x86_mmx %tmp10a, x86_mmx* %A
+ %tmp15 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp10b = bitcast x86_mmx %tmp10a to <2 x i32>
+ %tmp15a = bitcast x86_mmx %tmp15 to <2 x i32>
+ %tmp16 = mul <2 x i32> %tmp10b, %tmp15a ; <<2 x i32>> [#uses=2]
+ %tmp16a = bitcast <2 x i32> %tmp16 to x86_mmx
+ store x86_mmx %tmp16a, x86_mmx* %A
+ %tmp21 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp16b = bitcast x86_mmx %tmp16a to <2 x i32>
+ %tmp21a = bitcast x86_mmx %tmp21 to <2 x i32>
+ %tmp22 = and <2 x i32> %tmp16b, %tmp21a ; <<2 x i32>> [#uses=2]
+ %tmp22a = bitcast <2 x i32> %tmp22 to x86_mmx
+ store x86_mmx %tmp22a, x86_mmx* %A
+ %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp22b = bitcast x86_mmx %tmp22a to <2 x i32>
+ %tmp27a = bitcast x86_mmx %tmp27 to <2 x i32>
+ %tmp28 = or <2 x i32> %tmp22b, %tmp27a ; <<2 x i32>> [#uses=2]
+ %tmp28a = bitcast <2 x i32> %tmp28 to x86_mmx
+ store x86_mmx %tmp28a, x86_mmx* %A
+ %tmp33 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp28b = bitcast x86_mmx %tmp28a to <2 x i32>
+ %tmp33a = bitcast x86_mmx %tmp33 to <2 x i32>
+ %tmp34 = xor <2 x i32> %tmp28b, %tmp33a ; <<2 x i32>> [#uses=1]
+ %tmp34a = bitcast <2 x i32> %tmp34 to x86_mmx
+ store x86_mmx %tmp34a, x86_mmx* %A
tail call void @llvm.x86.mmx.emms( )
ret void
}
-define void @bar(<4 x i16>* %A, <4 x i16>* %B) {
+define void @bar(x86_mmx* %A, x86_mmx* %B) {
entry:
- %tmp1 = load <4 x i16>* %A ; <<4 x i16>> [#uses=1]
- %tmp3 = load <4 x i16>* %B ; <<4 x i16>> [#uses=1]
- %tmp4 = add <4 x i16> %tmp1, %tmp3 ; <<4 x i16>> [#uses=2]
- store <4 x i16> %tmp4, <4 x i16>* %A
- %tmp7 = load <4 x i16>* %B ; <<4 x i16>> [#uses=1]
- %tmp12 = tail call <4 x i16> @llvm.x86.mmx.padds.w( <4 x i16> %tmp4, <4 x i16> %tmp7 ) ; <<4 x i16>> [#uses=2]
- store <4 x i16> %tmp12, <4 x i16>* %A
- %tmp16 = load <4 x i16>* %B ; <<4 x i16>> [#uses=1]
- %tmp21 = tail call <4 x i16> @llvm.x86.mmx.paddus.w( <4 x i16> %tmp12, <4 x i16> %tmp16 ) ; <<4 x i16>> [#uses=2]
- store <4 x i16> %tmp21, <4 x i16>* %A
- %tmp27 = load <4 x i16>* %B ; <<4 x i16>> [#uses=1]
- %tmp28 = sub <4 x i16> %tmp21, %tmp27 ; <<4 x i16>> [#uses=2]
- store <4 x i16> %tmp28, <4 x i16>* %A
- %tmp31 = load <4 x i16>* %B ; <<4 x i16>> [#uses=1]
- %tmp36 = tail call <4 x i16> @llvm.x86.mmx.psubs.w( <4 x i16> %tmp28, <4 x i16> %tmp31 ) ; <<4 x i16>> [#uses=2]
- store <4 x i16> %tmp36, <4 x i16>* %A
- %tmp40 = load <4 x i16>* %B ; <<4 x i16>> [#uses=1]
- %tmp45 = tail call <4 x i16> @llvm.x86.mmx.psubus.w( <4 x i16> %tmp36, <4 x i16> %tmp40 ) ; <<4 x i16>> [#uses=2]
- store <4 x i16> %tmp45, <4 x i16>* %A
- %tmp51 = load <4 x i16>* %B ; <<4 x i16>> [#uses=1]
- %tmp52 = mul <4 x i16> %tmp45, %tmp51 ; <<4 x i16>> [#uses=2]
- store <4 x i16> %tmp52, <4 x i16>* %A
- %tmp55 = load <4 x i16>* %B ; <<4 x i16>> [#uses=1]
- %tmp60 = tail call <4 x i16> @llvm.x86.mmx.pmulh.w( <4 x i16> %tmp52, <4 x i16> %tmp55 ) ; <<4 x i16>> [#uses=2]
- store <4 x i16> %tmp60, <4 x i16>* %A
- %tmp64 = load <4 x i16>* %B ; <<4 x i16>> [#uses=1]
- %tmp69 = tail call <2 x i32> @llvm.x86.mmx.pmadd.wd( <4 x i16> %tmp60, <4 x i16> %tmp64 ) ; <<2 x i32>> [#uses=1]
- %tmp70 = bitcast <2 x i32> %tmp69 to <4 x i16> ; <<4 x i16>> [#uses=2]
- store <4 x i16> %tmp70, <4 x i16>* %A
- %tmp75 = load <4 x i16>* %B ; <<4 x i16>> [#uses=1]
- %tmp76 = and <4 x i16> %tmp70, %tmp75 ; <<4 x i16>> [#uses=2]
- store <4 x i16> %tmp76, <4 x i16>* %A
- %tmp81 = load <4 x i16>* %B ; <<4 x i16>> [#uses=1]
- %tmp82 = or <4 x i16> %tmp76, %tmp81 ; <<4 x i16>> [#uses=2]
- store <4 x i16> %tmp82, <4 x i16>* %A
- %tmp87 = load <4 x i16>* %B ; <<4 x i16>> [#uses=1]
- %tmp88 = xor <4 x i16> %tmp82, %tmp87 ; <<4 x i16>> [#uses=1]
- store <4 x i16> %tmp88, <4 x i16>* %A
+ %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1]
+ %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp1a = bitcast x86_mmx %tmp1 to <4 x i16>
+ %tmp3a = bitcast x86_mmx %tmp3 to <4 x i16>
+ %tmp4 = add <4 x i16> %tmp1a, %tmp3a ; <<4 x i16>> [#uses=2]
+ %tmp4a = bitcast <4 x i16> %tmp4 to x86_mmx
+ store x86_mmx %tmp4a, x86_mmx* %A
+ %tmp7 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.w( x86_mmx %tmp4a, x86_mmx %tmp7 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp12, x86_mmx* %A
+ %tmp16 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.w( x86_mmx %tmp12, x86_mmx %tmp16 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp21, x86_mmx* %A
+ %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp21a = bitcast x86_mmx %tmp21 to <4 x i16>
+ %tmp27a = bitcast x86_mmx %tmp27 to <4 x i16>
+ %tmp28 = sub <4 x i16> %tmp21a, %tmp27a ; <<4 x i16>> [#uses=2]
+ %tmp28a = bitcast <4 x i16> %tmp28 to x86_mmx
+ store x86_mmx %tmp28a, x86_mmx* %A
+ %tmp31 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.w( x86_mmx %tmp28a, x86_mmx %tmp31 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp36, x86_mmx* %A
+ %tmp40 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.w( x86_mmx %tmp36, x86_mmx %tmp40 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp45, x86_mmx* %A
+ %tmp51 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp45a = bitcast x86_mmx %tmp45 to <4 x i16>
+ %tmp51a = bitcast x86_mmx %tmp51 to <4 x i16>
+ %tmp52 = mul <4 x i16> %tmp45a, %tmp51a ; <<4 x i16>> [#uses=2]
+ %tmp52a = bitcast <4 x i16> %tmp52 to x86_mmx
+ store x86_mmx %tmp52a, x86_mmx* %A
+ %tmp55 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp60 = tail call x86_mmx @llvm.x86.mmx.pmulh.w( x86_mmx %tmp52a, x86_mmx %tmp55 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp60, x86_mmx* %A
+ %tmp64 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp69 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd( x86_mmx %tmp60, x86_mmx %tmp64 ) ; <x86_mmx> [#uses=1]
+ %tmp70 = bitcast x86_mmx %tmp69 to x86_mmx ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp70, x86_mmx* %A
+ %tmp75 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp70a = bitcast x86_mmx %tmp70 to <4 x i16>
+ %tmp75a = bitcast x86_mmx %tmp75 to <4 x i16>
+ %tmp76 = and <4 x i16> %tmp70a, %tmp75a ; <<4 x i16>> [#uses=2]
+ %tmp76a = bitcast <4 x i16> %tmp76 to x86_mmx
+ store x86_mmx %tmp76a, x86_mmx* %A
+ %tmp81 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp76b = bitcast x86_mmx %tmp76a to <4 x i16>
+ %tmp81a = bitcast x86_mmx %tmp81 to <4 x i16>
+ %tmp82 = or <4 x i16> %tmp76b, %tmp81a ; <<4 x i16>> [#uses=2]
+ %tmp82a = bitcast <4 x i16> %tmp82 to x86_mmx
+ store x86_mmx %tmp82a, x86_mmx* %A
+ %tmp87 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp82b = bitcast x86_mmx %tmp82a to <4 x i16>
+ %tmp87a = bitcast x86_mmx %tmp87 to <4 x i16>
+ %tmp88 = xor <4 x i16> %tmp82b, %tmp87a ; <<4 x i16>> [#uses=1]
+ %tmp88a = bitcast <4 x i16> %tmp88 to x86_mmx
+ store x86_mmx %tmp88a, x86_mmx* %A
tail call void @llvm.x86.mmx.emms( )
ret void
}
-declare <8 x i8> @llvm.x86.mmx.padds.b(<8 x i8>, <8 x i8>)
+;; The following is modified to use MMX intrinsics everywhere they work.
-declare <8 x i8> @llvm.x86.mmx.paddus.b(<8 x i8>, <8 x i8>)
+define void @fooa(x86_mmx* %A, x86_mmx* %B) {
+entry:
+ %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1]
+ %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp4 = tail call x86_mmx @llvm.x86.mmx.padd.b( x86_mmx %tmp1, x86_mmx %tmp3 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp4, x86_mmx* %A
+ %tmp7 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.b( x86_mmx %tmp4, x86_mmx %tmp7 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp12, x86_mmx* %A
+ %tmp16 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.b( x86_mmx %tmp12, x86_mmx %tmp16 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp21, x86_mmx* %A
+ %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp28 = tail call x86_mmx @llvm.x86.mmx.psub.b( x86_mmx %tmp21, x86_mmx %tmp27 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp28, x86_mmx* %A
+ %tmp31 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.b( x86_mmx %tmp28, x86_mmx %tmp31 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp36, x86_mmx* %A
+ %tmp40 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.b( x86_mmx %tmp36, x86_mmx %tmp40 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp45, x86_mmx* %A
+ %tmp51 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp51a = bitcast x86_mmx %tmp51 to i64
+ %tmp51aa = bitcast i64 %tmp51a to <8 x i8>
+ %tmp51b = bitcast x86_mmx %tmp45 to <8 x i8>
+ %tmp52 = mul <8 x i8> %tmp51b, %tmp51aa ; <<8 x i8>> [#uses=2]
+ %tmp52a = bitcast <8 x i8> %tmp52 to i64
+ %tmp52aa = bitcast i64 %tmp52a to x86_mmx
+ store x86_mmx %tmp52aa, x86_mmx* %A
+ %tmp57 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp58 = tail call x86_mmx @llvm.x86.mmx.pand( x86_mmx %tmp51, x86_mmx %tmp57 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp58, x86_mmx* %A
+ %tmp63 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp64 = tail call x86_mmx @llvm.x86.mmx.por( x86_mmx %tmp58, x86_mmx %tmp63 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp64, x86_mmx* %A
+ %tmp69 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp70 = tail call x86_mmx @llvm.x86.mmx.pxor( x86_mmx %tmp64, x86_mmx %tmp69 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp70, x86_mmx* %A
+ tail call void @llvm.x86.mmx.emms( )
+ ret void
+}
-declare <8 x i8> @llvm.x86.mmx.psubs.b(<8 x i8>, <8 x i8>)
+define void @baza(x86_mmx* %A, x86_mmx* %B) {
+entry:
+ %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1]
+ %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp4 = tail call x86_mmx @llvm.x86.mmx.padd.d( x86_mmx %tmp1, x86_mmx %tmp3 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp4, x86_mmx* %A
+ %tmp9 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp10 = tail call x86_mmx @llvm.x86.mmx.psub.d( x86_mmx %tmp4, x86_mmx %tmp9 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp10, x86_mmx* %A
+ %tmp15 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp10a = bitcast x86_mmx %tmp10 to <2 x i32>
+ %tmp15a = bitcast x86_mmx %tmp15 to <2 x i32>
+ %tmp16 = mul <2 x i32> %tmp10a, %tmp15a ; <<2 x i32>> [#uses=2]
+ %tmp16a = bitcast <2 x i32> %tmp16 to x86_mmx
+ store x86_mmx %tmp16a, x86_mmx* %A
+ %tmp21 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp22 = tail call x86_mmx @llvm.x86.mmx.pand( x86_mmx %tmp16a, x86_mmx %tmp21 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp22, x86_mmx* %A
+ %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp28 = tail call x86_mmx @llvm.x86.mmx.por( x86_mmx %tmp22, x86_mmx %tmp27 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp28, x86_mmx* %A
+ %tmp33 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp34 = tail call x86_mmx @llvm.x86.mmx.pxor( x86_mmx %tmp28, x86_mmx %tmp33 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp34, x86_mmx* %A
+ tail call void @llvm.x86.mmx.emms( )
+ ret void
+}
-declare <8 x i8> @llvm.x86.mmx.psubus.b(<8 x i8>, <8 x i8>)
+define void @bara(x86_mmx* %A, x86_mmx* %B) {
+entry:
+ %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1]
+ %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp4 = tail call x86_mmx @llvm.x86.mmx.padd.w( x86_mmx %tmp1, x86_mmx %tmp3 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp4, x86_mmx* %A
+ %tmp7 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.w( x86_mmx %tmp4, x86_mmx %tmp7 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp12, x86_mmx* %A
+ %tmp16 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.w( x86_mmx %tmp12, x86_mmx %tmp16 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp21, x86_mmx* %A
+ %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp28 = tail call x86_mmx @llvm.x86.mmx.psub.w( x86_mmx %tmp21, x86_mmx %tmp27 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp28, x86_mmx* %A
+ %tmp31 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.w( x86_mmx %tmp28, x86_mmx %tmp31 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp36, x86_mmx* %A
+ %tmp40 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.w( x86_mmx %tmp36, x86_mmx %tmp40 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp45, x86_mmx* %A
+ %tmp51 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp52 = tail call x86_mmx @llvm.x86.mmx.pmull.w( x86_mmx %tmp45, x86_mmx %tmp51 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp52, x86_mmx* %A
+ %tmp55 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp60 = tail call x86_mmx @llvm.x86.mmx.pmulh.w( x86_mmx %tmp52, x86_mmx %tmp55 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp60, x86_mmx* %A
+ %tmp64 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp69 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd( x86_mmx %tmp60, x86_mmx %tmp64 ) ; <x86_mmx> [#uses=1]
+ %tmp70 = bitcast x86_mmx %tmp69 to x86_mmx ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp70, x86_mmx* %A
+ %tmp75 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp76 = tail call x86_mmx @llvm.x86.mmx.pand( x86_mmx %tmp70, x86_mmx %tmp75 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp76, x86_mmx* %A
+ %tmp81 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp82 = tail call x86_mmx @llvm.x86.mmx.por( x86_mmx %tmp76, x86_mmx %tmp81 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp82, x86_mmx* %A
+ %tmp87 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp88 = tail call x86_mmx @llvm.x86.mmx.pxor( x86_mmx %tmp82, x86_mmx %tmp87 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp88, x86_mmx* %A
+ tail call void @llvm.x86.mmx.emms( )
+ ret void
+}
-declare <4 x i16> @llvm.x86.mmx.padds.w(<4 x i16>, <4 x i16>)
+declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx)
-declare <4 x i16> @llvm.x86.mmx.paddus.w(<4 x i16>, <4 x i16>)
+declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx)
-declare <4 x i16> @llvm.x86.mmx.psubs.w(<4 x i16>, <4 x i16>)
+declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx)
-declare <4 x i16> @llvm.x86.mmx.psubus.w(<4 x i16>, <4 x i16>)
+declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx)
-declare <4 x i16> @llvm.x86.mmx.pmulh.w(<4 x i16>, <4 x i16>)
+declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx)
-declare <2 x i32> @llvm.x86.mmx.pmadd.wd(<4 x i16>, <4 x i16>)
+declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx)
declare void @llvm.x86.mmx.emms()
+
+declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padds.d(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.psubs.d(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx)
+
diff --git a/test/CodeGen/X86/mmx-bitcast-to-i64.ll b/test/CodeGen/X86/mmx-bitcast-to-i64.ll
index 1fd8f67a0ccc..8b1840abf615 100644
--- a/test/CodeGen/X86/mmx-bitcast-to-i64.ll
+++ b/test/CodeGen/X86/mmx-bitcast-to-i64.ll
@@ -1,26 +1,31 @@
; RUN: llc < %s -march=x86-64 | grep movd | count 4
-define i64 @foo(<1 x i64>* %p) {
- %t = load <1 x i64>* %p
- %u = add <1 x i64> %t, %t
- %s = bitcast <1 x i64> %u to i64
+define i64 @foo(x86_mmx* %p) {
+ %t = load x86_mmx* %p
+ %u = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %t, x86_mmx %t)
+ %s = bitcast x86_mmx %u to i64
ret i64 %s
}
-define i64 @goo(<2 x i32>* %p) {
- %t = load <2 x i32>* %p
- %u = add <2 x i32> %t, %t
- %s = bitcast <2 x i32> %u to i64
+define i64 @goo(x86_mmx* %p) {
+ %t = load x86_mmx* %p
+ %u = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %t, x86_mmx %t)
+ %s = bitcast x86_mmx %u to i64
ret i64 %s
}
-define i64 @hoo(<4 x i16>* %p) {
- %t = load <4 x i16>* %p
- %u = add <4 x i16> %t, %t
- %s = bitcast <4 x i16> %u to i64
+define i64 @hoo(x86_mmx* %p) {
+ %t = load x86_mmx* %p
+ %u = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %t, x86_mmx %t)
+ %s = bitcast x86_mmx %u to i64
ret i64 %s
}
-define i64 @ioo(<8 x i8>* %p) {
- %t = load <8 x i8>* %p
- %u = add <8 x i8> %t, %t
- %s = bitcast <8 x i8> %u to i64
+define i64 @ioo(x86_mmx* %p) {
+ %t = load x86_mmx* %p
+ %u = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %t, x86_mmx %t)
+ %s = bitcast x86_mmx %u to i64
ret i64 %s
}
+
+declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx)
diff --git a/test/CodeGen/X86/mmx-builtins.ll b/test/CodeGen/X86/mmx-builtins.ll
new file mode 100644
index 000000000000..3ac0e4ee4b85
--- /dev/null
+++ b/test/CodeGen/X86/mmx-builtins.ll
@@ -0,0 +1,1324 @@
+; RUN: llc < %s -march=x86 -mattr=+mmx,+ssse3 | FileCheck %s
+
+declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test1(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: phaddw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %2 = bitcast <4 x i16> %1 to x86_mmx
+ %3 = bitcast <4 x i16> %0 to x86_mmx
+ %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %2, x86_mmx %3) nounwind readnone
+ %5 = bitcast x86_mmx %4 to <4 x i16>
+ %6 = bitcast <4 x i16> %5 to <1 x i64>
+ %7 = extractelement <1 x i64> %6, i32 0
+ ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pcmpgtd
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+ %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test87(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pcmpgtw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test86(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pcmpgtb
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test85(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pcmpeqd
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+ %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test84(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pcmpeqw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test83(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pcmpeqb
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test82(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: punpckldq
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+ %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test81(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: punpcklwd
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test80(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: punpcklbw
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test79(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: punpckhdq
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+ %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test78(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: punpckhwd
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test77(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: punpckhbw
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test76(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: packuswb
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test75(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: packssdw
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+ %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test74(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: packsswb
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32) nounwind readnone
+
+define i64 @test73(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: psrad
+entry:
+ %0 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
+ %1 = tail call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx %mmx_var.i, i32 3) nounwind
+ %2 = bitcast x86_mmx %1 to <2 x i32>
+ %3 = bitcast <2 x i32> %2 to <1 x i64>
+ %4 = extractelement <1 x i64> %3, i32 0
+ ret i64 %4
+}
+
+declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32) nounwind readnone
+
+define i64 @test72(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: psraw
+entry:
+ %0 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
+ %1 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %mmx_var.i, i32 3) nounwind
+ %2 = bitcast x86_mmx %1 to <4 x i16>
+ %3 = bitcast <4 x i16> %2 to <1 x i64>
+ %4 = extractelement <1 x i64> %3, i32 0
+ ret i64 %4
+}
+
+declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone
+
+define i64 @test71(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: psrlq
+entry:
+ %0 = extractelement <1 x i64> %a, i32 0
+ %mmx_var.i = bitcast i64 %0 to x86_mmx
+ %1 = tail call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %mmx_var.i, i32 3) nounwind
+ %2 = bitcast x86_mmx %1 to i64
+ ret i64 %2
+}
+
+declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32) nounwind readnone
+
+define i64 @test70(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: psrld
+entry:
+ %0 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
+ %1 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %mmx_var.i, i32 3) nounwind
+ %2 = bitcast x86_mmx %1 to <2 x i32>
+ %3 = bitcast <2 x i32> %2 to <1 x i64>
+ %4 = extractelement <1 x i64> %3, i32 0
+ ret i64 %4
+}
+
+declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone
+
+define i64 @test69(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: psrlw
+entry:
+ %0 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
+ %1 = tail call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx %mmx_var.i, i32 3) nounwind
+ %2 = bitcast x86_mmx %1 to <4 x i16>
+ %3 = bitcast <4 x i16> %2 to <1 x i64>
+ %4 = extractelement <1 x i64> %3, i32 0
+ ret i64 %4
+}
+
+declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone
+
+define i64 @test68(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: psllq
+entry:
+ %0 = extractelement <1 x i64> %a, i32 0
+ %mmx_var.i = bitcast i64 %0 to x86_mmx
+ %1 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %mmx_var.i, i32 3) nounwind
+ %2 = bitcast x86_mmx %1 to i64
+ ret i64 %2
+}
+
+declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32) nounwind readnone
+
+define i64 @test67(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: pslld
+entry:
+ %0 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
+ %1 = tail call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx %mmx_var.i, i32 3) nounwind
+ %2 = bitcast x86_mmx %1 to <2 x i32>
+ %3 = bitcast <2 x i32> %2 to <1 x i64>
+ %4 = extractelement <1 x i64> %3, i32 0
+ ret i64 %4
+}
+
+declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32) nounwind readnone
+
+define i64 @test66(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: psllw
+entry:
+ %0 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
+ %1 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %mmx_var.i, i32 3) nounwind
+ %2 = bitcast x86_mmx %1 to <4 x i16>
+ %3 = bitcast <4 x i16> %2 to <1 x i64>
+ %4 = extractelement <1 x i64> %3, i32 0
+ ret i64 %4
+}
+
+declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test65(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psrad
+entry:
+ %0 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1.i = bitcast i64 %1 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test64(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psraw
+entry:
+ %0 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1.i = bitcast i64 %1 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test63(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psrlq
+entry:
+ %0 = extractelement <1 x i64> %a, i32 0
+ %mmx_var.i = bitcast i64 %0 to x86_mmx
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1.i = bitcast i64 %1 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to i64
+ ret i64 %3
+}
+
+declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test62(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psrld
+entry:
+ %0 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1.i = bitcast i64 %1 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test61(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psrlw
+entry:
+ %0 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1.i = bitcast i64 %1 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test60(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psllq
+entry:
+ %0 = extractelement <1 x i64> %a, i32 0
+ %mmx_var.i = bitcast i64 %0 to x86_mmx
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1.i = bitcast i64 %1 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to i64
+ ret i64 %3
+}
+
+declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test59(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pslld
+entry:
+ %0 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1.i = bitcast i64 %1 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test58(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psllw
+entry:
+ %0 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1.i = bitcast i64 %1 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test56(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pxor
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+ %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test55(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: por
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+ %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.por(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test54(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pandn
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+ %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test53(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pand
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+ %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pand(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test52(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pmullw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+define i64 @test51(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pmullw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test50(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pmulhw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test49(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pmaddwd
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test48(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psubusw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test47(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psubusb
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test46(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psubsw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test45(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psubsb
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+define i64 @test44(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psubq
+entry:
+ %0 = extractelement <1 x i64> %a, i32 0
+ %mmx_var = bitcast i64 %0 to x86_mmx
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1 = bitcast i64 %1 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
+ %3 = bitcast x86_mmx %2 to i64
+ ret i64 %3
+}
+
+declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone
+
+declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test43(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psubd
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+ %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test42(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psubw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test41(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psubb
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test40(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: paddusw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test39(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: paddusb
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test38(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: paddsw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test37(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: paddsb
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test36(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: paddq
+entry:
+ %0 = extractelement <1 x i64> %a, i32 0
+ %mmx_var = bitcast i64 %0 to x86_mmx
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1 = bitcast i64 %1 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
+ %3 = bitcast x86_mmx %2 to i64
+ ret i64 %3
+}
+
+declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test35(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: paddd
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+ %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test34(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: paddw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test33(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: paddb
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test32(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psadbw
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to i64
+ ret i64 %3
+}
+
+declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test31(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pminsw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test30(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pminub
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test29(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pmaxsw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test28(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pmaxub
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test27(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pavgw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test26(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pavgb
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare void @llvm.x86.mmx.movnt.dq(x86_mmx*, x86_mmx) nounwind
+
+define void @test25(<1 x i64>* %p, <1 x i64> %a) nounwind optsize ssp {
+; CHECK: movntq
+entry:
+ %mmx_ptr_var.i = bitcast <1 x i64>* %p to x86_mmx*
+ %0 = extractelement <1 x i64> %a, i32 0
+ %mmx_var.i = bitcast i64 %0 to x86_mmx
+ tail call void @llvm.x86.mmx.movnt.dq(x86_mmx* %mmx_ptr_var.i, x86_mmx %mmx_var.i) nounwind
+ ret void
+}
+
+declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) nounwind readnone
+
+define i32 @test24(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: pmovmskb
+entry:
+ %0 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %0 to x86_mmx
+ %1 = tail call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %mmx_var.i) nounwind
+ ret i32 %1
+}
+
+declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*) nounwind
+
+define void @test23(<1 x i64> %d, <1 x i64> %n, i8* %p) nounwind optsize ssp {
+; CHECK: maskmovq
+entry:
+ %0 = bitcast <1 x i64> %n to <8 x i8>
+ %1 = bitcast <1 x i64> %d to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ tail call void @llvm.x86.mmx.maskmovq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i, i8* %p) nounwind
+ ret void
+}
+
+declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test22(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pmulhuw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone
+
+define i64 @test21(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: pshufw
+entry:
+ %0 = bitcast <1 x i64> %a to <4 x i16>
+ %1 = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test20(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pmuludq
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+ %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to i64
+ ret i64 %3
+}
+
+declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone
+
+define <2 x double> @test19(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: cvtpi2pd
+entry:
+ %0 = bitcast <1 x i64> %a to <2 x i32>
+ %1 = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %1) nounwind readnone
+ ret <2 x double> %2
+}
+
+declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone
+
+define i64 @test18(<2 x double> %a) nounwind readnone optsize ssp {
+; CHECK: cvttpd2pi
+entry:
+ %0 = tail call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a) nounwind readnone
+ %1 = bitcast x86_mmx %0 to <2 x i32>
+ %2 = bitcast <2 x i32> %1 to <1 x i64>
+ %3 = extractelement <1 x i64> %2, i32 0
+ ret i64 %3
+}
+
+declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone
+
+define i64 @test17(<2 x double> %a) nounwind readnone optsize ssp {
+; CHECK: cvtpd2pi
+entry:
+ %0 = tail call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a) nounwind readnone
+ %1 = bitcast x86_mmx %0 to <2 x i32>
+ %2 = bitcast <2 x i32> %1 to <1 x i64>
+ %3 = extractelement <1 x i64> %2, i32 0
+ ret i64 %3
+}
+
+declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone
+
+define i64 @test16(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: palignr
+entry:
+ %0 = extractelement <1 x i64> %a, i32 0
+ %mmx_var = bitcast i64 %0 to x86_mmx
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1 = bitcast i64 %1 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %mmx_var, x86_mmx %mmx_var1, i8 16)
+ %3 = bitcast x86_mmx %2 to i64
+ ret i64 %3
+}
+
+declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone
+
+define i64 @test15(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: pabsd
+entry:
+ %0 = bitcast <1 x i64> %a to <2 x i32>
+ %1 = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %1) nounwind readnone
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone
+
+define i64 @test14(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: pabsw
+entry:
+ %0 = bitcast <1 x i64> %a to <4 x i16>
+ %1 = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %1) nounwind readnone
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone
+
+define i64 @test13(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: pabsb
+entry:
+ %0 = bitcast <1 x i64> %a to <8 x i8>
+ %1 = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %1) nounwind readnone
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test12(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psignd
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %2 = bitcast <2 x i32> %1 to x86_mmx
+ %3 = bitcast <2 x i32> %0 to x86_mmx
+ %4 = tail call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %2, x86_mmx %3) nounwind readnone
+ %5 = bitcast x86_mmx %4 to <2 x i32>
+ %6 = bitcast <2 x i32> %5 to <1 x i64>
+ %7 = extractelement <1 x i64> %6, i32 0
+ ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test11(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psignw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %2 = bitcast <4 x i16> %1 to x86_mmx
+ %3 = bitcast <4 x i16> %0 to x86_mmx
+ %4 = tail call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %2, x86_mmx %3) nounwind readnone
+ %5 = bitcast x86_mmx %4 to <4 x i16>
+ %6 = bitcast <4 x i16> %5 to <1 x i64>
+ %7 = extractelement <1 x i64> %6, i32 0
+ ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test10(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psignb
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %2 = bitcast <8 x i8> %1 to x86_mmx
+ %3 = bitcast <8 x i8> %0 to x86_mmx
+ %4 = tail call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %2, x86_mmx %3) nounwind readnone
+ %5 = bitcast x86_mmx %4 to <8 x i8>
+ %6 = bitcast <8 x i8> %5 to <1 x i64>
+ %7 = extractelement <1 x i64> %6, i32 0
+ ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test9(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pshufb
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %2 = bitcast <8 x i8> %1 to x86_mmx
+ %3 = bitcast <8 x i8> %0 to x86_mmx
+ %4 = tail call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %2, x86_mmx %3) nounwind readnone
+ %5 = bitcast x86_mmx %4 to <8 x i8>
+ %6 = bitcast <8 x i8> %5 to <1 x i64>
+ %7 = extractelement <1 x i64> %6, i32 0
+ ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test8(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pmulhrsw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %2 = bitcast <4 x i16> %1 to x86_mmx
+ %3 = bitcast <4 x i16> %0 to x86_mmx
+ %4 = tail call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
+ %5 = bitcast x86_mmx %4 to <4 x i16>
+ %6 = bitcast <4 x i16> %5 to <1 x i64>
+ %7 = extractelement <1 x i64> %6, i32 0
+ ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test7(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pmaddubsw
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %2 = bitcast <8 x i8> %1 to x86_mmx
+ %3 = bitcast <8 x i8> %0 to x86_mmx
+ %4 = tail call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
+ %5 = bitcast x86_mmx %4 to <8 x i8>
+ %6 = bitcast <8 x i8> %5 to <1 x i64>
+ %7 = extractelement <1 x i64> %6, i32 0
+ ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test6(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: phsubsw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %2 = bitcast <4 x i16> %1 to x86_mmx
+ %3 = bitcast <4 x i16> %0 to x86_mmx
+ %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
+ %5 = bitcast x86_mmx %4 to <4 x i16>
+ %6 = bitcast <4 x i16> %5 to <1 x i64>
+ %7 = extractelement <1 x i64> %6, i32 0
+ ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test5(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: phsubd
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %2 = bitcast <2 x i32> %1 to x86_mmx
+ %3 = bitcast <2 x i32> %0 to x86_mmx
+ %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %2, x86_mmx %3) nounwind readnone
+ %5 = bitcast x86_mmx %4 to <2 x i32>
+ %6 = bitcast <2 x i32> %5 to <1 x i64>
+ %7 = extractelement <1 x i64> %6, i32 0
+ ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test4(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: phsubw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %2 = bitcast <4 x i16> %1 to x86_mmx
+ %3 = bitcast <4 x i16> %0 to x86_mmx
+ %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %2, x86_mmx %3) nounwind readnone
+ %5 = bitcast x86_mmx %4 to <4 x i16>
+ %6 = bitcast <4 x i16> %5 to <1 x i64>
+ %7 = extractelement <1 x i64> %6, i32 0
+ ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test3(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: phaddsw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %2 = bitcast <4 x i16> %1 to x86_mmx
+ %3 = bitcast <4 x i16> %0 to x86_mmx
+ %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
+ %5 = bitcast x86_mmx %4 to <4 x i16>
+ %6 = bitcast <4 x i16> %5 to <1 x i64>
+ %7 = extractelement <1 x i64> %6, i32 0
+ ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test2(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: phaddd
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %2 = bitcast <2 x i32> %1 to x86_mmx
+ %3 = bitcast <2 x i32> %0 to x86_mmx
+ %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %2, x86_mmx %3) nounwind readnone
+ %5 = bitcast x86_mmx %4 to <2 x i32>
+ %6 = bitcast <2 x i32> %5 to <1 x i64>
+ %7 = extractelement <1 x i64> %6, i32 0
+ ret i64 %7
+}
diff --git a/test/CodeGen/X86/mmx-insert-element.ll b/test/CodeGen/X86/mmx-insert-element.ll
index a063ee1d6cf4..348dac8d4d59 100644
--- a/test/CodeGen/X86/mmx-insert-element.ll
+++ b/test/CodeGen/X86/mmx-insert-element.ll
@@ -1,7 +1,9 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx | not grep movq
-; RUN: llc < %s -march=x86 -mattr=+mmx | grep psllq
+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 | grep movq
+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 | grep pshufd
+; This is not an MMX operation; promoted to XMM.
-define <2 x i32> @qux(i32 %A) nounwind {
+define x86_mmx @qux(i32 %A) nounwind {
%tmp3 = insertelement <2 x i32> < i32 0, i32 undef >, i32 %A, i32 1 ; <<2 x i32>> [#uses=1]
- ret <2 x i32> %tmp3
+ %tmp4 = bitcast <2 x i32> %tmp3 to x86_mmx
+ ret x86_mmx %tmp4
}
diff --git a/test/CodeGen/X86/mmx-pinsrw.ll b/test/CodeGen/X86/mmx-pinsrw.ll
index 3af09f4998d3..6062b505a569 100644
--- a/test/CodeGen/X86/mmx-pinsrw.ll
+++ b/test/CodeGen/X86/mmx-pinsrw.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx | grep pinsrw | count 1
+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 | grep pinsrw | count 1
; PR2562
external global i16 ; <i16*>:0 [#uses=1]
diff --git a/test/CodeGen/X86/mmx-punpckhdq.ll b/test/CodeGen/X86/mmx-punpckhdq.ll
index 0af7e017b626..689f7bf59564 100644
--- a/test/CodeGen/X86/mmx-punpckhdq.ll
+++ b/test/CodeGen/X86/mmx-punpckhdq.ll
@@ -1,6 +1,9 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx | grep punpckhdq | count 1
+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse42 -mtriple=x86_64-apple-darwin10 | FileCheck %s
+; There are no MMX operations in bork; promoted to XMM.
define void @bork(<1 x i64>* %x) {
+; CHECK: bork
+; CHECK: pextrd
entry:
%tmp2 = load <1 x i64>* %x ; <<1 x i64>> [#uses=1]
%tmp6 = bitcast <1 x i64> %tmp2 to <2 x i32> ; <<2 x i32>> [#uses=1]
@@ -11,4 +14,18 @@ entry:
ret void
}
+; pork uses MMX.
+
+define void @pork(x86_mmx* %x) {
+; CHECK: pork
+; CHECK: punpckhdq
+entry:
+ %tmp2 = load x86_mmx* %x ; <x86_mmx> [#uses=1]
+ %tmp9 = tail call x86_mmx @llvm.x86.mmx.punpckhdq (x86_mmx %tmp2, x86_mmx %tmp2)
+ store x86_mmx %tmp9, x86_mmx* %x
+ tail call void @llvm.x86.mmx.emms( )
+ ret void
+}
+
+declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx)
declare void @llvm.x86.mmx.emms()
diff --git a/test/CodeGen/X86/mmx-shift.ll b/test/CodeGen/X86/mmx-shift.ll
index dd0aa2ca31f4..bafc75444d91 100644
--- a/test/CodeGen/X86/mmx-shift.ll
+++ b/test/CodeGen/X86/mmx-shift.ll
@@ -5,28 +5,28 @@
define i64 @t1(<1 x i64> %mm1) nounwind {
entry:
- %tmp6 = tail call <1 x i64> @llvm.x86.mmx.pslli.q( <1 x i64> %mm1, i32 32 ) ; <<1 x i64>> [#uses=1]
- %retval1112 = bitcast <1 x i64> %tmp6 to i64 ; <i64> [#uses=1]
+ %tmp = bitcast <1 x i64> %mm1 to x86_mmx
+ %tmp6 = tail call x86_mmx @llvm.x86.mmx.pslli.q( x86_mmx %tmp, i32 32 ) ; <x86_mmx> [#uses=1]
+ %retval1112 = bitcast x86_mmx %tmp6 to i64
ret i64 %retval1112
}
-declare <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64>, i32) nounwind readnone
+declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone
-define i64 @t2(<2 x i32> %mm1, <2 x i32> %mm2) nounwind {
+define i64 @t2(x86_mmx %mm1, x86_mmx %mm2) nounwind {
entry:
- %tmp7 = tail call <2 x i32> @llvm.x86.mmx.psra.d( <2 x i32> %mm1, <2 x i32> %mm2 ) nounwind readnone ; <<2 x i32>> [#uses=1]
- %retval1112 = bitcast <2 x i32> %tmp7 to i64 ; <i64> [#uses=1]
+ %tmp7 = tail call x86_mmx @llvm.x86.mmx.psra.d( x86_mmx %mm1, x86_mmx %mm2 ) nounwind readnone ; <x86_mmx> [#uses=1]
+ %retval1112 = bitcast x86_mmx %tmp7 to i64
ret i64 %retval1112
}
-declare <2 x i32> @llvm.x86.mmx.psra.d(<2 x i32>, <2 x i32>) nounwind readnone
+declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone
-define i64 @t3(<1 x i64> %mm1, i32 %bits) nounwind {
+define i64 @t3(x86_mmx %mm1, i32 %bits) nounwind {
entry:
- %tmp6 = bitcast <1 x i64> %mm1 to <4 x i16> ; <<4 x i16>> [#uses=1]
- %tmp8 = tail call <4 x i16> @llvm.x86.mmx.psrli.w( <4 x i16> %tmp6, i32 %bits ) nounwind readnone ; <<4 x i16>> [#uses=1]
- %retval1314 = bitcast <4 x i16> %tmp8 to i64 ; <i64> [#uses=1]
+ %tmp8 = tail call x86_mmx @llvm.x86.mmx.psrli.w( x86_mmx %mm1, i32 %bits ) nounwind readnone ; <x86_mmx> [#uses=1]
+ %retval1314 = bitcast x86_mmx %tmp8 to i64
ret i64 %retval1314
}
-declare <4 x i16> @llvm.x86.mmx.psrli.w(<4 x i16>, i32) nounwind readnone
+declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone
diff --git a/test/CodeGen/X86/mmx-shuffle.ll b/test/CodeGen/X86/mmx-shuffle.ll
index e3125c7345b8..9f7501eb7c5d 100644
--- a/test/CodeGen/X86/mmx-shuffle.ll
+++ b/test/CodeGen/X86/mmx-shuffle.ll
@@ -22,8 +22,10 @@ entry:
%tmp542 = bitcast <2 x i32> %tmp529 to <4 x i16> ; <<4 x i16>> [#uses=1]
%tmp543 = add <4 x i16> %tmp542, < i16 0, i16 16448, i16 24672, i16 28784 > ; <<4 x i16>> [#uses=1]
%tmp555 = bitcast <4 x i16> %tmp543 to <8 x i8> ; <<8 x i8>> [#uses=1]
- tail call void @llvm.x86.mmx.maskmovq( <8 x i8> zeroinitializer, <8 x i8> %tmp555, i8* null )
+ %tmp556 = bitcast <8 x i8> %tmp555 to x86_mmx
+ %tmp557 = bitcast <8 x i8> zeroinitializer to x86_mmx
+ tail call void @llvm.x86.mmx.maskmovq( x86_mmx %tmp557, x86_mmx %tmp556, i8* null )
ret void
}
-declare void @llvm.x86.mmx.maskmovq(<8 x i8>, <8 x i8>, i8*)
+declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*)
diff --git a/test/CodeGen/X86/mmx-vzmovl-2.ll b/test/CodeGen/X86/mmx-vzmovl-2.ll
index 8253c200323c..a7ce7d93920e 100644
--- a/test/CodeGen/X86/mmx-vzmovl-2.ll
+++ b/test/CodeGen/X86/mmx-vzmovl-2.ll
@@ -1,10 +1,10 @@
-; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep pxor
-; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep punpckldq
+; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | grep pxor
+; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | grep punpckldq
%struct.vS1024 = type { [8 x <4 x i32>] }
%struct.vS512 = type { [4 x <4 x i32>] }
-declare <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64>, i32) nounwind readnone
+declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone
define void @t() nounwind {
entry:
@@ -12,14 +12,18 @@ entry:
bb554: ; preds = %bb554, %entry
%sum.0.reg2mem.0 = phi <1 x i64> [ %tmp562, %bb554 ], [ zeroinitializer, %entry ] ; <<1 x i64>> [#uses=1]
- %0 = load <1 x i64>* null, align 8 ; <<1 x i64>> [#uses=2]
- %1 = bitcast <1 x i64> %0 to <2 x i32> ; <<2 x i32>> [#uses=1]
+ %0 = load x86_mmx* null, align 8 ; <x86_mmx> [#uses=2]
+ %1 = bitcast x86_mmx %0 to <2 x i32> ; <<2 x i32>> [#uses=1]
%tmp555 = and <2 x i32> %1, < i32 -1, i32 0 > ; <<2 x i32>> [#uses=1]
- %2 = bitcast <2 x i32> %tmp555 to <1 x i64> ; <<1 x i64>> [#uses=1]
- %3 = call <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64> %0, i32 32) nounwind readnone ; <<1 x i64>> [#uses=1]
+ %2 = bitcast <2 x i32> %tmp555 to x86_mmx ; <x86_mmx> [#uses=1]
+ %3 = call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %0, i32 32) nounwind readnone ; <x86_mmx> [#uses=1]
store <1 x i64> %sum.0.reg2mem.0, <1 x i64>* null
- %tmp558 = add <1 x i64> %sum.0.reg2mem.0, %2 ; <<1 x i64>> [#uses=1]
- %4 = call <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64> %tmp558, i32 32) nounwind readnone ; <<1 x i64>> [#uses=1]
- %tmp562 = add <1 x i64> %4, %3 ; <<1 x i64>> [#uses=1]
+ %tmp3 = bitcast x86_mmx %2 to <1 x i64>
+ %tmp558 = add <1 x i64> %sum.0.reg2mem.0, %tmp3 ; <<1 x i64>> [#uses=1]
+ %tmp5 = bitcast <1 x i64> %tmp558 to x86_mmx
+ %4 = call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %tmp5, i32 32) nounwind readnone ; <x86_mmx> [#uses=1]
+ %tmp6 = bitcast x86_mmx %4 to <1 x i64>
+ %tmp7 = bitcast x86_mmx %3 to <1 x i64>
+ %tmp562 = add <1 x i64> %tmp6, %tmp7 ; <<1 x i64>> [#uses=1]
br label %bb554
}
diff --git a/test/CodeGen/X86/mmx-vzmovl.ll b/test/CodeGen/X86/mmx-vzmovl.ll
index d21e2404882d..191e261f616f 100644
--- a/test/CodeGen/X86/mmx-vzmovl.ll
+++ b/test/CodeGen/X86/mmx-vzmovl.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep movd
-; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep movq
+; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | grep movq | count 2
+; There are no MMX operations here; this is promoted to XMM.
define void @foo(<1 x i64>* %a, <1 x i64>* %b) nounwind {
entry:
diff --git a/test/CodeGen/X86/movgs.ll b/test/CodeGen/X86/movgs.ll
index b04048b92c13..00190e802fc9 100644
--- a/test/CodeGen/X86/movgs.ll
+++ b/test/CodeGen/X86/movgs.ll
@@ -1,8 +1,57 @@
-; RUN: llc < %s -march=x86 | grep gs
+; RUN: llc < %s -march=x86 -mattr=sse41 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -march=x86-64 -mattr=sse41 | FileCheck %s --check-prefix=X64
-define i32 @foo() nounwind readonly {
+define i32 @test1() nounwind readonly {
entry:
%tmp = load i32* addrspace(256)* getelementptr (i32* addrspace(256)* inttoptr (i32 72 to i32* addrspace(256)*), i32 31) ; <i32*> [#uses=1]
%tmp1 = load i32* %tmp ; <i32> [#uses=1]
ret i32 %tmp1
}
+; X32: test1:
+; X32: movl %gs:196, %eax
+; X32: movl (%eax), %eax
+; X32: ret
+
+; X64: test1:
+; X64: movq %gs:320, %rax
+; X64: movl (%rax), %eax
+; X64: ret
+
+define i64 @test2(void (i8*)* addrspace(256)* %tmp8) nounwind {
+entry:
+ %tmp9 = load void (i8*)* addrspace(256)* %tmp8, align 8
+ tail call void %tmp9(i8* undef) nounwind optsize
+ ret i64 0
+}
+
+; rdar://8453210
+; X32: test2:
+; X32: movl {{.*}}(%esp), %eax
+; X32: calll *%gs:(%eax)
+
+; X64: test2:
+; X64: callq *%gs:(%rdi)
+
+
+
+
+define <2 x i64> @pmovsxwd_1(i64 addrspace(256)* %p) nounwind readonly {
+entry:
+ %0 = load i64 addrspace(256)* %p
+ %tmp2 = insertelement <2 x i64> zeroinitializer, i64 %0, i32 0
+ %1 = bitcast <2 x i64> %tmp2 to <8 x i16>
+ %2 = tail call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %1) nounwind readnone
+ %3 = bitcast <4 x i32> %2 to <2 x i64>
+ ret <2 x i64> %3
+
+; X32: pmovsxwd_1:
+; X32: movl 4(%esp), %eax
+; X32: pmovsxwd %gs:(%eax), %xmm0
+; X32: ret
+
+; X64: pmovsxwd_1:
+; X64: pmovsxwd %gs:(%rdi), %xmm0
+; X64: ret
+}
+
+declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
diff --git a/test/CodeGen/X86/mult-alt-generic-i686.ll b/test/CodeGen/X86/mult-alt-generic-i686.ll
new file mode 100644
index 000000000000..7c3499f178a6
--- /dev/null
+++ b/test/CodeGen/X86/mult-alt-generic-i686.ll
@@ -0,0 +1,321 @@
+; RUN: llc < %s -march=x86
+; ModuleID = 'mult-alt-generic.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i686"
+
+@mout0 = common global i32 0, align 4
+@min1 = common global i32 0, align 4
+@marray = common global [2 x i32] zeroinitializer, align 4
+
+define void @single_m() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*m,*m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32* @min1) nounwind
+ ret void
+}
+
+define void @single_o() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %index = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %index, align 4
+ ret void
+}
+
+define void @single_V() nounwind {
+entry:
+ ret void
+}
+
+define void @single_lt() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,<r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* %in1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r,r<,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ ret void
+}
+
+define void @single_gt() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,>r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* %in1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r,r>,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ ret void
+}
+
+define void @single_r() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @single_i() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,i,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @single_n() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,n,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @single_E() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r,E,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define void @single_F() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r,F,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+000) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define void @single_s() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ ret void
+}
+
+define void @single_g() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* @min1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ %2 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %2, i32* %out0, align 4
+ ret void
+}
+
+define void @single_X() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* @min1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ %2 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %2, i32* %out0, align 4
+ %3 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ store i32 %3, i32* %out0, align 4
+ %4 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind
+ store i32 %4, i32* %out0, align 4
+ %5 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+000) nounwind
+ store i32 %5, i32* %out0, align 4
+ ret void
+}
+
+define void @single_p() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,im,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_m() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ call void asm "foo $1,$0", "=*m|r,m|r,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+ ret void
+}
+
+define void @multi_o() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %index = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %index, align 4
+ ret void
+}
+
+define void @multi_V() nounwind {
+entry:
+ ret void
+}
+
+define void @multi_lt() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|<r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* %in1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r|r,r|r<,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_gt() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|>r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* %in1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r|r,r|r>,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_r() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|m,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_i() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|i,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_n() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|n,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_E() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r|r,r|E,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define void @multi_F() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r|r,r|F,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+000) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define void @multi_s() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_g() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* @min1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ %2 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %2, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_X() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* @min1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ %2 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %2, i32* %out0, align 4
+ %3 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ store i32 %3, i32* %out0, align 4
+ %4 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind
+ store i32 %4, i32* %out0, align 4
+ %5 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+000) nounwind
+ store i32 %5, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_p() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|im,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
diff --git a/test/CodeGen/X86/mult-alt-generic-x86_64.ll b/test/CodeGen/X86/mult-alt-generic-x86_64.ll
new file mode 100644
index 000000000000..f35bb5e34079
--- /dev/null
+++ b/test/CodeGen/X86/mult-alt-generic-x86_64.ll
@@ -0,0 +1,321 @@
+; RUN: llc < %s -march=x86-64
+; ModuleID = 'mult-alt-generic.c'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64"
+
+@mout0 = common global i32 0, align 4
+@min1 = common global i32 0, align 4
+@marray = common global [2 x i32] zeroinitializer, align 4
+
+define void @single_m() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*m,*m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32* @min1) nounwind
+ ret void
+}
+
+define void @single_o() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %index = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %index, align 4
+ ret void
+}
+
+define void @single_V() nounwind {
+entry:
+ ret void
+}
+
+define void @single_lt() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,<r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* %in1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r,r<,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ ret void
+}
+
+define void @single_gt() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,>r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* %in1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r,r>,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ ret void
+}
+
+define void @single_r() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @single_i() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,i,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @single_n() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,n,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @single_E() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r,E,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define void @single_F() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r,F,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+000) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define void @single_s() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ ret void
+}
+
+define void @single_g() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* @min1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ %2 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %2, i32* %out0, align 4
+ ret void
+}
+
+define void @single_X() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* @min1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ %2 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %2, i32* %out0, align 4
+ %3 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ store i32 %3, i32* %out0, align 4
+ %4 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind
+ store i32 %4, i32* %out0, align 4
+ %5 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+000) nounwind
+ store i32 %5, i32* %out0, align 4
+ ret void
+}
+
+define void @single_p() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,im,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_m() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ call void asm "foo $1,$0", "=*m|r,m|r,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+ ret void
+}
+
+define void @multi_o() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %index = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %index, align 4
+ ret void
+}
+
+define void @multi_V() nounwind {
+entry:
+ ret void
+}
+
+define void @multi_lt() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|<r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* %in1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r|r,r|r<,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_gt() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|>r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* %in1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r|r,r|r>,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_r() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|m,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_i() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|i,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_n() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|n,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_E() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r|r,r|E,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define void @multi_F() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r|r,r|F,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+000) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define void @multi_s() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_g() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* @min1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ %2 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %2, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_X() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* @min1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ %2 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %2, i32* %out0, align 4
+ %3 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ store i32 %3, i32* %out0, align 4
+ %4 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind
+ store i32 %4, i32* %out0, align 4
+ %5 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+000) nounwind
+ store i32 %5, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_p() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|im,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
diff --git a/test/CodeGen/X86/mult-alt-x86.ll b/test/CodeGen/X86/mult-alt-x86.ll
new file mode 100644
index 000000000000..06175da46454
--- /dev/null
+++ b/test/CodeGen/X86/mult-alt-x86.ll
@@ -0,0 +1,358 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2
+; ModuleID = 'mult-alt-x86.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i686-pc-win32"
+
+@mout0 = common global i32 0, align 4
+@min1 = common global i32 0, align 4
+@dout0 = common global double 0.000000e+000, align 8
+@din1 = common global double 0.000000e+000, align 8
+@marray = common global [2 x i32] zeroinitializer, align 4
+
+define void @single_R() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ %0 = call i32 asm "foo $1,$0", "=R,R,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* @mout0, align 4
+ ret void
+}
+
+define void @single_q() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ %0 = call i32 asm "foo $1,$0", "=q,q,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* @mout0, align 4
+ ret void
+}
+
+define void @single_Q() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ %0 = call i32 asm "foo $1,$0", "=Q,Q,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* @mout0, align 4
+ ret void
+}
+
+define void @single_a() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ %0 = call i32 asm "foo $1,$0", "={ax},{ax},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* @mout0, align 4
+ ret void
+}
+
+define void @single_b() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ %0 = call i32 asm "foo $1,$0", "={bx},{bx},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* @mout0, align 4
+ ret void
+}
+
+define void @single_c() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ %0 = call i32 asm "foo $1,$0", "={cx},{cx},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* @mout0, align 4
+ ret void
+}
+
+define void @single_d() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ %0 = call i32 asm "foo $1,$0", "={dx},{dx},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* @mout0, align 4
+ ret void
+}
+
+define void @single_S() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ %0 = call i32 asm "foo $1,$0", "={si},{si},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* @mout0, align 4
+ ret void
+}
+
+define void @single_D() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ %0 = call i32 asm "foo $1,$0", "={di},{di},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* @mout0, align 4
+ ret void
+}
+
+define void @single_A() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ %0 = call i32 asm "foo $1,$0", "=A,A,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* @mout0, align 4
+ ret void
+}
+
+define void @single_f() nounwind {
+entry:
+ ret void
+}
+
+define void @single_t() nounwind {
+entry:
+ ret void
+}
+
+define void @single_u() nounwind {
+entry:
+ ret void
+}
+
+define void @single_y() nounwind {
+entry:
+ %tmp = load double* @din1, align 8
+ %0 = call double asm "foo $1,$0", "=y,y,~{dirflag},~{fpsr},~{flags}"(double %tmp) nounwind
+ store double %0, double* @dout0, align 8
+ ret void
+}
+
+define void @single_x() nounwind {
+entry:
+ %tmp = load double* @din1, align 8
+ %0 = call double asm "foo $1,$0", "=x,x,~{dirflag},~{fpsr},~{flags}"(double %tmp) nounwind
+ store double %0, double* @dout0, align 8
+ ret void
+}
+
+define void @single_Y0() nounwind {
+entry:
+ ret void
+}
+
+define void @single_I() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*m,I,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @single_J() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*m,J,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @single_K() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*m,K,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @single_L() nounwind {
+entry:
+; Missing lowering support for 'L'.
+; call void asm "foo $1,$0", "=*m,L,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @single_M() nounwind {
+entry:
+; Missing lowering support for 'M'.
+; call void asm "foo $1,$0", "=*m,M,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @single_N() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*m,N,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @single_G() nounwind {
+entry:
+; Missing lowering support for 'G'.
+; call void asm "foo $1,$0", "=*m,G,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, double 1.000000e+000) nounwind
+ ret void
+}
+
+define void @single_C() nounwind {
+entry:
+; Missing lowering support for 'C'.
+; call void asm "foo $1,$0", "=*m,C,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, double 1.000000e+000) nounwind
+ ret void
+}
+
+define void @single_e() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*m,e,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @single_Z() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*m,Z,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @multi_R() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ call void asm "foo $1,$0", "=*r|R|m,r|R|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+ ret void
+}
+
+define void @multi_q() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ call void asm "foo $1,$0", "=*r|q|m,r|q|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+ ret void
+}
+
+define void @multi_Q() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ call void asm "foo $1,$0", "=*r|Q|m,r|Q|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+ ret void
+}
+
+define void @multi_a() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ call void asm "foo $1,$0", "=*r|{ax}|m,r|{ax}|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+ ret void
+}
+
+define void @multi_b() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ call void asm "foo $1,$0", "=*r|{bx}|m,r|{bx}|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+ ret void
+}
+
+define void @multi_c() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ call void asm "foo $1,$0", "=*r|{cx}|m,r|{cx}|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+ ret void
+}
+
+define void @multi_d() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ call void asm "foo $1,$0", "=*r|{dx}|m,r|{dx},~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+ ret void
+}
+
+define void @multi_S() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ call void asm "foo $1,$0", "=*r|{si}|m,r|{si}|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+ ret void
+}
+
+define void @multi_D() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ call void asm "foo $1,$0", "=*r|{di}|m,r|{di}|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+ ret void
+}
+
+define void @multi_A() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ call void asm "foo $1,$0", "=*r|A|m,r|A|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+ ret void
+}
+
+define void @multi_f() nounwind {
+entry:
+ ret void
+}
+
+define void @multi_t() nounwind {
+entry:
+ ret void
+}
+
+define void @multi_u() nounwind {
+entry:
+ ret void
+}
+
+define void @multi_y() nounwind {
+entry:
+ %tmp = load double* @din1, align 8
+ call void asm "foo $1,$0", "=*r|y|m,r|y|m,~{dirflag},~{fpsr},~{flags}"(double* @dout0, double %tmp) nounwind
+ ret void
+}
+
+define void @multi_x() nounwind {
+entry:
+ %tmp = load double* @din1, align 8
+ call void asm "foo $1,$0", "=*r|x|m,r|x|m,~{dirflag},~{fpsr},~{flags}"(double* @dout0, double %tmp) nounwind
+ ret void
+}
+
+define void @multi_Y0() nounwind {
+entry:
+ ret void
+}
+
+define void @multi_I() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*r|m|m,r|I|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @multi_J() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*r|m|m,r|J|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @multi_K() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*r|m|m,r|K|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @multi_L() nounwind {
+entry:
+; Missing lowering support for 'L'.
+; call void asm "foo $1,$0", "=*r|m|m,r|L|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @multi_M() nounwind {
+entry:
+; Missing lowering support for 'M'.
+; call void asm "foo $1,$0", "=*r|m|m,r|M|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @multi_N() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*r|m|m,r|N|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @multi_G() nounwind {
+entry:
+; Missing lowering support for 'G'.
+; call void asm "foo $1,$0", "=*r|m|m,r|G|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, double 1.000000e+000) nounwind
+ ret void
+}
+
+define void @multi_C() nounwind {
+entry:
+; Missing lowering support for 'C'.
+; call void asm "foo $1,$0", "=*r|m|m,r|C|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, double 1.000000e+000) nounwind
+ ret void
+}
+
+define void @multi_e() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*r|m|m,r|e|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @multi_Z() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*r|m|m,r|Z|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
diff --git a/test/CodeGen/X86/narrow-shl-load.ll b/test/CodeGen/X86/narrow-shl-load.ll
new file mode 100644
index 000000000000..ef27cbc3418c
--- /dev/null
+++ b/test/CodeGen/X86/narrow-shl-load.ll
@@ -0,0 +1,83 @@
+; RUN: llc -march=x86-64 < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-pc-linux-gnu"
+
+; DAGCombiner should fold this code in finite time.
+; rdar://8606584
+
+define void @test1() nounwind readnone {
+bb.nph:
+ br label %while.cond
+
+while.cond: ; preds = %while.cond, %bb.nph
+ %tmp6 = load i32* undef, align 4
+ %and = or i64 undef, undef
+ %conv11 = zext i32 undef to i64
+ %conv14 = zext i32 %tmp6 to i64
+ %shl15 = shl i64 %conv14, 1
+ %shl15.masked = and i64 %shl15, 4294967294
+ %and17 = or i64 %shl15.masked, %conv11
+ %add = add i64 %and17, 1
+ %xor = xor i64 %add, %and
+ %tmp20 = load i64* undef, align 8
+ %add21 = add i64 %xor, %tmp20
+ %conv22 = trunc i64 %add21 to i32
+ store i32 %conv22, i32* undef, align 4
+ br i1 false, label %while.end, label %while.cond
+
+while.end: ; preds = %while.cond
+ ret void
+}
+
+
+; DAGCombiner shouldn't fold the sdiv (ashr) away.
+; rdar://8636812
+; CHECK: test2:
+; CHECK: sarl
+
+define i32 @test2() nounwind {
+entry:
+ %i = alloca i32, align 4
+ %j = alloca i8, align 1
+ store i32 127, i32* %i, align 4
+ store i8 0, i8* %j, align 1
+ %tmp3 = load i32* %i, align 4
+ %mul = mul nsw i32 %tmp3, 2
+ %conv4 = trunc i32 %mul to i8
+ %conv5 = sext i8 %conv4 to i32
+ %div6 = sdiv i32 %conv5, 2
+ %conv7 = trunc i32 %div6 to i8
+ %conv9 = sext i8 %conv7 to i32
+ %cmp = icmp eq i32 %conv9, -1
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ ret i32 0
+
+if.end: ; preds = %entry
+ call void @abort() noreturn
+ unreachable
+}
+
+declare void @abort() noreturn
+
+declare void @exit(i32) noreturn
+
+; DAG Combiner can't fold this into a load of byte 1 (the second byte).
+; PR8757
+define i32 @test3(i32 *%P) nounwind ssp {
+ volatile store i32 128, i32* %P
+ %tmp4.pre = load i32* %P
+ %phitmp = trunc i32 %tmp4.pre to i16
+ %phitmp13 = shl i16 %phitmp, 8
+ %phitmp14 = ashr i16 %phitmp13, 8
+ %phitmp15 = lshr i16 %phitmp14, 8
+ %phitmp16 = zext i16 %phitmp15 to i32
+ ret i32 %phitmp16
+
+; CHECK: movl $128, (%rdi)
+; CHECK-NEXT: movsbl (%rdi), %eax
+; CHECK-NEXT: movzbl %ah, %eax
+; CHECK-NEXT: ret
+}
diff --git a/test/CodeGen/X86/negative-sin.ll b/test/CodeGen/X86/negative-sin.ll
index 7842eb8456eb..76e557b84225 100644
--- a/test/CodeGen/X86/negative-sin.ll
+++ b/test/CodeGen/X86/negative-sin.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -enable-unsafe-fp-math -march=x86-64 | \
-; RUN: not egrep {addsd|subsd|xor}
+; RUN: llc < %s -enable-unsafe-fp-math -march=x86-64 | FileCheck %s
+; CHECK-NOT: {{addsd|subsd|xor}}
declare double @sin(double %f)
diff --git a/test/CodeGen/X86/non-globl-eh-frame.ll b/test/CodeGen/X86/non-globl-eh-frame.ll
new file mode 100644
index 000000000000..71349ecafeb6
--- /dev/null
+++ b/test/CodeGen/X86/non-globl-eh-frame.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -mtriple x86_64-apple-darwin10 -march x86 | not grep {{.globl\[\[:space:\]\]*__Z4funcv.eh}}
+; RUN: llc < %s -mtriple x86_64-apple-darwin9 -march x86 | FileCheck %s -check-prefix=DARWIN9
+
+%struct.__pointer_type_info_pseudo = type { %struct.__type_info_pseudo, i32, %"struct.std::type_info"* }
+%struct.__type_info_pseudo = type { i8*, i8* }
+%"struct.std::type_info" = type opaque
+
+@.str = private constant [12 x i8] c"hello world\00", align 1
+@_ZTIPc = external constant %struct.__pointer_type_info_pseudo
+
+define void @_Z4funcv() noreturn optsize ssp {
+entry:
+ %0 = tail call i8* @__cxa_allocate_exception(i64 8) nounwind
+ %1 = bitcast i8* %0 to i8**
+ store i8* getelementptr inbounds ([12 x i8]* @.str, i64 0, i64 0), i8** %1, align 8
+ tail call void @__cxa_throw(i8* %0, i8* bitcast (%struct.__pointer_type_info_pseudo* @_ZTIPc to i8*), void (i8*)* null) noreturn
+ unreachable
+}
+
+; DARWIN9: .globl __Z4funcv.eh
+
+declare i8* @__cxa_allocate_exception(i64) nounwind
+
+declare void @__cxa_throw(i8*, i8*, void (i8*)*) noreturn
diff --git a/test/CodeGen/X86/phi-immediate-factoring.ll b/test/CodeGen/X86/phi-immediate-factoring.ll
index 8bed62488070..ef02af2d7851 100644
--- a/test/CodeGen/X86/phi-immediate-factoring.ll
+++ b/test/CodeGen/X86/phi-immediate-factoring.ll
@@ -1,5 +1,5 @@
+; RUN: llc < %s -march=x86 -stats |& grep {Number of blocks eliminated} | grep 6
; PR1296
-; RUN: llc < %s -march=x86 | grep {movl \$1} | count 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/X86/phys_subreg_coalesce-2.ll b/test/CodeGen/X86/phys_subreg_coalesce-2.ll
index 23c509c9936b..13e804d94a57 100644
--- a/test/CodeGen/X86/phys_subreg_coalesce-2.ll
+++ b/test/CodeGen/X86/phys_subreg_coalesce-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | grep mov | count 5
+; RUN: llc < %s -march=x86 | grep mov | count 4
; PR2659
define i32 @binomial(i32 %n, i32 %k) nounwind {
diff --git a/test/CodeGen/X86/pic.ll b/test/CodeGen/X86/pic.ll
index a1a9759dd36c..dc5fcd78dc84 100644
--- a/test/CodeGen/X86/pic.ll
+++ b/test/CodeGen/X86/pic.ll
@@ -12,7 +12,7 @@ entry:
ret void
; LINUX: test0:
-; LINUX: call .L0$pb
+; LINUX: calll .L0$pb
; LINUX-NEXT: .L0$pb:
; LINUX-NEXT: popl
; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L0$pb),
@@ -34,7 +34,7 @@ entry:
ret void
; LINUX: test1:
-; LINUX: call .L1$pb
+; LINUX: calll .L1$pb
; LINUX-NEXT: .L1$pb:
; LINUX-NEXT: popl
; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L1$pb), %eax
@@ -54,12 +54,12 @@ entry:
; LINUX: test2:
; LINUX: pushl %ebx
; LINUX-NEXT: subl $8, %esp
-; LINUX-NEXT: call .L2$pb
+; LINUX-NEXT: calll .L2$pb
; LINUX-NEXT: .L2$pb:
; LINUX-NEXT: popl %ebx
; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L2$pb), %ebx
; LINUX: movl $40, (%esp)
-; LINUX: call malloc@PLT
+; LINUX: calll malloc@PLT
; LINUX: addl $8, %esp
; LINUX: popl %ebx
; LINUX: ret
@@ -75,13 +75,13 @@ entry:
call void(...)* %tmp1()
ret void
; LINUX: test3:
-; LINUX: call .L3$pb
+; LINUX: calll .L3$pb
; LINUX-NEXT: .L3$pb:
; LINUX: popl
; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L3$pb), %[[REG3:e..]]
; LINUX: movl pfoo@GOT(%[[REG3]]),
-; LINUX: call afoo@PLT
-; LINUX: call *
+; LINUX: calll afoo@PLT
+; LINUX: calll *
}
declare void(...)* @afoo(...)
@@ -91,10 +91,10 @@ entry:
call void(...)* @foo()
ret void
; LINUX: test4:
-; LINUX: call .L4$pb
+; LINUX: calll .L4$pb
; LINUX: popl %ebx
; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L4$pb), %ebx
-; LINUX: call foo@PLT
+; LINUX: calll foo@PLT
}
declare void @foo(...)
@@ -112,7 +112,7 @@ entry:
ret void
; LINUX: test5:
-; LINUX: call .L5$pb
+; LINUX: calll .L5$pb
; LINUX-NEXT: .L5$pb:
; LINUX-NEXT: popl %eax
; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L5$pb), %eax
@@ -134,7 +134,7 @@ entry:
; LINUX: .LCPI6_0:
; LINUX: test6:
-; LINUX: call .L6$pb
+; LINUX: calll .L6$pb
; LINUX: .L6$pb:
; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L6$pb),
; LINUX: fldl .LCPI6_0@GOTOFF(
@@ -186,7 +186,7 @@ bb12:
ret void
; LINUX: test7:
-; LINUX: call .L7$pb
+; LINUX: calll .L7$pb
; LINUX: .L7$pb:
; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L7$pb),
; LINUX: .LJTI7_0@GOTOFF(
diff --git a/test/CodeGen/X86/pic_jumptable.ll b/test/CodeGen/X86/pic_jumptable.ll
index 31071bc74a78..b6761e338aa9 100644
--- a/test/CodeGen/X86/pic_jumptable.ll
+++ b/test/CodeGen/X86/pic_jumptable.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -relocation-model=pic -mtriple=i386-linux-gnu -asm-verbose=false | not grep -F .text
+; RUN: llc < %s -relocation-model=pic -mtriple=i386-linux-gnu -asm-verbose=false | grep -F .text._Z3fooILi1EEvi,"axG",@progbits,_Z3fooILi1EEvi,comdat
; RUN: llc < %s -relocation-model=pic -mtriple=i686-apple-darwin -asm-verbose=false | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin | not grep 'lJTI'
; rdar://6971437
diff --git a/test/CodeGen/X86/popcnt.ll b/test/CodeGen/X86/popcnt.ll
new file mode 100644
index 000000000000..430214c73b13
--- /dev/null
+++ b/test/CodeGen/X86/popcnt.ll
@@ -0,0 +1,38 @@
+; RUN: llc -march=x86-64 -mattr=+popcnt < %s | FileCheck %s
+
+define i8 @cnt8(i8 %x) nounwind readnone {
+ %cnt = tail call i8 @llvm.ctpop.i8(i8 %x)
+ ret i8 %cnt
+; CHECK: cnt8:
+; CHECK: popcntw
+; CHECK: ret
+}
+
+define i16 @cnt16(i16 %x) nounwind readnone {
+ %cnt = tail call i16 @llvm.ctpop.i16(i16 %x)
+ ret i16 %cnt
+; CHECK: cnt16:
+; CHECK: popcntw
+; CHECK: ret
+}
+
+define i32 @cnt32(i32 %x) nounwind readnone {
+ %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
+ ret i32 %cnt
+; CHECK: cnt32:
+; CHECK: popcntl
+; CHECK: ret
+}
+
+define i64 @cnt64(i64 %x) nounwind readnone {
+ %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
+ ret i64 %cnt
+; CHECK: cnt64:
+; CHECK: popcntq
+; CHECK: ret
+}
+
+declare i8 @llvm.ctpop.i8(i8) nounwind readnone
+declare i16 @llvm.ctpop.i16(i16) nounwind readnone
+declare i32 @llvm.ctpop.i32(i32) nounwind readnone
+declare i64 @llvm.ctpop.i64(i64) nounwind readnone
diff --git a/test/CodeGen/X86/postra-licm.ll b/test/CodeGen/X86/postra-licm.ll
index 97cc7b4977cf..902c69b471db 100644
--- a/test/CodeGen/X86/postra-licm.ll
+++ b/test/CodeGen/X86/postra-licm.ll
@@ -68,7 +68,7 @@ bb26.preheader: ; preds = %imix_test.exit
bb23: ; preds = %imix_test.exit
unreachable
-; X86-32: %bb26.preheader.bb28_crit_edge
+; X86-32: %bb26.preheader
; X86-32: movl -16(%ebp),
; X86-32-NEXT: .align 4
; X86-32-NEXT: %bb28
diff --git a/test/CodeGen/X86/pr2659.ll b/test/CodeGen/X86/pr2659.ll
index e5daf5da9f3e..54d043d54f83 100644
--- a/test/CodeGen/X86/pr2659.ll
+++ b/test/CodeGen/X86/pr2659.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin9.4.0 | grep movl | count 5
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin9.4.0 | grep movl | count 4
; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin9.4.0 | FileCheck %s
; PR2659
@@ -14,10 +14,11 @@ forcond.preheader: ; preds = %entry
%cmp44 = icmp eq i32 %k, 0 ; <i1> [#uses=1]
br i1 %cmp44, label %afterfor, label %forbody
-; CHECK: %forcond.preheader.forbody_crit_edge
+; CHECK: %forcond.preheader
; CHECK: movl $1
; CHECK-NOT: xorl
-; CHECK-NEXT: movl
+; CHECK-NOT: movl
+; CHECK-NEXT: je
ifthen: ; preds = %entry
ret i32 0
diff --git a/test/CodeGen/X86/pr3522.ll b/test/CodeGen/X86/pr3522.ll
index 7cdeaa099271..da1623721d1c 100644
--- a/test/CodeGen/X86/pr3522.ll
+++ b/test/CodeGen/X86/pr3522.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -stats |& not grep machine-sink
+; RUN: llc < %s -march=x86 -stats |& not grep {instructions sunk}
; PR3522
target triple = "i386-pc-linux-gnu"
diff --git a/test/CodeGen/X86/pr9127.ll b/test/CodeGen/X86/pr9127.ll
new file mode 100644
index 000000000000..45b0c6c78706
--- /dev/null
+++ b/test/CodeGen/X86/pr9127.ll
@@ -0,0 +1,12 @@
+; RUN: llc -march=x86-64 < %s | FileCheck %s
+
+define i8 @foobar(double %d, double* %x) {
+entry:
+ %tmp2 = load double* %x, align 8
+ %cmp = fcmp oeq double %tmp2, %d
+ %conv3 = zext i1 %cmp to i8
+ ret i8 %conv3
+}
+
+; Test that the load is folded into the ucomisd.
+; CHECK: ucomisd (%rdi), %xmm0
diff --git a/test/CodeGen/X86/prefetch.ll b/test/CodeGen/X86/prefetch.ll
index fac5915aae88..48d2673e4884 100644
--- a/test/CodeGen/X86/prefetch.ll
+++ b/test/CodeGen/X86/prefetch.ll
@@ -1,11 +1,11 @@
-; RUN: llc < %s -march=x86 -mattr=+sse > %t
-; RUN: grep prefetchnta %t
-; RUN: grep prefetcht0 %t
-; RUN: grep prefetcht1 %t
-; RUN: grep prefetcht2 %t
+; RUN: llc < %s -march=x86 -mattr=+sse | FileCheck %s
define void @t(i8* %ptr) nounwind {
entry:
+; CHECK: prefetcht2
+; CHECK: prefetcht1
+; CHECK: prefetcht0
+; CHECK: prefetchnta
tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 1 )
tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 2 )
tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3 )
diff --git a/test/CodeGen/X86/rodata-relocs.ll b/test/CodeGen/X86/rodata-relocs.ll
index 276f8bb48d06..9291200f0110 100644
--- a/test/CodeGen/X86/rodata-relocs.ll
+++ b/test/CodeGen/X86/rodata-relocs.ll
@@ -8,14 +8,14 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
-@a = internal constant [2 x i32] [i32 1, i32 2]
-@a1 = constant [2 x i32] [i32 1, i32 2]
-@e = internal constant [2 x [2 x i32]] [[2 x i32] [i32 1, i32 2], [2 x i32] [i32 3, i32 4]], align 16
-@e1 = constant [2 x [2 x i32]] [[2 x i32] [i32 1, i32 2], [2 x i32] [i32 3, i32 4]], align 16
-@p = constant i8* bitcast ([2 x i32]* @a to i8*)
-@t = constant i8* bitcast ([2 x [2 x i32]]* @e to i8*)
-@p1 = constant i8* bitcast ([2 x i32]* @a1 to i8*)
-@t1 = constant i8* bitcast ([2 x [2 x i32]]* @e1 to i8*)
+@a = internal unnamed_addr constant [2 x i32] [i32 1, i32 2]
+@a1 = unnamed_addr constant [2 x i32] [i32 1, i32 2]
+@e = internal unnamed_addr constant [2 x [2 x i32]] [[2 x i32] [i32 1, i32 2], [2 x i32] [i32 3, i32 4]], align 16
+@e1 = unnamed_addr constant [2 x [2 x i32]] [[2 x i32] [i32 1, i32 2], [2 x i32] [i32 3, i32 4]], align 16
+@p = unnamed_addr constant i8* bitcast ([2 x i32]* @a to i8*)
+@t = unnamed_addr constant i8* bitcast ([2 x [2 x i32]]* @e to i8*)
+@p1 = unnamed_addr constant i8* bitcast ([2 x i32]* @a1 to i8*)
+@t1 = unnamed_addr constant i8* bitcast ([2 x [2 x i32]]* @e1 to i8*)
@p2 = internal global i8* bitcast([2 x i32]* @a1 to i8*)
@t2 = internal global i8* bitcast([2 x [2 x i32]]* @e1 to i8*)
@p3 = internal global i8* bitcast([2 x i32]* @a to i8*)
diff --git a/test/CodeGen/X86/scalar_widen_div.ll b/test/CodeGen/X86/scalar_widen_div.ll
index 77f320f1056e..adc58ac34b9e 100644
--- a/test/CodeGen/X86/scalar_widen_div.ll
+++ b/test/CodeGen/X86/scalar_widen_div.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -disable-mmx -march=x86-64 -mattr=+sse42 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=+sse42 | FileCheck %s
; Verify when widening a divide/remainder operation, we only generate a
; divide/rem per element since divide/remainder can trap.
diff --git a/test/CodeGen/X86/select-aggregate.ll b/test/CodeGen/X86/select-aggregate.ll
deleted file mode 100644
index 44cafe22af14..000000000000
--- a/test/CodeGen/X86/select-aggregate.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
-; PR5757
-
-; CHECK: cmovneq %rdi, %rsi
-; CHECK: movl (%rsi), %eax
-
-%0 = type { i64, i32 }
-
-define i32 @foo(%0* %p, %0* %q, i1 %r) nounwind {
- %t0 = load %0* %p
- %t1 = load %0* %q
- %t4 = select i1 %r, %0 %t0, %0 %t1
- %t5 = extractvalue %0 %t4, 1
- ret i32 %t5
-}
diff --git a/test/CodeGen/X86/select-zero-one.ll b/test/CodeGen/X86/select-zero-one.ll
deleted file mode 100644
index c38a02080523..000000000000
--- a/test/CodeGen/X86/select-zero-one.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep cmov
-; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep xor
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movzbl | count 1
-
-@r1 = weak global i32 0
-
-define void @t1(i32 %a, double %b) {
- %tmp114 = fcmp ugt double %b, 1.000000e-09
- %tmp120 = icmp eq i32 %a, 0 ; <i1> [#uses=1]
- %bothcond = or i1 %tmp114, %tmp120 ; <i1> [#uses=1]
- %storemerge = select i1 %bothcond, i32 0, i32 1 ; <i32> [#uses=2]
- store i32 %storemerge, i32* @r1, align 4
- ret void
-}
-
-@r2 = weak global i8 0
-
-define void @t2(i32 %a, double %b) {
- %tmp114 = fcmp ugt double %b, 1.000000e-09
- %tmp120 = icmp eq i32 %a, 0 ; <i1> [#uses=1]
- %bothcond = or i1 %tmp114, %tmp120 ; <i1> [#uses=1]
- %storemerge = select i1 %bothcond, i8 0, i8 1 ; <i32> [#uses=2]
- store i8 %storemerge, i8* @r2, align 4
- ret void
-}
diff --git a/test/CodeGen/X86/select.ll b/test/CodeGen/X86/select.ll
index 95ed9e97cdfd..ce04e07854a4 100644
--- a/test/CodeGen/X86/select.ll
+++ b/test/CodeGen/X86/select.ll
@@ -1,63 +1,220 @@
-; RUN: llc < %s -march=x86 -mcpu=pentium
-; RUN: llc < %s -march=x86 -mcpu=yonah
-; RUN: llc < %s -march=x86 -mcpu=yonah | not grep set
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s
+; PR5757
-define i1 @boolSel(i1 %A, i1 %B, i1 %C) nounwind {
- %X = select i1 %A, i1 %B, i1 %C ; <i1> [#uses=1]
- ret i1 %X
+%0 = type { i64, i32 }
+
+define i32 @test1(%0* %p, %0* %q, i1 %r) nounwind {
+ %t0 = load %0* %p
+ %t1 = load %0* %q
+ %t4 = select i1 %r, %0 %t0, %0 %t1
+ %t5 = extractvalue %0 %t4, 1
+ ret i32 %t5
+; CHECK: test1:
+; CHECK: cmovneq %rdi, %rsi
+; CHECK: movl (%rsi), %eax
+}
+
+
+; PR2139
+define i32 @test2() nounwind {
+entry:
+ %tmp73 = tail call i1 @return_false() ; <i8> [#uses=1]
+ %g.0 = select i1 %tmp73, i16 0, i16 -480 ; <i16> [#uses=2]
+ %tmp7778 = sext i16 %g.0 to i32 ; <i32> [#uses=1]
+ %tmp80 = shl i32 %tmp7778, 3 ; <i32> [#uses=2]
+ %tmp87 = icmp sgt i32 %tmp80, 32767 ; <i1> [#uses=1]
+ br i1 %tmp87, label %bb90, label %bb91
+bb90: ; preds = %bb84, %bb72
+ unreachable
+bb91: ; preds = %bb84
+ ret i32 0
+; CHECK: test2:
+; CHECK: movnew
+; CHECK: movswl
+}
+
+declare i1 @return_false()
+
+
+;; Select between two floating point constants.
+define float @test3(i32 %x) nounwind readnone {
+entry:
+ %0 = icmp eq i32 %x, 0 ; <i1> [#uses=1]
+ %iftmp.0.0 = select i1 %0, float 4.200000e+01, float 2.300000e+01 ; <float> [#uses=1]
+ ret float %iftmp.0.0
+; CHECK: test3:
+; CHECK: movss {{.*}},4), %xmm0
+}
+
+define signext i8 @test4(i8* nocapture %P, double %F) nounwind readonly {
+entry:
+ %0 = fcmp olt double %F, 4.200000e+01 ; <i1> [#uses=1]
+ %iftmp.0.0 = select i1 %0, i32 4, i32 0 ; <i32> [#uses=1]
+ %1 = getelementptr i8* %P, i32 %iftmp.0.0 ; <i8*> [#uses=1]
+ %2 = load i8* %1, align 1 ; <i8> [#uses=1]
+ ret i8 %2
+; CHECK: test4:
+; CHECK: movsbl ({{.*}},4), %eax
+}
+
+define void @test5(i1 %c, <2 x i16> %a, <2 x i16> %b, <2 x i16>* %p) nounwind {
+ %x = select i1 %c, <2 x i16> %a, <2 x i16> %b
+ store <2 x i16> %x, <2 x i16>* %p
+ ret void
+; CHECK: test5:
}
-define i8 @byteSel(i1 %A, i8 %B, i8 %C) nounwind {
- %X = select i1 %A, i8 %B, i8 %C ; <i8> [#uses=1]
- ret i8 %X
+define void @test6(i32 %C, <4 x float>* %A, <4 x float>* %B) nounwind {
+ %tmp = load <4 x float>* %A ; <<4 x float>> [#uses=1]
+ %tmp3 = load <4 x float>* %B ; <<4 x float>> [#uses=2]
+ %tmp9 = fmul <4 x float> %tmp3, %tmp3 ; <<4 x float>> [#uses=1]
+ %tmp.upgrd.1 = icmp eq i32 %C, 0 ; <i1> [#uses=1]
+ %iftmp.38.0 = select i1 %tmp.upgrd.1, <4 x float> %tmp9, <4 x float> %tmp ; <<4 x float>> [#uses=1]
+ store <4 x float> %iftmp.38.0, <4 x float>* %A
+ ret void
+; Verify that the fmul gets sunk into the one part of the diamond where it is
+; needed.
+; CHECK: test6:
+; CHECK: jne
+; CHECK: mulps
+; CHECK: ret
+; CHECK: ret
}
-define i16 @shortSel(i1 %A, i16 %B, i16 %C) nounwind {
- %X = select i1 %A, i16 %B, i16 %C ; <i16> [#uses=1]
- ret i16 %X
+; Select with fp80's
+define x86_fp80 @test7(i32 %tmp8) nounwind {
+ %tmp9 = icmp sgt i32 %tmp8, -1 ; <i1> [#uses=1]
+ %retval = select i1 %tmp9, x86_fp80 0xK4005B400000000000000, x86_fp80 0xK40078700000000000000
+ ret x86_fp80 %retval
+; CHECK: test7:
+; CHECK: leaq
+; CHECK: fldt (%r{{.}}x,%r{{.}}x)
}
-define i32 @intSel(i1 %A, i32 %B, i32 %C) nounwind {
- %X = select i1 %A, i32 %B, i32 %C ; <i32> [#uses=1]
- ret i32 %X
+; widening select v6i32 and then a sub
+define void @test8(i1 %c, <6 x i32>* %dst.addr, <6 x i32> %src1,<6 x i32> %src2) nounwind {
+ %x = select i1 %c, <6 x i32> %src1, <6 x i32> %src2
+ %val = sub <6 x i32> %x, < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
+ store <6 x i32> %val, <6 x i32>* %dst.addr
+ ret void
+
+; CHECK: test8:
}
-define i64 @longSel(i1 %A, i64 %B, i64 %C) nounwind {
- %X = select i1 %A, i64 %B, i64 %C ; <i64> [#uses=1]
- ret i64 %X
+
+;; Test integer select between values and constants.
+
+define i64 @test9(i64 %x, i64 %y) nounwind readnone ssp noredzone {
+ %cmp = icmp ne i64 %x, 0
+ %cond = select i1 %cmp, i64 %y, i64 -1
+ ret i64 %cond
+; CHECK: test9:
+; CHECK: cmpq $1, %rdi
+; CHECK: sbbq %rax, %rax
+; CHECK: orq %rsi, %rax
+; CHECK: ret
+}
+
+;; Same as test9
+define i64 @test9a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
+ %cmp = icmp eq i64 %x, 0
+ %cond = select i1 %cmp, i64 -1, i64 %y
+ ret i64 %cond
+; CHECK: test9a:
+; CHECK: cmpq $1, %rdi
+; CHECK: sbbq %rax, %rax
+; CHECK: orq %rsi, %rax
+; CHECK: ret
+}
+
+define i64 @test9b(i64 %x, i64 %y) nounwind readnone ssp noredzone {
+ %cmp = icmp eq i64 %x, 0
+ %A = sext i1 %cmp to i64
+ %cond = or i64 %y, %A
+ ret i64 %cond
+; CHECK: test9b:
+; CHECK: cmpq $1, %rdi
+; CHECK: sbbq %rax, %rax
+; CHECK: orq %rsi, %rax
+; CHECK: ret
}
-define double @doubleSel(i1 %A, double %B, double %C) nounwind {
- %X = select i1 %A, double %B, double %C ; <double> [#uses=1]
- ret double %X
+;; Select between -1 and 1.
+define i64 @test10(i64 %x, i64 %y) nounwind readnone ssp noredzone {
+ %cmp = icmp eq i64 %x, 0
+ %cond = select i1 %cmp, i64 -1, i64 1
+ ret i64 %cond
+; CHECK: test10:
+; CHECK: cmpq $1, %rdi
+; CHECK: sbbq %rax, %rax
+; CHECK: orq $1, %rax
+; CHECK: ret
}
-define i8 @foldSel(i1 %A, i8 %B, i8 %C) nounwind {
- %Cond = icmp slt i8 %B, %C ; <i1> [#uses=1]
- %X = select i1 %Cond, i8 %B, i8 %C ; <i8> [#uses=1]
- ret i8 %X
+
+
+define i64 @test11(i64 %x, i64 %y) nounwind readnone ssp noredzone {
+ %cmp = icmp eq i64 %x, 0
+ %cond = select i1 %cmp, i64 %y, i64 -1
+ ret i64 %cond
+; CHECK: test11:
+; CHECK: cmpq $1, %rdi
+; CHECK: sbbq %rax, %rax
+; CHECK: notq %rax
+; CHECK: orq %rsi, %rax
+; CHECK: ret
}
-define i32 @foldSel2(i1 %A, i32 %B, i32 %C) nounwind {
- %Cond = icmp eq i32 %B, %C ; <i1> [#uses=1]
- %X = select i1 %Cond, i32 %B, i32 %C ; <i32> [#uses=1]
- ret i32 %X
+define i64 @test11a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
+ %cmp = icmp ne i64 %x, 0
+ %cond = select i1 %cmp, i64 -1, i64 %y
+ ret i64 %cond
+; CHECK: test11a:
+; CHECK: cmpq $1, %rdi
+; CHECK: sbbq %rax, %rax
+; CHECK: notq %rax
+; CHECK: orq %rsi, %rax
+; CHECK: ret
}
-define i32 @foldSel2a(i1 %A, i32 %B, i32 %C, double %X, double %Y) nounwind {
- %Cond = fcmp olt double %X, %Y ; <i1> [#uses=1]
- %X.upgrd.1 = select i1 %Cond, i32 %B, i32 %C ; <i32> [#uses=1]
- ret i32 %X.upgrd.1
+
+declare noalias i8* @_Znam(i64) noredzone
+
+define noalias i8* @test12(i64 %count) nounwind ssp noredzone {
+entry:
+ %A = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %count, i64 4)
+ %B = extractvalue { i64, i1 } %A, 1
+ %C = extractvalue { i64, i1 } %A, 0
+ %D = select i1 %B, i64 -1, i64 %C
+ %call = tail call noalias i8* @_Znam(i64 %D) nounwind noredzone
+ ret i8* %call
+; CHECK: test12:
+; CHECK: mulq
+; CHECK: movq $-1, %rdi
+; CHECK: cmovnoq %rax, %rdi
+; CHECK: jmp __Znam
}
-define float @foldSel3(i1 %A, float %B, float %C, i32 %X, i32 %Y) nounwind {
- %Cond = icmp ult i32 %X, %Y ; <i1> [#uses=1]
- %X.upgrd.2 = select i1 %Cond, float %B, float %C ; <float> [#uses=1]
- ret float %X.upgrd.2
+declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64) nounwind readnone
+
+define i32 @test13(i32 %a, i32 %b) nounwind {
+ %c = icmp ult i32 %a, %b
+ %d = sext i1 %c to i32
+ ret i32 %d
+; CHECK: test13:
+; CHECK: cmpl
+; CHECK-NEXT: sbbl
+; CHECK-NEXT: ret
}
-define float @nofoldSel4(i1 %A, float %B, float %C, i32 %X, i32 %Y) nounwind {
- %Cond = icmp slt i32 %X, %Y ; <i1> [#uses=1]
- %X.upgrd.3 = select i1 %Cond, float %B, float %C ; <float> [#uses=1]
- ret float %X.upgrd.3
+define i32 @test14(i32 %a, i32 %b) nounwind {
+ %c = icmp uge i32 %a, %b
+ %d = sext i1 %c to i32
+ ret i32 %d
+; CHECK: test14:
+; CHECK: cmpl
+; CHECK-NEXT: sbbl
+; CHECK-NEXT: notl
+; CHECK-NEXT: ret
}
+
diff --git a/test/CodeGen/X86/sext-select.ll b/test/CodeGen/X86/sext-select.ll
deleted file mode 100644
index 4aca0407b36f..000000000000
--- a/test/CodeGen/X86/sext-select.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llc < %s -march=x86 | grep movsw
-; PR2139
-
-declare void @abort()
-
-define i32 @main() {
-entry:
- %tmp73 = tail call i1 @return_false() ; <i8> [#uses=1]
- %g.0 = select i1 %tmp73, i16 0, i16 -480 ; <i16> [#uses=2]
- %tmp7778 = sext i16 %g.0 to i32 ; <i32> [#uses=1]
- %tmp80 = shl i32 %tmp7778, 3 ; <i32> [#uses=2]
- %tmp87 = icmp sgt i32 %tmp80, 32767 ; <i1> [#uses=1]
- br i1 %tmp87, label %bb90, label %bb91
-bb90: ; preds = %bb84, %bb72
- tail call void @abort()
- unreachable
-bb91: ; preds = %bb84
- ret i32 0
-}
-
-define i1 @return_false() {
- ret i1 0
-}
diff --git a/test/CodeGen/X86/shift-folding.ll b/test/CodeGen/X86/shift-folding.ll
index 48ca36ca9813..d9c3061ff687 100644
--- a/test/CodeGen/X86/shift-folding.ll
+++ b/test/CodeGen/X86/shift-folding.ll
@@ -1,21 +1,21 @@
; RUN: llc < %s -march=x86 | \
; RUN: grep {s\[ah\]\[rl\]l} | count 1
-define i32* @test1(i32* %P, i32 %X) {
+define i32* @test1(i32* %P, i32 %X) nounwind {
%Y = lshr i32 %X, 2 ; <i32> [#uses=1]
%gep.upgrd.1 = zext i32 %Y to i64 ; <i64> [#uses=1]
%P2 = getelementptr i32* %P, i64 %gep.upgrd.1 ; <i32*> [#uses=1]
ret i32* %P2
}
-define i32* @test2(i32* %P, i32 %X) {
+define i32* @test2(i32* %P, i32 %X) nounwind {
%Y = shl i32 %X, 2 ; <i32> [#uses=1]
%gep.upgrd.2 = zext i32 %Y to i64 ; <i64> [#uses=1]
%P2 = getelementptr i32* %P, i64 %gep.upgrd.2 ; <i32*> [#uses=1]
ret i32* %P2
}
-define i32* @test3(i32* %P, i32 %X) {
+define i32* @test3(i32* %P, i32 %X) nounwind {
%Y = ashr i32 %X, 2 ; <i32> [#uses=1]
%P2 = getelementptr i32* %P, i32 %Y ; <i32*> [#uses=1]
ret i32* %P2
diff --git a/test/CodeGen/X86/sibcall-3.ll b/test/CodeGen/X86/sibcall-3.ll
index f0d66cf7b696..f97abe002957 100644
--- a/test/CodeGen/X86/sibcall-3.ll
+++ b/test/CodeGen/X86/sibcall-3.ll
@@ -3,7 +3,7 @@
define void @t1(i8* inreg %dst, i8* inreg %src, i8* inreg %len) nounwind {
; CHECK: t1:
-; CHECK: call 0
+; CHECK: calll 0
tail call void null(i8* inreg %dst, i8* inreg %src, i8* inreg %len) nounwind
ret void
}
diff --git a/test/CodeGen/X86/sibcall-5.ll b/test/CodeGen/X86/sibcall-5.ll
new file mode 100644
index 000000000000..9d74121b4301
--- /dev/null
+++ b/test/CodeGen/X86/sibcall-5.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin8 -mattr=+sse2 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X64
+
+; Sibcall optimization of expanded libcalls.
+; rdar://8707777
+
+define double @foo(double %a) nounwind readonly ssp {
+entry:
+; X32: foo:
+; X32: jmp _sin$stub
+
+; X64: foo:
+; X64: jmp _sin
+ %0 = tail call double @sin(double %a) nounwind readonly
+ ret double %0
+}
+
+define float @bar(float %a) nounwind readonly ssp {
+; X32: bar:
+; X32: jmp _sinf$stub
+
+; X64: bar:
+; X64: jmp _sinf
+entry:
+ %0 = tail call float @sinf(float %a) nounwind readonly
+ ret float %0
+}
+
+declare float @sinf(float) nounwind readonly
+
+declare double @sin(double) nounwind readonly
diff --git a/test/CodeGen/X86/sibcall.ll b/test/CodeGen/X86/sibcall.ll
index a3c9957be34e..de2a81e80bd4 100644
--- a/test/CodeGen/X86/sibcall.ll
+++ b/test/CodeGen/X86/sibcall.ll
@@ -1,7 +1,5 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=32
-; RUN: llc < %s -march=x86-64 -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=64
-; Darwin 8 generates stubs, which don't match
-; XFAIL: apple-darwin8
+; RUN: llc < %s -mtriple=i686-linux -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=32
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=64
define void @t1(i32 %x) nounwind ssp {
entry:
@@ -45,7 +43,7 @@ declare i32 @foo3()
define void @t4(void (i32)* nocapture %x) nounwind ssp {
entry:
; 32: t4:
-; 32: call *
+; 32: calll *
; FIXME: gcc can generate a tailcall for this. But it's tricky.
; 64: t4:
@@ -71,7 +69,7 @@ entry:
define i32 @t6(i32 %x) nounwind ssp {
entry:
; 32: t6:
-; 32: call {{_?}}t6
+; 32: calll {{_?}}t6
; 32: jmp {{_?}}bar
; 64: t6:
@@ -108,7 +106,7 @@ declare i32 @bar2(i32, i32, i32)
define signext i16 @t8() nounwind ssp {
entry:
; 32: t8:
-; 32: call {{_?}}bar3
+; 32: calll {{_?}}bar3
; 64: t8:
; 64: callq {{_?}}bar3
@@ -121,7 +119,7 @@ declare signext i16 @bar3()
define signext i16 @t9(i32 (i32)* nocapture %x) nounwind ssp {
entry:
; 32: t9:
-; 32: call *
+; 32: calll *
; 64: t9:
; 64: callq *
@@ -133,7 +131,7 @@ entry:
define void @t10() nounwind ssp {
entry:
; 32: t10:
-; 32: call
+; 32: calll
; 64: t10:
; 64: callq
@@ -205,12 +203,12 @@ declare i32 @foo6(i32, i32, %struct.t* byval align 4)
define %struct.ns* @t13(%struct.cp* %yy) nounwind ssp {
; 32: t13:
; 32-NOT: jmp
-; 32: call
+; 32: calll
; 32: ret
; 64: t13:
; 64-NOT: jmp
-; 64: call
+; 64: callq
; 64: ret
entry:
%0 = tail call fastcc %struct.ns* @foo7(%struct.cp* byval align 4 %yy, i8 signext 0) nounwind
@@ -248,7 +246,7 @@ entry:
define void @t15(%struct.foo* noalias sret %agg.result) nounwind {
; 32: t15:
-; 32: call {{_?}}f
+; 32: calll {{_?}}f
; 32: ret $4
; 64: t15:
@@ -263,7 +261,7 @@ declare void @f(%struct.foo* noalias sret) nounwind
define void @t16() nounwind ssp {
entry:
; 32: t16:
-; 32: call {{_?}}bar4
+; 32: calll {{_?}}bar4
; 32: fstp
; 64: t16:
@@ -293,7 +291,7 @@ declare void @bar5(...)
define void @t18() nounwind ssp {
entry:
; 32: t18:
-; 32: call {{_?}}bar6
+; 32: calll {{_?}}bar6
; 32: fstp %st(0)
; 64: t18:
@@ -309,7 +307,7 @@ define void @t19() alignstack(32) nounwind {
entry:
; CHECK: t19:
; CHECK: andl $-32
-; CHECK: call {{_?}}foo
+; CHECK: calll {{_?}}foo
tail call void @foo() nounwind
ret void
}
@@ -323,7 +321,7 @@ declare void @foo()
define double @t20(double %x) nounwind {
entry:
; 32: t20:
-; 32: call {{_?}}foo20
+; 32: calll {{_?}}foo20
; 32: fldl (%esp)
; 64: t20:
diff --git a/test/CodeGen/X86/sink-hoist.ll b/test/CodeGen/X86/sink-hoist.ll
index acba5288c0d1..31f41eebc5aa 100644
--- a/test/CodeGen/X86/sink-hoist.ll
+++ b/test/CodeGen/X86/sink-hoist.ll
@@ -6,10 +6,11 @@
; that it's conditionally evaluated.
; CHECK: foo:
-; CHECK: divsd
; CHECK-NEXT: testb $1, %dil
-; CHECK-NEXT: jne
+; CHECK-NEXT: je
; CHECK-NEXT: divsd
+; CHECK-NEXT: ret
+; CHECK: divsd
define double @foo(double %x, double %y, i1 %c) nounwind {
%a = fdiv double %x, 3.2
@@ -18,6 +19,24 @@ define double @foo(double %x, double %y, i1 %c) nounwind {
ret double %z
}
+; Make sure the critical edge is broken so the divsd is sunk below
+; the conditional branch.
+; rdar://8454886
+
+; CHECK: split:
+; CHECK-NEXT: testb $1, %dil
+; CHECK-NEXT: je
+; CHECK-NEXT: divsd
+; CHECK-NEXT: ret
+; CHECK: movaps
+; CHECK-NEXT: ret
+define double @split(double %x, double %y, i1 %c) nounwind {
+ %a = fdiv double %x, 3.2
+ %z = select i1 %c, double %a, double %y
+ ret double %z
+}
+
+
; Hoist floating-point constant-pool loads out of loops.
; CHECK: bar:
@@ -68,9 +87,9 @@ return:
; Codegen should hoist and CSE these constants.
; CHECK: vv:
-; CHECK: LCPI2_0(%rip), %xmm0
-; CHECK: LCPI2_1(%rip), %xmm1
-; CHECK: LCPI2_2(%rip), %xmm2
+; CHECK: LCPI3_0(%rip), %xmm0
+; CHECK: LCPI3_1(%rip), %xmm1
+; CHECK: LCPI3_2(%rip), %xmm2
; CHECK: align
; CHECK-NOT: LCPI
; CHECK: ret
diff --git a/test/CodeGen/X86/split-select.ll b/test/CodeGen/X86/split-select.ll
deleted file mode 100644
index 07d4d52f97a3..000000000000
--- a/test/CodeGen/X86/split-select.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=x86-64 | grep test | count 1
-
-define void @foo(i1 %c, <2 x i16> %a, <2 x i16> %b, <2 x i16>* %p) {
- %x = select i1 %c, <2 x i16> %a, <2 x i16> %b
- store <2 x i16> %x, <2 x i16>* %p
- ret void
-}
diff --git a/test/CodeGen/X86/sse-align-11.ll b/test/CodeGen/X86/sse-align-11.ll
index 3cc83ca0db2a..9f5d4b40d61a 100644
--- a/test/CodeGen/X86/sse-align-11.ll
+++ b/test/CodeGen/X86/sse-align-11.ll
@@ -1,5 +1,6 @@
; RUN: llc < %s -march=x86 -mcpu=yonah -mtriple=i686-apple-darwin8 | grep movaps
-; RUN: llc < %s -march=x86 -mcpu=yonah -mtriple=i686-linux-gnu | grep movups
+; RUN: llc < %s -march=x86 -mcpu=yonah -mtriple=i686-linux-gnu | grep movaps
+; PR8969 - make 32-bit linux have a 16-byte aligned stack
define <4 x float> @foo(float %a, float %b, float %c, float %d) nounwind {
entry:
diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll
index 6fc019071f8b..5c3e32f016a7 100644
--- a/test/CodeGen/X86/sse2.ll
+++ b/test/CodeGen/X86/sse2.ll
@@ -192,3 +192,33 @@ entry:
; CHECK: test15:
; CHECK: movhlps %xmm1, %xmm0
}
+
+; PR8900
+; CHECK: test16:
+; CHECK: unpcklpd
+; CHECK: ret
+
+define <2 x double> @test16(<4 x double> * nocapture %srcA, <2 x double>* nocapture %dst) {
+ %i5 = getelementptr inbounds <4 x double>* %srcA, i32 3
+ %i6 = load <4 x double>* %i5, align 32
+ %i7 = shufflevector <4 x double> %i6, <4 x double> undef, <2 x i32> <i32 0, i32 2>
+ ret <2 x double> %i7
+}
+
+; PR9009
+define fastcc void @test17() nounwind {
+entry:
+ %0 = insertelement <4 x i32> undef, i32 undef, i32 1
+ %1 = shufflevector <4 x i32> <i32 undef, i32 undef, i32 32768, i32 32768>, <4 x i32> %0, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+ %2 = bitcast <4 x i32> %1 to <4 x float>
+ store <4 x float> %2, <4 x float> * undef
+ ret void
+}
+
+; PR9210
+define <4 x float> @f(<4 x double>) nounwind {
+entry:
+ %double2float.i = fptrunc <4 x double> %0 to <4 x float>
+ ret <4 x float> %double2float.i
+}
+
diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll
index 206cdff1ba7d..9a60091a0cf0 100644
--- a/test/CodeGen/X86/sse3.ll
+++ b/test/CodeGen/X86/sse3.ll
@@ -169,7 +169,7 @@ define internal void @t10() nounwind {
ret void
; X64: t10:
; X64: pextrw $4, %xmm0, %eax
-; X64: movlhps %xmm1, %xmm1
+; X64: unpcklpd %xmm1, %xmm1
; X64: pshuflw $8, %xmm1, %xmm1
; X64: pinsrw $2, %eax, %xmm1
; X64: pextrw $6, %xmm0, %eax
@@ -260,3 +260,18 @@ entry:
; X64: pinsrw $1, %eax, %xmm0
; X64: ret
}
+
+; rdar://8520311
+define <4 x i32> @t17() nounwind {
+entry:
+; X64: t17:
+; X64: movddup (%rax), %xmm0
+ %tmp1 = load <4 x float>* undef, align 16
+ %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+ %tmp3 = load <4 x float>* undef, align 16
+ %tmp4 = shufflevector <4 x float> %tmp2, <4 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
+ %tmp5 = bitcast <4 x float> %tmp3 to <4 x i32>
+ %tmp6 = shufflevector <4 x i32> %tmp5, <4 x i32> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
+ %tmp7 = and <4 x i32> %tmp6, <i32 undef, i32 undef, i32 -1, i32 0>
+ ret <4 x i32> %tmp7
+}
diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll
index 3a14fa26300c..2ac4cb435a75 100644
--- a/test/CodeGen/X86/sse41.ll
+++ b/test/CodeGen/X86/sse41.ll
@@ -200,11 +200,11 @@ define i32 @ptestz_2(<4 x float> %t1, <4 x float> %t2) nounwind {
ret i32 %tmp1
; X32: _ptestz_2:
; X32: ptest %xmm1, %xmm0
-; X32: setb %al
+; X32: sbbl %eax
; X64: _ptestz_2:
; X64: ptest %xmm1, %xmm0
-; X64: setb %al
+; X64: sbbl %eax
}
define i32 @ptestz_3(<4 x float> %t1, <4 x float> %t2) nounwind {
diff --git a/test/CodeGen/X86/stack-align.ll b/test/CodeGen/X86/stack-align.ll
index 8ca0b12b547f..793c0267124c 100644
--- a/test/CodeGen/X86/stack-align.ll
+++ b/test/CodeGen/X86/stack-align.ll
@@ -7,7 +7,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
target triple = "i686-apple-darwin8"
@G = external global double
-define void @test({ double, double }* byval %z, double* %P) {
+define void @test({ double, double }* byval %z, double* %P) nounwind {
entry:
%tmp3 = load double* @G, align 16 ; <double> [#uses=1]
%tmp4 = tail call double @fabs( double %tmp3 ) ; <double> [#uses=1]
@@ -21,14 +21,14 @@ entry:
ret void
}
-define void @test2() alignstack(16) {
+define void @test2() alignstack(16) nounwind {
entry:
; CHECK: andl{{.*}}$-16, %esp
ret void
}
; Use a call to force a spill.
-define <2 x double> @test3(<2 x double> %x, <2 x double> %y) alignstack(32) {
+define <2 x double> @test3(<2 x double> %x, <2 x double> %y) alignstack(32) nounwind {
entry:
; CHECK: andl{{.*}}$-32, %esp
call void @test2()
@@ -38,3 +38,14 @@ entry:
declare double @fabs(double)
+; The pointer is already known aligned, so and x,-16 is eliminable.
+define i32 @test4() nounwind {
+entry:
+ %buffer = alloca [2048 x i8], align 16
+ %0 = ptrtoint [2048 x i8]* %buffer to i32
+ %and = and i32 %0, -16
+ ret i32 %and
+; CHECK: test4:
+; CHECK-NOT: and
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/stdcall-notailcall.ll b/test/CodeGen/X86/stdcall-notailcall.ll
new file mode 100644
index 000000000000..8e33c30bf293
--- /dev/null
+++ b/test/CodeGen/X86/stdcall-notailcall.ll
@@ -0,0 +1,13 @@
+; RUN: llc -mtriple=i386-apple-darwin11 -O2 < %s | FileCheck %s
+
+%struct.I = type { i32 (...)** }
+define x86_stdcallcc void @bar(%struct.I* nocapture %this) ssp align 2 {
+; CHECK: bar:
+; CHECK-NOT: jmp
+; CHECK: ret $4
+entry:
+ tail call void @foo()
+ ret void
+}
+
+declare void @foo()
diff --git a/test/CodeGen/X86/store-narrow.ll b/test/CodeGen/X86/store-narrow.ll
index abc5174c98de..0dd228eb145f 100644
--- a/test/CodeGen/X86/store-narrow.ll
+++ b/test/CodeGen/X86/store-narrow.ll
@@ -152,3 +152,17 @@ define void @test9() nounwind {
store i32 %or, i32* @g_16
ret void
}
+
+; rdar://8494845 + PR8244
+; X64: test10:
+; X64-NEXT: movsbl (%rdi), %eax
+; X64-NEXT: shrl $8, %eax
+; X64-NEXT: ret
+define i8 @test10(i8* %P) nounwind ssp {
+entry:
+ %tmp = load i8* %P, align 1
+ %conv = sext i8 %tmp to i32
+ %shr3 = lshr i32 %conv, 8
+ %conv2 = trunc i32 %shr3 to i8
+ ret i8 %conv2
+}
diff --git a/test/CodeGen/X86/store_op_load_fold2.ll b/test/CodeGen/X86/store_op_load_fold2.ll
index 46e59e95e53f..11686227ab9c 100644
--- a/test/CodeGen/X86/store_op_load_fold2.ll
+++ b/test/CodeGen/X86/store_op_load_fold2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | FileCheck %s
+; RUN: llc < %s -mtriple=i686-linux -x86-asm-syntax=intel | FileCheck %s
target datalayout = "e-p:32:32"
%struct.Macroblock = type { i32, i32, i32, i32, i32, [8 x i32], %struct.Macroblock*, %struct.Macroblock*, i32, [2 x [4 x [4 x [2 x i32]]]], [16 x i8], [16 x i8], i32, i64, [4 x i32], [4 x i32], i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i16, double, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/switch-bt.ll b/test/CodeGen/X86/switch-bt.ll
index ed3266ec422b..9f491d452fa8 100644
--- a/test/CodeGen/X86/switch-bt.ll
+++ b/test/CodeGen/X86/switch-bt.ll
@@ -49,3 +49,33 @@ sw.epilog: ; preds = %sw.default, %sw.bb4
}
declare void @foo(i32)
+
+; Don't zero extend the test operands to pointer type if it can be avoided.
+; rdar://8781238
+define void @test2(i32 %x) nounwind ssp {
+; CHECK: test2:
+; CHECK: cmpl $6
+; CHECK: ja
+
+; CHECK-NEXT: movl $91
+; CHECK-NOT: movl
+; CHECK-NEXT: btl
+; CHECK-NEXT: jb
+entry:
+ switch i32 %x, label %if.end [
+ i32 6, label %if.then
+ i32 4, label %if.then
+ i32 3, label %if.then
+ i32 1, label %if.then
+ i32 0, label %if.then
+ ]
+
+if.then: ; preds = %entry, %entry, %entry, %entry, %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+declare void @bar()
diff --git a/test/CodeGen/X86/switch-or.ll b/test/CodeGen/X86/switch-or.ll
new file mode 100644
index 000000000000..75832c7d304c
--- /dev/null
+++ b/test/CodeGen/X86/switch-or.ll
@@ -0,0 +1,22 @@
+; RUN: llc -march=x86 -asm-verbose=false < %s | FileCheck %s
+
+; Check that merging switch cases that differ in one bit works.
+; CHECK: orl $2
+; CHECK-NEXT: cmpl $6
+
+define void @foo(i32 %variable) nounwind {
+entry:
+ switch i32 %variable, label %if.end [
+ i32 4, label %if.then
+ i32 6, label %if.then
+ ]
+
+if.then:
+ %call = tail call i32 (...)* @bar() nounwind
+ ret void
+
+if.end:
+ ret void
+}
+
+declare i32 @bar(...) nounwind
diff --git a/test/CodeGen/X86/tail-opts.ll b/test/CodeGen/X86/tail-opts.ll
index 9662ad6cd740..9291695f4d65 100644
--- a/test/CodeGen/X86/tail-opts.ll
+++ b/test/CodeGen/X86/tail-opts.ll
@@ -62,11 +62,11 @@ declare i8* @choose(i8*, i8*)
; CHECK: tail_duplicate_me:
; CHECK: movl $0, GHJK(%rip)
-; CHECK-NEXT: jmpq *%rbx
+; CHECK-NEXT: jmpq *%r
; CHECK: movl $0, GHJK(%rip)
-; CHECK-NEXT: jmpq *%rbx
+; CHECK-NEXT: jmpq *%r
; CHECK: movl $0, GHJK(%rip)
-; CHECK-NEXT: jmpq *%rbx
+; CHECK-NEXT: jmpq *%r
define void @tail_duplicate_me() nounwind {
entry:
@@ -153,19 +153,16 @@ bb30:
; an unconditional jump to complete a two-way conditional branch.
; CHECK: c_expand_expr_stmt:
-; CHECK: jmp .LBB3_7
-; CHECK-NEXT: .LBB3_12:
+; CHECK: jmp .LBB3_11
+; CHECK-NEXT: .LBB3_9:
; CHECK-NEXT: movq 8(%rax), %rax
+; CHECK-NEXT: xorb %dl, %dl
; CHECK-NEXT: movb 16(%rax), %al
; CHECK-NEXT: cmpb $16, %al
-; CHECK-NEXT: je .LBB3_6
+; CHECK-NEXT: je .LBB3_11
; CHECK-NEXT: cmpb $23, %al
-; CHECK-NEXT: je .LBB3_6
-; CHECK-NEXT: jmp .LBB3_15
-; CHECK-NEXT: .LBB3_14:
-; CHECK-NEXT: cmpb $23, %bl
-; CHECK-NEXT: jne .LBB3_15
-; CHECK-NEXT: .LBB3_15:
+; CHECK-NEXT: jne .LBB3_14
+; CHECK-NEXT: .LBB3_11:
%0 = type { %struct.rtx_def* }
%struct.lang_decl = type opaque
@@ -276,7 +273,7 @@ declare fastcc %union.tree_node* @default_conversion(%union.tree_node*) nounwind
; CHECK: foo:
; CHECK: callq func
; CHECK-NEXT: .LBB4_2:
-; CHECK-NEXT: addq $8, %rsp
+; CHECK-NEXT: popq
; CHECK-NEXT: ret
define void @foo(i1* %V) nounwind {
diff --git a/test/CodeGen/X86/tailcall-largecode.ll b/test/CodeGen/X86/tailcall-largecode.ll
index c7070f2abd25..c3f4278aecbe 100644
--- a/test/CodeGen/X86/tailcall-largecode.ll
+++ b/test/CodeGen/X86/tailcall-largecode.ll
@@ -17,7 +17,7 @@ define fastcc i32 @indirect_manyargs(i32(i32,i32,i32,i32,i32,i32,i32)* %target)
; Adjust the stack to enter the function. (The amount of the
; adjustment may change in the future, in which case the location of
; the stack argument and the return adjustment will change too.)
-; CHECK: subq $8, %rsp
+; CHECK: pushq
; Put the call target into R11, which won't be clobbered while restoring
; callee-saved registers and won't be used for passing arguments.
; CHECK: movq %rdi, %rax
@@ -31,7 +31,7 @@ define fastcc i32 @indirect_manyargs(i32(i32,i32,i32,i32,i32,i32,i32)* %target)
; CHECK: movl $5, %r8d
; CHECK: movl $6, %r9d
; Adjust the stack to "return".
-; CHECK: addq $8, %rsp
+; CHECK: popq
; And tail-call to the target.
; CHECK: jmpq *%rax # TAILCALL
%res = tail call fastcc i32 %target(i32 1, i32 2, i32 3, i32 4, i32 5,
@@ -46,7 +46,7 @@ define fastcc i32 @direct_manyargs() {
; Adjust the stack to enter the function. (The amount of the
; adjustment may change in the future, in which case the location of
; the stack argument and the return adjustment will change too.)
-; CHECK: subq $8, %rsp
+; CHECK: pushq
; Pass the stack argument.
; CHECK: movl $7, 16(%rsp)
; Pass the register arguments, in the right registers.
@@ -62,7 +62,7 @@ define fastcc i32 @direct_manyargs() {
; arguments.
; CHECK: movabsq $manyargs_callee, %rax
; Adjust the stack to "return".
-; CHECK: addq $8, %rsp
+; CHECK: popq
; And tail-call to the target.
; CHECK: jmpq *%rax # TAILCALL
%res = tail call fastcc i32 @manyargs_callee(i32 1, i32 2, i32 3, i32 4,
diff --git a/test/CodeGen/X86/tailcall-ri64.ll b/test/CodeGen/X86/tailcall-ri64.ll
new file mode 100644
index 000000000000..914d8f7b8bc7
--- /dev/null
+++ b/test/CodeGen/X86/tailcall-ri64.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=AMD64
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64
+; PR8743
+; TAILJMPri64 should not receive "callee-saved" registers beyond epilogue.
+
+; AMD64: jmpq
+; AMD64-NOT: %{{e[a-z]|rbx|rbp|r10|r12|r13|r14|r15}}
+
+; WIN64: jmpq
+; WIN64-NOT: %{{e[a-z]|rbx|rsi|rdi|rbp|r12|r13|r14|r15}}
+
+%class = type { [8 x i8] }
+%vt = type { i32 (...)** }
+
+define %vt* @_ZN4llvm9UnsetInit20convertInitializerToEPNS_5RecTyE(%class*
+%this, %vt* %Ty) align 2 {
+entry:
+ %0 = bitcast %vt* %Ty to %vt* (%vt*, %class*)***
+ %vtable = load %vt* (%vt*, %class*)*** %0, align 8
+ %vfn = getelementptr inbounds %vt* (%vt*, %class*)** %vtable, i64 4
+ %1 = load %vt* (%vt*, %class*)** %vfn, align 8
+ %call = tail call %vt* %1(%vt* %Ty, %class* %this)
+ ret %vt* %call
+}
diff --git a/test/CodeGen/X86/tailcall-stackalign.ll b/test/CodeGen/X86/tailcall-stackalign.ll
index 0233139e8082..d3f811cff248 100644
--- a/test/CodeGen/X86/tailcall-stackalign.ll
+++ b/test/CodeGen/X86/tailcall-stackalign.ll
@@ -19,5 +19,5 @@ define i32 @main(i32 %argc, i8** %argv) {
ret i32 0
}
-; CHECK: call tailcaller
+; CHECK: calll tailcaller
; CHECK-NEXT: subl $12
diff --git a/test/CodeGen/X86/tailcallfp2.ll b/test/CodeGen/X86/tailcallfp2.ll
index 4ec127f81ac7..04c4e95710c5 100644
--- a/test/CodeGen/X86/tailcallfp2.ll
+++ b/test/CodeGen/X86/tailcallfp2.ll
@@ -1,8 +1,9 @@
-; RUN: llc < %s -march=x86 -tailcallopt | grep {jmp} | grep {\\*%edx}
+; RUN: llc < %s -march=x86 -tailcallopt | FileCheck %s
declare i32 @putchar(i32)
define fastcc i32 @checktail(i32 %x, i32* %f, i32 %g) nounwind {
+; CHECK: checktail:
%tmp1 = icmp sgt i32 %x, 0
br i1 %tmp1, label %if-then, label %if-else
@@ -10,6 +11,7 @@ if-then:
%fun_ptr = bitcast i32* %f to i32(i32, i32*, i32)*
%arg1 = add i32 %x, -1
call i32 @putchar(i32 90)
+; CHECK: jmpl *%e{{.*}}
%res = tail call fastcc i32 %fun_ptr( i32 %arg1, i32 * %f, i32 %g)
ret i32 %res
diff --git a/test/CodeGen/X86/tailcallstack64.ll b/test/CodeGen/X86/tailcallstack64.ll
index 107bdf9de3e7..0c732d56b6ca 100644
--- a/test/CodeGen/X86/tailcallstack64.ll
+++ b/test/CodeGen/X86/tailcallstack64.ll
@@ -1,16 +1,20 @@
-; RUN: llc < %s -tailcallopt -march=x86-64 -post-RA-scheduler=true | FileCheck %s
+; RUN: llc < %s -tailcallopt -mtriple=x86_64-linux -post-RA-scheduler=true | FileCheck %s
+; RUN: llc < %s -tailcallopt -mtriple=x86_64-win32 -post-RA-scheduler=true | FileCheck %s
+
+; FIXME: Redundant unused stack allocation could be eliminated.
+; CHECK: subq ${{24|88}}, %rsp
; Check that lowered arguments on the stack do not overwrite each other.
; Add %in1 %p1 to a different temporary register (%eax).
-; CHECK: movl 32(%rsp), %eax
+; CHECK: movl [[A1:32|144]](%rsp), %eax
; Move param %in1 to temp register (%r10d).
-; CHECK: movl 40(%rsp), %r10d
+; CHECK: movl [[A2:40|152]](%rsp), %r10d
; Add %in1 %p1 to a different temporary register (%eax).
-; CHECK: addl %edi, %eax
+; CHECK: addl {{%edi|%ecx}}, %eax
; Move param %in2 to stack.
-; CHECK: movl %r10d, 32(%rsp)
+; CHECK: movl %r10d, [[A1]](%rsp)
; Move result of addition to stack.
-; CHECK: movl %eax, 40(%rsp)
+; CHECK: movl %eax, [[A2]](%rsp)
; Eventually, do a TAILCALL
; CHECK: TAILCALL
@@ -22,4 +26,3 @@ entry:
%retval = tail call fastcc i32 @tailcallee(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6, i32 %in2,i32 %tmp)
ret i32 %retval
}
-
diff --git a/test/CodeGen/X86/tls-pic.ll b/test/CodeGen/X86/tls-pic.ll
index 4cad8376d8d9..b83416d4b32b 100644
--- a/test/CodeGen/X86/tls-pic.ll
+++ b/test/CodeGen/X86/tls-pic.ll
@@ -11,11 +11,11 @@ entry:
; X32: f1:
; X32: leal i@TLSGD(,%ebx), %eax
-; X32: call ___tls_get_addr@PLT
+; X32: calll ___tls_get_addr@PLT
; X64: f1:
; X64: leaq i@TLSGD(%rip), %rdi
-; X64: call __tls_get_addr@PLT
+; X64: callq __tls_get_addr@PLT
@i2 = external thread_local global i32
@@ -27,11 +27,11 @@ entry:
; X32: f2:
; X32: leal i@TLSGD(,%ebx), %eax
-; X32: call ___tls_get_addr@PLT
+; X32: calll ___tls_get_addr@PLT
; X64: f2:
; X64: leaq i@TLSGD(%rip), %rdi
-; X64: call __tls_get_addr@PLT
+; X64: callq __tls_get_addr@PLT
@@ -43,11 +43,11 @@ entry:
; X32: f3:
; X32: leal i@TLSGD(,%ebx), %eax
-; X32: call ___tls_get_addr@PLT
+; X32: calll ___tls_get_addr@PLT
; X64: f3:
; X64: leaq i@TLSGD(%rip), %rdi
-; X64: call __tls_get_addr@PLT
+; X64: callq __tls_get_addr@PLT
define i32* @f4() nounwind {
@@ -57,11 +57,11 @@ entry:
; X32: f4:
; X32: leal i@TLSGD(,%ebx), %eax
-; X32: call ___tls_get_addr@PLT
+; X32: calll ___tls_get_addr@PLT
; X64: f4:
; X64: leaq i@TLSGD(%rip), %rdi
-; X64: call __tls_get_addr@PLT
+; X64: callq __tls_get_addr@PLT
diff --git a/test/CodeGen/X86/tls9.ll b/test/CodeGen/X86/tls9.ll
index 214146fe998c..7d08df84a9fa 100644
--- a/test/CodeGen/X86/tls9.ll
+++ b/test/CodeGen/X86/tls9.ll
@@ -5,7 +5,7 @@
@i = external hidden thread_local global i32
-define i32 @f() {
+define i32 @f() nounwind {
entry:
%tmp1 = load i32* @i
ret i32 %tmp1
diff --git a/test/CodeGen/X86/tls-1.ll b/test/CodeGen/X86/tlv-1.ll
index de694d8d471f..42940f147ed8 100644
--- a/test/CodeGen/X86/tls-1.ll
+++ b/test/CodeGen/X86/tlv-1.ll
@@ -1,5 +1,21 @@
; RUN: llc < %s -mtriple x86_64-apple-darwin | FileCheck %s
+%struct.A = type { [48 x i8], i32, i32, i32 }
+
+@c = external thread_local global %struct.A, align 4
+
+define void @main() nounwind ssp {
+entry:
+ call void @llvm.memset.p0i8.i64(i8* getelementptr inbounds (%struct.A* @c, i32 0, i32 0, i32 0), i8 0, i64 60, i32 1, i1 false)
+ unreachable
+ ; CHECK: movq _c@TLVP(%rip), %rdi
+ ; CHECK-NEXT: callq *(%rdi)
+ ; CHECK-NEXT: movl $0, 56(%rax)
+ ; CHECK-NEXT: movq $0, 48(%rax)
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+
@a = thread_local global i32 0 ; <i32*> [#uses=0]
@b = thread_local global i32 0 ; <i32*> [#uses=0]
diff --git a/test/CodeGen/X86/tlv-2.ll b/test/CodeGen/X86/tlv-2.ll
new file mode 100644
index 000000000000..5f29a60bef5a
--- /dev/null
+++ b/test/CodeGen/X86/tlv-2.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -mtriple x86_64-apple-darwin -O0 | FileCheck %s
+
+@b = thread_local global i32 5, align 4
+@a = thread_local global i32 0, align 4
+@c = internal thread_local global i32 0, align 4
+@d = internal thread_local global i32 5, align 4
+
+define void @foo() nounwind ssp {
+entry:
+ store i32 1, i32* @a, align 4
+ ; CHECK: movq _a@TLVP(%rip), %rdi
+ ; CHECK: callq *(%rdi)
+ ; CHECK: movl $1, (%rax)
+
+ store i32 2, i32* @b, align 4
+ ; CHECK: movq _b@TLVP(%rip), %rdi
+ ; CHECK: callq *(%rdi)
+ ; CHECK: movl $2, (%rax)
+
+ store i32 3, i32* @c, align 4
+ ; CHECK: movq _c@TLVP(%rip), %rdi
+ ; CHECK: callq *(%rdi)
+ ; CHECK: movl $3, (%rax)
+
+ store i32 4, i32* @d, align 4
+ ; CHECK: movq _d@TLVP(%rip), %rdi
+ ; CHECK: callq *(%rdi)
+ ; CHECK: movl $4, (%rax)
+ ; CHECK: popq
+
+ ret void
+}
diff --git a/test/CodeGen/X86/twoaddr-lea.ll b/test/CodeGen/X86/twoaddr-lea.ll
index a245ed7caa84..ec16dfe172e3 100644
--- a/test/CodeGen/X86/twoaddr-lea.ll
+++ b/test/CodeGen/X86/twoaddr-lea.ll
@@ -5,20 +5,32 @@
;; allocator turns the shift into an LEA. This also occurs for ADD.
; Check that the shift gets turned into an LEA.
-; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
-; RUN: not grep {mov E.X, E.X}
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
-@G = external global i32 ; <i32*> [#uses=3]
+@G = external global i32
-define i32 @test1(i32 %X, i32 %Y) {
- %Z = add i32 %X, %Y ; <i32> [#uses=1]
- volatile store i32 %Y, i32* @G
+define i32 @test1(i32 %X) nounwind {
+; CHECK: test1:
+; CHECK-NOT: mov
+; CHECK: leal 1(%rdi)
+ %Z = add i32 %X, 1
volatile store i32 %Z, i32* @G
ret i32 %X
}
-define i32 @test2(i32 %X) {
- %Z = add i32 %X, 1 ; <i32> [#uses=1]
- volatile store i32 %Z, i32* @G
- ret i32 %X
+; rdar://8977508
+; The second add should not be transformed to leal nor should it be
+; commuted (which would require inserting a copy).
+define i32 @test2(i32 inreg %a, i32 inreg %b, i32 %c, i32 %d) nounwind {
+entry:
+; CHECK: test2:
+; CHECK: leal
+; CHECK-NOT: leal
+; CHECK-NOT: mov
+; CHECK-NEXT: addl
+; CHECK-NEXT: ret
+ %add = add i32 %b, %a
+ %add3 = add i32 %add, %c
+ %add5 = add i32 %add3, %d
+ ret i32 %add5
}
diff --git a/test/CodeGen/X86/uint64-to-float.ll b/test/CodeGen/X86/uint64-to-float.ll
new file mode 100644
index 000000000000..d9f753c7a88e
--- /dev/null
+++ b/test/CodeGen/X86/uint64-to-float.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; Verify that we are using the efficient uitofp --> sitofp lowering illustrated
+; by the compiler_rt implementation of __floatundisf.
+; <rdar://problem/8493982>
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+; CHECK: testq %rdi, %rdi
+; CHECK-NEXT: jns LBB0_2
+; CHECK: shrq
+; CHECK-NEXT: andq
+; CHECK-NEXT: orq
+; CHECK-NEXT: cvtsi2ss
+; CHECK: LBB0_2
+; CHECK-NEXT: cvtsi2ss
+define float @test(i64 %a) {
+entry:
+ %b = uitofp i64 %a to float
+ ret float %b
+}
diff --git a/test/CodeGen/X86/umul-with-overflow.ll b/test/CodeGen/X86/umul-with-overflow.ll
index d522bd80acfd..c9976617a247 100644
--- a/test/CodeGen/X86/umul-with-overflow.ll
+++ b/test/CodeGen/X86/umul-with-overflow.ll
@@ -1,8 +1,14 @@
-; RUN: llc < %s -march=x86 | grep "\\\\\\\<mul"
+; RUN: llc < %s -march=x86 | FileCheck %s
declare {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
define i1 @a(i32 %x) zeroext nounwind {
%res = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %x, i32 3)
%obil = extractvalue {i32, i1} %res, 1
ret i1 %obil
+
+; CHECK: a:
+; CHECK: mull
+; CHECK: seto %al
+; CHECK: movzbl %al, %eax
+; CHECK: ret
}
diff --git a/test/CodeGen/X86/umulo-64.ll b/test/CodeGen/X86/umulo-64.ll
new file mode 100644
index 000000000000..280bd9cb066d
--- /dev/null
+++ b/test/CodeGen/X86/umulo-64.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin
+
+%0 = type { i64, i1 }
+
+define i32 @f0(i64 %a, i64 %b) nounwind ssp {
+ %1 = alloca i64, align 4
+ %2 = alloca i64, align 4
+ store i64 %a, i64* %1, align 8
+ store i64 %b, i64* %2, align 8
+ %3 = load i64* %1, align 8
+ %4 = load i64* %2, align 8
+ %5 = call %0 @llvm.smul.with.overflow.i64(i64 %3, i64 %4)
+ %6 = extractvalue %0 %5, 0
+ %7 = extractvalue %0 %5, 1
+ br i1 %7, label %8, label %9
+
+; <label>:8 ; preds = %0
+ call void @llvm.trap()
+ unreachable
+
+; <label>:9 ; preds = %0
+ %10 = trunc i64 %6 to i32
+ ret i32 %10
+}
+
+declare %0 @llvm.smul.with.overflow.i64(i64, i64) nounwind readnone
+
+declare void @llvm.trap() nounwind
diff --git a/test/CodeGen/X86/unaligned-load.ll b/test/CodeGen/X86/unaligned-load.ll
index a99af0605b12..6a493c0594de 100644
--- a/test/CodeGen/X86/unaligned-load.ll
+++ b/test/CodeGen/X86/unaligned-load.ll
@@ -13,7 +13,7 @@ entry:
bb:
%String2Loc9 = getelementptr inbounds [31 x i8]* %String2Loc, i64 0, i64 0
call void @llvm.memcpy.i64(i8* %String2Loc9, i8* getelementptr inbounds ([31 x i8]* @.str3, i64 0, i64 0), i64 31, i32 1)
-; I386: call {{_?}}memcpy
+; I386: calll {{_?}}memcpy
; CORE2: movabsq
; CORE2: movabsq
diff --git a/test/CodeGen/X86/unknown-location.ll b/test/CodeGen/X86/unknown-location.ll
index fa98b781e4ea..09431b5564ae 100644
--- a/test/CodeGen/X86/unknown-location.ll
+++ b/test/CodeGen/X86/unknown-location.ll
@@ -1,15 +1,15 @@
-; RUN: llc < %s -asm-verbose=false -march=x86-64 -use-unknown-locations | FileCheck %s
+; RUN: llc < %s -asm-verbose=false -mtriple=x86_64-apple-darwin10 -use-unknown-locations | FileCheck %s
; The divide instruction does not have a debug location. CodeGen should
-; represent this in the debug information. This is checked by a check
-; for a label between the code for the add and the code for the divide,
-; which indicates that the add's location doesn't spill over unto the
-; divide.
+; represent this in the debug information. This is done by setting line
+; and column to 0
; CHECK: leal (%rdi,%rsi), %eax
+; CHECK-NEXT: .loc 1 0 0
; CHECK-NEXT: Ltmp
; CHECK-NEXT: cltd
; CHECK-NEXT: idivl %r8d
+; CHECK-NEXT: .loc 1 4 3
; CHECK-NEXT: Ltmp
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: ret
diff --git a/test/CodeGen/X86/vec-sign.ll b/test/CodeGen/X86/vec-sign.ll
new file mode 100644
index 000000000000..31b9c2eb4c77
--- /dev/null
+++ b/test/CodeGen/X86/vec-sign.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -march=x86 -mcpu=nehalem | FileCheck %s
+
+define <4 x i32> @signd(<4 x i32> %a, <4 x i32> %b) nounwind {
+entry:
+; CHECK: signd:
+; CHECK: psignd
+; CHECK-NOT: sub
+; CHECK: ret
+ %b.lobit = ashr <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
+ %sub = sub nsw <4 x i32> zeroinitializer, %a
+ %0 = xor <4 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %1 = and <4 x i32> %a, %0
+ %2 = and <4 x i32> %b.lobit, %sub
+ %cond = or <4 x i32> %1, %2
+ ret <4 x i32> %cond
+}
+
+define <4 x i32> @blendvb(<4 x i32> %b, <4 x i32> %a, <4 x i32> %c) nounwind {
+entry:
+; CHECK: blendvb:
+; CHECK: pblendvb
+; CHECK: ret
+ %b.lobit = ashr <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
+ %sub = sub nsw <4 x i32> zeroinitializer, %a
+ %0 = xor <4 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %1 = and <4 x i32> %c, %0
+ %2 = and <4 x i32> %a, %b.lobit
+ %cond = or <4 x i32> %1, %2
+ ret <4 x i32> %cond
+}
diff --git a/test/CodeGen/X86/vec-trunc-store.ll b/test/CodeGen/X86/vec-trunc-store.ll
index 2f57d7b571f0..4d665f1843ef 100644
--- a/test/CodeGen/X86/vec-trunc-store.ll
+++ b/test/CodeGen/X86/vec-trunc-store.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -disable-mmx >/dev/null
+; RUN: llc < %s -march=x86-64
define void @foo(<8 x i32>* %p) nounwind {
%t = load <8 x i32>* %p
diff --git a/test/CodeGen/X86/vec_cast.ll b/test/CodeGen/X86/vec_cast.ll
index f8531646effa..95289c9685a1 100644
--- a/test/CodeGen/X86/vec_cast.ll
+++ b/test/CodeGen/X86/vec_cast.ll
@@ -1,5 +1,4 @@
; RUN: llc < %s -march=x86-64 -mcpu=core2
-; RUN: llc < %s -march=x86-64 -mcpu=core2 -disable-mmx
define <8 x i32> @a(<8 x i16> %a) nounwind {
diff --git a/test/CodeGen/X86/vec_compare-2.ll b/test/CodeGen/X86/vec_compare-2.ll
index 091641b3bc3b..04bb7254fb08 100644
--- a/test/CodeGen/X86/vec_compare-2.ll
+++ b/test/CodeGen/X86/vec_compare-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=penryn -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=penryn | FileCheck %s
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/X86/vec_ext_inreg.ll b/test/CodeGen/X86/vec_ext_inreg.ll
index 8d2a3c31aedf..02b16a79f4a0 100644
--- a/test/CodeGen/X86/vec_ext_inreg.ll
+++ b/test/CodeGen/X86/vec_ext_inreg.ll
@@ -1,5 +1,4 @@
; RUN: llc < %s -march=x86-64
-; RUN: llc < %s -march=x86-64 -disable-mmx
define <8 x i32> @a(<8 x i32> %a) nounwind {
%b = trunc <8 x i32> %a to <8 x i16>
diff --git a/test/CodeGen/X86/vec_insert-5.ll b/test/CodeGen/X86/vec_insert-5.ll
index 291fc0454c9c..471cc1611fce 100644
--- a/test/CodeGen/X86/vec_insert-5.ll
+++ b/test/CodeGen/X86/vec_insert-5.ll
@@ -1,15 +1,16 @@
; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
-; RUN: grep psllq %t | grep 32
+; RUN: grep shll %t | grep 12
; RUN: grep pslldq %t | grep 12
; RUN: grep psrldq %t | grep 8
; RUN: grep psrldq %t | grep 12
+; There are no MMX operations in @t1
-define void @t1(i32 %a, <1 x i64>* %P) nounwind {
+define void @t1(i32 %a, x86_mmx* %P) nounwind {
%tmp12 = shl i32 %a, 12
%tmp21 = insertelement <2 x i32> undef, i32 %tmp12, i32 1
%tmp22 = insertelement <2 x i32> %tmp21, i32 0, i32 0
- %tmp23 = bitcast <2 x i32> %tmp22 to <1 x i64>
- store <1 x i64> %tmp23, <1 x i64>* %P
+ %tmp23 = bitcast <2 x i32> %tmp22 to x86_mmx
+ store x86_mmx %tmp23, x86_mmx* %P
ret void
}
diff --git a/test/CodeGen/X86/vec_insert-7.ll b/test/CodeGen/X86/vec_insert-7.ll
index 9ede10f63d3e..268b5c4bf972 100644
--- a/test/CodeGen/X86/vec_insert-7.ll
+++ b/test/CodeGen/X86/vec_insert-7.ll
@@ -1,8 +1,15 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx -mtriple=i686-apple-darwin9 -o - | grep punpckldq
+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse42 -mtriple=i686-apple-darwin9 | FileCheck %s
+; MMX insertelement is not available; these are promoted to XMM.
+; (Without SSE they are split to two ints, and the code is much better.)
-define <2 x i32> @mmx_movzl(<2 x i32> %x) nounwind {
+define x86_mmx @mmx_movzl(x86_mmx %x) nounwind {
entry:
- %tmp3 = insertelement <2 x i32> %x, i32 32, i32 0 ; <<2 x i32>> [#uses=1]
+; CHECK: mmx_movzl
+; CHECK: pinsrd
+; CHECK: pinsrd
+ %tmp = bitcast x86_mmx %x to <2 x i32>
+ %tmp3 = insertelement <2 x i32> %tmp, i32 32, i32 0 ; <<2 x i32>> [#uses=1]
%tmp8 = insertelement <2 x i32> %tmp3, i32 0, i32 1 ; <<2 x i32>> [#uses=1]
- ret <2 x i32> %tmp8
+ %tmp9 = bitcast <2 x i32> %tmp8 to x86_mmx
+ ret x86_mmx %tmp9
}
diff --git a/test/CodeGen/X86/vec_select.ll b/test/CodeGen/X86/vec_select.ll
deleted file mode 100644
index 033e9f7027f9..000000000000
--- a/test/CodeGen/X86/vec_select.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse
-
-define void @test(i32 %C, <4 x float>* %A, <4 x float>* %B) {
- %tmp = load <4 x float>* %A ; <<4 x float>> [#uses=1]
- %tmp3 = load <4 x float>* %B ; <<4 x float>> [#uses=2]
- %tmp9 = fmul <4 x float> %tmp3, %tmp3 ; <<4 x float>> [#uses=1]
- %tmp.upgrd.1 = icmp eq i32 %C, 0 ; <i1> [#uses=1]
- %iftmp.38.0 = select i1 %tmp.upgrd.1, <4 x float> %tmp9, <4 x float> %tmp ; <<4 x float>> [#uses=1]
- store <4 x float> %iftmp.38.0, <4 x float>* %A
- ret void
-}
-
diff --git a/test/CodeGen/X86/vec_set-F.ll b/test/CodeGen/X86/vec_set-F.ll
index 4f0acb2d151d..6dd3cb0abeb9 100644
--- a/test/CodeGen/X86/vec_set-F.ll
+++ b/test/CodeGen/X86/vec_set-F.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movq
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movsd
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep mov | count 3
+; RUN: llc < %s -mtriple=i686-linux -mattr=+sse2 | grep movq
+; RUN: llc < %s -mtriple=i686-linux -mattr=+sse2 | grep movsd
+; RUN: llc < %s -mtriple=i686-linux -mattr=+sse2 | grep mov | count 3
define <2 x i64> @t1(<2 x i64>* %ptr) nounwind {
%tmp45 = bitcast <2 x i64>* %ptr to <2 x i32>*
diff --git a/test/CodeGen/X86/vec_shuffle-27.ll b/test/CodeGen/X86/vec_shuffle-27.ll
index d700ccbf5303..dec98c7400a5 100644
--- a/test/CodeGen/X86/vec_shuffle-27.ll
+++ b/test/CodeGen/X86/vec_shuffle-27.ll
@@ -1,7 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=sse41 -o %t
-; RUN: grep addps %t | count 2
-; RUN: grep mulps %t | count 2
-; RUN: grep subps %t | count 2
+; RUN: llc < %s -march=x86 -mattr=sse41 | FileCheck %s
; ModuleID = 'vec_shuffle-27.bc'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
@@ -9,9 +6,33 @@ target triple = "i686-apple-cl.1.0"
define <8 x float> @my2filter4_1d(<4 x float> %a, <8 x float> %T0, <8 x float> %T1) nounwind readnone {
entry:
+; CHECK: subps
+; CHECK: mulps
+; CHECK: addps
+; CHECK: subps
+; CHECK: mulps
+; CHECK: addps
%tmp7 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3 > ; <<8 x float>> [#uses=1]
%sub = fsub <8 x float> %T1, %T0 ; <<8 x float>> [#uses=1]
%mul = fmul <8 x float> %sub, %tmp7 ; <<8 x float>> [#uses=1]
%add = fadd <8 x float> %mul, %T0 ; <<8 x float>> [#uses=1]
ret <8 x float> %add
}
+
+; Test case for r122206
+define void @test2(<4 x i64>* %ap, <4 x i64>* %bp) nounwind {
+entry:
+; CHECK: movdqa
+ %a = load <4 x i64> * %ap
+ %b = load <4 x i64> * %bp
+ %mulaa = mul <4 x i64> %a, %a
+ %mulbb = mul <4 x i64> %b, %b
+ %mulab = mul <4 x i64> %a, %b
+ %vect1271 = shufflevector <4 x i64> %mulaa, <4 x i64> %mulbb, <4 x i32> <i32 0, i32 4, i32 undef, i32 undef>
+ %vect1272 = shufflevector <4 x i64> %mulaa, <4 x i64> %mulbb, <4 x i32> <i32 1, i32 5, i32 undef, i32 undef>
+ %vect1487 = shufflevector <4 x i64> %vect1271, <4 x i64> %mulab, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+ %vect1488 = shufflevector <4 x i64> %vect1272, <4 x i64> %mulab, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+ store <4 x i64> %vect1487, <4 x i64>* %ap
+ store <4 x i64> %vect1488, <4 x i64>* %bp
+ ret void;
+}
\ No newline at end of file
diff --git a/test/CodeGen/X86/vec_shuffle-30.ll b/test/CodeGen/X86/vec_shuffle-30.ll
index 3f69150ac533..1651c4cdace2 100644
--- a/test/CodeGen/X86/vec_shuffle-30.ll
+++ b/test/CodeGen/X86/vec_shuffle-30.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=sse41 -disable-mmx -o %t
+; RUN: llc < %s -march=x86 -mattr=sse41 -o %t
; RUN: grep pshufhw %t | grep -- -95 | count 1
; RUN: grep shufps %t | count 1
; RUN: not grep pslldq %t
diff --git a/test/CodeGen/X86/vec_shuffle-37.ll b/test/CodeGen/X86/vec_shuffle-37.ll
index 1ed858de64e8..b09093089c5a 100644
--- a/test/CodeGen/X86/vec_shuffle-37.ll
+++ b/test/CodeGen/X86/vec_shuffle-37.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc -O0 < %s -march=x86 -mcpu=core2 | FileCheck %s --check-prefix=CHECK_O0
define <4 x i32> @t00(<4 x i32>* %a0) nounwind ssp {
entry:
@@ -12,3 +13,12 @@ entry:
ret <4 x i32> %2
}
+define void @t01(double* %a0) nounwind ssp {
+entry:
+; CHECK_O0: movsd (%eax), %xmm0
+; CHECK_O0: unpcklpd %xmm0, %xmm0
+ %tmp93 = load double* %a0, align 8
+ %vecinit94 = insertelement <2 x double> undef, double %tmp93, i32 1
+ store <2 x double> %vecinit94, <2 x double>* undef
+ ret void
+}
diff --git a/test/CodeGen/X86/vec_zero_cse.ll b/test/CodeGen/X86/vec_zero_cse.ll
index 3b15d4cc407b..8aa50945e635 100644
--- a/test/CodeGen/X86/vec_zero_cse.ll
+++ b/test/CodeGen/X86/vec_zero_cse.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pxor | count 2
-; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pcmpeqd | count 2
+; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pxor | count 1
+; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pcmpeqd | count 1
+; 64-bit stores here do not use MMX.
@M1 = external global <1 x i64>
@M2 = external global <2 x i32>
diff --git a/test/CodeGen/X86/visibility.ll b/test/CodeGen/X86/visibility.ll
new file mode 100644
index 000000000000..a8d287083a80
--- /dev/null
+++ b/test/CodeGen/X86/visibility.ll
@@ -0,0 +1,11 @@
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu %s -o - | FileCheck %s
+
+define hidden void @foo() nounwind {
+entry:
+ call void @bar()
+ ret void
+}
+
+declare hidden void @bar()
+
+;CHECK: .hidden bar
diff --git a/test/CodeGen/X86/vshift-1.ll b/test/CodeGen/X86/vshift-1.ll
index ae845e0a33d1..49551562c5ae 100644
--- a/test/CodeGen/X86/vshift-1.ll
+++ b/test/CodeGen/X86/vshift-1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
; test vector shifts converted to proper SSE2 vector shifts when the shift
; amounts are the same.
diff --git a/test/CodeGen/X86/vshift-2.ll b/test/CodeGen/X86/vshift-2.ll
index 36feb11603d8..9a9b419abea5 100644
--- a/test/CodeGen/X86/vshift-2.ll
+++ b/test/CodeGen/X86/vshift-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
; test vector shifts converted to proper SSE2 vector shifts when the shift
; amounts are the same.
diff --git a/test/CodeGen/X86/vshift-3.ll b/test/CodeGen/X86/vshift-3.ll
index 20d3f48a1a67..8e8a9aa04b27 100644
--- a/test/CodeGen/X86/vshift-3.ll
+++ b/test/CodeGen/X86/vshift-3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
; test vector shifts converted to proper SSE2 vector shifts when the shift
; amounts are the same.
diff --git a/test/CodeGen/X86/vshift-4.ll b/test/CodeGen/X86/vshift-4.ll
index 9773cbed0ae3..8e24fda1835d 100644
--- a/test/CodeGen/X86/vshift-4.ll
+++ b/test/CodeGen/X86/vshift-4.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
; test vector shifts converted to proper SSE2 vector shifts when the shift
; amounts are the same when using a shuffle splat.
diff --git a/test/CodeGen/X86/vshift-5.ll b/test/CodeGen/X86/vshift-5.ll
index a543f382b513..cb254aeb5735 100644
--- a/test/CodeGen/X86/vshift-5.ll
+++ b/test/CodeGen/X86/vshift-5.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
; When loading the shift amount from memory, avoid generating the splat.
diff --git a/test/CodeGen/X86/vsplit-and.ll b/test/CodeGen/X86/vsplit-and.ll
index a247c6eb00d7..97dacfdf09e0 100644
--- a/test/CodeGen/X86/vsplit-and.ll
+++ b/test/CodeGen/X86/vsplit-and.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 | FileCheck %s
define void @t(<2 x i64>* %dst, <2 x i64> %src1, <2 x i64> %src2) nounwind readonly {
diff --git a/test/CodeGen/X86/widen_arith-1.ll b/test/CodeGen/X86/widen_arith-1.ll
index f8d06902c553..4b8016dc7132 100644
--- a/test/CodeGen/X86/widen_arith-1.ll
+++ b/test/CodeGen/X86/widen_arith-1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
; Widen a v3i8 to v16i8 to use a vector add
diff --git a/test/CodeGen/X86/widen_arith-2.ll b/test/CodeGen/X86/widen_arith-2.ll
index fdecaa3f77ff..03b3fea01f6c 100644
--- a/test/CodeGen/X86/widen_arith-2.ll
+++ b/test/CodeGen/X86/widen_arith-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
; CHECK: paddb
; CHECK: pand
diff --git a/test/CodeGen/X86/widen_arith-3.ll b/test/CodeGen/X86/widen_arith-3.ll
index 1f2c25068ca4..057492377a27 100644
--- a/test/CodeGen/X86/widen_arith-3.ll
+++ b/test/CodeGen/X86/widen_arith-3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -post-RA-scheduler=true | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse42 -post-RA-scheduler=true | FileCheck %s
; CHECK: paddw
; CHECK: pextrw
; CHECK: movd
diff --git a/test/CodeGen/X86/widen_arith-4.ll b/test/CodeGen/X86/widen_arith-4.ll
index f7506ae3e3cd..5931d639f19b 100644
--- a/test/CodeGen/X86/widen_arith-4.ll
+++ b/test/CodeGen/X86/widen_arith-4.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=+sse42 | FileCheck %s
; CHECK: psubw
; CHECK-NEXT: pmullw
diff --git a/test/CodeGen/X86/widen_arith-5.ll b/test/CodeGen/X86/widen_arith-5.ll
index bae5c54eea64..7f2eff09f473 100644
--- a/test/CodeGen/X86/widen_arith-5.ll
+++ b/test/CodeGen/X86/widen_arith-5.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=+sse42 | FileCheck %s
; CHECK: movdqa
; CHECK: pmulld
; CHECK: psubd
diff --git a/test/CodeGen/X86/widen_arith-6.ll b/test/CodeGen/X86/widen_arith-6.ll
index 538123f10c25..b983d141ddf6 100644
--- a/test/CodeGen/X86/widen_arith-6.ll
+++ b/test/CodeGen/X86/widen_arith-6.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
; CHECK: mulps
; CHECK: addps
diff --git a/test/CodeGen/X86/widen_cast-1.ll b/test/CodeGen/X86/widen_cast-1.ll
index d4ab174ae9fb..1eace9e024e0 100644
--- a/test/CodeGen/X86/widen_cast-1.ll
+++ b/test/CodeGen/X86/widen_cast-1.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=x86 -mattr=+sse42 < %s -disable-mmx | FileCheck %s
+; RUN: llc -march=x86 -mattr=+sse42 < %s | FileCheck %s
; CHECK: paddw
; CHECK: pextrd
; CHECK: movd
diff --git a/test/CodeGen/X86/widen_cast-2.ll b/test/CodeGen/X86/widen_cast-2.ll
index 14e8f7562482..5c695ea00033 100644
--- a/test/CodeGen/X86/widen_cast-2.ll
+++ b/test/CodeGen/X86/widen_cast-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
; CHECK: pextrd
; CHECK: pextrd
; CHECK: movd
diff --git a/test/CodeGen/X86/widen_cast-3.ll b/test/CodeGen/X86/widen_cast-3.ll
index 02674dd1459c..87486d96611b 100644
--- a/test/CodeGen/X86/widen_cast-3.ll
+++ b/test/CodeGen/X86/widen_cast-3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
; CHECK: paddd
; CHECK: pextrd
; CHECK: pextrd
diff --git a/test/CodeGen/X86/widen_cast-4.ll b/test/CodeGen/X86/widen_cast-4.ll
index 5f31e560f500..8e1adf58f869 100644
--- a/test/CodeGen/X86/widen_cast-4.ll
+++ b/test/CodeGen/X86/widen_cast-4.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
; CHECK: sarb
; CHECK: sarb
; CHECK: sarb
diff --git a/test/CodeGen/X86/widen_cast-5.ll b/test/CodeGen/X86/widen_cast-5.ll
index d1d7fecbd275..136578df1e8e 100644
--- a/test/CodeGen/X86/widen_cast-5.ll
+++ b/test/CodeGen/X86/widen_cast-5.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
; CHECK: movl
; CHECK: movd
diff --git a/test/CodeGen/X86/widen_cast-6.ll b/test/CodeGen/X86/widen_cast-6.ll
index 08759bf5510c..39032347c018 100644
--- a/test/CodeGen/X86/widen_cast-6.ll
+++ b/test/CodeGen/X86/widen_cast-6.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse41 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse41 | FileCheck %s
; CHECK: movd
; Test bit convert that requires widening in the operand.
diff --git a/test/CodeGen/X86/widen_conv-1.ll b/test/CodeGen/X86/widen_conv-1.ll
index a2029dd2748d..f6810cda9e35 100644
--- a/test/CodeGen/X86/widen_conv-1.ll
+++ b/test/CodeGen/X86/widen_conv-1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
; CHECK: pshufd
; CHECK: paddd
diff --git a/test/CodeGen/X86/widen_conv-2.ll b/test/CodeGen/X86/widen_conv-2.ll
index b24a9b36673c..969cb512beb3 100644
--- a/test/CodeGen/X86/widen_conv-2.ll
+++ b/test/CodeGen/X86/widen_conv-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
; CHECK: movswl
; CHECK: movswl
diff --git a/test/CodeGen/X86/widen_conv-3.ll b/test/CodeGen/X86/widen_conv-3.ll
index 1a40800de975..a25fae9e1bc8 100644
--- a/test/CodeGen/X86/widen_conv-3.ll
+++ b/test/CodeGen/X86/widen_conv-3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
; CHECK: cvtsi2ss
; sign to float v2i16 to v2f32
diff --git a/test/CodeGen/X86/widen_conv-4.ll b/test/CodeGen/X86/widen_conv-4.ll
index e505b62a3dbf..80f3a492c494 100644
--- a/test/CodeGen/X86/widen_conv-4.ll
+++ b/test/CodeGen/X86/widen_conv-4.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
; CHECK: cvtsi2ss
; unsigned to float v7i16 to v7f32
diff --git a/test/CodeGen/X86/widen_extract-1.ll b/test/CodeGen/X86/widen_extract-1.ll
index 308e6b859be6..4bcac58f2b6c 100644
--- a/test/CodeGen/X86/widen_extract-1.ll
+++ b/test/CodeGen/X86/widen_extract-1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=+sse42 | FileCheck %s
; widen extract subvector
define void @convert(<2 x double>* %dst.addr, <3 x double> %src) {
diff --git a/test/CodeGen/X86/widen_load-1.ll b/test/CodeGen/X86/widen_load-1.ll
index d397645f193f..639617f17774 100644
--- a/test/CodeGen/X86/widen_load-1.ll
+++ b/test/CodeGen/X86/widen_load-1.ll
@@ -1,4 +1,4 @@
-; RUN: llc %s -o - -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -disable-mmx | FileCheck %s
+; RUN: llc %s -o - -march=x86-64 -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
; PR4891
; This load should be before the call, not after.
diff --git a/test/CodeGen/X86/widen_load-2.ll b/test/CodeGen/X86/widen_load-2.ll
index 551704c498fa..642206316c6b 100644
--- a/test/CodeGen/X86/widen_load-2.ll
+++ b/test/CodeGen/X86/widen_load-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -o - -march=x86-64 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -o - -march=x86-64 -mattr=+sse42 | FileCheck %s
; Test based on pr5626 to load/store
;
diff --git a/test/CodeGen/X86/widen_select-1.ll b/test/CodeGen/X86/widen_select-1.ll
deleted file mode 100644
index d9de892933e0..000000000000
--- a/test/CodeGen/X86/widen_select-1.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
-; CHECK: jne
-
-; widening select v6i32 and then a sub
-
-define void @select(i1 %c, <6 x i32>* %dst.addr, <6 x i32> %src1,<6 x i32> %src2) nounwind {
-entry:
- %x = select i1 %c, <6 x i32> %src1, <6 x i32> %src2
- %val = sub <6 x i32> %x, < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
- store <6 x i32> %val, <6 x i32>* %dst.addr
- ret void
-}
diff --git a/test/CodeGen/X86/widen_shuffle-1.ll b/test/CodeGen/X86/widen_shuffle-1.ll
index 463f522a11df..034c42c758be 100644
--- a/test/CodeGen/X86/widen_shuffle-1.ll
+++ b/test/CodeGen/X86/widen_shuffle-1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
; widening shuffle v3float and then an add
define void @shuf(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind {
diff --git a/test/CodeGen/X86/win64_params.ll b/test/CodeGen/X86/win64_params.ll
new file mode 100644
index 000000000000..f9d4bf9c3094
--- /dev/null
+++ b/test/CodeGen/X86/win64_params.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s
+
+; Verify that the 5th and 6th parameters are loaded from the correct locations
+; on the stack.
+define i32 @f6(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6) nounwind readnone optsize {
+entry:
+; CHECK: movl 48(%rsp), %eax
+; CHECK: addl 40(%rsp), %eax
+ %add = add nsw i32 %p6, %p5
+ ret i32 %add
+}
diff --git a/test/CodeGen/X86/win64_vararg.ll b/test/CodeGen/X86/win64_vararg.ll
new file mode 100644
index 000000000000..a451318f6e8c
--- /dev/null
+++ b/test/CodeGen/X86/win64_vararg.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s
+
+; Verify that vararg parameters passed in registers are stored in the home
+; stack slots allocated by the caller and that the va_list pointer (ap) is
+; computed correctly.
+define void @average_va(i32 %count, ...) nounwind {
+entry:
+; CHECK: pushq
+; CHECK: movq %r9, 40(%rsp)
+; CHECK: movq %r8, 32(%rsp)
+; CHECK: movq %rdx, 24(%rsp)
+; CHECK: leaq 24(%rsp), %rax
+
+ %ap = alloca i8*, align 8 ; <i8**> [#uses=1]
+ %ap1 = bitcast i8** %ap to i8* ; <i8*> [#uses=1]
+ call void @llvm.va_start(i8* %ap1)
+ ret void
+}
+
+declare void @llvm.va_start(i8*) nounwind
diff --git a/test/CodeGen/X86/win_chkstk.ll b/test/CodeGen/X86/win_chkstk.ll
index 27d3358d4ac1..82ce81d4ae70 100644
--- a/test/CodeGen/X86/win_chkstk.ll
+++ b/test/CodeGen/X86/win_chkstk.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN_X32
+; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=WIN_X64
; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X32
; RUN: llc < %s -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X64
; RUN: llc < %s -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
@@ -12,10 +13,10 @@
; Stack allocation >= 4096 bytes will require a call to __chkstk in the Windows ABI.
define i32 @main4k() nounwind {
entry:
-; WIN_X32: call __chkstk
-; WIN_X64: call __chkstk
-; MINGW_X32: call __alloca
-; MINGW_X64: call _alloca
+; WIN_X32: calll __chkstk
+; WIN_X64: callq __chkstk
+; MINGW_X32: calll __alloca
+; MINGW_X64: callq __chkstk
; LINUX-NOT: call __chkstk
%array4096 = alloca [4096 x i8], align 16 ; <[4096 x i8]*> [#uses=0]
ret i32 0
@@ -26,15 +27,15 @@ entry:
define i32 @main128() nounwind {
entry:
; WIN_X32: # BB#0:
-; WIN_X32-NOT: call __chkstk
+; WIN_X32-NOT: calll __chkstk
; WIN_X32: ret
; WIN_X64: # BB#0:
-; WIN_X64-NOT: call __chkstk
+; WIN_X64-NOT: callq __chkstk
; WIN_X64: ret
; MINGW_X64: # BB#0:
-; MINGW_X64-NOT: call _alloca
+; MINGW_X64-NOT: callq _alloca
; MINGW_X64: ret
; LINUX: # BB#0:
diff --git a/test/CodeGen/X86/x86-64-extend-shift.ll b/test/CodeGen/X86/x86-64-extend-shift.ll
new file mode 100644
index 000000000000..6852785fd6af
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-extend-shift.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; Formerly there were two shifts.
+
+define i64 @baz(i32 %A) nounwind {
+; CHECK: shlq $49, %rax
+ %tmp1 = shl i32 %A, 17
+ %tmp2 = zext i32 %tmp1 to i64
+ %tmp3 = shl i64 %tmp2, 32
+ ret i64 %tmp3
+}
diff --git a/test/CodeGen/X86/x86_64-mul-by-const.ll b/test/CodeGen/X86/x86_64-mul-by-const.ll
new file mode 100644
index 000000000000..df48a29156ca
--- /dev/null
+++ b/test/CodeGen/X86/x86_64-mul-by-const.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; Formerly there were two shifts. rdar://8771012.
+
+define i32 @f9188_mul365384439_shift27(i32 %A) nounwind {
+; CHECK: imulq $365384439,
+; CHECK: shrq $59, %rax
+ %tmp1 = udiv i32 %A, 1577682821 ; <i32> [#uses=1]
+ ret i32 %tmp1
+}
diff --git a/test/CodeGen/X86/zext-extract_subreg.ll b/test/CodeGen/X86/zext-extract_subreg.ll
new file mode 100644
index 000000000000..e61e8805a2fd
--- /dev/null
+++ b/test/CodeGen/X86/zext-extract_subreg.ll
@@ -0,0 +1,60 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+
+define void @t() nounwind ssp {
+; CHECK: t:
+entry:
+ br i1 undef, label %return, label %if.end.i
+
+if.end.i: ; preds = %entry
+ %tmp7.i = load i32* undef, align 4, !tbaa !0
+ br i1 undef, label %return, label %if.end
+
+if.end: ; preds = %if.end.i
+; CHECK: %if.end
+; CHECK: movl (%{{.*}}), [[REG:%[a-z]+]]
+; CHECK-NOT: movl [[REG]], [[REG]]
+; CHECK-NEXT: xorb
+ %tmp138 = select i1 undef, i32 0, i32 %tmp7.i
+ %tmp867 = zext i32 %tmp138 to i64
+ br label %while.cond
+
+while.cond: ; preds = %while.body, %if.end
+ %tmp869 = sub i64 %tmp867, 0
+ %scale2.0 = trunc i64 %tmp869 to i32
+ %cmp149 = icmp eq i32 %scale2.0, 0
+ br i1 %cmp149, label %while.end, label %land.rhs
+
+land.rhs: ; preds = %while.cond
+ br i1 undef, label %while.body, label %while.end
+
+while.body: ; preds = %land.rhs
+ br label %while.cond
+
+while.end: ; preds = %land.rhs, %while.cond
+ br i1 undef, label %cond.false205, label %cond.true190
+
+cond.true190: ; preds = %while.end
+ br i1 undef, label %cond.false242, label %cond.true225
+
+cond.false205: ; preds = %while.end
+ unreachable
+
+cond.true225: ; preds = %cond.true190
+ br i1 undef, label %cond.false280, label %cond.true271
+
+cond.false242: ; preds = %cond.true190
+ unreachable
+
+cond.true271: ; preds = %cond.true225
+ unreachable
+
+cond.false280: ; preds = %cond.true225
+ unreachable
+
+return: ; preds = %if.end.i, %entry
+ ret void
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/XCore/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/XCore/2010-04-07-DbgValueOtherTargets.ll
index f24e1d1851b4..80cf3a6d678f 100644
--- a/test/CodeGen/XCore/2010-04-07-DbgValueOtherTargets.ll
+++ b/test/CodeGen/XCore/2010-04-07-DbgValueOtherTargets.ll
@@ -1,33 +1,28 @@
; RUN: llc -O0 -march=xcore -asm-verbose < %s | FileCheck %s
; Check that DEBUG_VALUE comments come through on a variety of targets.
-%tart.reflect.ComplexType = type { double, double }
-
-@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
-
-define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+define i32 @main() nounwind ssp {
entry:
; CHECK: DEBUG_VALUE
- tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
- tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
- ret i32 3
+ call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
+ ret i32 0, !dbg !10
}
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
-!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
-!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
-!3 = metadata !{metadata !4, metadata !6, metadata !7}
-!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
-!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
-!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
-!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
-!9 = metadata !{i32 458763, metadata !10} ; [ DW_TAG_lexical_block ]
-!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 0}
+!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!9 = metadata !{i32 3, i32 11, metadata !8, null}
+!10 = metadata !{i32 4, i32 2, metadata !8, null}
+
diff --git a/test/CodeGen/XCore/2011-01-31-DAGCombineBug.ll b/test/CodeGen/XCore/2011-01-31-DAGCombineBug.ll
new file mode 100644
index 000000000000..f8fe0d2136ff
--- /dev/null
+++ b/test/CodeGen/XCore/2011-01-31-DAGCombineBug.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=xcore
+%struct.st = type <{ i8, i32, i8, i32, i8, i32 }>
+
+@x = external global %struct.st, align 4
+
+define i32 @test_entry() nounwind {
+entry:
+ %0 = load i32* getelementptr inbounds (%struct.st* @x, i32 0, i32 3), align 2
+ ret i32 %0
+}
diff --git a/test/CodeGen/XCore/ashr.ll b/test/CodeGen/XCore/ashr.ll
index d99808fc4a2f..4514fdb8bf3b 100644
--- a/test/CodeGen/XCore/ashr.ll
+++ b/test/CodeGen/XCore/ashr.ll
@@ -50,9 +50,9 @@ define i32 @f3(i32 %a) {
ret i32 %2
}
; CHECK: f3:
-; CHECK-NEXT: ashr r1, r0, 32
+; CHECK-NEXT: ashr r0, r0, 32
+; CHECK-NEXT: bf r0
; CHECK-NEXT: ldc r0, 10
-; CHECK-NEXT: bt r1
; CHECK: ldc r0, 17
define i32 @f4(i32 %a) {
@@ -61,9 +61,9 @@ define i32 @f4(i32 %a) {
ret i32 %2
}
; CHECK: f4:
-; CHECK-NEXT: ashr r1, r0, 32
+; CHECK-NEXT: ashr r0, r0, 32
+; CHECK-NEXT: bf r0
; CHECK-NEXT: ldc r0, 17
-; CHECK-NEXT: bt r1
; CHECK: ldc r0, 10
define i32 @f5(i32 %a) {
diff --git a/test/CodeGen/XCore/globals.ll b/test/CodeGen/XCore/globals.ll
index 342e5932dd10..7487561dec96 100644
--- a/test/CodeGen/XCore/globals.ll
+++ b/test/CodeGen/XCore/globals.ll
@@ -67,7 +67,7 @@ entry:
; CHECK: .section .dp.bss,"awd",@nobits
; CHECK: G2:
-@G3 = constant i32 9401
+@G3 = unnamed_addr constant i32 9401
; CHECK: .section .cp.rodata.cst4,"aMc",@progbits,4
; CHECK: G3:
@@ -75,7 +75,7 @@ entry:
; CHECK: .section .dp.data,"awd",@progbits
; CHECK: G4:
-@G5 = constant i32* @G1
+@G5 = unnamed_addr constant i32* @G1
; CHECK: .section .cp.rodata,"ac",@progbits
; CHECK: G5:
@@ -83,7 +83,7 @@ entry:
; CHECK: .section .dp.data,"awd",@progbits
; CHECK: G6:
-@G7 = constant i32* @G8
+@G7 = unnamed_addr constant i32* @G8
; CHECK: .section .cp.rodata,"ac",@progbits
; CHECK: G7:
diff --git a/test/CodeGen/XCore/resources.ll b/test/CodeGen/XCore/resources.ll
new file mode 100644
index 000000000000..3114bdcd1777
--- /dev/null
+++ b/test/CodeGen/XCore/resources.ll
@@ -0,0 +1,111 @@
+; RUN: llc -march=xcore < %s | FileCheck %s
+
+declare i8 addrspace(1)* @llvm.xcore.getr.p1i8(i32 %type)
+declare void @llvm.xcore.freer.p1i8(i8 addrspace(1)* %r)
+declare i32 @llvm.xcore.in.p1i8(i8 addrspace(1)* %r)
+declare i32 @llvm.xcore.int.p1i8(i8 addrspace(1)* %r)
+declare i32 @llvm.xcore.inct.p1i8(i8 addrspace(1)* %r)
+declare void @llvm.xcore.out.p1i8(i8 addrspace(1)* %r, i32 %value)
+declare void @llvm.xcore.outt.p1i8(i8 addrspace(1)* %r, i32 %value)
+declare void @llvm.xcore.outct.p1i8(i8 addrspace(1)* %r, i32 %value)
+declare void @llvm.xcore.chkct.p1i8(i8 addrspace(1)* %r, i32 %value)
+declare void @llvm.xcore.setd.p1i8(i8 addrspace(1)* %r, i32 %value)
+declare void @llvm.xcore.setc.p1i8(i8 addrspace(1)* %r, i32 %value)
+
+define i8 addrspace(1)* @getr() {
+; CHECK: getr:
+; CHECK: getr r0, 5
+ %result = call i8 addrspace(1)* @llvm.xcore.getr.p1i8(i32 5)
+ ret i8 addrspace(1)* %result
+}
+
+define void @freer(i8 addrspace(1)* %r) {
+; CHECK: freer:
+; CHECK: freer res[r0]
+ call void @llvm.xcore.freer.p1i8(i8 addrspace(1)* %r)
+ ret void
+}
+
+define i32 @in(i8 addrspace(1)* %r) {
+; CHECK: in:
+; CHECK: in r0, res[r0]
+ %result = call i32 @llvm.xcore.in.p1i8(i8 addrspace(1)* %r)
+ ret i32 %result
+}
+
+define i32 @int(i8 addrspace(1)* %r) {
+; CHECK: int:
+; CHECK: int r0, res[r0]
+ %result = call i32 @llvm.xcore.int.p1i8(i8 addrspace(1)* %r)
+ ret i32 %result
+}
+
+define i32 @inct(i8 addrspace(1)* %r) {
+; CHECK: inct:
+; CHECK: inct r0, res[r0]
+ %result = call i32 @llvm.xcore.inct.p1i8(i8 addrspace(1)* %r)
+ ret i32 %result
+}
+
+define void @out(i8 addrspace(1)* %r, i32 %value) {
+; CHECK: out:
+; CHECK: out res[r0], r1
+ call void @llvm.xcore.out.p1i8(i8 addrspace(1)* %r, i32 %value)
+ ret void
+}
+
+define void @outt(i8 addrspace(1)* %r, i32 %value) {
+; CHECK: outt:
+; CHECK: outt res[r0], r1
+ call void @llvm.xcore.outt.p1i8(i8 addrspace(1)* %r, i32 %value)
+ ret void
+}
+
+define void @outct(i8 addrspace(1)* %r, i32 %value) {
+; CHECK: outct:
+; CHECK: outct res[r0], r1
+ call void @llvm.xcore.outct.p1i8(i8 addrspace(1)* %r, i32 %value)
+ ret void
+}
+
+define void @outcti(i8 addrspace(1)* %r) {
+; CHECK: outcti:
+; CHECK: outct res[r0], 11
+ call void @llvm.xcore.outct.p1i8(i8 addrspace(1)* %r, i32 11)
+ ret void
+}
+
+define void @chkct(i8 addrspace(1)* %r, i32 %value) {
+; CHECK: chkct:
+; CHECK: chkct res[r0], r1
+ call void @llvm.xcore.chkct.p1i8(i8 addrspace(1)* %r, i32 %value)
+ ret void
+}
+
+define void @chkcti(i8 addrspace(1)* %r) {
+; CHECK: chkcti:
+; CHECK: chkct res[r0], 11
+ call void @llvm.xcore.chkct.p1i8(i8 addrspace(1)* %r, i32 11)
+ ret void
+}
+
+define void @setd(i8 addrspace(1)* %r, i32 %value) {
+; CHECK: setd:
+; CHECK: setd res[r0], r1
+ call void @llvm.xcore.setd.p1i8(i8 addrspace(1)* %r, i32 %value)
+ ret void
+}
+
+define void @setc(i8 addrspace(1)* %r, i32 %value) {
+; CHECK: setc:
+; CHECK: setc res[r0], r1
+ call void @llvm.xcore.setc.p1i8(i8 addrspace(1)* %r, i32 %value)
+ ret void
+}
+
+define void @setci(i8 addrspace(1)* %r) {
+; CHECK: setci:
+; CHECK: setc res[r0], 2
+ call void @llvm.xcore.setc.p1i8(i8 addrspace(1)* %r, i32 2)
+ ret void
+}
diff --git a/test/CodeGen/XCore/trampoline.ll b/test/CodeGen/XCore/trampoline.ll
new file mode 100644
index 000000000000..18cc45edbf9f
--- /dev/null
+++ b/test/CodeGen/XCore/trampoline.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -march=xcore | FileCheck %s
+
+%struct.FRAME.f = type { i32, i32 ()* }
+
+define void @f() nounwind {
+entry:
+; CHECK: f:
+; CHECK: ldap r11, g.1101
+; CHECK: stw r11, sp[7]
+ %TRAMP.23 = alloca [20 x i8], align 2
+ %FRAME.0 = alloca %struct.FRAME.f, align 4
+ %TRAMP.23.sub = getelementptr inbounds [20 x i8]* %TRAMP.23, i32 0, i32 0
+ %FRAME.02 = bitcast %struct.FRAME.f* %FRAME.0 to i8*
+ %tramp = call i8* @llvm.init.trampoline(i8* %TRAMP.23.sub, i8* bitcast (i32 (%struct.FRAME.f*)* @g.1101 to i8*), i8* %FRAME.02)
+ %0 = getelementptr inbounds %struct.FRAME.f* %FRAME.0, i32 0, i32 1
+ %1 = bitcast i8* %tramp to i32 ()*
+ store i32 ()* %1, i32 ()** %0, align 4
+ %2 = getelementptr inbounds %struct.FRAME.f* %FRAME.0, i32 0, i32 0
+ store i32 1, i32* %2, align 4
+ call void @h(i32 ()* %1) nounwind
+ ret void
+}
+
+define internal i32 @g.1101(%struct.FRAME.f* nocapture nest %CHAIN.1) nounwind readonly {
+entry:
+; CHECK: g.1101:
+; CHECK: ldw r11, sp[0]
+; CHECK-NEXT: ldw r0, r11[0]
+; CHECK-NEXT: retsp 0
+ %0 = getelementptr inbounds %struct.FRAME.f* %CHAIN.1, i32 0, i32 0
+ %1 = load i32* %0, align 4
+ ret i32 %1
+}
+
+declare i8* @llvm.init.trampoline(i8*, i8*, i8*) nounwind
+
+declare void @h(i32 ()*)