-rw-r--r--  CMakeLists.txt | 4
-rw-r--r--  Makefile.config.in | 4
-rw-r--r--  Makefile.rules | 11
-rw-r--r--  autoconf/configure.ac | 19
-rwxr-xr-x  configure | 78
-rw-r--r--  include/llvm/Config/config.h.in | 21
-rw-r--r--  include/llvm/IR/IntrinsicsX86.td | 50
-rw-r--r--  include/llvm/MC/MCAsmInfo.h | 29
-rw-r--r--  lib/Analysis/BasicAliasAnalysis.cpp | 166
-rw-r--r--  lib/Analysis/IVUsers.cpp | 23
-rw-r--r--  lib/Analysis/ScalarEvolution.cpp | 2
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 7
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 5
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 1
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 7
-rw-r--r--  lib/MC/MCAsmInfo.cpp | 6
-rw-r--r--  lib/MC/MCAsmInfoCOFF.cpp | 2
-rw-r--r--  lib/MC/MCAsmInfoDarwin.cpp | 3
-rw-r--r--  lib/MC/MCDwarf.cpp | 16
-rw-r--r--  lib/MC/MCParser/AsmParser.cpp | 4
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.cpp | 10
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.td | 1
-rw-r--r--  lib/Target/AArch64/AArch64TargetObjectFile.cpp | 7
-rw-r--r--  lib/Target/AArch64/AArch64TargetObjectFile.h | 8
-rw-r--r--  lib/Target/ARM/A15SDOptimizer.cpp | 12
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMExpandPseudoInsts.cpp | 4
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp | 75
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td | 8
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp | 26
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h | 4
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 2
-rw-r--r--  lib/Target/PowerPC/PPCAsmPrinter.cpp | 7
-rw-r--r--  lib/Target/PowerPC/PPCCTRLoops.cpp | 17
-rw-r--r--  lib/Target/PowerPC/PPCFastISel.cpp | 8
-rw-r--r--  lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 4
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 11
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp | 12
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.td | 25
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.td | 12
-rw-r--r--  lib/Target/PowerPC/PPCSubtarget.h | 16
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.cpp | 39
-rw-r--r--  lib/Target/R600/AMDGPUISelLowering.cpp | 2
-rw-r--r--  lib/Target/R600/AMDGPUInstructions.td | 7
-rw-r--r--  lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp | 2
-rw-r--r--  lib/Target/R600/R600ControlFlowFinalizer.cpp | 1
-rw-r--r--  lib/Target/R600/R600InstrInfo.cpp | 8
-rw-r--r--  lib/Target/R600/R600Instructions.td | 6
-rw-r--r--  lib/Target/R600/SIFixSGPRCopies.cpp | 2
-rw-r--r--  lib/Target/R600/SIInsertWaits.cpp | 6
-rw-r--r--  lib/Target/R600/SIInstrInfo.td | 64
-rw-r--r--  lib/Target/R600/SIInstructions.td | 108
-rw-r--r--  lib/Target/R600/SIIntrinsics.td | 16
-rw-r--r--  lib/Target/R600/SILowerControlFlow.cpp | 42
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmParser.cpp | 65
-rw-r--r--  lib/Target/X86/Disassembler/X86DisassemblerDecoder.c | 7
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp | 11
-rw-r--r--  lib/Target/X86/X86AsmPrinter.cpp | 8
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 36
-rw-r--r--  lib/Target/X86/X86InstrCompiler.td | 5
-rw-r--r--  lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 12
-rw-r--r--  lib/Transforms/Scalar/LoopRerollPass.cpp | 22
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp | 12
-rw-r--r--  lib/Transforms/Utils/LCSSA.cpp | 2
-rw-r--r--  lib/Transforms/Vectorize/LoopVectorize.cpp | 13
-rw-r--r--  test/Analysis/BasicAA/noalias-bugs.ll | 33
-rw-r--r--  test/Analysis/BasicAA/phi-aa.ll | 47
-rw-r--r--  test/Analysis/ScalarEvolution/zext-signed-addrec.ll | 81
-rw-r--r--  test/CodeGen/AArch64/atomic-ops.ll | 81
-rw-r--r--  test/CodeGen/AArch64/init-array.ll | 1
-rw-r--r--  test/CodeGen/AArch64/variadic.ll | 23
-rw-r--r--  test/CodeGen/ARM/a15-SD-dep.ll | 59
-rw-r--r--  test/CodeGen/ARM/vld3.ll | 13
-rw-r--r--  test/CodeGen/ARM/vld4.ll | 13
-rw-r--r--  test/CodeGen/ARM/vst3.ll | 12
-rw-r--r--  test/CodeGen/ARM/vst4.ll | 12
-rw-r--r--  test/CodeGen/PowerPC/anon_aggr.ll | 4
-rw-r--r--  test/CodeGen/PowerPC/byval-agg-info.ll | 17
-rw-r--r--  test/CodeGen/PowerPC/cc.ll | 70
-rw-r--r--  test/CodeGen/PowerPC/ctrloop-udivti3.ll | 31
-rw-r--r--  test/CodeGen/PowerPC/fast-isel-conversion-p5.ll | 153
-rw-r--r--  test/CodeGen/PowerPC/spill-nor0.ll | 23
-rw-r--r--  test/CodeGen/PowerPC/weak_def_can_be_hidden.ll | 38
-rw-r--r--  test/CodeGen/R600/bfe_uint.ll | 2
-rw-r--r--  test/CodeGen/R600/fabs.ll | 14
-rw-r--r--  test/CodeGen/R600/fneg-fabs.ll | 55
-rw-r--r--  test/CodeGen/R600/fneg.ll | 14
-rw-r--r--  test/CodeGen/R600/lds-oqap-crash.ll | 28
-rw-r--r--  test/CodeGen/R600/llvm.AMDGPU.kill.ll | 18
-rw-r--r--  test/CodeGen/R600/llvm.SI.load.dword.ll | 40
-rw-r--r--  test/CodeGen/R600/llvm.SI.sendmsg.ll | 21
-rw-r--r--  test/CodeGen/R600/load.ll | 19
-rw-r--r--  test/CodeGen/R600/trunc.ll | 10
-rw-r--r--  test/CodeGen/R600/vtx-fetch-branch.ll | 29
-rw-r--r--  test/CodeGen/R600/zero_extend.ll | 10
-rw-r--r--  test/CodeGen/X86/2009-06-05-VZextByteShort.ll | 2
-rw-r--r--  test/CodeGen/X86/bswap-vector.ll | 19
-rw-r--r--  test/CodeGen/X86/fma4-intrinsics-x86_64.ll | 2
-rw-r--r--  test/CodeGen/X86/fp-fast.ll | 2
-rw-r--r--  test/CodeGen/X86/inline-asm-modifier-q.ll | 12
-rw-r--r--  test/CodeGen/X86/isint.ll | 26
-rw-r--r--  test/CodeGen/X86/pr10420.ll | 48
-rw-r--r--  test/CodeGen/X86/stores-merging.ll | 23
-rw-r--r--  test/CodeGen/X86/vaargs.ll | 67
-rw-r--r--  test/CodeGen/X86/vastart-defs-eflags.ll | 23
-rw-r--r--  test/CodeGen/X86/vec_shift4.ll | 2
-rw-r--r--  test/CodeGen/X86/vshift-4.ll | 2
-rw-r--r--  test/CodeGen/X86/weak_def_can_be_hidden.ll | 15
-rw-r--r--  test/MC/Disassembler/X86/x86-64.txt | 24
-rw-r--r--  test/MC/X86/intel-syntax.s | 9
-rw-r--r--  test/Transforms/InstCombine/vec_extract_var_elt.ll | 8
-rw-r--r--  test/Transforms/LoopReroll/basic.ll | 10
-rw-r--r--  test/Transforms/LoopReroll/nonconst_lb.ll | 152
-rw-r--r--  test/Transforms/LoopReroll/reduction.ll | 4
-rw-r--r--  test/Transforms/LoopStrengthReduce/X86/pr17473.ll | 67
-rw-r--r--  test/Transforms/LoopStrengthReduce/pr18165.ll | 88
-rw-r--r--  test/Transforms/LoopVectorize/multi-use-reduction-bug.ll | 42
-rw-r--r--  tools/llvm-shlib/Makefile | 2
-rwxr-xr-x  utils/release/test-release.sh | 35
119 files changed, 2510 insertions, 370 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index a68e7e163c52..bd0f846f04b3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -12,9 +12,10 @@ set(CMAKE_MODULE_PATH
set(LLVM_VERSION_MAJOR 3)
set(LLVM_VERSION_MINOR 4)
+set(LLVM_VERSION_PATCH 1)
if (NOT PACKAGE_VERSION)
- set(PACKAGE_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}svn")
+ set(PACKAGE_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}")
endif()
option(LLVM_INSTALL_TOOLCHAIN_ONLY "Only include toolchain files in the 'install' target." OFF)
@@ -42,6 +43,7 @@ set(CPACK_PACKAGE_INSTALL_DIRECTORY "LLVM")
set(CPACK_PACKAGE_VENDOR "LLVM")
set(CPACK_PACKAGE_VERSION_MAJOR ${LLVM_VERSION_MAJOR})
set(CPACK_PACKAGE_VERSION_MINOR ${LLVM_VERSION_MINOR})
+set(CPACK_PACKAGE_VERSION_PATCH ${LLVM_VERSION_PATCH})
set(CPACK_PACKAGE_VERSION ${PACKAGE_VERSION})
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.TXT")
if(WIN32 AND NOT UNIX)
diff --git a/Makefile.config.in b/Makefile.config.in
index dcca45f36cd8..00c6d965170a 100644
--- a/Makefile.config.in
+++ b/Makefile.config.in
@@ -15,6 +15,10 @@
# Define LLVM specific info and directories based on the autoconf variables
LLVMPackageName := @PACKAGE_TARNAME@
LLVMVersion := @PACKAGE_VERSION@
+LLVM_VERSION_MAJOR := @LLVM_VERSION_MAJOR@
+LLVM_VERSION_MINOR := @LLVM_VERSION_MINOR@
+LLVM_VERSION_PATCH := @LLVM_VERSION_PATCH@
+LLVM_VERSION_SUFFIX := @LLVM_VERSION_SUFFIX@
LLVM_CONFIGTIME := @LLVM_CONFIGTIME@
###########################################################################
diff --git a/Makefile.rules b/Makefile.rules
index 68f6cf8ec5d1..210abdafc8e3 100644
--- a/Makefile.rules
+++ b/Makefile.rules
@@ -757,7 +757,7 @@ else
Ranlib = ranlib
endif
-AliasTool = ln -s
+AliasTool = ln -sf
#----------------------------------------------------------
# Get the list of source files and compute object file
@@ -1121,15 +1121,19 @@ ifdef LIBRARYNAME
# Make sure there isn't any extraneous whitespace on the LIBRARYNAME option
LIBRARYNAME := $(strip $(LIBRARYNAME))
+LIBRARYALIASNAME := $(strip $(LIBRARYALIASNAME))
ifdef LOADABLE_MODULE
BaseLibName.A := $(LIBRARYNAME).a
BaseLibName.SO := $(LIBRARYNAME)$(SHLIBEXT)
+BaseAliasName.SO := $(LIBRARYALIASNAME)$(SHLIBEXT)
else
BaseLibName.A := lib$(LIBRARYNAME).a
BaseLibName.SO := $(SharedPrefix)$(LIBRARYNAME)$(SHLIBEXT)
+BaseAliasName.SO := $(SharedPrefix)$(LIBRARYALIASNAME)$(SHLIBEXT)
endif
LibName.A := $(LibDir)/$(BaseLibName.A)
LibName.SO := $(SharedLibDir)/$(BaseLibName.SO)
+AliasName.SO := $(SharedLibDir)/$(BaseAliasName.SO)
LibName.O := $(LibDir)/$(LIBRARYNAME).o
#---------------------------------------------------------
@@ -1183,12 +1187,17 @@ else
DestSharedLibDir := $(DESTDIR)$(PROJ_libdir)
endif
DestSharedLib := $(DestSharedLibDir)/$(BaseLibName.SO)
+DestSharedAlias := $(DestSharedLibDir)/$(BaseAliasName.SO)
install-local:: $(DestSharedLib)
$(DestSharedLib): $(LibName.SO) $(DestSharedLibDir)
$(Echo) Installing $(BuildMode) Shared Library $(DestSharedLib)
$(Verb) $(INSTALL) $(LibName.SO) $(DestSharedLib)
+ifdef SHARED_ALIAS
+ $(Echo) Creating alias from $(DestSharedLib) to $(DestSharedAlias)
+ $(Verb) $(AliasTool) $(DestSharedLib) $(DestSharedAlias)
+endif
uninstall-local::
$(Echo) Uninstalling $(BuildMode) Shared Library $(DestSharedLib)
diff --git a/autoconf/configure.ac b/autoconf/configure.ac
index a9d491548f11..7b4bae7e71a5 100644
--- a/autoconf/configure.ac
+++ b/autoconf/configure.ac
@@ -31,9 +31,22 @@ dnl===
dnl===-----------------------------------------------------------------------===
dnl Initialize autoconf and define the package name, version number and
dnl address for reporting bugs.
-AC_INIT([LLVM],[3.4],[http://llvm.org/bugs/])
-AC_DEFINE([LLVM_VERSION_MAJOR], [3], [Major version of the LLVM API])
-AC_DEFINE([LLVM_VERSION_MINOR], [4], [Minor version of the LLVM API])
+
+AC_INIT([LLVM],[3.4.1],[http://llvm.org/bugs/])
+
+LLVM_VERSION_MAJOR=3
+LLVM_VERSION_MINOR=4
+LLVM_VERSION_PATCH=1
+LLVM_VERSION_SUFFIX=
+
+AC_DEFINE_UNQUOTED([LLVM_VERSION_MAJOR], $LLVM_VERSION_MAJOR, [Major version of the LLVM API])
+AC_DEFINE_UNQUOTED([LLVM_VERSION_MINOR], $LLVM_VERSION_MINOR, [Minor version of the LLVM API])
+AC_DEFINE_UNQUOTED([LLVM_VERSION_PATCH], $LLVM_VERSION_PATCH, [Patch version of the LLVM API])
+
+AC_SUBST([LLVM_VERSION_MAJOR])
+AC_SUBST([LLVM_VERSION_MINOR])
+AC_SUBST([LLVM_VERSION_PATCH])
+AC_SUBST([LLVM_VERSION_SUFFIX])
dnl Provide a copyright substitution and ensure the copyright notice is included
dnl in the output of --version option of the generated configure script.
diff --git a/configure b/configure
index 72e9b1330443..cbda923cc700 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.60 for LLVM 3.4.
+# Generated by GNU Autoconf 2.60 for LLVM 3.4.1.
#
# Report bugs to <http://llvm.org/bugs/>.
#
@@ -561,8 +561,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
# Identity of this package.
PACKAGE_NAME='LLVM'
PACKAGE_TARNAME='llvm'
-PACKAGE_VERSION='3.4'
-PACKAGE_STRING='LLVM 3.4'
+PACKAGE_VERSION='3.4.1'
+PACKAGE_STRING='LLVM 3.4.1'
PACKAGE_BUGREPORT='http://llvm.org/bugs/'
ac_unique_file="lib/IR/Module.cpp"
@@ -639,6 +639,10 @@ LIBS
build_alias
host_alias
target_alias
+LLVM_VERSION_MAJOR
+LLVM_VERSION_MINOR
+LLVM_VERSION_PATCH
+LLVM_VERSION_SUFFIX
LLVM_COPYRIGHT
CC
CFLAGS
@@ -1330,7 +1334,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures LLVM 3.4 to adapt to many kinds of systems.
+\`configure' configures LLVM 3.4.1 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1396,7 +1400,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of LLVM 3.4:";;
+ short | recursive ) echo "Configuration of LLVM 3.4.1:";;
esac
cat <<\_ACEOF
@@ -1564,7 +1568,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-LLVM configure 3.4
+LLVM configure 3.4.1
generated by GNU Autoconf 2.60
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@@ -1580,7 +1584,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by LLVM $as_me 3.4, which was
+It was created by LLVM $as_me 3.4.1, which was
generated by GNU Autoconf 2.60. Invocation command line was
$ $0 $@
@@ -1934,16 +1938,32 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
-cat >>confdefs.h <<\_ACEOF
-#define LLVM_VERSION_MAJOR 3
+LLVM_VERSION_MAJOR=3
+LLVM_VERSION_MINOR=4
+LLVM_VERSION_PATCH=1
+LLVM_VERSION_SUFFIX=
+
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_VERSION_MAJOR $LLVM_VERSION_MAJOR
_ACEOF
-cat >>confdefs.h <<\_ACEOF
-#define LLVM_VERSION_MINOR 4
+cat >>confdefs.h <<_ACEOF
+#define LLVM_VERSION_MINOR $LLVM_VERSION_MINOR
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_VERSION_PATCH $LLVM_VERSION_PATCH
_ACEOF
+
+
+
+
+
LLVM_COPYRIGHT="Copyright (c) 2003-2013 University of Illinois at Urbana-Champaign."
@@ -8825,7 +8845,9 @@ if test "${enable_ltdl_install+set}" = set; then
fi
- if test x"${enable_ltdl_install-no}" != xno; then
+
+
+if test x"${enable_ltdl_install-no}" != xno; then
INSTALL_LTDL_TRUE=
INSTALL_LTDL_FALSE='#'
else
@@ -8833,7 +8855,9 @@ else
INSTALL_LTDL_FALSE=
fi
- if test x"${enable_ltdl_convenience-no}" != xno; then
+
+
+if test x"${enable_ltdl_convenience-no}" != xno; then
CONVENIENCE_LTDL_TRUE=
CONVENIENCE_LTDL_FALSE='#'
else
@@ -10582,7 +10606,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<EOF
-#line 10585 "configure"
+#line 10609 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -22745,7 +22769,7 @@ exec 6>&1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by LLVM $as_me 3.4, which was
+This file was extended by LLVM $as_me 3.4.1, which was
generated by GNU Autoconf 2.60. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -22798,7 +22822,7 @@ Report bugs to <bug-autoconf@gnu.org>."
_ACEOF
cat >>$CONFIG_STATUS <<_ACEOF
ac_cs_version="\\
-LLVM config.status 3.4
+LLVM config.status 3.4.1
configured by $0, generated by GNU Autoconf 2.60,
with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
@@ -23036,6 +23060,10 @@ LIBS!$LIBS$ac_delim
build_alias!$build_alias$ac_delim
host_alias!$host_alias$ac_delim
target_alias!$target_alias$ac_delim
+LLVM_VERSION_MAJOR!$LLVM_VERSION_MAJOR$ac_delim
+LLVM_VERSION_MINOR!$LLVM_VERSION_MINOR$ac_delim
+LLVM_VERSION_PATCH!$LLVM_VERSION_PATCH$ac_delim
+LLVM_VERSION_SUFFIX!$LLVM_VERSION_SUFFIX$ac_delim
LLVM_COPYRIGHT!$LLVM_COPYRIGHT$ac_delim
CC!$CC$ac_delim
CFLAGS!$CFLAGS$ac_delim
@@ -23092,10 +23120,6 @@ DISABLE_ASSERTIONS!$DISABLE_ASSERTIONS$ac_delim
ENABLE_WERROR!$ENABLE_WERROR$ac_delim
ENABLE_EXPENSIVE_CHECKS!$ENABLE_EXPENSIVE_CHECKS$ac_delim
EXPENSIVE_CHECKS!$EXPENSIVE_CHECKS$ac_delim
-DEBUG_RUNTIME!$DEBUG_RUNTIME$ac_delim
-DEBUG_SYMBOLS!$DEBUG_SYMBOLS$ac_delim
-KEEP_SYMBOLS!$KEEP_SYMBOLS$ac_delim
-JIT!$JIT$ac_delim
_ACEOF
if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then
@@ -23137,6 +23161,10 @@ _ACEOF
ac_delim='%!_!# '
for ac_last_try in false false false false false :; do
cat >conf$$subs.sed <<_ACEOF
+DEBUG_RUNTIME!$DEBUG_RUNTIME$ac_delim
+DEBUG_SYMBOLS!$DEBUG_SYMBOLS$ac_delim
+KEEP_SYMBOLS!$KEEP_SYMBOLS$ac_delim
+JIT!$JIT$ac_delim
TARGET_HAS_JIT!$TARGET_HAS_JIT$ac_delim
ENABLE_DOCS!$ENABLE_DOCS$ac_delim
ENABLE_DOXYGEN!$ENABLE_DOXYGEN$ac_delim
@@ -23230,10 +23258,6 @@ LLVM_ETCDIR!$LLVM_ETCDIR$ac_delim
LLVM_INCLUDEDIR!$LLVM_INCLUDEDIR$ac_delim
LLVM_INFODIR!$LLVM_INFODIR$ac_delim
LLVM_MANDIR!$LLVM_MANDIR$ac_delim
-LLVM_CONFIGTIME!$LLVM_CONFIGTIME$ac_delim
-BINDINGS_TO_BUILD!$BINDINGS_TO_BUILD$ac_delim
-ALL_BINDINGS!$ALL_BINDINGS$ac_delim
-OCAML_LIBDIR!$OCAML_LIBDIR$ac_delim
_ACEOF
if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then
@@ -23275,6 +23299,10 @@ _ACEOF
ac_delim='%!_!# '
for ac_last_try in false false false false false :; do
cat >conf$$subs.sed <<_ACEOF
+LLVM_CONFIGTIME!$LLVM_CONFIGTIME$ac_delim
+BINDINGS_TO_BUILD!$BINDINGS_TO_BUILD$ac_delim
+ALL_BINDINGS!$ALL_BINDINGS$ac_delim
+OCAML_LIBDIR!$OCAML_LIBDIR$ac_delim
ENABLE_VISIBILITY_INLINES_HIDDEN!$ENABLE_VISIBILITY_INLINES_HIDDEN$ac_delim
RPATH!$RPATH$ac_delim
RDYNAMIC!$RDYNAMIC$ac_delim
@@ -23283,7 +23311,7 @@ LIBOBJS!$LIBOBJS$ac_delim
LTLIBOBJS!$LTLIBOBJS$ac_delim
_ACEOF
- if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 6; then
+ if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 10; then
break
elif $ac_last_try; then
{ { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
diff --git a/include/llvm/Config/config.h.in b/include/llvm/Config/config.h.in
index dcec8f8cd1ea..2317823349ee 100644
--- a/include/llvm/Config/config.h.in
+++ b/include/llvm/Config/config.h.in
@@ -3,9 +3,6 @@
#ifndef CONFIG_H
#define CONFIG_H
-/* Define if building universal (internal helper macro) */
-#undef AC_APPLE_UNIVERSAL_BUILD
-
/* Bug report URL. */
#undef BUG_REPORT_URL
@@ -641,6 +638,9 @@
/* Minor version of the LLVM API */
#undef LLVM_VERSION_MINOR
+/* Patch version of the LLVM API */
+#undef LLVM_VERSION_PATCH
+
/* Define if the OS needs help to load dependent libraries for dlopen(). */
#undef LTDL_DLOPEN_DEPLIBS
@@ -673,9 +673,6 @@
/* Define to the one symbol short name of this package. */
#undef PACKAGE_TARNAME
-/* Define to the home page for this package. */
-#undef PACKAGE_URL
-
/* Define to the version of this package. */
#undef PACKAGE_VERSION
@@ -700,18 +697,6 @@
/* Type of 1st arg on ELM Callback */
#undef WIN32_ELMCB_PCSTR
-/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
- significant byte first (like Motorola and SPARC, unlike Intel). */
-#if defined AC_APPLE_UNIVERSAL_BUILD
-# if defined __BIG_ENDIAN__
-# define WORDS_BIGENDIAN 1
-# endif
-#else
-# ifndef WORDS_BIGENDIAN
-# undef WORDS_BIGENDIAN
-# endif
-#endif
-
/* Define to empty if `const' does not conform to ANSI C. */
#undef const
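
With LLVM_VERSION_PATCH now generated into the configuration header, out-of-tree code can feature-test the patch level at compile time. A minimal sketch, assuming the generated header patched above is visible to the client build; the HAS_LLVM_3_4_1_FIXES name is illustrative only.

    // Hypothetical compile-time guard; assumes the generated configuration
    // header is on the include path and defines the three version macros.
    #include "llvm/Config/config.h"

    #if LLVM_VERSION_MAJOR > 3 ||                                      \
        (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4) ||         \
        (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 4 &&         \
         LLVM_VERSION_PATCH >= 1)
    #define HAS_LLVM_3_4_1_FIXES 1   // illustrative feature macro
    #else
    #define HAS_LLVM_3_4_1_FIXES 0
    #endif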
diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td
index 4c5718f8c8b5..46c32ae2fd00 100644
--- a/include/llvm/IR/IntrinsicsX86.td
+++ b/include/llvm/IR/IntrinsicsX86.td
@@ -1758,68 +1758,68 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx2_gather_d_pd : GCCBuiltin<"__builtin_ia32_gatherd_pd">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2f64_ty, llvm_i8_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx2_gather_d_pd_256 : GCCBuiltin<"__builtin_ia32_gatherd_pd256">,
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4f64_ty, llvm_i8_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx2_gather_q_pd : GCCBuiltin<"__builtin_ia32_gatherq_pd">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i8_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx2_gather_q_pd_256 : GCCBuiltin<"__builtin_ia32_gatherq_pd256">,
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx2_gather_d_ps : GCCBuiltin<"__builtin_ia32_gatherd_ps">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i8_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx2_gather_d_ps_256 : GCCBuiltin<"__builtin_ia32_gatherd_ps256">,
Intrinsic<[llvm_v8f32_ty],
[llvm_v8f32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx2_gather_q_ps : GCCBuiltin<"__builtin_ia32_gatherq_ps">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v4f32_ty, llvm_i8_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx2_gather_q_ps_256 : GCCBuiltin<"__builtin_ia32_gatherq_ps256">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4f32_ty, llvm_i8_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx2_gather_d_q : GCCBuiltin<"__builtin_ia32_gatherd_q">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v2i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2i64_ty, llvm_i8_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx2_gather_d_q_256 : GCCBuiltin<"__builtin_ia32_gatherd_q256">,
Intrinsic<[llvm_v4i64_ty],
[llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i64_ty, llvm_i8_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx2_gather_q_q : GCCBuiltin<"__builtin_ia32_gatherq_q">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v2i64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx2_gather_q_q_256 : GCCBuiltin<"__builtin_ia32_gatherq_q256">,
Intrinsic<[llvm_v4i64_ty],
[llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx2_gather_d_d : GCCBuiltin<"__builtin_ia32_gatherd_d">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx2_gather_d_d_256 : GCCBuiltin<"__builtin_ia32_gatherd_d256">,
Intrinsic<[llvm_v8i32_ty],
[llvm_v8i32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx2_gather_q_d : GCCBuiltin<"__builtin_ia32_gatherq_d">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx2_gather_q_d_256 : GCCBuiltin<"__builtin_ia32_gatherq_d256">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
}
// Misc.
@@ -2909,28 +2909,28 @@ let TargetPrefix = "x86" in {
def int_x86_avx512_gather_dpd_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherdpd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i8_ty,
llvm_v8i32_ty, llvm_ptr_ty, llvm_i32_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx512_gather_dps_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherdps512">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i16_ty,
llvm_v16i32_ty, llvm_ptr_ty, llvm_i32_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx512_gather_qpd_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherqpd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i8_ty,
llvm_v8i64_ty, llvm_ptr_ty, llvm_i32_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx512_gather_qps_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherqps512">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i8_ty,
llvm_v8i64_ty, llvm_ptr_ty, llvm_i32_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx512_gather_dpd_512 : GCCBuiltin<"__builtin_ia32_gatherdpd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8i32_ty, llvm_ptr_ty,
llvm_i32_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx512_gather_dps_512 : GCCBuiltin<"__builtin_ia32_gatherdps512">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16i32_ty, llvm_ptr_ty,
llvm_i32_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx512_gather_qpd_512 : GCCBuiltin<"__builtin_ia32_gatherqpd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8i64_ty, llvm_ptr_ty,
llvm_i32_ty],
@@ -2938,12 +2938,12 @@ let TargetPrefix = "x86" in {
def int_x86_avx512_gather_qps_512 : GCCBuiltin<"__builtin_ia32_gatherqps512">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8i64_ty, llvm_ptr_ty,
llvm_i32_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx512_gather_dpq_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherdpq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_i8_ty,
llvm_v8i32_ty, llvm_ptr_ty, llvm_i32_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx512_gather_dpi_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherdpi512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_i16_ty,
llvm_v16i32_ty, llvm_ptr_ty, llvm_i32_ty],
@@ -2955,7 +2955,7 @@ let TargetPrefix = "x86" in {
def int_x86_avx512_gather_qpi_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherqpi512">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i8_ty,
llvm_v8i64_ty, llvm_ptr_ty, llvm_i32_ty],
- [IntrReadMem]>;
+ [IntrReadArgMem]>;
def int_x86_avx512_gather_dpq_512 : GCCBuiltin<"__builtin_ia32_gatherdpq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i32_ty, llvm_ptr_ty,
diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h
index 7a9939462147..9845623d4716 100644
--- a/include/llvm/MC/MCAsmInfo.h
+++ b/include/llvm/MC/MCAsmInfo.h
@@ -266,13 +266,16 @@ namespace llvm {
/// global as being a weak undefined symbol.
const char *WeakRefDirective; // Defaults to NULL.
- /// WeakDefDirective - This directive, if non-null, is used to declare a
- /// global as being a weak defined symbol.
- const char *WeakDefDirective; // Defaults to NULL.
+ /// True if we have a directive to declare a global as being a weak
+ /// defined symbol.
+ bool HasWeakDefDirective; // Defaults to false.
- /// LinkOnceDirective - This directive, if non-null is used to declare a
- /// global as being a weak defined symbol. This is used on cygwin/mingw.
- const char *LinkOnceDirective; // Defaults to NULL.
+ /// True if we have a directive to declare a global as being a weak
+ /// defined symbol that can be hidden (unexported).
+ bool HasWeakDefCanBeHiddenDirective; // Defaults to false.
+
+ /// True if we have a .linkonce directive. This is used on cygwin/mingw.
+ bool HasLinkOnceDirective; // Defaults to false.
/// HiddenVisibilityAttr - This attribute, if not MCSA_Invalid, is used to
/// declare a symbol as having hidden visibility.
@@ -303,6 +306,10 @@ namespace llvm {
/// uses relocations for references to other .debug_* sections.
bool DwarfUsesRelocationsAcrossSections;
+ /// DwarfFDESymbolsUseAbsDiff - true if DWARF FDE symbol reference
+ /// relocations should be replaced by an absolute difference.
+ bool DwarfFDESymbolsUseAbsDiff;
+
/// DwarfRegNumForCFI - True if dwarf register numbers are printed
/// instead of symbolic register names in .cfi_* directives.
bool DwarfRegNumForCFI; // Defaults to false;
@@ -497,8 +504,11 @@ namespace llvm {
bool hasIdentDirective() const { return HasIdentDirective; }
bool hasNoDeadStrip() const { return HasNoDeadStrip; }
const char *getWeakRefDirective() const { return WeakRefDirective; }
- const char *getWeakDefDirective() const { return WeakDefDirective; }
- const char *getLinkOnceDirective() const { return LinkOnceDirective; }
+ bool hasWeakDefDirective() const { return HasWeakDefDirective; }
+ bool hasWeakDefCanBeHiddenDirective() const {
+ return HasWeakDefCanBeHiddenDirective;
+ }
+ bool hasLinkOnceDirective() const { return HasLinkOnceDirective; }
MCSymbolAttr getHiddenVisibilityAttr() const { return HiddenVisibilityAttr;}
MCSymbolAttr getHiddenDeclarationVisibilityAttr() const {
@@ -528,6 +538,9 @@ namespace llvm {
bool doesDwarfUseRelocationsAcrossSections() const {
return DwarfUsesRelocationsAcrossSections;
}
+ bool doDwarfFDESymbolsUseAbsDiff() const {
+ return DwarfFDESymbolsUseAbsDiff;
+ }
bool useDwarfRegNumForCFI() const {
return DwarfRegNumForCFI;
}
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index b2c20110e90e..d0e186c5f23f 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -18,7 +18,10 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
@@ -38,6 +41,12 @@
#include <algorithm>
using namespace llvm;
+/// Cutoff after which to stop analysing a set of phi nodes potentially involved
+/// in a cycle. Because we are analysing 'through' phi nodes we need to be
+/// careful with value equivalence. We use reachability to make sure a value
+/// cannot be involved in a cycle.
+const unsigned MaxNumPhiBBsValueReachabilityCheck = 20;
+
//===----------------------------------------------------------------------===//
// Useful predicates
//===----------------------------------------------------------------------===//
@@ -403,42 +412,6 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
return V;
}
-/// GetIndexDifference - Dest and Src are the variable indices from two
-/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base
-/// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic
-/// difference between the two pointers.
-static void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
- const SmallVectorImpl<VariableGEPIndex> &Src) {
- if (Src.empty()) return;
-
- for (unsigned i = 0, e = Src.size(); i != e; ++i) {
- const Value *V = Src[i].V;
- ExtensionKind Extension = Src[i].Extension;
- int64_t Scale = Src[i].Scale;
-
- // Find V in Dest. This is N^2, but pointer indices almost never have more
- // than a few variable indexes.
- for (unsigned j = 0, e = Dest.size(); j != e; ++j) {
- if (Dest[j].V != V || Dest[j].Extension != Extension) continue;
-
- // If we found it, subtract off Scale V's from the entry in Dest. If it
- // goes to zero, remove the entry.
- if (Dest[j].Scale != Scale)
- Dest[j].Scale -= Scale;
- else
- Dest.erase(Dest.begin()+j);
- Scale = 0;
- break;
- }
-
- // If we didn't consume this entry, add it to the end of the Dest list.
- if (Scale) {
- VariableGEPIndex Entry = { V, Extension, -Scale };
- Dest.push_back(Entry);
- }
- }
-}
-
//===----------------------------------------------------------------------===//
// BasicAliasAnalysis Pass
//===----------------------------------------------------------------------===//
@@ -492,6 +465,7 @@ namespace {
// SmallDenseMap if it ever grows larger.
// FIXME: This should really be shrink_to_inline_capacity_and_clear().
AliasCache.shrink_and_clear();
+ VisitedPhiBBs.clear();
return Alias;
}
@@ -532,9 +506,39 @@ namespace {
typedef SmallDenseMap<LocPair, AliasResult, 8> AliasCacheTy;
AliasCacheTy AliasCache;
+ /// \brief Track phi nodes we have visited. When interpret "Value" pointer
+ /// equality as value equality we need to make sure that the "Value" is not
+ /// part of a cycle. Otherwise, two uses could come from different
+ /// "iterations" of a cycle and see different values for the same "Value"
+ /// pointer.
+ /// The following example shows the problem:
+ /// %p = phi(%alloca1, %addr2)
+ /// %l = load %ptr
+ /// %addr1 = gep, %alloca2, 0, %l
+ /// %addr2 = gep %alloca2, 0, (%l + 1)
+ /// alias(%p, %addr1) -> MayAlias !
+ /// store %l, ...
+ SmallPtrSet<const BasicBlock*, 8> VisitedPhiBBs;
+
// Visited - Track instructions visited by pointsToConstantMemory.
SmallPtrSet<const Value*, 16> Visited;
+ /// \brief Check whether two Values can be considered equivalent.
+ ///
+ /// In addition to pointer equivalence of \p V1 and \p V2 this checks
+ /// whether they can not be part of a cycle in the value graph by looking at
+ /// all visited phi nodes an making sure that the phis cannot reach the
+ /// value. We have to do this because we are looking through phi nodes (That
+ /// is we say noalias(V, phi(VA, VB)) if noalias(V, VA) and noalias(V, VB).
+ bool isValueEqualInPotentialCycles(const Value *V1, const Value *V2);
+
+ /// \brief Dest and Src are the variable indices from two decomposed
+ /// GetElementPtr instructions GEP1 and GEP2 which have common base
+ /// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic
+ /// difference between the two pointers.
+ void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
+ const SmallVectorImpl<VariableGEPIndex> &Src);
+
// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP
// instruction against another.
AliasResult aliasGEP(const GEPOperator *V1, uint64_t V1Size,
@@ -1005,7 +1009,15 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
return NoAlias;
}
} else {
- if (V1Size != UnknownSize) {
+ // We have the situation where:
+ // + +
+ // | BaseOffset |
+ // ---------------->|
+ // |-->V1Size |-------> V2Size
+ // GEP1 V2
+ // We need to know that V2Size is not unknown, otherwise we might have
+ // stripped a gep with negative index ('gep <ptr>, -1, ...).
+ if (V1Size != UnknownSize && V2Size != UnknownSize) {
if (-(uint64_t)GEP1BaseOffset < V1Size)
return PartialAlias;
return NoAlias;
@@ -1094,6 +1106,10 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
const MDNode *PNTBAAInfo,
const Value *V2, uint64_t V2Size,
const MDNode *V2TBAAInfo) {
+ // Track phi nodes we have visited. We use this information when we determine
+ // value equivalence.
+ VisitedPhiBBs.insert(PN->getParent());
+
// If the values are PHIs in the same block, we can do a more precise
// as well as efficient check: just check for aliases between the values
// on corresponding edges.
@@ -1187,7 +1203,13 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
V2 = V2->stripPointerCasts();
// Are we checking for alias of the same value?
- if (V1 == V2) return MustAlias;
+ // Because we look 'through' phi nodes we could look at "Value" pointers from
+ // different iterations. We must therefore make sure that this is not the
+ // case. The function isValueEqualInPotentialCycles ensures that this cannot
+ // happen by looking at the visited phi nodes and making sure they cannot
+ // reach the value.
+ if (isValueEqualInPotentialCycles(V1, V2))
+ return MustAlias;
if (!V1->getType()->isPointerTy() || !V2->getType()->isPointerTy())
return NoAlias; // Scalars cannot alias each other
@@ -1307,3 +1329,71 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
Location(V2, V2Size, V2TBAAInfo));
return AliasCache[Locs] = Result;
}
+
+bool BasicAliasAnalysis::isValueEqualInPotentialCycles(const Value *V,
+ const Value *V2) {
+ if (V != V2)
+ return false;
+
+ const Instruction *Inst = dyn_cast<Instruction>(V);
+ if (!Inst)
+ return true;
+
+ if (VisitedPhiBBs.size() > MaxNumPhiBBsValueReachabilityCheck)
+ return false;
+
+ // Use dominance or loop info if available.
+ DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
+ LoopInfo *LI = getAnalysisIfAvailable<LoopInfo>();
+
+ // Make sure that the visited phis cannot reach the Value. This ensures that
+ // the Values cannot come from different iterations of a potential cycle the
+ // phi nodes could be involved in.
+ for (SmallPtrSet<const BasicBlock *, 8>::iterator PI = VisitedPhiBBs.begin(),
+ PE = VisitedPhiBBs.end();
+ PI != PE; ++PI)
+ if (isPotentiallyReachable((*PI)->begin(), Inst, DT, LI))
+ return false;
+
+ return true;
+}
+
+/// GetIndexDifference - Dest and Src are the variable indices from two
+/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base
+/// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic
+/// difference between the two pointers.
+void BasicAliasAnalysis::GetIndexDifference(
+ SmallVectorImpl<VariableGEPIndex> &Dest,
+ const SmallVectorImpl<VariableGEPIndex> &Src) {
+ if (Src.empty())
+ return;
+
+ for (unsigned i = 0, e = Src.size(); i != e; ++i) {
+ const Value *V = Src[i].V;
+ ExtensionKind Extension = Src[i].Extension;
+ int64_t Scale = Src[i].Scale;
+
+ // Find V in Dest. This is N^2, but pointer indices almost never have more
+ // than a few variable indexes.
+ for (unsigned j = 0, e = Dest.size(); j != e; ++j) {
+ if (!isValueEqualInPotentialCycles(Dest[j].V, V) ||
+ Dest[j].Extension != Extension)
+ continue;
+
+ // If we found it, subtract off Scale V's from the entry in Dest. If it
+ // goes to zero, remove the entry.
+ if (Dest[j].Scale != Scale)
+ Dest[j].Scale -= Scale;
+ else
+ Dest.erase(Dest.begin() + j);
+ Scale = 0;
+ break;
+ }
+
+ // If we didn't consume this entry, add it to the end of the Dest list.
+ if (Scale) {
+ VariableGEPIndex Entry = { V, Extension, -Scale };
+ Dest.push_back(Entry);
+ }
+ }
+}
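
The heart of the BasicAliasAnalysis change is that, once the analysis looks through phi nodes, two identical Value pointers may stand for different runtime values on different trips around a cycle. A minimal stand-alone illustration of that distinction in plain C++ (not the LLVM API):

    // The expression &a[i] is textually the "same" value each iteration,
    // yet it names a different address every time around the loop, so a
    // fact proved about it on one iteration cannot be reused on another.
    #include <cstdio>

    int main() {
      int a[4] = {0, 1, 2, 3};
      int *prev = nullptr;
      for (int i = 0; i < 4; ++i) {
        int *cur = &a[i];
        if (prev)
          std::printf("prev == cur? %d\n", prev == cur);  // always 0
        prev = cur;
      }
      return 0;
    }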
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index b33e2cb9999e..5a06cdce3085 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -187,15 +187,34 @@ bool IVUsers::AddUsersImpl(Instruction *I,
if (AddUserToIVUsers) {
// Okay, we found a user that we cannot reduce.
- IVUses.push_back(new IVStrideUse(this, User, I));
- IVStrideUse &NewUse = IVUses.back();
+ IVStrideUse &NewUse = AddUser(User, I);
// Autodetect the post-inc loop set, populating NewUse.PostIncLoops.
// The regular return value here is discarded; instead of recording
// it, we just recompute it when we need it.
+ const SCEV *OriginalISE = ISE;
ISE = TransformForPostIncUse(NormalizeAutodetect,
ISE, User, I,
NewUse.PostIncLoops,
*SE, *DT);
+
+ // PostIncNormalization effectively simplifies the expression under
+ // pre-increment assumptions. Those assumptions (no wrapping) might not
+ // hold for the post-inc value. Catch such cases by making sure the
+ // transformation is invertible.
+ if (OriginalISE != ISE) {
+ const SCEV *DenormalizedISE =
+ TransformForPostIncUse(Denormalize, ISE, User, I,
+ NewUse.PostIncLoops, *SE, *DT);
+
+ // If we normalized the expression, but denormalization doesn't give the
+ // original one, discard this user.
+ if (OriginalISE != DenormalizedISE) {
+ DEBUG(dbgs() << " DISCARDING (NORMALIZATION ISN'T INVERTIBLE): "
+ << *ISE << '\n');
+ IVUses.pop_back();
+ return false;
+ }
+ }
DEBUG(if (SE->getSCEV(I) != ISE)
dbgs() << " NORMALIZED TO: " << *ISE << '\n');
}
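
The IVUsers fix is a round-trip test: normalize the expression, denormalize it again, and only trust the normalized form if the original comes back. A generic sketch of that invertibility-check pattern, with placeholder transform/invert functions (hypothetical names standing in for TransformForPostIncUse):

    // Sketch only: the placeholders model any rewrite plus its inverse.
    #include <string>

    static std::string transform(const std::string &S) { return S; } // placeholder
    static std::string invert(const std::string &S) { return S; }    // placeholder

    bool transformIsSafe(const std::string &Original) {
      std::string Transformed = transform(Original);
      if (Transformed == Original)
        return true;                       // nothing changed, trivially safe
      // Keep the transformed form only if undoing it reproduces the original.
      return invert(Transformed) == Original;
    }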
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 0a02f4e9d747..d9b696e0798f 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -6218,7 +6218,7 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
// LHS' type is checked for above.
if (getTypeSizeInBits(LHS->getType()) >
getTypeSizeInBits(FoundLHS->getType())) {
- if (CmpInst::isSigned(Pred)) {
+ if (CmpInst::isSigned(FoundPred)) {
FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType());
FoundRHS = getSignExtendExpr(FoundRHS, LHS->getType());
} else {
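
The ScalarEvolution fix widens FoundLHS/FoundRHS according to the signedness of the predicate that was actually proven (FoundPred), not the predicate being queried. A small arithmetic illustration of why the choice of extension matters:

    // The 8-bit pattern 0xFF is -1 under a signed view but 255 under an
    // unsigned one, so widening with the wrong extension changes the fact.
    #include <cstdint>
    #include <cstdio>

    int main() {
      uint8_t Narrow = 0xFF;
      int32_t SExt = (int32_t)(int8_t)Narrow;   // -1  (signed view)
      int32_t ZExt = (int32_t)Narrow;           // 255 (unsigned view)
      std::printf("sext: %d, zext: %d\n", SExt, ZExt);
      // An unsigned fact about Narrow says nothing about SExt, and a
      // signed fact says nothing about ZExt.
      return 0;
    }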
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 308b0e091ac5..8be4dcba3689 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -222,13 +222,14 @@ void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const {
case GlobalValue::WeakAnyLinkage:
case GlobalValue::WeakODRLinkage:
case GlobalValue::LinkerPrivateWeakLinkage:
- if (MAI->getWeakDefDirective() != 0) {
+ if (MAI->hasWeakDefDirective()) {
// .globl _foo
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
bool CanBeHidden = false;
- if (Linkage == GlobalValue::LinkOnceODRLinkage) {
+ if (Linkage == GlobalValue::LinkOnceODRLinkage &&
+ MAI->hasWeakDefCanBeHiddenDirective()) {
if (GV->hasUnnamedAddr()) {
CanBeHidden = true;
} else {
@@ -243,7 +244,7 @@ void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const {
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition);
else
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefAutoPrivate);
- } else if (MAI->getLinkOnceDirective() != 0) {
+ } else if (MAI->hasLinkOnceDirective()) {
// .globl _foo
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
//NOTE: linkonce is handled by the section the symbol was assigned to.
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 43f72c5ef9b4..69cf8d9a909a 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -8547,7 +8547,10 @@ struct MemOpLink {
// base ptr.
struct ConsecutiveMemoryChainSorter {
bool operator()(MemOpLink LHS, MemOpLink RHS) {
- return LHS.OffsetFromBase < RHS.OffsetFromBase;
+ return
+ LHS.OffsetFromBase < RHS.OffsetFromBase ||
+ (LHS.OffsetFromBase == RHS.OffsetFromBase &&
+ LHS.SequenceNum > RHS.SequenceNum);
}
};
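
The DAGCombiner comparator now breaks offset ties with the sequence number, which keeps the ordering strict and makes the store-merging order deterministic. A generic sketch of the same tie-breaking comparator outside LLVM:

    // Sorting by offset alone is ambiguous when two entries share an offset;
    // a second key (the sequence number) orders the ties, mirroring the
    // comparator above.
    #include <algorithm>
    #include <vector>

    struct Link {
      long Offset;
      unsigned SequenceNum;
    };

    int main() {
      std::vector<Link> Chain = {{8, 0}, {0, 2}, {0, 1}};
      std::sort(Chain.begin(), Chain.end(), [](const Link &L, const Link &R) {
        return L.Offset < R.Offset ||
               (L.Offset == R.Offset && L.SequenceNum > R.SequenceNum);
      });
      // Chain is now {0,2}, {0,1}, {8,0}: equal offsets resolved by SequenceNum.
      return 0;
    }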
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 2c3cdccb56e8..3fb2d9bdf55e 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -210,6 +210,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::SRL:
case ISD::ROTL:
case ISD::ROTR:
+ case ISD::BSWAP:
case ISD::CTLZ:
case ISD::CTTZ:
case ISD::CTLZ_ZERO_UNDEF:
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 054e3dd840b5..c1893c9231f0 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -219,8 +219,11 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
DenseMap<long long, SDNode*> O2SMap; // Map from offset to SDNode.
bool Cluster = false;
SDNode *Base = Node;
+ // This algorithm requires a reasonably low use count before finding a match
+ // to avoid uselessly blowing up compile time in large blocks.
+ unsigned UseCount = 0;
for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
- I != E; ++I) {
+ I != E && UseCount < 100; ++I, ++UseCount) {
SDNode *User = *I;
if (User == Node || !Visited.insert(User))
continue;
@@ -237,6 +240,8 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
if (Offset2 < Offset1)
Base = User;
Cluster = true;
+ // Reset UseCount to allow more matches.
+ UseCount = 0;
}
if (!Cluster)
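
The scheduler change caps how many chain users are examined before giving up, while resetting the budget whenever a neighboring load is found. A sketch of that bounded-scan idiom with illustrative names (not the LLVM API):

    // Give up after a fixed number of consecutive misses, but reset the
    // budget on every hit so productive scans may continue.
    #include <vector>

    template <typename T, typename Pred>
    std::vector<T> collectWithBudget(const std::vector<T> &Users, Pred Matches,
                                     unsigned Budget = 100) {
      std::vector<T> Found;
      unsigned Misses = 0;
      for (const T &U : Users) {
        if (Misses >= Budget)
          break;                 // cap compile time in very large blocks
        if (Matches(U)) {
          Found.push_back(U);
          Misses = 0;            // a hit buys more budget
        } else {
          ++Misses;
        }
      }
      return Found;
    }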
diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp
index 28f1c951641c..daf19e927c7a 100644
--- a/lib/MC/MCAsmInfo.cpp
+++ b/lib/MC/MCAsmInfo.cpp
@@ -76,8 +76,9 @@ MCAsmInfo::MCAsmInfo() {
HasIdentDirective = false;
HasNoDeadStrip = false;
WeakRefDirective = 0;
- WeakDefDirective = 0;
- LinkOnceDirective = 0;
+ HasWeakDefDirective = false;
+ HasWeakDefCanBeHiddenDirective = false;
+ HasLinkOnceDirective = false;
HiddenVisibilityAttr = MCSA_Hidden;
HiddenDeclarationVisibilityAttr = MCSA_Hidden;
ProtectedVisibilityAttr = MCSA_Protected;
@@ -85,6 +86,7 @@ MCAsmInfo::MCAsmInfo() {
SupportsDebugInformation = false;
ExceptionsType = ExceptionHandling::None;
DwarfUsesRelocationsAcrossSections = true;
+ DwarfFDESymbolsUseAbsDiff = false;
DwarfRegNumForCFI = false;
HasMicrosoftFastStdCallMangling = false;
NeedsDwarfSectionOffsetDirective = false;
diff --git a/lib/MC/MCAsmInfoCOFF.cpp b/lib/MC/MCAsmInfoCOFF.cpp
index 9d9f98e72b96..1cac71f1eda1 100644
--- a/lib/MC/MCAsmInfoCOFF.cpp
+++ b/lib/MC/MCAsmInfoCOFF.cpp
@@ -27,7 +27,7 @@ MCAsmInfoCOFF::MCAsmInfoCOFF() {
HasSingleParameterDotFile = false;
PrivateGlobalPrefix = "L"; // Prefix for private global symbols
WeakRefDirective = "\t.weak\t";
- LinkOnceDirective = "\t.linkonce discard\n";
+ HasLinkOnceDirective = true;
// Doesn't support visibility:
HiddenVisibilityAttr = HiddenDeclarationVisibilityAttr = MCSA_Invalid;
diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp
index 704c8161f880..351ec56352a6 100644
--- a/lib/MC/MCAsmInfoDarwin.cpp
+++ b/lib/MC/MCAsmInfoDarwin.cpp
@@ -36,7 +36,8 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() {
InlineAsmEnd = " InlineAsm End";
// Directives:
- WeakDefDirective = "\t.weak_definition ";
+ HasWeakDefDirective = true;
+ HasWeakDefCanBeHiddenDirective = true;
WeakRefDirective = "\t.weak_reference ";
ZeroDirective = "\t.space\t"; // ".space N" emits N zeros.
HasMachoZeroFillDirective = true; // Uses .zerofill
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp
index 1e5c2e34c488..9fd13ab3af61 100644
--- a/lib/MC/MCDwarf.cpp
+++ b/lib/MC/MCDwarf.cpp
@@ -836,8 +836,9 @@ static unsigned getSizeForEncoding(MCStreamer &streamer,
}
}
-static void EmitSymbol(MCStreamer &streamer, const MCSymbol &symbol,
- unsigned symbolEncoding, const char *comment = 0) {
+static void EmitFDESymbol(MCStreamer &streamer, const MCSymbol &symbol,
+ unsigned symbolEncoding, bool isEH,
+ const char *comment = 0) {
MCContext &context = streamer.getContext();
const MCAsmInfo *asmInfo = context.getAsmInfo();
const MCExpr *v = asmInfo->getExprForFDESymbol(&symbol,
@@ -845,7 +846,10 @@ static void EmitSymbol(MCStreamer &streamer, const MCSymbol &symbol,
streamer);
unsigned size = getSizeForEncoding(streamer, symbolEncoding);
if (streamer.isVerboseAsm() && comment) streamer.AddComment(comment);
- streamer.EmitAbsValue(v, size);
+ if (asmInfo->doDwarfFDESymbolsUseAbsDiff() && isEH)
+ streamer.EmitAbsValue(v, size);
+ else
+ streamer.EmitValue(v, size);
}
static void EmitPersonality(MCStreamer &streamer, const MCSymbol &symbol,
@@ -1344,7 +1348,7 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer,
unsigned PCEncoding = IsEH ? MOFI->getFDEEncoding(UsingCFI)
: (unsigned)dwarf::DW_EH_PE_absptr;
unsigned PCSize = getSizeForEncoding(streamer, PCEncoding);
- EmitSymbol(streamer, *frame.Begin, PCEncoding, "FDE initial location");
+ EmitFDESymbol(streamer, *frame.Begin, PCEncoding, IsEH, "FDE initial location");
// PC Range
const MCExpr *Range = MakeStartMinusEndExpr(streamer, *frame.Begin,
@@ -1364,8 +1368,8 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer,
// Augmentation Data
if (frame.Lsda)
- EmitSymbol(streamer, *frame.Lsda, frame.LsdaEncoding,
- "Language Specific Data Area");
+ EmitFDESymbol(streamer, *frame.Lsda, frame.LsdaEncoding, true,
+ "Language Specific Data Area");
}
// Call Frame Instructions
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index a91bd93105b6..6f7729639e42 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -4292,6 +4292,10 @@ bool AsmParser::parseMSInlineAsm(
break;
}
case AOK_DotOperator:
+ // Insert the dot if the user omitted it.
+ OS.flush();
+ if (AsmStringIR.at(AsmStringIR.size() - 1) != '.')
+ OS << '.';
OS << (*I).Val;
break;
}
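
The AsmParser change inserts the '.' of a dot operator only when the user left it out. A tiny stand-alone sketch of the same guard (hypothetical helper, not the parser's real interface):

    // Append the separating '.' only if the accumulated text does not
    // already end with one, then emit the field name.
    #include <string>

    void appendDotOperator(std::string &Asm, const std::string &Field) {
      if (Asm.empty() || Asm.back() != '.')
        Asm += '.';
      Asm += Field;
    }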
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 4fdb667b9539..cf7aec3b4538 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -31,12 +31,8 @@ using namespace llvm;
static TargetLoweringObjectFile *createTLOF(AArch64TargetMachine &TM) {
const AArch64Subtarget *Subtarget = &TM.getSubtarget<AArch64Subtarget>();
-
- if (Subtarget->isTargetLinux())
- return new AArch64LinuxTargetObjectFile();
- if (Subtarget->isTargetELF())
- return new TargetLoweringObjectFileELF();
- llvm_unreachable("unknown subtarget type");
+ assert (Subtarget->isTargetELF() && "unknown subtarget type");
+ return new AArch64ElfTargetObjectFile();
}
AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
@@ -2782,7 +2778,7 @@ AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
SDValue
AArch64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
- const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
+ const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
// We have to make sure we copy the entire structure: 8+8+8+4+4 = 32 bytes
// rather than just 8.
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 23d81fc478e8..8e5a4d30396c 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -2587,6 +2587,7 @@ class A64I_SRexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
pat, itin> {
let mayStore = 1;
let PostEncoderMethod = "fixLoadStoreExclusive<1,0>";
+ let Constraints = "@earlyclobber $Rs";
}
multiclass A64I_SRex<string asmstr, bits<3> opcode, string prefix> {
diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/lib/Target/AArch64/AArch64TargetObjectFile.cpp
index b4452f514590..f8f21198a4f9 100644
--- a/lib/Target/AArch64/AArch64TargetObjectFile.cpp
+++ b/lib/Target/AArch64/AArch64TargetObjectFile.cpp
@@ -22,3 +22,10 @@ AArch64LinuxTargetObjectFile::Initialize(MCContext &Ctx,
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
InitializeELF(TM.Options.UseInitArray);
}
+
+void
+AArch64ElfTargetObjectFile::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+ InitializeELF(TM.Options.UseInitArray);
+}
diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.h b/lib/Target/AArch64/AArch64TargetObjectFile.h
index bf0565a79ec8..f782285d1c02 100644
--- a/lib/Target/AArch64/AArch64TargetObjectFile.h
+++ b/lib/Target/AArch64/AArch64TargetObjectFile.h
@@ -20,8 +20,12 @@
namespace llvm {
- /// AArch64LinuxTargetObjectFile - This implementation is used for linux
- /// AArch64.
+ /// AArch64ElfTargetObjectFile - This implementation is used for ELF
+ /// AArch64 targets.
+ class AArch64ElfTargetObjectFile : public TargetLoweringObjectFileELF {
+ virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+ };
+
class AArch64LinuxTargetObjectFile : public TargetLoweringObjectFileELF {
virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
};
diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp
index ff585b41a2aa..3e805a2b68bc 100644
--- a/lib/Target/ARM/A15SDOptimizer.cpp
+++ b/lib/Target/ARM/A15SDOptimizer.cpp
@@ -418,7 +418,8 @@ SmallVector<unsigned, 8> A15SDOptimizer::getReadDPRs(MachineInstr *MI) {
if (!MO.isReg() || !MO.isUse())
continue;
if (!usesRegClass(MO, &ARM::DPRRegClass) &&
- !usesRegClass(MO, &ARM::QPRRegClass))
+ !usesRegClass(MO, &ARM::QPRRegClass) &&
+ !usesRegClass(MO, &ARM::DPairRegClass)) // Treat DPair as QPR
continue;
Defs.push_back(MO.getReg());
@@ -538,7 +539,10 @@ A15SDOptimizer::optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg) {
InsertPt++;
unsigned Out;
- if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::QPRRegClass)) {
+ // DPair has the same length as QPR and also has two DPRs as subreg.
+ // Treat DPair as QPR.
+ if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::QPRRegClass) ||
+ MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPairRegClass)) {
unsigned DSub0 = createExtractSubreg(MBB, InsertPt, DL, Reg,
ARM::dsub_0, &ARM::DPRRegClass);
unsigned DSub1 = createExtractSubreg(MBB, InsertPt, DL, Reg,
@@ -571,7 +575,9 @@ A15SDOptimizer::optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg) {
default: llvm_unreachable("Unknown preferred lane!");
}
- bool UsesQPR = usesRegClass(MI->getOperand(0), &ARM::QPRRegClass);
+ // Treat DPair as QPR
+ bool UsesQPR = usesRegClass(MI->getOperand(0), &ARM::QPRRegClass) ||
+ usesRegClass(MI->getOperand(0), &ARM::DPairRegClass);
Out = createImplicitDef(MBB, InsertPt, DL);
Out = createInsertSubreg(MBB, InsertPt, DL, Out, PrefLane, Reg);
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index f835a4e5b5fe..658af8380d8d 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -3684,6 +3684,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD3d16Pseudo:
case ARM::VLD3d32Pseudo:
case ARM::VLD1d64TPseudo:
+ case ARM::VLD1d64TPseudoWB_fixed:
case ARM::VLD3d8Pseudo_UPD:
case ARM::VLD3d16Pseudo_UPD:
case ARM::VLD3d32Pseudo_UPD:
@@ -3700,6 +3701,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD4d16Pseudo:
case ARM::VLD4d32Pseudo:
case ARM::VLD1d64QPseudo:
+ case ARM::VLD1d64QPseudoWB_fixed:
case ARM::VLD4d8Pseudo_UPD:
case ARM::VLD4d16Pseudo_UPD:
case ARM::VLD4d32Pseudo_UPD:
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index e6f7f86c5587..3e62b649e0dc 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -136,7 +136,9 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
{ ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, true, EvenDblSpc, 1, 8 ,true},
{ ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, false, SingleSpc, 4, 1 ,false},
+{ ARM::VLD1d64QPseudoWB_fixed, ARM::VLD1d64Qwb_fixed, true, true, false, SingleSpc, 4, 1 ,false},
{ ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, false, SingleSpc, 3, 1 ,false},
+{ ARM::VLD1d64TPseudoWB_fixed, ARM::VLD1d64Twb_fixed, true, true, false, SingleSpc, 3, 1 ,false},
{ ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, false, SingleSpc, 2, 4 ,true},
{ ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, true, SingleSpc, 2, 4 ,true},
@@ -1071,6 +1073,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VLD3d16Pseudo:
case ARM::VLD3d32Pseudo:
case ARM::VLD1d64TPseudo:
+ case ARM::VLD1d64TPseudoWB_fixed:
case ARM::VLD3d8Pseudo_UPD:
case ARM::VLD3d16Pseudo_UPD:
case ARM::VLD3d32Pseudo_UPD:
@@ -1087,6 +1090,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VLD4d16Pseudo:
case ARM::VLD4d32Pseudo:
case ARM::VLD1d64QPseudo:
+ case ARM::VLD1d64QPseudoWB_fixed:
case ARM::VLD4d8Pseudo_UPD:
case ARM::VLD4d16Pseudo_UPD:
case ARM::VLD4d32Pseudo_UPD:
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 87d15226947a..6d9b18877f72 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -1673,9 +1673,61 @@ SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, unsigned NumVecs,
return CurDAG->getTargetConstant(Alignment, MVT::i32);
}
+static bool isVLDfixed(unsigned Opc)
+{
+ switch (Opc) {
+ default: return false;
+ case ARM::VLD1d8wb_fixed : return true;
+ case ARM::VLD1d16wb_fixed : return true;
+ case ARM::VLD1d64Qwb_fixed : return true;
+ case ARM::VLD1d32wb_fixed : return true;
+ case ARM::VLD1d64wb_fixed : return true;
+ case ARM::VLD1d64TPseudoWB_fixed : return true;
+ case ARM::VLD1d64QPseudoWB_fixed : return true;
+ case ARM::VLD1q8wb_fixed : return true;
+ case ARM::VLD1q16wb_fixed : return true;
+ case ARM::VLD1q32wb_fixed : return true;
+ case ARM::VLD1q64wb_fixed : return true;
+ case ARM::VLD2d8wb_fixed : return true;
+ case ARM::VLD2d16wb_fixed : return true;
+ case ARM::VLD2d32wb_fixed : return true;
+ case ARM::VLD2q8PseudoWB_fixed : return true;
+ case ARM::VLD2q16PseudoWB_fixed : return true;
+ case ARM::VLD2q32PseudoWB_fixed : return true;
+ case ARM::VLD2DUPd8wb_fixed : return true;
+ case ARM::VLD2DUPd16wb_fixed : return true;
+ case ARM::VLD2DUPd32wb_fixed : return true;
+ }
+}
+
+static bool isVSTfixed(unsigned Opc)
+{
+ switch (Opc) {
+ default: return false;
+ case ARM::VST1d8wb_fixed : return true;
+ case ARM::VST1d16wb_fixed : return true;
+ case ARM::VST1d32wb_fixed : return true;
+ case ARM::VST1d64wb_fixed : return true;
+ case ARM::VST1q8wb_fixed : return true;
+ case ARM::VST1q16wb_fixed : return true;
+ case ARM::VST1q32wb_fixed : return true;
+ case ARM::VST1q64wb_fixed : return true;
+ case ARM::VST1d64TPseudoWB_fixed : return true;
+ case ARM::VST1d64QPseudoWB_fixed : return true;
+ case ARM::VST2d8wb_fixed : return true;
+ case ARM::VST2d16wb_fixed : return true;
+ case ARM::VST2d32wb_fixed : return true;
+ case ARM::VST2q8PseudoWB_fixed : return true;
+ case ARM::VST2q16PseudoWB_fixed : return true;
+ case ARM::VST2q32PseudoWB_fixed : return true;
+ }
+}
+
// Get the register stride update opcode of a VLD/VST instruction that
// is otherwise equivalent to the given fixed stride updating instruction.
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
+ assert((isVLDfixed(Opc) || isVSTfixed(Opc))
+ && "Incorrect fixed stride updating instruction.");
switch (Opc) {
default: break;
case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
@@ -1686,6 +1738,10 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
+ case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
+ case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
+ case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
+ case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
@@ -1785,11 +1841,11 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
SDValue Inc = N->getOperand(AddrOpIdx + 1);
// FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
// case entirely when the rest are updated to that form, too.
- if ((NumVecs == 1 || NumVecs == 2) && !isa<ConstantSDNode>(Inc.getNode()))
+ if ((NumVecs <= 2) && !isa<ConstantSDNode>(Inc.getNode()))
Opc = getVLDSTRegisterUpdateOpcode(Opc);
- // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
+ // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
// check for that explicitly too. Horribly hacky, but temporary.
- if ((NumVecs != 1 && NumVecs != 2 && Opc != ARM::VLD1q64wb_fixed) ||
+ if ((NumVecs > 2 && !isVLDfixed(Opc)) ||
!isa<ConstantSDNode>(Inc.getNode()))
Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
}
@@ -1937,11 +1993,12 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
// case entirely when the rest are updated to that form, too.
if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
Opc = getVLDSTRegisterUpdateOpcode(Opc);
- // We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
+ // FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
// check for that explicitly too. Horribly hacky, but temporary.
- if ((NumVecs > 2 && Opc != ARM::VST1q64wb_fixed) ||
- !isa<ConstantSDNode>(Inc.getNode()))
- Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ if (!isa<ConstantSDNode>(Inc.getNode()))
+ Ops.push_back(Inc);
+ else if (NumVecs > 2 && !isVSTfixed(Opc))
+ Ops.push_back(Reg0);
}
Ops.push_back(SrcReg);
Ops.push_back(Pred);
@@ -2834,7 +2891,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
ARM::VLD3d16Pseudo_UPD,
ARM::VLD3d32Pseudo_UPD,
- ARM::VLD1q64wb_fixed};
+ ARM::VLD1d64TPseudoWB_fixed};
static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
ARM::VLD3q16Pseudo_UPD,
ARM::VLD3q32Pseudo_UPD };
@@ -2848,7 +2905,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
ARM::VLD4d16Pseudo_UPD,
ARM::VLD4d32Pseudo_UPD,
- ARM::VLD1q64wb_fixed};
+ ARM::VLD1d64QPseudoWB_fixed};
static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
ARM::VLD4q16Pseudo_UPD,
ARM::VLD4q32Pseudo_UPD };
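
The distinction the new isVLDfixed/isVSTfixed helpers encode is between the post-increment ("fixed") and register-stride ("register") writeback forms of these loads and stores. Roughly, at the assembly level it looks like the following hedged C++ sketch, which assumes a gcc/clang ARM+NEON target and uses gcc-style inline asm purely for illustration:

// "_fixed": the base pointer is post-incremented by the transfer size.
// "_register": the base pointer is advanced by a general-purpose register.
static inline void vld1_writeback_forms(unsigned char *p, int stride) {
  __asm__ volatile("vld1.8 {d0}, [%0]!" : "+r"(p) : : "d0", "memory");
  __asm__ volatile("vld1.8 {d1}, [%0], %1" : "+r"(p) : "r"(stride) : "d1", "memory");
}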
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 43bd4c21dc39..0b05c08ed948 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -730,6 +730,8 @@ defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32">;
defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64">;
def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>;
+def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>;
+def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>;
// ...with 4 registers
class VLD1D4<bits<4> op7_4, string Dt>
@@ -769,6 +771,8 @@ defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32">;
defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64">;
def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;
+def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>;
+def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>;
// VLD2 : Vector Load (multiple 2-element structures)
class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
@@ -1671,7 +1675,7 @@ defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32">;
defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64">;
def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>;
-def VST1d64TPseudoWB_fixed : VSTQQWBPseudo<IIC_VST1x3u>;
+def VST1d64TPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x3u>;
def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>;
// ...with 4 registers
@@ -1714,7 +1718,7 @@ defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32">;
defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64">;
def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>;
-def VST1d64QPseudoWB_fixed : VSTQQWBPseudo<IIC_VST1x4u>;
+def VST1d64QPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x4u>;
def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>;
// VST2 : Vector Store (multiple 2-element structures)
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index f3dddce30120..1d9c06406a4b 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -12,10 +12,14 @@
//===----------------------------------------------------------------------===//
#include "PPCMCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+
using namespace llvm;
void PPCMCAsmInfoDarwin::anchor() { }
+/// This version of the constructor is here to maintain ABI compatibility with
+/// LLVM 3.4.0
PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {
if (is64Bit) {
PointerSize = CalleeSaveStackSlotSize = 8;
@@ -32,6 +36,28 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {
SupportsDebugInformation= true; // Debug information.
}
+PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit, const Triple& T) {
+ if (is64Bit) {
+ PointerSize = CalleeSaveStackSlotSize = 8;
+ }
+ IsLittleEndian = false;
+
+ CommentString = ";";
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+
+ if (!is64Bit)
+ Data64bitsDirective = 0; // We can't emit a 64-bit unit in PPC32 mode.
+
+ AssemblerDialect = 1; // New-Style mnemonics.
+ SupportsDebugInformation= true; // Debug information.
+
+ // old assembler lacks some directives
+ // FIXME: this should really be a check on the assembler characteristics
+ // rather than OS version
+ if (T.isMacOSX() && T.isMacOSXVersionLT(10, 6))
+ HasWeakDefCanBeHiddenDirective = false;
+}
+
void PPCLinuxMCAsmInfo::anchor() { }
PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
index 1530e774cfc7..633970ccc289 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
@@ -18,11 +18,15 @@
#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
+class Triple;
class PPCMCAsmInfoDarwin : public MCAsmInfoDarwin {
virtual void anchor();
public:
+ /// This version of the constructor is here to maintain ABI compatibility
+ /// with LLVM 3.4.0.
explicit PPCMCAsmInfoDarwin(bool is64Bit);
+ explicit PPCMCAsmInfoDarwin(bool is64Bit, const Triple&);
};
class PPCLinuxMCAsmInfo : public MCAsmInfoELF {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index f18d095c6d02..6a5051840181 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -72,7 +72,7 @@ static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) {
MCAsmInfo *MAI;
if (TheTriple.isOSDarwin())
- MAI = new PPCMCAsmInfoDarwin(isPPC64);
+ MAI = new PPCMCAsmInfoDarwin(isPPC64, TheTriple);
else
MAI = new PPCLinuxMCAsmInfo(isPPC64);
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index ada34ed9e18a..2d92a112d5ef 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -701,13 +701,6 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
break;
- case PPC::SYNC:
- // In Book E sync is called msync, handle this special case here...
- if (Subtarget.isBookE()) {
- OutStreamer.EmitRawText(StringRef("\tmsync"));
- return;
- }
- break;
case PPC::LD:
case PPC::STD:
case PPC::LWA_32:
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index 4224ae2d273c..e419b9b40d8e 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -186,6 +186,13 @@ bool PPCCTRLoops::runOnFunction(Function &F) {
return MadeChange;
}
+static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) {
+ if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
+ return ITy->getBitWidth() > (Is32Bit ? 32 : 64);
+
+ return false;
+}
+
bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
J != JE; ++J) {
@@ -352,13 +359,11 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
CastInst *CI = cast<CastInst>(J);
if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
- (TT.isArch32Bit() &&
- (CI->getSrcTy()->getScalarType()->isIntegerTy(64) ||
- CI->getDestTy()->getScalarType()->isIntegerTy(64))
- ))
+ isLargeIntegerTy(TT.isArch32Bit(), CI->getSrcTy()->getScalarType()) ||
+ isLargeIntegerTy(TT.isArch32Bit(), CI->getDestTy()->getScalarType()))
return true;
- } else if (TT.isArch32Bit() &&
- J->getType()->getScalarType()->isIntegerTy(64) &&
+ } else if (isLargeIntegerTy(TT.isArch32Bit(),
+ J->getType()->getScalarType()) &&
(J->getOpcode() == Instruction::UDiv ||
J->getOpcode() == Instruction::SDiv ||
J->getOpcode() == Instruction::URem ||
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index 09117e7ded49..4e3b0b83244a 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -892,11 +892,13 @@ unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
unsigned LoadOpc = PPC::LFD;
if (SrcVT == MVT::i32) {
- Addr.Offset = 4;
- if (!IsSigned)
+ if (!IsSigned) {
LoadOpc = PPC::LFIWZX;
- else if (PPCSubTarget.hasLFIWAX())
+ Addr.Offset = 4;
+ } else if (PPCSubTarget.hasLFIWAX()) {
LoadOpc = PPC::LFIWAX;
+ Addr.Offset = 4;
+ }
}
const TargetRegisterClass *RC = &PPC::F8RCRegClass;
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 6ba6af6446e5..d25762a5bbca 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -261,11 +261,11 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
DebugLoc dl;
if (PPCLowering.getPointerTy() == MVT::i32) {
- GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
+ GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::GPRC_NOR0RegClass);
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
} else {
- GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RCRegClass);
+ GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_NOX0RegClass);
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
}
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 8da5f0563c6a..25a7ca7f59a7 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -2333,7 +2333,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
EVT ObjType = (ObjSize == 1 ? MVT::i8 :
(ObjSize == 2 ? MVT::i16 : MVT::i32));
Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(FuncArg, CurArgOffset),
+ MachinePointerInfo(FuncArg),
ObjType, false, false, 0);
} else {
// For sizes that don't fit a truncating store (3, 5, 6, 7),
@@ -2345,7 +2345,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(FuncArg, ArgOffset),
+ MachinePointerInfo(FuncArg),
false, false, 0);
}
@@ -2369,7 +2369,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(FuncArg, ArgOffset),
+ MachinePointerInfo(FuncArg, j),
false, false, 0);
MemOps.push_back(Store);
++GPR_idx;
@@ -2665,8 +2665,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(FuncArg,
- CurArgOffset),
+ MachinePointerInfo(FuncArg),
ObjType, false, false, 0);
MemOps.push_back(Store);
++GPR_idx;
@@ -2690,7 +2689,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(FuncArg, ArgOffset),
+ MachinePointerInfo(FuncArg, j),
false, false, 0);
MemOps.push_back(Store);
++GPR_idx;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 315ad04ebe3e..80bc27a95765 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -570,12 +570,14 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
// update isStoreToStackSlot.
DebugLoc DL;
- if (PPC::GPRCRegClass.hasSubClassEq(RC)) {
+ if (PPC::GPRCRegClass.hasSubClassEq(RC) ||
+ PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
.addReg(SrcReg,
getKillRegState(isKill)),
FrameIdx));
- } else if (PPC::G8RCRegClass.hasSubClassEq(RC)) {
+ } else if (PPC::G8RCRegClass.hasSubClassEq(RC) ||
+ PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
.addReg(SrcReg,
getKillRegState(isKill)),
@@ -695,10 +697,12 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
// Note: If additional load instructions are added here,
// update isLoadFromStackSlot.
- if (PPC::GPRCRegClass.hasSubClassEq(RC)) {
+ if (PPC::GPRCRegClass.hasSubClassEq(RC) ||
+ PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
DestReg), FrameIdx));
- } else if (PPC::G8RCRegClass.hasSubClassEq(RC)) {
+ } else if (PPC::G8RCRegClass.hasSubClassEq(RC) ||
+ PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg),
FrameIdx));
} else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 2bd3aadc798d..fc29c69642bf 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -580,6 +580,7 @@ def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
def In32BitMode : Predicate<"!PPCSubTarget.isPPC64()">;
def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">;
def IsBookE : Predicate<"PPCSubTarget.isBookE()">;
+def IsNotBookE : Predicate<"!PPCSubTarget.isBookE()">;
//===----------------------------------------------------------------------===//
// PowerPC Multiclass Definitions.
@@ -1541,8 +1542,17 @@ def STMW : DForm_1<47, (outs), (ins gprc:$rS, memri:$dst),
"stmw $rS, $dst", LdStLMW, []>;
def SYNC : XForm_24_sync<31, 598, (outs), (ins i32imm:$L),
- "sync $L", LdStSync, []>;
-def : Pat<(int_ppc_sync), (SYNC 0)>;
+ "sync $L", LdStSync, []>, Requires<[IsNotBookE]>;
+
+let isCodeGenOnly = 1 in {
+ def MSYNC : XForm_24_sync<31, 598, (outs), (ins),
+ "msync", LdStSync, []>, Requires<[IsBookE]> {
+ let L = 0;
+ }
+}
+
+def : Pat<(int_ppc_sync), (SYNC 0)>, Requires<[IsNotBookE]>;
+def : Pat<(int_ppc_sync), (MSYNC)>, Requires<[IsBookE]>;
//===----------------------------------------------------------------------===//
// PPC32 Arithmetic Instructions.
@@ -2284,7 +2294,8 @@ def : Pat<(f64 (extloadf32 xaddr:$src)),
def : Pat<(f64 (fextend f32:$src)),
(COPY_TO_REGCLASS $src, F8RC)>;
-def : Pat<(atomic_fence (imm), (imm)), (SYNC 0)>;
+def : Pat<(atomic_fence (imm), (imm)), (SYNC 0)>, Requires<[IsNotBookE]>;
+def : Pat<(atomic_fence (imm), (imm)), (MSYNC)>, Requires<[IsBookE]>;
// Additional FNMSUB patterns: -a*c + b == -(a*c - b)
def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B),
@@ -2373,10 +2384,10 @@ class PPCAsmPseudo<string asm, dag iops>
def : InstAlias<"sc", (SC 0)>;
-def : InstAlias<"sync", (SYNC 0)>;
-def : InstAlias<"msync", (SYNC 0)>;
-def : InstAlias<"lwsync", (SYNC 1)>;
-def : InstAlias<"ptesync", (SYNC 2)>;
+def : InstAlias<"sync", (SYNC 0)>, Requires<[IsNotBookE]>;
+def : InstAlias<"msync", (SYNC 0)>, Requires<[IsNotBookE]>;
+def : InstAlias<"lwsync", (SYNC 1)>, Requires<[IsNotBookE]>;
+def : InstAlias<"ptesync", (SYNC 2)>, Requires<[IsNotBookE]>;
def : InstAlias<"wait", (WAIT 0)>;
def : InstAlias<"waitrsv", (WAIT 1)>;
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index d566e2c3e52d..43663ce013e9 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -144,6 +144,13 @@ def CR6 : CR<6, "cr6", [CR6LT, CR6GT, CR6EQ, CR6UN]>, DwarfRegNum<[74, 74]>;
def CR7 : CR<7, "cr7", [CR7LT, CR7GT, CR7EQ, CR7UN]>, DwarfRegNum<[75, 75]>;
}
+// The full condition-code register. This is not modeled fully, but is defined
+// here primarily for compatibility with gcc, to allow the inline asm "cc"
+// clobber specification to work.
+def CC : PPCReg<"cc">, DwarfRegAlias<CR0> {
+ let Aliases = [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7];
+}
+
// Link register
def LR : SPR<8, "lr">, DwarfRegNum<[-2, 65]>;
//let Aliases = [LR] in
@@ -234,3 +241,8 @@ def VRSAVERC : RegisterClass<"PPC", [i32], 32, (add VRSAVE)>;
def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY)> {
let CopyCost = -1;
}
+
+def CCRC : RegisterClass<"PPC", [i32], 32, (add CC)> {
+ let isAllocatable = 0;
+}
+
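
For context, the inline-asm usage that the CC definition above exists to support looks roughly like the following. This is a hedged C++ sketch assuming a gcc/clang PowerPC target; the asm body and function name are illustrative only:

// cmpw writes cr0, so the statement declares a "cc" clobber; the new CC
// register gives the backend something to resolve that clobber against.
static inline void touch_condition_register(int a, int b) {
  __asm__ volatile("cmpw %0, %1" : : "r"(a), "r"(b) : "cc");
}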
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index c863a6ecc777..ec8c82ad521c 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -126,22 +126,6 @@ public:
/// selection.
const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
- /// getDataLayoutString - Return the pointer size and type alignment
- /// properties of this subtarget.
- const char *getDataLayoutString() const {
- // Note, the alignment values for f64 and i64 on ppc64 in Darwin
- // documentation are wrong; these are correct (i.e. "what gcc does").
- if (isPPC64() && isSVR4ABI()) {
- if (TargetTriple.getOS() == llvm::Triple::FreeBSD)
- return "E-p:64:64-f64:64:64-i64:64:64-v128:128:128-n32:64";
- else
- return "E-p:64:64-f64:64:64-i64:64:64-f128:128:128-v128:128:128-n32:64";
- }
-
- return isPPC64() ? "E-p:64:64-f64:64:64-i64:64:64-f128:64:128-n32:64"
- : "E-p:32:32-f64:64:64-i64:64:64-f128:64:128-n32";
- }
-
/// \brief Reset the features for the PowerPC target.
virtual void resetSubtargetFeatures(const MachineFunction *MF);
private:
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 9acefe53ce4a..d6767d51f2cc 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -33,6 +33,43 @@ extern "C" void LLVMInitializePowerPCTarget() {
RegisterTargetMachine<PPC64TargetMachine> C(ThePPC64LETarget);
}
+/// Return the datalayout string of a subtarget.
+static std::string getDataLayoutString(const PPCSubtarget &ST) {
+ const Triple &T = ST.getTargetTriple();
+
+ // PPC is big endian
+ std::string Ret = "E";
+
+ // PPC64 has 64 bit pointers, PPC32 has 32 bit pointers.
+ if (ST.isPPC64())
+ Ret += "-p:64:64";
+ else
+ Ret += "-p:32:32";
+
+ // Note, the alignment values for f64 and i64 on ppc64 in Darwin
+ // documentation are wrong; these are correct (i.e. "what gcc does").
+ if (ST.isPPC64() || ST.isSVR4ABI())
+ Ret += "-f64:64:64-i64:64:64";
+ else
+ Ret += "-f64:32:64";
+
+ // Set support for 128-bit floats depending on the ABI.
+ if (!ST.isPPC64() && ST.isSVR4ABI())
+ Ret += "-f128:64:128";
+
+ // Some ABIs support 128 bit vectors.
+ if (ST.isPPC64() && ST.isSVR4ABI())
+ Ret += "-v128:128:128";
+
+ // PPC64 has 32- and 64-bit registers, PPC32 has only 32-bit ones.
+ if (ST.isPPC64())
+ Ret += "-n32:64";
+ else
+ Ret += "-n32";
+
+ return Ret;
+}
+
PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
@@ -41,7 +78,7 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT,
bool is64Bit)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS, is64Bit),
- DL(Subtarget.getDataLayoutString()), InstrInfo(*this),
+ DL(getDataLayoutString(Subtarget)), InstrInfo(*this),
FrameLowering(Subtarget), JITInfo(*this, is64Bit),
TLInfo(*this), TSInfo(*this),
InstrItins(Subtarget.getInstrItineraryData()) {
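
For reference, the pieces composed by getDataLayoutString above combine as shown in this standalone sketch, which mirrors the branches in the patch; the helper and demo driver names are illustrative and not part of LLVM:

#include <iostream>
#include <string>

// Mirrors the branches of getDataLayoutString() in the patch above.
static std::string buildPPCDataLayout(bool IsPPC64, bool IsSVR4) {
  std::string Ret = "E";                            // big endian
  Ret += IsPPC64 ? "-p:64:64" : "-p:32:32";         // pointer width
  Ret += (IsPPC64 || IsSVR4) ? "-f64:64:64-i64:64:64" : "-f64:32:64";
  if (!IsPPC64 && IsSVR4)
    Ret += "-f128:64:128";
  if (IsPPC64 && IsSVR4)
    Ret += "-v128:128:128";
  Ret += IsPPC64 ? "-n32:64" : "-n32";
  return Ret;
}

int main() {
  // ppc64 SVR4: E-p:64:64-f64:64:64-i64:64:64-v128:128:128-n32:64
  std::cout << buildPPCDataLayout(true, true) << "\n";
  // ppc32 SVR4: E-p:32:32-f64:64:64-i64:64:64-f128:64:128-n32
  std::cout << buildPPCDataLayout(false, true) << "\n";
}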
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index c4d75ffa0d06..1029f306d632 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -133,6 +133,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, Expand);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i1, Expand);
+
setOperationAction(ISD::FNEG, MVT::v2f32, Expand);
setOperationAction(ISD::FNEG, MVT::v4f32, Expand);
diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
index 3c5375d84ecf..7acd67313eea 100644
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -388,6 +388,11 @@ class SHA256MaPattern <Instruction BFI_INT, Instruction XOR> : Pat <
// Bitfield extract patterns
+/*
+
+XXX: The BFE pattern is not working correctly because the XForm is not being
+applied.
+
def legalshift32 : ImmLeaf <i32, [{return Imm >=0 && Imm < 32;}]>;
def bfemask : PatLeaf <(imm), [{return isMask_32(N->getZExtValue());}],
SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(CountTrailingOnes_32(N->getZExtValue()), MVT::i32);}]>>;
@@ -397,6 +402,8 @@ class BFEPattern <Instruction BFE> : Pat <
(BFE $x, $y, $z)
>;
+*/
+
// rotr pattern
class ROTRPattern <Instruction BIT_ALIGN> : Pat <
(rotr i32:$src0, i32:$src1),
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
index 4a8e1b0b2d86..9b26af7bc736 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
@@ -13,7 +13,6 @@
using namespace llvm;
AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(StringRef &TT) : MCAsmInfo() {
HasSingleParameterDotFile = false;
- WeakDefDirective = 0;
//===------------------------------------------------------------------===//
HasSubsectionsViaSymbols = true;
HasMachoZeroFillDirective = false;
@@ -58,7 +57,6 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(StringRef &TT) : MCAsmInfo() {
HasDotTypeDotSizeDirective = false;
HasNoDeadStrip = true;
WeakRefDirective = ".weakref\t";
- LinkOnceDirective = 0;
//===--- Dwarf Emission Directives -----------------------------------===//
HasLEB128 = true;
SupportsDebugInformation = true;
diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp
index ac3d8f63d57f..2a8276b2214f 100644
--- a/lib/Target/R600/R600ControlFlowFinalizer.cpp
+++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp
@@ -356,6 +356,7 @@ public:
DEBUG(dbgs() << CfCount << ":"; I->dump(););
FetchClauses.push_back(MakeFetchClause(MBB, I));
CfCount++;
+ LastAlu.back() = 0;
continue;
}
diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp
index c0827fc1ca40..2eca6cf43271 100644
--- a/lib/Target/R600/R600InstrInfo.cpp
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -716,7 +716,13 @@ R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
return false;
}
- // Get the last instruction in the block.
+ // Remove successive JUMP
+ while (I != MBB.begin() && llvm::prior(I)->getOpcode() == AMDGPU::JUMP) {
+ MachineBasicBlock::iterator PriorI = llvm::prior(I);
+ if (AllowModify)
+ I->removeFromParent();
+ I = PriorI;
+ }
MachineInstr *LastInst = I;
// If there is only one terminator instruction, process it.
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index 0346e24ab771..74c65daa065c 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -1516,7 +1516,9 @@ let Predicates = [isEGorCayman] in {
i32:$src2))],
VecALU
>;
- def : BFEPattern <BFE_UINT_eg>;
+// XXX: This pattern is broken, disabling for now. See comment in
+// AMDGPUInstructions.td for more info.
+// def : BFEPattern <BFE_UINT_eg>;
def BFI_INT_eg : R600_3OP <0x06, "BFI_INT", [], VecALU>;
defm : BFIPatterns <BFI_INT_eg>;
@@ -1636,7 +1638,6 @@ class R600_LDS_1A <bits<6> lds_op, string name, list<dag> pattern> : R600_LDS <
let src2 = 0;
let src2_rel = 0;
- let Defs = [OQAP];
let usesCustomInserter = 1;
let LDS_1A = 1;
let DisableEncoding = "$dst";
@@ -1672,7 +1673,6 @@ class R600_LDS_1A1D_RET <bits<6> lds_op, string name, list<dag> pattern> :
let BaseOp = name;
let usesCustomInserter = 1;
let DisableEncoding = "$dst";
- let Defs = [OQAP];
}
class R600_LDS_1A2D <bits<6> lds_op, string name, list<dag> pattern> :
diff --git a/lib/Target/R600/SIFixSGPRCopies.cpp b/lib/Target/R600/SIFixSGPRCopies.cpp
index 3370c7955bc7..f0065ea13c5b 100644
--- a/lib/Target/R600/SIFixSGPRCopies.cpp
+++ b/lib/Target/R600/SIFixSGPRCopies.cpp
@@ -187,7 +187,7 @@ bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy,
DstRC == &AMDGPU::M0RegRegClass)
return false;
- SrcRC = inferRegClassFromDef(TRI, MRI, SrcReg, SrcSubReg);
+ SrcRC = TRI->getSubRegClass(MRI.getRegClass(SrcReg), SrcSubReg);
return TRI->isSGPRClass(DstRC) && TRI->hasVGPRs(SrcRC);
}
diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp
index 7ef662eb65b1..695ec407fdbe 100644
--- a/lib/Target/R600/SIInsertWaits.cpp
+++ b/lib/Target/R600/SIInsertWaits.cpp
@@ -314,6 +314,12 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
Counters Result = ZeroCounts;
+ // S_SENDMSG implicitly waits for all outstanding LGKM transfers to finish,
+ // but we also want to wait for any other outstanding transfers before
+ // signalling other hardware blocks
+ if (MI.getOpcode() == AMDGPU::S_SENDMSG)
+ return LastIssued;
+
// For each register affected by this
// instruction increase the result sequence
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index 4cd0daa55c5f..b7879c6eface 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -290,10 +290,10 @@ multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern,
: VOP2_Helper <op, VReg_64, VSrc_64, opName, pattern, revOp>;
multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern,
- string revOp = opName> {
+ RegisterClass src0_rc, string revOp = opName> {
def _e32 : VOP2 <
- op, (outs VReg_32:$dst), (ins VSrc_32:$src0, VReg_32:$src1),
+ op, (outs VReg_32:$dst), (ins src0_rc:$src0, VReg_32:$src1),
opName#"_e32 $dst, $src0, $src1", pattern
>, VOP <opName>, VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
@@ -425,26 +425,48 @@ class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBU
multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> {
- let glc = 0, lds = 0, slc = 0, tfe = 0, soffset = 128 /* ZERO */,
- mayLoad = 1 in {
-
- let offen = 1, idxen = 0, addr64 = 0, offset = 0 in {
- def _OFFEN : MUBUF <op, (outs regClass:$vdata),
- (ins SReg_128:$srsrc, VReg_32:$vaddr),
- asm#" $vdata, $srsrc + $vaddr", []>;
- }
-
- let offen = 0, idxen = 1, addr64 = 0 in {
- def _IDXEN : MUBUF <op, (outs regClass:$vdata),
- (ins SReg_128:$srsrc, VReg_32:$vaddr, i16imm:$offset),
- asm#" $vdata, $srsrc[$vaddr] + $offset", []>;
- }
+ let lds = 0, mayLoad = 1 in {
+
+ let addr64 = 0 in {
+
+ let offen = 0, idxen = 0 in {
+ def _OFFSET : MUBUF <op, (outs regClass:$vdata),
+ (ins SReg_128:$srsrc, VReg_32:$vaddr,
+ i16imm:$offset, SSrc_32:$soffset, i1imm:$glc,
+ i1imm:$slc, i1imm:$tfe),
+ asm#" $vdata, $srsrc + $offset + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
+ }
+
+ let offen = 1, idxen = 0, offset = 0 in {
+ def _OFFEN : MUBUF <op, (outs regClass:$vdata),
+ (ins SReg_128:$srsrc, VReg_32:$vaddr,
+ SSrc_32:$soffset, i1imm:$glc, i1imm:$slc,
+ i1imm:$tfe),
+ asm#" $vdata, $srsrc + $vaddr + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
+ }
+
+ let offen = 0, idxen = 1 in {
+ def _IDXEN : MUBUF <op, (outs regClass:$vdata),
+ (ins SReg_128:$srsrc, VReg_32:$vaddr,
+ i16imm:$offset, SSrc_32:$soffset, i1imm:$glc,
+ i1imm:$slc, i1imm:$tfe),
+ asm#" $vdata, $srsrc[$vaddr] + $offset + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
+ }
+
+ let offen = 1, idxen = 1 in {
+ def _BOTHEN : MUBUF <op, (outs regClass:$vdata),
+ (ins SReg_128:$srsrc, VReg_64:$vaddr,
+ SSrc_32:$soffset, i1imm:$glc,
+ i1imm:$slc, i1imm:$tfe),
+ asm#" $vdata, $srsrc[$vaddr[0]] + $vaddr[1] + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
+ }
+ }
- let offen = 0, idxen = 0, addr64 = 1 in {
- def _ADDR64 : MUBUF <op, (outs regClass:$vdata),
- (ins SReg_128:$srsrc, VReg_64:$vaddr, i16imm:$offset),
- asm#" $vdata, $srsrc + $vaddr + $offset", []>;
- }
+ let offen = 0, idxen = 0, addr64 = 1, glc = 0, slc = 0, tfe = 0, soffset = 128 /* ZERO */ in {
+ def _ADDR64 : MUBUF <op, (outs regClass:$vdata),
+ (ins SReg_128:$srsrc, VReg_64:$vaddr, i16imm:$offset),
+ asm#" $vdata, $srsrc + $vaddr + $offset", []>;
+ }
}
}
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 76f05eb49655..2ca6a95978b3 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -22,6 +22,8 @@ def InterpSlot : Operand<i32> {
let PrintMethod = "printInterpSlot";
}
+def SendMsgImm : Operand<i32>;
+
def isSI : Predicate<"Subtarget.getGeneration() "
">= AMDGPUSubtarget::SOUTHERN_ISLANDS">;
@@ -826,17 +828,25 @@ def S_BARRIER : SOPP <0x0000000a, (ins), "S_BARRIER",
def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "S_WAITCNT $simm16",
[]
>;
-} // End hasSideEffects
//def S_SETHALT : SOPP_ <0x0000000d, "S_SETHALT", []>;
//def S_SLEEP : SOPP_ <0x0000000e, "S_SLEEP", []>;
//def S_SETPRIO : SOPP_ <0x0000000f, "S_SETPRIO", []>;
-//def S_SENDMSG : SOPP_ <0x00000010, "S_SENDMSG", []>;
+
+let Uses = [EXEC] in {
+ def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16, M0Reg:$m0), "S_SENDMSG $simm16",
+ [(int_SI_sendmsg imm:$simm16, M0Reg:$m0)]
+ > {
+ let DisableEncoding = "$m0";
+ }
+} // End Uses = [EXEC]
+
//def S_SENDMSGHALT : SOPP_ <0x00000011, "S_SENDMSGHALT", []>;
//def S_TRAP : SOPP_ <0x00000012, "S_TRAP", []>;
//def S_ICACHE_INV : SOPP_ <0x00000013, "S_ICACHE_INV", []>;
//def S_INCPERFLEVEL : SOPP_ <0x00000014, "S_INCPERFLEVEL", []>;
//def S_DECPERFLEVEL : SOPP_ <0x00000015, "S_DECPERFLEVEL", []>;
//def S_TTRACEDATA : SOPP_ <0x00000016, "S_TTRACEDATA", []>;
+} // End hasSideEffects
def V_CNDMASK_B32_e32 : VOP2 <0x00000000, (outs VReg_32:$dst),
(ins VSrc_32:$src0, VReg_32:$src1, VCCReg:$vcc),
@@ -979,14 +989,16 @@ defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>;
let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
// No patterns so that the scalar instructions are always selected.
// The scalar versions will be replaced with vector when needed later.
-defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32", []>;
-defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32", []>;
-defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], "V_SUB_I32">;
+defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32", [], VSrc_32>;
+defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32", [], VSrc_32>;
+defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], VSrc_32,
+ "V_SUB_I32">;
let Uses = [VCC] in { // Carry-in comes from VCC
-defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", []>;
-defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", []>;
-defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], "V_SUBB_U32">;
+defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", [], VReg_32>;
+defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", [], VReg_32>;
+defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], VReg_32,
+ "V_SUBB_U32">;
} // End Uses = [VCC]
} // End isCommutable = 1, Defs = [VCC]
@@ -1403,7 +1415,7 @@ def : Pat <
/* int_SI_vs_load_input */
def : Pat<
(SIload_input i128:$tlst, IMM12bit:$attr_offset, i32:$buf_idx_vgpr),
- (BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset)
+ (BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset, 0, 0, 0, 0)
>;
/* int_SI_export */
@@ -1658,16 +1670,30 @@ def : Pat <
0 /* ABS */, 1 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */)
>;
+/********** ================================ **********/
+/********** Floating point absolute/negative **********/
+/********** ================================ **********/
+
+// Manipulate the sign bit directly, as e.g. using the source negation modifier
+// in V_ADD_F32_e64 $src, 0, [...] does not result in -0.0 for $src == +0.0,
+// breaking the piglit *s-floatBitsToInt-neg* tests
+
+// TODO: Look into not implementing isFNegFree/isFAbsFree for SI, and possibly
+// removing these patterns
+
+def : Pat <
+ (fneg (fabs f32:$src)),
+ (V_OR_B32_e32 $src, (V_MOV_B32_e32 0x80000000)) /* Set sign bit */
+>;
+
def : Pat <
(fabs f32:$src),
- (V_ADD_F32_e64 $src, (i32 0 /* SRC1 */),
- 1 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */)
+ (V_AND_B32_e32 $src, (V_MOV_B32_e32 0x7fffffff)) /* Clear sign bit */
>;
def : Pat <
(fneg f32:$src),
- (V_ADD_F32_e64 $src, (i32 0 /* SRC1 */),
- 0 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 1 /* NEG */)
+ (V_XOR_B32_e32 $src, (V_MOV_B32_e32 0x80000000)) /* Toggle sign bit */
>;
/********** ================== **********/
@@ -1794,6 +1820,11 @@ def : Pat <
(V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src0)
>;
+def : Pat <
+ (i32 (zext i1:$src0)),
+ (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src0)
+>;
+
// 1. Offset as 8bit DWORD immediate
def : Pat <
(SIload_constant i128:$sbase, IMM8bitDWORD:$offset),
@@ -1809,7 +1840,7 @@ def : Pat <
// 3. Offset in an 32Bit VGPR
def : Pat <
(SIload_constant i128:$sbase, i32:$voff),
- (BUFFER_LOAD_DWORD_OFFEN $sbase, $voff)
+ (BUFFER_LOAD_DWORD_OFFEN $sbase, $voff, 0, 0, 0, 0)
>;
// The multiplication scales from [0,1] to the unsigned integer range
@@ -1970,6 +2001,50 @@ defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, i64, global_store>;
defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, v2i32, global_store>;
defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX4, v4i32, global_store>;
+// BUFFER_LOAD_DWORD*, addr64=0
+multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxen,
+ MUBUF bothen> {
+
+ def : Pat <
+ (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset,
+ imm:$offset, 0, 0, imm:$glc, imm:$slc,
+ imm:$tfe)),
+ (offset $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc),
+ (as_i1imm $slc), (as_i1imm $tfe))
+ >;
+
+ def : Pat <
+ (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset,
+ imm, 1, 0, imm:$glc, imm:$slc,
+ imm:$tfe)),
+ (offen $rsrc, $vaddr, $soffset, (as_i1imm $glc), (as_i1imm $slc),
+ (as_i1imm $tfe))
+ >;
+
+ def : Pat <
+ (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset,
+ imm:$offset, 0, 1, imm:$glc, imm:$slc,
+ imm:$tfe)),
+ (idxen $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc),
+ (as_i1imm $slc), (as_i1imm $tfe))
+ >;
+
+ def : Pat <
+ (vt (int_SI_buffer_load_dword i128:$rsrc, v2i32:$vaddr, i32:$soffset,
+ imm, 1, 1, imm:$glc, imm:$slc,
+ imm:$tfe)),
+ (bothen $rsrc, $vaddr, $soffset, (as_i1imm $glc), (as_i1imm $slc),
+ (as_i1imm $tfe))
+ >;
+}
+
+defm : MUBUF_Load_Dword <i32, BUFFER_LOAD_DWORD_OFFSET, BUFFER_LOAD_DWORD_OFFEN,
+ BUFFER_LOAD_DWORD_IDXEN, BUFFER_LOAD_DWORD_BOTHEN>;
+defm : MUBUF_Load_Dword <v2i32, BUFFER_LOAD_DWORDX2_OFFSET, BUFFER_LOAD_DWORDX2_OFFEN,
+ BUFFER_LOAD_DWORDX2_IDXEN, BUFFER_LOAD_DWORDX2_BOTHEN>;
+defm : MUBUF_Load_Dword <v4i32, BUFFER_LOAD_DWORDX4_OFFSET, BUFFER_LOAD_DWORDX4_OFFEN,
+ BUFFER_LOAD_DWORDX4_IDXEN, BUFFER_LOAD_DWORDX4_BOTHEN>;
+
//===----------------------------------------------------------------------===//
// MTBUF Patterns
//===----------------------------------------------------------------------===//
@@ -2057,6 +2132,11 @@ def : Pat <
(EXTRACT_SUBREG $a, sub0)
>;
+def : Pat <
+ (i1 (trunc i32:$a)),
+ (V_CMP_EQ_I32_e64 (V_AND_B32_e32 (i32 1), $a), 1)
+>;
+
// V_ADD_I32_e32/S_ADD_I32 produces carry in VCC/SCC. For the vector
// case, the sgpr-copies pass will fix this to use the vector version.
def : Pat <
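
The sign-bit arithmetic used by the new fneg/fabs patterns earlier in this file can be sanity-checked with a small host-side sketch; this is plain C++, not GPU code, and the helper names are illustrative:

#include <cstdint>
#include <cstring>
#include <iostream>

static uint32_t bits_of(float f)  { uint32_t b; std::memcpy(&b, &f, 4); return b; }
static float float_of(uint32_t b) { float f; std::memcpy(&f, &b, 4); return f; }

static float fneg_bits(float x) { return float_of(bits_of(x) ^ 0x80000000u); } // toggle sign
static float fabs_bits(float x) { return float_of(bits_of(x) & 0x7fffffffu); } // clear sign

int main() {
  std::cout << fneg_bits(0.0f)  << "\n"; // -0: flipping the bit preserves signed zero
  std::cout << (0.0f - 0.0f)    << "\n"; // 0: an add/sub-based negation of +0.0 yields +0.0
  std::cout << fabs_bits(-3.5f) << "\n"; // 3.5
}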
diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td
index 7fcc96452114..00e32c03a99e 100644
--- a/lib/Target/R600/SIIntrinsics.td
+++ b/lib/Target/R600/SIIntrinsics.td
@@ -38,6 +38,22 @@ let TargetPrefix = "SI", isTarget = 1 in {
llvm_i32_ty], // tfe(imm)
[]>;
+ // Fully-flexible BUFFER_LOAD_DWORD_* except for the ADDR64 bit, which is not exposed
+ def int_SI_buffer_load_dword : Intrinsic <
+ [llvm_anyint_ty], // vdata(VGPR), overloaded for types i32, v2i32, v4i32
+ [llvm_anyint_ty, // rsrc(SGPR)
+ llvm_anyint_ty, // vaddr(VGPR)
+ llvm_i32_ty, // soffset(SGPR)
+ llvm_i32_ty, // inst_offset(imm)
+ llvm_i32_ty, // offen(imm)
+ llvm_i32_ty, // idxen(imm)
+ llvm_i32_ty, // glc(imm)
+ llvm_i32_ty, // slc(imm)
+ llvm_i32_ty], // tfe(imm)
+ [IntrReadArgMem]>;
+
+ def int_SI_sendmsg : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+
class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
def int_SI_sample : Sample;
diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp
index 958763dffc22..ef867d36692d 100644
--- a/lib/Target/R600/SILowerControlFlow.cpp
+++ b/lib/Target/R600/SILowerControlFlow.cpp
@@ -109,6 +109,23 @@ FunctionPass *llvm::createSILowerControlFlowPass(TargetMachine &tm) {
return new SILowerControlFlowPass(tm);
}
+static bool isDS(unsigned Opcode) {
+ switch(Opcode) {
+ default: return false;
+ case AMDGPU::DS_ADD_U32_RTN:
+ case AMDGPU::DS_SUB_U32_RTN:
+ case AMDGPU::DS_WRITE_B32:
+ case AMDGPU::DS_WRITE_B8:
+ case AMDGPU::DS_WRITE_B16:
+ case AMDGPU::DS_READ_B32:
+ case AMDGPU::DS_READ_I8:
+ case AMDGPU::DS_READ_U8:
+ case AMDGPU::DS_READ_I16:
+ case AMDGPU::DS_READ_U16:
+ return true;
+ }
+}
+
bool SILowerControlFlowPass::shouldSkip(MachineBasicBlock *From,
MachineBasicBlock *To) {
@@ -145,7 +162,9 @@ void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MI.getDebugLoc();
- if (!shouldSkip(&MBB, &MBB.getParent()->back()))
+ if (MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType !=
+ ShaderType::PIXEL ||
+ !shouldSkip(&MBB, &MBB.getParent()->back()))
return;
MachineBasicBlock::iterator Insert = &MI;
@@ -296,9 +315,11 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MI.getDebugLoc();
- // Kill is only allowed in pixel shaders
+ // Kill is only allowed in pixel / geometry shaders
assert(MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType ==
- ShaderType::PIXEL);
+ ShaderType::PIXEL ||
+ MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType ==
+ ShaderType::GEOMETRY);
// Clear this pixel from the exec mask if the operand is negative
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC)
@@ -431,6 +452,11 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
Next = llvm::next(I);
MachineInstr &MI = *I;
+ if (isDS(MI.getOpcode())) {
+ NeedM0 = true;
+ NeedWQM = true;
+ }
+
switch (MI.getOpcode()) {
default: break;
case AMDGPU::SI_IF:
@@ -491,14 +517,6 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
IndirectDst(MI);
break;
- case AMDGPU::DS_READ_B32:
- NeedWQM = true;
- // Fall through
- case AMDGPU::DS_WRITE_B32:
- case AMDGPU::DS_ADD_U32_RTN:
- NeedM0 = true;
- break;
-
case AMDGPU::V_INTERP_P1_F32:
case AMDGPU::V_INTERP_P2_F32:
case AMDGPU::V_INTERP_MOV_F32:
@@ -517,7 +535,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
AMDGPU::M0).addImm(0xffffffff);
}
- if (NeedWQM && MFI->ShaderType != ShaderType::COMPUTE) {
+ if (NeedWQM && MFI->ShaderType == ShaderType::PIXEL) {
MachineBasicBlock &MBB = MF.front();
BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
AMDGPU::EXEC).addReg(AMDGPU::EXEC);
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index bc8f367e9255..22b79b3b2844 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -1181,16 +1181,23 @@ X86AsmParser::CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
unsigned Scale, SMLoc Start, SMLoc End,
unsigned Size, StringRef Identifier,
InlineAsmIdentifierInfo &Info){
- if (isa<MCSymbolRefExpr>(Disp)) {
- // If this is not a VarDecl then assume it is a FuncDecl or some other label
- // reference. We need an 'r' constraint here, so we need to create register
- // operand to ensure proper matching. Just pick a GPR based on the size of
- // a pointer.
- if (!Info.IsVarDecl) {
- unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
- return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true,
- SMLoc(), Identifier, Info.OpDecl);
- }
+ // If this is not a VarDecl then assume it is a FuncDecl or some other label
+ // reference. We need an 'r' constraint here, so we need to create register
+ // operand to ensure proper matching. Just pick a GPR based on the size of
+ // a pointer.
+ if (isa<MCSymbolRefExpr>(Disp) && !Info.IsVarDecl) {
+ unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
+ return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true,
+ SMLoc(), Identifier, Info.OpDecl);
+ }
+
+ // We either have a direct symbol reference, or an offset from a symbol. The
+ // parser always puts the symbol on the LHS, so look there for size
+ // calculation purposes.
+ const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
+ bool IsSymRef =
+ isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
+ if (IsSymRef) {
if (!Size) {
Size = Info.Type * 8; // Size is in terms of bits in this context.
if (Size)
@@ -1312,10 +1319,15 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
if (getParser().parsePrimaryExpr(Val, End))
return Error(Tok.getLoc(), "Unexpected identifier!");
} else {
- InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
- if (ParseIntelIdentifier(Val, Identifier, Info,
- /*Unevaluated=*/false, End))
- return true;
+ // This is a dot operator, not an adjacent identifier.
+ if (Identifier.find('.') != StringRef::npos) {
+ return false;
+ } else {
+ InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
+ if (ParseIntelIdentifier(Val, Identifier, Info,
+ /*Unevaluated=*/false, End))
+ return true;
+ }
}
SM.onIdentifierExpr(Val, Identifier);
UpdateLocLex = false;
@@ -1366,7 +1378,7 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
if (ParseIntelExpression(SM, End))
return 0;
- const MCExpr *Disp;
+ const MCExpr *Disp = 0;
if (const MCExpr *Sym = SM.getSym()) {
// A symbolic displacement.
Disp = Sym;
@@ -1374,13 +1386,20 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
ImmDisp, SM.getImm(), BracLoc, StartInBrac,
End);
- } else {
- // An immediate displacement only.
- Disp = MCConstantExpr::Create(SM.getImm(), getContext());
}
- // Parse the dot operator (e.g., [ebx].foo.bar).
- if (Tok.getString().startswith(".")) {
+ if (SM.getImm() || !Disp) {
+ const MCExpr *Imm = MCConstantExpr::Create(SM.getImm(), getContext());
+ if (Disp)
+ Disp = MCBinaryExpr::CreateAdd(Disp, Imm, getContext());
+ else
+ Disp = Imm; // An immediate displacement only.
+ }
+
+ // Parse struct field access. Intel requires a dot, but MSVC doesn't. MSVC
+ // will in fact do a global lookup of the field name inside all global typedefs,
+ // but we don't emulate that.
+ if (Tok.getString().find('.') != StringRef::npos) {
const MCExpr *NewDisp;
if (ParseIntelDotOperator(Disp, NewDisp))
return 0;
@@ -1532,8 +1551,10 @@ bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
else
return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
- // Drop the '.'.
- StringRef DotDispStr = Tok.getString().drop_front(1);
+ // Drop the optional '.'.
+ StringRef DotDispStr = Tok.getString();
+ if (DotDispStr.startswith("."))
+ DotDispStr = DotDispStr.drop_front(1);
// .Imm gets lexed as a real.
if (Tok.is(AsmToken::Real)) {
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
index c81a85755f82..16ee0d357b77 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@@ -1065,6 +1065,7 @@ static int readSIB(struct InternalInstruction* insn) {
switch (base) {
case 0x5:
+ case 0xd:
switch (modFromModRM(insn->modRM)) {
case 0x0:
insn->eaDisplacement = EA_DISP_32;
@@ -1072,13 +1073,11 @@ static int readSIB(struct InternalInstruction* insn) {
break;
case 0x1:
insn->eaDisplacement = EA_DISP_8;
- insn->sibBase = (insn->addressSize == 4 ?
- SIB_BASE_EBP : SIB_BASE_RBP);
+ insn->sibBase = (SIBBase)(sibBaseBase + base);
break;
case 0x2:
insn->eaDisplacement = EA_DISP_32;
- insn->sibBase = (insn->addressSize == 4 ?
- SIB_BASE_EBP : SIB_BASE_RBP);
+ insn->sibBase = (SIBBase)(sibBaseBase + base);
break;
case 0x3:
debug("Cannot have Mod = 0b11 and a SIB byte");
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index 3861e1ce290a..8d2b5954ef20 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -65,6 +65,17 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) {
// Exceptions handling
ExceptionsType = ExceptionHandling::DwarfCFI;
+
+ // FIXME: this should not depend on the target OS version, but on the ld64
+ // version in use. From ld64-97.17 (Xcode 3.2.6) onward, the abs-ified
+ // FDE relocs may be used.
+ DwarfFDESymbolsUseAbsDiff = T.isMacOSX() && !T.isMacOSXVersionLT(10, 6);
+
+ // old assembler lacks some directives
+ // FIXME: this should really be a check on the assembler characteristics
+ // rather than OS version
+ if (T.isMacOSX() && T.isMacOSXVersionLT(10, 6))
+ HasWeakDefCanBeHiddenDirective = false;
}
X86_64MCAsmInfoDarwin::X86_64MCAsmInfoDarwin(const Triple &Triple)
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index 12584411509d..1f5f91844f80 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -393,9 +393,11 @@ bool X86AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode,
case 'k': // Print SImode register
Reg = getX86SubSuperRegister(Reg, MVT::i32);
break;
- case 'q': // Print DImode register
- // FIXME: gcc will actually print e instead of r for 32-bit.
- Reg = getX86SubSuperRegister(Reg, MVT::i64);
+ case 'q':
+ // Print 64-bit register names if 64-bit integer registers are available.
+ // Otherwise, print 32-bit register names.
+ MVT::SimpleValueType Ty = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
+ Reg = getX86SubSuperRegister(Reg, Ty);
break;
}
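
The 'q' operand modifier being adjusted here is the gcc-style inline-asm modifier that requests the 64-bit name of a register. As a sketch, assuming an x86-64 target:

// %q1/%q0 print the rax-style names of the operands on x86-64; on a 32-bit
// target there is no such name, which is why the printer now falls back to
// the 32-bit register names instead.
static inline unsigned long copy_via_q_modifier(unsigned long x) {
  unsigned long y;
  __asm__("mov %q1, %q0" : "=r"(y) : "r"(x));
  return y;
}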
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 76eeb64650ba..716c146811a1 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -15226,9 +15226,15 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
MBB->addSuccessor(EndMBB);
}
+ // Make sure the last operand is EFLAGS, which gets clobbered by the branch
+ // that was just emitted, but clearly shouldn't be "saved".
+ assert((MI->getNumOperands() <= 3 ||
+ !MI->getOperand(MI->getNumOperands() - 1).isReg() ||
+ MI->getOperand(MI->getNumOperands() - 1).getReg() == X86::EFLAGS)
+ && "Expected last argument to be EFLAGS");
unsigned MOVOpc = Subtarget->hasFp256() ? X86::VMOVAPSmr : X86::MOVAPSmr;
// In the XMM save block, save all the XMM argument registers.
- for (int i = 3, e = MI->getNumOperands(); i != e; ++i) {
+ for (int i = 3, e = MI->getNumOperands() - 1; i != e; ++i) {
int64_t Offset = (i - 3) * 16 + VarArgsFPOffset;
MachineMemOperand *MMO =
F->getMachineMemOperand(
@@ -17577,12 +17583,30 @@ static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG,
// FIXME: need symbolic constants for these magic numbers.
// See X86ATTInstPrinter.cpp:printSSECC().
unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4;
- SDValue OnesOrZeroesF = DAG.getNode(NTOperator, DL, MVT::f32, CMP00, CMP01,
+ SDValue OnesOrZeroesF = DAG.getNode(NTOperator, DL, CMP00.getValueType(),
+ CMP00, CMP01,
DAG.getConstant(x86cc, MVT::i8));
- SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, MVT::i32,
- OnesOrZeroesF);
- SDValue ANDed = DAG.getNode(ISD::AND, DL, MVT::i32, OnesOrZeroesI,
- DAG.getConstant(1, MVT::i32));
+
+ MVT IntVT = is64BitFP ? MVT::i64 : MVT::i32;
+
+ if (is64BitFP && !Subtarget->is64Bit()) {
+ // On a 32-bit target, we cannot bitcast the 64-bit float to a
+ // 64-bit integer, since that's not a legal type. Since
+ // OnesOrZeroesF is all ones or all zeroes, we don't need all the
+ // bits, but can do this little dance to extract the lowest 32 bits
+ // and work with those going forward.
+ SDValue Vector64 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64,
+ OnesOrZeroesF);
+ SDValue Vector32 = DAG.getNode(ISD::BITCAST, DL, MVT::v4f32,
+ Vector64);
+ OnesOrZeroesF = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32,
+ Vector32, DAG.getIntPtrConstant(0));
+ IntVT = MVT::i32;
+ }
+
+ SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, IntVT, OnesOrZeroesF);
+ SDValue ANDed = DAG.getNode(ISD::AND, DL, IntVT, OnesOrZeroesI,
+ DAG.getConstant(1, IntVT));
SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ANDed);
return OneBitOfTruth;
}
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 7d10b67bfe6d..5c8840823b16 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -72,7 +72,7 @@ def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
// x86-64 va_start lowering magic.
-let usesCustomInserter = 1 in {
+let usesCustomInserter = 1, Defs = [EFLAGS] in {
def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
(outs),
(ins GR8:$al,
@@ -81,7 +81,8 @@ def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
"#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset",
[(X86vastart_save_xmm_regs GR8:$al,
imm:$regsavefi,
- imm:$offset)]>;
+ imm:$offset),
+ (implicit EFLAGS)]>;
// The VAARG_64 pseudo-instruction takes the address of the va_list,
// and places the address of the next argument into a register.
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 1e724106991a..30290ee7a799 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -25,11 +25,13 @@ static bool CheapToScalarize(Value *V, bool isConstant) {
if (isConstant) return true;
// If all elts are the same, we can extract it and use any of the values.
- Constant *Op0 = C->getAggregateElement(0U);
- for (unsigned i = 1, e = V->getType()->getVectorNumElements(); i != e; ++i)
- if (C->getAggregateElement(i) != Op0)
- return false;
- return true;
+ if (Constant *Op0 = C->getAggregateElement(0U)) {
+ for (unsigned i = 1, e = V->getType()->getVectorNumElements(); i != e;
+ ++i)
+ if (C->getAggregateElement(i) != Op0)
+ return false;
+ return true;
+ }
}
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return false;
diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp
index 335af81b957a..643bc78f6e58 100644
--- a/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -1088,9 +1088,8 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
L, SCEV::FlagAnyWrap));
{ // Limit the lifetime of SCEVExpander.
SCEVExpander Expander(*SE, "reroll");
- PHINode *NewIV =
- cast<PHINode>(Expander.expandCodeFor(H, IV->getType(),
- Header->begin()));
+ Value *NewIV = Expander.expandCodeFor(H, IV->getType(), Header->begin());
+
for (DenseSet<Instruction *>::iterator J = BaseUseSet.begin(),
JE = BaseUseSet.end(); J != JE; ++J)
(*J)->replaceUsesOfWith(IV, NewIV);
@@ -1101,20 +1100,23 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
if (Inc == 1)
ICSCEV =
SE->getMulExpr(ICSCEV, SE->getConstant(ICSCEV->getType(), Scale));
- Value *IC;
- if (isa<SCEVConstant>(ICSCEV)) {
- IC = Expander.expandCodeFor(ICSCEV, NewIV->getType(), BI);
+ // Iteration count SCEV minus 1
+ const SCEV *ICMinus1SCEV =
+ SE->getMinusSCEV(ICSCEV, SE->getConstant(ICSCEV->getType(), 1));
+
+ Value *ICMinus1; // Iteration count minus 1
+ if (isa<SCEVConstant>(ICMinus1SCEV)) {
+ ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(), BI);
} else {
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader)
Preheader = InsertPreheaderForLoop(L, this);
- IC = Expander.expandCodeFor(ICSCEV, NewIV->getType(),
- Preheader->getTerminator());
+ ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(),
+ Preheader->getTerminator());
}
- Value *NewIVNext = NewIV->getIncomingValueForBlock(Header);
- Value *Cond = new ICmpInst(BI, CmpInst::ICMP_EQ, NewIVNext, IC,
+ Value *Cond = new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, ICMinus1,
"exitcond");
BI->setCondition(Cond);
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index eff5268c448a..6133962e42d7 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -3390,6 +3390,10 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
int64_t NewBaseOffset = (uint64_t)Base.BaseOffset * Factor;
if (NewBaseOffset / Factor != Base.BaseOffset)
continue;
+ // If the offset will be truncated at this use, check that it is in bounds.
+ if (!IntTy->isPointerTy() &&
+ !ConstantInt::isValueValidForType(IntTy, NewBaseOffset))
+ continue;
// Check that multiplying with the use offset doesn't overflow.
int64_t Offset = LU.MinOffset;
@@ -3398,6 +3402,10 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
Offset = (uint64_t)Offset * Factor;
if (Offset / Factor != LU.MinOffset)
continue;
+ // If the offset will be truncated at this use, check that it is in bounds.
+ if (!IntTy->isPointerTy() &&
+ !ConstantInt::isValueValidForType(IntTy, Offset))
+ continue;
Formula F = Base;
F.BaseOffset = NewBaseOffset;
@@ -3432,6 +3440,10 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset * Factor;
if (F.UnfoldedOffset / Factor != Base.UnfoldedOffset)
continue;
+ // If the offset will be truncated, check that it is in bounds.
+ if (!IntTy->isPointerTy() &&
+ !ConstantInt::isValueValidForType(IntTy, F.UnfoldedOffset))
+ continue;
}
// If we make it here and it's legal, add it.
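
The new guards reject scaled offsets that would no longer be representable once truncated to the use's integer type; the test amounts to a signed-range check along these lines (a standalone sketch, not the LLVM API):

#include <cstdint>
#include <iostream>

// True if V survives truncation to a signed integer of Bits width (0 < Bits < 64).
static bool fitsInSignedBits(int64_t V, unsigned Bits) {
  const int64_t Max = (int64_t(1) << (Bits - 1)) - 1;
  const int64_t Min = -Max - 1;
  return V >= Min && V <= Max;
}

int main() {
  std::cout << fitsInSignedBits(0x7fffffffLL, 32) << "\n";  // 1: still fits in i32
  std::cout << fitsInSignedBits(0x100000000LL, 32) << "\n"; // 0: scaled offset overflowed i32
}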
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
index f15e8d59276b..97e7e5d95783 100644
--- a/lib/Transforms/Utils/LCSSA.cpp
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -32,6 +32,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Constants.h"
@@ -70,6 +71,7 @@ namespace {
AU.addRequired<DominatorTree>();
AU.addRequired<LoopInfo>();
AU.addPreservedID(LoopSimplifyID);
+ AU.addPreserved<AliasAnalysis>();
AU.addPreserved<ScalarEvolution>();
}
private:
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 5e758713ed19..f9f6b18940d3 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4191,13 +4191,22 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
continue;
}
- // Process instructions only once (termination).
+ // Process instructions only once (termination). Each reduction cycle
+ // value must only be used once, except by phi nodes and min/max
+ // reductions which are represented as a cmp followed by a select.
+ ReductionInstDesc IgnoredVal(false, 0);
if (VisitedInsts.insert(Usr)) {
if (isa<PHINode>(Usr))
PHIs.push_back(Usr);
else
NonPHIs.push_back(Usr);
- }
+ } else if (!isa<PHINode>(Usr) &&
+ ((!isa<FCmpInst>(Usr) &&
+ !isa<ICmpInst>(Usr) &&
+ !isa<SelectInst>(Usr)) ||
+ !isMinMaxSelectCmpPattern(Usr, IgnoredVal).IsReduction))
+ return false;
+
// Remember that we completed the cycle.
if (Usr == Phi)
FoundStartPHI = true;
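
The "cmp followed by a select" exception mentioned above is the usual shape of a min/max reduction, for example (illustrative C++):

// The reduction value m has two users per iteration, the compare and the
// select, which is the one pattern the extra check above still accepts.
static int min_reduce(const int *a, int n) {
  int m = a[0];
  for (int i = 1; i < n; ++i)
    m = a[i] < m ? a[i] : m;
  return m;
}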
diff --git a/test/Analysis/BasicAA/noalias-bugs.ll b/test/Analysis/BasicAA/noalias-bugs.ll
new file mode 100644
index 000000000000..c02a302c1950
--- /dev/null
+++ b/test/Analysis/BasicAA/noalias-bugs.ll
@@ -0,0 +1,33 @@
+; RUN: opt -S -basicaa -dse < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; We incorrectly returned noalias in the example below for "ptr.64" and
+; "either_ptr.64".
+; PR18460
+
+%nested = type { %nested.i64 }
+%nested.i64 = type { i64 }
+
+define i64 @testcase(%nested * noalias %p1, %nested * noalias %p2,
+ i32 %a, i32 %b) {
+ %ptr = getelementptr inbounds %nested* %p1, i64 -1, i32 0
+ %ptr.64 = getelementptr inbounds %nested.i64* %ptr, i64 0, i32 0
+ %ptr2= getelementptr inbounds %nested* %p2, i64 0, i32 0
+ %cmp = icmp ult i32 %a, %b
+ %either_ptr = select i1 %cmp, %nested.i64* %ptr2, %nested.i64* %ptr
+ %either_ptr.64 = getelementptr inbounds %nested.i64* %either_ptr, i64 0, i32 0
+
+; Because either_ptr.64 and ptr.64 can alias (we used to return noalias)
+; elimination of the first store is not valid.
+
+; CHECK: store i64 2
+; CHECK: load
+; CHECK: store i64 1
+
+ store i64 2, i64* %ptr.64, align 8
+ %r = load i64* %either_ptr.64, align 8
+ store i64 1, i64* %ptr.64, align 8
+ ret i64 %r
+}
diff --git a/test/Analysis/BasicAA/phi-aa.ll b/test/Analysis/BasicAA/phi-aa.ll
index 6aa26c185e0f..74279e1c4c93 100644
--- a/test/Analysis/BasicAA/phi-aa.ll
+++ b/test/Analysis/BasicAA/phi-aa.ll
@@ -1,10 +1,14 @@
; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
; rdar://7282591
@X = common global i32 0
@Y = common global i32 0
@Z = common global i32 0
+; CHECK-LABEL: foo
; CHECK: NoAlias: i32* %P, i32* @Z
define void @foo(i32 %cond) nounwind {
@@ -29,3 +33,46 @@ bb2:
return:
ret void
}
+
+; Pointers can vary in between iterations of loops.
+; PR18068
+
+; CHECK-LABEL: pr18068
+; CHECK: MayAlias: i32* %0, i32* %arrayidx5
+
+define i32 @pr18068(i32* %jj7, i32* %j) {
+entry:
+ %oa5 = alloca [100 x i32], align 16
+ br label %codeRepl
+
+codeRepl:
+ %0 = phi i32* [ %arrayidx13, %for.body ], [ %j, %entry ]
+ %targetBlock = call i1 @cond(i32* %jj7)
+ br i1 %targetBlock, label %for.body, label %bye
+
+for.body:
+ %1 = load i32* %jj7, align 4
+ %idxprom4 = zext i32 %1 to i64
+ %arrayidx5 = getelementptr inbounds [100 x i32]* %oa5, i64 0, i64 %idxprom4
+ %2 = load i32* %arrayidx5, align 4
+ %sub6 = sub i32 %2, 6
+ store i32 %sub6, i32* %arrayidx5, align 4
+ ; %0 and %arrayidx5 can alias! It is not safe to DSE the above store.
+ %3 = load i32* %0, align 4
+ store i32 %3, i32* %arrayidx5, align 4
+ %sub11 = add i32 %1, -1
+ %idxprom12 = zext i32 %sub11 to i64
+ %arrayidx13 = getelementptr inbounds [100 x i32]* %oa5, i64 0, i64 %idxprom12
+ call void @inc(i32* %jj7)
+ br label %codeRepl
+
+bye:
+ %.reload = load i32* %jj7, align 4
+ ret i32 %.reload
+}
+
+declare i1 @cond(i32*)
+
+declare void @inc(i32*)
+
+
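A rough C++ analog of @pr18068 (cond and inc mirror the declared functions in the test; everything else is illustrative): the pointer carried over from the previous iteration can point at the very slot the current iteration writes, so the first store to the array element must not be removed.

bool cond(int *jj);
void inc(int *jj);

int pr18068_analog(int *jj, int *j) {
  int oa[100] = {};
  int *prev = j;                       // the incoming value of the phi %0
  while (cond(jj)) {
    int idx = *jj;
    oa[idx] = oa[idx] - 6;             // store that DSE must keep
    oa[idx] = *prev;                   // prev may alias &oa[idx]
    prev = &oa[idx - 1];               // next iteration's phi input
    inc(jj);
  }
  return *jj;
}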
diff --git a/test/Analysis/ScalarEvolution/zext-signed-addrec.ll b/test/Analysis/ScalarEvolution/zext-signed-addrec.ll
new file mode 100644
index 000000000000..27aed3b0da1b
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/zext-signed-addrec.ll
@@ -0,0 +1,81 @@
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+; PR18000
+
+target datalayout = "e-i64:64-f80:128-s:64-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@a = global i32 0, align 4
+@b = common global i32 0, align 4
+@e = common global i8 0, align 1
+@d = common global i32 0, align 4
+@c = common global i32 0, align 4
+@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
+
+; Function Attrs: nounwind optsize uwtable
+; CHECK-LABEL: foo
+define i32 @foo() {
+entry:
+ %.pr = load i32* @b, align 4
+ %cmp10 = icmp slt i32 %.pr, 1
+ br i1 %cmp10, label %for.cond1.preheader.lr.ph, label %entry.for.end9_crit_edge
+
+entry.for.end9_crit_edge: ; preds = %entry
+ %.pre = load i32* @c, align 4
+ br label %for.end9
+
+for.cond1.preheader.lr.ph: ; preds = %entry
+ %0 = load i32* @a, align 4
+ %tobool = icmp eq i32 %0, 0
+ br i1 %tobool, label %for.cond1.preheader.for.cond1.preheader.split_crit_edge, label %return.loopexit.split
+
+for.cond1.preheader.for.cond1.preheader.split_crit_edge: ; preds = %for.cond1.preheader.lr.ph, %for.inc8
+ %1 = phi i32 [ %inc, %for.inc8 ], [ %.pr, %for.cond1.preheader.lr.ph ]
+ br label %if.end
+
+; CHECK-LABEL: if.end
+if.end: ; preds = %if.end, %for.cond1.preheader.for.cond1.preheader.split_crit_edge
+
+; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %if.end ], [ 258, %for.cond1.preheader.for.cond1.preheader.split_crit_edge ]
+ %indvars.iv = phi i32 [ 1, %for.cond1.preheader.for.cond1.preheader.split_crit_edge ], [ %indvars.iv.next, %if.end ]
+
+ %2 = phi i8 [ 1, %for.cond1.preheader.for.cond1.preheader.split_crit_edge ], [ %dec, %if.end ]
+ %conv7 = mul i32 %indvars.iv, 258
+ %shl = and i32 %conv7, 510
+ store i32 %shl, i32* @c, align 4
+
+; CHECK: %lsr.iv.next = add i32 %lsr.iv, -258
+ %dec = add i8 %2, -1
+
+ %cmp2 = icmp sgt i8 %dec, -1
+ %indvars.iv.next = add i32 %indvars.iv, -1
+ br i1 %cmp2, label %if.end, label %for.inc8
+
+for.inc8: ; preds = %if.end
+ store i32 0, i32* @d, align 4
+ %inc = add nsw i32 %1, 1
+ store i32 %inc, i32* @b, align 4
+ %cmp = icmp slt i32 %1, 0
+ br i1 %cmp, label %for.cond1.preheader.for.cond1.preheader.split_crit_edge, label %for.cond.for.end9_crit_edge
+
+for.cond.for.end9_crit_edge: ; preds = %for.inc8
+ store i8 %dec, i8* @e, align 1
+ br label %for.end9
+
+for.end9: ; preds = %entry.for.end9_crit_edge, %for.cond.for.end9_crit_edge
+ %3 = phi i32 [ %.pre, %entry.for.end9_crit_edge ], [ %shl, %for.cond.for.end9_crit_edge ]
+ %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %3) #2
+ br label %return
+
+return.loopexit.split: ; preds = %for.cond1.preheader.lr.ph
+ store i8 1, i8* @e, align 1
+ store i32 0, i32* @d, align 4
+ br label %return
+
+return: ; preds = %return.loopexit.split, %for.end9
+ %retval.0 = phi i32 [ 0, %for.end9 ], [ 1, %return.loopexit.split ]
+ ret i32 %retval.0
+}
+
+; Function Attrs: nounwind optsize
+declare i32 @printf(i8* nocapture readonly, ...)
+
diff --git a/test/CodeGen/AArch64/atomic-ops.ll b/test/CodeGen/AArch64/atomic-ops.ll
index de84ff46ec3b..5857faf80a16 100644
--- a/test/CodeGen/AArch64/atomic-ops.ll
+++ b/test/CodeGen/AArch64/atomic-ops.ll
@@ -1,4 +1,5 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-REG %s
@var8 = global i8 0
@var16 = global i16 0
@@ -17,6 +18,8 @@ define i8 @test_atomic_load_add_i8(i8 %offset) nounwind {
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-REG: add w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
+; CHECK-REG-NOT: stlxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -37,6 +40,8 @@ define i16 @test_atomic_load_add_i16(i16 %offset) nounwind {
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-REG: add w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
+; CHECK-REG-NOT: stxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -57,6 +62,8 @@ define i32 @test_atomic_load_add_i32(i32 %offset) nounwind {
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-REG: add w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
+; CHECK-REG-NOT: stlxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -77,6 +84,8 @@ define i64 @test_atomic_load_add_i64(i64 %offset) nounwind {
; x0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: add [[NEW:x[0-9]+]], x[[OLD]], x0
+; CHECK-REG: add x[[NEW:[0-9]+]], x{{[0-9]+}}, x0
+; CHECK-REG-NOT: stxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -97,6 +106,8 @@ define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind {
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-REG: sub w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
+; CHECK-REG-NOT: stxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -117,6 +128,8 @@ define i16 @test_atomic_load_sub_i16(i16 %offset) nounwind {
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-REG: sub w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
+; CHECK-REG-NOT: stlxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -137,6 +150,8 @@ define i32 @test_atomic_load_sub_i32(i32 %offset) nounwind {
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-REG: sub w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
+; CHECK-REG-NOT: stxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -157,6 +172,8 @@ define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind {
; x0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: sub [[NEW:x[0-9]+]], x[[OLD]], x0
+; CHECK-REG: sub x[[NEW:[0-9]+]], x{{[0-9]+}}, x0
+; CHECK-REG-NOT: stlxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -177,6 +194,8 @@ define i8 @test_atomic_load_and_i8(i8 %offset) nounwind {
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-REG: and w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
+; CHECK-REG-NOT: stlxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -197,6 +216,8 @@ define i16 @test_atomic_load_and_i16(i16 %offset) nounwind {
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-REG: and w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
+; CHECK-REG-NOT: stxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -217,6 +238,8 @@ define i32 @test_atomic_load_and_i32(i32 %offset) nounwind {
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-REG: and w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
+; CHECK-REG-NOT: stlxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -237,6 +260,8 @@ define i64 @test_atomic_load_and_i64(i64 %offset) nounwind {
; x0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: and [[NEW:x[0-9]+]], x[[OLD]], x0
+; CHECK-REG: and x[[NEW:[0-9]+]], x{{[0-9]+}}, x0
+; CHECK-REG-NOT: stxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -257,6 +282,8 @@ define i8 @test_atomic_load_or_i8(i8 %offset) nounwind {
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-REG: orr w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
+; CHECK-REG-NOT: stlxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -277,6 +304,8 @@ define i16 @test_atomic_load_or_i16(i16 %offset) nounwind {
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-REG: orr w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
+; CHECK-REG-NOT: stxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -297,6 +326,8 @@ define i32 @test_atomic_load_or_i32(i32 %offset) nounwind {
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-REG: orr w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
+; CHECK-REG-NOT: stxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -317,6 +348,8 @@ define i64 @test_atomic_load_or_i64(i64 %offset) nounwind {
; x0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: orr [[NEW:x[0-9]+]], x[[OLD]], x0
+; CHECK-REG: orr x[[NEW:[0-9]+]], x{{[0-9]+}}, x0
+; CHECK-REG-NOT: stlxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -337,6 +370,8 @@ define i8 @test_atomic_load_xor_i8(i8 %offset) nounwind {
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-REG: eor w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
+; CHECK-REG-NOT: stxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -357,6 +392,8 @@ define i16 @test_atomic_load_xor_i16(i16 %offset) nounwind {
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-REG: eor w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
+; CHECK-REG-NOT: stxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -377,6 +414,8 @@ define i32 @test_atomic_load_xor_i32(i32 %offset) nounwind {
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-REG: eor w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
+; CHECK-REG-NOT: stlxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -397,6 +436,8 @@ define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind {
; x0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: eor [[NEW:x[0-9]+]], x[[OLD]], x0
+; CHECK-REG: eor x[[NEW:[0-9]+]], x{{[0-9]+}}, x0
+; CHECK-REG-NOT: stxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -416,6 +457,7 @@ define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
+; CHECK-REG-NOT: stxrb w0, w0, [x{{[0-9]+}}]
; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], w0, [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -435,6 +477,7 @@ define i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind {
; CHECK-NEXT: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
+; CHECK-REG-NOT: stlxrh w0, w0, [x{{[0-9]+}}]
; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], w0, [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -454,6 +497,7 @@ define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind {
; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
+; CHECK-REG-NOT: stlxr w0, w0, [x{{[0-9]+}}]
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], w0, [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -473,6 +517,7 @@ define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind {
; CHECK-NEXT: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]]
; x0 below is a reasonable guess but could change: it certainly comes into the
; function there.
+; CHECK-REG-NOT: stxr w0, x0, [x{{[0-9]+}}]
; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], x0, [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -495,6 +540,8 @@ define i8 @test_atomic_load_min_i8(i8 %offset) nounwind {
; function there.
; CHECK-NEXT: cmp w0, w[[OLD]], sxtb
; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt
+; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, gt
+; CHECK-REG-NOT: stxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -516,6 +563,8 @@ define i16 @test_atomic_load_min_i16(i16 %offset) nounwind {
; function there.
; CHECK-NEXT: cmp w0, w[[OLD]], sxth
; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt
+; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, gt
+; CHECK-REG-NOT: stlxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -537,6 +586,8 @@ define i32 @test_atomic_load_min_i32(i32 %offset) nounwind {
; function there.
; CHECK-NEXT: cmp w0, w[[OLD]]
; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt
+; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, gt
+; CHECK-REG-NOT: stxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -558,6 +609,8 @@ define i64 @test_atomic_load_min_i64(i64 %offset) nounwind {
; function there.
; CHECK-NEXT: cmp x0, x[[OLD]]
; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, gt
+; CHECK-REG: csel x[[NEW:[0-9]+]], x{{[0-9]+}}, x0, gt
+; CHECK-REG-NOT: stlxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -579,6 +632,8 @@ define i8 @test_atomic_load_max_i8(i8 %offset) nounwind {
; function there.
; CHECK-NEXT: cmp w0, w[[OLD]], sxtb
; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt
+; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, lt
+; CHECK-REG-NOT: stlxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -600,6 +655,8 @@ define i16 @test_atomic_load_max_i16(i16 %offset) nounwind {
; function there.
; CHECK-NEXT: cmp w0, w[[OLD]], sxth
; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt
+; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, lt
+; CHECK-REG-NOT: stxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -621,6 +678,8 @@ define i32 @test_atomic_load_max_i32(i32 %offset) nounwind {
; function there.
; CHECK-NEXT: cmp w0, w[[OLD]]
; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt
+; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, lt
+; CHECK-REG-NOT: stlxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -642,6 +701,8 @@ define i64 @test_atomic_load_max_i64(i64 %offset) nounwind {
; function there.
; CHECK-NEXT: cmp x0, x[[OLD]]
; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, lt
+; CHECK-REG: csel x[[NEW:[0-9]+]], x{{[0-9]+}}, x0, lt
+; CHECK-REG-NOT: stlxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -663,6 +724,8 @@ define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
; function there.
; CHECK-NEXT: cmp w0, w[[OLD]], uxtb
; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi
+; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, hi
+; CHECK-REG-NOT: stlxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -684,6 +747,8 @@ define i16 @test_atomic_load_umin_i16(i16 %offset) nounwind {
; function there.
; CHECK-NEXT: cmp w0, w[[OLD]], uxth
; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi
+; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, hi
+; CHECK-REG-NOT: stxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -705,6 +770,8 @@ define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind {
; function there.
; CHECK-NEXT: cmp w0, w[[OLD]]
; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi
+; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, hi
+; CHECK-REG-NOT: stlxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -726,6 +793,8 @@ define i64 @test_atomic_load_umin_i64(i64 %offset) nounwind {
; function there.
; CHECK-NEXT: cmp x0, x[[OLD]]
; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, hi
+; CHECK-REG: csel x[[NEW:[0-9]+]], x{{[0-9]+}}, x0, hi
+; CHECK-REG-NOT: stlxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -747,6 +816,8 @@ define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind {
; function there.
; CHECK-NEXT: cmp w0, w[[OLD]], uxtb
; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo
+; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, lo
+; CHECK-REG-NOT: stlxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -768,6 +839,8 @@ define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind {
; function there.
; CHECK-NEXT: cmp w0, w[[OLD]], uxth
; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo
+; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, lo
+; CHECK-REG-NOT: stxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -789,6 +862,8 @@ define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind {
; function there.
; CHECK-NEXT: cmp w0, w[[OLD]]
; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo
+; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, lo
+; CHECK-REG-NOT: stlxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -810,6 +885,8 @@ define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind {
; function there.
; CHECK-NEXT: cmp x0, x[[OLD]]
; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, lo
+; CHECK-REG: csel x[[NEW:[0-9]+]], x{{[0-9]+}}, x0, lo
+; CHECK-REG-NOT: stlxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -832,6 +909,7 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
; CHECK-NEXT: cmp w[[OLD]], w0
; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
; As above, w1 is a reasonable guess.
+; CHECK-REG-NOT: stxrb w1, w1, [x{{[0-9]+}}]
; CHECK: stxrb [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
; CHECK-NOT: dmb
@@ -854,6 +932,7 @@ define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
; CHECK-NEXT: cmp w[[OLD]], w0
; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
; As above, w1 is a reasonable guess.
+; CHECK-REG-NOT: stlxrh w1, w1, [x{{[0-9]+}}]
; CHECK: stlxrh [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
; CHECK-NOT: dmb
@@ -876,6 +955,7 @@ define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
; CHECK-NEXT: cmp w[[OLD]], w0
; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
; As above, w1 is a reasonable guess.
+; CHECK-REG-NOT: stlxr w1, w1, [x{{[0-9]+}}]
; CHECK: stlxr [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
; CHECK-NOT: dmb
@@ -898,6 +978,7 @@ define i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
; CHECK-NEXT: cmp x[[OLD]], x0
; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
; As above, w1 is a reasonable guess.
+; CHECK-REG-NOT: stxr w1, x1, [x{{[0-9]+}}]
; CHECK: stxr [[STATUS:w[0-9]+]], x1, [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
; CHECK-NOT: dmb
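The CHECK-REG run added above guards a register-allocation constraint rather than the instruction sequence itself. Roughly, the source-level operation behind these tests looks like the sketch below (illustrative; seq_cst ordering assumed): on AArch64 it lowers to a load-exclusive/op/store-exclusive retry loop, and the CHECK-REG-NOT lines reject the case where the store-exclusive's status register reuses the data register, a combination the architecture leaves unpredictable.

#include <atomic>

// Illustrative counterpart of test_atomic_load_add_i32: lowered to a
// ldaxr/add/stlxr loop whose stlxr status operand must differ from the data
// register being stored.
int atomic_add(std::atomic<int> &var, int offset) {
  return var.fetch_add(offset, std::memory_order_seq_cst);
}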
diff --git a/test/CodeGen/AArch64/init-array.ll b/test/CodeGen/AArch64/init-array.ll
index 3ff1c1a86ec6..076ae27721df 100644
--- a/test/CodeGen/AArch64/init-array.ll
+++ b/test/CodeGen/AArch64/init-array.ll
@@ -1,4 +1,5 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -use-init-array < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-none-eabi -verify-machineinstrs -use-init-array < %s | FileCheck %s
define internal void @_GLOBAL__I_a() section ".text.startup" {
ret void
diff --git a/test/CodeGen/AArch64/variadic.ll b/test/CodeGen/AArch64/variadic.ll
index f3d376beeb28..4c219eb83788 100644
--- a/test/CodeGen/AArch64/variadic.ll
+++ b/test/CodeGen/AArch64/variadic.ll
@@ -179,24 +179,19 @@ define void @test_va_copy() {
; Check beginning and end again:
-; CHECK: ldr [[BLOCK:x[0-9]+]], [{{x[0-9]+}}, #:lo12:var]
; CHECK: add x[[SRC_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var
-; CHECK-NOFP: ldr [[BLOCK:x[0-9]+]], [{{x[0-9]+}}, #:lo12:var]
-; CHECK-NOFP: add x[[SRC_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var
-
-; CHECK: str [[BLOCK]], [{{x[0-9]+}}, #:lo12:second_list]
-
-; CHECK: ldr [[BLOCK:x[0-9]+]], [x[[SRC_LIST]], #24]
; CHECK: add x[[DEST_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:second_list
+; CHECK: ldr [[BLOCK1:x[0-9]+]], [{{x[0-9]+}}, #:lo12:var]
+; CHECK: ldr [[BLOCK2:x[0-9]+]], [x[[SRC_LIST]], #24]
+; CHECK: str [[BLOCK1]], [{{x[0-9]+}}, #:lo12:second_list]
+; CHECK: str [[BLOCK2]], [x[[DEST_LIST]], #24]
-; CHECK: str [[BLOCK]], [x[[DEST_LIST]], #24]
-
-; CHECK-NOFP: str [[BLOCK]], [{{x[0-9]+}}, #:lo12:second_list]
-
-; CHECK-NOFP: ldr [[BLOCK:x[0-9]+]], [x[[SRC_LIST]], #24]
+; CHECK-NOFP: add x[[SRC_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var
; CHECK-NOFP: add x[[DEST_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:second_list
-
-; CHECK-NOFP: str [[BLOCK]], [x[[DEST_LIST]], #24]
+; CHECK-NOFP: ldr [[BLOCK1:x[0-9]+]], [{{x[0-9]+}}, #:lo12:var]
+; CHECK-NOFP: ldr [[BLOCK2:x[0-9]+]], [x[[SRC_LIST]], #24]
+; CHECK-NOFP: str [[BLOCK1]], [{{x[0-9]+}}, #:lo12:second_list]
+; CHECK-NOFP: str [[BLOCK2]], [x[[DEST_LIST]], #24]
ret void
; CHECK: ret
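The regrouped checks still verify the same behaviour: va_copy duplicates the whole va_list block (a 32-byte structure in the AArch64 AAPCS), hence the paired loads and stores of the list's fields. A hedged source-level illustration, not the test's exact input:

#include <cstdarg>

// va_copy performs a block copy of the va_list state, which is what the
// ldr/str pairs above check.
void copy_args(va_list src) {
  va_list dst;
  va_copy(dst, src);
  va_end(dst);
}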
diff --git a/test/CodeGen/ARM/a15-SD-dep.ll b/test/CodeGen/ARM/a15-SD-dep.ll
index 019ff6129b00..5e5ca4b873f3 100644
--- a/test/CodeGen/ARM/a15-SD-dep.ll
+++ b/test/CodeGen/ARM/a15-SD-dep.ll
@@ -56,3 +56,62 @@ define arm_aapcs_vfpcc <4 x float> @t5(<4 x float> %q, float %f) {
%i2 = fadd <4 x float> %i1, %i1
ret <4 x float> %i2
}
+
+; Test that DPair can be successfully passed as QPR.
+; CHECK-ENABLED-LABEL: test_DPair1:
+; CHECK-DISABLED-LABEL: test_DPair1:
+define void @test_DPair1(i32 %vsout, i8* nocapture %out, float %x, float %y) {
+entry:
+ %0 = insertelement <4 x float> undef, float %x, i32 1
+ %1 = insertelement <4 x float> %0, float %y, i32 0
+ ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d{{[0-9]*}}[0]
+ ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d{{[0-9]*}}[1]
+ ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d{{[0-9]*}}[0]
+ ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d{{[0-9]*}}[1]
+ ; CHECK-DISABLED-NOT: vdup
+ switch i32 %vsout, label %sw.epilog [
+ i32 1, label %sw.bb
+ i32 0, label %sw.bb6
+ ]
+
+sw.bb: ; preds = %entry
+ %2 = insertelement <4 x float> %1, float 0.000000e+00, i32 0
+ br label %sw.bb6
+
+sw.bb6: ; preds = %sw.bb, %entry
+ %sum.0 = phi <4 x float> [ %1, %entry ], [ %2, %sw.bb ]
+ %3 = extractelement <4 x float> %sum.0, i32 0
+ %conv = fptoui float %3 to i8
+ store i8 %conv, i8* %out, align 1
+ ret void
+
+sw.epilog: ; preds = %entry
+ ret void
+}
+
+; CHECK-ENABLED-LABEL: test_DPair2:
+; CHECK-DISABLED-LABEL: test_DPair2:
+define void @test_DPair2(i32 %vsout, i8* nocapture %out, float %x) {
+entry:
+ %0 = insertelement <4 x float> undef, float %x, i32 0
+ ; CHECK-ENABLED: vdup.32 q{{[0-9]*}}, d{{[0-9]*}}[0]
+ ; CHECK-DISABLED-NOT: vdup
+ switch i32 %vsout, label %sw.epilog [
+ i32 1, label %sw.bb
+ i32 0, label %sw.bb1
+ ]
+
+sw.bb: ; preds = %entry
+ %1 = insertelement <4 x float> %0, float 0.000000e+00, i32 0
+ br label %sw.bb1
+
+sw.bb1: ; preds = %entry, %sw.bb
+ %sum.0 = phi <4 x float> [ %0, %entry ], [ %1, %sw.bb ]
+ %2 = extractelement <4 x float> %sum.0, i32 0
+ %conv = fptoui float %2 to i8
+ store i8 %conv, i8* %out, align 1
+ br label %sw.epilog
+
+sw.epilog: ; preds = %entry, %sw.bb1
+ ret void
+}
\ No newline at end of file
diff --git a/test/CodeGen/ARM/vld3.ll b/test/CodeGen/ARM/vld3.ll
index 400541fb90a2..d6eb4c2f6dd3 100644
--- a/test/CodeGen/ARM/vld3.ll
+++ b/test/CodeGen/ARM/vld3.ll
@@ -83,6 +83,19 @@ define <1 x i64> @vld3i64(i64* %A) nounwind {
ret <1 x i64> %tmp4
}
+define <1 x i64> @vld3i64_update(i64** %ptr, i64* %A) nounwind {
+;CHECK-LABEL: vld3i64_update:
+;CHECK: vld1.64 {d16, d17, d18}, [r1:64]!
+ %tmp0 = bitcast i64* %A to i8*
+ %tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8* %tmp0, i32 16)
+ %tmp5 = getelementptr i64* %A, i32 3
+ store i64* %tmp5, i64** %ptr
+ %tmp2 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 2
+ %tmp4 = add <1 x i64> %tmp2, %tmp3
+ ret <1 x i64> %tmp4
+}
+
define <16 x i8> @vld3Qi8(i8* %A) nounwind {
;CHECK-LABEL: vld3Qi8:
;Check the alignment value. Max for this instruction is 64 bits:
diff --git a/test/CodeGen/ARM/vld4.ll b/test/CodeGen/ARM/vld4.ll
index f7376b503a30..ff162bb022e1 100644
--- a/test/CodeGen/ARM/vld4.ll
+++ b/test/CodeGen/ARM/vld4.ll
@@ -83,6 +83,19 @@ define <1 x i64> @vld4i64(i64* %A) nounwind {
ret <1 x i64> %tmp4
}
+define <1 x i64> @vld4i64_update(i64** %ptr, i64* %A) nounwind {
+;CHECK-LABEL: vld4i64_update:
+;CHECK: vld1.64 {d16, d17, d18, d19}, [r1:256]!
+ %tmp0 = bitcast i64* %A to i8*
+ %tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8* %tmp0, i32 64)
+ %tmp5 = getelementptr i64* %A, i32 4
+ store i64* %tmp5, i64** %ptr
+ %tmp2 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 2
+ %tmp4 = add <1 x i64> %tmp2, %tmp3
+ ret <1 x i64> %tmp4
+}
+
define <16 x i8> @vld4Qi8(i8* %A) nounwind {
;CHECK-LABEL: vld4Qi8:
;Check the alignment value. Max for this instruction is 256 bits:
diff --git a/test/CodeGen/ARM/vst3.ll b/test/CodeGen/ARM/vst3.ll
index 91eb7fce2b74..65625de34573 100644
--- a/test/CodeGen/ARM/vst3.ll
+++ b/test/CodeGen/ARM/vst3.ll
@@ -61,6 +61,18 @@ define void @vst3i64(i64* %A, <1 x i64>* %B) nounwind {
ret void
}
+define void @vst3i64_update(i64** %ptr, <1 x i64>* %B) nounwind {
+;CHECK-LABEL: vst3i64_update:
+;CHECK: vst1.64 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}]!
+ %A = load i64** %ptr
+ %tmp0 = bitcast i64* %A to i8*
+ %tmp1 = load <1 x i64>* %B
+ call void @llvm.arm.neon.vst3.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1)
+ %tmp2 = getelementptr i64* %A, i32 3
+ store i64* %tmp2, i64** %ptr
+ ret void
+}
+
define void @vst3Qi8(i8* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vst3Qi8:
;Check the alignment value. Max for this instruction is 64 bits:
diff --git a/test/CodeGen/ARM/vst4.ll b/test/CodeGen/ARM/vst4.ll
index ef5c83a57dbb..83a6c7048650 100644
--- a/test/CodeGen/ARM/vst4.ll
+++ b/test/CodeGen/ARM/vst4.ll
@@ -60,6 +60,18 @@ define void @vst4i64(i64* %A, <1 x i64>* %B) nounwind {
ret void
}
+define void @vst4i64_update(i64** %ptr, <1 x i64>* %B) nounwind {
+;CHECK-LABEL: vst4i64_update:
+;CHECK: vst1.64 {d16, d17, d18, d19}, [r1]!
+ %A = load i64** %ptr
+ %tmp0 = bitcast i64* %A to i8*
+ %tmp1 = load <1 x i64>* %B
+ call void @llvm.arm.neon.vst4.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1)
+ %tmp2 = getelementptr i64* %A, i32 4
+ store i64* %tmp2, i64** %ptr
+ ret void
+}
+
define void @vst4Qi8(i8* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vst4Qi8:
;Check the alignment value. Max for this instruction is 256 bits:
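For orientation, an intrinsic-level analog of the new *_update tests (assumes an ARM target with <arm_neon.h>; the exact instruction selection is what the checks above pin down): a de-interleaving 64-bit load followed by a pointer advance, which the backend folds into the post-incremented vld1.64 form.

#include <arm_neon.h>

// Load three consecutive i64 elements and advance the pointer; the pointer
// update is the "writeback" that becomes the "[r1]!" addressing mode.
int64x1_t load3_update(const int64_t **ptr) {
  int64x1x3_t v = vld3_s64(*ptr);
  *ptr += 3;
  return vadd_s64(v.val[0], v.val[2]);
}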
diff --git a/test/CodeGen/PowerPC/anon_aggr.ll b/test/CodeGen/PowerPC/anon_aggr.ll
index 1525e05501ee..ce07d8845ddb 100644
--- a/test/CodeGen/PowerPC/anon_aggr.ll
+++ b/test/CodeGen/PowerPC/anon_aggr.ll
@@ -119,9 +119,9 @@ unequal:
; CHECK: ld 3, -[[OFFSET1]](1)
; DARWIN32: _func3:
-; DARWIN32: addi r[[REG1:[0-9]+]], r[[REGSP:[0-9]+]], 40
+; DARWIN32: addi r[[REG1:[0-9]+]], r[[REGSP:[0-9]+]], 36
; DARWIN32: addi r[[REG2:[0-9]+]], r[[REGSP]], 24
-; DARWIN32: lwz r[[REG3:[0-9]+]], 48(r[[REGSP]])
+; DARWIN32: lwz r[[REG3:[0-9]+]], 44(r[[REGSP]])
; DARWIN32: lwz r[[REG4:[0-9]+]], 32(r[[REGSP]])
; DARWIN32: cmplw cr{{[0-9]+}}, r[[REG4]], r[[REG3]]
; DARWIN32: stw r[[REG3]], -[[OFFSET1:[0-9]+]]
diff --git a/test/CodeGen/PowerPC/byval-agg-info.ll b/test/CodeGen/PowerPC/byval-agg-info.ll
new file mode 100644
index 000000000000..89ad8e4dcf98
--- /dev/null
+++ b/test/CodeGen/PowerPC/byval-agg-info.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -print-after=prologepilog >%t 2>&1 && FileCheck <%t %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.anon = type { i32, i32 }
+
+declare void @foo(%struct.anon* %v)
+define void @test(i32 %a, i32 %b, %struct.anon* byval nocapture %v) {
+entry:
+ call void @foo(%struct.anon* %v)
+ ret void
+}
+
+; Make sure that the MMO on the store has no offset from the byval
+; variable itself (we used to have mem:ST8[%v+64]).
+; CHECK: STD %X5<kill>, 176, %X1; mem:ST8[%v](align=16)
+
diff --git a/test/CodeGen/PowerPC/cc.ll b/test/CodeGen/PowerPC/cc.ll
new file mode 100644
index 000000000000..ab724f5a7e2d
--- /dev/null
+++ b/test/CodeGen/PowerPC/cc.ll
@@ -0,0 +1,70 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i64 @test1(i64 %a, i64 %b) {
+entry:
+ %c = icmp eq i64 %a, %b
+ br label %foo
+
+foo:
+ call { i64, i64 } asm sideeffect "sc", "={r0},={r3},{r0},~{cr0},~{cr1},~{cr2},~{cr3},~{cr4},~{cr5},~{cr6},~{cr7}" (i64 %a)
+ br i1 %c, label %bar, label %end
+
+bar:
+ ret i64 %b
+
+end:
+ ret i64 %a
+
+; CHECK-LABEL: @test1
+; CHECK: mfcr [[REG1:[0-9]+]]
+; CHECK-DAG: cmpld
+; CHECK-DAG: mfocrf [[REG2:[0-9]+]],
+; CHECK-DAG: stw [[REG1]], 8(1)
+; CHECK-DAG: stw [[REG2]], -4(1)
+
+; CHECK: sc
+; CHECK: lwz [[REG3:[0-9]+]], -4(1)
+; CHECK: mtocrf 128, [[REG3]]
+
+; CHECK: lwz [[REG4:[0-9]+]], 8(1)
+; CHECK-DAG: mtocrf 32, [[REG4]]
+; CHECK-DAG: mtocrf 16, [[REG4]]
+; CHECK-DAG: mtocrf 8, [[REG4]]
+; CHECK: blr
+}
+
+define i64 @test2(i64 %a, i64 %b) {
+entry:
+ %c = icmp eq i64 %a, %b
+ br label %foo
+
+foo:
+ call { i64, i64 } asm sideeffect "sc", "={r0},={r3},{r0},~{cc}" (i64 %a)
+ br i1 %c, label %bar, label %end
+
+bar:
+ ret i64 %b
+
+end:
+ ret i64 %a
+
+; CHECK-LABEL: @test2
+; CHECK: mfcr [[REG1:[0-9]+]]
+; CHECK-DAG: cmpld
+; CHECK-DAG: mfocrf [[REG2:[0-9]+]],
+; CHECK-DAG: stw [[REG1]], 8(1)
+; CHECK-DAG: stw [[REG2]], -4(1)
+
+; CHECK: sc
+; CHECK: lwz [[REG3:[0-9]+]], -4(1)
+; CHECK: mtocrf 128, [[REG3]]
+
+; CHECK: lwz [[REG4:[0-9]+]], 8(1)
+; CHECK-DAG: mtocrf 32, [[REG4]]
+; CHECK-DAG: mtocrf 16, [[REG4]]
+; CHECK-DAG: mtocrf 8, [[REG4]]
+; CHECK: blr
+}
+
diff --git a/test/CodeGen/PowerPC/ctrloop-udivti3.ll b/test/CodeGen/PowerPC/ctrloop-udivti3.ll
new file mode 100644
index 000000000000..d07a11fe60fb
--- /dev/null
+++ b/test/CodeGen/PowerPC/ctrloop-udivti3.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -march=ppc64 | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define hidden void @_mpd_shortdiv(i64 %n) #0 {
+entry:
+ br i1 undef, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body, %for.body.lr.ph
+ %i.018.in = phi i64 [ %n, %for.body.lr.ph ], [ %i.018, %for.body ]
+ %i.018 = add i64 %i.018.in, -1
+ %add.i = or i128 undef, undef
+ %div.i = udiv i128 %add.i, 0
+ %conv3.i11 = trunc i128 %div.i to i64
+ store i64 %conv3.i11, i64* undef, align 8
+ %cmp = icmp eq i64 %i.018, 0
+ br i1 %cmp, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+; CHECK-LABEL: @_mpd_shortdiv
+; CHECK-NOT: mtctr
+
+attributes #0 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/fast-isel-conversion-p5.ll b/test/CodeGen/PowerPC/fast-isel-conversion-p5.ll
new file mode 100644
index 000000000000..db0d8ed0ffa4
--- /dev/null
+++ b/test/CodeGen/PowerPC/fast-isel-conversion-p5.ll
@@ -0,0 +1,153 @@
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr5 | FileCheck %s --check-prefix=ELF64
+
+; Test sitofp
+
+define void @sitofp_double_i32(i32 %a, double %b) nounwind ssp {
+entry:
+; ELF64: sitofp_double_i32
+ %b.addr = alloca double, align 8
+ %conv = sitofp i32 %a to double
+; ELF64: std {{[0-9]+}}, -[[OFFSET:[0-9]+]](1)
+; ELF64: lfd {{[0-9]+}}, -[[OFFSET]](1)
+; ELF64: fcfid
+ store double %conv, double* %b.addr, align 8
+ ret void
+}
+
+define void @sitofp_double_i64(i64 %a, double %b) nounwind ssp {
+entry:
+; ELF64: sitofp_double_i64
+ %b.addr = alloca double, align 8
+ %conv = sitofp i64 %a to double
+; ELF64: std {{[0-9]+}}, -[[OFFSET:[0-9]+]](1)
+; ELF64: lfd {{[0-9]+}}, -[[OFFSET]](1)
+; ELF64: fcfid
+ store double %conv, double* %b.addr, align 8
+ ret void
+}
+
+define void @sitofp_double_i16(i16 %a, double %b) nounwind ssp {
+entry:
+; ELF64: sitofp_double_i16
+ %b.addr = alloca double, align 8
+ %conv = sitofp i16 %a to double
+; ELF64: extsh
+; ELF64: std {{[0-9]+}}, -[[OFFSET:[0-9]+]](1)
+; ELF64: lfd {{[0-9]+}}, -[[OFFSET]](1)
+; ELF64: fcfid
+ store double %conv, double* %b.addr, align 8
+ ret void
+}
+
+define void @sitofp_double_i8(i8 %a, double %b) nounwind ssp {
+entry:
+; ELF64: sitofp_double_i8
+ %b.addr = alloca double, align 8
+ %conv = sitofp i8 %a to double
+; ELF64: extsb
+; ELF64: std {{[0-9]+}}, -[[OFFSET:[0-9]+]](1)
+; ELF64: lfd {{[0-9]+}}, -[[OFFSET]](1)
+; ELF64: fcfid
+ store double %conv, double* %b.addr, align 8
+ ret void
+}
+
+; Test fptosi
+
+define void @fptosi_float_i32(float %a) nounwind ssp {
+entry:
+; ELF64: fptosi_float_i32
+ %b.addr = alloca i32, align 4
+ %conv = fptosi float %a to i32
+; ELF64: fctiwz
+; ELF64: stfd
+; ELF64: lwa
+ store i32 %conv, i32* %b.addr, align 4
+ ret void
+}
+
+define void @fptosi_float_i64(float %a) nounwind ssp {
+entry:
+; ELF64: fptosi_float_i64
+ %b.addr = alloca i64, align 4
+ %conv = fptosi float %a to i64
+; ELF64: fctidz
+; ELF64: stfd
+; ELF64: ld
+ store i64 %conv, i64* %b.addr, align 4
+ ret void
+}
+
+define void @fptosi_double_i32(double %a) nounwind ssp {
+entry:
+; ELF64: fptosi_double_i32
+ %b.addr = alloca i32, align 8
+ %conv = fptosi double %a to i32
+; ELF64: fctiwz
+; ELF64: stfd
+; ELF64: lwa
+ store i32 %conv, i32* %b.addr, align 8
+ ret void
+}
+
+define void @fptosi_double_i64(double %a) nounwind ssp {
+entry:
+; ELF64: fptosi_double_i64
+ %b.addr = alloca i64, align 8
+ %conv = fptosi double %a to i64
+; ELF64: fctidz
+; ELF64: stfd
+; ELF64: ld
+ store i64 %conv, i64* %b.addr, align 8
+ ret void
+}
+
+; Test fptoui
+
+define void @fptoui_float_i32(float %a) nounwind ssp {
+entry:
+; ELF64: fptoui_float_i32
+ %b.addr = alloca i32, align 4
+ %conv = fptoui float %a to i32
+; ELF64: fctidz
+; ELF64: stfd
+; ELF64: lwz
+ store i32 %conv, i32* %b.addr, align 4
+ ret void
+}
+
+define void @fptoui_float_i64(float %a) nounwind ssp {
+entry:
+; ELF64: fptoui_float_i64
+ %b.addr = alloca i64, align 4
+ %conv = fptoui float %a to i64
+; ELF64: fctiduz
+; ELF64: stfd
+; ELF64: ld
+ store i64 %conv, i64* %b.addr, align 4
+ ret void
+}
+
+define void @fptoui_double_i32(double %a) nounwind ssp {
+entry:
+; ELF64: fptoui_double_i32
+ %b.addr = alloca i32, align 8
+ %conv = fptoui double %a to i32
+; ELF64: fctidz
+; ELF64: stfd
+; ELF64: lwz
+ store i32 %conv, i32* %b.addr, align 8
+ ret void
+}
+
+define void @fptoui_double_i64(double %a) nounwind ssp {
+entry:
+; ELF64: fptoui_double_i64
+ %b.addr = alloca i64, align 8
+ %conv = fptoui double %a to i64
+; ELF64: fctiduz
+; ELF64: stfd
+; ELF64: ld
+ store i64 %conv, i64* %b.addr, align 8
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/spill-nor0.ll b/test/CodeGen/PowerPC/spill-nor0.ll
new file mode 100644
index 000000000000..65bdc0914350
--- /dev/null
+++ b/test/CodeGen/PowerPC/spill-nor0.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -O0 -mcpu=ppc64 | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @_ZN4llvm3sys17RunningOnValgrindEv() #0 {
+entry:
+ br i1 undef, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ ret void
+
+if.end: ; preds = %entry
+ %0 = call i64 asm sideeffect "mr 3,$1\0A\09mr 4,$2\0A\09rotldi 0,0,3 ; rotldi 0,0,13\0A\09rotldi 0,0,61 ; rotldi 0,0,51\0A\09or 1,1,1\0A\09mr $0,3", "=b,b,b,~{cc},~{memory},~{r3},~{r4}"(i32 0, i64* undef) #0
+ unreachable
+
+; CHECK-LABEL: @_ZN4llvm3sys17RunningOnValgrindEv
+; CHECK: stw
+; CHECK: lwz
+}
+
+attributes #0 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/weak_def_can_be_hidden.ll b/test/CodeGen/PowerPC/weak_def_can_be_hidden.ll
new file mode 100644
index 000000000000..130d8faaf8bc
--- /dev/null
+++ b/test/CodeGen/PowerPC/weak_def_can_be_hidden.ll
@@ -0,0 +1,38 @@
+; taken from X86 version of the same test
+; RUN: llc -mtriple=powerpc-apple-darwin10 -O0 < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc-apple-darwin9 -O0 < %s | FileCheck --check-prefix=CHECK-D89 %s
+; RUN: llc -mtriple=powerpc-apple-darwin8 -O0 < %s | FileCheck --check-prefix=CHECK-D89 %s
+
+@v1 = linkonce_odr global i32 32
+; CHECK: .globl _v1
+; CHECK: .weak_def_can_be_hidden _v1
+
+; CHECK-D89: .globl _v1
+; CHECK-D89: .weak_definition _v1
+
+define i32 @f1() {
+ %x = load i32 * @v1
+ ret i32 %x
+}
+
+@v2 = linkonce_odr global i32 32
+; CHECK: .globl _v2
+; CHECK: .weak_definition _v2
+
+; CHECK-D89: .globl _v2
+; CHECK-D89: .weak_definition _v2
+
+@v3 = linkonce_odr unnamed_addr global i32 32
+; CHECK: .globl _v3
+; CHECK: .weak_def_can_be_hidden _v3
+
+; CHECK-D89: .globl _v3
+; CHECK-D89: .weak_definition _v3
+
+define i32* @f2() {
+ ret i32* @v2
+}
+
+define i32* @f3() {
+ ret i32* @v3
+}
diff --git a/test/CodeGen/R600/bfe_uint.ll b/test/CodeGen/R600/bfe_uint.ll
index 92570c315299..fe466e6ad5fd 100644
--- a/test/CodeGen/R600/bfe_uint.ll
+++ b/test/CodeGen/R600/bfe_uint.ll
@@ -1,5 +1,7 @@
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+; XFAIL: *
+
; CHECK: @bfe_def
; CHECK: BFE_UINT
define void @bfe_def(i32 addrspace(1)* %out, i32 %x) {
diff --git a/test/CodeGen/R600/fabs.ll b/test/CodeGen/R600/fabs.ll
index a5f5df96b5d9..2cd3a4f604f2 100644
--- a/test/CodeGen/R600/fabs.ll
+++ b/test/CodeGen/R600/fabs.ll
@@ -9,7 +9,7 @@
; R600-CHECK-NOT: AND
; R600-CHECK: |PV.{{[XYZW]}}|
; SI-CHECK-LABEL: @fabs_free
-; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0
+; SI-CHECK: V_AND_B32
define void @fabs_free(float addrspace(1)* %out, i32 %in) {
entry:
@@ -23,8 +23,8 @@ entry:
; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
; SI-CHECK-LABEL: @fabs_v2
-; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0
-; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0
+; SI-CHECK: V_AND_B32
+; SI-CHECK: V_AND_B32
define void @fabs_v2(<2 x float> addrspace(1)* %out, <2 x float> %in) {
entry:
%0 = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
@@ -38,10 +38,10 @@ entry:
; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
; SI-CHECK-LABEL: @fabs_v4
-; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0
-; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0
-; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0
-; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0
+; SI-CHECK: V_AND_B32
+; SI-CHECK: V_AND_B32
+; SI-CHECK: V_AND_B32
+; SI-CHECK: V_AND_B32
define void @fabs_v4(<4 x float> addrspace(1)* %out, <4 x float> %in) {
entry:
%0 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
diff --git a/test/CodeGen/R600/fneg-fabs.ll b/test/CodeGen/R600/fneg-fabs.ll
new file mode 100644
index 000000000000..d95e1311bc10
--- /dev/null
+++ b/test/CodeGen/R600/fneg-fabs.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+
+; DAGCombiner will transform:
+; (fabs (f32 bitcast (i32 a))) => (f32 bitcast (and (i32 a), 0x7FFFFFFF))
+; unless isFabsFree returns true
+
+; R600-CHECK-LABEL: @fneg_fabs_free
+; R600-CHECK-NOT: AND
+; R600-CHECK: |PV.{{[XYZW]}}|
+; R600-CHECK: -PV
+; SI-CHECK-LABEL: @fneg_fabs_free
+; SI-CHECK: V_OR_B32
+
+define void @fneg_fabs_free(float addrspace(1)* %out, i32 %in) {
+entry:
+ %0 = bitcast i32 %in to float
+ %1 = call float @fabs(float %0)
+ %2 = fsub float -0.000000e+00, %1
+ store float %2, float addrspace(1)* %out
+ ret void
+}
+
+; R600-CHECK-LABEL: @fneg_fabs_v2
+; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
+; R600-CHECK: -PV
+; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
+; R600-CHECK: -PV
+; SI-CHECK-LABEL: @fneg_fabs_v2
+; SI-CHECK: V_OR_B32
+; SI-CHECK: V_OR_B32
+define void @fneg_fabs_v2(<2 x float> addrspace(1)* %out, <2 x float> %in) {
+entry:
+ %0 = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
+ %1 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %0
+ store <2 x float> %1, <2 x float> addrspace(1)* %out
+ ret void
+}
+
+; SI-CHECK-LABEL: @fneg_fabs_v4
+; SI-CHECK: V_OR_B32
+; SI-CHECK: V_OR_B32
+; SI-CHECK: V_OR_B32
+; SI-CHECK: V_OR_B32
+define void @fneg_fabs_v4(<4 x float> addrspace(1)* %out, <4 x float> %in) {
+entry:
+ %0 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
+ %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
+ store <4 x float> %1, <4 x float> addrspace(1)* %out
+ ret void
+}
+
+declare float @fabs(float ) readnone
+declare <2 x float> @llvm.fabs.v2f32(<2 x float> ) readnone
+declare <4 x float> @llvm.fabs.v4f32(<4 x float> ) readnone
diff --git a/test/CodeGen/R600/fneg.ll b/test/CodeGen/R600/fneg.ll
index 9446aa8ea9c3..f4e6be62467a 100644
--- a/test/CodeGen/R600/fneg.ll
+++ b/test/CodeGen/R600/fneg.ll
@@ -4,7 +4,7 @@
; R600-CHECK-LABEL: @fneg
; R600-CHECK: -PV
; SI-CHECK-LABEL: @fneg
-; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1
+; SI-CHECK: V_XOR_B32
define void @fneg(float addrspace(1)* %out, float %in) {
entry:
%0 = fsub float -0.000000e+00, %in
@@ -16,8 +16,8 @@ entry:
; R600-CHECK: -PV
; R600-CHECK: -PV
; SI-CHECK-LABEL: @fneg_v2
-; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1
-; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1
+; SI-CHECK: V_XOR_B32
+; SI-CHECK: V_XOR_B32
define void @fneg_v2(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) {
entry:
%0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %in
@@ -31,10 +31,10 @@ entry:
; R600-CHECK: -PV
; R600-CHECK: -PV
; SI-CHECK-LABEL: @fneg_v4
-; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1
-; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1
-; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1
-; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1
+; SI-CHECK: V_XOR_B32
+; SI-CHECK: V_XOR_B32
+; SI-CHECK: V_XOR_B32
+; SI-CHECK: V_XOR_B32
define void @fneg_v4(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) {
entry:
%0 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %in
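The updated SI checks across the fabs, fneg-fabs and fneg tests all reflect the same bit-level lowering, summarized in the sketch below (illustration only; the backend operates on the IR, not on code like this): fabs clears the sign bit with an AND, fneg flips it with an XOR, and fneg(fabs(x)) forces it on with an OR.

#include <cstdint>
#include <cstring>

static uint32_t to_bits(float f)   { uint32_t u; std::memcpy(&u, &f, sizeof u); return u; }
static float from_bits(uint32_t u) { float f; std::memcpy(&f, &u, sizeof f); return f; }

float fabs_bits(float x)      { return from_bits(to_bits(x) & 0x7FFFFFFFu); } // V_AND_B32
float fneg_bits(float x)      { return from_bits(to_bits(x) ^ 0x80000000u); } // V_XOR_B32
float fneg_fabs_bits(float x) { return from_bits(to_bits(x) | 0x80000000u); } // V_OR_B32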
diff --git a/test/CodeGen/R600/lds-oqap-crash.ll b/test/CodeGen/R600/lds-oqap-crash.ll
new file mode 100644
index 000000000000..79591506e8cb
--- /dev/null
+++ b/test/CodeGen/R600/lds-oqap-crash.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood -verify-machineinstrs | FileCheck %s
+
+; The test is for a bug in R600EmitClauseMarkers.cpp where this pass
+; was searching for a use of the OQAP register in order to determine
+; if an LDS instruction could fit in the current clause, but never finding
+; one. This created an infinite loop and hung the compiler.
+;
+; The LDS instruction should not have been defining OQAP in the first place,
+; because the LDS instructions are pseudo instructions and the OQAP
+; reads and writes are bundled together in the same instruction.
+
+; CHECK: @lds_crash
+define void @lds_crash(i32 addrspace(1)* %out, i32 addrspace(3)* %in, i32 %a, i32 %b, i32 %c) {
+entry:
+ %0 = load i32 addrspace(3)* %in
+ ; This block needs to be > 115 ISA instructions to hit the bug,
+ ; so we'll use udiv instructions.
+ %div0 = udiv i32 %0, %b
+ %div1 = udiv i32 %div0, %a
+ %div2 = udiv i32 %div1, 11
+ %div3 = udiv i32 %div2, %a
+ %div4 = udiv i32 %div3, %b
+ %div5 = udiv i32 %div4, %c
+ %div6 = udiv i32 %div5, %div0
+ %div7 = udiv i32 %div6, %div1
+ store i32 %div7, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.kill.ll b/test/CodeGen/R600/llvm.AMDGPU.kill.ll
new file mode 100644
index 000000000000..bec5cdf65f1b
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.kill.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI %s
+
+; SI-LABEL: @kill_gs
+; SI: V_CMPX_LE_F32
+
+define void @kill_gs() #0 {
+main_body:
+ %0 = icmp ule i32 0, 3
+ %1 = select i1 %0, float 1.000000e+00, float -1.000000e+00
+ call void @llvm.AMDGPU.kill(float %1)
+ ret void
+}
+
+declare void @llvm.AMDGPU.kill(float)
+
+attributes #0 = { "ShaderType"="2" }
+
+!0 = metadata !{metadata !"const", null, i32 1}
diff --git a/test/CodeGen/R600/llvm.SI.load.dword.ll b/test/CodeGen/R600/llvm.SI.load.dword.ll
new file mode 100644
index 000000000000..a6227755b72e
--- /dev/null
+++ b/test/CodeGen/R600/llvm.SI.load.dword.ll
@@ -0,0 +1,40 @@
+;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+
+; Example of a simple geometry shader loading vertex attributes from the
+; ESGS ring buffer
+
+; CHECK-LABEL: @main
+; CHECK: BUFFER_LOAD_DWORD
+; CHECK: BUFFER_LOAD_DWORD
+; CHECK: BUFFER_LOAD_DWORD
+; CHECK: BUFFER_LOAD_DWORD
+
+define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [2 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, [17 x <16 x i8>] addrspace(2)* inreg, i32, i32, i32, i32) #0 {
+main_body:
+ %10 = getelementptr [2 x <16 x i8>] addrspace(2)* %3, i64 0, i32 1
+ %11 = load <16 x i8> addrspace(2)* %10, !tbaa !0
+ %12 = shl i32 %6, 2
+ %13 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %11, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0)
+ %14 = bitcast i32 %13 to float
+ %15 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %11, i32 %12, i32 0, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0)
+ %16 = bitcast i32 %15 to float
+ %17 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %11, i32 %12, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 0)
+ %18 = bitcast i32 %17 to float
+ %19 = call i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8> %11, <2 x i32> <i32 0, i32 0>, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 0)
+ %20 = bitcast i32 %19 to float
+ call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %14, float %16, float %18, float %20)
+ ret void
+}
+
+; Function Attrs: nounwind readonly
+declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+
+; Function Attrs: nounwind readonly
+declare i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8>, <2 x i32>, i32, i32, i32, i32, i32, i32, i32) #1
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+attributes #0 = { "ShaderType"="1" }
+attributes #1 = { nounwind readonly }
+
+!0 = metadata !{metadata !"const", null, i32 1}
diff --git a/test/CodeGen/R600/llvm.SI.sendmsg.ll b/test/CodeGen/R600/llvm.SI.sendmsg.ll
new file mode 100644
index 000000000000..cfcc7c4e40ee
--- /dev/null
+++ b/test/CodeGen/R600/llvm.SI.sendmsg.ll
@@ -0,0 +1,21 @@
+;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+
+; CHECK-LABEL: @main
+; CHECK: S_SENDMSG 34
+; CHECK: S_SENDMSG 274
+; CHECK: S_SENDMSG 562
+; CHECK: S_SENDMSG 3
+
+define void @main() {
+main_body:
+ call void @llvm.SI.sendmsg(i32 34, i32 0);
+ call void @llvm.SI.sendmsg(i32 274, i32 0);
+ call void @llvm.SI.sendmsg(i32 562, i32 0);
+ call void @llvm.SI.sendmsg(i32 3, i32 0);
+ ret void
+}
+
+; Function Attrs: nounwind
+declare void @llvm.SI.sendmsg(i32, i32) #0
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/R600/load.ll b/test/CodeGen/R600/load.ll
index e4492d7d6e7b..0153524d136c 100644
--- a/test/CodeGen/R600/load.ll
+++ b/test/CodeGen/R600/load.ll
@@ -445,6 +445,7 @@ define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(
; R600-CHECK: LDS_UBYTE_READ_RET
; SI-CHECK-LABEL: @load_i8_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U8
define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
%1 = load i8 addrspace(3)* %in
@@ -458,6 +459,7 @@ define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
; R600-CHECK: ASHR
; SI-CHECK-LABEL: @load_i8_sext_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I8
define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
entry:
@@ -472,6 +474,7 @@ entry:
; R600-CHECK: LDS_UBYTE_READ_RET
; SI-CHECK-LABEL: @load_v2i8_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U8
; SI-CHECK: DS_READ_U8
define void @load_v2i8_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
@@ -489,6 +492,7 @@ entry:
; R600-CHECK-DAG: ASHR
; SI-CHECK-LABEL: @load_v2i8_sext_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I8
; SI-CHECK: DS_READ_I8
define void @load_v2i8_sext_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
@@ -506,6 +510,7 @@ entry:
; R600-CHECK: LDS_UBYTE_READ_RET
; SI-CHECK-LABEL: @load_v4i8_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U8
; SI-CHECK: DS_READ_U8
; SI-CHECK: DS_READ_U8
@@ -529,6 +534,7 @@ entry:
; R600-CHECK-DAG: ASHR
; SI-CHECK-LABEL: @load_v4i8_sext_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I8
; SI-CHECK: DS_READ_I8
; SI-CHECK: DS_READ_I8
@@ -546,6 +552,7 @@ entry:
; R600-CHECK: LDS_USHORT_READ_RET
; SI-CHECK-LABEL: @load_i16_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U16
define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
entry:
@@ -560,6 +567,7 @@ entry:
; R600-CHECK: ASHR
; SI-CHECK-LABEL: @load_i16_sext_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I16
define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
entry:
@@ -574,6 +582,7 @@ entry:
; R600-CHECK: LDS_USHORT_READ_RET
; SI-CHECK-LABEL: @load_v2i16_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U16
; SI-CHECK: DS_READ_U16
define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
@@ -591,6 +600,7 @@ entry:
; R600-CHECK-DAG: ASHR
; SI-CHECK-LABEL: @load_v2i16_sext_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I16
; SI-CHECK: DS_READ_I16
define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
@@ -608,6 +618,7 @@ entry:
; R600-CHECK: LDS_USHORT_READ_RET
; SI-CHECK-LABEL: @load_v4i16_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U16
; SI-CHECK: DS_READ_U16
; SI-CHECK: DS_READ_U16
@@ -631,6 +642,7 @@ entry:
; R600-CHECK-DAG: ASHR
; SI-CHECK-LABEL: @load_v4i16_sext_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I16
; SI-CHECK: DS_READ_I16
; SI-CHECK: DS_READ_I16
@@ -643,11 +655,12 @@ entry:
ret void
}
-; load an i32 value from the glocal address space.
+; load an i32 value from the local address space.
; R600-CHECK-LABEL: @load_i32_local
; R600-CHECK: LDS_READ_RET
; SI-CHECK-LABEL: @load_i32_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_B32
define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
@@ -656,10 +669,11 @@ entry:
ret void
}
-; load a f32 value from the global address space.
+; load a f32 value from the local address space.
; R600-CHECK-LABEL: @load_f32_local
; R600-CHECK: LDS_READ_RET
; SI-CHECK-LABEL: @load_f32_local
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_B32
define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) {
entry:
@@ -673,6 +687,7 @@ entry:
; R600-CHECK: LDS_READ_RET
; R600-CHECK: LDS_READ_RET
; SI-CHECK-LABEL: @load_v2f32_local
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_B32
; SI-CHECK: DS_READ_B32
define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {
diff --git a/test/CodeGen/R600/trunc.ll b/test/CodeGen/R600/trunc.ll
index 0bd320ad9ceb..6bbd7f7b510e 100644
--- a/test/CodeGen/R600/trunc.ll
+++ b/test/CodeGen/R600/trunc.ll
@@ -28,3 +28,13 @@ define void @trunc_shl_i64(i32 addrspace(1)* %out, i64 %a) {
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
}
+
+; SI-LABEL: @trunc_i32_to_i1:
+; SI: V_AND_B32
+; SI: V_CMP_EQ_I32
+define void @trunc_i32_to_i1(i32 addrspace(1)* %out, i32 %a) {
+ %trunc = trunc i32 %a to i1
+ %result = select i1 %trunc, i32 1, i32 0
+ store i32 %result, i32 addrspace(1)* %out, align 4
+ ret void
+}
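
For context, the new @trunc_i32_to_i1 test exercises lowering of an i32-to-i1 truncation followed by a select. A minimal C sketch of the same pattern (illustrative only, not part of the patch): on SI the low bit is masked with V_AND_B32 and compared with V_CMP_EQ_I32, per the CHECK lines above.

    void trunc_i32_to_i1(int *out, int a) {
        /* trunc i32 -> i1 keeps only the low bit; the select then
           materializes 1 or 0, matching the masked compare the test expects. */
        *out = (a & 1) ? 1 : 0;
    }
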
diff --git a/test/CodeGen/R600/vtx-fetch-branch.ll b/test/CodeGen/R600/vtx-fetch-branch.ll
new file mode 100644
index 000000000000..0fc99dee0dbe
--- /dev/null
+++ b/test/CodeGen/R600/vtx-fetch-branch.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=r600 -mcpu=redwood %s -o - | FileCheck %s
+
+; This tests for a bug where vertex fetch clauses right before an ENDIF
+; instruction were being emitted after the ENDIF. We were using ALU_POP_AFTER
+; for the ALU clause before the vertex fetch instead of emitting a POP instruction
+; after the fetch clause.
+
+
+; CHECK-LABEL: @test
+; CHECK-NOT: ALU_POP_AFTER
+; CHECK: TEX
+; CHECK-NEXT: POP
+define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond) {
+entry:
+ %0 = icmp eq i32 %cond, 0
+ br i1 %0, label %endif, label %if
+
+if:
+ %1 = load i32 addrspace(1)* %in
+ br label %endif
+
+endif:
+ %x = phi i32 [ %1, %if], [ 0, %entry]
+ store i32 %x, i32 addrspace(1)* %out
+ br label %done
+
+done:
+ ret void
+}
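
The shape of @test in C, for reference (an editor's sketch, not part of the patch): the conditionally executed load is the vertex fetch that must be followed by an explicit POP rather than being folded into an ALU_POP_AFTER clause.

    void test(int *out, const int *in, int cond) {
        int x = 0;
        if (cond != 0)
            x = *in;   /* the vertex fetch on R600 sits right before the ENDIF */
        *out = x;
    }
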
diff --git a/test/CodeGen/R600/zero_extend.ll b/test/CodeGen/R600/zero_extend.ll
index 481b3b328259..a114bfc4a02b 100644
--- a/test/CodeGen/R600/zero_extend.ll
+++ b/test/CodeGen/R600/zero_extend.ll
@@ -16,3 +16,13 @@ entry:
store i64 %2, i64 addrspace(1)* %out
ret void
}
+
+; SI-CHECK-LABEL: @testi1toi32
+; SI-CHECK: V_CNDMASK_B32
+define void @testi1toi32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+entry:
+ %0 = icmp eq i32 %a, %b
+ %1 = zext i1 %0 to i32
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/X86/2009-06-05-VZextByteShort.ll b/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
index 5f5d5cccf714..50c62dfb73b8 100644
--- a/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
+++ b/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 > %t1
+; RUN: llc < %s -march=x86 -mcpu=core2 > %t1
; RUN: grep movzwl %t1 | count 2
; RUN: grep movzbl %t1 | count 1
; RUN: grep movd %t1 | count 4
diff --git a/test/CodeGen/X86/bswap-vector.ll b/test/CodeGen/X86/bswap-vector.ll
new file mode 100644
index 000000000000..7a7a8a4ebb18
--- /dev/null
+++ b/test/CodeGen/X86/bswap-vector.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -mcpu=core | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
+
+define <2 x i64> @foo(<2 x i64> %v) #0 {
+entry:
+ %r = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %v)
+ ret <2 x i64> %r
+}
+
+; CHECK-LABEL: @foo
+; CHECK: bswapq
+; CHECK: bswapq
+; CHECK: ret
+
+attributes #0 = { nounwind uwtable }
+
diff --git a/test/CodeGen/X86/fma4-intrinsics-x86_64.ll b/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
index 7a1a9ae46147..494cb28677a4 100644
--- a/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
+++ b/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mattr=+avx,+fma4 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=corei7-avx -mattr=+fma4 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=+avx,-fma | FileCheck %s
; VFMADD
diff --git a/test/CodeGen/X86/fp-fast.ll b/test/CodeGen/X86/fp-fast.ll
index 07baca84804e..7b08ad67220b 100644
--- a/test/CodeGen/X86/fp-fast.ll
+++ b/test/CodeGen/X86/fp-fast.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=x86-64 -mattr=+avx,-fma4 -mtriple=x86_64-apple-darwin -enable-unsafe-fp-math < %s | FileCheck %s
+; RUN: llc -march=x86-64 -mcpu=corei7-avx -enable-unsafe-fp-math < %s | FileCheck %s
; CHECK-LABEL: test1
define float @test1(float %a) {
diff --git a/test/CodeGen/X86/inline-asm-modifier-q.ll b/test/CodeGen/X86/inline-asm-modifier-q.ll
new file mode 100644
index 000000000000..d20f06d29054
--- /dev/null
+++ b/test/CodeGen/X86/inline-asm-modifier-q.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
+
+; If the target does not have 64-bit integer registers, emit 32-bit register
+; names.
+
+; CHECK: movq (%e{{[abcd]}}x, %ebx, 4)
+
+define void @q_modifier(i32* %p) {
+entry:
+ tail call void asm sideeffect "movq (${0:q}, %ebx, 4), %mm0", "r,~{dirflag},~{fpsr},~{flags}"(i32* %p)
+ ret void
+}
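
The IR above corresponds roughly to the following C inline assembly (a hedged sketch, assuming GCC/Clang extended asm, where the IR's ${0:q} modifier is spelled %q0). The point of the test is that with -march=x86 there are no 64-bit integer registers, so the q modifier falls back to the 32-bit register name, which is what the CHECK line accepts.

    void q_modifier(int *p) {
        /* "q" asks for the 64-bit name of the operand's register; on a
           32-bit target the backend emits the 32-bit name instead. */
        __asm__ volatile("movq (%q0, %%ebx, 4), %%mm0" : : "r"(p));
    }
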
diff --git a/test/CodeGen/X86/isint.ll b/test/CodeGen/X86/isint.ll
index 4a98e63f38fc..38d05c662bd5 100644
--- a/test/CodeGen/X86/isint.ll
+++ b/test/CodeGen/X86/isint.ll
@@ -1,6 +1,11 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-pc-unknown -mattr=+sse2 -mcpu=penryn | FileCheck %s
+; RUN: llc < %s -mtriple=i686-pc-unknown -mattr=+sse2 -mcpu=penryn | FileCheck %s
+
+; PR19059
+; RUN: llc < %s -mtriple=i686-pc-unknown -mattr=+sse2 -mcpu=penryn | FileCheck -check-prefix=CHECK32 %s
define i32 @isint_return(double %d) nounwind {
+; CHECK-LABEL: isint_return:
; CHECK-NOT: xor
; CHECK: cvt
%i = fptosi double %d to i32
@@ -8,6 +13,24 @@ define i32 @isint_return(double %d) nounwind {
%e = sitofp i32 %i to double
; CHECK: cmpeqsd
%c = fcmp oeq double %d, %e
+; CHECK32-NOT: movd {{.*}}, %r{{.*}}
+; CHECK32-NOT: andq
+; CHECK-NEXT: movd
+; CHECK-NEXT: andl
+ %z = zext i1 %c to i32
+ ret i32 %z
+}
+
+define i32 @isint_float_return(float %f) nounwind {
+; CHECK-LABEL: isint_float_return:
+; CHECK-NOT: xor
+; CHECK: cvt
+ %i = fptosi float %f to i32
+; CHECK-NEXT: cvt
+ %g = sitofp i32 %i to float
+; CHECK: cmpeqss
+ %c = fcmp oeq float %f, %g
+; CHECK-NOT: movd {{.*}}, %r{{.*}}
; CHECK-NEXT: movd
; CHECK-NEXT: andl
%z = zext i1 %c to i32
@@ -17,6 +40,7 @@ define i32 @isint_return(double %d) nounwind {
declare void @foo()
define void @isint_branch(double %d) nounwind {
+; CHECK-LABEL: isint_branch:
; CHECK: cvt
%i = fptosi double %d to i32
; CHECK-NEXT: cvt
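
For readers unfamiliar with the pattern, @isint_return implements an "is this double an integer?" check; a C sketch (not part of the patch) of what the new 32-bit RUN lines compile:

    int isint_return(double d) {
        /* Round-trip through int and compare: cvttsd2si/cvtsi2sd plus
           cmpeqsd, then the mask moves to a GPR via movd/andl. The
           CHECK32-NOT lines ensure no 64-bit movd/andq shows up on i686. */
        int i = (int)d;
        double e = (double)i;
        return d == e;
    }
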
diff --git a/test/CodeGen/X86/pr10420.ll b/test/CodeGen/X86/pr10420.ll
index 3993f24954ee..62951892619b 100644
--- a/test/CodeGen/X86/pr10420.ll
+++ b/test/CodeGen/X86/pr10420.ll
@@ -1,4 +1,9 @@
-; RUN: llc < %s -mtriple=x86_64-apple-macosx -disable-cfi | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7 -disable-cfi | FileCheck --check-prefix=CHECK-64-D11 %s
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.6 -disable-cfi | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.5 -disable-cfi | FileCheck --check-prefix=CHECK-64-D89 %s
+; RUN: llc < %s -mtriple=i686-apple-macosx10.6 -disable-cfi | FileCheck --check-prefix=CHECK-I686-D10 %s
+; RUN: llc < %s -mtriple=i686-apple-macosx10.5 -disable-cfi | FileCheck --check-prefix=CHECK-I686-D89 %s
+; RUN: llc < %s -mtriple=i686-apple-macosx10.4 -disable-cfi | FileCheck --check-prefix=CHECK-I686-D89 %s
define private void @foo() {
ret void
@@ -19,3 +24,44 @@ define void @bar() {
; CHECK: Ltmp19:
; CHECK-NEXT: Ltmp20 = Ltmp2-Ltmp19 ## FDE initial location
; CHECK-NEXT: .quad Ltmp20
+
+
+; CHECK-64-D11: Ltmp13:
+; CHECK-64-D11-NEXT: Ltmp14 = L_foo-Ltmp13 ## FDE initial location
+; CHECK-64-D11-NEXT: .quad Ltmp14
+
+; CHECK-64-D11: Ltmp20:
+; CHECK-64-D11-NEXT: Ltmp21 = Ltmp2-Ltmp20 ## FDE initial location
+; CHECK-64-D11-NEXT: .quad Ltmp21
+
+
+; CHECK-64-D89: Ltmp12:
+; CHECK-64-D89-NEXT: .quad L_foo-Ltmp12 ## FDE initial location
+; CHECK-64-D89-NEXT: Ltmp13 = (Ltmp0-L_foo)-0 ## FDE address range
+; CHECK-64-D89-NEXT: .quad Ltmp13
+
+; CHECK-64-D89: Ltmp18:
+; CHECK-64-D89-NEXT: .quad Ltmp2-Ltmp18 ## FDE initial location
+; CHECK-64-D89-NEXT: Ltmp19 = (Ltmp4-Ltmp2)-0 ## FDE address range
+; CHECK-64-D89-NEXT: .quad Ltmp19
+
+
+; CHECK-I686-D10: Ltmp12:
+; CHECK-I686-D10-NEXT: Ltmp13 = L_foo-Ltmp12 ## FDE initial location
+; CHECK-I686-D10-NEXT: .long Ltmp13
+
+; CHECK-I686-D10: Ltmp19:
+; CHECK-I686-D10-NEXT: Ltmp20 = Ltmp2-Ltmp19 ## FDE initial location
+; CHECK-I686-D10-NEXT: .long Ltmp20
+
+
+; CHECK-I686-D89: Ltmp12:
+; CHECK-I686-D89-NEXT: .long L_foo-Ltmp12 ## FDE initial location
+; CHECK-I686-D89-NEXT: Ltmp13 = (Ltmp0-L_foo)-0 ## FDE address range
+; CHECK-I686-D89-NEXT: .long Ltmp13
+
+; CHECK-I686-D89: Ltmp18:
+; CHECK-I686-D89-NEXT: .long Ltmp2-Ltmp18 ## FDE initial location
+; CHECK-I686-D89-NEXT: Ltmp19 = (Ltmp4-Ltmp2)-0 ## FDE address range
+; CHECK-I686-D89-NEXT: .long Ltmp19
+
diff --git a/test/CodeGen/X86/stores-merging.ll b/test/CodeGen/X86/stores-merging.ll
new file mode 100644
index 000000000000..61dea088995b
--- /dev/null
+++ b/test/CodeGen/X86/stores-merging.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%structTy = type { i8, i32, i32 }
+
+@e = common global %structTy zeroinitializer, align 4
+
+; CHECK-LABEL: f
+define void @f() {
+entry:
+
+; CHECK: movabsq $528280977409, %rax
+; CHECK: movq %rax, e+4(%rip)
+; CHECK: movl $456, e+8(%rip)
+
+ store i32 1, i32* getelementptr inbounds (%structTy* @e, i64 0, i32 1), align 4
+ store i32 123, i32* getelementptr inbounds (%structTy* @e, i64 0, i32 2), align 4
+ store i32 456, i32* getelementptr inbounds (%structTy* @e, i64 0, i32 2), align 4
+ ret void
+}
+
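A C-level view of the new test (illustrative sketch; the struct and field names are invented here): the first two i32 stores land in adjacent fields and should be merged into one 64-bit store, while the last store overwrites the second field, which is what the movabsq/movq/movl CHECK lines encode (0x7B00000001 packs 1 and 123).

    struct S { char a; int b; int c; } e;   /* mirrors %structTy = { i8, i32, i32 } */

    void f(void) {
        e.b = 1;     /* merged with the next store into a single movq to e+4 */
        e.c = 123;
        e.c = 456;   /* separate movl that overwrites e+8 */
    }
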
diff --git a/test/CodeGen/X86/vaargs.ll b/test/CodeGen/X86/vaargs.ll
new file mode 100644
index 000000000000..ddeb7a336d4a
--- /dev/null
+++ b/test/CodeGen/X86/vaargs.ll
@@ -0,0 +1,67 @@
+; RUN: llc -mcpu=corei7-avx %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=NO-FLAGS
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+%struct.__va_list_tag = type { i32, i32, i8*, i8* }
+
+; Check that va_start lowering spills the incoming vector argument registers
+; to the register save area as expected.
+define i32 @sum(i32 %count, ...) nounwind optsize ssp uwtable {
+; CHECK: testb %al, %al
+; CHECK-NEXT: je
+; CHECK-NEXT: ## BB#{{[0-9]+}}:
+; CHECK-NEXT: vmovaps %xmm0, 48(%rsp)
+; CHECK-NEXT: vmovaps %xmm1, 64(%rsp)
+; CHECK-NEXT: vmovaps %xmm2, 80(%rsp)
+; CHECK-NEXT: vmovaps %xmm3, 96(%rsp)
+; CHECK-NEXT: vmovaps %xmm4, 112(%rsp)
+; CHECK-NEXT: vmovaps %xmm5, 128(%rsp)
+; CHECK-NEXT: vmovaps %xmm6, 144(%rsp)
+; CHECK-NEXT: vmovaps %xmm7, 160(%rsp)
+
+; Check that [EFLAGS] hasn't been pulled in.
+; NO-FLAGS-NOT: %flags
+
+ %ap = alloca [1 x %struct.__va_list_tag], align 16
+ %1 = bitcast [1 x %struct.__va_list_tag]* %ap to i8*
+ call void @llvm.va_start(i8* %1)
+ %2 = icmp sgt i32 %count, 0
+ br i1 %2, label %.lr.ph, label %._crit_edge
+
+.lr.ph: ; preds = %0
+ %3 = getelementptr inbounds [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0, i32 0
+ %4 = getelementptr inbounds [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0, i32 2
+ %.pre = load i32* %3, align 16
+ br label %5
+
+; <label>:5 ; preds = %.lr.ph, %13
+ %6 = phi i32 [ %.pre, %.lr.ph ], [ %14, %13 ]
+ %.01 = phi i32 [ %count, %.lr.ph ], [ %15, %13 ]
+ %7 = icmp ult i32 %6, 41
+ br i1 %7, label %8, label %10
+
+; <label>:8 ; preds = %5
+ %9 = add i32 %6, 8
+ store i32 %9, i32* %3, align 16
+ br label %13
+
+; <label>:10 ; preds = %5
+ %11 = load i8** %4, align 8
+ %12 = getelementptr i8* %11, i64 8
+ store i8* %12, i8** %4, align 8
+ br label %13
+
+; <label>:13 ; preds = %10, %8
+ %14 = phi i32 [ %6, %10 ], [ %9, %8 ]
+ %15 = add nsw i32 %.01, 1
+ %16 = icmp sgt i32 %15, 0
+ br i1 %16, label %5, label %._crit_edge
+
+._crit_edge: ; preds = %13, %0
+ %.0.lcssa = phi i32 [ %count, %0 ], [ %15, %13 ]
+ call void @llvm.va_end(i8* %1)
+ ret i32 %.0.lcssa
+}
+
+declare void @llvm.va_start(i8*) nounwind
+
+declare void @llvm.va_end(i8*) nounwind
diff --git a/test/CodeGen/X86/vastart-defs-eflags.ll b/test/CodeGen/X86/vastart-defs-eflags.ll
new file mode 100644
index 000000000000..6017753fc8fd
--- /dev/null
+++ b/test/CodeGen/X86/vastart-defs-eflags.ll
@@ -0,0 +1,23 @@
+; RUN: llc %s -o - | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+; Check that va_start handling does not insert anything between the testb and
+; the je of the branch.
+define i32 @check_flag(i32 %flags, ...) nounwind {
+entry:
+; CHECK: {{^}} testb $2, %bh
+; CHECK-NOT: test
+; CHECK: {{^}} je
+ %and = and i32 %flags, 512
+ %tobool = icmp eq i32 %and, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ %hasflag = phi i32 [ 1, %if.then ], [ 0, %entry ]
+ ret i32 %hasflag
+}
+
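A rough C equivalent of @check_flag (editor's sketch, not part of the patch): the test pins down that the testb/je pair for the flags & 512 check stays adjacent and is not split by the variadic-argument register-save code.

    int check_flag(int flags, ...) {
        /* Only the fixed argument is tested; the ... exists so the function
           gets the variadic register-save prologue, whose lowering must not
           clobber EFLAGS between the testb and the je. */
        return (flags & 512) ? 1 : 0;
    }
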
diff --git a/test/CodeGen/X86/vec_shift4.ll b/test/CodeGen/X86/vec_shift4.ll
index e2fe45cf9724..b266a6987557 100644
--- a/test/CodeGen/X86/vec_shift4.ll
+++ b/test/CodeGen/X86/vec_shift4.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse4.1 | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=corei7 | FileCheck %s
define <2 x i64> @shl1(<4 x i32> %r, <4 x i32> %a) nounwind readnone ssp {
entry:
diff --git a/test/CodeGen/X86/vshift-4.ll b/test/CodeGen/X86/vshift-4.ll
index 4363cd9399cf..a060cf803727 100644
--- a/test/CodeGen/X86/vshift-4.ll
+++ b/test/CodeGen/X86/vshift-4.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=core2 | FileCheck %s
; test vector shifts converted to proper SSE2 vector shifts when the shift
; amounts are the same when using a shuffle splat.
diff --git a/test/CodeGen/X86/weak_def_can_be_hidden.ll b/test/CodeGen/X86/weak_def_can_be_hidden.ll
index f78f3571cec9..22aa135e65e0 100644
--- a/test/CodeGen/X86/weak_def_can_be_hidden.ll
+++ b/test/CodeGen/X86/weak_def_can_be_hidden.ll
@@ -1,9 +1,16 @@
-; RUN: llc -mtriple=x86_64-apple-darwin -O0 < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-apple-darwin11 -O0 < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-apple-darwin10 -O0 < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-apple-darwin9 -O0 < %s | FileCheck --check-prefix=CHECK-D89 %s
+; RUN: llc -mtriple=i686-apple-darwin9 -O0 < %s | FileCheck --check-prefix=CHECK-D89 %s
+; RUN: llc -mtriple=i686-apple-darwin8 -O0 < %s | FileCheck --check-prefix=CHECK-D89 %s
@v1 = linkonce_odr global i32 32
; CHECK: .globl _v1
; CHECK: .weak_def_can_be_hidden _v1
+; CHECK-D89: .globl _v1
+; CHECK-D89: .weak_definition _v1
+
define i32 @f1() {
%x = load i32 * @v1
ret i32 %x
@@ -13,10 +20,16 @@ define i32 @f1() {
; CHECK: .globl _v2
; CHECK: .weak_definition _v2
+; CHECK-D89: .globl _v2
+; CHECK-D89: .weak_definition _v2
+
@v3 = linkonce_odr unnamed_addr global i32 32
; CHECK: .globl _v3
; CHECK: .weak_def_can_be_hidden _v3
+; CHECK-D89: .globl _v3
+; CHECK-D89: .weak_definition _v3
+
define i32* @f2() {
ret i32* @v2
}
diff --git a/test/MC/Disassembler/X86/x86-64.txt b/test/MC/Disassembler/X86/x86-64.txt
index 8c6bc0e2964c..6f072df7e4b7 100644
--- a/test/MC/Disassembler/X86/x86-64.txt
+++ b/test/MC/Disassembler/X86/x86-64.txt
@@ -241,3 +241,27 @@
# CHECK: pextrw $3, %xmm3, (%rax)
0x66 0x0f 0x3a 0x15 0x18 0x03
+
+# CHECK: $0, 305419896(,%r8)
+0x43 0x80 0x04 0x05 0x78 0x56 0x34 0x12 0x00
+
+# CHECK: $0, 305419896(%r13,%r8)
+0x43 0x80 0x84 0x05 0x78 0x56 0x34 0x12 0x00
+
+# CHECK: $0, 305419896(,%r8)
+0x42 0x80 0x04 0x05 0x78 0x56 0x34 0x12 0x00
+
+# CHECK: $0, 305419896(%rbp,%r8)
+0x42 0x80 0x84 0x05 0x78 0x56 0x34 0x12 0x00
+
+# CHECK: $0, 305419896(,%r12)
+0x42 0x80 0x04 0x25 0x78 0x56 0x34 0x12 0x00
+
+# CHECK: $0, 305419896(%rbp,%r12)
+0x42 0x80 0x84 0x25 0x78 0x56 0x34 0x12 0x00
+
+# CHECK: $0, 305419896
+0x80 0x04 0x25 0x78 0x56 0x34 0x12 0x00
+
+# CHECK: $0, 305419896(%rbp)
+0x80 0x84 0x25 0x78 0x56 0x34 0x12 0x00
diff --git a/test/MC/X86/intel-syntax.s b/test/MC/X86/intel-syntax.s
index 9677da731c17..50f29e0a64c9 100644
--- a/test/MC/X86/intel-syntax.s
+++ b/test/MC/X86/intel-syntax.s
@@ -584,3 +584,12 @@ fsub ST(1)
fsubr ST(1)
fdiv ST(1)
fdivr ST(1)
+
+.bss
+.globl _g0
+.text
+
+// CHECK: movq _g0, %rbx
+// CHECK: movq _g0+8, %rcx
+mov rbx, qword ptr [_g0]
+mov rcx, qword ptr [_g0 + 8]
diff --git a/test/Transforms/InstCombine/vec_extract_var_elt.ll b/test/Transforms/InstCombine/vec_extract_var_elt.ll
index 3c982873e288..f6f9e0134a16 100644
--- a/test/Transforms/InstCombine/vec_extract_var_elt.ll
+++ b/test/Transforms/InstCombine/vec_extract_var_elt.ll
@@ -16,3 +16,11 @@ define void @test (float %b, <8 x float> * %p) {
ret void
}
+; PR18600
+define i32 @test2(i32 %i) {
+ %e = extractelement <4 x i32> bitcast (<2 x i64> <i64 1, i64 2> to <4 x i32>), i32 %i
+ ret i32 %e
+
+; CHECK-LABEL: @test2
+; CHECK: extractelement
+}
diff --git a/test/Transforms/LoopReroll/basic.ll b/test/Transforms/LoopReroll/basic.ll
index 314a14947e3e..3bd6d7ae02e1 100644
--- a/test/Transforms/LoopReroll/basic.ll
+++ b/test/Transforms/LoopReroll/basic.ll
@@ -33,7 +33,7 @@ for.body: ; preds = %for.body, %entry
; CHECK: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %entry ]
; CHECK: %call = tail call i32 @foo(i32 %indvar) #1
; CHECK: %indvar.next = add i32 %indvar, 1
-; CHECK: %exitcond1 = icmp eq i32 %indvar.next, 498
+; CHECK: %exitcond1 = icmp eq i32 %indvar, 497
; CHECK: br i1 %exitcond1, label %for.end, label %for.body
; CHECK: ret
@@ -83,7 +83,7 @@ for.body: ; preds = %entry, %for.body
; CHECK: %arrayidx = getelementptr inbounds i32* %x, i64 %indvar
; CHECK: store i32 %call, i32* %arrayidx, align 4
; CHECK: %indvar.next = add i64 %indvar, 1
-; CHECK: %exitcond = icmp eq i64 %indvar.next, 1500
+; CHECK: %exitcond = icmp eq i64 %indvar, 1499
; CHECK: br i1 %exitcond, label %for.end, label %for.body
; CHECK: ret
@@ -131,7 +131,7 @@ for.body: ; preds = %for.body, %entry
; CHECK: %arrayidx = getelementptr inbounds i32* %x, i64 %indvars.iv
; CHECK: store i32 %call, i32* %arrayidx, align 4
; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-; CHECK: %exitcond1 = icmp eq i64 %indvars.iv.next, 1500
+; CHECK: %exitcond1 = icmp eq i64 %indvars.iv, 1499
; CHECK: br i1 %exitcond1, label %for.end, label %for.body
; CHECK: ret
@@ -213,7 +213,7 @@ for.body: ; preds = %entry, %for.body
; CHECK: %add = fadd float %1, %mul
; CHECK: store float %add, float* %arrayidx2, align 4
; CHECK: %indvar.next = add i64 %indvar, 1
-; CHECK: %exitcond = icmp eq i64 %indvar.next, 3200
+; CHECK: %exitcond = icmp eq i64 %indvar, 3199
; CHECK: br i1 %exitcond, label %for.end, label %for.body
; CHECK: ret
@@ -313,7 +313,7 @@ for.body: ; preds = %entry, %for.body
; CHECK: %add = fadd float %2, %mul
; CHECK: store float %add, float* %arrayidx4, align 4
; CHECK: %indvar.next = add i64 %indvar, 1
-; CHECK: %exitcond = icmp eq i64 %indvar.next, 3200
+; CHECK: %exitcond = icmp eq i64 %indvar, 3199
; CHECK: br i1 %exitcond, label %for.end, label %for.body
; CHECK: ret
diff --git a/test/Transforms/LoopReroll/nonconst_lb.ll b/test/Transforms/LoopReroll/nonconst_lb.ll
new file mode 100644
index 000000000000..a45469bd2393
--- /dev/null
+++ b/test/Transforms/LoopReroll/nonconst_lb.ll
@@ -0,0 +1,152 @@
+; RUN: opt < %s -loop-reroll -S | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-none-linux"
+
+;void foo(int *A, int *B, int m, int n) {
+; for (int i = m; i < n; i+=4) {
+; A[i+0] = B[i+0] * 4;
+; A[i+1] = B[i+1] * 4;
+; A[i+2] = B[i+2] * 4;
+; A[i+3] = B[i+3] * 4;
+; }
+;}
+define void @foo(i32* nocapture %A, i32* nocapture readonly %B, i32 %m, i32 %n) {
+entry:
+ %cmp34 = icmp slt i32 %m, %n
+ br i1 %cmp34, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %i.035 = phi i32 [ %add18, %for.body ], [ %m, %entry ]
+ %arrayidx = getelementptr inbounds i32* %B, i32 %i.035
+ %0 = load i32* %arrayidx, align 4
+ %mul = shl nsw i32 %0, 2
+ %arrayidx2 = getelementptr inbounds i32* %A, i32 %i.035
+ store i32 %mul, i32* %arrayidx2, align 4
+ %add3 = add nsw i32 %i.035, 1
+ %arrayidx4 = getelementptr inbounds i32* %B, i32 %add3
+ %1 = load i32* %arrayidx4, align 4
+ %mul5 = shl nsw i32 %1, 2
+ %arrayidx7 = getelementptr inbounds i32* %A, i32 %add3
+ store i32 %mul5, i32* %arrayidx7, align 4
+ %add8 = add nsw i32 %i.035, 2
+ %arrayidx9 = getelementptr inbounds i32* %B, i32 %add8
+ %2 = load i32* %arrayidx9, align 4
+ %mul10 = shl nsw i32 %2, 2
+ %arrayidx12 = getelementptr inbounds i32* %A, i32 %add8
+ store i32 %mul10, i32* %arrayidx12, align 4
+ %add13 = add nsw i32 %i.035, 3
+ %arrayidx14 = getelementptr inbounds i32* %B, i32 %add13
+ %3 = load i32* %arrayidx14, align 4
+ %mul15 = shl nsw i32 %3, 2
+ %arrayidx17 = getelementptr inbounds i32* %A, i32 %add13
+ store i32 %mul15, i32* %arrayidx17, align 4
+ %add18 = add nsw i32 %i.035, 4
+ %cmp = icmp slt i32 %add18, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+; CHECK-LABEL: @foo
+; CHECK: for.body.preheader: ; preds = %entry
+; CHECK: %0 = add i32 %n, -1
+; CHECK: %1 = sub i32 %0, %m
+; CHECK: %2 = lshr i32 %1, 2
+; CHECK: %3 = mul i32 %2, 4
+; CHECK: %4 = add i32 %m, %3
+; CHECK: %5 = add i32 %4, 3
+; CHECK: br label %for.body
+
+; CHECK: for.body: ; preds = %for.body, %for.body.preheader
+; CHECK: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %for.body.preheader ]
+; CHECK: %6 = add i32 %m, %indvar
+; CHECK: %arrayidx = getelementptr inbounds i32* %B, i32 %6
+; CHECK: %7 = load i32* %arrayidx, align 4
+; CHECK: %mul = shl nsw i32 %7, 2
+; CHECK: %arrayidx2 = getelementptr inbounds i32* %A, i32 %6
+; CHECK: store i32 %mul, i32* %arrayidx2, align 4
+; CHECK: %indvar.next = add i32 %indvar, 1
+; CHECK: %exitcond = icmp eq i32 %6, %5
+; CHECK: br i1 %exitcond, label %for.end, label %for.body
+
+;void daxpy_ur(int n,float da,float *dx,float *dy)
+; {
+; int m = n % 4;
+; for (int i = m; i < n; i = i + 4)
+; {
+; dy[i] = dy[i] + da*dx[i];
+; dy[i+1] = dy[i+1] + da*dx[i+1];
+; dy[i+2] = dy[i+2] + da*dx[i+2];
+; dy[i+3] = dy[i+3] + da*dx[i+3];
+; }
+; }
+define void @daxpy_ur(i32 %n, float %da, float* nocapture readonly %dx, float* nocapture %dy) {
+entry:
+ %rem = srem i32 %n, 4
+ %cmp55 = icmp slt i32 %rem, %n
+ br i1 %cmp55, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %i.056 = phi i32 [ %add27, %for.body ], [ %rem, %entry ]
+ %arrayidx = getelementptr inbounds float* %dy, i32 %i.056
+ %0 = load float* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds float* %dx, i32 %i.056
+ %1 = load float* %arrayidx1, align 4
+ %mul = fmul float %1, %da
+ %add = fadd float %0, %mul
+ store float %add, float* %arrayidx, align 4
+ %add3 = add nsw i32 %i.056, 1
+ %arrayidx4 = getelementptr inbounds float* %dy, i32 %add3
+ %2 = load float* %arrayidx4, align 4
+ %arrayidx6 = getelementptr inbounds float* %dx, i32 %add3
+ %3 = load float* %arrayidx6, align 4
+ %mul7 = fmul float %3, %da
+ %add8 = fadd float %2, %mul7
+ store float %add8, float* %arrayidx4, align 4
+ %add11 = add nsw i32 %i.056, 2
+ %arrayidx12 = getelementptr inbounds float* %dy, i32 %add11
+ %4 = load float* %arrayidx12, align 4
+ %arrayidx14 = getelementptr inbounds float* %dx, i32 %add11
+ %5 = load float* %arrayidx14, align 4
+ %mul15 = fmul float %5, %da
+ %add16 = fadd float %4, %mul15
+ store float %add16, float* %arrayidx12, align 4
+ %add19 = add nsw i32 %i.056, 3
+ %arrayidx20 = getelementptr inbounds float* %dy, i32 %add19
+ %6 = load float* %arrayidx20, align 4
+ %arrayidx22 = getelementptr inbounds float* %dx, i32 %add19
+ %7 = load float* %arrayidx22, align 4
+ %mul23 = fmul float %7, %da
+ %add24 = fadd float %6, %mul23
+ store float %add24, float* %arrayidx20, align 4
+ %add27 = add nsw i32 %i.056, 4
+ %cmp = icmp slt i32 %add27, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+; CHECK-LABEL: @daxpy_ur
+; CHECK: for.body.preheader:
+; CHECK: %0 = add i32 %n, -1
+; CHECK: %1 = sub i32 %0, %rem
+; CHECK: %2 = lshr i32 %1, 2
+; CHECK: %3 = mul i32 %2, 4
+; CHECK: %4 = add i32 %rem, %3
+; CHECK: %5 = add i32 %4, 3
+; CHECK: br label %for.body
+
+; CHECK: for.body:
+; CHECK: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %for.body.preheader ]
+; CHECK: %6 = add i32 %rem, %indvar
+; CHECK: %arrayidx = getelementptr inbounds float* %dy, i32 %6
+; CHECK: %7 = load float* %arrayidx, align 4
+; CHECK: %arrayidx1 = getelementptr inbounds float* %dx, i32 %6
+; CHECK: %8 = load float* %arrayidx1, align 4
+; CHECK: %mul = fmul float %8, %da
+; CHECK: %add = fadd float %7, %mul
+; CHECK: store float %add, float* %arrayidx, align 4
+; CHECK: %indvar.next = add i32 %indvar, 1
+; CHECK: %exitcond = icmp eq i32 %6, %5
+; CHECK: br i1 %exitcond, label %for.end, label %for.body
diff --git a/test/Transforms/LoopReroll/reduction.ll b/test/Transforms/LoopReroll/reduction.ll
index aed7670b666d..c9991c723ec6 100644
--- a/test/Transforms/LoopReroll/reduction.ll
+++ b/test/Transforms/LoopReroll/reduction.ll
@@ -38,7 +38,7 @@ for.body: ; preds = %entry, %for.body
; CHECK: %0 = load i32* %arrayidx, align 4
; CHECK: %add = add nsw i32 %0, %r.029
; CHECK: %indvar.next = add i64 %indvar, 1
-; CHECK: %exitcond = icmp eq i64 %indvar.next, 400
+; CHECK: %exitcond = icmp eq i64 %indvar, 399
; CHECK: br i1 %exitcond, label %for.end, label %for.body
; CHECK: ret
@@ -83,7 +83,7 @@ for.body: ; preds = %entry, %for.body
; CHECK: %0 = load float* %arrayidx, align 4
; CHECK: %add = fadd float %0, %r.029
; CHECK: %indvar.next = add i64 %indvar, 1
-; CHECK: %exitcond = icmp eq i64 %indvar.next, 400
+; CHECK: %exitcond = icmp eq i64 %indvar, 399
; CHECK: br i1 %exitcond, label %for.end, label %for.body
; CHECK: ret
diff --git a/test/Transforms/LoopStrengthReduce/X86/pr17473.ll b/test/Transforms/LoopStrengthReduce/X86/pr17473.ll
new file mode 100644
index 000000000000..4204abc7ca22
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/X86/pr17473.ll
@@ -0,0 +1,67 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; LSR shouldn't normalize an IV if it can't be denormalized back to the original
+; expression. In this testcase, the normalized expression was denormalized to
+; an expression different from the original, and we were losing the sign extension.
+
+; CHECK: [[TMP:%[a-z]+]] = trunc i32 {{.*}} to i8
+; CHECK: {{%[a-z0-9]+}} = sext i8 [[TMP]] to i32
+
+@j = common global i32 0, align 4
+@c = common global i32 0, align 4
+@g = common global i32 0, align 4
+@h = common global i8 0, align 1
+@d = common global i32 0, align 4
+@i = common global i32 0, align 4
+@e = common global i32 0, align 4
+@.str = private unnamed_addr constant [4 x i8] c"%x\0A\00", align 1
+@a = common global i32 0, align 4
+@b = common global i16 0, align 2
+
+; Function Attrs: nounwind optsize ssp uwtable
+define i32 @main() #0 {
+entry:
+ store i8 0, i8* @h, align 1
+ %0 = load i32* @j, align 4
+ %tobool.i = icmp eq i32 %0, 0
+ %1 = load i32* @d, align 4
+ %cmp3 = icmp sgt i32 %1, -1
+ %.lobit = lshr i32 %1, 31
+ %.lobit.not = xor i32 %.lobit, 1
+ br label %for.body
+
+for.body: ; preds = %entry, %fn3.exit
+ %inc9 = phi i8 [ 0, %entry ], [ %inc, %fn3.exit ]
+ %conv = sext i8 %inc9 to i32
+ br i1 %tobool.i, label %fn3.exit, label %land.rhs.i
+
+land.rhs.i: ; preds = %for.body
+ store i32 0, i32* @c, align 4
+ br label %fn3.exit
+
+fn3.exit: ; preds = %for.body, %land.rhs.i
+ %inc = add i8 %inc9, 1
+ %cmp = icmp sgt i8 %inc, -1
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %fn3.exit
+ %.lobit.not. = select i1 %cmp3, i32 %.lobit.not, i32 0
+ store i32 %conv, i32* @g, align 4
+ store i32 %.lobit.not., i32* @i, align 4
+ store i8 %inc, i8* @h, align 1
+ %conv7 = sext i8 %inc to i32
+ %add = add nsw i32 %conv7, %conv
+ store i32 %add, i32* @e, align 4
+ %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %add) #2
+ ret i32 0
+}
+
+; Function Attrs: nounwind optsize
+declare i32 @printf(i8* nocapture readonly, ...) #1
+
+attributes #0 = { nounwind optsize ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind optsize }
diff --git a/test/Transforms/LoopStrengthReduce/pr18165.ll b/test/Transforms/LoopStrengthReduce/pr18165.ll
new file mode 100644
index 000000000000..89adef7bd49d
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/pr18165.ll
@@ -0,0 +1,88 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; LSR shouldn't reuse IV if the resultant offset is not valid for the operand type.
+; CHECK-NOT: trunc i32 %.ph to i8
+
+%struct.anon = type { i32, i32, i32 }
+
+@c = global i32 1, align 4
+@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
+@b = common global i32 0, align 4
+@a = common global %struct.anon zeroinitializer, align 4
+@e = common global %struct.anon zeroinitializer, align 4
+@d = common global i32 0, align 4
+@f = common global i32 0, align 4
+@g = common global i32 0, align 4
+@h = common global i32 0, align 4
+
+; Function Attrs: nounwind optsize ssp uwtable
+define i32 @main() #0 {
+entry:
+ %0 = load i32* getelementptr inbounds (%struct.anon* @a, i64 0, i32 0), align 4, !tbaa !1
+ %tobool7.i = icmp eq i32 %0, 0
+ %.promoted.i = load i32* getelementptr inbounds (%struct.anon* @a, i64 0, i32 2), align 4, !tbaa !6
+ %f.promoted.i = load i32* @f, align 4, !tbaa !7
+ br label %for.body6.i.outer
+
+for.body6.i.outer: ; preds = %entry, %lor.end.i
+ %.ph = phi i32 [ %add.i, %lor.end.i ], [ 0, %entry ]
+ %or1512.i.ph = phi i32 [ %or15.i, %lor.end.i ], [ %f.promoted.i, %entry ]
+ %or1410.i.ph = phi i32 [ %or14.i, %lor.end.i ], [ %.promoted.i, %entry ]
+ %p.addr.16.i.ph = phi i8 [ %inc10.i, %lor.end.i ], [ -128, %entry ]
+ br i1 %tobool7.i, label %if.end9.i, label %lbl.loopexit.i
+
+lbl.loopexit.i: ; preds = %for.body6.i.outer, %lbl.loopexit.i
+ br label %lbl.loopexit.i
+
+if.end9.i: ; preds = %for.body6.i.outer
+ %inc10.i = add i8 %p.addr.16.i.ph, 1
+ %tobool12.i = icmp eq i8 %p.addr.16.i.ph, 0
+ br i1 %tobool12.i, label %lor.rhs.i, label %lor.end.i
+
+lor.rhs.i: ; preds = %if.end9.i
+ %1 = load i32* @b, align 4, !tbaa !7
+ %dec.i = add nsw i32 %1, -1
+ store i32 %dec.i, i32* @b, align 4, !tbaa !7
+ %tobool13.i = icmp ne i32 %1, 0
+ br label %lor.end.i
+
+lor.end.i: ; preds = %lor.rhs.i, %if.end9.i
+ %2 = phi i1 [ true, %if.end9.i ], [ %tobool13.i, %lor.rhs.i ]
+ %lor.ext.i = zext i1 %2 to i32
+ %or14.i = or i32 %lor.ext.i, %or1410.i.ph
+ %or15.i = or i32 %or14.i, %or1512.i.ph
+ %add.i = add nsw i32 %.ph, 2
+ %cmp.i = icmp slt i32 %add.i, 21
+ br i1 %cmp.i, label %for.body6.i.outer, label %fn1.exit
+
+fn1.exit: ; preds = %lor.end.i
+ store i32 0, i32* @g, align 4, !tbaa !7
+ store i32 %or14.i, i32* getelementptr inbounds (%struct.anon* @a, i64 0, i32 2), align 4, !tbaa !6
+ store i32 %or15.i, i32* @f, align 4, !tbaa !7
+ store i32 %add.i, i32* getelementptr inbounds (%struct.anon* @e, i64 0, i32 1), align 4, !tbaa !8
+ store i32 0, i32* @h, align 4, !tbaa !7
+ %3 = load i32* @b, align 4, !tbaa !7
+ %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %3) #2
+ ret i32 0
+}
+
+; Function Attrs: nounwind optsize
+declare i32 @printf(i8* nocapture readonly, ...) #1
+
+attributes #0 = { nounwind optsize ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind optsize }
+
+!llvm.ident = !{!0}
+
+!0 = metadata !{metadata !"clang version 3.5 "}
+!1 = metadata !{metadata !2, metadata !3, i64 0}
+!2 = metadata !{metadata !"", metadata !3, i64 0, metadata !3, i64 4, metadata !3, i64 8}
+!3 = metadata !{metadata !"int", metadata !4, i64 0}
+!4 = metadata !{metadata !"omnipotent char", metadata !5, i64 0}
+!5 = metadata !{metadata !"Simple C/C++ TBAA"}
+!6 = metadata !{metadata !2, metadata !3, i64 8}
+!7 = metadata !{metadata !3, metadata !3, i64 0}
+!8 = metadata !{metadata !2, metadata !3, i64 4}
diff --git a/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll b/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll
new file mode 100644
index 000000000000..5fc5ed55a99d
--- /dev/null
+++ b/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll
@@ -0,0 +1,42 @@
+; RUN: opt -indvars -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; We must not vectorize this loop. %add55 is not a reduction. Its value is used
+; multiple times.
+
+; PR18526
+
+; CHECK: multiple_use_of_value
+; CHECK-NOT: <2 x i32>
+
+define void @multiple_use_of_value() {
+entry:
+ %n = alloca i32, align 4
+ %k7 = alloca i32, align 4
+ %nf = alloca i32, align 4
+ %0 = load i32* %k7, align 4
+ %.neg1 = sub i32 0, %0
+ %n.promoted = load i32* %n, align 4
+ %nf.promoted = load i32* %nf, align 4
+ br label %for.body
+
+for.body:
+ %inc107 = phi i32 [ undef, %entry ], [ %inc10, %for.body ]
+ %inc6 = phi i32 [ %nf.promoted, %entry ], [ undef, %for.body ]
+ %add55 = phi i32 [ %n.promoted, %entry ], [ %add5, %for.body ]
+ %.neg2 = sub i32 0, %inc6
+ %add.neg = add i32 0, %add55
+ %add4.neg = add i32 %add.neg, %.neg1
+ %sub = add i32 %add4.neg, %.neg2
+ %add5 = add i32 %sub, %add55
+ %inc10 = add i32 %inc107, 1
+ %cmp = icmp ult i32 %inc10, 61
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+ %add5.lcssa = phi i32 [ %add5, %for.body ]
+ store i32 %add5.lcssa, i32* %n, align 4
+ ret void
+}
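
To see why %add55 does not qualify, compare with a genuine reduction (a generic sketch, unrelated to the patch): a reduction variable may only feed its own update and the single use after the loop, whereas %add55 above also feeds %add.neg inside the same iteration.

    int sum(const int *a, int n) {
        int s = 0;
        for (int i = 0; i < n; ++i)
            s += a[i];      /* s has one in-loop use: its own update */
        return s;           /* plus one use after the loop */
    }
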
diff --git a/tools/llvm-shlib/Makefile b/tools/llvm-shlib/Makefile
index 92f3132ff539..4a0c2ea68df3 100644
--- a/tools/llvm-shlib/Makefile
+++ b/tools/llvm-shlib/Makefile
@@ -10,10 +10,12 @@
LEVEL := ../..
LIBRARYNAME = LLVM-$(LLVMVersion)
+LIBRARYALIASNAME = LLVM-$(LLVM_VERSION_MAJOR).$(LLVM_VERSION_MINOR)$(LLVM_VERSION_SUFFIX)
NO_BUILD_ARCHIVE := 1
LINK_LIBS_IN_SHARED := 1
SHARED_LIBRARY := 1
+SHARED_ALIAS := 1
include $(LEVEL)/Makefile.config
diff --git a/utils/release/test-release.sh b/utils/release/test-release.sh
index 91dac4ca7821..90c5f1ab2d08 100755
--- a/utils/release/test-release.sh
+++ b/utils/release/test-release.sh
@@ -26,6 +26,7 @@ Base_url="http://llvm.org/svn/llvm-project"
Release=""
Release_no_dot=""
RC=""
+DOT=""
Triple=""
use_gzip="no"
do_checkout="yes"
@@ -45,6 +46,7 @@ function usage() {
echo ""
echo " -release X.Y The release number to test."
echo " -rc NUM The pre-release candidate number."
+ echo " -dot NUM The dot release to test e.g. X.Y.DOT_NUM [default: 0]"
echo " -final The final release candidate."
echo " -triple TRIPLE The target triple for this machine."
echo " -j NUM Number of compile jobs to run. [default: 3]"
@@ -76,6 +78,13 @@ while [ $# -gt 0 ]; do
-final | --final )
RC=final
;;
+ -dot | --dot )
+ shift
+ DOT="$1"
+ if [ $DOT -eq 0 ]; then
+ DOT=""
+ fi
+ ;;
-triple | --triple )
shift
Triple="$1"
@@ -137,6 +146,10 @@ while [ $# -gt 0 ]; do
shift
done
+if [ -n "$DOT" ]; then
+ Release="$Release.$DOT"
+fi
+
# Check required arguments.
if [ -z "$Release" ]; then
echo "error: no release number specified"
@@ -217,12 +230,29 @@ if [ `uname -s` != "Darwin" ]; then
check_program_exists 'objdump'
fi
+function get_svn_tag() {
+ case $1 in
+ # llvm and clang are the only projects currently doing dot releases.
+ llvm | cfe)
+ if [ -z $DOT ]; then
+ SvnTag="$RC"
+ else
+ SvnTag="dot$DOT-$RC"
+ fi
+ ;;
+ *)
+ SvnTag="$RC"
+ ;;
+ esac
+}
+
# Make sure that the URLs are valid.
function check_valid_urls() {
for proj in $projects ; do
echo "# Validating $proj SVN URL"
- if ! svn ls $Base_url/$proj/tags/RELEASE_$Release_no_dot/$RC > /dev/null 2>&1 ; then
+ get_svn_tag "$proj"
+ if ! svn ls $Base_url/$proj/tags/RELEASE_$Release_no_dot/$SvnTag > /dev/null 2>&1 ; then
echo "llvm $Release release candidate $RC doesn't exist!"
exit 1
fi
@@ -235,7 +265,8 @@ function export_sources() {
for proj in $projects ; do
echo "# Exporting $proj $Release-RC$RC sources"
- if ! svn export -q $Base_url/$proj/tags/RELEASE_$Release_no_dot/$RC $proj.src ; then
+ get_svn_tag "$proj"
+ if ! svn export -q $Base_url/$proj/tags/RELEASE_$Release_no_dot/$SvnTag $proj.src ; then
echo "error: failed to export $proj project"
exit 1
fi