-rw-r--r--Makefile11
-rw-r--r--Makefile.config.in4
-rw-r--r--Makefile.rules57
-rw-r--r--autoconf/configure.ac17
-rw-r--r--autoconf/m4/link_options.m447
-rw-r--r--bindings/ocaml/llvm/llvm.ml76
-rw-r--r--bindings/ocaml/llvm/llvm.mli26
-rw-r--r--bindings/ocaml/llvm/llvm_ocaml.c17
-rwxr-xr-xconfigure135
-rw-r--r--docs/CodeGenerator.html4
-rw-r--r--docs/CommandGuide/lli.pod4
-rw-r--r--docs/GoldPlugin.html35
-rw-r--r--docs/HowToSubmitABug.html10
-rw-r--r--docs/LangRef.html313
-rw-r--r--docs/LinkTimeOptimization.html4
-rw-r--r--docs/Passes.html12
-rw-r--r--docs/ReleaseNotes.html712
-rw-r--r--docs/TableGenFundamentals.html7
-rw-r--r--docs/WritingAnLLVMPass.html4
-rw-r--r--examples/ExceptionDemo/ExceptionDemo.cpp5
-rw-r--r--examples/ExceptionDemo/Makefile3
-rw-r--r--include/llvm-c/Core.h2
-rw-r--r--include/llvm-c/EnhancedDisassembly.h3
-rw-r--r--include/llvm-c/Target.h10
-rw-r--r--include/llvm-c/Transforms/IPO.h6
-rw-r--r--include/llvm/ADT/APInt.h64
-rw-r--r--include/llvm/ADT/BitVector.h5
-rw-r--r--include/llvm/ADT/DenseMapInfo.h10
-rw-r--r--include/llvm/ADT/ImmutableSet.h2
-rw-r--r--include/llvm/ADT/Optional.h66
-rw-r--r--include/llvm/ADT/SCCIterator.h68
-rw-r--r--include/llvm/ADT/SmallBitVector.h165
-rw-r--r--include/llvm/ADT/SmallVector.h2
-rw-r--r--include/llvm/ADT/StringExtras.h21
-rw-r--r--include/llvm/ADT/StringRef.h4
-rw-r--r--include/llvm/Analysis/CallGraph.h8
-rw-r--r--include/llvm/Analysis/DebugInfo.h8
-rw-r--r--include/llvm/Analysis/IVUsers.h74
-rw-r--r--include/llvm/Analysis/InlineCost.h20
-rw-r--r--include/llvm/Analysis/InstructionSimplify.h4
-rw-r--r--include/llvm/Analysis/Lint.h52
-rw-r--r--include/llvm/Analysis/PointerTracking.h2
-rw-r--r--include/llvm/Analysis/ScalarEvolution.h29
-rw-r--r--include/llvm/Analysis/ScalarEvolutionExpander.h29
-rw-r--r--include/llvm/Analysis/ScalarEvolutionNormalization.h78
-rw-r--r--include/llvm/Analysis/ValueTracking.h3
-rw-r--r--include/llvm/Analysis/Verifier.h2
-rw-r--r--include/llvm/Bitcode/Archive.h4
-rw-r--r--include/llvm/CallGraphSCCPass.h34
-rw-r--r--include/llvm/CodeGen/Analysis.h80
-rw-r--r--include/llvm/CodeGen/AsmPrinter.h34
-rw-r--r--include/llvm/CodeGen/FastISel.h69
-rw-r--r--include/llvm/CodeGen/GCMetadata.h6
-rw-r--r--include/llvm/CodeGen/ISDOpcodes.h765
-rw-r--r--include/llvm/CodeGen/JITCodeEmitter.h16
-rw-r--r--include/llvm/CodeGen/LinkAllCodegenComponents.h1
-rw-r--r--include/llvm/CodeGen/LiveIntervalAnalysis.h6
-rw-r--r--include/llvm/CodeGen/MachineBasicBlock.h9
-rw-r--r--include/llvm/CodeGen/MachineConstantPool.h6
-rw-r--r--include/llvm/CodeGen/MachineFunction.h25
-rw-r--r--include/llvm/CodeGen/MachineInstr.h12
-rw-r--r--include/llvm/CodeGen/MachineInstrBuilder.h2
-rw-r--r--include/llvm/CodeGen/MachineModuleInfo.h31
-rw-r--r--include/llvm/CodeGen/MachineOperand.h12
-rw-r--r--include/llvm/CodeGen/MachineRegisterInfo.h24
-rw-r--r--include/llvm/CodeGen/MachineSSAUpdater.h24
-rw-r--r--include/llvm/CodeGen/Passes.h9
-rw-r--r--include/llvm/CodeGen/ScheduleDAG.h4
-rw-r--r--include/llvm/CodeGen/SelectionDAG.h46
-rw-r--r--include/llvm/CodeGen/SelectionDAGISel.h42
-rw-r--r--include/llvm/CodeGen/SelectionDAGNodes.h767
-rw-r--r--include/llvm/CodeGen/SlotIndexes.h10
-rw-r--r--include/llvm/CodeGen/TargetLoweringObjectFileImpl.h30
-rw-r--r--include/llvm/Config/config.h.in3
-rw-r--r--include/llvm/InlineAsm.h47
-rw-r--r--include/llvm/IntrinsicInst.h3
-rw-r--r--include/llvm/IntrinsicsX86.td10
-rw-r--r--include/llvm/LLVMContext.h10
-rw-r--r--include/llvm/LinkAllPasses.h3
-rw-r--r--include/llvm/MC/EDInstInfo.h29
-rw-r--r--include/llvm/MC/MCAsmInfo.h11
-rw-r--r--include/llvm/MC/MCContext.h27
-rw-r--r--include/llvm/MC/MCDisassembler.h12
-rw-r--r--include/llvm/MC/MCObjectWriter.h2
-rw-r--r--include/llvm/MC/MCParser/AsmParser.h15
-rw-r--r--include/llvm/MC/MCParser/MCAsmLexer.h2
-rw-r--r--include/llvm/MC/MCSectionELF.h43
-rw-r--r--include/llvm/MC/MCSectionMachO.h24
-rw-r--r--include/llvm/Metadata.h2
-rw-r--r--include/llvm/Support/Allocator.h2
-rw-r--r--include/llvm/Support/CFG.h86
-rw-r--r--include/llvm/Support/CallSite.h174
-rw-r--r--include/llvm/Support/Dwarf.h1
-rw-r--r--include/llvm/Support/ELF.h11
-rw-r--r--include/llvm/Support/ErrorHandling.h24
-rw-r--r--include/llvm/Support/GraphWriter.h5
-rw-r--r--include/llvm/Support/IRBuilder.h61
-rw-r--r--include/llvm/Support/IRReader.h14
-rw-r--r--include/llvm/Support/RecyclingAllocator.h7
-rw-r--r--include/llvm/Support/SourceMgr.h19
-rw-r--r--include/llvm/Support/ValueHandle.h2
-rw-r--r--include/llvm/Support/raw_ostream.h2
-rw-r--r--include/llvm/System/DataTypes.h.in2
-rw-r--r--include/llvm/Target/Target.td21
-rw-r--r--include/llvm/Target/TargetInstrInfo.h27
-rw-r--r--include/llvm/Target/TargetInstrItineraries.h20
-rw-r--r--include/llvm/Target/TargetLowering.h118
-rw-r--r--include/llvm/Target/TargetMachine.h19
-rw-r--r--include/llvm/Target/TargetOpcodes.h12
-rw-r--r--include/llvm/Target/TargetOptions.h22
-rw-r--r--include/llvm/Target/TargetRegisterInfo.h2
-rw-r--r--include/llvm/Target/TargetSchedule.td18
-rw-r--r--include/llvm/Target/TargetSelectionDAGInfo.h36
-rw-r--r--include/llvm/Transforms/IPO/InlinerPass.h4
-rw-r--r--include/llvm/Transforms/Scalar.h8
-rw-r--r--include/llvm/Transforms/Utils/BuildLibCalls.h2
-rw-r--r--include/llvm/Transforms/Utils/Cloning.h45
-rw-r--r--include/llvm/Transforms/Utils/SSAUpdater.h30
-rw-r--r--include/llvm/Type.h3
-rw-r--r--lib/Analysis/AliasAnalysisEvaluator.cpp33
-rw-r--r--lib/Analysis/BasicAliasAnalysis.cpp5
-rw-r--r--lib/Analysis/CMakeLists.txt2
-rw-r--r--lib/Analysis/ConstantFolding.cpp2
-rw-r--r--lib/Analysis/DebugInfo.cpp35
-rw-r--r--lib/Analysis/DomPrinter.cpp25
-rw-r--r--lib/Analysis/IPA/CallGraph.cpp29
-rw-r--r--lib/Analysis/IPA/CallGraphSCCPass.cpp360
-rw-r--r--lib/Analysis/IVUsers.cpp283
-rw-r--r--lib/Analysis/InlineCost.cpp144
-rw-r--r--lib/Analysis/InstructionSimplify.cpp32
-rw-r--r--lib/Analysis/Lint.cpp495
-rw-r--r--lib/Analysis/LoopInfo.cpp4
-rw-r--r--lib/Analysis/PointerTracking.cpp8
-rw-r--r--lib/Analysis/PostDominators.cpp1
-rw-r--r--lib/Analysis/README.txt12
-rw-r--r--lib/Analysis/ScalarEvolution.cpp932
-rw-r--r--lib/Analysis/ScalarEvolutionExpander.cpp56
-rw-r--r--lib/Analysis/ScalarEvolutionNormalization.cpp150
-rw-r--r--lib/Analysis/ValueTracking.cpp25
-rw-r--r--lib/Archive/Archive.cpp5
-rw-r--r--lib/Archive/ArchiveInternals.h2
-rw-r--r--lib/Archive/ArchiveReader.cpp20
-rw-r--r--lib/Archive/ArchiveWriter.cpp3
-rw-r--r--lib/Archive/CMakeLists.txt2
-rw-r--r--lib/AsmParser/LLParser.cpp51
-rw-r--r--lib/AsmParser/LLParser.h2
-rw-r--r--lib/AsmParser/Parser.cpp4
-rw-r--r--lib/CodeGen/AggressiveAntiDepBreaker.cpp56
-rw-r--r--lib/CodeGen/AggressiveAntiDepBreaker.h6
-rw-r--r--lib/CodeGen/Analysis.cpp285
-rw-r--r--lib/CodeGen/AntiDepBreaker.h6
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinter.cpp332
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp61
-rw-r--r--lib/CodeGen/AsmPrinter/CMakeLists.txt2
-rw-r--r--lib/CodeGen/AsmPrinter/DIE.h3
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.cpp868
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.h74
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfException.cpp33
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfException.h4
-rw-r--r--lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp22
-rw-r--r--lib/CodeGen/BranchFolding.cpp29
-rw-r--r--lib/CodeGen/CMakeLists.txt2
-rw-r--r--lib/CodeGen/CriticalAntiDepBreaker.cpp26
-rw-r--r--lib/CodeGen/CriticalAntiDepBreaker.h6
-rw-r--r--lib/CodeGen/DwarfEHPrepare.cpp16
-rw-r--r--lib/CodeGen/ELFWriter.cpp10
-rw-r--r--lib/CodeGen/ELFWriter.h2
-rw-r--r--lib/CodeGen/ExactHazardRecognizer.cpp95
-rw-r--r--lib/CodeGen/ExactHazardRecognizer.h67
-rw-r--r--lib/CodeGen/GCStrategy.cpp7
-rw-r--r--lib/CodeGen/IntrinsicLowering.cpp6
-rw-r--r--lib/CodeGen/LLVMTargetMachine.cpp21
-rw-r--r--lib/CodeGen/LiveIntervalAnalysis.cpp28
-rw-r--r--lib/CodeGen/LiveVariables.cpp2
-rw-r--r--lib/CodeGen/MachineBasicBlock.cpp9
-rw-r--r--lib/CodeGen/MachineCSE.cpp96
-rw-r--r--lib/CodeGen/MachineFunction.cpp18
-rw-r--r--lib/CodeGen/MachineFunctionAnalysis.cpp3
-rw-r--r--lib/CodeGen/MachineInstr.cpp63
-rw-r--r--lib/CodeGen/MachineLICM.cpp339
-rw-r--r--lib/CodeGen/MachineModuleInfo.cpp31
-rw-r--r--lib/CodeGen/MachineRegisterInfo.cpp135
-rw-r--r--lib/CodeGen/MachineSSAUpdater.cpp550
-rw-r--r--lib/CodeGen/MachineSink.cpp30
-rw-r--r--lib/CodeGen/MachineVerifier.cpp8
-rw-r--r--lib/CodeGen/PostRASchedulerList.cpp16
-rw-r--r--lib/CodeGen/PrologEpilogInserter.cpp88
-rw-r--r--lib/CodeGen/RegAllocFast.cpp932
-rw-r--r--lib/CodeGen/RegAllocLinearScan.cpp2
-rw-r--r--lib/CodeGen/RegAllocLocal.cpp48
-rw-r--r--lib/CodeGen/RegisterScavenging.cpp5
-rw-r--r--lib/CodeGen/ScheduleDAG.cpp1
-rw-r--r--lib/CodeGen/ScheduleDAGInstrs.cpp11
-rw-r--r--lib/CodeGen/ScheduleDAGInstrs.h4
-rw-r--r--lib/CodeGen/SelectionDAG/CMakeLists.txt1
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp612
-rw-r--r--lib/CodeGen/SelectionDAG/FastISel.cpp211
-rw-r--r--lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp175
-rw-r--r--lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h97
-rw-r--r--lib/CodeGen/SelectionDAG/InstrEmitter.cpp131
-rw-r--r--lib/CodeGen/SelectionDAG/InstrEmitter.h15
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeDAG.cpp84
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp18
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp23
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.cpp24
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.h2
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp17
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp2
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp20
-rw-r--r--lib/CodeGen/SelectionDAG/SDNodeDbgValue.h7
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp12
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp62
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h6
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp111
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp957
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h271
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp591
-rw-r--r--lib/CodeGen/SelectionDAG/TargetLowering.cpp59
-rw-r--r--lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp21
-rw-r--r--lib/CodeGen/ShadowStackGC.cpp2
-rw-r--r--lib/CodeGen/SimpleRegisterCoalescing.cpp277
-rw-r--r--lib/CodeGen/SimpleRegisterCoalescing.h13
-rw-r--r--lib/CodeGen/Spiller.cpp26
-rw-r--r--lib/CodeGen/StackSlotColoring.cpp3
-rw-r--r--lib/CodeGen/TargetInstrInfoImpl.cpp2
-rw-r--r--lib/CodeGen/TargetLoweringObjectFileImpl.cpp461
-rw-r--r--lib/CodeGen/VirtRegRewriter.cpp25
-rw-r--r--lib/CodeGen/VirtRegRewriter.h7
-rw-r--r--lib/ExecutionEngine/ExecutionEngine.cpp18
-rw-r--r--lib/ExecutionEngine/Interpreter/Execution.cpp4
-rw-r--r--lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp10
-rw-r--r--lib/ExecutionEngine/Interpreter/Interpreter.h4
-rw-r--r--lib/ExecutionEngine/JIT/Intercept.cpp2
-rw-r--r--lib/ExecutionEngine/JIT/JIT.cpp12
-rw-r--r--lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp2
-rw-r--r--lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp18
-rw-r--r--lib/ExecutionEngine/JIT/JITEmitter.cpp99
-rw-r--r--lib/ExecutionEngine/JIT/JITMemoryManager.cpp8
-rw-r--r--lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp55
-rw-r--r--lib/MC/MCAsmInfo.cpp1
-rw-r--r--lib/MC/MCAssembler.cpp16
-rw-r--r--lib/MC/MCContext.cpp72
-rw-r--r--lib/MC/MCParser/AsmLexer.cpp5
-rw-r--r--lib/MC/MCParser/AsmParser.cpp211
-rw-r--r--lib/MC/MCSectionELF.cpp114
-rw-r--r--lib/MC/MCSectionMachO.cpp24
-rw-r--r--lib/MC/MachObjectWriter.cpp24
-rw-r--r--lib/Support/Allocator.cpp9
-rw-r--r--lib/Support/Debug.cpp11
-rw-r--r--lib/Support/Dwarf.cpp3
-rw-r--r--lib/Support/ErrorHandling.cpp33
-rw-r--r--lib/Support/GraphWriter.cpp45
-rw-r--r--lib/Support/SourceMgr.cpp2
-rw-r--r--lib/Support/Timer.cpp4
-rw-r--r--lib/Support/circular_raw_ostream.cpp12
-rw-r--r--lib/Support/raw_ostream.cpp7
-rw-r--r--lib/Support/regengine.inc2
-rw-r--r--lib/System/DynamicLibrary.cpp24
-rw-r--r--lib/System/Program.cpp6
-rw-r--r--lib/System/Unix/Path.inc25
-rw-r--r--lib/System/Unix/Program.inc103
-rw-r--r--lib/Target/ARM/ARM.td3
-rw-r--r--lib/Target/ARM/ARMAddressingModes.h21
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp14
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.h6
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.cpp18
-rw-r--r--lib/Target/ARM/ARMCodeEmitter.cpp29
-rw-r--r--lib/Target/ARM/ARMConstantPoolValue.cpp9
-rw-r--r--lib/Target/ARM/ARMConstantPoolValue.h10
-rw-r--r--lib/Target/ARM/ARMExpandPseudoInsts.cpp2
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp123
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp235
-rw-r--r--lib/Target/ARM/ARMISelLowering.h68
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td29
-rw-r--r--lib/Target/ARM/ARMInstrNEON.td217
-rw-r--r--lib/Target/ARM/ARMInstrThumb2.td18
-rw-r--r--lib/Target/ARM/ARMInstrVFP.td24
-rw-r--r--lib/Target/ARM/ARMJITInfo.cpp2
-rw-r--r--lib/Target/ARM/ARMLoadStoreOptimizer.cpp2
-rw-r--r--lib/Target/ARM/ARMMachineFunctionInfo.h10
-rw-r--r--lib/Target/ARM/ARMSchedule.td27
-rw-r--r--lib/Target/ARM/ARMScheduleA8.td618
-rw-r--r--lib/Target/ARM/ARMScheduleA9.td749
-rw-r--r--lib/Target/ARM/ARMScheduleV6.td142
-rw-r--r--lib/Target/ARM/ARMScheduleV7.td587
-rw-r--r--lib/Target/ARM/ARMSelectionDAGInfo.cpp22
-rw-r--r--lib/Target/ARM/ARMSelectionDAGInfo.h29
-rw-r--r--lib/Target/ARM/ARMSubtarget.cpp3
-rw-r--r--lib/Target/ARM/ARMSubtarget.h2
-rw-r--r--lib/Target/ARM/ARMTargetMachine.cpp10
-rw-r--r--lib/Target/ARM/ARMTargetMachine.h8
-rw-r--r--lib/Target/ARM/ARMTargetObjectFile.cpp15
-rw-r--r--lib/Target/ARM/AsmParser/ARMAsmLexer.cpp140
-rw-r--r--lib/Target/ARM/AsmParser/ARMAsmParser.cpp3
-rw-r--r--lib/Target/ARM/AsmParser/CMakeLists.txt1
-rw-r--r--lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp46
-rw-r--r--lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp22
-rw-r--r--lib/Target/ARM/AsmPrinter/ARMInstPrinter.h8
-rw-r--r--lib/Target/ARM/CMakeLists.txt2
-rw-r--r--lib/Target/ARM/Disassembler/ARMDisassembler.cpp54
-rw-r--r--lib/Target/ARM/Disassembler/ARMDisassembler.h10
-rw-r--r--lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp516
-rw-r--r--lib/Target/ARM/Disassembler/ARMDisassemblerCore.h40
-rw-r--r--lib/Target/ARM/Disassembler/Makefile16
-rw-r--r--lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h409
-rw-r--r--lib/Target/ARM/Makefile5
-rw-r--r--lib/Target/ARM/Thumb1RegisterInfo.cpp14
-rw-r--r--lib/Target/ARM/Thumb2InstrInfo.cpp28
-rw-r--r--lib/Target/ARM/Thumb2RegisterInfo.cpp2
-rw-r--r--lib/Target/ARM/Thumb2SizeReduction.cpp9
-rw-r--r--lib/Target/Alpha/AlphaCodeEmitter.cpp11
-rw-r--r--lib/Target/Alpha/AlphaISelDAGToDAG.cpp3
-rw-r--r--lib/Target/Alpha/AlphaISelLowering.cpp44
-rw-r--r--lib/Target/Alpha/AlphaISelLowering.h22
-rw-r--r--lib/Target/Alpha/AlphaJITInfo.cpp2
-rw-r--r--lib/Target/Alpha/AlphaMachineFunctionInfo.h18
-rw-r--r--lib/Target/Alpha/AlphaRegisterInfo.cpp17
-rw-r--r--lib/Target/Alpha/AlphaSchedule.td3
-rw-r--r--lib/Target/Alpha/AlphaSelectionDAGInfo.cpp22
-rw-r--r--lib/Target/Alpha/AlphaSelectionDAGInfo.h29
-rw-r--r--lib/Target/Alpha/AlphaTargetMachine.h4
-rw-r--r--lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp2
-rw-r--r--lib/Target/Alpha/CMakeLists.txt1
-rw-r--r--lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp1
-rw-r--r--lib/Target/Blackfin/BlackfinISelLowering.cpp21
-rw-r--r--lib/Target/Blackfin/BlackfinISelLowering.h19
-rw-r--r--lib/Target/Blackfin/BlackfinRegisterInfo.cpp3
-rw-r--r--lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp22
-rw-r--r--lib/Target/Blackfin/BlackfinSelectionDAGInfo.h29
-rw-r--r--lib/Target/Blackfin/BlackfinTargetMachine.h4
-rw-r--r--lib/Target/Blackfin/CMakeLists.txt1
-rw-r--r--lib/Target/CBackend/CBackend.cpp49
-rw-r--r--lib/Target/CellSPU/AsmPrinter/CMakeLists.txt16
-rw-r--r--lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp4
-rw-r--r--lib/Target/CellSPU/CMakeLists.txt1
-rw-r--r--lib/Target/CellSPU/SPUISelDAGToDAG.cpp46
-rw-r--r--lib/Target/CellSPU/SPUISelLowering.cpp99
-rw-r--r--lib/Target/CellSPU/SPUISelLowering.h10
-rw-r--r--lib/Target/CellSPU/SPUMCAsmInfo.cpp1
-rw-r--r--lib/Target/CellSPU/SPUMachineFunction.h8
-rw-r--r--lib/Target/CellSPU/SPURegisterInfo.cpp14
-rw-r--r--lib/Target/CellSPU/SPUSchedule.td2
-rw-r--r--lib/Target/CellSPU/SPUSelectionDAGInfo.cpp22
-rw-r--r--lib/Target/CellSPU/SPUSelectionDAGInfo.h29
-rw-r--r--lib/Target/CellSPU/SPUTargetMachine.h4
-rw-r--r--lib/Target/CppBackend/CPPBackend.cpp15
-rw-r--r--lib/Target/MBlaze/AsmPrinter/CMakeLists.txt16
-rw-r--r--lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp4
-rw-r--r--lib/Target/MBlaze/CMakeLists.txt1
-rw-r--r--lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp1
-rw-r--r--lib/Target/MBlaze/MBlazeISelLowering.cpp58
-rw-r--r--lib/Target/MBlaze/MBlazeISelLowering.h31
-rw-r--r--lib/Target/MBlaze/MBlazeMachineFunction.h8
-rw-r--r--lib/Target/MBlaze/MBlazeRegisterInfo.cpp2
-rw-r--r--lib/Target/MBlaze/MBlazeSchedule.td3
-rw-r--r--lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp22
-rw-r--r--lib/Target/MBlaze/MBlazeSelectionDAGInfo.h29
-rw-r--r--lib/Target/MBlaze/MBlazeTargetMachine.h4
-rw-r--r--lib/Target/MBlaze/MBlazeTargetObjectFile.cpp13
-rw-r--r--lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp12
-rw-r--r--lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h1
-rw-r--r--lib/Target/MSP430/CMakeLists.txt1
-rw-r--r--lib/Target/MSP430/MSP430BranchSelector.cpp2
-rw-r--r--lib/Target/MSP430/MSP430ISelDAGToDAG.cpp11
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.cpp78
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.h41
-rw-r--r--lib/Target/MSP430/MSP430InstrInfo.cpp10
-rw-r--r--lib/Target/MSP430/MSP430InstrInfo.td31
-rw-r--r--lib/Target/MSP430/MSP430RegisterInfo.cpp2
-rw-r--r--lib/Target/MSP430/MSP430SelectionDAGInfo.cpp22
-rw-r--r--lib/Target/MSP430/MSP430SelectionDAGInfo.h29
-rw-r--r--lib/Target/MSP430/MSP430TargetMachine.h4
-rw-r--r--lib/Target/Mangler.cpp13
-rw-r--r--lib/Target/Mips/AsmPrinter/CMakeLists.txt16
-rw-r--r--lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp2
-rw-r--r--lib/Target/Mips/CMakeLists.txt1
-rw-r--r--lib/Target/Mips/MipsISelDAGToDAG.cpp1
-rw-r--r--lib/Target/Mips/MipsISelLowering.cpp58
-rw-r--r--lib/Target/Mips/MipsISelLowering.h41
-rw-r--r--lib/Target/Mips/MipsMachineFunction.h9
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.cpp2
-rw-r--r--lib/Target/Mips/MipsSchedule.td2
-rw-r--r--lib/Target/Mips/MipsSelectionDAGInfo.cpp22
-rw-r--r--lib/Target/Mips/MipsSelectionDAGInfo.h29
-rw-r--r--lib/Target/Mips/MipsTargetMachine.h4
-rw-r--r--lib/Target/Mips/MipsTargetObjectFile.cpp13
-rw-r--r--lib/Target/PIC16/AsmPrinter/CMakeLists.txt14
-rw-r--r--lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp8
-rw-r--r--lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h7
-rw-r--r--lib/Target/PIC16/CMakeLists.txt1
-rw-r--r--lib/Target/PIC16/PIC16.h36
-rw-r--r--lib/Target/PIC16/PIC16ISelDAGToDAG.h11
-rw-r--r--lib/Target/PIC16/PIC16ISelLowering.cpp156
-rw-r--r--lib/Target/PIC16/PIC16ISelLowering.h106
-rw-r--r--lib/Target/PIC16/PIC16InstrInfo.cpp16
-rw-r--r--lib/Target/PIC16/PIC16MachineFunctionInfo.h52
-rw-r--r--lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp2
-rw-r--r--lib/Target/PIC16/PIC16SelectionDAGInfo.cpp22
-rw-r--r--lib/Target/PIC16/PIC16SelectionDAGInfo.h29
-rw-r--r--lib/Target/PIC16/PIC16TargetMachine.h4
-rw-r--r--lib/Target/PIC16/PIC16TargetObjectFile.cpp7
-rw-r--r--lib/Target/PIC16/PIC16TargetObjectFile.h2
-rw-r--r--lib/Target/PowerPC/AsmPrinter/CMakeLists.txt2
-rw-r--r--lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp64
-rw-r--r--lib/Target/PowerPC/CMakeLists.txt1
-rw-r--r--lib/Target/PowerPC/PPCCodeEmitter.cpp2
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp5
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp227
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h107
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.cpp16
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.h6
-rw-r--r--lib/Target/PowerPC/PPCMachineFunctionInfo.h30
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.cpp28
-rw-r--r--lib/Target/PowerPC/PPCSchedule.td3
-rw-r--r--lib/Target/PowerPC/PPCScheduleG3.td3
-rw-r--r--lib/Target/PowerPC/PPCScheduleG4.td3
-rw-r--r--lib/Target/PowerPC/PPCScheduleG4Plus.td6
-rw-r--r--lib/Target/PowerPC/PPCScheduleG5.td3
-rw-r--r--lib/Target/PowerPC/PPCSelectionDAGInfo.cpp22
-rw-r--r--lib/Target/PowerPC/PPCSelectionDAGInfo.h29
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.h4
-rw-r--r--lib/Target/README.txt31
-rw-r--r--lib/Target/Sparc/AsmPrinter/CMakeLists.txt2
-rw-r--r--lib/Target/Sparc/CMakeLists.txt1
-rw-r--r--lib/Target/Sparc/SparcISelDAGToDAG.cpp2
-rw-r--r--lib/Target/Sparc/SparcISelLowering.cpp43
-rw-r--r--lib/Target/Sparc/SparcISelLowering.h21
-rw-r--r--lib/Target/Sparc/SparcMachineFunctionInfo.h12
-rw-r--r--lib/Target/Sparc/SparcSelectionDAGInfo.cpp22
-rw-r--r--lib/Target/Sparc/SparcSelectionDAGInfo.h29
-rw-r--r--lib/Target/Sparc/SparcTargetMachine.h4
-rw-r--r--lib/Target/SystemZ/CMakeLists.txt1
-rw-r--r--lib/Target/SystemZ/SystemZISelDAGToDAG.cpp5
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.cpp41
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.h29
-rw-r--r--lib/Target/SystemZ/SystemZInstrBuilder.h2
-rw-r--r--lib/Target/SystemZ/SystemZMCAsmInfo.cpp8
-rw-r--r--lib/Target/SystemZ/SystemZMCAsmInfo.h2
-rw-r--r--lib/Target/SystemZ/SystemZRegisterInfo.cpp4
-rw-r--r--lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp22
-rw-r--r--lib/Target/SystemZ/SystemZSelectionDAGInfo.h29
-rw-r--r--lib/Target/SystemZ/SystemZTargetMachine.h4
-rw-r--r--lib/Target/TargetData.cpp33
-rw-r--r--lib/Target/TargetLoweringObjectFile.cpp2
-rw-r--r--lib/Target/TargetMachine.cpp59
-rw-r--r--lib/Target/X86/AsmParser/X86AsmParser.cpp29
-rw-r--r--lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp27
-rw-r--r--lib/Target/X86/AsmPrinter/X86AsmPrinter.h2
-rw-r--r--lib/Target/X86/AsmPrinter/X86MCInstLower.cpp80
-rw-r--r--lib/Target/X86/CMakeLists.txt2
-rw-r--r--lib/Target/X86/Disassembler/CMakeLists.txt7
-rw-r--r--lib/Target/X86/Disassembler/X86Disassembler.cpp6
-rw-r--r--lib/Target/X86/Disassembler/X86Disassembler.h5
-rw-r--r--lib/Target/X86/Disassembler/X86DisassemblerDecoder.c4
-rw-r--r--lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h1
-rw-r--r--lib/Target/X86/SSEDomainFix.cpp20
-rw-r--r--lib/Target/X86/X86.h6
-rw-r--r--lib/Target/X86/X86AsmBackend.cpp2
-rw-r--r--lib/Target/X86/X86CodeEmitter.cpp24
-rw-r--r--lib/Target/X86/X86ELFWriterInfo.cpp1
-rw-r--r--lib/Target/X86/X86FastISel.cpp183
-rw-r--r--lib/Target/X86/X86FloatingPoint.cpp13
-rw-r--r--lib/Target/X86/X86ISelDAGToDAG.cpp125
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp594
-rw-r--r--lib/Target/X86/X86ISelLowering.h209
-rw-r--r--lib/Target/X86/X86Instr64bit.td34
-rw-r--r--lib/Target/X86/X86InstrBuilder.h2
-rw-r--r--lib/Target/X86/X86InstrFPStack.td12
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp70
-rw-r--r--lib/Target/X86/X86InstrInfo.h8
-rw-r--r--lib/Target/X86/X86InstrInfo.td175
-rw-r--r--lib/Target/X86/X86InstrMMX.td13
-rw-r--r--lib/Target/X86/X86InstrSSE.td39
-rw-r--r--lib/Target/X86/X86MCAsmInfo.cpp8
-rw-r--r--lib/Target/X86/X86MCAsmInfo.h2
-rw-r--r--lib/Target/X86/X86MachineFunctionInfo.h44
-rw-r--r--lib/Target/X86/X86RegisterInfo.cpp67
-rw-r--r--lib/Target/X86/X86RegisterInfo.td38
-rw-r--r--lib/Target/X86/X86SelectionDAGInfo.cpp22
-rw-r--r--lib/Target/X86/X86SelectionDAGInfo.h29
-rw-r--r--lib/Target/X86/X86Subtarget.h3
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp11
-rw-r--r--lib/Target/X86/X86TargetMachine.h4
-rw-r--r--lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp4
-rw-r--r--lib/Target/XCore/CMakeLists.txt2
-rw-r--r--lib/Target/XCore/MCSectionXCore.cpp35
-rw-r--r--lib/Target/XCore/MCSectionXCore.h54
-rw-r--r--lib/Target/XCore/XCoreISelDAGToDAG.cpp3
-rw-r--r--lib/Target/XCore/XCoreISelLowering.cpp67
-rw-r--r--lib/Target/XCore/XCoreISelLowering.h60
-rw-r--r--lib/Target/XCore/XCoreRegisterInfo.cpp51
-rw-r--r--lib/Target/XCore/XCoreSelectionDAGInfo.cpp22
-rw-r--r--lib/Target/XCore/XCoreSelectionDAGInfo.h29
-rw-r--r--lib/Target/XCore/XCoreTargetMachine.h4
-rw-r--r--lib/Target/XCore/XCoreTargetObjectFile.cpp55
-rw-r--r--lib/Transforms/IPO/ArgumentPromotion.cpp23
-rw-r--r--lib/Transforms/IPO/CMakeLists.txt2
-rw-r--r--lib/Transforms/IPO/DeadArgumentElimination.cpp21
-rw-r--r--lib/Transforms/IPO/FunctionAttrs.cpp44
-rw-r--r--lib/Transforms/IPO/GlobalOpt.cpp223
-rw-r--r--lib/Transforms/IPO/IPO.cpp9
-rw-r--r--lib/Transforms/IPO/Inliner.cpp90
-rw-r--r--lib/Transforms/IPO/PartialInlining.cpp10
-rw-r--r--lib/Transforms/IPO/PruneEH.cpp29
-rw-r--r--lib/Transforms/IPO/StructRetPromotion.cpp10
-rw-r--r--lib/Transforms/InstCombine/InstCombineAndOrXor.cpp14
-rw-r--r--lib/Transforms/InstCombine/InstCombineCalls.cpp41
-rw-r--r--lib/Transforms/InstCombine/InstCombineCasts.cpp2
-rw-r--r--lib/Transforms/InstCombine/InstCombineCompares.cpp2
-rw-r--r--lib/Transforms/InstCombine/InstCombineSelect.cpp44
-rw-r--r--lib/Transforms/Makefile2
-rw-r--r--lib/Transforms/Scalar/CMakeLists.txt1
-rw-r--r--lib/Transforms/Scalar/GVN.cpp8
-rw-r--r--lib/Transforms/Scalar/IndVarSimplify.cpp187
-rw-r--r--lib/Transforms/Scalar/JumpThreading.cpp4
-rw-r--r--lib/Transforms/Scalar/LICM.cpp8
-rw-r--r--lib/Transforms/Scalar/LoopIndexSplit.cpp19
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp349
-rw-r--r--lib/Transforms/Scalar/LoopUnswitch.cpp72
-rw-r--r--lib/Transforms/Scalar/MemCpyOptimizer.cpp2
-rw-r--r--lib/Transforms/Scalar/Reg2Mem.cpp7
-rw-r--r--lib/Transforms/Scalar/SCCP.cpp15
-rw-r--r--lib/Transforms/Scalar/SCCVN.cpp716
-rw-r--r--lib/Transforms/Scalar/ScalarReplAggregates.cpp1183
-rw-r--r--lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp3
-rw-r--r--lib/Transforms/Scalar/TailRecursionElimination.cpp30
-rw-r--r--lib/Transforms/Utils/AddrModeMatcher.cpp10
-rw-r--r--lib/Transforms/Utils/BasicInliner.cpp3
-rw-r--r--lib/Transforms/Utils/BuildLibCalls.cpp63
-rw-r--r--lib/Transforms/Utils/CloneFunction.cpp2
-rw-r--r--lib/Transforms/Utils/CloneModule.cpp2
-rw-r--r--lib/Transforms/Utils/CodeExtractor.cpp2
-rw-r--r--lib/Transforms/Utils/InlineFunction.cpp71
-rw-r--r--lib/Transforms/Utils/LoopUnroll.cpp8
-rw-r--r--lib/Transforms/Utils/LowerInvoke.cpp48
-rw-r--r--lib/Transforms/Utils/SSAUpdater.cpp619
-rw-r--r--lib/Transforms/Utils/ValueMapper.cpp6
-rw-r--r--lib/Transforms/Utils/ValueMapper.h (renamed from include/llvm/Transforms/Utils/ValueMapper.h)6
-rw-r--r--lib/VMCore/AsmWriter.cpp16
-rw-r--r--lib/VMCore/AutoUpgrade.cpp120
-rw-r--r--lib/VMCore/Constants.cpp37
-rw-r--r--lib/VMCore/Core.cpp8
-rw-r--r--lib/VMCore/Dominators.cpp8
-rw-r--r--lib/VMCore/Instructions.cpp101
-rw-r--r--lib/VMCore/LLVMContext.cpp38
-rw-r--r--lib/VMCore/LLVMContextImpl.cpp1
-rw-r--r--lib/VMCore/LeaksContext.h5
-rw-r--r--lib/VMCore/Makefile2
-rw-r--r--lib/VMCore/Metadata.cpp17
-rw-r--r--lib/VMCore/Pass.cpp6
-rw-r--r--lib/VMCore/PassManager.cpp5
-rw-r--r--lib/VMCore/Type.cpp8
-rw-r--r--lib/VMCore/TypeSymbolTable.cpp6
-rw-r--r--lib/VMCore/Verifier.cpp105
-rw-r--r--runtime/Makefile2
-rw-r--r--runtime/libprofile/exported_symbols.lst1
-rw-r--r--test/Analysis/BasicAA/cas.ll2
-rw-r--r--test/Analysis/BasicAA/empty.ll10
-rw-r--r--test/Analysis/CallGraph/2008-09-09-DirectCall.ll2
-rw-r--r--test/Analysis/CallGraph/2008-09-09-UsedByGlobal.ll3
-rw-r--r--test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll1
-rw-r--r--test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll3
-rw-r--r--test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll1
-rw-r--r--test/Analysis/ScalarEvolution/2008-12-11-SMaxOverflow.ll6
-rw-r--r--test/Analysis/ScalarEvolution/sle.ll27
-rw-r--r--test/Analysis/ScalarEvolution/undefined.ll39
-rw-r--r--test/Analysis/ScalarEvolution/unsimplified-loop.ll29
-rw-r--r--test/Assembler/metadata.ll2
-rw-r--r--test/Bitcode/memcpy.ll2
-rw-r--r--test/Bitcode/sse41_pmulld.ll2
-rw-r--r--test/Bitcode/ssse3_palignr.ll1
-rw-r--r--test/Bitcode/ssse3_palignr.ll.bc bin 0 -> 1280 bytes
-rw-r--r--test/CodeGen/ARM/2007-05-07-tailmerge-1.ll2
-rw-r--r--test/CodeGen/ARM/2007-05-09-tailmerge-2.ll2
-rw-r--r--test/CodeGen/ARM/2007-05-22-tailmerge-3.ll4
-rw-r--r--test/CodeGen/ARM/2009-08-31-LSDA-Name.ll2
-rw-r--r--test/CodeGen/ARM/2010-04-07-DbgValueOtherTargets.ll33
-rw-r--r--test/CodeGen/ARM/2010-04-09-NeonSelect.ll23
-rw-r--r--test/CodeGen/ARM/2010-04-13-v2f64SplitArg.ll7
-rw-r--r--test/CodeGen/ARM/2010-04-14-SplitVector.ll16
-rw-r--r--test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll26
-rw-r--r--test/CodeGen/ARM/arguments7.ll2
-rw-r--r--test/CodeGen/ARM/arguments8.ll2
-rw-r--r--test/CodeGen/ARM/bfx.ll28
-rw-r--r--test/CodeGen/ARM/globals.ll28
-rw-r--r--test/CodeGen/ARM/hidden-vis-3.ll4
-rw-r--r--test/CodeGen/ARM/ifcvt2.ll9
-rw-r--r--test/CodeGen/ARM/ifcvt5.ll6
-rw-r--r--test/CodeGen/ARM/sbfx.ll2
-rw-r--r--test/CodeGen/ARM/vadd.ll4
-rw-r--r--test/CodeGen/ARM/vld1.ll24
-rw-r--r--test/CodeGen/ARM/vld2.ll25
-rw-r--r--test/CodeGen/ARM/vld3.ll25
-rw-r--r--test/CodeGen/ARM/vld4.ll25
-rw-r--r--test/CodeGen/ARM/vldlane.ll78
-rw-r--r--test/CodeGen/ARM/vmla.ll8
-rw-r--r--test/CodeGen/ARM/vmls.ll8
-rw-r--r--test/CodeGen/ARM/vmul.ll4
-rw-r--r--test/CodeGen/ARM/vneg.ll4
-rw-r--r--test/CodeGen/ARM/vst1.ll24
-rw-r--r--test/CodeGen/ARM/vst2.ll21
-rw-r--r--test/CodeGen/ARM/vst3.ll21
-rw-r--r--test/CodeGen/ARM/vst4.ll21
-rw-r--r--test/CodeGen/ARM/vstlane.ll54
-rw-r--r--test/CodeGen/ARM/vsub.ll4
-rw-r--r--test/CodeGen/ARM/vtrn.ll4
-rw-r--r--test/CodeGen/ARM/vuzp.ll2
-rw-r--r--test/CodeGen/ARM/vzip.ll2
-rw-r--r--test/CodeGen/Alpha/2010-04-07-DbgValueOtherTargets.ll33
-rw-r--r--test/CodeGen/Blackfin/jumptable.ll4
-rw-r--r--test/CodeGen/CellSPU/2010-04-07-DbgValueOtherTargets.ll33
-rw-r--r--test/CodeGen/CellSPU/bss.ll10
-rw-r--r--test/CodeGen/CellSPU/crash.ll8
-rw-r--r--test/CodeGen/Generic/2007-05-03-EHTypeInfo.ll2
-rw-r--r--test/CodeGen/Generic/2007-12-17-InvokeAsm.ll2
-rw-r--r--test/CodeGen/Generic/2007-12-31-UnusedSelector.ll2
-rw-r--r--test/CodeGen/Generic/crash.ll8
-rw-r--r--test/CodeGen/MBlaze/2010-04-07-DbgValueOtherTargets.ll33
-rw-r--r--test/CodeGen/MSP430/2009-05-17-Rot.ll2
-rw-r--r--test/CodeGen/MSP430/2009-05-17-Shift.ll2
-rw-r--r--test/CodeGen/MSP430/2010-04-07-DbgValueOtherTargets.ll33
-rw-r--r--test/CodeGen/MSP430/2010-05-01-CombinerAnd.ll27
-rw-r--r--test/CodeGen/MSP430/indirectbr.ll41
-rw-r--r--test/CodeGen/MSP430/indirectbr2.ll29
-rw-r--r--test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll4
-rw-r--r--test/CodeGen/Mips/2010-04-07-DbgValueOtherTargets.ll33
-rw-r--r--test/CodeGen/PIC16/2009-11-20-NewNode.ll1
-rw-r--r--test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-01-25-EmptyFunction.ll2
-rw-r--r--test/CodeGen/PowerPC/2010-02-04-EmptyGlobal.ll11
-rw-r--r--test/CodeGen/PowerPC/2010-04-07-DbgValueOtherTargets.ll33
-rw-r--r--test/CodeGen/PowerPC/2010-05-03-retaddr1.ll24
-rw-r--r--test/CodeGen/PowerPC/indirectbr.ll4
-rw-r--r--test/CodeGen/PowerPC/rlwimi-keep-rsh.ll2
-rw-r--r--test/CodeGen/SPARC/2010-04-07-DbgValueOtherTargets.ll33
-rw-r--r--test/CodeGen/SystemZ/00-RetVoid.ll2
-rw-r--r--test/CodeGen/SystemZ/01-RetArg.ll2
-rw-r--r--test/CodeGen/SystemZ/02-RetAdd.ll2
-rw-r--r--test/CodeGen/SystemZ/02-RetAddImm.ll2
-rw-r--r--test/CodeGen/SystemZ/02-RetAnd.ll2
-rw-r--r--test/CodeGen/SystemZ/02-RetNeg.ll2
-rw-r--r--test/CodeGen/SystemZ/02-RetOr.ll2
-rw-r--r--test/CodeGen/SystemZ/02-RetSub.ll2
-rw-r--r--test/CodeGen/SystemZ/02-RetSubImm.ll2
-rw-r--r--test/CodeGen/SystemZ/02-RetXor.ll2
-rw-r--r--test/CodeGen/SystemZ/02-RetXorImm.ll2
-rw-r--r--test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll2
-rw-r--r--test/CodeGen/SystemZ/2010-04-07-DbgValueOtherTargets.ll33
-rw-r--r--test/CodeGen/Thumb/2010-04-07-DbgValueOtherTargets.ll33
-rw-r--r--test/CodeGen/Thumb/machine-licm.ll8
-rw-r--r--test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll11
-rw-r--r--test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll23
-rw-r--r--test/CodeGen/Thumb2/2010-04-26-CopyRegCrash.ll73
-rw-r--r--test/CodeGen/Thumb2/bfx.ll28
-rw-r--r--test/CodeGen/Thumb2/cross-rc-coalescing-2.ll2
-rw-r--r--test/CodeGen/Thumb2/load-global.ll2
-rw-r--r--test/CodeGen/Thumb2/lsr-deficiency.ll2
-rw-r--r--test/CodeGen/Thumb2/machine-licm.ll14
-rw-r--r--test/CodeGen/Thumb2/thumb2-branch.ll24
-rw-r--r--test/CodeGen/Thumb2/thumb2-ifcvt3.ll2
-rw-r--r--test/CodeGen/X86/2007-01-08-InstrSched.ll8
-rw-r--r--test/CodeGen/X86/2007-05-05-Personality.ll2
-rw-r--r--test/CodeGen/X86/2007-05-07-InvokeSRet.ll2
-rw-r--r--test/CodeGen/X86/2007-06-04-tailmerge4.ll2
-rw-r--r--test/CodeGen/X86/2007-09-17-ObjcFrameEH.ll2
-rw-r--r--test/CodeGen/X86/2008-01-25-EmptyFunction.ll2
-rw-r--r--test/CodeGen/X86/2008-07-11-SpillerBug.ll1
-rw-r--r--test/CodeGen/X86/2008-10-16-SpillerBug.ll7
-rw-r--r--test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll2
-rw-r--r--test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll2
-rw-r--r--test/CodeGen/X86/2009-05-28-DAGCombineCrash.ll33
-rw-r--r--test/CodeGen/X86/2009-11-16-MachineLICM.ll2
-rw-r--r--test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll2
-rw-r--r--test/CodeGen/X86/2010-01-07-UAMemFeature.ll2
-rw-r--r--test/CodeGen/X86/2010-01-08-Atomic64Bug.ll2
-rw-r--r--test/CodeGen/X86/2010-04-06-SSEDomainFixCrash.ll64
-rw-r--r--test/CodeGen/X86/2010-04-07-DbgValueOtherTargets.ll34
-rw-r--r--test/CodeGen/X86/2010-04-08-CoalescerBug.ll26
-rw-r--r--test/CodeGen/X86/2010-04-13-AnalyzeBranchCrash.ll42
-rw-r--r--test/CodeGen/X86/2010-04-21-CoalescerBug.ll15
-rw-r--r--test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll45
-rw-r--r--test/CodeGen/X86/2010-04-29-CoalescerCrash.ll142
-rw-r--r--test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll143
-rw-r--r--test/CodeGen/X86/2010-05-03-CoalescerSubRegClobber.ll33
-rw-r--r--test/CodeGen/X86/MachineSink-CritEdge.ll58
-rw-r--r--test/CodeGen/X86/abi-isel.ll976
-rw-r--r--test/CodeGen/X86/alignment.ll43
-rw-r--r--test/CodeGen/X86/atomic_add.ll2
-rw-r--r--test/CodeGen/X86/avoid-loop-align.ll4
-rw-r--r--test/CodeGen/X86/brcond.ll45
-rw-r--r--test/CodeGen/X86/const-select.ll2
-rw-r--r--test/CodeGen/X86/constant-pool-sharing.ll2
-rw-r--r--test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll2
-rw-r--r--test/CodeGen/X86/crash.ll38
-rw-r--r--test/CodeGen/X86/dbg-byval-parameter.ll45
-rw-r--r--test/CodeGen/X86/dllexport.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-constpool.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-phys.ll11
-rw-r--r--test/CodeGen/X86/fast-isel-trunc.ll12
-rw-r--r--test/CodeGen/X86/fast-isel.ll21
-rw-r--r--test/CodeGen/X86/field-extract-use-trunc.ll12
-rw-r--r--test/CodeGen/X86/fold-pcmpeqd-0.ll2
-rw-r--r--test/CodeGen/X86/fp-elim.ll44
-rw-r--r--test/CodeGen/X86/global-sections.ll24
-rw-r--r--test/CodeGen/X86/h-registers-0.ll42
-rw-r--r--test/CodeGen/X86/ins_subreg_coalesce-1.ll10
-rw-r--r--test/CodeGen/X86/loop-blocks.ll44
-rw-r--r--test/CodeGen/X86/loop-hoist.ll2
-rw-r--r--test/CodeGen/X86/loop-strength-reduce8.ll2
-rw-r--r--test/CodeGen/X86/lsr-delayed-fold.ll51
-rw-r--r--test/CodeGen/X86/lsr-reuse.ll6
-rw-r--r--test/CodeGen/X86/memcpy-2.ll63
-rw-r--r--test/CodeGen/X86/multiple-loop-post-inc.ll304
-rw-r--r--test/CodeGen/X86/optimize-max-3.ll32
-rw-r--r--test/CodeGen/X86/or-address.ll90
-rw-r--r--test/CodeGen/X86/pic.ll96
-rw-r--r--test/CodeGen/X86/pic_jumptable.ll9
-rw-r--r--test/CodeGen/X86/postra-licm.ll185
-rw-r--r--test/CodeGen/X86/promote-i16.ll11
-rw-r--r--test/CodeGen/X86/rot16.ll22
-rw-r--r--test/CodeGen/X86/rot32.ll22
-rw-r--r--test/CodeGen/X86/shl_elim.ll2
-rw-r--r--test/CodeGen/X86/sibcall-2.ll52
-rw-r--r--test/CodeGen/X86/sibcall.ll22
-rw-r--r--test/CodeGen/X86/sink-hoist.ll6
-rw-r--r--test/CodeGen/X86/sse41.ll4
-rw-r--r--test/CodeGen/X86/stack-align.ll2
-rw-r--r--test/CodeGen/X86/stack-color-with-reg.ll4
-rw-r--r--test/CodeGen/X86/store-narrow.ll127
-rw-r--r--test/CodeGen/X86/tail-opts.ll53
-rw-r--r--test/CodeGen/X86/tls11.ll4
-rw-r--r--test/CodeGen/X86/unaligned-load.ll9
-rw-r--r--test/CodeGen/X86/vec_shuffle-36.ll2
-rw-r--r--test/CodeGen/X86/vec_ss_load_fold.ll12
-rw-r--r--test/CodeGen/X86/widen_load-2.ll6
-rw-r--r--test/CodeGen/X86/xor.ll8
-rw-r--r--test/CodeGen/XCore/2010-04-07-DbgValueOtherTargets.ll33
-rw-r--r--test/CodeGen/XCore/constants.ll4
-rw-r--r--test/DebugInfo/2009-10-16-Phi.ll2
-rw-r--r--test/DebugInfo/2010-01-18-DbgValue.ll74
-rw-r--r--test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll89
-rw-r--r--test/DebugInfo/2010-04-13-PubType.ll47
-rw-r--r--test/DebugInfo/2010-04-19-FramePtr.ll30
-rw-r--r--test/DebugInfo/2010-04-25-CU-entry_pc.ll (renamed from test/DebugInfo/2010-03-22-CU-HighLow.ll)2
-rw-r--r--test/DebugInfo/2010-05-03-DisableFramePtr.ll34
-rw-r--r--test/DebugInfo/2010-05-03-OriginDIE.ll86
-rw-r--r--test/FrontendC++/2009-07-16-Using.cpp2
-rw-r--r--test/FrontendC++/2009-10-27-crash.cpp2
-rw-r--r--test/FrontendC++/2010-04-30-OptimizedMethod-Dbg.cpp17
-rw-r--r--test/FrontendC/2007-05-16-EmptyStruct.c2
-rw-r--r--test/FrontendC/2008-11-02-WeakAlias.c2
-rw-r--r--test/FrontendC/2009-01-20-k8.c2
-rw-r--r--test/FrontendC/2009-01-21-InvalidIterator.c2
-rw-r--r--test/FrontendC/2009-04-22-UnknownSize.c2
-rw-r--r--test/FrontendC/2009-06-14-HighlyAligned.c2
-rw-r--r--test/FrontendC/2009-12-07-BitFieldAlignment.c4
-rw-r--r--test/FrontendC/crash-invalid-array.c17
-rw-r--r--test/FrontendC/cstring-align.c5
-rw-r--r--test/FrontendC/inline-asm-function.c6
-rw-r--r--test/MC/AsmParser/X86/x86_32-encoding.s86
-rw-r--r--test/MC/AsmParser/X86/x86_operands.s4
-rw-r--r--test/MC/AsmParser/exprs.s11
-rw-r--r--test/MC/Disassembler/arm-tests.txt16
-rw-r--r--test/MC/Disassembler/dg.exp4
-rw-r--r--test/MC/Disassembler/neon-tests.txt8
-rw-r--r--test/MC/Disassembler/simple-tests.txt3
-rw-r--r--test/MC/Disassembler/thumb-tests.txt17
-rw-r--r--test/Other/lint.ll84
-rw-r--r--test/Transforms/ArgumentPromotion/crash.ll38
-rw-r--r--test/Transforms/DeadArgElim/2009-03-17-MRE-Invoke.ll2
-rw-r--r--test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll68
-rw-r--r--test/Transforms/DeadArgElim/deadexternal.ll13
-rw-r--r--test/Transforms/DeadStoreElimination/crash.ll2
-rw-r--r--test/Transforms/GVN/2010-03-31-RedundantPHIs.ll46
-rw-r--r--test/Transforms/GVN/invariant-simple.ll2
-rw-r--r--test/Transforms/GVN/lifetime-simple.ll4
-rw-r--r--test/Transforms/GlobalOpt/crash.ll28
-rw-r--r--test/Transforms/GlobalOpt/malloc-promote-2.ll31
-rw-r--r--test/Transforms/GlobalOpt/malloc-promote-3.ll30
-rw-r--r--test/Transforms/GlobalOpt/metadata.ll19
-rw-r--r--test/Transforms/IndVarSimplify/casted-argument.ll2
-rw-r--r--test/Transforms/IndVarSimplify/crash.ll2
-rw-r--r--test/Transforms/IndVarSimplify/eliminate-comparison.ll108
-rw-r--r--test/Transforms/IndVarSimplify/eliminate-max.ll52
-rw-r--r--test/Transforms/IndVarSimplify/eliminate-rem.ll121
-rw-r--r--test/Transforms/IndVarSimplify/udiv.ll162
-rw-r--r--test/Transforms/Inline/2009-01-12-RecursiveInline.ll92
-rw-r--r--test/Transforms/Inline/crash.ll31
-rw-r--r--test/Transforms/Inline/crash2.ll29
-rw-r--r--test/Transforms/Inline/devirtualize-2.ll44
-rw-r--r--test/Transforms/Inline/devirtualize-3.ll79
-rw-r--r--test/Transforms/Inline/devirtualize.ll182
-rw-r--r--test/Transforms/Inline/externally_available.ll2
-rw-r--r--test/Transforms/Inline/gvn-inline-iteration.ll23
-rw-r--r--test/Transforms/Inline/indirect_resolve.ll16
-rw-r--r--test/Transforms/Inline/noinline-recursive-fn.ll73
-rw-r--r--test/Transforms/InstCombine/2007-12-28-IcmpSub2.ll2
-rw-r--r--test/Transforms/InstCombine/invariant.ll6
-rw-r--r--test/Transforms/InstCombine/objsize.ll10
-rw-r--r--test/Transforms/InstCombine/odr-linkage.ll2
-rw-r--r--test/Transforms/InstCombine/strcpy_chk.ll1
-rw-r--r--test/Transforms/InstCombine/vec_shuffle.ll2
-rw-r--r--test/Transforms/JumpThreading/crash.ll17
-rw-r--r--test/Transforms/LoopIndexSplit/2008-02-08-Crash.ll2
-rw-r--r--test/Transforms/LoopIndexSplit/PR4174-2.ll38
-rw-r--r--test/Transforms/LoopIndexSplit/PR4174.ll23
-rw-r--r--test/Transforms/LoopStrengthReduce/insert-positions.ll69
-rw-r--r--test/Transforms/LoopStrengthReduce/pr2537.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/uglygep.ll67
-rw-r--r--test/Transforms/LoopUnswitch/crash.ll19
-rw-r--r--test/Transforms/PruneEH/2008-09-05-CGUpdate.ll2
-rw-r--r--test/Transforms/SCCP/ipsccp-basic.ll2
-rw-r--r--test/Transforms/SCCP/undef-resolve.ll106
-rw-r--r--test/Transforms/ScalarRepl/memcpy-align.ll32
-rw-r--r--test/Transforms/TailCallElim/inf-recursion.ll26
-rw-r--r--tools/CMakeLists.txt1
-rw-r--r--tools/Makefile14
-rw-r--r--tools/bugpoint/BugDriver.cpp85
-rw-r--r--tools/bugpoint/BugDriver.h50
-rw-r--r--tools/bugpoint/CMakeLists.txt1
-rw-r--r--tools/bugpoint/CrashDebugger.cpp63
-rw-r--r--tools/bugpoint/ExecutionDriver.cpp81
-rw-r--r--tools/bugpoint/FindBugs.cpp37
-rw-r--r--tools/bugpoint/ListReducer.h30
-rw-r--r--tools/bugpoint/Makefile1
-rw-r--r--tools/bugpoint/Miscompilation.cpp289
-rw-r--r--tools/bugpoint/ToolRunner.cpp92
-rw-r--r--tools/bugpoint/ToolRunner.h64
-rw-r--r--tools/bugpoint/bugpoint.cpp24
-rw-r--r--tools/edis/CMakeLists.txt25
-rw-r--r--tools/edis/EDDisassembler.cpp129
-rw-r--r--tools/edis/EDDisassembler.h20
-rw-r--r--tools/edis/EDInfo.td1
-rw-r--r--tools/edis/EDInst.cpp12
-rw-r--r--tools/edis/EDInst.h8
-rw-r--r--tools/edis/EDMain.cpp29
-rw-r--r--tools/edis/EDOperand.cpp238
-rw-r--r--tools/edis/EDToken.cpp18
-rw-r--r--tools/edis/EnhancedDisassembly.exports72
-rw-r--r--tools/edis/Makefile13
-rw-r--r--tools/gold/Makefile2
-rw-r--r--tools/gold/gold-plugin.cpp30
-rw-r--r--tools/gold/gold.exports1
-rw-r--r--tools/llc/llc.cpp3
-rw-r--r--tools/lli/lli.cpp2
-rw-r--r--tools/llvm-ld/llvm-ld.cpp2
-rw-r--r--tools/llvm-mc/CMakeLists.txt1
-rw-r--r--tools/llvm-mc/Disassembler.cpp271
-rw-r--r--tools/llvm-mc/Disassembler.h3
-rw-r--r--tools/llvm-mc/Makefile3
-rw-r--r--tools/llvm-mc/llvm-mc.cpp16
-rw-r--r--tools/llvmc/plugins/Base/Base.td.in5
-rw-r--r--tools/lto/LTOCodeGenerator.cpp18
-rw-r--r--tools/lto/Makefile3
-rw-r--r--tools/lto/lto.exports47
-rw-r--r--tools/opt/AnalysisWrappers.cpp1
-rw-r--r--tools/opt/GraphPrinters.cpp7
-rw-r--r--tools/opt/opt.cpp9
-rw-r--r--unittests/ADT/BitVectorTest.cpp18
-rw-r--r--unittests/ADT/SmallBitVectorTest.cpp14
-rw-r--r--unittests/ADT/ValueMapTest.cpp8
-rw-r--r--unittests/ExecutionEngine/JIT/JITMemoryManagerTest.cpp8
-rw-r--r--unittests/VMCore/PassManagerTest.cpp2
-rw-r--r--utils/TableGen/ARMDecoderEmitter.cpp68
-rw-r--r--utils/TableGen/AsmMatcherEmitter.cpp12
-rw-r--r--utils/TableGen/CodeEmitterGen.cpp11
-rw-r--r--utils/TableGen/CodeGenDAGPatterns.cpp30
-rw-r--r--utils/TableGen/CodeGenDAGPatterns.h14
-rw-r--r--utils/TableGen/CodeGenTarget.cpp5
-rw-r--r--utils/TableGen/EDEmitter.cpp626
-rw-r--r--utils/TableGen/Record.h5
-rw-r--r--utils/TableGen/SubtargetEmitter.cpp44
-rw-r--r--utils/TableGen/SubtargetEmitter.h3
-rw-r--r--utils/TableGen/X86DisassemblerTables.cpp4
-rw-r--r--utils/TableGen/X86RecognizableInstr.cpp2
-rw-r--r--utils/buildit/GNUmakefile6
-rwxr-xr-xutils/buildit/build_llvm40
-rw-r--r--utils/fpcmp/fpcmp.cpp4
-rw-r--r--utils/lit/lit/ExampleTests/LLVM.OutOfTree/src/test/Foo/pct-S.ll2
-rw-r--r--utils/lit/lit/TestFormats.py12
-rw-r--r--utils/llvm.grm2
-rw-r--r--utils/unittest/googletest/gtest-filepath.cc4
-rw-r--r--utils/unittest/googletest/include/gtest/internal/gtest-linked_ptr.h1
-rw-r--r--utils/unittest/googletest/include/gtest/internal/gtest-port.h3
885 files changed, 27685 insertions, 13888 deletions
diff --git a/Makefile b/Makefile
index efbae8ddedaf..670c1743c966 100644
--- a/Makefile
+++ b/Makefile
@@ -69,6 +69,14 @@ ifeq ($(MAKECMDGOALS),install-clang)
NO_INSTALL = 1
endif
+ifeq ($(MAKECMDGOALS),install-clang-c)
+ DIRS := tools/clang/tools/driver tools/clang/lib/Headers \
+ tools/clang/tools/libclang tools/clang/tools/c-index-test \
+ tools/clang/include/clang-c
+ OPTIONAL_DIRS :=
+ NO_INSTALL = 1
+endif
+
ifeq ($(MAKECMDGOALS),clang-only)
DIRS := $(filter-out tools runtime docs unittests, $(DIRS)) tools/clang
OPTIONAL_DIRS :=
@@ -110,6 +118,8 @@ cross-compile-build-tools:
ENABLE_COVERAGE=$(ENABLE_COVERAGE) \
DISABLE_ASSERTIONS=$(DISABLE_ASSERTIONS) \
ENABLE_EXPENSIVE_CHECKS=$(ENABLE_EXPENSIVE_CHECKS) \
+ CFLAGS= \
+ CXXFLAGS= \
) || exit 1;
endif
@@ -143,6 +153,7 @@ clang-only: all
tools-only: all
libs-only: all
install-clang: install
+install-clang-c: install
install-libs: install
#------------------------------------------------------------------------
diff --git a/Makefile.config.in b/Makefile.config.in
index 1b61f0908a8c..a3384e7af32f 100644
--- a/Makefile.config.in
+++ b/Makefile.config.in
@@ -337,3 +337,7 @@ ENABLE_LLVMC_DYNAMIC_PLUGINS = 1
NO_MISSING_FIELD_INITIALIZERS = @NO_MISSING_FIELD_INITIALIZERS@
# -Wno-variadic-macros
NO_VARIADIC_MACROS = @NO_VARIADIC_MACROS@
+
+# Flags supported by the linker.
+# bfd ld / gold -retain-symbols-file file
+HAVE_LINK_RETAIN_SYMBOLS_FILE = @HAVE_LINK_RETAIN_SYMBOLS_FILE@
diff --git a/Makefile.rules b/Makefile.rules
index 9a6280bf7f24..d70215e94aae 100644
--- a/Makefile.rules
+++ b/Makefile.rules
@@ -388,7 +388,6 @@ endif
# If REQUIRES_RTTI=1 is specified then don't disable run-time type id.
ifneq ($(REQUIRES_RTTI), 1)
CXX.Flags += -fno-rtti
- CXXFLAGS += -fno-rtti
endif
ifdef ENABLE_COVERAGE
@@ -561,7 +560,7 @@ ifeq ($(HOST_OS),Darwin)
# Get "4" out of 10.4 for later pieces in the makefile.
DARWIN_MAJVERS := $(shell echo $(DARWIN_VERSION)| sed -E 's/10.([0-9]).*/\1/')
- SharedLinkOptions=-Wl,-flat_namespace -Wl,-undefined -Wl,suppress \
+ SharedLinkOptions=-Wl,-flat_namespace -Wl,-undefined,suppress \
-dynamiclib
ifneq ($(ARCH),ARM)
SharedLinkOptions += -mmacosx-version-min=$(DARWIN_VERSION)
@@ -581,13 +580,6 @@ ifeq ($(TARGET_OS),Darwin)
endif
endif
-# Adjust LD.Flags depending on the kind of library that is to be built. Note
-# that if LOADABLE_MODULE is specified then the resulting shared library can
-# be opened with dlopen.
-ifdef LOADABLE_MODULE
- LD.Flags += -module
-endif
-
ifdef SHARED_LIBRARY
ifneq ($(DARWIN_MAJVERS),4)
LD.Flags += $(RPATH) -Wl,$(LibDir)
@@ -640,6 +632,8 @@ CompileCommonOpts += -pedantic -Wno-long-long
endif
CompileCommonOpts += -Wall -W -Wno-unused-parameter -Wwrite-strings \
$(EXTRA_OPTIONS)
+# Enable cast-qual for C++; the workaround is to use const_cast.
+CXX.Flags += -Wcast-qual
ifeq ($(HOST_OS),HP-UX)
CompileCommonOpts := -D_REENTRANT -D_HPUX_SOURCE
@@ -970,6 +964,36 @@ endif
endif
endif
+# Set up the library exports file.
+ifdef EXPORTED_SYMBOL_FILE
+
+# First, set up the native export file, which may differ from the source
+# export file.
+
+ifeq ($(HOST_OS),Darwin)
+# Darwin convention prefixes symbols with underscores.
+NativeExportsFile := $(ObjDir)/$(notdir $(EXPORTED_SYMBOL_FILE)).sed
+$(NativeExportsFile): $(EXPORTED_SYMBOL_FILE) $(ObjDir)/.dir
+ $(Verb) sed -e 's/[[:<:]]/_/' < $< > $@
+clean-local::
+ -$(Verb) $(RM) -f $(NativeExportsFile)
+else
+NativeExportsFile := $(EXPORTED_SYMBOL_FILE)
+endif
+
+# Now add the linker command-line options to use the native export file.
+
+ifeq ($(HOST_OS),Darwin)
+LLVMLibsOptions += -Wl,-exported_symbols_list,$(NativeExportsFile)
+endif
+
+# gold, bfd ld, etc.
+ifeq ($(HAVE_LINK_RETAIN_SYMBOLS_FILE),1)
+LLVMLibsOptions += -Wl,-retain-symbols-file,$(NativeExportsFile)
+endif
+
+endif
+
###############################################################################
# Library Build Rules: Four ways to build a library
###############################################################################
@@ -1060,6 +1084,10 @@ ifdef SHARED_LIBRARY
all-local:: $(LibName.SO)
+ifdef EXPORTED_SYMBOL_FILE
+$(LibName.SO): $(NativeExportsFile)
+endif
+
ifdef LINK_LIBS_IN_SHARED
ifdef LOADABLE_MODULE
SharedLibKindMessage := "Loadable Module"
@@ -1207,6 +1235,12 @@ install-local::
uninstall-local::
$(Echo) Uninstall circumvented with NO_INSTALL
else
+ifdef NO_INSTALL_ARCHIVES
+install-local::
+ $(Echo) Install circumvented with NO_INSTALL
+uninstall-local::
+ $(Echo) Uninstall circumvented with NO_INSTALL
+else
DestArchiveLib := $(DESTDIR)$(PROJ_libdir)/lib$(LIBRARYNAME).a
install-local:: $(DestArchiveLib)
@@ -1221,6 +1255,7 @@ uninstall-local::
-$(Verb) $(RM) -f $(DestArchiveLib)
endif
endif
+endif
# endif LIBRARYNAME
endif
@@ -1262,7 +1297,7 @@ ifeq ($(HOST_OS),Darwin)
# Tiger tools don't support this.
ifneq ($(DARWIN_MAJVERS),4)
-LD.Flags += -Wl,-exported_symbol -Wl,_main
+LD.Flags += -Wl,-exported_symbol,_main
endif
endif
@@ -1331,7 +1366,7 @@ DestToolAlias = $(DESTDIR)$(PROJ_bindir)/$(TOOLALIAS)$(EXEEXT)
install-local:: $(DestToolAlias)
-$(DestToolAlias): $(DestTool) $(PROJ_bindir)
+$(DestToolAlias): $(DestTool)
$(Echo) Installing $(BuildMode) $(DestToolAlias)
$(Verb) $(RM) -f $(DestToolAlias)
$(Verb) $(AliasTool) $(TOOLEXENAME) $(DestToolAlias)
diff --git a/autoconf/configure.ac b/autoconf/configure.ac
index 04c0886ab78d..3b4019658ccd 100644
--- a/autoconf/configure.ac
+++ b/autoconf/configure.ac
@@ -159,6 +159,11 @@ AC_CACHE_CHECK([type of operating system we're going to host on],
llvm_cv_no_link_all_option="-Wl,-noall_load"
llvm_cv_os_type="Darwin"
llvm_cv_platform_type="Unix" ;;
+ *-*-minix*)
+ llvm_cv_link_all_option="-Wl,-all_load"
+ llvm_cv_no_link_all_option="-Wl,-noall_load"
+ llvm_cv_os_type="Minix"
+ llvm_cv_platform_type="Unix" ;;
*-*-freebsd*)
llvm_cv_link_all_option="-Wl,--whole-archive"
llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
@@ -247,6 +252,8 @@ AC_CACHE_CHECK([type of operating system we're going to target],
llvm_cv_target_os_type="Cygwin" ;;
*-*-darwin*)
llvm_cv_target_os_type="Darwin" ;;
+ *-*-minix*)
+ llvm_cv_target_os_type="Minix" ;;
*-*-freebsd*)
llvm_cv_target_os_type="FreeBSD" ;;
*-*-openbsd*)
@@ -721,8 +728,9 @@ AC_MSG_CHECKING([optimization flags])
case "$withval" in
default)
case "$llvm_cv_os_type" in
- MingW) optimize_option=-O3 ;;
- *) optimize_option=-O2 ;;
+ FreeBSD) optimize_option=-O2 ;;
+ MingW) optimize_option=-O2 ;;
+ *) optimize_option=-O3 ;;
esac ;;
*) optimize_option="$withval" ;;
esac
@@ -1015,6 +1023,9 @@ AC_LINK_USE_R
dnl Determine whether the linker supports the -export-dynamic option.
AC_LINK_EXPORT_DYNAMIC
+dnl Determine whether the linker supports the -retain-symbols-file option.
+AC_LINK_RETAIN_SYMBOLS_FILE
+
dnl Check for libtool and the library that has dlopen function (which must come
dnl before the AC_PROG_LIBTOOL check in order to enable dlopening libraries with
dnl libtool).
@@ -1283,7 +1294,7 @@ AC_CHECK_FUNCS([backtrace ceilf floorf roundf rintf nearbyintf getcwd ])
AC_CHECK_FUNCS([powf fmodf strtof round ])
AC_CHECK_FUNCS([getpagesize getrusage getrlimit setrlimit gettimeofday ])
AC_CHECK_FUNCS([isatty mkdtemp mkstemp ])
-AC_CHECK_FUNCS([mktemp realpath sbrk setrlimit strdup ])
+AC_CHECK_FUNCS([mktemp posix_spawn realpath sbrk setrlimit strdup ])
AC_CHECK_FUNCS([strerror strerror_r strerror_s setenv ])
AC_CHECK_FUNCS([strtoll strtoq sysconf malloc_zone_statistics ])
AC_CHECK_FUNCS([setjmp longjmp sigsetjmp siglongjmp])
diff --git a/autoconf/m4/link_options.m4 b/autoconf/m4/link_options.m4
index 66036de433d3..697abab07d4d 100644
--- a/autoconf/m4/link_options.m4
+++ b/autoconf/m4/link_options.m4
@@ -8,7 +8,7 @@ AC_DEFUN([AC_LINK_USE_R],
[ AC_LANG_PUSH([C])
oldcflags="$CFLAGS"
CFLAGS="$CFLAGS -Wl,-R."
- AC_LINK_IFELSE([AC_LANG_PROGRAM([[]],[[int main() { return 0; }]])],
+ AC_LINK_IFELSE([AC_LANG_PROGRAM([[]],[[]])],
[llvm_cv_link_use_r=yes],[llvm_cv_link_use_r=no])
CFLAGS="$oldcflags"
AC_LANG_POP([C])
@@ -29,7 +29,7 @@ AC_DEFUN([AC_LINK_EXPORT_DYNAMIC],
[ AC_LANG_PUSH([C])
oldcflags="$CFLAGS"
CFLAGS="$CFLAGS -Wl,-export-dynamic"
- AC_LINK_IFELSE([AC_LANG_PROGRAM([[]],[[int main() { return 0; }]])],
+ AC_LINK_IFELSE([AC_LANG_PROGRAM([[]],[[]])],
[llvm_cv_link_use_export_dynamic=yes],[llvm_cv_link_use_export_dynamic=no])
CFLAGS="$oldcflags"
AC_LANG_POP([C])
@@ -39,3 +39,46 @@ if test "$llvm_cv_link_use_export_dynamic" = yes ; then
fi
])
+#
+# Determine if the system can handle the -retain-symbols-file option being
+# passed to the linker.
+#
+# This macro is specific to LLVM.
+#
+AC_DEFUN([AC_LINK_RETAIN_SYMBOLS_FILE],
+[AC_CACHE_CHECK([for compiler -Wl,-retain-symbols-file option],
+ [llvm_cv_link_use_retain_symbols_file],
+[ AC_LANG_PUSH([C])
+ oldcflags="$CFLAGS"
+
+ # The following code is from the autoconf manual,
+ # "11.13: Limitations of Usual Tools".
+ # Create a temporary directory $tmp in $TMPDIR (default /tmp).
+ # Use mktemp if possible; otherwise fall back on mkdir,
+ # with $RANDOM to make collisions less likely.
+ : ${TMPDIR=/tmp}
+ {
+ tmp=`
+ (umask 077 && mktemp -d "$TMPDIR/fooXXXXXX") 2>/dev/null
+ ` &&
+ test -n "$tmp" && test -d "$tmp"
+ } || {
+ tmp=$TMPDIR/foo$$-$RANDOM
+ (umask 077 && mkdir "$tmp")
+ } || exit $?
+
+ echo "main" > "$tmp/exports"
+
+ CFLAGS="$CFLAGS -Wl,-retain-symbols-file=$tmp/exports"
+ AC_LINK_IFELSE([AC_LANG_PROGRAM([[]],[[]])],
+ [llvm_cv_link_use_retain_symbols_file=yes],[llvm_cv_link_use_retain_symbols_file=no])
+ rm "$tmp/exports"
+ rmdir "$tmp"
+ CFLAGS="$oldcflags"
+ AC_LANG_POP([C])
+])
+if test "$llvm_cv_link_use_retain_symbols_file" = yes ; then
+ AC_SUBST(HAVE_LINK_RETAIN_SYMBOLS_FILE,1)
+ fi
+])
+
diff --git a/bindings/ocaml/llvm/llvm.ml b/bindings/ocaml/llvm/llvm.ml
index e801c494713a..7ab6f51efb9f 100644
--- a/bindings/ocaml/llvm/llvm.ml
+++ b/bindings/ocaml/llvm/llvm.ml
@@ -90,13 +90,13 @@ module Attribute = struct
| Optsize
| Ssp
| Sspreq
- | Alignment
+ | Alignment of int
| Nocapture
| Noredzone
| Noimplicitfloat
| Naked
| Inlinehint
- | Stackalignment
+ | Stackalignment of int
end
module Icmp = struct
@@ -170,6 +170,8 @@ external delete_type_name : string -> llmodule -> unit
external type_by_name : llmodule -> string -> lltype option
= "llvm_type_by_name"
external dump_module : llmodule -> unit = "llvm_dump_module"
+external set_module_inline_asm : llmodule -> string -> unit
+ = "llvm_set_module_inline_asm"
(*===-- Types -------------------------------------------------------------===*)
external classify_type : lltype -> TypeKind.t = "llvm_classify_type"
@@ -548,10 +550,42 @@ let rec fold_right_function_range f i e init =
let fold_right_functions f m init =
fold_right_function_range f (function_end m) (At_start m) init
-external add_function_attr : llvalue -> Attribute.t -> unit
- = "llvm_add_function_attr"
-external remove_function_attr : llvalue -> Attribute.t -> unit
- = "llvm_remove_function_attr"
+external llvm_add_function_attr : llvalue -> int -> unit
+ = "llvm_add_function_attr"
+external llvm_remove_function_attr : llvalue -> int -> unit
+ = "llvm_remove_function_attr"
+
+let pack_attr (attr:Attribute.t) : int =
+ match attr with
+ Attribute.Zext -> 1 lsl 0
+ | Attribute.Sext -> 1 lsl 1
+ | Attribute.Noreturn -> 1 lsl 2
+ | Attribute.Inreg -> 1 lsl 3
+ | Attribute.Structret -> 1 lsl 4
+ | Attribute.Nounwind -> 1 lsl 5
+ | Attribute.Noalias -> 1 lsl 6
+ | Attribute.Byval -> 1 lsl 7
+ | Attribute.Nest -> 1 lsl 8
+ | Attribute.Readnone -> 1 lsl 9
+ | Attribute.Readonly -> 1 lsl 10
+ | Attribute.Noinline -> 1 lsl 11
+ | Attribute.Alwaysinline -> 1 lsl 12
+ | Attribute.Optsize -> 1 lsl 13
+ | Attribute.Ssp -> 1 lsl 14
+ | Attribute.Sspreq -> 1 lsl 15
+ | Attribute.Alignment n -> n lsl 16
+ | Attribute.Nocapture -> 1 lsl 21
+ | Attribute.Noredzone -> 1 lsl 22
+ | Attribute.Noimplicitfloat -> 1 lsl 23
+ | Attribute.Naked -> 1 lsl 24
+ | Attribute.Inlinehint -> 1 lsl 25
+ | Attribute.Stackalignment n -> n lsl 26
+
+let add_function_attr llval attr =
+ llvm_add_function_attr llval (pack_attr attr)
+
+let remove_function_attr llval attr =
+ llvm_remove_function_attr llval (pack_attr attr)
(*--... Operations on params ...............................................--*)
external params : llvalue -> llvalue array = "llvm_params"
@@ -602,10 +636,17 @@ let rec fold_right_param_range f init i e =
let fold_right_params f fn init =
fold_right_param_range f init (param_end fn) (At_start fn)
-external add_param_attr : llvalue -> Attribute.t -> unit
- = "llvm_add_param_attr"
-external remove_param_attr : llvalue -> Attribute.t -> unit
- = "llvm_remove_param_attr"
+external llvm_add_param_attr : llvalue -> int -> unit
+ = "llvm_add_param_attr"
+external llvm_remove_param_attr : llvalue -> int -> unit
+ = "llvm_remove_param_attr"
+
+let add_param_attr llval attr =
+ llvm_add_param_attr llval (pack_attr attr)
+
+let remove_param_attr llval attr =
+ llvm_remove_param_attr llval (pack_attr attr)
+
external set_param_alignment : llvalue -> int -> unit
= "llvm_set_param_alignment"
@@ -727,10 +768,17 @@ external instruction_call_conv: llvalue -> int
= "llvm_instruction_call_conv"
external set_instruction_call_conv: int -> llvalue -> unit
= "llvm_set_instruction_call_conv"
-external add_instruction_param_attr : llvalue -> int -> Attribute.t -> unit
- = "llvm_add_instruction_param_attr"
-external remove_instruction_param_attr : llvalue -> int -> Attribute.t -> unit
- = "llvm_remove_instruction_param_attr"
+
+external llvm_add_instruction_param_attr : llvalue -> int -> int -> unit
+ = "llvm_add_instruction_param_attr"
+external llvm_remove_instruction_param_attr : llvalue -> int -> int -> unit
+ = "llvm_remove_instruction_param_attr"
+
+let add_instruction_param_attr llval i attr =
+ llvm_add_instruction_param_attr llval i (pack_attr attr)
+
+let remove_instruction_param_attr llval i attr =
+ llvm_remove_instruction_param_attr llval i (pack_attr attr)
(*--... Operations on call instructions (only) .............................--*)
external is_tail_call : llvalue -> bool = "llvm_is_tail_call"
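
The llvm.ml change above replaces the raw `external` attribute declarations with OCaml wrappers: `pack_attr` encodes an `Attribute.t` into the integer bitmask the C stubs expect, and the new operand-carrying constructors `Alignment n` and `Stackalignment n` are shifted into their bit fields (`n lsl 16` and `n lsl 26`). A minimal usage sketch against the new interface — the function value `f` and the operand values are illustrative, not from this commit:

    let annotate (f : Llvm.llvalue) =
      (* Flag attributes pack to single bits: Noreturn becomes 1 lsl 2. *)
      Llvm.add_function_attr f Llvm.Attribute.Noreturn;
      (* Operand-carrying attributes shift their argument into the mask:
         Alignment 4 reaches the C stub as 4 lsl 16. *)
      Llvm.add_param_attr (Llvm.param f 0) (Llvm.Attribute.Alignment 4)

Because the packing happens on the OCaml side, the C stubs (further below) keep taking a plain integer, which is why the `external` signatures change from `Attribute.t` to `int`.
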
diff --git a/bindings/ocaml/llvm/llvm.mli b/bindings/ocaml/llvm/llvm.mli
index 4b0c06da03e4..742265cd3d5c 100644
--- a/bindings/ocaml/llvm/llvm.mli
+++ b/bindings/ocaml/llvm/llvm.mli
@@ -139,13 +139,13 @@ module Attribute : sig
| Optsize
| Ssp
| Sspreq
- | Alignment
+ | Alignment of int
| Nocapture
| Noredzone
| Noimplicitfloat
| Naked
| Inlinehint
- | Stackalignment
+ | Stackalignment of int
end
(** The predicate for an integer comparison ([icmp]) instruction.
@@ -284,6 +284,11 @@ external type_by_name : llmodule -> string -> lltype option
error. See the method [llvm::Module::dump]. *)
external dump_module : llmodule -> unit = "llvm_dump_module"
+(** [set_module_inline_asm m asm] sets the inline assembler for the module. See
+ the method [llvm::Module::setModuleInlineAsm]. *)
+external set_module_inline_asm : llmodule -> string -> unit
+ = "llvm_set_module_inline_asm"
+
(** {6 Types} *)
@@ -1282,13 +1287,11 @@ external set_gc : string option -> llvalue -> unit = "llvm_set_gc"
(** [add_function_attr f a] adds attribute [a] to the return type of function
[f]. *)
-external add_function_attr : llvalue -> Attribute.t -> unit
- = "llvm_add_function_attr"
+val add_function_attr : llvalue -> Attribute.t -> unit
(** [remove_function_attr f a] removes attribute [a] from the return type of
function [f]. *)
-external remove_function_attr : llvalue -> Attribute.t -> unit
- = "llvm_remove_function_attr"
+val remove_function_attr : llvalue -> Attribute.t -> unit
(** {7 Operations on params} *)
@@ -1343,11 +1346,10 @@ val rev_iter_params : (llvalue -> unit) -> llvalue -> unit
val fold_right_params : (llvalue -> 'a -> 'a) -> llvalue -> 'a -> 'a
(** [add_param p a] adds attribute [a] to parameter [p]. *)
-external add_param_attr : llvalue -> Attribute.t -> unit = "llvm_add_param_attr"
+val add_param_attr : llvalue -> Attribute.t -> unit
(** [remove_param_attr p a] removes attribute [a] from parameter [p]. *)
-external remove_param_attr : llvalue -> Attribute.t -> unit
- = "llvm_remove_param_attr"
+val remove_param_attr : llvalue -> Attribute.t -> unit
(** [set_param_alignment p a] set the alignment of parameter [p] to [a]. *)
external set_param_alignment : llvalue -> int -> unit
@@ -1499,14 +1501,12 @@ external set_instruction_call_conv: int -> llvalue -> unit
(** [add_instruction_param_attr ci i a] adds attribute [a] to the [i]th
parameter of the call or invoke instruction [ci]. [i]=0 denotes the return
value. *)
-external add_instruction_param_attr : llvalue -> int -> Attribute.t -> unit
- = "llvm_add_instruction_param_attr"
+val add_instruction_param_attr : llvalue -> int -> Attribute.t -> unit
(** [remove_instruction_param_attr ci i a] removes attribute [a] from the
[i]th parameter of the call or invoke instruction [ci]. [i]=0 denotes the
return value. *)
-external remove_instruction_param_attr : llvalue -> int -> Attribute.t -> unit
- = "llvm_remove_instruction_param_attr"
+val remove_instruction_param_attr : llvalue -> int -> Attribute.t -> unit
(** {Operations on call instructions (only)} *)
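A minimal sketch of the new module-level inline assembly entry point (not part of the patch; the module name and asm string are illustrative, and `create_module` taking an llcontext is assumed from these bindings):

    let () =
      let ctx = global_context () in
      let m = create_module ctx "asm_demo" in
      (* Corresponds to llvm::Module::setModuleInlineAsm, per the doc comment. *)
      set_module_inline_asm m ".globl marker\nmarker:\n";
      dump_module m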
diff --git a/bindings/ocaml/llvm/llvm_ocaml.c b/bindings/ocaml/llvm/llvm_ocaml.c
index d526a05a5109..c4355ba2dbf1 100644
--- a/bindings/ocaml/llvm/llvm_ocaml.c
+++ b/bindings/ocaml/llvm/llvm_ocaml.c
@@ -182,6 +182,11 @@ CAMLprim value llvm_dump_module(LLVMModuleRef M) {
return Val_unit;
}
+/* llmodule -> string -> unit */
+CAMLprim value llvm_set_module_inline_asm(LLVMModuleRef M, value Asm) {
+ LLVMSetModuleInlineAsm(M, String_val(Asm));
+ return Val_unit;
+}
/*===-- Types -------------------------------------------------------------===*/
@@ -941,13 +946,13 @@ CAMLprim value llvm_set_gc(value GC, LLVMValueRef Fn) {
/* llvalue -> Attribute.t -> unit */
CAMLprim value llvm_add_function_attr(LLVMValueRef Arg, value PA) {
- LLVMAddFunctionAttr(Arg, 1<<Int_val(PA));
+ LLVMAddFunctionAttr(Arg, Int_val(PA));
return Val_unit;
}
/* llvalue -> Attribute.t -> unit */
CAMLprim value llvm_remove_function_attr(LLVMValueRef Arg, value PA) {
- LLVMRemoveFunctionAttr(Arg, 1<<Int_val(PA));
+ LLVMRemoveFunctionAttr(Arg, Int_val(PA));
return Val_unit;
}
/*--... Operations on parameters ...........................................--*/
@@ -968,13 +973,13 @@ CAMLprim value llvm_params(LLVMValueRef Fn, value Index) {
/* llvalue -> Attribute.t -> unit */
CAMLprim value llvm_add_param_attr(LLVMValueRef Arg, value PA) {
- LLVMAddAttribute(Arg, 1<<Int_val(PA));
+ LLVMAddAttribute(Arg, Int_val(PA));
return Val_unit;
}
/* llvalue -> Attribute.t -> unit */
CAMLprim value llvm_remove_param_attr(LLVMValueRef Arg, value PA) {
- LLVMRemoveAttribute(Arg, 1<<Int_val(PA));
+ LLVMRemoveAttribute(Arg, Int_val(PA));
return Val_unit;
}
@@ -1042,7 +1047,7 @@ CAMLprim value llvm_set_instruction_call_conv(value CC, LLVMValueRef Inst) {
CAMLprim value llvm_add_instruction_param_attr(LLVMValueRef Instr,
value index,
value PA) {
- LLVMAddInstrAttribute(Instr, Int_val(index), 1<<Int_val(PA));
+ LLVMAddInstrAttribute(Instr, Int_val(index), Int_val(PA));
return Val_unit;
}
@@ -1050,7 +1055,7 @@ CAMLprim value llvm_add_instruction_param_attr(LLVMValueRef Instr,
CAMLprim value llvm_remove_instruction_param_attr(LLVMValueRef Instr,
value index,
value PA) {
- LLVMRemoveInstrAttribute(Instr, Int_val(index), 1<<Int_val(PA));
+ LLVMRemoveInstrAttribute(Instr, Int_val(index), Int_val(PA));
return Val_unit;
}
diff --git a/configure b/configure
index 2836e9ea32b3..3e5ad9cd5af1 100755
--- a/configure
+++ b/configure
@@ -749,6 +749,7 @@ OCAMLOPT
OCAMLDEP
OCAMLDOC
GAS
+HAVE_LINK_RETAIN_SYMBOLS_FILE
INSTALL_LTDL_TRUE
INSTALL_LTDL_FALSE
CONVENIENCE_LTDL_TRUE
@@ -2166,6 +2167,11 @@ else
llvm_cv_no_link_all_option="-Wl,-noall_load"
llvm_cv_os_type="Darwin"
llvm_cv_platform_type="Unix" ;;
+ *-*-minix*)
+ llvm_cv_link_all_option="-Wl,-all_load"
+ llvm_cv_no_link_all_option="-Wl,-noall_load"
+ llvm_cv_os_type="Minix"
+ llvm_cv_platform_type="Unix" ;;
*-*-freebsd*)
llvm_cv_link_all_option="-Wl,--whole-archive"
llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
@@ -2260,6 +2266,8 @@ else
llvm_cv_target_os_type="Cygwin" ;;
*-*-darwin*)
llvm_cv_target_os_type="Darwin" ;;
+ *-*-minix*)
+ llvm_cv_target_os_type="Minix" ;;
*-*-freebsd*)
llvm_cv_target_os_type="FreeBSD" ;;
*-*-openbsd*)
@@ -5162,8 +5170,9 @@ echo $ECHO_N "checking optimization flags... $ECHO_C" >&6; }
case "$withval" in
default)
case "$llvm_cv_os_type" in
- MingW) optimize_option=-O3 ;;
- *) optimize_option=-O2 ;;
+ FreeBSD) optimize_option=-O2 ;;
+ MingW) optimize_option=-O2 ;;
+ *) optimize_option=-O3 ;;
esac ;;
*) optimize_option="$withval" ;;
esac
@@ -8626,7 +8635,7 @@ cat >>conftest.$ac_ext <<_ACEOF
int
main ()
{
-int main() { return 0; }
+
;
return 0;
}
@@ -8718,7 +8727,7 @@ cat >>conftest.$ac_ext <<_ACEOF
int
main ()
{
-int main() { return 0; }
+
;
return 0;
}
@@ -8787,6 +8796,116 @@ _ACEOF
fi
+{ echo "$as_me:$LINENO: checking for compiler -Wl,-retain-symbols-file option" >&5
+echo $ECHO_N "checking for compiler -Wl,-retain-symbols-file option... $ECHO_C" >&6; }
+if test "${llvm_cv_link_use_retain_symbols_file+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+ oldcflags="$CFLAGS"
+
+ # The following code is from the autoconf manual,
+ # "11.13: Limitations of Usual Tools".
+ # Create a temporary directory $tmp in $TMPDIR (default /tmp).
+ # Use mktemp if possible; otherwise fall back on mkdir,
+ # with $RANDOM to make collisions less likely.
+ : ${TMPDIR=/tmp}
+ {
+ tmp=`
+ (umask 077 && mktemp -d "$TMPDIR/fooXXXXXX") 2>/dev/null
+ ` &&
+ test -n "$tmp" && test -d "$tmp"
+ } || {
+ tmp=$TMPDIR/foo$$-$RANDOM
+ (umask 077 && mkdir "$tmp")
+ } || exit $?
+
+ echo "main" > "$tmp/exports"
+
+ CFLAGS="$CFLAGS -Wl,-retain-symbols-file=$tmp/exports"
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ llvm_cv_link_use_retain_symbols_file=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ llvm_cv_link_use_retain_symbols_file=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+ rm "$tmp/exports"
+ rmdir "$tmp"
+ CFLAGS="$oldcflags"
+ ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+fi
+{ echo "$as_me:$LINENO: result: $llvm_cv_link_use_retain_symbols_file" >&5
+echo "${ECHO_T}$llvm_cv_link_use_retain_symbols_file" >&6; }
+if test "$llvm_cv_link_use_retain_symbols_file" = yes ; then
+ HAVE_LINK_RETAIN_SYMBOLS_FILE=1
+
+ fi
+
+
{ echo "$as_me:$LINENO: checking for an ANSI C-conforming const" >&5
@@ -11156,7 +11275,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<EOF
-#line 11159 "configure"
+#line 11278 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -17749,7 +17868,8 @@ done
-for ac_func in mktemp realpath sbrk setrlimit strdup
+
+for ac_func in mktemp posix_spawn realpath sbrk setrlimit strdup
do
as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
{ echo "$as_me:$LINENO: checking for $ac_func" >&5
@@ -21069,6 +21189,7 @@ OCAMLOPT!$OCAMLOPT$ac_delim
OCAMLDEP!$OCAMLDEP$ac_delim
OCAMLDOC!$OCAMLDOC$ac_delim
GAS!$GAS$ac_delim
+HAVE_LINK_RETAIN_SYMBOLS_FILE!$HAVE_LINK_RETAIN_SYMBOLS_FILE$ac_delim
INSTALL_LTDL_TRUE!$INSTALL_LTDL_TRUE$ac_delim
INSTALL_LTDL_FALSE!$INSTALL_LTDL_FALSE$ac_delim
CONVENIENCE_LTDL_TRUE!$CONVENIENCE_LTDL_TRUE$ac_delim
@@ -21112,7 +21233,7 @@ LIBOBJS!$LIBOBJS$ac_delim
LTLIBOBJS!$LTLIBOBJS$ac_delim
_ACEOF
- if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 91; then
+ if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 92; then
break
elif $ac_last_try; then
{ { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
diff --git a/docs/CodeGenerator.html b/docs/CodeGenerator.html
index 582e252ea18c..b2c3b83175ca 100644
--- a/docs/CodeGenerator.html
+++ b/docs/CodeGenerator.html
@@ -1430,7 +1430,7 @@ bool RegMapping_Fer::compatible_class(MachineFunction &amp;mf,
instruction,
use <tt>TargetInstrInfo::get(opcode)::ImplicitUses</tt>. Pre-colored
registers impose constraints on any register allocation algorithm. The
- register allocator must make sure that none of them is been overwritten by
+ register allocator must make sure that none of them are overwritten by
the values of virtual registers while still alive.</p>
</div>
@@ -2162,7 +2162,7 @@ MOVSX32rm16 -&gt; movsx, 32-bit register, 16-bit memory
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2010-03-25 01:03:04 +0100 (Thu, 25 Mar 2010) $
+ Last modified: $Date: 2010-04-09 20:39:54 +0200 (Fri, 09 Apr 2010) $
</address>
</body>
diff --git a/docs/CommandGuide/lli.pod b/docs/CommandGuide/lli.pod
index 6d1e1c65d430..d368bec8660a 100644
--- a/docs/CommandGuide/lli.pod
+++ b/docs/CommandGuide/lli.pod
@@ -145,9 +145,9 @@ Disable fusing of spill code into instructions.
Make the -lowerinvoke pass insert expensive, but correct, EH code.
-=item B<-enable-eh>
+=item B<-jit-enable-eh>
-Exception handling should be emitted.
+Exception handling should be enabled in the just-in-time compiler.
=item B<-join-liveintervals>
diff --git a/docs/GoldPlugin.html b/docs/GoldPlugin.html
index 77a417f5710d..66e099bad0a0 100644
--- a/docs/GoldPlugin.html
+++ b/docs/GoldPlugin.html
@@ -27,7 +27,7 @@
<p>Building with link time optimization requires cooperation from the
system linker. LTO support on Linux systems requires that you use
the <a href="http://sourceware.org/binutils">gold linker</a> which supports
-LTO via plugins. This is the same system used by the upcoming
+LTO via plugins. This is the same mechanism used by the
<a href="http://gcc.gnu.org/wiki/LinkTimeOptimization">GCC LTO</a>
project.</p>
<p>The LLVM gold plugin implements the
@@ -41,10 +41,15 @@ The same plugin can also be used by other tools such as <tt>ar</tt> and
<div class="doc_section"><a name="build">How to build it</a></div>
<!--=========================================================================-->
<div class="doc_text">
- <p>You need to build gold with plugin support and build the LLVMgold
-plugin.</p>
+  <p>You need to have gold with plugin support and to build the LLVMgold
+plugin. Check whether you have gold by running <tt>/usr/bin/ld -v</tt>. It
+will report &#8220;GNU gold&#8221; or else &#8220;GNU ld&#8221; if not. If you
+have gold, check for plugin support by running <tt>/usr/bin/ld -plugin</tt>.
+If it complains &#8220;missing argument&#8221; then you have plugin support.
+If you instead get an &#8220;unknown option&#8221; error, you will need to
+build gold or install a version with plugin support.</p>
<ul>
- <li>Build gold with plugin support:
+ <li>To build gold with plugin support:
<pre class="doc_code">
mkdir binutils
cd binutils
@@ -56,9 +61,11 @@ cd build
../src/configure --enable-gold --enable-plugins
make all-gold
</pre>
- That should leave you with binutils/build/gold/ld-new which supports the
-<tt>-plugin</tt> option.
-
+  That should leave you with <tt>binutils/build/gold/ld-new</tt>, which
+supports the <tt>-plugin</tt> option. The build also produces
+<tt>binutils/build/binutils/ar</tt> and <tt>nm-new</tt>, which support plugins
+but don't have a visible -plugin option, instead relying on the gold plugin
+being present in <tt>../lib/bfd-plugins</tt> relative to where the binaries
+are placed.
<li>Build the LLVMgold plugin: Configure LLVM with
<tt>--with-binutils-include=/path/to/binutils/src/include</tt> and run
<tt>make</tt>.
@@ -72,7 +79,7 @@ make all-gold
the plugin <tt>.so</tt> file. To find out what link command <tt>gcc</tt>
would run in a given situation, run <tt>gcc -v <em>[...]</em></tt> and look
for the line where it runs <tt>collect2</tt>. Replace that with
- <tt>ld-new -plugin /path/to/LLVMgold.so</tt> to test it out. Once you're
+ <tt>ld-new -plugin /path/to/libLLVMgold.so</tt> to test it out. Once you're
ready to switch to using gold, backup your existing <tt>/usr/bin/ld</tt>
then replace it with <tt>ld-new</tt>.</p>
<p>You can produce bitcode files from <tt>llvm-gcc</tt> using
@@ -83,6 +90,11 @@ make all-gold
passes the <tt>-plugin</tt> option to ld. It will not look for an alternate
linker, which is why you need gold to be the installed system linker in your
path.</p>
+ <p>If you want <tt>ar</tt> and <tt>nm</tt> to work seamlessly as well, install
+ <tt>libLLVMgold.so</tt> to <tt>/usr/lib/bfd-plugins</tt>. If you built your
+ own gold, be sure to install the <tt>ar</tt> and <tt>nm-new</tt> you built to
+ <tt>/usr/bin</tt>.
+  </p>
</div>
<!-- ======================================================================= -->
@@ -141,8 +153,9 @@ $ llvm-gcc -use-gold-plugin a.a b.o -o main # &lt;-- link with LLVMgold plugin
<div class="doc_section"><a name="lto_autotools">Quickstart for using LTO with autotooled projects</a></div>
<!--=========================================================================-->
<div class="doc_text">
- <p><tt>gold</tt>, <tt>ar</tt> and <tt>nm</tt> all support plugins now, so everything should be
- in place for an easy to use LTO build of autotooled projects:</p>
+ <p>Once your system <tt>ld</tt>, <tt>ar</tt> and <tt>nm</tt> all support LLVM
+ bitcode, everything is in place for an easy to use LTO build of autotooled
+ projects:</p>
<ul>
<li>Follow the instructions <a href="#build">on how to build libLLVMgold.so</a>.</li>
<li>Install the newly built binutils to <tt>$PREFIX</tt></li>
@@ -194,7 +207,7 @@ as much as gold could without the plugin.</p>
src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
<a href="mailto:nicholas@metrix.on.ca">Nick Lewycky</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-01-01 23:10:51 -0800 (Thu, 01 Jan 2009) $
+ Last modified: $Date: 2010-04-16 23:58:21 -0800 (Fri, 16 Apr 2010) $
</address>
</body>
</html>
diff --git a/docs/HowToSubmitABug.html b/docs/HowToSubmitABug.html
index 2ac457539628..5cf935dc93e8 100644
--- a/docs/HowToSubmitABug.html
+++ b/docs/HowToSubmitABug.html
@@ -186,9 +186,6 @@ foo.bc, one of the following commands should fail:</p>
<li><tt><b>llc</b> foo.bc</tt></li>
<li><tt><b>llc</b> foo.bc -relocation-model=pic</tt></li>
<li><tt><b>llc</b> foo.bc -relocation-model=static</tt></li>
-<li><tt><b>llc</b> foo.bc -enable-eh</tt></li>
-<li><tt><b>llc</b> foo.bc -relocation-model=pic -enable-eh</tt></li>
-<li><tt><b>llc</b> foo.bc -relocation-model=static -enable-eh</tt></li>
</ol>
<p>If none of these crash, please follow the instructions for a
@@ -202,11 +199,6 @@ the one corresponding to the command above that failed):</p>
-relocation-model=pic</tt></li>
<li><tt><b>bugpoint</b> -run-llc foo.bc --tool-args
-relocation-model=static</tt></li>
-<li><tt><b>bugpoint</b> -run-llc foo.bc --tool-args -enable-eh</tt></li>
-<li><tt><b>bugpoint</b> -run-llc foo.bc --tool-args
- -relocation-model=pic -enable-eh</tt></li>
-<li><tt><b>bugpoint</b> -run-llc foo.bc --tool-args
- -relocation-model=static -enable-eh</tt></li>
</ol>
<p>Please run this, then file a bug with the instructions and reduced .bc file
@@ -348,7 +340,7 @@ the following:</p>
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a>
<br>
- Last modified: $Date: 2009-10-12 16:46:08 +0200 (Mon, 12 Oct 2009) $
+ Last modified: $Date: 2010-05-02 17:36:26 +0200 (Sun, 02 May 2010) $
</address>
</body>
diff --git a/docs/LangRef.html b/docs/LangRef.html
index 04eb45d59805..3ee8de7abf3b 100644
--- a/docs/LangRef.html
+++ b/docs/LangRef.html
@@ -50,6 +50,7 @@
<li><a href="#moduleasm">Module-Level Inline Assembly</a></li>
<li><a href="#datalayout">Data Layout</a></li>
<li><a href="#pointeraliasing">Pointer Aliasing Rules</a></li>
+ <li><a href="#volatile">Volatile Memory Accesses</a></li>
</ol>
</li>
<li><a href="#typesystem">Type System</a>
@@ -89,6 +90,7 @@
<li><a href="#complexconstants">Complex Constants</a></li>
<li><a href="#globalconstants">Global Variable and Function Addresses</a></li>
<li><a href="#undefvalues">Undefined Values</a></li>
+ <li><a href="#trapvalues">Trap Values</a></li>
<li><a href="#blockaddress">Addresses of Basic Blocks</a></li>
<li><a href="#constantexprs">Constant Expressions</a></li>
</ol>
@@ -849,11 +851,15 @@ define i32 @main() { <i>; i32()* </i>
<p>LLVM allows an explicit section to be specified for globals. If the target
supports it, it will emit globals to the section specified.</p>
-<p>An explicit alignment may be specified for a global. If not present, or if
- the alignment is set to zero, the alignment of the global is set by the
- target to whatever it feels convenient. If an explicit alignment is
- specified, the global is forced to have at least that much alignment. All
- alignments must be a power of 2.</p>
+<p>An explicit alignment may be specified for a global, which must be a power
+ of 2. If not present, or if the alignment is set to zero, the alignment of
+ the global is set by the target to whatever it feels convenient. If an
+ explicit alignment is specified, the global is forced to have exactly that
+ alignment. Targets and optimizers are not allowed to over-align the global
+ if the global has an assigned section. In this case, the extra alignment
+ could be observable: for example, code could assume that the globals are
+  densely packed in their section and try to iterate over them as an array;
+  alignment padding would break this iteration.</p>
<p>For example, the following defines a global in a numbered address space with
an initializer, section, and alignment:</p>
@@ -1297,7 +1303,7 @@ target datalayout = "<i>layout specification</i>"
</dl>
<p>When constructing the data layout for a given target, LLVM starts with a
- default set of specifications which are then (possibly) overriden by the
+ default set of specifications which are then (possibly) overridden by the
specifications in the <tt>datalayout</tt> keyword. The default specifications
are given in this list:</p>
@@ -1393,6 +1399,24 @@ to implement type-based alias analysis.</p>
</div>
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="volatile">Volatile Memory Accesses</a>
+</div>
+
+<div class="doc_text">
+
+<p>Certain memory accesses, such as <a href="#i_load"><tt>load</tt></a>s, <a
+href="#i_store"><tt>store</tt></a>s, and <a
+href="#int_memcpy"><tt>llvm.memcpy</tt></a>s may be marked <tt>volatile</tt>.
+The optimizers must not change the number of volatile operations or change their
+order of execution relative to other volatile operations. The optimizers
+<i>may</i> change the order of volatile operations relative to non-volatile
+operations. This is not Java's "volatile" and has no cross-thread
+synchronization behavior.</p>
+
+</div>
+
<!-- *********************************************************************** -->
<div class="doc_section"> <a name="typesystem">Type System</a> </div>
<!-- *********************************************************************** -->
@@ -2303,6 +2327,114 @@ has undefined behavior.</p>
</div>
<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="trapvalues">Trap Values</a></div>
+<div class="doc_text">
+
+<p>Trap values are similar to <a href="#undefvalues">undef values</a>; however,
+ instead of representing an unspecified bit pattern, they represent the
+ fact that an instruction or constant expression which cannot evoke side
+ effects has nevertheless detected a condition which results in undefined
+ behavior.</p>
+
+<p>There is currently no way of representing trap values in the IR; they
+ only exist when produced by operations such as
+ <a href="#i_add"><tt>add</tt></a> with the <tt>nsw</tt> flag.</p>
+
+<p>Trap value behavior is defined in terms of value <i>dependence</i>:</p>
+
+<p>
+<ul>
+<li>Values other than <a href="#i_phi"><tt>phi</tt></a> nodes depend on
+ their operands.</li>
+
+<li><a href="#i_phi"><tt>Phi</tt></a> nodes depend on the operand corresponding
+ to their dynamic predecessor basic block.</li>
+
+<li>Function arguments depend on the corresponding actual argument values in
+ the dynamic callers of their functions.</li>
+
+<li><a href="#i_call"><tt>Call</tt></a> instructions depend on the
+ <a href="#i_ret"><tt>ret</tt></a> instructions that dynamically transfer
+ control back to them.</li>
+
+<li><a href="#i_invoke"><tt>Invoke</tt></a> instructions depend on the
+ <a href="#i_ret"><tt>ret</tt></a>, <a href="#i_unwind"><tt>unwind</tt></a>,
+ or exception-throwing call instructions that dynamically transfer control
+ back to them.</li>
+
+<li>Non-volatile loads and stores depend on the most recent stores to all of the
+ referenced memory addresses, following the order in the IR
+ (including loads and stores implied by intrinsics such as
+ <a href="#int_memcpy"><tt>@llvm.memcpy</tt></a>.)</li>
+
+<!-- TODO: In the case of multiple threads, this only applies if the store
+ "happens-before" the load or store. -->
+
+<!-- TODO: floating-point exception state -->
+
+<li>An instruction with externally visible side effects depends on the most
+ recent preceding instruction with externally visible side effects, following
+ the order in the IR. (This includes volatile loads and stores.)</li>
+
+<li>An instruction <i>control-depends</i> on a
+ <a href="#terminators">terminator instruction</a>
+ if the terminator instruction has multiple successors and the instruction
+ is always executed when control transfers to one of the successors, and
+   may not be executed when control is transferred to another.</li>
+
+<li>Dependence is transitive.</li>
+
+</ul>
+</p>
+
+<p>Whenever a trap value is generated, all values which depend on it evaluate
+   to trap. If they have side effects, they evoke their side effects as if each
+ operand with a trap value were undef. If they have externally-visible side
+ effects, the behavior is undefined.</p>
+
+<p>Here are some examples:</p>
+
+<div class="doc_code">
+<pre>
+entry:
+ %trap = sub nuw i32 0, 1 ; Results in a trap value.
+ %still_trap = and i32 %trap, 0 ; Whereas (and i32 undef, 0) would return 0.
+ %trap_yet_again = getelementptr i32* @h, i32 %still_trap
+ store i32 0, i32* %trap_yet_again ; undefined behavior
+
+ store i32 %trap, i32* @g ; Trap value conceptually stored to memory.
+ %trap2 = load i32* @g ; Returns a trap value, not just undef.
+
+ volatile store i32 %trap, i32* @g ; External observation; undefined behavior.
+
+ %narrowaddr = bitcast i32* @g to i16*
+ %wideaddr = bitcast i32* @g to i64*
+  %trap3 = load i16* %narrowaddr ; Returns a trap value.
+  %trap4 = load i64* %wideaddr   ; Returns a trap value.
+
+  %cmp = icmp slt i32 %trap, 0   ; Returns a trap value.
+  br i1 %cmp, label %true, label %end ; Branch to either destination.
+
+true:
+ volatile store i32 0, i32* @g ; This is control-dependent on %cmp, so
+ ; it has undefined behavior.
+ br label %end
+
+end:
+ %p = phi i32 [ 0, %entry ], [ 1, %true ]
+ ; Both edges into this PHI are
+ ; control-dependent on %cmp, so this
+ ; always results in a trap value.
+
+ volatile store i32 0, i32* @g ; %end is control-equivalent to %entry
+ ; so this is defined (ignoring earlier
+ ; undefined behavior in this example).
+</pre>
+</div>
+
+</div>
+
+<!-- ======================================================================= -->
<div class="doc_subsection"><a name="blockaddress">Addresses of Basic
Blocks</a></div>
<div class="doc_text">
@@ -2516,6 +2648,31 @@ call void asm alignstack "eieio", ""()
documented here. Constraints on what can be done (e.g. duplication, moving,
etc need to be documented). This is probably best done by reference to
another document that covers inline asm from a holistic perspective.</p>
+</div>
+
+<div class="doc_subsubsection">
+<a name="inlineasm_md">Inline Asm Metadata</a>
+</div>
+
+<div class="doc_text">
+
+<p>The call instructions that wrap inline asm nodes may have a "!srcloc" MDNode
+   attached to them that contains a constant integer. If present, the code
+   generator will use the integer as the location cookie value when reporting
+ errors through the LLVMContext error reporting mechanisms. This allows a
+ front-end to correlate backend errors that occur with inline asm back to the
+ source code that produced it. For example:</p>
+
+<div class="doc_code">
+<pre>
+call void asm sideeffect "something bad", ""()<b>, !srcloc !42</b>
+...
+!42 = !{ i32 1234567 }
+</pre>
+</div>
+
+<p>It is up to the front-end to make sense of the magic numbers it places in the
+ IR.</p>
</div>
@@ -2637,8 +2794,12 @@ should not be exposed to source languages.</p>
</div>
<div class="doc_text">
-
-<p>TODO: Describe this.</p>
+<pre>
+%0 = type { i32, void ()* }
+@llvm.global_ctors = appending global [1 x %0] [%0 { i32 65535, void ()* @ctor }]
+</pre>
+<p>The <tt>@llvm.global_ctors</tt> array contains a list of constructor functions and associated priorities. The functions referenced by this array will be called in ascending order of priority (i.e. lowest first) when the module is loaded. The order of functions with the same priority is not defined.
+</p>
</div>
@@ -2648,8 +2809,13 @@ should not be exposed to source languages.</p>
</div>
<div class="doc_text">
+<pre>
+%0 = type { i32, void ()* }
+@llvm.global_dtors = appending global [1 x %0] [%0 { i32 65535, void ()* @dtor }]
+</pre>
-<p>TODO: Describe this.</p>
+<p>The <tt>@llvm.global_dtors</tt> array contains a list of destructor functions and associated priorities. The functions referenced by this array will be called in descending order of priority (i.e. highest first) when the module is unloaded. The order of functions with the same priority is not defined.
+</p>
</div>
@@ -2682,7 +2848,7 @@ Instructions</a> </div>
control flow, not values (the one exception being the
'<a href="#i_invoke"><tt>invoke</tt></a>' instruction).</p>
-<p>There are six different terminator instructions: the
+<p>There are seven different terminator instructions: the
'<a href="#i_ret"><tt>ret</tt></a>' instruction, the
'<a href="#i_br"><tt>br</tt></a>' instruction, the
'<a href="#i_switch"><tt>switch</tt></a>' instruction, the
@@ -3079,7 +3245,8 @@ Instruction</a> </div>
<p><tt>nuw</tt> and <tt>nsw</tt> stand for &quot;No Unsigned Wrap&quot;
and &quot;No Signed Wrap&quot;, respectively. If the <tt>nuw</tt> and/or
<tt>nsw</tt> keywords are present, the result value of the <tt>add</tt>
- is undefined if unsigned and/or signed overflow, respectively, occurs.</p>
+ is a <a href="#trapvalues">trap value</a> if unsigned and/or signed overflow,
+ respectively, occurs.</p>
<h5>Example:</h5>
<pre>
@@ -3159,7 +3326,8 @@ Instruction</a> </div>
<p><tt>nuw</tt> and <tt>nsw</tt> stand for &quot;No Unsigned Wrap&quot;
and &quot;No Signed Wrap&quot;, respectively. If the <tt>nuw</tt> and/or
<tt>nsw</tt> keywords are present, the result value of the <tt>sub</tt>
- is undefined if unsigned and/or signed overflow, respectively, occurs.</p>
+ is a <a href="#trapvalues">trap value</a> if unsigned and/or signed overflow,
+ respectively, occurs.</p>
<h5>Example:</h5>
<pre>
@@ -3245,7 +3413,8 @@ Instruction</a> </div>
<p><tt>nuw</tt> and <tt>nsw</tt> stand for &quot;No Unsigned Wrap&quot;
and &quot;No Signed Wrap&quot;, respectively. If the <tt>nuw</tt> and/or
<tt>nsw</tt> keywords are present, the result value of the <tt>mul</tt>
- is undefined if unsigned and/or signed overflow, respectively, occurs.</p>
+ is a <a href="#trapvalues">trap value</a> if unsigned and/or signed overflow,
+ respectively, occurs.</p>
<h5>Example:</h5>
<pre>
@@ -3350,8 +3519,8 @@ Instruction</a> </div>
a 32-bit division of -2147483648 by -1.</p>
<p>If the <tt>exact</tt> keyword is present, the result value of the
- <tt>sdiv</tt> is undefined if the result would be rounded or if overflow
- would occur.</p>
+ <tt>sdiv</tt> is a <a href="#trapvalues">trap value</a> if the result would
+ be rounded or if overflow would occur.</p>
<h5>Example:</h5>
<pre>
@@ -4133,9 +4302,8 @@ Instruction</a> </div>
from which to load. The pointer must point to
a <a href="#t_firstclass">first class</a> type. If the <tt>load</tt> is
marked as <tt>volatile</tt>, then the optimizer is not allowed to modify the
- number or order of execution of this <tt>load</tt> with other
- volatile <tt>load</tt> and <tt><a href="#i_store">store</a></tt>
- instructions.</p>
+ number or order of execution of this <tt>load</tt> with other <a
+ href="#volatile">volatile operations</a>.</p>
<p>The optional constant <tt>align</tt> argument specifies the alignment of the
operation (that is, the alignment of the memory address). A value of 0 or an
@@ -4191,11 +4359,10 @@ Instruction</a> </div>
and an address at which to store it. The type of the
'<tt>&lt;pointer&gt;</tt>' operand must be a pointer to
the <a href="#t_firstclass">first class</a> type of the
- '<tt>&lt;value&gt;</tt>' operand. If the <tt>store</tt> is marked
- as <tt>volatile</tt>, then the optimizer is not allowed to modify the number
- or order of execution of this <tt>store</tt> with other
- volatile <tt>load</tt> and <tt><a href="#i_store">store</a></tt>
- instructions.</p>
+ '<tt>&lt;value&gt;</tt>' operand. If the <tt>store</tt> is marked as
+ <tt>volatile</tt>, then the optimizer is not allowed to modify the number or
+ order of execution of this <tt>store</tt> with other <a
+ href="#volatile">volatile operations</a>.</p>
<p>The optional constant "align" argument specifies the alignment of the
operation (that is, the alignment of the memory address). A value of 0 or an
@@ -4334,13 +4501,14 @@ entry:
</pre>
<p>If the <tt>inbounds</tt> keyword is present, the result value of the
- <tt>getelementptr</tt> is undefined if the base pointer is not an
- <i>in bounds</i> address of an allocated object, or if any of the addresses
- that would be formed by successive addition of the offsets implied by the
- indices to the base address with infinitely precise arithmetic are not an
- <i>in bounds</i> address of that allocated object.
- The <i>in bounds</i> addresses for an allocated object are all the addresses
- that point into the object, plus the address one byte past the end.</p>
+ <tt>getelementptr</tt> is a <a href="#trapvalues">trap value</a> if the
+ base pointer is not an <i>in bounds</i> address of an allocated object,
+ or if any of the addresses that would be formed by successive addition of
+ the offsets implied by the indices to the base address with infinitely
+ precise arithmetic are not an <i>in bounds</i> address of that allocated
+ object. The <i>in bounds</i> addresses for an allocated object are all
+ the addresses that point into the object, plus the address one byte past
+ the end.</p>
<p>If the <tt>inbounds</tt> keyword is not present, the offsets are added to
the base address with silently-wrapping two's complement arithmetic, and
@@ -5865,17 +6033,14 @@ LLVM</a>.</p>
<h5>Syntax:</h5>
<p>This is an overloaded intrinsic. You can use <tt>llvm.memcpy</tt> on any
- integer bit width. Not all targets support all bit widths however.</p>
+ integer bit width and for different address spaces. Not all targets support
+ all bit widths however.</p>
<pre>
- declare void @llvm.memcpy.i8(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
- i8 &lt;len&gt;, i32 &lt;align&gt;)
- declare void @llvm.memcpy.i16(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
- i16 &lt;len&gt;, i32 &lt;align&gt;)
- declare void @llvm.memcpy.i32(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
- i32 &lt;len&gt;, i32 &lt;align&gt;)
- declare void @llvm.memcpy.i64(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
- i64 &lt;len&gt;, i32 &lt;align&gt;)
+ declare void @llvm.memcpy.p0i8.p0i8.i32(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
+ i32 &lt;len&gt;, i32 &lt;align&gt;, i1 &lt;isvolatile&gt;)
+ declare void @llvm.memcpy.p0i8.p0i8.i64(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
+ i64 &lt;len&gt;, i32 &lt;align&gt;, i1 &lt;isvolatile&gt;)
</pre>
<h5>Overview:</h5>
@@ -5883,19 +6048,28 @@ LLVM</a>.</p>
source location to the destination location.</p>
<p>Note that, unlike the standard libc function, the <tt>llvm.memcpy.*</tt>
- intrinsics do not return a value, and takes an extra alignment argument.</p>
+   intrinsics do not return a value, take extra alignment and isvolatile
+   arguments, and allow the pointers to be in specified address spaces.</p>
<h5>Arguments:</h5>
+
<p>The first argument is a pointer to the destination, the second is a pointer
to the source. The third argument is an integer argument specifying the
- number of bytes to copy, and the fourth argument is the alignment of the
- source and destination locations.</p>
+ number of bytes to copy, the fourth argument is the alignment of the
+ source and destination locations, and the fifth is a boolean indicating a
+ volatile access.</p>
<p>If the call to this intrinsic has an alignment value that is not 0 or 1,
then the caller guarantees that both the source and destination pointers are
aligned to that boundary.</p>
+<p>If the <tt>isvolatile</tt> parameter is <tt>true</tt>, the
+ <tt>llvm.memcpy</tt> call is a <a href="#volatile">volatile operation</a>.
+ The detailed access behavior is not very cleanly specified and it is unwise
+ to depend on it.</p>
+
<h5>Semantics:</h5>
+
<p>The '<tt>llvm.memcpy.*</tt>' intrinsics copy a block of memory from the
source location to the destination location, which are not allowed to
overlap. It copies "len" bytes of memory over. If the argument is known to
@@ -5913,17 +6087,14 @@ LLVM</a>.</p>
<h5>Syntax:</h5>
<p>This is an overloaded intrinsic. You can use llvm.memmove on any integer bit
- width. Not all targets support all bit widths however.</p>
+   width and for different address spaces. Not all targets support all bit
+ widths however.</p>
<pre>
- declare void @llvm.memmove.i8(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
- i8 &lt;len&gt;, i32 &lt;align&gt;)
- declare void @llvm.memmove.i16(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
- i16 &lt;len&gt;, i32 &lt;align&gt;)
- declare void @llvm.memmove.i32(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
- i32 &lt;len&gt;, i32 &lt;align&gt;)
- declare void @llvm.memmove.i64(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
- i64 &lt;len&gt;, i32 &lt;align&gt;)
+ declare void @llvm.memmove.p0i8.p0i8.i32(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
+ i32 &lt;len&gt;, i32 &lt;align&gt;, i1 &lt;isvolatile&gt;)
+ declare void @llvm.memmove.p0i8.p0i8.i64(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
+ i64 &lt;len&gt;, i32 &lt;align&gt;, i1 &lt;isvolatile&gt;)
</pre>
<h5>Overview:</h5>
@@ -5933,19 +6104,28 @@ LLVM</a>.</p>
overlap.</p>
<p>Note that, unlike the standard libc function, the <tt>llvm.memmove.*</tt>
- intrinsics do not return a value, and takes an extra alignment argument.</p>
+   intrinsics do not return a value, take extra alignment and isvolatile
+   arguments, and allow the pointers to be in specified address spaces.</p>
<h5>Arguments:</h5>
+
<p>The first argument is a pointer to the destination, the second is a pointer
to the source. The third argument is an integer argument specifying the
- number of bytes to copy, and the fourth argument is the alignment of the
- source and destination locations.</p>
+ number of bytes to copy, the fourth argument is the alignment of the
+ source and destination locations, and the fifth is a boolean indicating a
+ volatile access.</p>
<p>If the call to this intrinsic has an alignment value that is not 0 or 1,
then the caller guarantees that the source and destination pointers are
aligned to that boundary.</p>
+<p>If the <tt>isvolatile</tt> parameter is <tt>true</tt>, the
+ <tt>llvm.memmove</tt> call is a <a href="#volatile">volatile operation</a>.
+ The detailed access behavior is not very cleanly specified and it is unwise
+ to depend on it.</p>
+
<h5>Semantics:</h5>
+
<p>The '<tt>llvm.memmove.*</tt>' intrinsics copy a block of memory from the
source location to the destination location, which may overlap. It copies
"len" bytes of memory over. If the argument is known to be aligned to some
@@ -5963,17 +6143,14 @@ LLVM</a>.</p>
<h5>Syntax:</h5>
<p>This is an overloaded intrinsic. You can use llvm.memset on any integer bit
- width. Not all targets support all bit widths however.</p>
+ width and for different address spaces. Not all targets support all bit
+ widths however.</p>
<pre>
- declare void @llvm.memset.i8(i8 * &lt;dest&gt;, i8 &lt;val&gt;,
- i8 &lt;len&gt;, i32 &lt;align&gt;)
- declare void @llvm.memset.i16(i8 * &lt;dest&gt;, i8 &lt;val&gt;,
- i16 &lt;len&gt;, i32 &lt;align&gt;)
- declare void @llvm.memset.i32(i8 * &lt;dest&gt;, i8 &lt;val&gt;,
- i32 &lt;len&gt;, i32 &lt;align&gt;)
- declare void @llvm.memset.i64(i8 * &lt;dest&gt;, i8 &lt;val&gt;,
- i64 &lt;len&gt;, i32 &lt;align&gt;)
+ declare void @llvm.memset.p0i8.i32(i8 * &lt;dest&gt;, i8 &lt;val&gt;,
+ i32 &lt;len&gt;, i32 &lt;align&gt;, i1 &lt;isvolatile&gt;)
+ declare void @llvm.memset.p0i8.i64(i8 * &lt;dest&gt;, i8 &lt;val&gt;,
+ i64 &lt;len&gt;, i32 &lt;align&gt;, i1 &lt;isvolatile&gt;)
</pre>
<h5>Overview:</h5>
@@ -5981,7 +6158,8 @@ LLVM</a>.</p>
particular byte value.</p>
<p>Note that, unlike the standard libc function, the <tt>llvm.memset</tt>
- intrinsic does not return a value, and takes an extra alignment argument.</p>
+   intrinsic does not return a value, takes extra alignment and isvolatile
+   arguments, and the destination can be in an arbitrary address space.</p>
<h5>Arguments:</h5>
<p>The first argument is a pointer to the destination to fill, the second is the
@@ -5993,6 +6171,11 @@ LLVM</a>.</p>
then the caller guarantees that the destination pointer is aligned to that
boundary.</p>
+<p>If the <tt>isvolatile</tt> parameter is <tt>true</tt>, the
+ <tt>llvm.memset</tt> call is a <a href="#volatile">volatile operation</a>.
+ The detailed access behavior is not very cleanly specified and it is unwise
+ to depend on it.</p>
+
<h5>Semantics:</h5>
<p>The '<tt>llvm.memset.*</tt>' intrinsics fill "len" bytes of memory starting
at the destination location. If the argument is known to be aligned to some
@@ -7590,7 +7773,7 @@ LLVM</a>.</p>
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2010-03-15 05:12:21 +0100 (Mon, 15 Mar 2010) $
+ Last modified: $Date: 2010-05-03 16:59:34 +0200 (Mon, 03 May 2010) $
</address>
</body>
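For OCaml bindings users tracking the memcpy/memmove/memset signature change described above, a hedged sketch of declaring the new five-argument, address-space-qualified form (the helper name is hypothetical; `module_context` and `declare_function` are assumed from the bindings):

    let declare_memcpy64 m =
      let ctx = module_context m in
      let i8p = pointer_type (i8_type ctx) in
      (* void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32 align, i1 isvolatile) *)
      let ty = function_type (void_type ctx)
                 [| i8p; i8p; i64_type ctx; i32_type ctx; i1_type ctx |] in
      declare_function "llvm.memcpy.p0i8.p0i8.i64" ty m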
diff --git a/docs/LinkTimeOptimization.html b/docs/LinkTimeOptimization.html
index 0934b47cbc96..c6a23999ff84 100644
--- a/docs/LinkTimeOptimization.html
+++ b/docs/LinkTimeOptimization.html
@@ -256,7 +256,7 @@ $ llvm-gcc a.o main.o -o main # &lt;-- standard link command without any modific
<p>In this phase, the linker reads optimized a native object file and
updates the internal global symbol table to reflect any changes. The linker
also collects information about any changes in use of external symbols by
- LLVM bitcode files. In the examle above, the linker notes that
+ LLVM bitcode files. In the example above, the linker notes that
<tt>foo4()</tt> is not used any more. If dead code stripping is enabled then
the linker refreshes the live symbol information appropriately and performs
dead code stripping.</p>
@@ -382,7 +382,7 @@ of the native object files.</p>
Devang Patel and Nick Kledzik<br>
<a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-10-12 16:46:08 +0200 (Mon, 12 Oct 2009) $
+ Last modified: $Date: 2010-04-25 00:01:40 +0200 (Sun, 25 Apr 2010) $
</address>
</body>
diff --git a/docs/Passes.html b/docs/Passes.html
index e3403b4fdb61..5d8120c47090 100644
--- a/docs/Passes.html
+++ b/docs/Passes.html
@@ -114,7 +114,6 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print " <p>\n" if !
<tr><td><a href="#block-placement">-block-placement</a></td><td>Profile Guided Basic Block Placement</td></tr>
<tr><td><a href="#break-crit-edges">-break-crit-edges</a></td><td>Break critical edges in CFG</td></tr>
<tr><td><a href="#codegenprepare">-codegenprepare</a></td><td>Prepare a function for code generation </td></tr>
-<tr><td><a href="#condprop">-condprop</a></td><td>Conditional Propagation</td></tr>
<tr><td><a href="#constmerge">-constmerge</a></td><td>Merge Duplicate Global Constants</td></tr>
<tr><td><a href="#constprop">-constprop</a></td><td>Simple constant propagation</td></tr>
<tr><td><a href="#dce">-dce</a></td><td>Dead Code Elimination</td></tr>
@@ -641,15 +640,6 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print " <p>\n" if !
<!-------------------------------------------------------------------------- -->
<div class="doc_subsection">
- <a name="condprop">Conditional Propagation</a>
-</div>
-<div class="doc_text">
- <p>This pass propagates information about conditional expressions through the
- program, allowing it to eliminate conditional branches in some cases.</p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
<a name="constmerge">Merge Duplicate Global Constants</a>
</div>
<div class="doc_text">
@@ -1773,7 +1763,7 @@ if (X &lt; 3) {</pre>
<a href="mailto:rspencer@x10sys.com">Reid Spencer</a><br>
<a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2010-03-01 20:24:17 +0100 (Mon, 01 Mar 2010) $
+ Last modified: $Date: 2010-04-22 22:48:34 +0200 (Thu, 22 Apr 2010) $
</address>
</body>
diff --git a/docs/ReleaseNotes.html b/docs/ReleaseNotes.html
index 5f6304b1fcd7..5422a9342677 100644
--- a/docs/ReleaseNotes.html
+++ b/docs/ReleaseNotes.html
@@ -4,20 +4,20 @@
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<link rel="stylesheet" href="llvm.css" type="text/css">
- <title>LLVM 2.7 Release Notes</title>
+ <title>LLVM 2.8 Release Notes</title>
</head>
<body>
-<div class="doc_title">LLVM 2.7 Release Notes</div>
+<div class="doc_title">LLVM 2.8 Release Notes</div>
<img align=right src="http://llvm.org/img/DragonSmall.png"
- width="136" height="136">
+ width="136" height="136" alt="LLVM Dragon Logo">
<ol>
<li><a href="#intro">Introduction</a></li>
<li><a href="#subproj">Sub-project Status Update</a></li>
- <li><a href="#externalproj">External Projects Using LLVM 2.7</a></li>
- <li><a href="#whatsnew">What's New in LLVM 2.7?</a></li>
+ <li><a href="#externalproj">External Projects Using LLVM 2.8</a></li>
+ <li><a href="#whatsnew">What's New in LLVM 2.8?</a></li>
<li><a href="GettingStarted.html">Installation Instructions</a></li>
<li><a href="#portability">Portability and Supported Platforms</a></li>
<li><a href="#knownproblems">Known Problems</a></li>
@@ -28,10 +28,10 @@
<p>Written by the <a href="http://llvm.org">LLVM Team</a></p>
</div>
-<h1 style="color:red">These are in-progress notes for the upcoming LLVM 2.7
+<h1 style="color:red">These are in-progress notes for the upcoming LLVM 2.8
release.<br>
You may prefer the
-<a href="http://llvm.org/releases/2.6/docs/ReleaseNotes.html">LLVM 2.6
+<a href="http://llvm.org/releases/2.7/docs/ReleaseNotes.html">LLVM 2.7
Release Notes</a>.</h1>
<!-- *********************************************************************** -->
@@ -43,7 +43,7 @@ Release Notes</a>.</h1>
<div class="doc_text">
<p>This document contains the release notes for the LLVM Compiler
-Infrastructure, release 2.7. Here we describe the status of LLVM, including
+Infrastructure, release 2.8. Here we describe the status of LLVM, including
major improvements from the previous release and significant known problems.
All LLVM releases may be downloaded from the <a
href="http://llvm.org/releases/">LLVM releases web site</a>.</p>
@@ -59,10 +59,6 @@ main LLVM web page, this document applies to the <i>next</i> release, not the
current one. To see the release notes for a specific release, please see the
<a href="http://llvm.org/releases/">releases page</a>.</p>
-
-<p>FIXME: llvm.org moved to new server, mention new logo, Ted and Doug new code
- owners.</p>
-
</div>
@@ -71,30 +67,22 @@ Almost dead code.
include/llvm/Analysis/LiveValues.h => Dan
lib/Transforms/IPO/MergeFunctions.cpp => consider for 2.8.
llvm/Analysis/PointerTracking.h => Edwin wants this, consider for 2.8.
- ABCD, SCCVN, GEPSplitterPass
+ ABCD, GEPSplitterPass
MSIL backend?
lib/Transforms/Utils/SSI.cpp -> ABCD depends on it.
-->
-<!-- Features that need text if they're finished for 2.7:
- gcc plugin.
+<!-- Features that need text if they're finished for 2.8:
+ combiner-aa?
strong phi elim
- variable debug info for optimized code
- postalloc scheduler: anti dependence breaking, hazard recognizer?
- metadata
+ llvm.dbg.value: variable debug info for optimized code
loop dependence analysis
- ELF Writer? How stable?
- <li>PostRA scheduler improvements, ARM adoption (David Goodwin).</li>
-->
<!-- for announcement email:
Logo web page.
- llvm devmtg
- compiler_rt
- KLEE web page at klee.llvm.org
Many new papers added to /pubs/
- Mention gcc plugin.
-->
<!-- *********************************************************************** -->
@@ -105,7 +93,7 @@ Almost dead code.
<div class="doc_text">
<p>
-The LLVM 2.7 distribution currently consists of code from the core LLVM
+The LLVM 2.8 distribution currently consists of code from the core LLVM
repository (which roughly includes the LLVM optimizers, code generators
and supporting tools), the Clang repository and the llvm-gcc repository. In
addition to this code, the LLVM Project includes other sub-projects that are in
@@ -122,26 +110,49 @@ development. Here we include updates on these subprojects.
<div class="doc_text">
-<p>The <a href="http://clang.llvm.org/">Clang project</a> is ...</p>
+<p><a href="http://clang.llvm.org/">Clang</a> is an LLVM front end for the C,
+C++, and Objective-C languages. Clang aims to provide a better user experience
+through expressive diagnostics, a high level of conformance to language
+standards, fast compilation, and low memory use. Like LLVM, Clang provides a
+modular, library-based architecture that makes it suitable for creating or
+integrating with other development tools. Clang is considered a
+production-quality compiler for C and Objective-C on x86 (32- and 64-bit).</p>
<p>In the LLVM 2.7 time-frame, the Clang team has made many improvements:</p>
<ul>
-<li>FIXME: C++! Include a link to cxx_compatibility.html</li>
-
-<li>FIXME: Static Analyzer improvements?</li>
+
+<li>C++ Support: Clang is now capable of self-hosting! While still
+alpha-quality, Clang's C++ support has matured enough to build LLVM and Clang,
+and C++ is now enabled by default. See the <a
+href="http://clang.llvm.org/cxx_compatibility.html">Clang C++ compatibility
+page</a> for common C++ migration issues.</li>
+
+<li>Objective-C: Clang now includes experimental support for an updated
+Objective-C ABI on non-Darwin platforms. This includes support for non-fragile
+instance variables and accelerated proxies, as well as greater potential for
+future optimisations. The new ABI is used when compiling with the
+-fobjc-nonfragile-abi and -fgnu-runtime options. Code compiled with these
+options may be mixed with code compiled with GCC or clang using the old GNU ABI,
+but requires the libobjc2 runtime from the GNUstep project.</li>
+
+<li>New warnings: Clang contains a number of new warnings, including
+control-flow warnings (unreachable code, missing return statements in a
+non-<code>void</code> function, etc.), sign-comparison warnings, and improved
+format-string warnings.</li>
<li>CIndex API and Python bindings: Clang now includes a C API as part of the
-CIndex library. Although we make make some changes to the API in the future, it
+CIndex library. Although we may make some changes to the API in the future, it
is intended to be stable and has been designed for use by external projects. See
the Clang
doxygen <a href="http://clang.llvm.org/doxygen/group__CINDEX.html">CIndex</a>
-documentation for more details. The CIndex API also includings an preliminary
+documentation for more details. The CIndex API also includes a preliminary
set of Python bindings.</li>
<li>ARM Support: Clang now has ABI support for both the Darwin and Linux ARM
ABIs. Coupled with many improvements to the LLVM ARM backend, Clang is now
-suitable for use as a a beta quality ARM compiler.</li>
+suitable for use as a beta quality ARM compiler.</li>
+
</ul>
</div>
@@ -152,13 +163,18 @@ suitable for use as a a beta quality ARM compiler.</li>
<div class="doc_text">
-<p>Previously announced in the 2.4, 2.5, and 2.6 LLVM releases, the Clang project also
-includes an early stage static source code analysis tool for <a
-href="http://clang.llvm.org/StaticAnalysis.html">automatically finding bugs</a>
-in C and Objective-C programs. The tool performs checks to find
-bugs that occur on a specific path within a program.</p>
-
-<p>In the LLVM 2.7 time-frame, the analyzer core has sprouted legs and...</p>
+<p>The <a href="http://clang-analyzer.llvm.org/">Clang Static Analyzer</a>
+ project is an effort to use static source code analysis techniques to
+ automatically find bugs in C and Objective-C programs (and hopefully <a
+ href="http://clang-analyzer.llvm.org/dev_cxx.html">C++ in the
+ future</a>!). The tool is very good at finding bugs that occur on specific
+ paths through code, such as on error conditions.</p>
+
+<p>In the LLVM 2.7 time-frame, the analyzer core has made several major and
+ minor improvements, including better support for tracking the fields of
+ structures, initial support (not enabled by default yet) for doing
+   interprocedural (cross-function) analysis, and a number of new checks.
+</p>
</div>
@@ -215,7 +231,8 @@ libgcc routines).</p>
<p>
All of the code in the compiler-rt project is available under the standard LLVM
-License, a "BSD-style" license.</p>
+License, a "BSD-style" license. New in LLVM 2.7: compiler_rt now
+supports ARM targets.</p>
</div>
@@ -244,12 +261,11 @@ becomes llvm-gcc-4.5!
DragonEgg is still a work in progress. Currently C works very well, while C++,
Ada and Fortran work fairly well. All other languages either don't work at all,
or only work poorly. For the moment only the x86-32 and x86-64 targets are
-supported, and only on linux.
+supported, and only on Linux and Darwin (Darwin needs an additional gcc patch).
</p>
<p>
-DragonEgg has not yet been released. Once gcc-4.5 has been released, dragonegg
-will probably be released as part of the following LLVM release.
+DragonEgg is a new project which is seeing its first release with llvm-2.7.
</p>
</div>
@@ -262,9 +278,27 @@ will probably be released as part of the following LLVM release.
<div class="doc_text">
<p>
-The LLVM Machine Code (MC) Toolkit project is ...
+The LLVM Machine Code (aka MC) sub-project of LLVM was created to solve a number
+of problems in the realm of assembly, disassembly, object file format handling,
+and a number of other related areas that CPU instruction-set level tools work
+in. It is a sub-project of LLVM which provides it with a number of advantages
+over other compilers that do not have tightly integrated assembly-level tools.
+For a gentle introduction, please see the <a
+href="http://blog.llvm.org/2010/04/intro-to-llvm-mc-project.html">Intro to the
+LLVM MC Project Blog Post</a>.
</p>
+<p>2.7 includes major parts of the work required by the new MC Project. A few
+ targets have been refactored to support it, and work is underway to support a
+ native assembler in LLVM. This work is not complete in LLVM 2.7, but it has
+ made substantially more progress on LLVM mainline.</p>
+
+<p>One minor example of what MC can do is to transcode an AT&amp;T syntax
+  X86 .s file into Intel syntax. You can do this with something like:</p>
+<pre>
+ llvm-mc foo.s -output-asm-variant=1 -o foo-intel.s
+</pre>
+
</div>
@@ -281,53 +315,6 @@ The LLVM Machine Code (MC) Toolkit project is ...
projects that have already been updated to work with LLVM 2.7.</p>
</div>
-
-<!--=========================================================================-->
-<div class="doc_subsection">
-<a name="Rubinius">Rubinius</a>
-</div>
-
-<div class="doc_text">
-Need update.
-<!--
-<p><a href="http://github.com/evanphx/rubinius">Rubinius</a> is an environment
-for running Ruby code which strives to write as much of the core class
-implementation in Ruby as possible. Combined with a bytecode interpreting VM, it
-uses LLVM to optimize and compile ruby code down to machine code. Techniques
-such as type feedback, method inlining, and uncommon traps are all used to
-remove dynamism from ruby execution and increase performance.</p>
-
-<p>Since LLVM 2.5, Rubinius has made several major leaps forward, implementing
-a counter based JIT, type feedback and speculative method inlining.
--->
-</p>
-
-</div>
-
-<!--=========================================================================-->
-<div class="doc_subsection">
-<a name="macruby">MacRuby</a>
-</div>
-
-<div class="doc_text">
-
-<p>
-Need update.
-<!--
-<a href="http://macruby.org">MacRuby</a> is an implementation of Ruby on top of
-core Mac OS X technologies, such as the Objective-C common runtime and garbage
-collector and the CoreFoundation framework. It is principally developed by
-Apple and aims at enabling the creation of full-fledged Mac OS X applications.
-</p>
-
-<p>
-MacRuby uses LLVM for optimization passes, JIT and AOT compilation of Ruby
-expressions. It also uses zero-cost DWARF exceptions to implement Ruby exception
-handling.--> </p>
-
-</div>
-
-
<!--=========================================================================-->
<div class="doc_subsection">
<a name="pure">Pure</a>
@@ -349,27 +336,6 @@ LLVM 2.7 (and continue to work with older LLVM releases >= 2.5).</p>
</div>
-
-<!--=========================================================================-->
-<div class="doc_subsection">
-<a name="ldc">LLVM D Compiler</a>
-</div>
-
-<div class="doc_text">
-<p>
-Need update.
-<!--
-<a href="http://www.dsource.org/projects/ldc">LDC</a> is an implementation of
-the D Programming Language using the LLVM optimizer and code generator.
-The LDC project works great with the LLVM 2.6 release. General improvements in
-this
-cycle have included new inline asm constraint handling, better debug info
-support, general bug fixes and better x86-64 support. This has allowed
-some major improvements in LDC, getting it much closer to being as
-fully featured as the original DMD compiler from DigitalMars.-->
-</p>
-</div>
-
<!--=========================================================================-->
<div class="doc_subsection">
<a name="RoadsendPHP">Roadsend PHP</a>
@@ -400,17 +366,38 @@ compiler.
<!--=========================================================================-->
<div class="doc_subsection">
-<a name="llvm-lua">llvm-lua</a>
+<a name="tce">TTA-based Codesign Environment (TCE)</a>
</div>
<div class="doc_text">
<p>
-Need update.
-<!--
-<a href="http://code.google.com/p/llvm-lua/">LLVM-Lua</a> uses LLVM to add JIT
-and static compiling support to the Lua VM. Lua bytecode is analyzed to
-remove type checks, then LLVM is used to compile the bytecode down to machine
-code.-->
+<a href="http://tce.cs.tut.fi/">TCE</a> is a toolset for designing
+application-specific processors (ASP) based on the Transport triggered
+architecture (TTA). The toolset provides a complete co-design flow from C/C++
+programs down to synthesizable VHDL and parallel program binaries. Processor
+customization points include the register files, function units, supported
+operations, and the interconnection network.</p>
+
+<p>TCE uses llvm-gcc/Clang and LLVM for C/C++ language support, target
+independent optimizations and also for parts of code generation. It generates
+new LLVM-based code generators "on the fly" for the designed TTA processors and
+loads them into the compiler backend as runtime libraries to avoid per-target
+recompilation of larger parts of the compiler chain.</p>
+
+</div>
+
+<!--=========================================================================-->
+<div class="doc_subsection">
+<a name="safecode">SAFECode Compiler</a>
+</div>
+
+<div class="doc_text">
+<p>
+<a href="http://safecode.cs.illinois.edu">SAFECode</a> is a memory safe C
+compiler built using LLVM. It takes standard, unannotated C code, analyzes the
+code to ensure that memory accesses and array indexing operations are safe, and
+instruments the code with run-time checks when safety cannot be proven
+statically.
</p>
</div>
@@ -421,41 +408,72 @@ code.-->
<div class="doc_text">
<p>
-Need update.
-<!--
<a href="http://icedtea.classpath.org/wiki/Main_Page">IcedTea</a> provides a
harness to build OpenJDK using only free software build tools and to provide
replacements for the not-yet free parts of OpenJDK. One of the extensions that
IcedTea provides is a new JIT compiler named <a
href="http://icedtea.classpath.org/wiki/ZeroSharkFaq">Shark</a> which uses LLVM
to provide native code generation without introducing processor-dependent
-code.-->
+code.
+</p>
+<p>IcedTea6 1.8 and later have been tested and are known to work with
+LLVM 2.7 (and continue to work with older LLVM releases >= 2.6 as well).
</p>
</div>
+<!--=========================================================================-->
+<div class="doc_subsection">
+<a name="llvm-lua">LLVM-Lua</a>
+</div>
+
+<div class="doc_text">
+<p>
+<a href="http://code.google.com/p/llvm-lua/">LLVM-Lua</a> uses LLVM
+ to add JIT and static compiling support to the Lua VM. Lua
+bytecode is analyzed to remove type checks, then LLVM is used to compile the
+bytecode down to machine code.
+</p>
+<p>LLVM-Lua 1.2.0 has been tested and is known to work with LLVM 2.7.
+</p>
+</div>
<!--=========================================================================-->
<div class="doc_subsection">
-<a name="tce">TTA-based Codesign Environment (TCE)</a>
+<a name="MacRuby">MacRuby</a>
</div>
<div class="doc_text">
<p>
-<a href="http://tce.cs.tut.fi/">TCE</a> is a toolset for designing
-application-specific processors (ASP) based on the Transport triggered
-architecture (TTA). The toolset provides a complete co-design flow from C/C++
-programs down to synthesizable VHDL and parallel program binaries. Processor
-customization points include the register files, function units, supported
-operations, and the interconnection network.</p>
+<a href="http://macruby.org">MacRuby</a> is an implementation of Ruby based on
+core Mac OS technologies, sponsored by Apple Inc. It uses LLVM at runtime for
+optimization passes, JIT compilation and exception handling. It also allows
+static (ahead-of-time) compilation of Ruby code straight to machine code.
+</p>
+<p>The upcoming MacRuby 0.6 release works with LLVM 2.7.
+</p>
+</div>
-<p>TCE uses llvm-gcc/Clang and LLVM for C/C++ language support, target
-independent optimizations and also for parts of code generation. It generates
-new LLVM-based code generators "on the fly" for the designed TTA processors and
-loads them in to the compiler backend as runtime libraries to avoid per-target
-recompilation of larger parts of the compiler chain.</p>
+<!--=========================================================================-->
+<div class="doc_subsection">
+<a name="GHC">Glasgow Haskell Compiler (GHC)</a>
+</div>
+
+<div class="doc_text">
+<p>
+<a href="http://www.haskell.org/ghc/">GHC</a> is an open source,
+state-of-the-art programming suite for Haskell, a standard lazy
+functional programming language. It includes an optimizing static
+compiler generating good code for a variety of platforms, together
+with an interactive system for convenient, quick development.</p>
+
+<p>In addition to the existing C and native code generators, GHC now
+supports an <a
+href="http://hackage.haskell.org/trac/ghc/wiki/Commentary/Compiler/Backends/LLVM">LLVM
+code generator</a>, which works with LLVM 2.7.</p>
</div>
+
<!-- *********************************************************************** -->
<div class="doc_section">
<a name="whatsnew">What's New in LLVM 2.7?</a>
@@ -473,68 +491,82 @@ in this section.
<!--=========================================================================-->
<div class="doc_subsection">
-<a name="majorfeatures">Major New Features</a>
+<a name="orgchanges">LLVM Community Changes</a>
</div>
<div class="doc_text">
-<p>LLVM 2.7 includes several major new capabilities:</p>
+<p>In addition to changes to the code, between LLVM 2.6 and 2.7 a number of
+organizational changes have happened:
+</p>
<ul>
-<li>...</li>
+<li>LLVM has a new <a href="http://llvm.org/Logo.html">official logo</a>!</li>
+
+<li>Ted Kremenek and Doug Gregor have stepped forward as <a
+ href="http://llvm.org/docs/DeveloperPolicy.html#owners">Code Owners</a> of the
+ Clang static analyzer and the Clang frontend, respectively.</li>
+
+<li>LLVM now has an <a href="http://blog.llvm.org">official Blog</a> at
+ <a href="http://blog.llvm.org">http://blog.llvm.org</a>. This is a great way
+ to learn about new LLVM-related features as they are implemented. Several
+ features in this release are already explained on the blog.</li>
+
+<li>The LLVM web pages are now checked into the SVN server, in the "www",
+ "www-pubs" and "www-releases" SVN modules. Previously they were hidden in a
+ largely inaccessible old CVS server.</li>
+
+<li><a href="http://llvm.org">llvm.org</a> is now hosted on a new (and much
+ faster) server. It is still graciously hosted at the University of Illinois
+ of Urbana Champaign.</li>
</ul>
+</div>
-Extensible metadata solid.
-
-Debug info improvements: using metadata instead of llvm.dbg global variables.
-This brings several enhancements including improved compile times.
-
-New instruction selector.
-GHC Haskell ABI/ calling conv support.
-Pre-Alpha support for unions in IR.
-New InlineHint and StackAlignment function attributes
-Code generator MC'ized except for debug info and EH.
-New SCEV AA pass: -scev-aa
-Inliner reuses arrays allocas when inlining multiple callers to reduce stack usage.
-MC encoding and disassembler apis.
-Optimal Edge Profiling?
-Instcombine is now a library, has its own IRBuilder to simplify itself.
-New llvm/Support/Regex.h API. FileCheck now does regex's
-Many subtle pointer invalidation bugs in Callgraph have been fixed and it now uses asserting value handles.
-MC Disassembler (with blog post), MCInstPrinter. Many X86 backend and AsmPrinter simplifications
-Various tools like llc and opt now read either .ll or .bc files as input.
-Malloc and free instructions got removed, along with LowerAllocations pass.
-compiler-rt support for ARM.
-completely llvm-gcc NEON support.
-Can transcode from GAS to intel syntax with "llvm-mc foo.s -output-asm-variant=1"
-JIT debug information with GDB 7.0
-New CodeGen Level CSE
-CMake can now run tests, what other improvements?
-ARM/Thumb using reg scavenging for stack object address materialization (PEI).
-New SSAUpdater and MachineSSAUpdater classes for unstructured ssa updating,
- changed jump threading, GVN, etc to use it which simplified them and speed
- them up.
-Combiner-AA improvements, why not on by default?
-Pre-regalloc tail duplication
-x86 sibcall optimization
-New LSR with full strength reduction mode
-The most awesome sext / zext optimization pass. ?
-
-The ARM backend now has good support for ARMv4 backend (tested on StrongARM
- hardware), previously only supported ARMv4T and newer.
-
-
-
-Defaults to RTTI off, packagers should build with make REQUIRE_RTTI=1.
-CondProp pass removed (functionality merged into jump threading).
-AndersAA got removed (from 2.7 or mainline?)
-PredSimplify, LoopVR, GVNPRE got removed.
-LLVM command line tools now overwrite their output, before they would only do this with -f.
-DOUT removed, use DEBUG(errs() instead.
-Much stuff converted to use raw_ostream instead of std::ostream.
-TargetAsmInfo renamed to MCAsmInfo
-llvm/ADT/iterator.h gone.
+<!--=========================================================================-->
+<div class="doc_subsection">
+<a name="majorfeatures">Major New Features</a>
+</div>
+<div class="doc_text">
+
+<p>LLVM 2.7 includes several major new capabilities:</p>
+
+<ul>
+<li>2.7 includes initial support for the <a
+ href="http://en.wikipedia.org/wiki/MicroBlaze">MicroBlaze</a> target.
+ MicroBlaze is a soft processor core designed for Xilinx FPGAs.</li>
+
+<li>2.7 includes a new LLVM IR "extensible metadata" feature. This feature
+ supports many different use cases, including allowing front-end authors to
+ encode source level information into LLVM IR, which is consumed by later
+ language-specific passes. This is a great way to do high-level optimizations
+ like devirtualization, type-based alias analysis, etc. See the <a
+ href="http://blog.llvm.org/2010/04/extensible-metadata-in-llvm-ir.html">
+ Extensible Metadata Blog Post</a> for more information.</li>
+
+<li>2.7 encodes <a href="SourceLevelDebugging.html">debug information</a>
+in a completely new way, built on extensible metadata. The new implementation
+is much more memory efficient and paves the way for improvements to the
+optimized code debugging experience.</li>
+
+<li>2.7 now directly supports taking the address of a label and doing an
+  indirect branch through a pointer. This is particularly useful for
+  interpreter loops, and is used to implement the GCC "address of label"
+  extension. For more information, see the <a
+href="http://blog.llvm.org/2010/01/address-of-label-and-indirect-branches.html">
+Address of Label and Indirect Branches in LLVM IR Blog Post</a>. A small
+  example follows this list.</li>
+
+<li>2.7 is the first release to start supporting APIs for assembling and
+  disassembling target machine code. These APIs are useful for a variety of
+  low level clients, and are surfaced in the new "enhanced disassembly" API.
+  For more information, see <a
+  href="http://blog.llvm.org/2010/01/x86-disassembler.html">The X86
+  Disassembler Blog Post</a>.</li>
+
+<li>2.7 includes major parts of the work required by the new MC Project,
+ see the <a href="#mc">MC update above</a> for more information.</li>
+
+</ul>
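+
+<p>As a small illustration of the new indirect branch support, here is a
+ hypothetical snippet (not from the LLVM sources) using the GCC "address of
+ label" extension; llvm-gcc and clang compile the computed gotos down to the
+ new <tt>indirectbr</tt> instruction:</p>
+
+<pre>
+  /* A tiny threaded-dispatch loop; ops alternate opcode, operand, ... */
+  int interp(const int *op) {
+    static void *dispatch[] = { &amp;&amp;do_add, &amp;&amp;do_halt };
+    int acc = 0;
+    goto *dispatch[*op];        /* becomes an indirect branch */
+  do_add:
+    acc += *++op;               /* consume the operand */
+    goto *dispatch[*++op];      /* dispatch on the next opcode */
+  do_halt:
+    return acc;
+  }
+</pre>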
</div>
@@ -548,7 +580,30 @@ llvm/ADT/iterator.h gone.
expose new optimization opportunities:</p>
<ul>
-<li>...</li>
+<li>LLVM IR now supports a 16-bit "half float" data type through <a
+ href="LangRef.html#int_fp16">two new intrinsics</a> and APFloat support.</li>
+<li>LLVM IR supports two new <a href="LangRef.html#fnattrs">function
+  attributes</a>: inlinehint and alignstack(n). The former is a hint to the
+  optimizer that a function was declared 'inline' and thus the inliner should
+  weight it higher when considering inlining it. The latter
+  indicates to the code generator that the function diverges from the platform
+  ABI on stack alignment.</li>
+<li>The new <a href="LangRef.html#int_objectsize">llvm.objectsize</a> intrinsic
+  allows the optimizer to infer the sizes of memory objects in some cases.
+  This intrinsic is used to implement the GCC <tt>__builtin_object_size</tt>
+  extension; a short example follows this list.</li>
+<li>LLVM IR now supports marking load and store instructions with <a
+ href="LangRef.html#i_load">"non-temporal" hints</a> (building on the new
+ metadata feature). This hint encourages the code
+ generator to generate non-temporal accesses when possible, which are useful
+ for code that is carefully managing cache behavior. Currently, only the
+ X86 backend provides target support for this feature.</li>
+
+<li>LLVM 2.7 has pre-alpha support for <a
+ href="LangRef.html#t_union">unions in LLVM IR</a>.
+ Unfortunately, this support is not really usable in 2.7, so if you're
+ interested in pushing it forward, please help contribute to LLVM mainline.</li>
+
</ul>
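+
+<p>As a short illustration of the llvm.objectsize item above, consider this
+ hypothetical snippet; with optimization enabled, the
+ <tt>__builtin_object_size</tt> call folds to 16 via the new intrinsic, so the
+ bounds check becomes a constant comparison:</p>
+
+<pre>
+  #include &lt;string.h&gt;
+
+  static char buf[16];
+
+  void fill(const char *s, size_t n) {
+    if (n &gt; __builtin_object_size(buf, 0))
+      return;  /* would overflow the known 16-byte object */
+    memcpy(buf, s, n);
+  }
+</pre>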
</div>
@@ -565,12 +620,51 @@ release includes a few major enhancements and additions to the optimizers:</p>
<ul>
-<li>...</li>
+<li>The inliner now merges array stack objects from different callees when
+  inlining multiple call sites into one function. This reduces the stack size
+  of the resultant function.</li>
+<li>The -basicaa alias analysis pass (which is the default) has been improved to
+ be less dependent on "type safe" pointers. It can now look through bitcasts
+ and other constructs more aggressively, allowing better load/store
+ optimization.</li>
+<li>The load elimination optimization in the GVN Pass [<a
+href="http://blog.llvm.org/2009/12/introduction-to-load-elimination-in-gvn.html">intro
+ blog post</a>] has been substantially improved to be more aggressive about
+ partial redundancy elimination and do more aggressive phi translation. Please
+ see the <a
+ href="http://blog.llvm.org/2009/12/advanced-topics-in-redundant-load.html">
+ Advanced Topics in Redundant Load Elimination with a Focus on PHI Translation
+ Blog Post</a> for more details.</li>
+<li>The module <a href="LangRef.html#datalayout">target data string</a> now
+ includes a notion of 'native' integer data types for the target. This
+ helps mid-level optimizations avoid promoting complex sequences of
+ operations to data types that are not natively supported (e.g. converting
+ i32 operations to i64 on 32-bit chips).</li>
+<li>The mid-level optimizer is now conservative when operating on a module with
+ no target data. Previously, it would default to SparcV9 settings, which is
+ not what most people expected.</li>
+<li>Jump threading is now much more aggressive at simplifying correlated
+ conditionals and threading blocks with otherwise complex logic. It has
+ subsumed the old "Conditional Propagation" pass, and -condprop has been
+ removed from LLVM 2.7.</li>
+<li>The -instcombine pass has been refactored from being one huge file to being
+ a library of its own. Internally, it uses a customized IRBuilder to clean
+ it up and simplify it.</li>
+
+<li>The optimal edge profiling pass is reliable and much more complete than in
+ 2.6. It can be used with the llvm-prof tool but isn't wired up to the
+ llvm-gcc and clang command line options yet.</li>
+
+<li>A new experimental alias analysis implementation, -scev-aa, has been added.
+  It uses LLVM's Scalar Evolution implementation to do symbolic analysis of
+  pointer offset expressions to disambiguate pointers. It can catch a few
+  cases that basicaa cannot, particularly in complex loop nests. An example
+  invocation follows this list.</li>
+
+<li>The default pass ordering has been tweaked for improved optimization
+ effectiveness.</li>
</ul>
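+
+<p>One way to experiment with the new -scev-aa pass (assuming a bitcode file
+ <tt>foo.bc</tt>; -aa-eval is the existing alias analysis evaluator pass):</p>
+<pre>
+  opt -scev-aa -aa-eval -disable-output foo.bc
+</pre>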
-<p>Also, -anders-aa was removed</p>
-
</div>
@@ -582,15 +676,20 @@ release includes a few major enhancements and additions to the optimizers:</p>
<div class="doc_text">
<ul>
-<li>The JIT now <a
-href="http://llvm.org/viewvc/llvm-project?view=rev&revision=85295">defaults
+<li>The JIT now supports generating debug information and is compatible with
+the new GDB 7.0 (and later) interfaces for registering dynamically generated
+debug info.</li>
+
+<li>The JIT now <a href="http://llvm.org/PR5184">defaults
to compiling eagerly</a> to avoid a race condition in the lazy JIT.
Clients that still want the lazy JIT can switch it on by calling
<tt>ExecutionEngine::DisableLazyCompilation(false)</tt>.</li>
+
<li>It is now possible to create more than one JIT instance in the same process.
These JITs can generate machine code in parallel,
although <a href="http://llvm.org/docs/ProgrammersManual.html#jitthreading">you
still have to obey the other threading restrictions</a>.</li>
+
</ul>
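+
+<p>A minimal sketch of opting back into lazy JITing, assuming a module
+<tt>M</tt> and ignoring error handling:</p>
+<pre>
+  // Eager compilation is now the default; request lazy JITing explicitly.
+  llvm::ExecutionEngine *EE = llvm::EngineBuilder(M).create();
+  EE-&gt;DisableLazyCompilation(false);
+</pre>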
</div>
@@ -607,8 +706,49 @@ infrastructure, which allows us to implement more aggressive algorithms and make
it run faster:</p>
<ul>
-
-<li>...</li>
+<li>The 'llc -asm-verbose' option (which is now the default) has been enhanced
+  to emit many useful comments to .s files indicating information about spill
+  slots and loop nest structure. This should make it much easier to read and
+  understand assembly files. This is wired up in llvm-gcc and clang to
+  the <tt>-fverbose-asm</tt> option; see the example after this list.</li>
+
+<li>A new LSR pass with a "full strength reduction" mode can reduce address
+  register pressure in loops where address generation is important.</li>
+
+<li>A new codegen level Common Subexpression Elimination pass (MachineCSE)
+ is available and enabled by default. It catches redundancies exposed by
+ lowering.</li>
+<li>A new pre-register-allocation tail duplication pass is available and
+  enabled by default; it can substantially improve branch prediction quality
+  in some cases.</li>
+<li>A new sign and zero extension optimization pass (OptimizeExtsPass)
+  is available and enabled by default. This pass can take advantage of
+  architecture features like x86-64's implicit zero-extension behavior and
+  sub-registers.</li>
+<li>The code generator now supports a mode where it attempts to preserve the
+ order of instructions in the input code. This is important for source that
+ is hand scheduled and extremely sensitive to scheduling. It is compatible
+ with the GCC <tt>-fno-schedule-insns</tt> option.</li>
+<li>The target-independent code generator now supports generating code with
+  arbitrary numbers of result values. Returning more values than was
+  previously supported is handled by returning through a hidden pointer. In
+  2.7, though, only the X86 and XCore targets have adopted support for
+  this.</li>
+<li>The code generator now supports generating code that follows the
+ <a href="LangRef.html#callingconv">Glasgow Haskell Compiler Calling
+ Convention</a> and ABI.</li>
+<li>The "<a href="CodeGenerator.html#selectiondag_select">DAG instruction
+ selection</a>" phase of the code generator has been largely rewritten for
+ 2.7. Previously, tblgen spit out tons of C++ code which was compiled and
+ linked into the target to do the pattern matching, now it emits a much
+ smaller table which is read by the target-independent code. The primary
+ advantages of this approach is that the size and compile time of various
+ targets is much improved. The X86 code generator shrunk by 1.5MB of code,
+ for example.</li>
+<li>Almost the entire code generator has switched to emitting code through the
+  MC interfaces instead of printing textually to the .s file. This led to a
+  number of cleanups and speedups. In 2.7, debug and exception handling
+  information do not go through MC yet.</li>
</ul>
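+
+<p>For example (file names are illustrative), both of the following now emit
+annotated assembly:</p>
+<pre>
+  llc foo.bc -o foo.s                       # -asm-verbose is now the default
+  llvm-gcc -S -fverbose-asm foo.c -o foo.s  # wired to the same functionality
+</pre>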
</div>
@@ -622,8 +762,11 @@ it run faster:</p>
</p>
<ul>
-
-<li>...</li>
+<li>The X86 backend now optimizes tail calls much more aggressively for
+  functions that use the standard C calling convention.</li>
+<li>The X86 backend now models scalar SSE registers as subregs of the SSE vector
+  registers, making the code generator more aggressive in cases where scalar
+  and vector types are mixed.</li>
</ul>
@@ -631,28 +774,6 @@ it run faster:</p>
<!--=========================================================================-->
<div class="doc_subsection">
-<a name="pic16">PIC16 Target Improvements</a>
-</div>
-
-<div class="doc_text">
-<p>New features of the PIC16 target include:
-</p>
-
-<ul>
-<li>...</li>
-</ul>
-
-<p>Things not yet supported:</p>
-
-<ul>
-<li>Variable arguments.</li>
-<li>Interrupts/programs.</li>
-</ul>
-
-</div>
-
-<!--=========================================================================-->
-<div class="doc_subsection">
<a name="ARM">ARM Target Improvements</a>
</div>
@@ -662,25 +783,31 @@ it run faster:</p>
<ul>
-<li>...</li>
-</ul>
-
+<li>The ARM backend now generates instructions in unified assembly syntax.</li>
-</div>
+<li>llvm-gcc now has complete support for the ARM v7 NEON instruction set. This
+  support differs slightly from the GCC implementation. Please see the
+  <a
+href="http://blog.llvm.org/2010/04/arm-advanced-simd-neon-intrinsics-and.html">
+  ARM Advanced SIMD (NEON) Intrinsics and Types in LLVM Blog Post</a> for
+  helpful information when migrating code from GCC to LLVM-GCC.</li>
+
+<li>The ARM and Thumb code generators now use register scavenging for stack
+  object address materialization. This allows the use of R3 as a general
+  purpose register in Thumb1 code, as it was previously reserved for use in
+  stack address materialization. Secondly, sequential uses of the same
+  value will now re-use the materialized constant.</li>
-<!--=========================================================================-->
-<div class="doc_subsection">
-<a name="OtherTarget">Other Target Specific Improvements</a>
-</div>
+<li>The ARM backend now has good support for ARMv4 targets and has been tested
+ on StrongARM hardware. Previously, LLVM only supported ARMv4T and
+ newer chips.</li>
-<div class="doc_text">
-<p>New features of other targets include:
-</p>
+<li>Atomic builtins are now supported for ARMv6 and ARMv7 (__sync_synchronize,
+  __sync_fetch_and_add, etc.); a small example follows this list.</li>
-<ul>
-<li>...</li>
</ul>
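+
+<p>As a small example of the new atomic builtin support (a hypothetical
+snippet), the following now compiles to an inline
+load-exclusive/store-exclusive sequence on ARMv6 and ARMv7 rather than a
+library call:</p>
+<pre>
+  int counter;
+  void bump(void) {
+    __sync_fetch_and_add(&amp;counter, 1);
+  }
+</pre>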
+
</div>
<!--=========================================================================-->
@@ -695,7 +822,34 @@ it run faster:</p>
</p>
<ul>
-<li>...</li>
+<li>The optimizer uses the new CodeMetrics class to measure the size of code.
+ Various passes (like the inliner, loop unswitcher, etc) all use this to make
+ more accurate estimates of the code size impact of various
+ optimizations.</li>
+<li>A new <a href="http://llvm.org/doxygen/InstructionSimplify_8h-source.html">
+ llvm/Analysis/InstructionSimplify.h</a> interface is available for doing
+ symbolic simplification of instructions (e.g. <tt>a+0</tt> -&gt; <tt>a</tt>)
+ without requiring the instruction to exist. This centralizes a lot of
+ ad-hoc symbolic manipulation code scattered in various passes.</li>
+<li>The optimizer now uses a new <a
+ href="http://llvm.org/doxygen/SSAUpdater_8h-source.html">SSAUpdater</a>
+ class which efficiently supports
+ doing unstructured SSA update operations. This centralized a bunch of code
+ scattered throughout various passes (e.g. jump threading, lcssa,
+ loop rotate, etc) for doing this sort of thing. The code generator has a
+ similar <a href="http://llvm.org/doxygen/MachineSSAUpdater_8h-source.html">
+ MachineSSAUpdater</a> class.</li>
+<li>The <a href="http://llvm.org/doxygen/Regex_8h-source.html">
+ llvm/Support/Regex.h</a> header exposes a platform independent regular
+ expression API. Building on this, the <a
+ href="TestingGuide.html#FileCheck">FileCheck</a> utility now supports
+ regular exressions.</li>
+<li>raw_ostream now supports a circular "debug stream" accessed with "dbgs()".
+  By default, this stream works the same way as "errs()", but if you pass
+  <tt>-debug-buffer-size=1000</tt> to opt, the debug stream is capped to a
+  fixed-size circular buffer and the output is printed at the end of the
+  program's execution. This is helpful if you have a long-lived compiler
+  process and you're interested in seeing snapshots in time.</li>
</ul>
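+
+<p>A minimal sketch of the new regular expression API (the pattern and helper
+function are hypothetical):</p>
+<pre>
+  #include "llvm/ADT/SmallVector.h"
+  #include "llvm/ADT/StringRef.h"
+  #include "llvm/Support/Regex.h"
+
+  bool parseVersion(llvm::StringRef Name) {
+    llvm::Regex RE("^lib(.*)-([0-9.]+)$");
+    llvm::SmallVector&lt;llvm::StringRef, 3&gt; Matches;
+    if (!RE.match(Name, &amp;Matches))
+      return false;
+    // Matches[0] is the whole match; Matches[1] and [2] are the groups.
+    return true;
+  }
+</pre>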
@@ -710,7 +864,16 @@ it run faster:</p>
<p>Other miscellaneous features include:</p>
<ul>
-<li>...</li>
+<li>You can now build LLVM as a big dynamic library (e.g. "libllvm2.7.so"). To
+ get this, configure LLVM with the --enable-shared option.</li>
+
+<li>LLVM command line tools now overwrite their output by default. Previously,
+ they would only do this with -f. This makes them more convenient to use, and
+ behave more like standard unix tools.</li>
+
+<li>The opt and llc tools now autodetect whether their input is a .ll or .bc
+  file, and automatically do the right thing. This means you don't need to
+  explicitly use the llvm-as tool for most things; see the example after this
+  list.</li>
</ul>
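+
+<p>For example (file names are illustrative), both of these now work without
+an explicit llvm-as step:</p>
+<pre>
+  llc foo.ll -o foo.s        # textual IR is assembled automatically
+  opt -gvn foo.ll -o foo.bc
+</pre>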
</div>
@@ -728,46 +891,44 @@ on LLVM 2.6, this section lists some "gotchas" that you may run into upgrading
from the previous release.</p>
<ul>
+
+<li>
+The Andersen's alias analysis ("anders-aa") pass, the Predicate Simplifier
+("predsimplify") pass, the LoopVR pass, the GVNPRE pass, and the random sampling
+profiling ("rsprofiling") passes have all been removed. They were not being
+actively maintained and had substantial problems. If you are interested in
+these components, you are welcome to resurrect them from SVN, fix the
+correctness problems, and resubmit them to mainline.</li>
+
+<li>LLVM now defaults to building most libraries with RTTI turned off, providing
+a code size reduction. Packagers who are interested in building LLVM to support
+plugins that require RTTI information should build with "make REQUIRE_RTTI=1"
+and should read the new <a href="Packaging.html">Advice on Packaging LLVM</a>
+document.</li>
+
<li>The LLVM interpreter now defaults to <em>not</em> using <tt>libffi</tt> even
if you have it installed. This makes it more likely that an LLVM built on one
system will work when copied to a similar system. To use <tt>libffi</tt>,
-configure with <tt>--enable-libffi</tt>.
-</li>
-</ul>
+configure with <tt>--enable-libffi</tt>.</li>
+
+<li>Debug information uses a completely different representation: an LLVM 2.6
+.bc file should still work with LLVM 2.7, but its debug info will not carry
+forward.</li>
+<li>The LLVM 2.6 (and earlier) "malloc" and "free" instructions were removed,
+  along with the LowerAllocations pass. Now you should just use calls to the
+  malloc and free functions in libc. These calls are optimized as well as
+  the old instructions were.</li>
+</ul>
<p>In addition, many APIs have changed in this release. Some of the major LLVM
API changes are:</p>
<ul>
-<li><tt>ModuleProvider</tt> has been <a
-href="http://llvm.org/viewvc/llvm-project?view=rev&revision=94686">removed</a>
-and its methods moved to <tt>Module</tt> and <tt>GlobalValue</tt>.
-Most clients can remove uses of <tt>ExistingModuleProvider</tt>,
-replace <tt>getBitcodeModuleProvider</tt> with
-<tt>getLazyBitcodeModule</tt>, and pass their <tt>Module</tt> to
-functions that used to accept <tt>ModuleProvider</tt>. Clients who
-wrote their own <tt>ModuleProvider</tt>s will need to derive from
-<tt>GVMaterializer</tt> instead and use
-<tt>Module::setMaterializer</tt> to attach it to a
-<tt>Module</tt>.</li>
-
-<li><tt>GhostLinkage</tt> has given up the ghost.
-<tt>GlobalValue</tt>s that have not yet been read from their backing
-storage have the same linkage they will have after being read in.
-Clients must replace calls to
-<tt>GlobalValue::hasNotBeenReadFromBitcode</tt> with
-<tt>GlobalValue::isMaterializable</tt>.</li>
-
-<li>FIXME: Debug info has been totally redone. Add pointers to new APIs. Substantial caveats about compatibility of .ll and .bc files.</li>
-
-<li>The <tt>llvm/Support/DataTypes.h</tt> header has moved
-to <tt>llvm/System/DataTypes.h</tt>.</li>
-
-<li>The <tt>isInteger</tt>, <tt>isIntOrIntVector</tt>, <tt>isFloatingPoint</tt>,
-<tt>isFPOrFPVector</tt> and <tt>isFPOrFPVector</tt> methods have been renamed
-<tt>isIntegerTy</tt>, <tt>isIntOrIntVectorTy</tt>, <tt>isFloatingPointTy</tt>,
-<tt>isFPOrFPVectorTy</tt> and <tt>isFPOrFPVectorTy</tt> respectively.</li>
+
+<li>The <tt>add</tt>, <tt>sub</tt>, and <tt>mul</tt> instructions no longer
+support floating-point operands. The <tt>fadd</tt>, <tt>fsub</tt>, and
+<tt>fmul</tt> instructions should be used for this purpose instead; for
+example, <tt>add float %a, %b</tt> must now be written
+<tt>fadd float %a, %b</tt>.</li>
+
</ul>
</div>
@@ -788,7 +949,7 @@ to <tt>llvm/System/DataTypes.h</tt>.</li>
<li>Intel and AMD machines (IA32, X86-64, AMD64, EMT-64) running Red Hat
Linux, Fedora Core, FreeBSD and AuroraUX (and probably other unix-like
systems).</li>
-<li>PowerPC and X86-based Mac OS X systems, running 10.3 and above in 32-bit
+<li>PowerPC and X86-based Mac OS X systems, running 10.4 and above in 32-bit
and 64-bit modes.</li>
<li>Intel and AMD machines running on Win32 using MinGW libraries (native).</li>
<li>Intel and AMD machines running on Win32 with the Cygwin libraries (limited
@@ -845,7 +1006,7 @@ href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev">LLVMdev list</a>.</p>
<ul>
<li>The MSIL, Alpha, SPU, MIPS, PIC16, Blackfin, MSP430, SystemZ and MicroBlaze
backends are experimental.</li>
-<li>The <tt>llc</tt> "<tt>-filetype=asm</tt>" (the default) is the only
+<li><tt>llc</tt> "<tt>-filetype=asm</tt>" (the default) is the only
supported value for this option. The MachO writer is experimental, and
works much better in mainline SVN.</li>
</ul>
@@ -868,13 +1029,10 @@ href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev">LLVMdev list</a>.</p>
to generate code for systems that don't have SSE2.</li>
<li>Win64 code generation wasn't widely tested. Everything should work, but we
expect small issues to happen. Also, llvm-gcc cannot build the mingw64
- runtime currently due
- to <a href="http://llvm.org/PR2255">several</a>
- <a href="http://llvm.org/PR2257">bugs</a> and due to lack of support for
- the
- 'u' inline assembly constraint and for X87 floating point inline assembly.</li>
+ runtime currently due to lack of support for the 'u' inline assembly
+ constraint and for X87 floating point inline assembly.</li>
<li>The X86-64 backend does not yet support the LLVM IR instruction
- <tt>va_arg</tt>. Currently, the llvm-gcc and front-ends support variadic
+ <tt>va_arg</tt>. Currently, front-ends support variadic
argument constructs on X86-64 by lowering them manually.</li>
</ul>
@@ -902,9 +1060,6 @@ compilation, and lacks support for debug information.</li>
<div class="doc_text">
<ul>
-<li>Support for the Advanced SIMD (Neon) instruction set is still incomplete
-and not well tested. Some features may not work at all, and the code quality
-may be poor in some cases.</li>
<li>Thumb mode works only on ARMv6 or higher processors. On sub-ARMv6
processors, thumb programs can crash or produce wrong
results (<a href="http://llvm.org/PR1388">PR1388</a>).</li>
@@ -989,9 +1144,6 @@ appropriate nops inserted to ensure restartability.</li>
supported on some targets (these are used when you take the address of a
nested function).</p>
-<p>If you run into GCC extensions which are not supported, please let us know.
-</p>
-
</div>
<!-- ======================================================================= -->
@@ -1078,7 +1230,7 @@ lists</a>.</p>
src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
<a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2010-04-02 11:23:15 +0200 (Fri, 02 Apr 2010) $
+ Last modified: $Date: 2010-05-04 01:52:21 +0200 (Tue, 04 May 2010) $
</address>
</body>
diff --git a/docs/TableGenFundamentals.html b/docs/TableGenFundamentals.html
index 54e8c26878a3..388a090577f8 100644
--- a/docs/TableGenFundamentals.html
+++ b/docs/TableGenFundamentals.html
@@ -333,8 +333,9 @@ The TableGen types are:</p>
<dd>This type represents a nestable directed graph of elements.</dd>
<dt><tt><b>code</b></tt></dt>
- <dd>This represents a big hunk of text. NOTE: I don't remember why this is
- distinct from string!</dd>
+ <dd>This represents a big hunk of text. This is lexically distinct from
+ string values because it doesn't require escaping double quotes and other
+ common characters that occur in code.</dd>
</dl>
<p>To date, these types have been sufficient for describing things that
@@ -794,7 +795,7 @@ This should highlight the APIs in <tt>TableGen/Record.h</tt>.</p>
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2010-03-27 03:53:27 +0100 (Sat, 27 Mar 2010) $
+ Last modified: $Date: 2010-04-22 18:45:27 +0200 (Thu, 22 Apr 2010) $
</address>
</body>
diff --git a/docs/WritingAnLLVMPass.html b/docs/WritingAnLLVMPass.html
index 7618657b1874..e592da4e2bbc 100644
--- a/docs/WritingAnLLVMPass.html
+++ b/docs/WritingAnLLVMPass.html
@@ -607,7 +607,7 @@ fast).</p>
<div class="doc_text">
<div class="doc_code"><pre>
- <b>virtual bool</b> runOnSCC(const std::vector&lt;CallGraphNode *&gt; &amp;SCCM) = 0;
+ <b>virtual bool</b> runOnSCC(CallGraphSCC &amp;SCC) = 0;
</pre></div>
<p>The <tt>runOnSCC</tt> method performs the interesting work of the pass, and
@@ -1835,7 +1835,7 @@ Despite that, we have kept the LLVM passes SMP ready, and you should too.</p>
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2010-03-10 02:29:39 +0100 (Wed, 10 Mar 2010) $
+ Last modified: $Date: 2010-04-17 01:07:44 +0200 (Sat, 17 Apr 2010) $
</address>
</body>
diff --git a/examples/ExceptionDemo/ExceptionDemo.cpp b/examples/ExceptionDemo/ExceptionDemo.cpp
index 46dc4810214b..bbca96370761 100644
--- a/examples/ExceptionDemo/ExceptionDemo.cpp
+++ b/examples/ExceptionDemo/ExceptionDemo.cpp
@@ -813,8 +813,7 @@ _Unwind_Reason_Code ourPersonality(int version,
}
#endif
- const uint8_t* lsda = (uint8_t*)
- _Unwind_GetLanguageSpecificData(context);
+ const uint8_t* lsda = _Unwind_GetLanguageSpecificData(context);
#ifdef DEBUG
fprintf(stderr,
@@ -1949,7 +1948,7 @@ int main(int argc, char* argv[]) {
}
// If not set, exception handling will not be turned on
- llvm::DwarfExceptionHandling = true;
+ llvm::JITExceptionHandling = true;
llvm::InitializeNativeTarget();
llvm::LLVMContext& context = llvm::getGlobalContext();
diff --git a/examples/ExceptionDemo/Makefile b/examples/ExceptionDemo/Makefile
index 06bba66e8bf3..480744730eb7 100644
--- a/examples/ExceptionDemo/Makefile
+++ b/examples/ExceptionDemo/Makefile
@@ -9,9 +9,8 @@
LEVEL = ../..
TOOLNAME = ExceptionDemo
EXAMPLE_TOOL = 1
+REQUIRES_EH = 1
LINK_COMPONENTS := jit interpreter nativecodegen
include $(LEVEL)/Makefile.common
-
-CXXFLAGS += -fexceptions
diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h
index 733b92c57c8d..d665c89377f4 100644
--- a/include/llvm-c/Core.h
+++ b/include/llvm-c/Core.h
@@ -319,6 +319,8 @@ LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name);
/** See Module::dump. */
void LLVMDumpModule(LLVMModuleRef M);
+/** See Module::setModuleInlineAsm. */
+void LLVMSetModuleInlineAsm(LLVMModuleRef M, const char *Asm);
/*===-- Types -------------------------------------------------------------===*/
diff --git a/include/llvm-c/EnhancedDisassembly.h b/include/llvm-c/EnhancedDisassembly.h
index 9cd1e1f5f3cf..ebb2cf8a82dd 100644
--- a/include/llvm-c/EnhancedDisassembly.h
+++ b/include/llvm-c/EnhancedDisassembly.h
@@ -55,7 +55,8 @@ typedef enum {
/*! @constant kEDAssemblySyntaxX86Intel Intel syntax for i386 and x86_64. */
kEDAssemblySyntaxX86Intel = 0,
/*! @constant kEDAssemblySyntaxX86ATT AT&T syntax for i386 and x86_64. */
- kEDAssemblySyntaxX86ATT = 1
+ kEDAssemblySyntaxX86ATT = 1,
+ kEDAssemblySyntaxARMUAL = 2
} EDAssemblySyntax_t;
/*!
diff --git a/include/llvm-c/Target.h b/include/llvm-c/Target.h
index e705a9989694..2948fc76f195 100644
--- a/include/llvm-c/Target.h
+++ b/include/llvm-c/Target.h
@@ -32,18 +32,18 @@ typedef struct LLVMOpaqueTargetData *LLVMTargetDataRef;
typedef struct LLVMStructLayout *LLVMStructLayoutRef;
/* Declare all of the target-initialization functions that are available. */
-#define LLVM_TARGET(TargetName) void LLVMInitialize##TargetName##TargetInfo();
+#define LLVM_TARGET(TargetName) void LLVMInitialize##TargetName##TargetInfo(void);
#include "llvm/Config/Targets.def"
#undef LLVM_TARGET /* Explicit undef to make SWIG happier */
-#define LLVM_TARGET(TargetName) void LLVMInitialize##TargetName##Target();
+#define LLVM_TARGET(TargetName) void LLVMInitialize##TargetName##Target(void);
#include "llvm/Config/Targets.def"
#undef LLVM_TARGET /* Explicit undef to make SWIG happier */
/** LLVMInitializeAllTargetInfos - The main program should call this function if
it wants access to all available targets that LLVM is configured to
support. */
-static inline void LLVMInitializeAllTargetInfos() {
+static inline void LLVMInitializeAllTargetInfos(void) {
#define LLVM_TARGET(TargetName) LLVMInitialize##TargetName##TargetInfo();
#include "llvm/Config/Targets.def"
#undef LLVM_TARGET /* Explicit undef to make SWIG happier */
@@ -52,7 +52,7 @@ static inline void LLVMInitializeAllTargetInfos() {
/** LLVMInitializeAllTargets - The main program should call this function if it
wants to link in all available targets that LLVM is configured to
support. */
-static inline void LLVMInitializeAllTargets() {
+static inline void LLVMInitializeAllTargets(void) {
#define LLVM_TARGET(TargetName) LLVMInitialize##TargetName##Target();
#include "llvm/Config/Targets.def"
#undef LLVM_TARGET /* Explicit undef to make SWIG happier */
@@ -61,7 +61,7 @@ static inline void LLVMInitializeAllTargets() {
/** LLVMInitializeNativeTarget - The main program should call this function to
initialize the native target corresponding to the host. This is useful
for JIT applications to ensure that the target gets linked in correctly. */
-static inline LLVMBool LLVMInitializeNativeTarget() {
+static inline LLVMBool LLVMInitializeNativeTarget(void) {
/* If we have a native target, initialize it to ensure it is linked in. */
#ifdef LLVM_NATIVE_ARCH
#define DoInit2(TARG) \
diff --git a/include/llvm-c/Transforms/IPO.h b/include/llvm-c/Transforms/IPO.h
index 0a94315f62b2..d16e858bca39 100644
--- a/include/llvm-c/Transforms/IPO.h
+++ b/include/llvm-c/Transforms/IPO.h
@@ -54,6 +54,12 @@ void LLVMAddLowerSetJmpPass(LLVMPassManagerRef PM);
/** See llvm::createPruneEHPass function. */
void LLVMAddPruneEHPass(LLVMPassManagerRef PM);
+/** See llvm::createIPSCCPPass function. */
+void LLVMAddIPSCCPPass(LLVMPassManagerRef PM);
+
+/** See llvm::createInternalizePass function. */
+void LLVMAddInternalizePass(LLVMPassManagerRef, unsigned AllButMain);
+
// FIXME: Remove in LLVM 3.0.
void LLVMAddRaiseAllocationsPass(LLVMPassManagerRef PM);
diff --git a/include/llvm/ADT/APInt.h b/include/llvm/ADT/APInt.h
index 3f67ffb4556c..ec76fbdb0d93 100644
--- a/include/llvm/ADT/APInt.h
+++ b/include/llvm/ADT/APInt.h
@@ -870,12 +870,28 @@ public:
/// @brief Unsigned less than comparison
bool ult(const APInt& RHS) const;
+ /// Regards both *this as an unsigned quantity and compares it with RHS for
+ /// the validity of the less-than relationship.
+ /// @returns true if *this < RHS when considered unsigned.
+ /// @brief Unsigned less than comparison
+ bool ult(uint64_t RHS) const {
+ return ult(APInt(getBitWidth(), RHS));
+ }
+
/// Regards both *this and RHS as signed quantities and compares them for
/// validity of the less-than relationship.
/// @returns true if *this < RHS when both are considered signed.
/// @brief Signed less than comparison
bool slt(const APInt& RHS) const;
+ /// Regards both *this as a signed quantity and compares it with RHS for
+ /// the validity of the less-than relationship.
+ /// @returns true if *this < RHS when considered signed.
+ /// @brief Signed less than comparison
+ bool slt(uint64_t RHS) const {
+ return slt(APInt(getBitWidth(), RHS));
+ }
+
/// Regards both *this and RHS as unsigned quantities and compares them for
/// validity of the less-or-equal relationship.
/// @returns true if *this <= RHS when both are considered unsigned.
@@ -884,6 +900,14 @@ public:
return ult(RHS) || eq(RHS);
}
+ /// Regards both *this as an unsigned quantity and compares it with RHS for
+ /// the validity of the less-or-equal relationship.
+ /// @returns true if *this <= RHS when considered unsigned.
+ /// @brief Unsigned less or equal comparison
+ bool ule(uint64_t RHS) const {
+ return ule(APInt(getBitWidth(), RHS));
+ }
+
/// Regards both *this and RHS as signed quantities and compares them for
/// validity of the less-or-equal relationship.
/// @returns true if *this <= RHS when both are considered signed.
@@ -892,6 +916,14 @@ public:
return slt(RHS) || eq(RHS);
}
+ /// Regards both *this as a signed quantity and compares it with RHS for
+ /// the validity of the less-or-equal relationship.
+ /// @returns true if *this <= RHS when considered signed.
+ /// @brief Signed less or equal comparison
+ bool sle(uint64_t RHS) const {
+ return sle(APInt(getBitWidth(), RHS));
+ }
+
/// Regards both *this and RHS as unsigned quantities and compares them for
/// the validity of the greater-than relationship.
/// @returns true if *this > RHS when both are considered unsigned.
@@ -900,6 +932,14 @@ public:
return !ult(RHS) && !eq(RHS);
}
+ /// Regards both *this as an unsigned quantity and compares it with RHS for
+ /// the validity of the greater-than relationship.
+ /// @returns true if *this > RHS when considered unsigned.
+ /// @brief Unsigned greater than comparison
+ bool ugt(uint64_t RHS) const {
+ return ugt(APInt(getBitWidth(), RHS));
+ }
+
/// Regards both *this and RHS as signed quantities and compares them for
/// the validity of the greater-than relationship.
/// @returns true if *this > RHS when both are considered signed.
@@ -908,6 +948,14 @@ public:
return !slt(RHS) && !eq(RHS);
}
+ /// Regards both *this as a signed quantity and compares it with RHS for
+ /// the validity of the greater-than relationship.
+ /// @returns true if *this > RHS when considered signed.
+ /// @brief Signed greater than comparison
+ bool sgt(uint64_t RHS) const {
+ return sgt(APInt(getBitWidth(), RHS));
+ }
+
/// Regards both *this and RHS as unsigned quantities and compares them for
/// validity of the greater-or-equal relationship.
/// @returns true if *this >= RHS when both are considered unsigned.
@@ -916,6 +964,14 @@ public:
return !ult(RHS);
}
+ /// Regards both *this as an unsigned quantity and compares it with RHS for
+ /// the validity of the greater-or-equal relationship.
+ /// @returns true if *this >= RHS when considered unsigned.
+ /// @brief Unsigned greater or equal comparison
+ bool uge(uint64_t RHS) const {
+ return uge(APInt(getBitWidth(), RHS));
+ }
+
/// Regards both *this and RHS as signed quantities and compares them for
/// validity of the greater-or-equal relationship.
/// @returns true if *this >= RHS when both are considered signed.
@@ -924,6 +980,14 @@ public:
return !slt(RHS);
}
+ /// Regards both *this as a signed quantity and compares it with RHS for
+ /// the validity of the greater-or-equal relationship.
+ /// @returns true if *this >= RHS when considered signed.
+ /// @brief Signed greater or equal comparison
+ bool sge(uint64_t RHS) const {
+ return sge(APInt(getBitWidth(), RHS));
+ }
+
/// This operation tests if there are any pairs of corresponding bits
/// between this APInt and RHS that are both set.
bool intersects(const APInt &RHS) const {
diff --git a/include/llvm/ADT/BitVector.h b/include/llvm/ADT/BitVector.h
index 3a86b0d34368..9dcb9e106f26 100644
--- a/include/llvm/ADT/BitVector.h
+++ b/include/llvm/ADT/BitVector.h
@@ -49,6 +49,11 @@ public:
~reference() {}
+ reference &operator=(reference t) {
+ *this = bool(t);
+ return *this;
+ }
+
reference& operator=(bool t) {
if (t)
*WordRef |= 1L << BitPos;
diff --git a/include/llvm/ADT/DenseMapInfo.h b/include/llvm/ADT/DenseMapInfo.h
index 41197a1e12df..529938699270 100644
--- a/include/llvm/ADT/DenseMapInfo.h
+++ b/include/llvm/ADT/DenseMapInfo.h
@@ -92,6 +92,16 @@ template<> struct DenseMapInfo<unsigned long long> {
}
};
+// Provide DenseMapInfo for ints.
+template<> struct DenseMapInfo<int> {
+ static inline int getEmptyKey() { return 0x7fffffff; }
+ static inline int getTombstoneKey() { return -0x7fffffff - 1; }
+ static unsigned getHashValue(const int& Val) { return (unsigned)(Val * 37); }
+ static bool isEqual(const int& LHS, const int& RHS) {
+ return LHS == RHS;
+ }
+};
+
// Provide DenseMapInfo for long longs.
template<> struct DenseMapInfo<long long> {
static inline long long getEmptyKey() { return 0x7fffffffffffffffLL; }
diff --git a/include/llvm/ADT/ImmutableSet.h b/include/llvm/ADT/ImmutableSet.h
index 65e70e279ab3..70c3caf2a061 100644
--- a/include/llvm/ADT/ImmutableSet.h
+++ b/include/llvm/ADT/ImmutableSet.h
@@ -189,6 +189,8 @@ public:
unsigned verify() const {
unsigned HL = getLeft() ? getLeft()->verify() : 0;
unsigned HR = getRight() ? getRight()->verify() : 0;
+ (void) HL;
+ (void) HR;
assert(getHeight() == ( HL > HR ? HL : HR ) + 1
&& "Height calculation wrong");
diff --git a/include/llvm/ADT/Optional.h b/include/llvm/ADT/Optional.h
new file mode 100644
index 000000000000..34e54a07a0ef
--- /dev/null
+++ b/include/llvm/ADT/Optional.h
@@ -0,0 +1,66 @@
+//===-- Optional.h - Simple variant for passing optional values ---*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Optional, a template class modeled in the spirit of
+// OCaml's 'opt' variant. The idea is to strongly type whether or not
+// a value can be optional.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_OPTIONAL
+#define LLVM_ADT_OPTIONAL
+
+#include <cassert>
+
+namespace llvm {
+
+template<typename T>
+class Optional {
+ T x;
+ unsigned hasVal : 1;
+public:
+ explicit Optional() : x(), hasVal(false) {}
+ Optional(const T &y) : x(y), hasVal(true) {}
+
+ static inline Optional create(const T* y) {
+ return y ? Optional(*y) : Optional();
+ }
+
+ Optional &operator=(const T &y) {
+ x = y;
+ hasVal = true;
+ return *this;
+ }
+
+ const T* getPointer() const { assert(hasVal); return &x; }
+ const T& getValue() const { assert(hasVal); return x; }
+
+ operator bool() const { return hasVal; }
+ bool hasValue() const { return hasVal; }
+ const T* operator->() const { return getPointer(); }
+ const T& operator*() const { assert(hasVal); return x; }
+};
+
+template<typename T> struct simplify_type;
+
+template <typename T>
+struct simplify_type<const Optional<T> > {
+ typedef const T* SimpleType;
+ static SimpleType getSimplifiedValue(const Optional<T> &Val) {
+ return Val.getPointer();
+ }
+};
+
+template <typename T>
+struct simplify_type<Optional<T> >
+ : public simplify_type<const Optional<T> > {};
+
+} // end llvm namespace
+
+#endif
diff --git a/include/llvm/ADT/SCCIterator.h b/include/llvm/ADT/SCCIterator.h
index d38ce4cd634b..c49d599cf38f 100644
--- a/include/llvm/ADT/SCCIterator.h
+++ b/include/llvm/ADT/SCCIterator.h
@@ -66,7 +66,7 @@ class scc_iterator
std::vector<unsigned> MinVisitNumStack;
// A single "visit" within the non-recursive DFS traversal.
- void DFSVisitOne(NodeType* N) {
+ void DFSVisitOne(NodeType *N) {
++visitNum; // Global counter for the visit order
nodeVisitNumbers[N] = visitNum;
SCCNodeStack.push_back(N);
@@ -83,13 +83,14 @@ class scc_iterator
// TOS has at least one more child so continue DFS
NodeType *childN = *VisitStack.back().second++;
if (!nodeVisitNumbers.count(childN)) {
- // this node has never been seen
+ // this node has never been seen.
DFSVisitOne(childN);
- } else {
- unsigned childNum = nodeVisitNumbers[childN];
- if (MinVisitNumStack.back() > childNum)
- MinVisitNumStack.back() = childNum;
+ continue;
}
+
+ unsigned childNum = nodeVisitNumbers[childN];
+ if (MinVisitNumStack.back() > childNum)
+ MinVisitNumStack.back() = childNum;
}
}
@@ -100,7 +101,7 @@ class scc_iterator
while (!VisitStack.empty()) {
DFSVisitChildren();
assert(VisitStack.back().second ==GT::child_end(VisitStack.back().first));
- NodeType* visitingN = VisitStack.back().first;
+ NodeType *visitingN = VisitStack.back().first;
unsigned minVisitNum = MinVisitNumStack.back();
VisitStack.pop_back();
MinVisitNumStack.pop_back();
@@ -111,18 +112,19 @@ class scc_iterator
// " : minVisitNum = " << minVisitNum << "; Node visit num = " <<
// nodeVisitNumbers[visitingN] << "\n";
- if (minVisitNum == nodeVisitNumbers[visitingN]) {
- // A full SCC is on the SCCNodeStack! It includes all nodes below
- // visitingN on the stack. Copy those nodes to CurrentSCC,
- // reset their minVisit values, and return (this suspends
- // the DFS traversal till the next ++).
- do {
- CurrentSCC.push_back(SCCNodeStack.back());
- SCCNodeStack.pop_back();
- nodeVisitNumbers[CurrentSCC.back()] = ~0U;
- } while (CurrentSCC.back() != visitingN);
- return;
- }
+ if (minVisitNum != nodeVisitNumbers[visitingN])
+ continue;
+
+ // A full SCC is on the SCCNodeStack! It includes all nodes below
+ // visitingN on the stack. Copy those nodes to CurrentSCC,
+ // reset their minVisit values, and return (this suspends
+ // the DFS traversal till the next ++).
+ do {
+ CurrentSCC.push_back(SCCNodeStack.back());
+ SCCNodeStack.pop_back();
+ nodeVisitNumbers[CurrentSCC.back()] = ~0U;
+ } while (CurrentSCC.back() != visitingN);
+ return;
}
}
@@ -136,11 +138,11 @@ public:
typedef scc_iterator<GraphT, GT> _Self;
// Provide static "constructors"...
- static inline _Self begin(const GraphT& G) { return _Self(GT::getEntryNode(G)); }
- static inline _Self end (const GraphT& G) { return _Self(); }
+ static inline _Self begin(const GraphT &G){return _Self(GT::getEntryNode(G));}
+ static inline _Self end (const GraphT &G) { return _Self(); }
- // Direct loop termination test (I.fini() is more efficient than I == end())
- inline bool fini() const {
+ // Direct loop termination test: I.isAtEnd() is more efficient than I == end()
+ inline bool isAtEnd() const {
assert(!CurrentSCC.empty() || VisitStack.empty());
return CurrentSCC.empty();
}
@@ -181,28 +183,36 @@ public:
return true;
return false;
}
+
+ /// ReplaceNode - This informs the scc_iterator that the specified Old node
+ /// has been deleted, and New is to be used in its place.
+ void ReplaceNode(NodeType *Old, NodeType *New) {
+ assert(nodeVisitNumbers.count(Old) && "Old not in scc_iterator?");
+ nodeVisitNumbers[New] = nodeVisitNumbers[Old];
+ nodeVisitNumbers.erase(Old);
+ }
};
// Global constructor for the SCC iterator.
template <class T>
-scc_iterator<T> scc_begin(const T& G) {
+scc_iterator<T> scc_begin(const T &G) {
return scc_iterator<T>::begin(G);
}
template <class T>
-scc_iterator<T> scc_end(const T& G) {
+scc_iterator<T> scc_end(const T &G) {
return scc_iterator<T>::end(G);
}
template <class T>
-scc_iterator<Inverse<T> > scc_begin(const Inverse<T>& G) {
- return scc_iterator<Inverse<T> >::begin(G);
+scc_iterator<Inverse<T> > scc_begin(const Inverse<T> &G) {
+ return scc_iterator<Inverse<T> >::begin(G);
}
template <class T>
-scc_iterator<Inverse<T> > scc_end(const Inverse<T>& G) {
- return scc_iterator<Inverse<T> >::end(G);
+scc_iterator<Inverse<T> > scc_end(const Inverse<T> &G) {
+ return scc_iterator<Inverse<T> >::end(G);
}
} // End llvm namespace
diff --git a/include/llvm/ADT/SmallBitVector.h b/include/llvm/ADT/SmallBitVector.h
index 5c774b90ee3d..3441d0a90c9b 100644
--- a/include/llvm/ADT/SmallBitVector.h
+++ b/include/llvm/ADT/SmallBitVector.h
@@ -15,7 +15,6 @@
#define LLVM_ADT_SMALLBITVECTOR_H
#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/PointerIntPair.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
@@ -32,48 +31,85 @@ class SmallBitVector {
// TODO: In "large" mode, a pointer to a BitVector is used, leading to an
// unnecessary level of indirection. It would be more efficient to use a
// pointer to memory containing size, allocation size, and the array of bits.
- PointerIntPair<BitVector *, 1, uintptr_t> X;
+ uintptr_t X;
- // The number of bits in this class.
- static const size_t NumBaseBits = sizeof(uintptr_t) * CHAR_BIT;
+ enum {
+ // The number of bits in this class.
+ NumBaseBits = sizeof(uintptr_t) * CHAR_BIT,
- // One bit is used to discriminate between small and large mode. The
- // remaining bits are used for the small-mode representation.
- static const size_t SmallNumRawBits = NumBaseBits - 1;
+ // One bit is used to discriminate between small and large mode. The
+ // remaining bits are used for the small-mode representation.
+ SmallNumRawBits = NumBaseBits - 1,
- // A few more bits are used to store the size of the bit set in small mode.
- // Theoretically this is a ceil-log2. These bits are encoded in the most
- // significant bits of the raw bits.
- static const size_t SmallNumSizeBits = (NumBaseBits == 32 ? 5 :
- NumBaseBits == 64 ? 6 :
- SmallNumRawBits);
+ // A few more bits are used to store the size of the bit set in small mode.
+ // Theoretically this is a ceil-log2. These bits are encoded in the most
+ // significant bits of the raw bits.
+ SmallNumSizeBits = (NumBaseBits == 32 ? 5 :
+ NumBaseBits == 64 ? 6 :
+ SmallNumRawBits),
- // The remaining bits are used to store the actual set in small mode.
- static const size_t SmallNumDataBits = SmallNumRawBits - SmallNumSizeBits;
+ // The remaining bits are used to store the actual set in small mode.
+ SmallNumDataBits = SmallNumRawBits - SmallNumSizeBits
+ };
+public:
+ // Encapsulation of a single bit.
+ class reference {
+ SmallBitVector &TheVector;
+ unsigned BitPos;
+
+ public:
+ reference(SmallBitVector &b, unsigned Idx) : TheVector(b), BitPos(Idx) {}
+
+ reference& operator=(reference t) {
+ *this = bool(t);
+ return *this;
+ }
+
+ reference& operator=(bool t) {
+ if (t)
+ TheVector.set(BitPos);
+ else
+ TheVector.reset(BitPos);
+ return *this;
+ }
+
+ operator bool() const {
+ return const_cast<const SmallBitVector &>(TheVector).operator[](BitPos);
+ }
+ };
+
+private:
bool isSmall() const {
- return X.getInt();
+ return X & uintptr_t(1);
+ }
+
+ BitVector *getPointer() const {
+ assert(!isSmall());
+ return reinterpret_cast<BitVector *>(X);
}
void switchToSmall(uintptr_t NewSmallBits, size_t NewSize) {
- X.setInt(true);
+ X = 1;
setSmallSize(NewSize);
setSmallBits(NewSmallBits);
}
void switchToLarge(BitVector *BV) {
- X.setInt(false);
- X.setPointer(BV);
+ X = reinterpret_cast<uintptr_t>(BV);
+ assert(!isSmall() && "Tried to use an unaligned pointer");
}
// Return all the bits used for the "small" representation; this includes
// bits for the size as well as the element bits.
uintptr_t getSmallRawBits() const {
- return reinterpret_cast<uintptr_t>(X.getPointer()) >> 1;
+ assert(isSmall());
+ return X >> 1;
}
void setSmallRawBits(uintptr_t NewRawBits) {
- return X.setPointer(reinterpret_cast<BitVector *>(NewRawBits << 1));
+ assert(isSmall());
+ X = (NewRawBits << 1) | uintptr_t(1);
}
// Return the size.
@@ -87,22 +123,22 @@ class SmallBitVector {
// Return the element bits.
uintptr_t getSmallBits() const {
- return getSmallRawBits() & ~(~uintptr_t(0) << SmallNumDataBits);
+ return getSmallRawBits() & ~(~uintptr_t(0) << getSmallSize());
}
void setSmallBits(uintptr_t NewBits) {
- setSmallRawBits((getSmallRawBits() & (~uintptr_t(0) << SmallNumDataBits)) |
- (NewBits & ~(~uintptr_t(0) << getSmallSize())));
+ setSmallRawBits((NewBits & ~(~uintptr_t(0) << getSmallSize())) |
+ (getSmallSize() << SmallNumDataBits));
}
public:
/// SmallBitVector default ctor - Creates an empty bitvector.
- SmallBitVector() : X(0, 1) {}
+ SmallBitVector() : X(1) {}
/// SmallBitVector ctor - Creates a bitvector of specified number of bits. All
/// bits are initialized to the specified value.
- explicit SmallBitVector(unsigned s, bool t = false) : X(0, 1) {
- if (s <= SmallNumRawBits)
+ explicit SmallBitVector(unsigned s, bool t = false) {
+ if (s <= SmallNumDataBits)
switchToSmall(t ? ~uintptr_t(0) : 0, s);
else
switchToLarge(new BitVector(s, t));
@@ -113,22 +149,22 @@ public:
if (RHS.isSmall())
X = RHS.X;
else
- switchToLarge(new BitVector(*RHS.X.getPointer()));
+ switchToLarge(new BitVector(*RHS.getPointer()));
}
~SmallBitVector() {
if (!isSmall())
- delete X.getPointer();
+ delete getPointer();
}
/// empty - Tests whether there are no bits in this bitvector.
bool empty() const {
- return isSmall() ? getSmallSize() == 0 : X.getPointer()->empty();
+ return isSmall() ? getSmallSize() == 0 : getPointer()->empty();
}
/// size - Returns the number of bits in this bitvector.
size_t size() const {
- return isSmall() ? getSmallSize() : X.getPointer()->size();
+ return isSmall() ? getSmallSize() : getPointer()->size();
}
/// count - Returns the number of bits which are set.
@@ -141,21 +177,21 @@ public:
return CountPopulation_64(Bits);
assert(0 && "Unsupported!");
}
- return X.getPointer()->count();
+ return getPointer()->count();
}
/// any - Returns true if any bit is set.
bool any() const {
if (isSmall())
return getSmallBits() != 0;
- return X.getPointer()->any();
+ return getPointer()->any();
}
/// none - Returns true if none of the bits are set.
bool none() const {
if (isSmall())
return getSmallBits() == 0;
- return X.getPointer()->none();
+ return getPointer()->none();
}
/// find_first - Returns the index of the first set bit, -1 if none
@@ -163,13 +199,15 @@ public:
int find_first() const {
if (isSmall()) {
uintptr_t Bits = getSmallBits();
+ if (Bits == 0)
+ return -1;
if (sizeof(uintptr_t) * CHAR_BIT == 32)
return CountTrailingZeros_32(Bits);
if (sizeof(uintptr_t) * CHAR_BIT == 64)
return CountTrailingZeros_64(Bits);
assert(0 && "Unsupported!");
}
- return X.getPointer()->find_first();
+ return getPointer()->find_first();
}
/// find_next - Returns the index of the next set bit following the
@@ -178,30 +216,33 @@ public:
if (isSmall()) {
uintptr_t Bits = getSmallBits();
// Mask off previous bits.
- Bits &= ~uintptr_t(0) << Prev;
+ Bits &= ~uintptr_t(0) << (Prev + 1);
+ if (Bits == 0 || Prev + 1 >= getSmallSize())
+ return -1;
if (sizeof(uintptr_t) * CHAR_BIT == 32)
return CountTrailingZeros_32(Bits);
if (sizeof(uintptr_t) * CHAR_BIT == 64)
return CountTrailingZeros_64(Bits);
assert(0 && "Unsupported!");
}
- return X.getPointer()->find_next(Prev);
+ return getPointer()->find_next(Prev);
}
/// clear - Clear all bits.
void clear() {
if (!isSmall())
- delete X.getPointer();
+ delete getPointer();
switchToSmall(0, 0);
}
/// resize - Grow or shrink the bitvector.
void resize(unsigned N, bool t = false) {
if (!isSmall()) {
- X.getPointer()->resize(N, t);
- } else if (getSmallSize() >= N) {
+ getPointer()->resize(N, t);
+ } else if (SmallNumDataBits >= N) {
+ uintptr_t NewBits = t ? ~uintptr_t(0) << getSmallSize() : 0;
setSmallSize(N);
- setSmallBits(getSmallBits());
+ setSmallBits(NewBits | getSmallBits());
} else {
BitVector *BV = new BitVector(N, t);
uintptr_t OldBits = getSmallBits();
@@ -224,7 +265,7 @@ public:
switchToLarge(BV);
}
} else {
- X.getPointer()->reserve(N);
+ getPointer()->reserve(N);
}
}
@@ -233,7 +274,7 @@ public:
if (isSmall())
setSmallBits(~uintptr_t(0));
else
- X.getPointer()->set();
+ getPointer()->set();
return *this;
}
@@ -241,7 +282,7 @@ public:
if (isSmall())
setSmallBits(getSmallBits() | (uintptr_t(1) << Idx));
else
- X.getPointer()->set(Idx);
+ getPointer()->set(Idx);
return *this;
}
@@ -249,7 +290,7 @@ public:
if (isSmall())
setSmallBits(0);
else
- X.getPointer()->reset();
+ getPointer()->reset();
return *this;
}
@@ -257,7 +298,7 @@ public:
if (isSmall())
setSmallBits(getSmallBits() & ~(uintptr_t(1) << Idx));
else
- X.getPointer()->reset(Idx);
+ getPointer()->reset(Idx);
return *this;
}
@@ -265,7 +306,7 @@ public:
if (isSmall())
setSmallBits(~getSmallBits());
else
- X.getPointer()->flip();
+ getPointer()->flip();
return *this;
}
@@ -273,7 +314,7 @@ public:
if (isSmall())
setSmallBits(getSmallBits() ^ (uintptr_t(1) << Idx));
else
- X.getPointer()->flip(Idx);
+ getPointer()->flip(Idx);
return *this;
}
@@ -283,12 +324,16 @@ public:
}
// Indexing.
- // TODO: Add an index operator which returns a "reference" (proxy class).
+ reference operator[](unsigned Idx) {
+ assert(Idx < size() && "Out-of-bounds Bit access.");
+ return reference(*this, Idx);
+ }
+
bool operator[](unsigned Idx) const {
assert(Idx < size() && "Out-of-bounds Bit access.");
if (isSmall())
return ((getSmallBits() >> Idx) & 1) != 0;
- return X.getPointer()->operator[](Idx);
+ return getPointer()->operator[](Idx);
}
bool test(unsigned Idx) const {
@@ -302,7 +347,7 @@ public:
if (isSmall())
return getSmallBits() == RHS.getSmallBits();
else
- return *X.getPointer() == *RHS.X.getPointer();
+ return *getPointer() == *RHS.getPointer();
}
bool operator!=(const SmallBitVector &RHS) const {
@@ -315,11 +360,11 @@ public:
if (isSmall())
setSmallBits(getSmallBits() & RHS.getSmallBits());
else if (!RHS.isSmall())
- X.getPointer()->operator&=(*RHS.X.getPointer());
+ getPointer()->operator&=(*RHS.getPointer());
else {
SmallBitVector Copy = RHS;
Copy.resize(size());
- X.getPointer()->operator&=(*Copy.X.getPointer());
+ getPointer()->operator&=(*Copy.getPointer());
}
return *this;
}
@@ -329,11 +374,11 @@ public:
if (isSmall())
setSmallBits(getSmallBits() | RHS.getSmallBits());
else if (!RHS.isSmall())
- X.getPointer()->operator|=(*RHS.X.getPointer());
+ getPointer()->operator|=(*RHS.getPointer());
else {
SmallBitVector Copy = RHS;
Copy.resize(size());
- X.getPointer()->operator|=(*Copy.X.getPointer());
+ getPointer()->operator|=(*Copy.getPointer());
}
return *this;
}
@@ -343,11 +388,11 @@ public:
if (isSmall())
setSmallBits(getSmallBits() ^ RHS.getSmallBits());
else if (!RHS.isSmall())
- X.getPointer()->operator^=(*RHS.X.getPointer());
+ getPointer()->operator^=(*RHS.getPointer());
else {
SmallBitVector Copy = RHS;
Copy.resize(size());
- X.getPointer()->operator^=(*Copy.X.getPointer());
+ getPointer()->operator^=(*Copy.getPointer());
}
return *this;
}
@@ -358,12 +403,12 @@ public:
if (RHS.isSmall())
X = RHS.X;
else
- switchToLarge(new BitVector(*RHS.X.getPointer()));
+ switchToLarge(new BitVector(*RHS.getPointer()));
} else {
if (!RHS.isSmall())
- *X.getPointer() = *RHS.X.getPointer();
+ *getPointer() = *RHS.getPointer();
else {
- delete X.getPointer();
+ delete getPointer();
X = RHS.X;
}
}
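
Taken together, the hunks above give SmallBitVector an assignable element proxy, explicit small/large mode handling, and corrected find_first/find_next bounds. A minimal usage sketch against the patched header, using only members shown above:

    #include "llvm/ADT/SmallBitVector.h"
    using namespace llvm;

    void smallBitVectorDemo() {
      SmallBitVector BV(10);     // 10 bits fits the inline (small) representation
      BV[3] = true;              // new reference proxy: operator[] is assignable
      BV.set(7);
      // find_first/find_next now return -1 on empty/exhausted small sets.
      for (int i = BV.find_first(); i != -1; i = BV.find_next(i))
        ;                        // visits bit 3, then bit 7
      BV.resize(200);            // spills to a heap-allocated BitVector
    }
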
diff --git a/include/llvm/ADT/SmallVector.h b/include/llvm/ADT/SmallVector.h
index 2d79a029d019..18c8619bf93a 100644
--- a/include/llvm/ADT/SmallVector.h
+++ b/include/llvm/ADT/SmallVector.h
@@ -59,7 +59,7 @@ protected:
// number of union instances for the space, which guarantee maximal alignment.
struct U {
#ifdef __GNUC__
- char X __attribute__((aligned));
+ char X __attribute__((aligned(8)));
#else
union {
double D;
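
The distinction matters: the bare attribute requests the largest alignment ever used on the target (often 16 bytes), over-aligning the inline storage, while aligned(8) pins it to the 8 bytes the double/pointer union needs. A GCC-only sketch (hypothetical struct names), mirroring the #ifdef __GNUC__ branch above:

    struct MaxAligned   { char X __attribute__((aligned));    }; // ABI max, e.g. 16
    struct EightAligned { char X __attribute__((aligned(8))); }; // exactly 8
    // __alignof__(MaxAligned) >= __alignof__(EightAligned) == 8 on x86-64.
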
diff --git a/include/llvm/ADT/StringExtras.h b/include/llvm/ADT/StringExtras.h
index 1ea546f46f29..3c53adee63c8 100644
--- a/include/llvm/ADT/StringExtras.h
+++ b/include/llvm/ADT/StringExtras.h
@@ -57,15 +57,14 @@ static inline char *utohex_buffer(IntTy X, char *BufferEnd) {
}
static inline std::string utohexstr(uint64_t X) {
- char Buffer[40];
- return utohex_buffer(X, Buffer+40);
+ char Buffer[17];
+ return utohex_buffer(X, Buffer+17);
}
static inline std::string utostr_32(uint32_t X, bool isNeg = false) {
- char Buffer[20];
- char *BufPtr = Buffer+19;
+ char Buffer[11];
+ char *BufPtr = Buffer+11;
- *BufPtr = 0; // Null terminate buffer...
if (X == 0) *--BufPtr = '0'; // Handle special case...
while (X) {
@@ -75,17 +74,13 @@ static inline std::string utostr_32(uint32_t X, bool isNeg = false) {
if (isNeg) *--BufPtr = '-'; // Add negative sign...
- return std::string(BufPtr);
+ return std::string(BufPtr, Buffer+11);
}
static inline std::string utostr(uint64_t X, bool isNeg = false) {
- if (X == uint32_t(X))
- return utostr_32(uint32_t(X), isNeg);
+ char Buffer[21];
+ char *BufPtr = Buffer+21;
- char Buffer[40];
- char *BufPtr = Buffer+39;
-
- *BufPtr = 0; // Null terminate buffer...
if (X == 0) *--BufPtr = '0'; // Handle special case...
while (X) {
@@ -94,7 +89,7 @@ static inline std::string utostr(uint64_t X, bool isNeg = false) {
}
if (isNeg) *--BufPtr = '-'; // Add negative sign...
- return std::string(BufPtr);
+ return std::string(BufPtr, Buffer+21);
}
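
The new buffer sizes are exact bounds: 2^64-1 has 20 decimal digits (plus one for a sign) and 16 hex digits, and the NUL store is gone because the string is now built from an iterator range. A standalone sketch of the same back-to-front fill (not LLVM's code):

    #include <cstdint>
    #include <string>

    // Fill a fixed buffer from the end, as the patched utostr does.
    static std::string toDecimal(uint64_t X, bool isNeg = false) {
      char Buffer[21];            // 20 digits for 2^64-1, plus a sign
      char *BufPtr = Buffer + 21; // no NUL: the range ctor takes [BufPtr, end)
      if (X == 0) *--BufPtr = '0';
      while (X) {
        *--BufPtr = '0' + char(X % 10);
        X /= 10;
      }
      if (isNeg) *--BufPtr = '-';
      return std::string(BufPtr, Buffer + 21);
    }
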
diff --git a/include/llvm/ADT/StringRef.h b/include/llvm/ADT/StringRef.h
index 925777000f52..ab6358bdfa6c 100644
--- a/include/llvm/ADT/StringRef.h
+++ b/include/llvm/ADT/StringRef.h
@@ -44,8 +44,8 @@ namespace llvm {
// Workaround PR5482: nearly all gcc 4.x miscompile StringRef and std::min()
// Changing the arg of min to be an integer, instead of a reference to an
// integer works around this bug.
- size_t min(size_t a, size_t b) const { return a < b ? a : b; }
- size_t max(size_t a, size_t b) const { return a > b ? a : b; }
+ static size_t min(size_t a, size_t b) { return a < b ? a : b; }
+ static size_t max(size_t a, size_t b) { return a > b ? a : b; }
public:
/// @name Constructors
diff --git a/include/llvm/Analysis/CallGraph.h b/include/llvm/Analysis/CallGraph.h
index 287fe4faa4f5..a4884edd5bd6 100644
--- a/include/llvm/Analysis/CallGraph.h
+++ b/include/llvm/Analysis/CallGraph.h
@@ -187,6 +187,9 @@ public:
// CallGraphNode ctor - Create a node for the specified function.
inline CallGraphNode(Function *f) : F(f), NumReferences(0) {}
+ ~CallGraphNode() {
+ assert(NumReferences == 0 && "Node deleted while references remain");
+ }
//===---------------------------------------------------------------------
// Accessor methods.
@@ -277,6 +280,11 @@ public:
/// time, so it should be used sparingly.
void replaceCallEdge(CallSite CS, CallSite NewCS, CallGraphNode *NewNode);
+ /// allReferencesDropped - This is a special function that should only be
+ /// used by the CallGraph class.
+ void allReferencesDropped() {
+ NumReferences = 0;
+ }
};
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Analysis/DebugInfo.h b/include/llvm/Analysis/DebugInfo.h
index d68bfa3c0dfe..9b1379d90ae4 100644
--- a/include/llvm/Analysis/DebugInfo.h
+++ b/include/llvm/Analysis/DebugInfo.h
@@ -392,6 +392,7 @@ namespace llvm {
return getFieldAs<DICompositeType>(13);
}
unsigned isArtificial() const { return getUnsignedField(14); }
+ unsigned isOptimized() const;
StringRef getFilename() const {
if (getVersion() == llvm::LLVMDebugVersion7)
@@ -474,6 +475,10 @@ namespace llvm {
return getType().isBlockByrefStruct();
}
+ /// isInlinedFnArgument - Return true if this variable provides debugging
+ /// information for an inlined function's arguments.
+ bool isInlinedFnArgument(const Function *CurFn);
+
/// dump - print variable.
void dump() const;
};
@@ -638,7 +643,8 @@ namespace llvm {
unsigned VK = 0,
unsigned VIndex = 0,
DIType = DIType(),
- bool isArtificial = 0);
+ bool isArtificial = 0,
+ bool isOptimized = false);
/// CreateSubprogramDefinition - Create new subprogram descriptor for the
/// given declaration.
diff --git a/include/llvm/Analysis/IVUsers.h b/include/llvm/Analysis/IVUsers.h
index dc616ca2fd3a..578e6aba8338 100644
--- a/include/llvm/Analysis/IVUsers.h
+++ b/include/llvm/Analysis/IVUsers.h
@@ -16,6 +16,7 @@
#define LLVM_ANALYSIS_IVUSERS_H
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolutionNormalization.h"
#include "llvm/Support/ValueHandle.h"
namespace llvm {
@@ -26,17 +27,17 @@ class Value;
class IVUsers;
class ScalarEvolution;
class SCEV;
+class IVUsers;
/// IVStrideUse - Keep track of one use of a strided induction variable.
/// The Expr member keeps track of the expression, User is the actual user
/// instruction of the operand, and 'OperandValToReplace' is the operand of
/// the User that is the use.
class IVStrideUse : public CallbackVH, public ilist_node<IVStrideUse> {
+ friend class IVUsers;
public:
- IVStrideUse(IVUsers *P, const SCEV *S, const SCEV *Off,
- Instruction* U, Value *O)
- : CallbackVH(U), Parent(P), Stride(S), Offset(Off),
- OperandValToReplace(O), IsUseOfPostIncrementedValue(false) {
+ IVStrideUse(IVUsers *P, Instruction* U, Value *O)
+ : CallbackVH(U), Parent(P), OperandValToReplace(O) {
}
/// getUser - Return the user instruction for this use.
@@ -49,28 +50,6 @@ public:
setValPtr(NewUser);
}
- /// getParent - Return a pointer to the IVUsers that owns
- /// this IVStrideUse.
- IVUsers *getParent() const { return Parent; }
-
- /// getStride - Return the expression for the stride for the use.
- const SCEV *getStride() const { return Stride; }
-
- /// setStride - Assign a new stride to this use.
- void setStride(const SCEV *Val) {
- Stride = Val;
- }
-
- /// getOffset - Return the offset to add to a theoretical induction
- /// variable that starts at zero and counts up by the stride to compute
- /// the value for the use. This always has the same type as the stride.
- const SCEV *getOffset() const { return Offset; }
-
- /// setOffset - Assign a new offset to this use.
- void setOffset(const SCEV *Val) {
- Offset = Val;
- }
-
/// getOperandValToReplace - Return the Value of the operand in the user
/// instruction that this IVStrideUse is representing.
Value *getOperandValToReplace() const {
@@ -83,37 +62,27 @@ public:
OperandValToReplace = Op;
}
- /// isUseOfPostIncrementedValue - True if this should use the
- /// post-incremented version of this IV, not the preincremented version.
- /// This can only be set in special cases, such as the terminating setcc
- /// instruction for a loop or uses dominated by the loop.
- bool isUseOfPostIncrementedValue() const {
- return IsUseOfPostIncrementedValue;
+ /// getPostIncLoops - Return the set of loops for which the expression has
+ /// been adjusted to use post-inc mode.
+ const PostIncLoopSet &getPostIncLoops() const {
+ return PostIncLoops;
}
- /// setIsUseOfPostIncrmentedValue - set the flag that indicates whether
- /// this is a post-increment use.
- void setIsUseOfPostIncrementedValue(bool Val) {
- IsUseOfPostIncrementedValue = Val;
- }
+ /// transformToPostInc - Transform the expression to post-inc form for the
+ /// given loop.
+ void transformToPostInc(const Loop *L);
private:
/// Parent - a pointer to the IVUsers that owns this IVStrideUse.
IVUsers *Parent;
- /// Stride - The stride for this use.
- const SCEV *Stride;
-
- /// Offset - The offset to add to the base induction expression.
- const SCEV *Offset;
-
/// OperandValToReplace - The Value of the operand in the user instruction
/// that this IVStrideUse is representing.
WeakVH OperandValToReplace;
- /// IsUseOfPostIncrementedValue - True if this should use the
- /// post-incremented version of this IV, not the preincremented version.
- bool IsUseOfPostIncrementedValue;
+ /// PostIncLoops - The set of loops for which Expr has been adjusted to
+ /// use post-inc mode. This corresponds with SCEVExpander's post-inc concept.
+ PostIncLoopSet PostIncLoops;
/// Deleted - Implementation of CallbackVH virtual function to
/// receive notification when the User is deleted.
@@ -174,17 +143,16 @@ public:
/// return true. Otherwise, return false.
bool AddUsersIfInteresting(Instruction *I);
- IVStrideUse &AddUser(const SCEV *Stride, const SCEV *Offset,
- Instruction *User, Value *Operand);
+ IVStrideUse &AddUser(Instruction *User, Value *Operand);
/// getReplacementExpr - Return a SCEV expression which computes the
/// value of the OperandValToReplace of the given IVStrideUse.
- const SCEV *getReplacementExpr(const IVStrideUse &U) const;
+ const SCEV *getReplacementExpr(const IVStrideUse &IU) const;
+
+ /// getExpr - Return the expression for the use.
+ const SCEV *getExpr(const IVStrideUse &IU) const;
- /// getCanonicalExpr - Return a SCEV expression which computes the
- /// value of the SCEV of the given IVStrideUse, ignoring the
- /// isUseOfPostIncrementedValue flag.
- const SCEV *getCanonicalExpr(const IVStrideUse &U) const;
+ const SCEV *getStride(const IVStrideUse &IU, const Loop *L) const;
typedef ilist<IVStrideUse>::iterator iterator;
typedef ilist<IVStrideUse>::const_iterator const_iterator;
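
A sketch of a client on the reworked interface: per-use stride/offset storage is gone, so expressions are queried through IVUsers and post-inc state is recorded per loop. begin()/end() over the ilist typedefs above are assumed.

    #include "llvm/Analysis/IVUsers.h"
    using namespace llvm;

    // Move every use into post-inc form for L; getExpr replaces the old
    // getStride()/getOffset() pair.
    void makeUsesPostInc(IVUsers &IU, const Loop *L) {
      for (IVUsers::iterator I = IU.begin(), E = IU.end(); I != E; ++I) {
        const SCEV *Expr = IU.getExpr(*I);  // full expression for this use
        (void)Expr;                         // ... inspect Expr as needed ...
        I->transformToPostInc(L);           // records L in the use's PostIncLoops
      }
    }
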
diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h
index a0c521d7ed3b..d51dd6cbd6fd 100644
--- a/include/llvm/Analysis/InlineCost.h
+++ b/include/llvm/Analysis/InlineCost.h
@@ -1,4 +1,4 @@
-//===- InlineCost.cpp - Cost analysis for inliner ---------------*- C++ -*-===//
+//===- InlineCost.h - Cost analysis for inliner -----------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,9 +16,9 @@
#include <cassert>
#include <climits>
-#include <map>
#include <vector>
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/ValueMap.h"
namespace llvm {
@@ -165,7 +165,9 @@ namespace llvm {
void analyzeFunction(Function *F);
};
- std::map<const Function *, FunctionInfo> CachedFunctionInfo;
+ // The Function* for a function can be changed (by ArgumentPromotion);
+ // the ValueMap will update itself when this happens.
+ ValueMap<const Function *, FunctionInfo> CachedFunctionInfo;
public:
@@ -174,6 +176,14 @@ namespace llvm {
///
InlineCost getInlineCost(CallSite CS,
SmallPtrSet<const Function *, 16> &NeverInline);
+ /// getInlineCost - The heuristic used to determine if we should inline
+ /// the function call or not. The callee is explicitly specified, to allow
+ /// you to calculate the cost of inlining a function via a pointer. The
+ /// result assumes that the inlined version will always be used. You should
+ /// weight it yourself in cases where this callee will not always be called.
+ InlineCost getInlineCost(CallSite CS,
+ Function *Callee,
+ SmallPtrSet<const Function *, 16> &NeverInline);
/// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
/// higher threshold to determine if the function call should be inlined.
@@ -189,6 +199,10 @@ namespace llvm {
/// eliminated.
void growCachedCostInfo(Function* Caller, Function* Callee);
};
+
+ /// callIsSmall - If a call is likely to lower to a single target instruction,
+ /// or is otherwise deemed small, return true.
+ bool callIsSmall(const Function *Callee);
}
#endif
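
A sketch of the new explicit-callee overload, e.g. for an indirect call whose target has been resolved. InlineCostAnalyzer as the enclosing class name, and InlineCost's by-value semantics, are assumptions taken from context rather than from this hunk:

    #include "llvm/Analysis/InlineCost.h"
    #include "llvm/ADT/SmallPtrSet.h"
    using namespace llvm;

    // Cost CS as if it called KnownTarget, which need not be the static callee.
    InlineCost costResolvedCall(InlineCostAnalyzer &CA, CallSite CS,
                                Function *KnownTarget) {
      SmallPtrSet<const Function *, 16> NeverInline;
      // The result assumes the inlined body is always executed; weight it in
      // the caller when the call is conditional.
      return CA.getInlineCost(CS, KnownTarget, NeverInline);
    }
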
diff --git a/include/llvm/Analysis/InstructionSimplify.h b/include/llvm/Analysis/InstructionSimplify.h
index 13314e6ea0e5..f47e740a741f 100644
--- a/include/llvm/Analysis/InstructionSimplify.h
+++ b/include/llvm/Analysis/InstructionSimplify.h
@@ -46,6 +46,10 @@ namespace llvm {
Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
const TargetData *TD = 0);
+ /// SimplifySelectInst - Given operands for a SelectInst, see if we can fold
+ /// the result. If not, this returns null.
+ Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
+ const TargetData *TD = 0);
/// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can
/// fold the result. If not, this returns null.
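
A sketch of the new entry point: try the fold before materializing an instruction; a null result means the caller must emit a real SelectInst.

    #include "llvm/Analysis/InstructionSimplify.h"
    using namespace llvm;

    // Returns a simplified value (e.g. for a constant condition, or when both
    // arms are identical), or null when no fold applies.
    Value *foldSelect(Value *Cond, Value *TVal, Value *FVal,
                      const TargetData *TD) {
      return SimplifySelectInst(Cond, TVal, FVal, TD);
    }
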
diff --git a/include/llvm/Analysis/Lint.h b/include/llvm/Analysis/Lint.h
new file mode 100644
index 000000000000..2f0136608d3c
--- /dev/null
+++ b/include/llvm/Analysis/Lint.h
@@ -0,0 +1,52 @@
+//===-- llvm/Analysis/Lint.h - LLVM IR Lint ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines lint interfaces that can be used for some sanity checking
+// of input to the system, and for checking that transformations
+// haven't done something bad. In contrast to the Verifier, the Lint checker
+// checks for undefined behavior or constructions with likely unintended
+// behavior.
+//
+// To see what specifically is checked, look at Lint.cpp
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_LINT_H
+#define LLVM_ANALYSIS_LINT_H
+
+#include <string>
+
+namespace llvm {
+
+class FunctionPass;
+class Module;
+class Function;
+
+/// @brief Create a lint pass.
+///
+/// Check a module or function.
+FunctionPass *createLintPass();
+
+/// @brief Check a module.
+///
+/// This should only be used for debugging, because it plays games with
+/// PassManagers and stuff.
+void lintModule(
+ const Module &M, ///< The module to be checked
+ std::string *ErrorInfo = 0 ///< Information about failures.
+);
+
+// lintFunction - Check a function.
+void lintFunction(
+ const Function &F ///< The function to be checked
+);
+
+} // End llvm namespace
+
+#endif
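
A sketch of driving the new checker from a tool, using only the three entry points declared above:

    #include "llvm/Analysis/Lint.h"
    #include "llvm/Module.h"
    using namespace llvm;

    // Lint findings are advisories about likely-unintended behavior; unlike
    // the Verifier's checks, they are not IR invariants.
    void checkForSuspiciousIR(const Module &M, const Function &F) {
      std::string Errs;
      lintModule(M, &Errs);  // debugging aid; see the caveat above
      lintFunction(F);       // per-function variant
    }
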
diff --git a/include/llvm/Analysis/PointerTracking.h b/include/llvm/Analysis/PointerTracking.h
index a14bbf0290ea..6c4f838430b6 100644
--- a/include/llvm/Analysis/PointerTracking.h
+++ b/include/llvm/Analysis/PointerTracking.h
@@ -27,7 +27,7 @@
#ifndef LLVM_ANALYSIS_POINTERTRACKING_H
#define LLVM_ANALYSIS_POINTERTRACKING_H
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Instructions.h"
#include "llvm/Pass.h"
diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
index ab13a9dfa4e5..d3a8d8f4fe51 100644
--- a/include/llvm/Analysis/ScalarEvolution.h
+++ b/include/llvm/Analysis/ScalarEvolution.h
@@ -351,7 +351,8 @@ namespace llvm {
/// (which may not be an immediate predecessor) which has exactly one
/// successor from which BB is reachable, or null if no such block is
/// found.
- BasicBlock* getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB);
+ std::pair<BasicBlock *, BasicBlock *>
+ getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB);
/// isImpliedCond - Test whether the condition described by Pred, LHS,
/// and RHS is true whenever the given Cond value evaluates to true.
@@ -380,6 +381,13 @@ namespace llvm {
Constant *getConstantEvolutionLoopExitValue(PHINode *PN, const APInt& BEs,
const Loop *L);
+ /// isKnownPredicateWithRanges - Test if the given expression is known to
+ /// satisfy the condition described by Pred and the known constant ranges
+ /// of LHS and RHS.
+ ///
+ bool isKnownPredicateWithRanges(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS);
+
public:
static char ID; // Pass identification, replacement for typeid
ScalarEvolution();
@@ -554,11 +562,11 @@ namespace llvm {
/// getSCEVAtScope(getSCEV(V), L).
const SCEV *getSCEVAtScope(Value *V, const Loop *L);
- /// isLoopGuardedByCond - Test whether entry to the loop is protected by
- /// a conditional between LHS and RHS. This is used to help avoid max
+ /// isLoopEntryGuardedByCond - Test whether entry to the loop is protected
+ /// by a conditional between LHS and RHS. This is used to help avoid max
/// expressions in loop trip counts, and to eliminate casts.
- bool isLoopGuardedByCond(const Loop *L, ICmpInst::Predicate Pred,
- const SCEV *LHS, const SCEV *RHS);
+ bool isLoopEntryGuardedByCond(const Loop *L, ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS);
/// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is
/// protected by a conditional between LHS and RHS. This is used to
@@ -636,12 +644,21 @@ namespace llvm {
///
bool isKnownNonZero(const SCEV *S);
- /// isKnownNonZero - Test if the given expression is known to satisfy
+ /// isKnownPredicate - Test if the given expression is known to satisfy
/// the condition described by Pred, LHS, and RHS.
///
bool isKnownPredicate(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS);
+ /// SimplifyICmpOperands - Simplify LHS and RHS in a comparison with
+ /// predicate Pred. Return true iff any changes were made. If the
+ /// operands are provably equal or inequal, LHS and RHS are set to
+ /// the same value and Pred is set to either ICMP_EQ or ICMP_NE.
+ ///
+ bool SimplifyICmpOperands(ICmpInst::Predicate &Pred,
+ const SCEV *&LHS,
+ const SCEV *&RHS);
+
virtual bool runOnFunction(Function &F);
virtual void releaseMemory();
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
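
A sketch of the canonicalization loop a client might run with the new SimplifyICmpOperands before a known-predicate query; each round may fold constants into Pred or swap operands, so iterate to a fixed point.

    #include "llvm/Analysis/ScalarEvolution.h"
    using namespace llvm;

    // Provable (in)equality collapses Pred to ICMP_EQ/ICMP_NE along the way.
    bool provablyTrue(ScalarEvolution &SE, ICmpInst::Predicate Pred,
                      const SCEV *LHS, const SCEV *RHS) {
      while (SE.SimplifyICmpOperands(Pred, LHS, RHS))
        ;  // returns true iff it changed something
      return SE.isKnownPredicate(Pred, LHS, RHS);
    }
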
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h
index dc9b73bd566d..baf6946b8cf8 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -15,6 +15,7 @@
#define LLVM_ANALYSIS_SCALAREVOLUTION_EXPANDER_H
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ScalarEvolutionNormalization.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/TargetFolder.h"
#include <set>
@@ -32,12 +33,12 @@ namespace llvm {
InsertedExpressions;
std::set<Value*> InsertedValues;
- /// PostIncLoop - When non-null, expanded addrecs referring to the given
- /// loop expanded in post-inc mode. For example, expanding {1,+,1}<L> in
- /// post-inc mode returns the add instruction that adds one to the phi
- /// for {0,+,1}<L>, as opposed to a new phi starting at 1. This is only
- /// supported in non-canonical mode.
- const Loop *PostIncLoop;
+ /// PostIncLoops - Addrecs referring to any of the given loops are expanded
+ /// in post-inc mode. For example, expanding {1,+,1}<L> in post-inc mode
+ /// returns the add instruction that adds one to the phi for {0,+,1}<L>,
+ /// as opposed to a new phi starting at 1. This is only supported in
+ /// non-canonical mode.
+ PostIncLoopSet PostIncLoops;
/// IVIncInsertPos - When this is non-null, addrecs expanded in the
/// loop it indicates should be inserted with increments at
@@ -62,7 +63,7 @@ namespace llvm {
public:
/// SCEVExpander - Construct a SCEVExpander in "canonical" mode.
explicit SCEVExpander(ScalarEvolution &se)
- : SE(se), PostIncLoop(0), IVIncInsertLoop(0), CanonicalMode(true),
+ : SE(se), IVIncInsertLoop(0), CanonicalMode(true),
Builder(se.getContext(), TargetFolder(se.TD)) {}
/// clear - Erase the contents of the InsertedExpressions map so that users
@@ -89,14 +90,18 @@ namespace llvm {
IVIncInsertPos = Pos;
}
- /// setPostInc - If L is non-null, enable post-inc expansion for addrecs
- /// referring to the given loop. If L is null, disable post-inc expansion
- /// completely. Post-inc expansion is only supported in non-canonical
+ /// setPostInc - Enable post-inc expansion for addrecs referring to the
+ /// given loops. Post-inc expansion is only supported in non-canonical
/// mode.
- void setPostInc(const Loop *L) {
+ void setPostInc(const PostIncLoopSet &L) {
assert(!CanonicalMode &&
"Post-inc expansion is not supported in CanonicalMode");
- PostIncLoop = L;
+ PostIncLoops = L;
+ }
+
+ /// clearPostInc - Disable all post-inc expansion.
+ void clearPostInc() {
+ PostIncLoops.clear();
}
/// disableCanonicalMode - Disable the behavior of expanding expressions in
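
A sketch of the widened interface: post-inc expansion is now configured with a whole loop set. disableCanonicalMode and the expansion entry points are members of this class outside the hunk shown.

    #include "llvm/Analysis/ScalarEvolutionExpander.h"
    using namespace llvm;

    void expandPostInc(SCEVExpander &Rewriter, const PostIncLoopSet &Loops) {
      Rewriter.disableCanonicalMode(); // post-inc requires non-canonical mode
      Rewriter.setPostInc(Loops);      // was setPostInc(const Loop *L)
      // ... expand addrecs here; those referring to any loop in Loops are
      // produced in post-inc form ...
      Rewriter.clearPostInc();         // replaces the old setPostInc(0) idiom
    }
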
diff --git a/include/llvm/Analysis/ScalarEvolutionNormalization.h b/include/llvm/Analysis/ScalarEvolutionNormalization.h
new file mode 100644
index 000000000000..342e5937891a
--- /dev/null
+++ b/include/llvm/Analysis/ScalarEvolutionNormalization.h
@@ -0,0 +1,78 @@
+//===- llvm/Analysis/ScalarEvolutionNormalization.h - See below -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines utilities for working with "normalized" ScalarEvolution
+// expressions.
+//
+// The following example illustrates post-increment uses and how normalized
+// expressions help.
+//
+// for (i=0; i!=n; ++i) {
+// ...
+// }
+// use(i);
+//
+// While the expression for most uses of i inside the loop is {0,+,1}<%L>, the
+// expression for the use of i outside the loop is {1,+,1}<%L>, since i is
+// incremented at the end of the loop body. This is inconvenient, since it
+// suggests that we need two different induction variables, one that starts
+// at 0 and one that starts at 1. We'd prefer to be able to think of these as
+// the same induction variable, with uses inside the loop using the
+// "pre-incremented" value, and uses after the loop using the
+// "post-incremented" value.
+//
+// Expressions for post-incremented uses are represented as an expression
+// paired with a set of loops for which the expression is in "post-increment"
+// mode (there may be multiple loops).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_SCALAREVOLUTION_NORMALIZATION_H
+#define LLVM_ANALYSIS_SCALAREVOLUTION_NORMALIZATION_H
+
+#include "llvm/ADT/SmallPtrSet.h"
+
+namespace llvm {
+
+class Instruction;
+class DominatorTree;
+class Loop;
+class ScalarEvolution;
+class SCEV;
+class Value;
+
+/// TransformKind - Different types of transformations that
+/// TransformForPostIncUse can do.
+enum TransformKind {
+ /// Normalize - Normalize according to the given loops.
+ Normalize,
+ /// NormalizeAutodetect - Detect post-inc opportunities on new expressions,
+ /// update the given loop set, and normalize.
+ NormalizeAutodetect,
+ /// Denormalize - Perform the inverse transform on the expression with the
+ /// given loop set.
+ Denormalize
+};
+
+/// PostIncLoopSet - A set of loops.
+typedef SmallPtrSet<const Loop *, 2> PostIncLoopSet;
+
+/// TransformForPostIncUse - Transform the given expression according to the
+/// given transformation kind.
+const SCEV *TransformForPostIncUse(TransformKind Kind,
+ const SCEV *S,
+ Instruction *User,
+ Value *OperandValToReplace,
+ PostIncLoopSet &Loops,
+ ScalarEvolution &SE,
+ DominatorTree &DT);
+
+}
+
+#endif
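
A sketch of the round trip the header comment describes: normalize with autodetection (which populates the loop set), then denormalize with the same set to recover the original expression.

    #include "llvm/Analysis/ScalarEvolutionNormalization.h"
    #include "llvm/Analysis/ScalarEvolution.h"
    #include "llvm/Analysis/Dominators.h"
    using namespace llvm;

    const SCEV *roundTrip(const SCEV *S, Instruction *User, Value *Op,
                          ScalarEvolution &SE, DominatorTree &DT) {
      PostIncLoopSet Loops;  // filled in by NormalizeAutodetect
      const SCEV *N = TransformForPostIncUse(NormalizeAutodetect, S, User, Op,
                                             Loops, SE, DT);
      return TransformForPostIncUse(Denormalize, N, User, Op, Loops, SE, DT);
    }
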
diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h
index 0791b7bd4c14..d58089748413 100644
--- a/include/llvm/Analysis/ValueTracking.h
+++ b/include/llvm/Analysis/ValueTracking.h
@@ -122,7 +122,8 @@ namespace llvm {
/// StopAtNul is set to true (the default), the returned string is truncated
/// by a nul character in the global. If StopAtNul is false, the nul
/// character is included in the result string.
- bool GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset = 0,
+ bool GetConstantStringInfo(const Value *V, std::string &Str,
+ uint64_t Offset = 0,
bool StopAtNul = true);
/// GetStringLength - If we can compute the length of the string pointed to by
diff --git a/include/llvm/Analysis/Verifier.h b/include/llvm/Analysis/Verifier.h
index a6b2a6df21af..ce8aeef07645 100644
--- a/include/llvm/Analysis/Verifier.h
+++ b/include/llvm/Analysis/Verifier.h
@@ -1,4 +1,4 @@
-//===-- llvm/Analysis/Verifier.h - Module Verifier --------------*- C++ -*-===//
+//===-- llvm/Analysis/Verifier.h - LLVM IR Verifier -------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/include/llvm/Bitcode/Archive.h b/include/llvm/Bitcode/Archive.h
index 67f2a4a999b8..83a37585fa17 100644
--- a/include/llvm/Bitcode/Archive.h
+++ b/include/llvm/Bitcode/Archive.h
@@ -107,7 +107,7 @@ class ArchiveMember : public ilist_node<ArchiveMember> {
/// into memory, the return value will be null.
/// @returns a pointer to the member's data.
/// @brief Get the data content of the archive member
- const void* getData() const { return data; }
+ const char* getData() const { return data; }
/// This method determines if the member is a regular compressed file.
/// @returns true iff the archive member is a compressed regular file.
@@ -172,7 +172,7 @@ class ArchiveMember : public ilist_node<ArchiveMember> {
sys::PathWithStatus path; ///< Path of file containing the member
sys::FileStatus info; ///< Status info (size,mode,date)
unsigned flags; ///< Flags about the archive member
- const void* data; ///< Data for the member
+ const char* data; ///< Data for the member
/// @}
/// @name Constructors
diff --git a/include/llvm/CallGraphSCCPass.h b/include/llvm/CallGraphSCCPass.h
index 37a454e07aab..e11b9677c74a 100644
--- a/include/llvm/CallGraphSCCPass.h
+++ b/include/llvm/CallGraphSCCPass.h
@@ -29,9 +29,10 @@ namespace llvm {
class CallGraphNode;
class CallGraph;
class PMStack;
-
-struct CallGraphSCCPass : public Pass {
-
+class CallGraphSCC;
+
+class CallGraphSCCPass : public Pass {
+public:
explicit CallGraphSCCPass(intptr_t pid) : Pass(PT_CallGraphSCC, pid) {}
explicit CallGraphSCCPass(void *pid) : Pass(PT_CallGraphSCC, pid) {}
@@ -53,7 +54,7 @@ struct CallGraphSCCPass : public Pass {
/// SCC passes that add or delete functions to the SCC are required to update
/// the SCC list, otherwise stale pointers may be dereferenced.
///
- virtual bool runOnSCC(std::vector<CallGraphNode *> &SCC) = 0;
+ virtual bool runOnSCC(CallGraphSCC &SCC) = 0;
/// doFinalization - This method is called after the SCC's of the program has
/// been processed, allowing the pass to do final cleanup as necessary.
@@ -63,7 +64,7 @@ struct CallGraphSCCPass : public Pass {
/// Assign pass manager to manager this pass
virtual void assignPassManager(PMStack &PMS,
- PassManagerType PMT = PMT_CallGraphPassManager);
+ PassManagerType PMT =PMT_CallGraphPassManager);
/// Return what kind of Pass Manager can manage this pass.
virtual PassManagerType getPotentialPassManagerType() const {
@@ -76,6 +77,29 @@ struct CallGraphSCCPass : public Pass {
virtual void getAnalysisUsage(AnalysisUsage &Info) const;
};
+/// CallGraphSCC - This is a single SCC that a CallGraphSCCPass is run on.
+class CallGraphSCC {
+ void *Context; // The CGPassManager object that is vending this.
+ std::vector<CallGraphNode*> Nodes;
+public:
+ CallGraphSCC(void *context) : Context(context) {}
+
+ void initialize(CallGraphNode*const*I, CallGraphNode*const*E) {
+ Nodes.assign(I, E);
+ }
+
+ bool isSingular() const { return Nodes.size() == 1; }
+ unsigned size() const { return Nodes.size(); }
+
+ /// ReplaceNode - This informs the SCC and the pass manager that the specified
+ /// Old node has been deleted, and New is to be used in its place.
+ void ReplaceNode(CallGraphNode *Old, CallGraphNode *New);
+
+ typedef std::vector<CallGraphNode*>::const_iterator iterator;
+ iterator begin() const { return Nodes.begin(); }
+ iterator end() const { return Nodes.end(); }
+};
+
} // End llvm namespace
#endif
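
A sketch of a pass ported to the new signature: runOnSCC receives a CallGraphSCC and walks it through the iterator API above. Registration boilerplate is omitted; CallGraphNode::getFunction() is assumed from CallGraph.h.

    #include "llvm/CallGraphSCCPass.h"
    #include "llvm/Analysis/CallGraph.h"
    using namespace llvm;

    namespace {
    struct DemoSCCPass : public CallGraphSCCPass {
      static char ID;
      DemoSCCPass() : CallGraphSCCPass(&ID) {}

      virtual bool runOnSCC(CallGraphSCC &SCC) {
        for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
          if (Function *F = (*I)->getFunction())
            (void)F;  // ... transform F; call SCC.ReplaceNode if nodes change ...
        return false;
      }
    };
    char DemoSCCPass::ID = 0;
    }
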
diff --git a/include/llvm/CodeGen/Analysis.h b/include/llvm/CodeGen/Analysis.h
new file mode 100644
index 000000000000..f33a9dbcae73
--- /dev/null
+++ b/include/llvm/CodeGen/Analysis.h
@@ -0,0 +1,80 @@
+//===- CodeGen/Analysis.h - CodeGen LLVM IR Analysis Utilities --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares several CodeGen-specific LLVM IR analysis utilities.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_ANALYSIS_H
+#define LLVM_CODEGEN_ANALYSIS_H
+
+#include "llvm/Instructions.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/Support/CallSite.h"
+
+namespace llvm {
+
+class TargetLowering;
+class GlobalVariable;
+
+/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
+/// of insertvalue or extractvalue indices that identify a member, return
+/// the linearized index of the start of the member.
+///
+unsigned ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty,
+ const unsigned *Indices,
+ const unsigned *IndicesEnd,
+ unsigned CurIndex = 0);
+
+/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of
+/// EVTs that represent all the individual underlying
+/// non-aggregate types that comprise it.
+///
+/// If Offsets is non-null, it points to a vector to be filled in
+/// with the in-memory offsets of each of the individual values.
+///
+void ComputeValueVTs(const TargetLowering &TLI, const Type *Ty,
+ SmallVectorImpl<EVT> &ValueVTs,
+ SmallVectorImpl<uint64_t> *Offsets = 0,
+ uint64_t StartingOffset = 0);
+
+/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
+GlobalVariable *ExtractTypeInfo(Value *V);
+
+/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being
+/// processed uses a memory 'm' constraint.
+bool hasInlineAsmMemConstraint(std::vector<InlineAsm::ConstraintInfo> &CInfos,
+ const TargetLowering &TLI);
+
+/// getFCmpCondCode - Return the ISD condition code corresponding to
+/// the given LLVM IR floating-point condition code. This includes
+/// consideration of global floating-point math flags.
+///
+ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred);
+
+/// getICmpCondCode - Return the ISD condition code corresponding to
+/// the given LLVM IR integer condition code.
+///
+ISD::CondCode getICmpCondCode(ICmpInst::Predicate Pred);
+
+/// Test if the given instruction is in a position to be optimized
+/// with a tail-call. This roughly means that it's in a block with
+/// a return and there's nothing that needs to be scheduled
+/// between it and the return.
+///
+/// This function only tests target-independent requirements.
+bool isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
+ const TargetLowering &TLI);
+
+} // End llvm namespace
+
+#endif
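
A sketch of the most commonly used entry point above: flatten an IR type into the scalar EVTs that represent it, along with their in-memory offsets.

    #include "llvm/CodeGen/Analysis.h"
    using namespace llvm;

    void flattenType(const TargetLowering &TLI, const Type *Ty) {
      SmallVector<EVT, 4> ValueVTs;
      SmallVector<uint64_t, 4> Offsets;
      ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets);
      // ValueVTs[i] is the type of the i-th non-aggregate member; Offsets[i]
      // is its byte offset from the start of the aggregate.
    }
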
diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h
index 2eff50123a23..243ddbb5da34 100644
--- a/include/llvm/CodeGen/AsmPrinter.h
+++ b/include/llvm/CodeGen/AsmPrinter.h
@@ -34,6 +34,7 @@ namespace llvm {
class MachineBasicBlock;
class MachineFunction;
class MachineInstr;
+ class MachineLocation;
class MachineLoopInfo;
class MachineLoop;
class MachineConstantPool;
@@ -129,7 +130,7 @@ namespace llvm {
unsigned getFunctionNumber() const;
/// getObjFileLowering - Return information about object file lowering.
- TargetLoweringObjectFile &getObjFileLowering() const;
+ const TargetLoweringObjectFile &getObjFileLowering() const;
/// getTargetData - Return information about data layout.
const TargetData &getTargetData() const;
@@ -203,29 +204,16 @@ namespace llvm {
/// EmitAlignment - Emit an alignment directive to the specified power of
/// two boundary. For example, if you pass in 3 here, you will get an 8
/// byte alignment. If a global value is specified, and if that global has
- /// an explicit alignment requested, it will unconditionally override the
- /// alignment request. However, if ForcedAlignBits is specified, this value
- /// has final say: the ultimate alignment will be the max of ForcedAlignBits
- /// and the alignment computed with NumBits and the global. If UseFillExpr
- /// is true, it also emits an optional second value FillValue which the
- /// assembler uses to fill gaps to match alignment for text sections if the
- /// has specified a non-zero fill value.
+ /// an explicit alignment requested, it will override the alignment request
+ /// if required for correctness.
///
- /// The algorithm is:
- /// Align = NumBits;
- /// if (GV && GV->hasalignment) Align = GV->getalignment();
- /// Align = std::max(Align, ForcedAlignBits);
- ///
- void EmitAlignment(unsigned NumBits, const GlobalValue *GV = 0,
- unsigned ForcedAlignBits = 0,
- bool UseFillExpr = true) const;
+ void EmitAlignment(unsigned NumBits, const GlobalValue *GV = 0) const;
/// EmitBasicBlockStart - This method prints the label for the specified
/// MachineBasicBlock, an alignment (if present) and a comment describing
/// it if appropriate.
void EmitBasicBlockStart(const MachineBasicBlock *MBB) const;
-
/// EmitGlobalConstant - Print a general LLVM constant to the .s file.
void EmitGlobalConstant(const Constant *CV, unsigned AddrSpace = 0);
@@ -333,6 +321,12 @@ namespace llvm {
void EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
unsigned Size) const;
+ /// EmitLabelOffsetDifference - Emit something like ".long Hi+Offset-Lo"
+ /// where the size in bytes of the directive is specified by Size and Hi/Lo
+ /// specify the labels. This implicitly uses .set if it is available.
+ void EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset,
+ const MCSymbol *Lo, unsigned Size) const;
+
//===------------------------------------------------------------------===//
// Dwarf Emission Helper Routines
//===------------------------------------------------------------------===//
@@ -370,7 +364,11 @@ namespace llvm {
/// that Label lives in.
void EmitSectionOffset(const MCSymbol *Label,
const MCSymbol *SectionLabel) const;
-
+
+ /// getDebugValueLocation - Get location information encoded by DBG_VALUE
+ /// operands.
+ virtual MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
+
//===------------------------------------------------------------------===//
// Dwarf Lowering Routines
//===------------------------------------------------------------------===//
diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h
index 8d336654411c..5a2b0e7c765f 100644
--- a/include/llvm/CodeGen/FastISel.h
+++ b/include/llvm/CodeGen/FastISel.h
@@ -15,8 +15,10 @@
#define LLVM_CODEGEN_FASTISEL_H
#include "llvm/ADT/DenseMap.h"
+#ifndef NDEBUG
#include "llvm/ADT/SmallSet.h"
-#include "llvm/CodeGen/SelectionDAGNodes.h"
+#endif
+#include "llvm/CodeGen/ValueTypes.h"
namespace llvm {
@@ -26,6 +28,7 @@ class Instruction;
class MachineBasicBlock;
class MachineConstantPool;
class MachineFunction;
+class MachineInstr;
class MachineFrameInfo;
class MachineRegisterInfo;
class TargetData;
@@ -44,8 +47,9 @@ protected:
DenseMap<const Value *, unsigned> &ValueMap;
DenseMap<const BasicBlock *, MachineBasicBlock *> &MBBMap;
DenseMap<const AllocaInst *, int> &StaticAllocaMap;
+ std::vector<std::pair<MachineInstr*, unsigned> > &PHINodesToUpdate;
#ifndef NDEBUG
- SmallSet<Instruction*, 8> &CatchInfoLost;
+ SmallSet<const Instruction *, 8> &CatchInfoLost;
#endif
MachineFunction &MF;
MachineRegisterInfo &MRI;
@@ -73,11 +77,6 @@ public:
MBB = mbb;
}
- /// setCurDebugLoc - Set the current debug location information, which is used
- /// when creating a machine instruction.
- ///
- void setCurDebugLoc(DebugLoc dl) { DL = dl; }
-
/// getCurDebugLoc() - Return current debug location information.
DebugLoc getCurDebugLoc() const { return DL; }
@@ -85,28 +84,28 @@ public:
/// LLVM IR instruction, and append generated machine instructions to
/// the current block. Return true if selection was successful.
///
- bool SelectInstruction(Instruction *I);
+ bool SelectInstruction(const Instruction *I);
/// SelectOperator - Do "fast" instruction selection for the given
/// LLVM IR operator (Instruction or ConstantExpr), and append
/// generated machine instructions to the current block. Return true
/// if selection was successful.
///
- bool SelectOperator(User *I, unsigned Opcode);
+ bool SelectOperator(const User *I, unsigned Opcode);
/// getRegForValue - Create a virtual register and arrange for it to
/// be assigned the value for the given LLVM value.
- unsigned getRegForValue(Value *V);
+ unsigned getRegForValue(const Value *V);
/// lookUpRegForValue - Look up the value to see if its value is already
/// cached in a register. It may be defined by instructions across blocks or
/// defined locally.
- unsigned lookUpRegForValue(Value *V);
+ unsigned lookUpRegForValue(const Value *V);
/// getRegForGEPIndex - This is a wrapper around getRegForValue that also
/// takes care of truncating or sign-extending the given getelementptr
/// index value.
- unsigned getRegForGEPIndex(Value *V);
+ unsigned getRegForGEPIndex(const Value *V);
virtual ~FastISel();
@@ -114,9 +113,10 @@ protected:
FastISel(MachineFunction &mf,
DenseMap<const Value *, unsigned> &vm,
DenseMap<const BasicBlock *, MachineBasicBlock *> &bm,
- DenseMap<const AllocaInst *, int> &am
+ DenseMap<const AllocaInst *, int> &am,
+ std::vector<std::pair<MachineInstr*, unsigned> > &PHINodesToUpdate
#ifndef NDEBUG
- , SmallSet<Instruction*, 8> &cil
+ , SmallSet<const Instruction *, 8> &cil
#endif
);
@@ -126,7 +126,7 @@ protected:
/// fit into FastISel's framework. It returns true if it was successful.
///
virtual bool
- TargetSelectInstruction(Instruction *I) = 0;
+ TargetSelectInstruction(const Instruction *I) = 0;
/// FastEmit_r - This method is called by target-independent code
/// to request that an instruction with the given type and opcode
@@ -168,7 +168,7 @@ protected:
virtual unsigned FastEmit_rf(MVT VT,
MVT RetVT,
unsigned Opcode,
- unsigned Op0, ConstantFP *FPImm);
+ unsigned Op0, const ConstantFP *FPImm);
/// FastEmit_rri - This method is called by target-independent code
/// to request that an instruction with the given type, opcode, and
@@ -194,7 +194,7 @@ protected:
/// FastEmit_rr instead.
unsigned FastEmit_rf_(MVT VT,
unsigned Opcode,
- unsigned Op0, ConstantFP *FPImm,
+ unsigned Op0, const ConstantFP *FPImm,
MVT ImmType);
/// FastEmit_i - This method is called by target-independent code
@@ -211,7 +211,7 @@ protected:
virtual unsigned FastEmit_f(MVT VT,
MVT RetVT,
unsigned Opcode,
- ConstantFP *FPImm);
+ const ConstantFP *FPImm);
/// FastEmitInst_ - Emit a MachineInstr with no operands and a
/// result register in the given register class.
@@ -245,7 +245,7 @@ protected:
///
unsigned FastEmitInst_rf(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
- unsigned Op0, ConstantFP *FPImm);
+ unsigned Op0, const ConstantFP *FPImm);
/// FastEmitInst_rri - Emit a MachineInstr with two register operands,
/// an immediate, and a result register in the given register class.
@@ -275,34 +275,47 @@ protected:
/// the CFG.
void FastEmitBranch(MachineBasicBlock *MBB);
- unsigned UpdateValueMap(Value* I, unsigned Reg);
+ unsigned UpdateValueMap(const Value* I, unsigned Reg);
unsigned createResultReg(const TargetRegisterClass *RC);
/// TargetMaterializeConstant - Emit a constant in a register using
/// target-specific logic, such as constant pool loads.
- virtual unsigned TargetMaterializeConstant(Constant* C) {
+ virtual unsigned TargetMaterializeConstant(const Constant* C) {
return 0;
}
/// TargetMaterializeAlloca - Emit an alloca address in a register using
/// target-specific logic.
- virtual unsigned TargetMaterializeAlloca(AllocaInst* C) {
+ virtual unsigned TargetMaterializeAlloca(const AllocaInst* C) {
return 0;
}
private:
- bool SelectBinaryOp(User *I, unsigned ISDOpcode);
+ bool SelectBinaryOp(const User *I, unsigned ISDOpcode);
- bool SelectFNeg(User *I);
+ bool SelectFNeg(const User *I);
- bool SelectGetElementPtr(User *I);
+ bool SelectGetElementPtr(const User *I);
- bool SelectCall(User *I);
+ bool SelectCall(const User *I);
- bool SelectBitCast(User *I);
+ bool SelectBitCast(const User *I);
- bool SelectCast(User *I, unsigned Opcode);
+ bool SelectCast(const User *I, unsigned Opcode);
+
+ /// HandlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks.
+ /// Emit code to ensure constants are copied into registers when needed.
+ /// Remember the virtual registers that need to be added to the Machine PHI
+ /// nodes as input. We cannot just directly add them, because expansion
+ /// might result in multiple MBB's for one BB. As such, the start of the
+ /// BB might correspond to a different MBB than the end.
+ bool HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB);
+
+ /// materializeRegForValue - Helper for getRegForValue. This function is
+ /// called when the value isn't already available in a register and must
+ /// be materialized with new instructions.
+ unsigned materializeRegForValue(const Value *V, MVT VT);
};
}
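
A sketch of the public surface after the const-correctness sweep; both entry points now take const-qualified IR values.

    #include "llvm/CodeGen/FastISel.h"
    using namespace llvm;

    // Fast-path selection of one instruction; a false return means the caller
    // falls back to the SelectionDAG path.
    bool selectOne(FastISel &FIS, const Instruction *I) {
      return FIS.SelectInstruction(I);
    }

    // Reuse a cached vreg if the value was already materialized.
    unsigned regFor(FastISel &FIS, const Value *V) {
      if (unsigned R = FIS.lookUpRegForValue(V))
        return R;
      return FIS.getRegForValue(V);
    }
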
diff --git a/include/llvm/CodeGen/GCMetadata.h b/include/llvm/CodeGen/GCMetadata.h
index 783f636740c2..6de69cd82907 100644
--- a/include/llvm/CodeGen/GCMetadata.h
+++ b/include/llvm/CodeGen/GCMetadata.h
@@ -68,9 +68,9 @@ namespace llvm {
struct GCRoot {
int Num; //< Usually a frame index.
int StackOffset; //< Offset from the stack pointer.
- Constant *Metadata; //< Metadata straight from the call to llvm.gcroot.
+ const Constant *Metadata;//< Metadata straight from the call to llvm.gcroot.
- GCRoot(int N, Constant *MD) : Num(N), StackOffset(-1), Metadata(MD) {}
+ GCRoot(int N, const Constant *MD) : Num(N), StackOffset(-1), Metadata(MD) {}
};
@@ -114,7 +114,7 @@ namespace llvm {
/// addStackRoot - Registers a root that lives on the stack. Num is the
/// stack object ID for the alloca (if the code generator is
// using MachineFrameInfo).
- void addStackRoot(int Num, Constant *Metadata) {
+ void addStackRoot(int Num, const Constant *Metadata) {
Roots.push_back(GCRoot(Num, Metadata));
}
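
A sketch of registering a root with the const-qualified signature; GCFunctionInfo is the per-function metadata class this header defines (its definition lies outside the hunk shown, so treat the receiver as assumed).

    #include "llvm/CodeGen/GCMetadata.h"
    using namespace llvm;

    // Metadata comes straight from the llvm.gcroot call; StackOffset stays -1
    // until the code generator assigns a frame location.
    void recordRoot(GCFunctionInfo &FI, int FrameIdx, const Constant *Metadata) {
      FI.addStackRoot(FrameIdx, Metadata);
    }
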
diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h
new file mode 100644
index 000000000000..ba554d335d88
--- /dev/null
+++ b/include/llvm/CodeGen/ISDOpcodes.h
@@ -0,0 +1,765 @@
+//===-- llvm/CodeGen/ISDOpcodes.h - CodeGen opcodes -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares codegen opcodes and related utilities.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_ISDOPCODES_H
+#define LLVM_CODEGEN_ISDOPCODES_H
+
+namespace llvm {
+
+/// ISD namespace - This namespace contains an enum which represents all of the
+/// SelectionDAG node types and value types.
+///
+namespace ISD {
+
+ //===--------------------------------------------------------------------===//
+ /// ISD::NodeType enum - This enum defines the target-independent operators
+ /// for a SelectionDAG.
+ ///
+ /// Targets may also define target-dependent operator codes for SDNodes. For
+ /// example, on x86, these are the enum values in the X86ISD namespace.
+ /// Targets should aim to use target-independent operators to model their
+ /// instruction sets as much as possible, and only use target-dependent
+ /// operators when they have special requirements.
+ ///
+ /// Finally, during and after selection proper, SDNodes may use special
+ /// operator codes that correspond directly with MachineInstr opcodes. These
+ /// are used to represent selected instructions. See the isMachineOpcode()
+ /// and getMachineOpcode() member functions of SDNode.
+ ///
+ enum NodeType {
+ // DELETED_NODE - This is an illegal value that is used to catch
+ // errors. This opcode is not a legal opcode for any node.
+ DELETED_NODE,
+
+ // EntryToken - This is the marker used to indicate the start of the region.
+ EntryToken,
+
+ // TokenFactor - This node takes multiple tokens as input and produces a
+ // single token result. This is used to represent the fact that the operand
+ // operators are independent of each other.
+ TokenFactor,
+
+ // AssertSext, AssertZext - These nodes record if a register contains a
+ // value that has already been zero or sign extended from a narrower type.
+ // These nodes take two operands. The first is the node that has already
+ // been extended, and the second is a value type node indicating the width
+ // of the extension
+ AssertSext, AssertZext,
+
+ // Various leaf nodes.
+ BasicBlock, VALUETYPE, CONDCODE, Register,
+ Constant, ConstantFP,
+ GlobalAddress, GlobalTLSAddress, FrameIndex,
+ JumpTable, ConstantPool, ExternalSymbol, BlockAddress,
+
+ // The address of the GOT
+ GLOBAL_OFFSET_TABLE,
+
+ // FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and
+ // llvm.returnaddress on the DAG. These nodes take one operand, the index
+ // of the frame or return address to return. An index of zero corresponds
+ // to the current function's frame or return address, an index of one to the
+ // parent's frame or return address, and so on.
+ FRAMEADDR, RETURNADDR,
+
+ // FRAME_TO_ARGS_OFFSET - This node represents offset from frame pointer to
+ // first (possible) on-stack argument. This is needed for correct stack
+ // adjustment during unwind.
+ FRAME_TO_ARGS_OFFSET,
+
+ // RESULT, OUTCHAIN = EXCEPTIONADDR(INCHAIN) - This node represents the
+ // address of the exception block on entry to a landing pad block.
+ EXCEPTIONADDR,
+
+ // RESULT, OUTCHAIN = LSDAADDR(INCHAIN) - This node represents the
+ // address of the Language Specific Data Area for the enclosing function.
+ LSDAADDR,
+
+ // RESULT, OUTCHAIN = EHSELECTION(INCHAIN, EXCEPTION) - This node represents
+ // the selection index of the exception thrown.
+ EHSELECTION,
+
+ // OUTCHAIN = EH_RETURN(INCHAIN, OFFSET, HANDLER) - This node represents
+ // 'eh_return' gcc dwarf builtin, which is used to return from
+ // exception. The general meaning is: adjust stack by OFFSET and pass
+ // execution to HANDLER. Many platform-related details also :)
+ EH_RETURN,
+
+ // TargetConstant* - Like Constant*, but the DAG does not do any folding,
+ // simplification, or lowering of the constant. They are used for constants
+ // which are known to fit in the immediate fields of their users, or for
+ // carrying magic numbers which are not values which need to be materialized
+ // in registers.
+ TargetConstant,
+ TargetConstantFP,
+
+ // TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or
+ // anything else with this node, and this is valid in the target-specific
+ // dag, turning into a GlobalAddress operand.
+ TargetGlobalAddress,
+ TargetGlobalTLSAddress,
+ TargetFrameIndex,
+ TargetJumpTable,
+ TargetConstantPool,
+ TargetExternalSymbol,
+ TargetBlockAddress,
+
+ /// RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...)
+ /// This node represents a target intrinsic function with no side effects.
+ /// The first operand is the ID number of the intrinsic from the
+ /// llvm::Intrinsic namespace. The operands to the intrinsic follow. The
+ /// node returns the result of the intrinsic.
+ INTRINSIC_WO_CHAIN,
+
+ /// RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...)
+ /// This node represents a target intrinsic function with side effects that
+ /// returns a result. The first operand is a chain pointer. The second is
+ /// the ID number of the intrinsic from the llvm::Intrinsic namespace. The
+ /// operands to the intrinsic follow. The node has two results, the result
+ /// of the intrinsic and an output chain.
+ INTRINSIC_W_CHAIN,
+
+ /// OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...)
+ /// This node represents a target intrinsic function with side effects that
+ /// does not return a result. The first operand is a chain pointer. The
+ /// second is the ID number of the intrinsic from the llvm::Intrinsic
+ /// namespace. The operands to the intrinsic follow.
+ INTRINSIC_VOID,
+
+ // CopyToReg - This node has three operands: a chain, a register number to
+ // set to this value, and a value.
+ CopyToReg,
+
+ // CopyFromReg - This node indicates that the input value is a virtual or
+ // physical register that is defined outside of the scope of this
+ // SelectionDAG. The register is available from the RegisterSDNode object.
+ CopyFromReg,
+
+ // UNDEF - An undefined node
+ UNDEF,
+
+ // EXTRACT_ELEMENT - This is used to get the lower or upper (determined by
+ // a Constant, which is required to be operand #1) half of the integer or
+ // float value specified as operand #0. This is only for use before
+ // legalization, for values that will be broken into multiple registers.
+ EXTRACT_ELEMENT,
+
+ // BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways. Given
+ // two values of the same integer value type, this produces a value twice as
+ // big. Like EXTRACT_ELEMENT, this can only be used before legalization.
+ BUILD_PAIR,
+
+ // MERGE_VALUES - This node takes multiple discrete operands and returns
+ // them all as its individual results. This node has exactly the same
+ // number of inputs and outputs. This node is useful for some pieces of the
+ // code generator that want to think about a single node with multiple
+ // results, not multiple nodes.
+ MERGE_VALUES,
+
+ // Simple integer binary arithmetic operators.
+ ADD, SUB, MUL, SDIV, UDIV, SREM, UREM,
+
+ // SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing
+ // a signed/unsigned value of type i[2*N], and return the full value as
+ // two results, each of type iN.
+ SMUL_LOHI, UMUL_LOHI,
+
+ // SDIVREM/UDIVREM - Divide two integers and produce both a quotient and
+ // remainder result.
+ SDIVREM, UDIVREM,
+
+ // CARRY_FALSE - This node is used when folding other nodes,
+ // like ADDC/SUBC, which indicate the carry result is always false.
+ CARRY_FALSE,
+
+ // Carry-setting nodes for multiple precision addition and subtraction.
+ // These nodes take two operands of the same value type, and produce two
+ // results. The first result is the normal add or sub result, the second
+ // result is the carry flag result.
+ ADDC, SUBC,
+
+ // Carry-using nodes for multiple precision addition and subtraction. These
+ // nodes take three operands: The first two are the normal lhs and rhs to
+ // the add or sub, and the third is the input carry flag. These nodes
+ // produce two results; the normal result of the add or sub, and the output
+ // carry flag. These nodes both read and write a carry flag to allow them
+ // to be chained together for add and sub of arbitrarily large
+ // values.
+ ADDE, SUBE,
+
+ // RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
+ // These nodes take two operands: the normal LHS and RHS to the add. They
+ // produce two results: the normal result of the add, and a boolean that
+ // indicates if an overflow occurred (*not* a flag, because it may be stored
+ // to memory, etc.). If the type of the boolean is not i1 then the high
+ // bits conform to getBooleanContents.
+ // These nodes are generated from the llvm.[su]add.with.overflow intrinsics.
+ SADDO, UADDO,
+
+ // Same for subtraction
+ SSUBO, USUBO,
+
+ // Same for multiplication
+ SMULO, UMULO,
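+
+ // An illustrative sketch of what llvm.sadd.with.overflow.i32 lowers to,
+ // assuming `DAG`, `dl`, and i32 operands LHS/RHS:
+ //   SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i1);
+ //   SDValue Res = DAG.getNode(ISD::SADDO, dl, VTs, LHS, RHS);
+ //   // Res.getValue(0) is the sum, Res.getValue(1) the overflow bit.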
+
+ // Simple binary floating point operators.
+ FADD, FSUB, FMUL, FDIV, FREM,
+
+ // FCOPYSIGN(X, Y) - Return the value of X with the sign of Y. NOTE: This
+ // DAG node does not require that X and Y have the same type, just that they
+ // are both floating point. X and the result must have the same type.
+ // FCOPYSIGN(f32, f64) is allowed.
+ FCOPYSIGN,
+
+ // INT = FGETSIGN(FP) - Return the sign bit of the specified floating point
+ // value as an integer 0/1 value.
+ FGETSIGN,
+
+ /// BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a vector with the
+ /// specified, possibly variable, elements. The number of elements is
+ /// required to be a power of two. The types of the operands must all be
+ /// the same and must match the vector element type, except that integer
+ /// types are allowed to be larger than the element type, in which case
+ /// the operands are implicitly truncated.
+ BUILD_VECTOR,
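+
+ /// A small sketch (assuming `DAG` and `dl`): building a v4i32 splat of 7.
+ ///   SDValue C = DAG.getConstant(7, MVT::i32);
+ ///   SDValue Ops[] = { C, C, C, C };
+ ///   SDValue V = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Ops, 4);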
+
+ /// INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element
+ /// at IDX replaced with VAL. If the type of VAL is larger than the vector
+ /// element type then VAL is truncated before replacement.
+ INSERT_VECTOR_ELT,
+
+ /// EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR
+ /// identified by the (potentially variable) element number IDX. If the
+ /// return type is an integer type larger than the element type of the
+ /// vector, the result is extended to the width of the return type.
+ EXTRACT_VECTOR_ELT,
+
+ /// CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of
+ /// vector type with the same length and element type, this produces a
+ /// concatenated vector result value, with length equal to the sum of the
+ /// lengths of the input vectors.
+ CONCAT_VECTORS,
+
+ /// EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR (a
+ /// vector value) starting with the (potentially variable) element number
+ /// IDX, which must be a multiple of the result vector length.
+ EXTRACT_SUBVECTOR,
+
+ /// VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as
+ /// VEC1/VEC2. A VECTOR_SHUFFLE node also contains an array of constant int
+ /// values that indicate which value (or undef) each result element will
+ /// get. These constant ints are accessible through the
+ /// ShuffleVectorSDNode class. This is quite similar to the Altivec
+ /// 'vperm' instruction, except that the indices must be constants and are
+ /// in terms of the element size of VEC1/VEC2, not in terms of bytes.
+ VECTOR_SHUFFLE,
+
+ /// SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a
+ /// scalar value into element 0 of the resultant vector type. The top
+ /// elements 1 to N-1 of the N-element vector are undefined. The type
+ /// of the operand must match the vector element type, except when they
+ /// are integer types. In this case the operand is allowed to be wider
+ /// than the vector element type, and is implicitly truncated to it.
+ SCALAR_TO_VECTOR,
+
+ // MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing
+ // an unsigned/signed value of type i[2*N], then return the top part.
+ MULHU, MULHS,
+
+ // Bitwise operators - logical and, logical or, logical xor, shift left,
+ // shift right algebraic (shift in sign bits), shift right logical (shift in
+ // zeroes), rotate left, rotate right, and byteswap.
+ AND, OR, XOR, SHL, SRA, SRL, ROTL, ROTR, BSWAP,
+
+ // Counting operators
+ CTTZ, CTLZ, CTPOP,
+
+ // Select(COND, TRUEVAL, FALSEVAL). If the type of the boolean COND is not
+ // i1 then the high bits must conform to getBooleanContents.
+ SELECT,
+
+ // Select with condition operator - This selects between a true value and
+ // a false value (ops #2 and #3) based on the boolean result of comparing
+ // the lhs and rhs (ops #0 and #1) of a conditional expression with the
+ // condition code in op #4, a CondCodeSDNode.
+ SELECT_CC,
+
+ // SetCC operator - This evaluates to a true value iff the condition is
+ // true. If the result value type is not i1 then the high bits conform
+ // to getBooleanContents. The operands to this are the left and right
+ // operands to compare (ops #0, and #1) and the condition code to compare
+ // them with (op #2) as a CondCodeSDNode.
+ SETCC,
+
+ // RESULT = VSETCC(LHS, RHS, COND) operator - This evaluates to a vector of
+ // integer elements with all bits of the result elements set to true if the
+ // comparison is true or all cleared if the comparison is false. The
+ // operands to this are the left and right operands to compare (LHS/RHS) and
+ // the condition code to compare them with (COND) as a CondCodeSDNode.
+ VSETCC,
+
+ // SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded
+ // integer shift operations, just like ADD/SUB_PARTS. The operation
+ // ordering is:
+ // [Lo,Hi] = op [LoLHS,HiLHS], Amt
+ SHL_PARTS, SRA_PARTS, SRL_PARTS,
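+
+ // Sketch (assuming `DAG`, `dl`, i32 halves Lo/Hi, and a shift amount Amt):
+ // expanding a 64-bit shift left on a 32-bit target.
+ //   SDValue S = DAG.getNode(ISD::SHL_PARTS, dl,
+ //                           DAG.getVTList(MVT::i32, MVT::i32), Lo, Hi, Amt);
+ //   // S.getValue(0) is the new Lo, S.getValue(1) the new Hi.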
+
+ // Conversion operators. These are all single input single output
+ // operations. For all of these, the result type must be strictly
+ // wider or narrower (depending on the operation) than the source
+ // type.
+
+ // SIGN_EXTEND - Used for integer types, replicating the sign bit
+ // into new bits.
+ SIGN_EXTEND,
+
+ // ZERO_EXTEND - Used for integer types, zeroing the new bits.
+ ZERO_EXTEND,
+
+ // ANY_EXTEND - Used for integer types. The high bits are undefined.
+ ANY_EXTEND,
+
+ // TRUNCATE - Completely drop the high bits.
+ TRUNCATE,
+
+ // [SU]INT_TO_FP - These operators convert integers (whose interpreted sign
+ // depends on the first letter) to floating point.
+ SINT_TO_FP,
+ UINT_TO_FP,
+
+ // SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to
+ // sign extend a small value in a large integer register (e.g. sign
+ // extending the low 8 bits of a 32-bit register to fill the top 24 bits
+ // with bit 7, the sign bit of the low byte). The size of the smaller type
+ // is indicated by operand #1, a ValueType node.
+ SIGN_EXTEND_INREG,
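+
+ // Sketch (assuming `DAG`, `dl`, and an i32 value Op): sign extend the low
+ // 8 bits of Op in place.
+ //   SDValue S = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32, Op,
+ //                           DAG.getValueType(MVT::i8));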
+
+ /// FP_TO_[US]INT - Convert a floating point value to a signed or unsigned
+ /// integer.
+ FP_TO_SINT,
+ FP_TO_UINT,
+
+ /// X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type
+ /// down to the precision of the destination VT. TRUNC is a flag, which is
+ /// always an integer that is zero or one. If TRUNC is 0, this is a
+ /// normal rounding; if it is 1, this FP_ROUND is known to not change the
+ /// value of Y.
+ ///
+ /// The TRUNC = 1 case is used in cases where we know that the value will
+ /// not be modified by the node, because Y is not using any of the extra
+ /// precision of the source type. This allows certain transformations like
+ /// FP_EXTEND(FP_ROUND(X,1)) -> X which are not safe for
+ /// FP_EXTEND(FP_ROUND(X,0)) because the extra bits aren't removed.
+ FP_ROUND,
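+
+ // Sketch (assuming `DAG`, `dl`, and an f64 value Y): the normal-rounding
+ // form, with TRUNC passed as a constant zero.
+ //   SDValue R = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Y,
+ //                           DAG.getIntPtrConstant(0));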
+
+ // FLT_ROUNDS_ - Returns current rounding mode:
+ // -1 Undefined
+ // 0 Round to 0
+ // 1 Round to nearest
+ // 2 Round to +inf
+ // 3 Round to -inf
+ FLT_ROUNDS_,
+
+ /// X = FP_ROUND_INREG(Y, VT) - This operator takes an FP register, and
+ /// rounds it to a floating point value. It then promotes it and returns it
+ /// in a register of the same size. This operation effectively just
+ /// discards excess precision. The type to round down to is specified by
+ /// the VT operand, a VTSDNode.
+ FP_ROUND_INREG,
+
+ /// X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
+ FP_EXTEND,
+
+ // BIT_CONVERT - This operator converts between integer, vector and FP
+ // values, as if the value was stored to memory with one type and loaded
+ // from the same address with the other type (or equivalently for vector
+ // format conversions, etc). The source and result are required to have
+ // the same bit size (e.g. f32 <-> i32). This can also be used for
+ // int-to-int or fp-to-fp conversions, but that is a noop, deleted by
+ // getNode().
+ BIT_CONVERT,
+
+ // CONVERT_RNDSAT - This operator is used to support various conversions
+ // between various types (float, signed, unsigned and vectors of those
+ // types) with rounding and saturation. NOTE: Avoid using this operator as
+ // most targets don't support it and the operator might be removed in the
+ // future. It takes the following arguments:
+ // 0) value
+ // 1) dest type (type to convert to)
+ // 2) src type (type to convert from)
+ // 3) rounding imm
+ // 4) saturation imm
+ // 5) ISD::CvtCode indicating the type of conversion to do
+ CONVERT_RNDSAT,
+
+ // FP16_TO_FP32, FP32_TO_FP16 - These operators are used to perform
+ // promotions and truncations for half-precision (16-bit) floating point
+ // numbers. We need special nodes since FP16 is a storage-only type with
+ // special semantics of operations.
+ FP16_TO_FP32, FP32_TO_FP16,
+
+ // FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW,
+ // FLOG, FLOG2, FLOG10, FEXP, FEXP2,
+ // FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR - Perform various unary floating
+ // point operations. These are inspired by libm.
+ FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW,
+ FLOG, FLOG2, FLOG10, FEXP, FEXP2,
+ FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR,
+
+ // LOAD and STORE have token chains as their first operand, then the same
+ // operands as an LLVM load/store instruction, then an offset node that
+ // is added / subtracted from the base pointer to form the address (for
+ // indexed memory ops).
+ LOAD, STORE,
+
+ // DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned
+ // to a specified boundary. This node always has two return values: a new
+ // stack pointer value and a chain. The first operand is the token chain,
+ // the second is the number of bytes to allocate, and the third is the
+ // alignment boundary. The size is guaranteed to be a multiple of the stack
+ // alignment, and the alignment is guaranteed to be bigger than the stack
+ // alignment (if required) or 0 to get standard stack alignment.
+ DYNAMIC_STACKALLOC,
+
+ // Control flow instructions. These all have token chains.
+
+ // BR - Unconditional branch. The first operand is the chain
+ // operand, the second is the MBB to branch to.
+ BR,
+
+ // BRIND - Indirect branch. The first operand is the chain, the second
+ // is the value to branch to, which must be of the same type as the target's
+ // pointer type.
+ BRIND,
+
+ // BR_JT - Jumptable branch. The first operand is the chain, the second
+ // is the jumptable index, the last one is the jumptable entry index.
+ BR_JT,
+
+ // BRCOND - Conditional branch. The first operand is the chain, the
+ // second is the condition, the third is the block to branch to if the
+ // condition is true. If the type of the condition is not i1, then the
+ // high bits must conform to getBooleanContents.
+ BRCOND,
+
+ // BR_CC - Conditional branch. The behavior is like that of SELECT_CC, in
+ // that the condition is represented as condition code, and two nodes to
+ // compare, rather than as a combined SetCC node. The operands in order are
+ // chain, cc, lhs, rhs, block to branch to if condition is true.
+ BR_CC,
+
+ // INLINEASM - Represents an inline asm block. This node always has two
+ // return values: a chain and a flag result. The inputs are as follows:
+ // Operand #0 : Input chain.
+ // Operand #1 : an ExternalSymbolSDNode with a pointer to the asm string.
+ // Operand #2 : an MDNodeSDNode with the !srcloc metadata.
+ // After this, it is followed by a list of operands with this format:
+ // ConstantSDNode: Flags that encode whether it is a mem or not, the
+ // number of operands that follow, etc. See InlineAsm.h.
+ // ... however many operands ...
+ // Operand #last: Optional, an incoming flag.
+ //
+ // The variable width operands are required to represent target addressing
+ // modes as a single "operand", even though they may have multiple
+ // SDOperands.
+ INLINEASM,
+
+ // EH_LABEL - Represents a label in mid basic block used to track
+ // locations needed for debug and exception handling tables. These nodes
+ // take a chain as input and return a chain.
+ EH_LABEL,
+
+ // STACKSAVE - STACKSAVE has one operand, an input chain. It produces a
+ // value, the same type as the pointer type for the system, and an output
+ // chain.
+ STACKSAVE,
+
+ // STACKRESTORE has two operands, an input chain and a pointer to restore
+ // to. It returns an output chain.
+ STACKRESTORE,
+
+ // CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of
+ // a call sequence, and carry arbitrary information that the target might
+ // want to know. The first operand is a chain, the rest are specified by the
+ // target and not touched by the DAG optimizers.
+ // CALLSEQ_START..CALLSEQ_END pairs may not be nested.
+ CALLSEQ_START, // Beginning of a call sequence
+ CALLSEQ_END, // End of a call sequence
+
+ // VAARG - VAARG has three operands: an input chain, a pointer, and a
+ // SRCVALUE. It returns a pair of values: the vaarg value and a new chain.
+ VAARG,
+
+ // VACOPY - VACOPY has five operands: an input chain, a destination pointer,
+ // a source pointer, a SRCVALUE for the destination, and a SRCVALUE for the
+ // source.
+ VACOPY,
+
+ // VAEND, VASTART - VAEND and VASTART have three operands: an input chain, a
+ // pointer, and a SRCVALUE.
+ VAEND, VASTART,
+
+ // SRCVALUE - This is a node type that holds a Value* that is used to
+ // make reference to a value in the LLVM IR.
+ SRCVALUE,
+
+ // MDNODE_SDNODE - This is a node that holds an MDNode*, which is used to
+ // reference metadata in the IR.
+ MDNODE_SDNODE,
+
+ // PCMARKER - This corresponds to the pcmarker intrinsic.
+ PCMARKER,
+
+ // READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
+ // The only operand is a chain; a value and a chain are produced. The
+ // value is the contents of the architecture-specific cycle-counter-like
+ // register (or other high accuracy, low latency clock source).
+ READCYCLECOUNTER,
+
+ // HANDLENODE node - Used as a handle for various purposes.
+ HANDLENODE,
+
+ // TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
+ // It takes as input a token chain, the pointer to the trampoline,
+ // the pointer to the nested function, the pointer to pass for the
+ // 'nest' parameter, a SRCVALUE for the trampoline and another for
+ // the nested function (allowing targets to access the original
+ // Function*). It produces the result of the intrinsic and a token
+ // chain as output.
+ TRAMPOLINE,
+
+ // TRAP - Trapping instruction
+ TRAP,
+
+ // PREFETCH - This corresponds to a prefetch intrinsic. It takes a chain
+ // as its first operand. The other operands are the address to prefetch,
+ // read / write specifier, and locality specifier.
+ PREFETCH,
+
+ // OUTCHAIN = MEMBARRIER(INCHAIN, load-load, load-store, store-load,
+ // store-store, device)
+ // This corresponds to the memory.barrier intrinsic.
+ // It takes an input chain, 4 operands to specify the type of barrier, an
+ // operand specifying if the barrier applies to device and uncached memory,
+ // and produces an output chain.
+ MEMBARRIER,
+
+ // Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap)
+ // This corresponds to the atomic.lcs intrinsic.
+ // cmp is compared to *ptr, and if equal, swap is stored in *ptr.
+ // The return is always the original value in *ptr.
+ ATOMIC_CMP_SWAP,
+
+ // Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt)
+ // This corresponds to the atomic.swap intrinsic.
+ // amt is stored to *ptr atomically.
+ // The return is always the original value in *ptr.
+ ATOMIC_SWAP,
+
+ // Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN, ptr, amt)
+ // This corresponds to the atomic.load.[OpName] intrinsic.
+ // op(*ptr, amt) is stored to *ptr atomically.
+ // The return is always the original value in *ptr.
+ ATOMIC_LOAD_ADD,
+ ATOMIC_LOAD_SUB,
+ ATOMIC_LOAD_AND,
+ ATOMIC_LOAD_OR,
+ ATOMIC_LOAD_XOR,
+ ATOMIC_LOAD_NAND,
+ ATOMIC_LOAD_MIN,
+ ATOMIC_LOAD_MAX,
+ ATOMIC_LOAD_UMIN,
+ ATOMIC_LOAD_UMAX,
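+
+ // Sketch of an atomic fetch-and-add, assuming `DAG`, `dl`, a Chain, an
+ // i32* pointer Ptr with IR Value `SV`, an i32 amount Amt, and this tree's
+ // getAtomic overload:
+ //   SDValue Old = DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl, MVT::i32,
+ //                               Chain, Ptr, Amt, SV, 4 /*alignment*/);
+ //   // Old.getValue(0) is the previous *Ptr, Old.getValue(1) the chain.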
+
+ /// BUILTIN_OP_END - This must be the last enum value in this list.
+ /// The target-specific pre-isel opcode values start here.
+ BUILTIN_OP_END
+ };
+
+ /// FIRST_TARGET_MEMORY_OPCODE - Target-specific pre-isel operations
+ /// which do not reference a specific memory location should be less than
+ /// this value. Those that do must not be less than this value, and can
+ /// be used with SelectionDAG::getMemIntrinsicNode.
+ static const int FIRST_TARGET_MEMORY_OPCODE = BUILTIN_OP_END+100;
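+
+ // A hypothetical backend's opcode enum (illustrative sketch only):
+ //   namespace MyTargetISD {
+ //     enum NodeType {
+ //       // Non-memory target nodes number up from BUILTIN_OP_END.
+ //       FIRST_NUMBER = ISD::BUILTIN_OP_END,
+ //       CALL, WRAPPER,
+ //       // Memory-referencing nodes must number from here so they can be
+ //       // used with SelectionDAG::getMemIntrinsicNode.
+ //       LOAD_PAIR = ISD::FIRST_TARGET_MEMORY_OPCODE
+ //     };
+ //   }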
+
+ //===--------------------------------------------------------------------===//
+ /// MemIndexedMode enum - This enum defines the load / store indexed
+ /// addressing modes.
+ ///
+ /// UNINDEXED "Normal" load / store. The effective address is already
+ /// computed and is available in the base pointer. The offset
+ /// operand is always undefined. In addition to producing a
+ /// chain, an unindexed load produces one value (result of the
+ /// load); an unindexed store does not produce a value.
+ ///
+ /// PRE_INC Similar to the unindexed mode where the effective address is
+ /// PRE_DEC the value of the base pointer plus / minus the offset.
+ /// It considers the computation as being folded into the load /
+ /// store operation (i.e. the load / store does the address
+ /// computation as well as performing the memory transaction).
+ /// The base operand is always undefined. In addition to
+ /// producing a chain, pre-indexed load produces two values
+ /// (result of the load and the result of the address
+ /// computation); a pre-indexed store produces one value (result
+ /// of the address computation).
+ ///
+ /// POST_INC The effective address is the value of the base pointer. The
+ /// POST_DEC value of the offset operand is then added to / subtracted
+ /// from the base after the memory transaction. In addition to
+ /// producing a chain, post-indexed load produces two values
+ /// (the result of the load and the result of the base +/- offset
+ /// computation); a post-indexed store produces one value (the
+ /// result of the base +/- offset computation).
+ ///
+ enum MemIndexedMode {
+ UNINDEXED = 0,
+ PRE_INC,
+ PRE_DEC,
+ POST_INC,
+ POST_DEC,
+ LAST_INDEXED_MODE
+ };
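+
+ /// A sketch of forming a pre-indexed load (assuming `DAG`, `dl`, an
+ /// existing unindexed LoadSDNode *LD, and Base/Offset SDValues):
+ ///   SDValue L = DAG.getIndexedLoad(SDValue(LD, 0), dl, Base, Offset,
+ ///                                  ISD::PRE_INC);
+ ///   // L.getValue(0): loaded value, (1): new base, (2): output chain.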
+
+ //===--------------------------------------------------------------------===//
+ /// LoadExtType enum - This enum defines the three variants of LOADEXT
+ /// (load with extension).
+ ///
+ /// SEXTLOAD loads the integer operand and sign extends it to a larger
+ /// integer result type.
+ /// ZEXTLOAD loads the integer operand and zero extends it to a larger
+ /// integer result type.
+ /// EXTLOAD is used for three things: floating point extending loads,
+ /// integer extending loads [the top bits are undefined], and vector
+ /// extending loads [load into low elt].
+ ///
+ enum LoadExtType {
+ NON_EXTLOAD = 0,
+ EXTLOAD,
+ SEXTLOAD,
+ ZEXTLOAD,
+ LAST_LOADEXT_TYPE
+ };
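+
+ /// A worked example: loading the i8 byte 0xFF into an i32 register gives
+ ///   SEXTLOAD -> 0xFFFFFFFF, ZEXTLOAD -> 0x000000FF, and with EXTLOAD the
+ ///   low 8 bits are 0xFF while the upper 24 bits are undefined.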
+
+ //===--------------------------------------------------------------------===//
+ /// ISD::CondCode enum - These are ordered carefully to make the bitfields
+ /// below work out, when considering SETFALSE (something that never exists
+ /// dynamically) as 0. "U" -> Unsigned (for integer operands) or Unordered
+ /// (for floating point), "L" -> Less than, "G" -> Greater than, "E" -> Equal
+ /// to. If the "N" column is 1, the result of the comparison is undefined if
+ /// the input is a NAN.
+ ///
+ /// All of these (except for the 'always folded ops') should be handled for
+ /// floating point. For integer, only the SETEQ,SETNE,SETLT,SETLE,SETGT,
+ /// SETGE,SETULT,SETULE,SETUGT, and SETUGE opcodes are used.
+ ///
+ /// Note that these are laid out in a specific order to allow bit-twiddling
+ /// to transform conditions.
+ enum CondCode {
+ // Opcode N U L G E Intuitive operation
+ SETFALSE, // 0 0 0 0 Always false (always folded)
+ SETOEQ, // 0 0 0 1 True if ordered and equal
+ SETOGT, // 0 0 1 0 True if ordered and greater than
+ SETOGE, // 0 0 1 1 True if ordered and greater than or equal
+ SETOLT, // 0 1 0 0 True if ordered and less than
+ SETOLE, // 0 1 0 1 True if ordered and less than or equal
+ SETONE, // 0 1 1 0 True if ordered and operands are unequal
+ SETO, // 0 1 1 1 True if ordered (no nans)
+ SETUO, // 1 0 0 0 True if unordered: isnan(X) | isnan(Y)
+ SETUEQ, // 1 0 0 1 True if unordered or equal
+ SETUGT, // 1 0 1 0 True if unordered or greater than
+ SETUGE, // 1 0 1 1 True if unordered, greater than, or equal
+ SETULT, // 1 1 0 0 True if unordered or less than
+ SETULE, // 1 1 0 1 True if unordered, less than, or equal
+ SETUNE, // 1 1 1 0 True if unordered or not equal
+ SETTRUE, // 1 1 1 1 Always true (always folded)
+ // Don't care operations: undefined if the input is a nan.
+ SETFALSE2, // 1 X 0 0 0 Always false (always folded)
+ SETEQ, // 1 X 0 0 1 True if equal
+ SETGT, // 1 X 0 1 0 True if greater than
+ SETGE, // 1 X 0 1 1 True if greater than or equal
+ SETLT, // 1 X 1 0 0 True if less than
+ SETLE, // 1 X 1 0 1 True if less than or equal
+ SETNE, // 1 X 1 1 0 True if not equal
+ SETTRUE2, // 1 X 1 1 1 Always true (always folded)
+
+ SETCC_INVALID // Marker value.
+ };
+
+ /// isSignedIntSetCC - Return true if this is a setcc instruction that
+ /// performs a signed comparison when used with integer operands.
+ inline bool isSignedIntSetCC(CondCode Code) {
+ return Code == SETGT || Code == SETGE || Code == SETLT || Code == SETLE;
+ }
+
+ /// isUnsignedIntSetCC - Return true if this is a setcc instruction that
+ /// performs an unsigned comparison when used with integer operands.
+ inline bool isUnsignedIntSetCC(CondCode Code) {
+ return Code == SETUGT || Code == SETUGE || Code == SETULT || Code == SETULE;
+ }
+
+ /// isTrueWhenEqual - Return true if the specified condition returns true if
+ /// the two operands to the condition are equal. Note that if one of the two
+ /// operands is a NaN, this value is meaningless.
+ inline bool isTrueWhenEqual(CondCode Cond) {
+ return ((int)Cond & 1) != 0;
+ }
+
+ /// getUnorderedFlavor - This function returns 0 if the condition is always
+ /// false if an operand is a NaN, 1 if the condition is always true if the
+ /// operand is a NaN, and 2 if the condition is undefined if the operand is a
+ /// NaN.
+ inline unsigned getUnorderedFlavor(CondCode Cond) {
+ return ((int)Cond >> 3) & 3;
+ }
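+
+ /// Worked examples of the bit layout above (values from the enum order):
+ ///   isTrueWhenEqual(SETOEQ)    -> 1 & 1         = 1 (x == y satisfies it)
+ ///   isTrueWhenEqual(SETOGT)    -> 2 & 1         = 0
+ ///   getUnorderedFlavor(SETOGT) -> (2 >> 3) & 3  = 0 (false on NaN)
+ ///   getUnorderedFlavor(SETUGT) -> (10 >> 3) & 3 = 1 (true on NaN)
+ ///   getUnorderedFlavor(SETGT)  -> (18 >> 3) & 3 = 2 (undefined on NaN)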
+
+ /// getSetCCInverse - Return the operation corresponding to !(X op Y), where
+ /// 'op' is a valid SetCC operation.
+ CondCode getSetCCInverse(CondCode Operation, bool isInteger);
+
+ /// getSetCCSwappedOperands - Return the operation corresponding to (Y op X)
+ /// when given the operation for (X op Y).
+ CondCode getSetCCSwappedOperands(CondCode Operation);
+
+ /// getSetCCOrOperation - Return the result of a logical OR between different
+ /// comparisons of identical values: ((X op1 Y) | (X op2 Y)). This
+ /// function returns SETCC_INVALID if it is not possible to represent the
+ /// resultant comparison.
+ CondCode getSetCCOrOperation(CondCode Op1, CondCode Op2, bool isInteger);
+
+ /// getSetCCAndOperation - Return the result of a logical AND between
+ /// different comparisons of identical values: ((X op1 Y) & (X op2 Y)). This
+ /// function returns SETCC_INVALID if it is not possible to represent the
+ /// resultant comparison.
+ CondCode getSetCCAndOperation(CondCode Op1, CondCode Op2, bool isInteger);
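+
+ /// Illustrative results for integer comparisons:
+ ///   getSetCCInverse(SETLT, true)   == SETGE  // !(x < y)  ->  x >= y
+ ///   getSetCCSwappedOperands(SETLT) == SETGT  //  (x < y)  ->  y >  x
+ ///   getSetCCOrOperation(SETLT, SETGT, true) == SETNE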
+
+ //===--------------------------------------------------------------------===//
+ /// CvtCode enum - This enum defines the various converts CONVERT_RNDSAT
+ /// supports.
+ enum CvtCode {
+ CVT_FF, // Float from Float
+ CVT_FS, // Float from Signed
+ CVT_FU, // Float from Unsigned
+ CVT_SF, // Signed from Float
+ CVT_UF, // Unsigned from Float
+ CVT_SS, // Signed from Signed
+ CVT_SU, // Signed from Unsigned
+ CVT_US, // Unsigned from Signed
+ CVT_UU, // Unsigned from Unsigned
+ CVT_INVALID // Marker - Invalid opcode
+ };
+
+} // end llvm::ISD namespace
+
+} // end llvm namespace
+
+#endif
diff --git a/include/llvm/CodeGen/JITCodeEmitter.h b/include/llvm/CodeGen/JITCodeEmitter.h
index 5da4961dbd39..eb373fb145e8 100644
--- a/include/llvm/CodeGen/JITCodeEmitter.h
+++ b/include/llvm/CodeGen/JITCodeEmitter.h
@@ -21,6 +21,7 @@
#include "llvm/System/DataTypes.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/CodeGen/MachineCodeEmitter.h"
+#include "llvm/ADT/DenseMap.h"
using namespace std;
@@ -173,13 +174,20 @@ public:
/// emitULEB128Bytes - This callback is invoked when a ULEB128 needs to be
/// written to the output stream.
- void emitULEB128Bytes(uint64_t Value) {
+ void emitULEB128Bytes(uint64_t Value, unsigned PadTo = 0) {
do {
uint8_t Byte = Value & 0x7f;
Value >>= 7;
- if (Value) Byte |= 0x80;
+ if (Value || PadTo != 0) Byte |= 0x80;
emitByte(Byte);
} while (Value);
+
+ if (PadTo) {
+ do {
+ uint8_t Byte = (PadTo > 1) ? 0x80 : 0x0;
+ emitByte(Byte);
+ } while (--PadTo);
+ }
}
/// emitSLEB128Bytes - This callback is invoked when a SLEB128 needs to be
@@ -324,6 +332,10 @@ public:
/// Specifies the MachineModuleInfo object. This is used for exception handling
/// purposes.
virtual void setModuleInfo(MachineModuleInfo* Info) = 0;
+
+ /// getLabelLocations - Return the label locations map, mapping label IDs
+ /// to their addresses.
+ virtual DenseMap<MCSymbol*, uintptr_t> *getLabelLocations() { return 0; }
};
} // End llvm namespace
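
A worked example of the padding behavior added in the hunk above (a sketch;
`JCE` is any JITCodeEmitter):

    JCE.emitULEB128Bytes(1, 2);  // emits 0x81 0x80 0x00
    // 0x81: the value byte, continuation bit forced on since PadTo != 0
    // 0x80, 0x00: two pad bytes; the stream still decodes as the value 1,
    // but the record now has a fixed three-byte width.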
diff --git a/include/llvm/CodeGen/LinkAllCodegenComponents.h b/include/llvm/CodeGen/LinkAllCodegenComponents.h
index 27947e8afee0..0064776ca294 100644
--- a/include/llvm/CodeGen/LinkAllCodegenComponents.h
+++ b/include/llvm/CodeGen/LinkAllCodegenComponents.h
@@ -34,6 +34,7 @@ namespace {
(void) llvm::createDeadMachineInstructionElimPass();
(void) llvm::createLocalRegisterAllocator();
+ (void) llvm::createFastRegisterAllocator();
(void) llvm::createLinearScanRegisterAllocator();
(void) llvm::createPBQPRegisterAllocator();
diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervalAnalysis.h
index 8ddcac705990..351217ce583d 100644
--- a/include/llvm/CodeGen/LiveIntervalAnalysis.h
+++ b/include/llvm/CodeGen/LiveIntervalAnalysis.h
@@ -111,6 +111,12 @@ namespace llvm {
double getScaledIntervalSize(LiveInterval& I) {
return (1000.0 * I.getSize()) / indexes_->getIndexesLength();
}
+
+ /// getFuncInstructionCount - Return the number of instructions in the
+ /// current function.
+ unsigned getFuncInstructionCount() {
+ return indexes_->getFunctionSize();
+ }
/// getApproximateInstructionCount - computes an estimate of the number
/// of instructions in a given LiveInterval.
diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h
index 2995bea9df07..cc651ca77b77 100644
--- a/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/include/llvm/CodeGen/MachineBasicBlock.h
@@ -201,12 +201,9 @@ public:
// Iteration support for live in sets. These sets are kept in sorted
// order by their register number.
- typedef std::vector<unsigned>::iterator livein_iterator;
- typedef std::vector<unsigned>::const_iterator const_livein_iterator;
- livein_iterator livein_begin() { return LiveIns.begin(); }
- const_livein_iterator livein_begin() const { return LiveIns.begin(); }
- livein_iterator livein_end() { return LiveIns.end(); }
- const_livein_iterator livein_end() const { return LiveIns.end(); }
+ typedef std::vector<unsigned>::const_iterator livein_iterator;
+ livein_iterator livein_begin() const { return LiveIns.begin(); }
+ livein_iterator livein_end() const { return LiveIns.end(); }
bool livein_empty() const { return LiveIns.empty(); }
/// getAlignment - Return alignment of the basic block.
diff --git a/include/llvm/CodeGen/MachineConstantPool.h b/include/llvm/CodeGen/MachineConstantPool.h
index e6698a525e16..498f815b9b5a 100644
--- a/include/llvm/CodeGen/MachineConstantPool.h
+++ b/include/llvm/CodeGen/MachineConstantPool.h
@@ -74,7 +74,7 @@ class MachineConstantPoolEntry {
public:
/// The constant itself.
union {
- Constant *ConstVal;
+ const Constant *ConstVal;
MachineConstantPoolValue *MachineCPVal;
} Val;
@@ -82,7 +82,7 @@ public:
/// a MachineConstantPoolValue.
unsigned Alignment;
- MachineConstantPoolEntry(Constant *V, unsigned A)
+ MachineConstantPoolEntry(const Constant *V, unsigned A)
: Alignment(A) {
Val.ConstVal = V;
}
@@ -143,7 +143,7 @@ public:
/// getConstantPoolIndex - Create a new entry in the constant pool or return
/// an existing one. User must specify the minimum required alignment for
/// the object.
- unsigned getConstantPoolIndex(Constant *C, unsigned Alignment);
+ unsigned getConstantPoolIndex(const Constant *C, unsigned Alignment);
unsigned getConstantPoolIndex(MachineConstantPoolValue *V,unsigned Alignment);
/// isEmpty - Return true if this constant pool contains no constants.
diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h
index b3609c2ea73e..595872ad2d74 100644
--- a/include/llvm/CodeGen/MachineFunction.h
+++ b/include/llvm/CodeGen/MachineFunction.h
@@ -70,7 +70,7 @@ struct MachineFunctionInfo {
};
class MachineFunction {
- Function *Fn;
+ const Function *Fn;
const TargetMachine &Target;
MCContext &Ctx;
MachineModuleInfo &MMI;
@@ -109,10 +109,6 @@ class MachineFunction {
typedef ilist<MachineBasicBlock> BasicBlockListType;
BasicBlockListType BasicBlocks;
- /// Default debug location. Used to print out the debug label at the beginning
- /// of a function.
- DebugLoc DefaultDebugLoc;
-
/// FunctionNumber - This provides a unique ID for each function emitted in
/// this translation unit.
///
@@ -124,8 +120,8 @@ class MachineFunction {
MachineFunction(const MachineFunction &); // DO NOT IMPLEMENT
void operator=(const MachineFunction&); // DO NOT IMPLEMENT
public:
- MachineFunction(Function *Fn, const TargetMachine &TM, unsigned FunctionNum,
- MachineModuleInfo &MMI);
+ MachineFunction(const Function *Fn, const TargetMachine &TM,
+ unsigned FunctionNum, MachineModuleInfo &MMI);
~MachineFunction();
MachineModuleInfo &getMMI() const { return MMI; }
@@ -133,7 +129,7 @@ public:
/// getFunction - Return the LLVM function that this machine code represents
///
- Function *getFunction() const { return Fn; }
+ const Function *getFunction() const { return Fn; }
/// getFunctionNumber - Return a unique ID for the current function.
///
@@ -394,19 +390,6 @@ public:
/// normal 'L' label is returned.
MCSymbol *getJTISymbol(unsigned JTI, MCContext &Ctx,
bool isLinkerPrivate = false) const;
-
-
- //===--------------------------------------------------------------------===//
- // Debug location.
- //
-
- /// getDefaultDebugLoc - Get the default debug location for the machine
- /// function.
- DebugLoc getDefaultDebugLoc() const { return DefaultDebugLoc; }
-
- /// setDefaultDebugLoc - Get the default debug location for the machine
- /// function.
- void setDefaultDebugLoc(DebugLoc DL) { DefaultDebugLoc = DL; }
};
//===--------------------------------------------------------------------===//
diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h
index fa819275271a..c4adca1ebf11 100644
--- a/include/llvm/CodeGen/MachineInstr.h
+++ b/include/llvm/CodeGen/MachineInstr.h
@@ -91,15 +91,14 @@ private:
// over time, the non-DebugLoc versions should be phased out and eventually
// removed.
- /// MachineInstr ctor - This constructor create a MachineInstr and add the
- /// implicit operands. It reserves space for number of operands specified by
- /// TargetInstrDesc. The version with a DebugLoc should be preferred.
+ /// MachineInstr ctor - This constructor creates a MachineInstr and adds the
+ /// implicit operands. It reserves space for the number of operands specified
+ /// by the TargetInstrDesc. The version with a DebugLoc should be preferred.
explicit MachineInstr(const TargetInstrDesc &TID, bool NoImp = false);
/// MachineInstr ctor - Work exactly the same as the ctor above, except that
/// the MachineInstr is created and added to the end of the specified basic
/// block. The version with a DebugLoc should be preferred.
- ///
MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &TID);
/// MachineInstr ctor - This constructor creates a MachineInstr and adds the
@@ -111,7 +110,6 @@ private:
/// MachineInstr ctor - Work exactly the same as the ctor above, except that
/// the MachineInstr is created and added to the end of the specified basic
/// block.
- ///
MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
const TargetInstrDesc &TID);
@@ -357,6 +355,10 @@ public:
/// return 0.
unsigned isConstantValuePHI() const;
+ /// allDefsAreDead - Return true if all the defs of this instruction are dead.
+ ///
+ bool allDefsAreDead() const;
+
//
// Debugging support
//
diff --git a/include/llvm/CodeGen/MachineInstrBuilder.h b/include/llvm/CodeGen/MachineInstrBuilder.h
index 9baa592f96e0..37ac24cab841 100644
--- a/include/llvm/CodeGen/MachineInstrBuilder.h
+++ b/include/llvm/CodeGen/MachineInstrBuilder.h
@@ -104,7 +104,7 @@ public:
return *this;
}
- const MachineInstrBuilder &addGlobalAddress(GlobalValue *GV,
+ const MachineInstrBuilder &addGlobalAddress(const GlobalValue *GV,
int64_t Offset = 0,
unsigned char TargetFlags = 0) const {
MI->addOperand(MachineOperand::CreateGA(GV, Offset, TargetFlags));
diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h
index 17da43b7d621..84aef1059f67 100644
--- a/include/llvm/CodeGen/MachineModuleInfo.h
+++ b/include/llvm/CodeGen/MachineModuleInfo.h
@@ -82,7 +82,7 @@ struct LandingPadInfo {
SmallVector<MCSymbol*, 1> BeginLabels; // Labels prior to invoke.
SmallVector<MCSymbol*, 1> EndLabels; // Labels after invoke.
MCSymbol *LandingPadLabel; // Label at beginning of landing pad.
- Function *Personality; // Personality function.
+ const Function *Personality; // Personality function.
std::vector<int> TypeIds; // List of type ids (filters negative)
explicit LandingPadInfo(MachineBasicBlock *MBB)
@@ -101,7 +101,7 @@ class MachineModuleInfo : public ImmutablePass {
MCContext Context;
/// TheModule - This is the LLVM Module being worked on.
- Module *TheModule;
+ const Module *TheModule;
/// ObjFileMMI - This is the object-file-format-specific implementation of
/// MachineModuleInfoImpl, which lets targets accumulate whatever info they
@@ -125,7 +125,7 @@ class MachineModuleInfo : public ImmutablePass {
// TypeInfos - List of C++ TypeInfo used in the current function.
//
- std::vector<GlobalVariable *> TypeInfos;
+ std::vector<const GlobalVariable *> TypeInfos;
// FilterIds - List of typeids encoding filters used in the current function.
//
@@ -138,7 +138,7 @@ class MachineModuleInfo : public ImmutablePass {
// Personalities - Vector of all personality functions ever seen. Used to emit
// common EH frames.
- std::vector<Function *> Personalities;
+ std::vector<const Function *> Personalities;
/// UsedFunctions - The functions in the @llvm.used list in a more easily
/// searchable format. This does not include the functions in
@@ -179,8 +179,8 @@ public:
const MCContext &getContext() const { return Context; }
MCContext &getContext() { return Context; }
- void setModule(Module *M) { TheModule = M; }
- Module *getModule() const { return TheModule; }
+ void setModule(const Module *M) { TheModule = M; }
+ const Module *getModule() const { return TheModule; }
/// getInfo - Keep track of various per-function pieces of information for
/// backends that would like to do so.
@@ -199,7 +199,7 @@ public:
/// AnalyzeModule - Scan the module for global debug information.
///
- void AnalyzeModule(Module &M);
+ void AnalyzeModule(const Module &M);
/// hasDebugInfo - Returns true if valid debug info is present.
///
@@ -252,14 +252,15 @@ public:
/// addPersonality - Provide the personality function for the exception
/// information.
- void addPersonality(MachineBasicBlock *LandingPad, Function *Personality);
+ void addPersonality(MachineBasicBlock *LandingPad,
+ const Function *Personality);
/// getPersonalityIndex - Get index of the current personality function inside
/// Personalities array
unsigned getPersonalityIndex() const;
/// getPersonalities - Return array of personality functions ever seen.
- const std::vector<Function *>& getPersonalities() const {
+ const std::vector<const Function *>& getPersonalities() const {
return Personalities;
}
@@ -273,12 +274,12 @@ public:
/// addCatchTypeInfo - Provide the catch typeinfo for a landing pad.
///
void addCatchTypeInfo(MachineBasicBlock *LandingPad,
- std::vector<GlobalVariable *> &TyInfo);
+ std::vector<const GlobalVariable *> &TyInfo);
/// addFilterTypeInfo - Provide the filter typeinfo for a landing pad.
///
void addFilterTypeInfo(MachineBasicBlock *LandingPad,
- std::vector<GlobalVariable *> &TyInfo);
+ std::vector<const GlobalVariable *> &TyInfo);
/// addCleanup - Add a cleanup action for a landing pad.
///
@@ -286,7 +287,7 @@ public:
/// getTypeIDFor - Return the type id for the specified typeinfo. This is
/// function wide.
- unsigned getTypeIDFor(GlobalVariable *TI);
+ unsigned getTypeIDFor(const GlobalVariable *TI);
/// getFilterIDFor - Return the id of the filter encoded by TyIds. This is
/// function wide.
@@ -294,7 +295,7 @@ public:
/// TidyLandingPads - Remap landing pad labels and remove any deleted landing
/// pads.
- void TidyLandingPads();
+ void TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap = 0);
/// getLandingPads - Return a reference to the landing pad info for the
/// current function.
@@ -323,7 +324,7 @@ public:
/// getTypeInfos - Return a reference to the C++ typeinfo for the current
/// function.
- const std::vector<GlobalVariable *> &getTypeInfos() const {
+ const std::vector<const GlobalVariable *> &getTypeInfos() const {
return TypeInfos;
}
@@ -335,7 +336,7 @@ public:
/// getPersonality - Return a personality function if available. The presence
/// of one is required to emit exception handling info.
- Function *getPersonality() const;
+ const Function *getPersonality() const;
/// setVariableDbgInfo - Collect information used to emit debugging
/// information of a variable.
diff --git a/include/llvm/CodeGen/MachineOperand.h b/include/llvm/CodeGen/MachineOperand.h
index b5f6bcd9e7ec..31858ce8081b 100644
--- a/include/llvm/CodeGen/MachineOperand.h
+++ b/include/llvm/CodeGen/MachineOperand.h
@@ -117,8 +117,8 @@ private:
union {
int Index; // For MO_*Index - The index itself.
const char *SymbolName; // For MO_ExternalSymbol.
- GlobalValue *GV; // For MO_GlobalAddress.
- BlockAddress *BA; // For MO_BlockAddress.
+ const GlobalValue *GV; // For MO_GlobalAddress.
+ const BlockAddress *BA; // For MO_BlockAddress.
} Val;
int64_t Offset; // An offset from the object.
} OffsetedInfo;
@@ -315,12 +315,12 @@ public:
return Contents.OffsetedInfo.Val.Index;
}
- GlobalValue *getGlobal() const {
+ const GlobalValue *getGlobal() const {
assert(isGlobal() && "Wrong MachineOperand accessor");
return Contents.OffsetedInfo.Val.GV;
}
- BlockAddress *getBlockAddress() const {
+ const BlockAddress *getBlockAddress() const {
assert(isBlockAddress() && "Wrong MachineOperand accessor");
return Contents.OffsetedInfo.Val.BA;
}
@@ -457,7 +457,7 @@ public:
Op.setTargetFlags(TargetFlags);
return Op;
}
- static MachineOperand CreateGA(GlobalValue *GV, int64_t Offset,
+ static MachineOperand CreateGA(const GlobalValue *GV, int64_t Offset,
unsigned char TargetFlags = 0) {
MachineOperand Op(MachineOperand::MO_GlobalAddress);
Op.Contents.OffsetedInfo.Val.GV = GV;
@@ -473,7 +473,7 @@ public:
Op.setTargetFlags(TargetFlags);
return Op;
}
- static MachineOperand CreateBA(BlockAddress *BA,
+ static MachineOperand CreateBA(const BlockAddress *BA,
unsigned char TargetFlags = 0) {
MachineOperand Op(MachineOperand::MO_BlockAddress);
Op.Contents.OffsetedInfo.Val.BA = BA;
diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h
index f2e5e102223f..b377dec37831 100644
--- a/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -258,18 +258,18 @@ public:
liveout_iterator liveout_end() const { return LiveOuts.end(); }
bool liveout_empty() const { return LiveOuts.empty(); }
- bool isLiveIn(unsigned Reg) const {
- for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I)
- if (I->first == Reg || I->second == Reg)
- return true;
- return false;
- }
- bool isLiveOut(unsigned Reg) const {
- for (liveout_iterator I = liveout_begin(), E = liveout_end(); I != E; ++I)
- if (*I == Reg)
- return true;
- return false;
- }
+ bool isLiveIn(unsigned Reg) const;
+ bool isLiveOut(unsigned Reg) const;
+
+ /// getLiveInPhysReg - If VReg is a live-in virtual register, return the
+ /// corresponding live-in physical register.
+ unsigned getLiveInPhysReg(unsigned VReg) const;
+
+ /// EmitLiveInCopies - Emit copies to initialize livein virtual registers
+ /// into the given entry block.
+ void EmitLiveInCopies(MachineBasicBlock *EntryMBB,
+ const TargetRegisterInfo &TRI,
+ const TargetInstrInfo &TII);
private:
void HandleVRegListReallocation();
diff --git a/include/llvm/CodeGen/MachineSSAUpdater.h b/include/llvm/CodeGen/MachineSSAUpdater.h
index ab663fe3bf55..979ef0113ba8 100644
--- a/include/llvm/CodeGen/MachineSSAUpdater.h
+++ b/include/llvm/CodeGen/MachineSSAUpdater.h
@@ -23,22 +23,27 @@ namespace llvm {
class TargetInstrInfo;
class TargetRegisterClass;
template<typename T> class SmallVectorImpl;
+ class BumpPtrAllocator;
/// MachineSSAUpdater - This class updates SSA form for a set of virtual
/// registers defined in multiple blocks. This is used when code duplication
/// or another unstructured transformation wants to rewrite a set of uses of one
/// vreg with uses of a set of vregs.
class MachineSSAUpdater {
+public:
+ class BBInfo;
+ typedef SmallVectorImpl<BBInfo*> BlockListTy;
+
+private:
/// AvailableVals - This keeps track of which value to use on a per-block
/// basis. When we insert PHI nodes, we keep track of them here.
//typedef DenseMap<MachineBasicBlock*, unsigned > AvailableValsTy;
void *AV;
- /// IncomingPredInfo - We use this as scratch space when doing our recursive
- /// walk. This should only be used in GetValueInBlockInternal, normally it
- /// should be empty.
- //std::vector<std::pair<MachineBasicBlock*, unsigned > > IncomingPredInfo;
- void *IPI;
+ /// BBMap - The GetValueAtEndOfBlock method maintains this mapping from
+ /// basic blocks to BBInfo structures.
+ /// typedef DenseMap<MachineBasicBlock*, BBInfo*> BBMapTy;
+ void *BM;
/// VR - Current virtual register whose uses are being updated.
unsigned VR;
@@ -106,6 +111,15 @@ public:
private:
void ReplaceRegWith(unsigned OldReg, unsigned NewReg);
unsigned GetValueAtEndOfBlockInternal(MachineBasicBlock *BB);
+ void BuildBlockList(MachineBasicBlock *BB, BlockListTy *BlockList,
+ BumpPtrAllocator *Allocator);
+ void FindDominators(BlockListTy *BlockList);
+ void FindPHIPlacement(BlockListTy *BlockList);
+ void FindAvailableVals(BlockListTy *BlockList);
+ void FindExistingPHI(MachineBasicBlock *BB, BlockListTy *BlockList);
+ bool CheckIfPHIMatches(MachineInstr *PHI);
+ void RecordMatchingPHI(MachineInstr *PHI);
+
void operator=(const MachineSSAUpdater&); // DO NOT IMPLEMENT
MachineSSAUpdater(const MachineSSAUpdater&); // DO NOT IMPLEMENT
};
diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h
index d3c2720c15f5..2f5d57640cd6 100644
--- a/include/llvm/CodeGen/Passes.h
+++ b/include/llvm/CodeGen/Passes.h
@@ -95,6 +95,11 @@ namespace llvm {
///
FunctionPass *createLocalRegisterAllocator();
+ /// FastRegisterAllocation Pass - This pass performs register allocation as
+ /// fast as possible. It is best suited for debug code where live ranges are
+ /// short.
+ ///
+ FunctionPass *createFastRegisterAllocator();
+
/// LinearScanRegisterAllocation Pass - This pass implements the linear scan
/// register allocation algorithm, a global register allocator.
///
@@ -170,7 +175,7 @@ namespace llvm {
/// createMachineLICMPass - This pass performs LICM on machine instructions.
///
- FunctionPass *createMachineLICMPass();
+ FunctionPass *createMachineLICMPass(bool PreRegAlloc = true);
/// createMachineSinkingPass - This pass performs sinking on machine
/// instructions.
@@ -199,7 +204,7 @@ namespace llvm {
/// createDwarfEHPass - This pass mulches exception handling code into a form
/// adapted to code generation. Required if using dwarf exception handling.
- FunctionPass *createDwarfEHPass(const TargetLowering *tli, bool fast);
+ FunctionPass *createDwarfEHPass(const TargetMachine *tm, bool fast);
/// createSjLjEHPass - This pass adapts exception handling code to use
/// the GCC-style builtin setjmp/longjmp (sjlj) to handle EH control flow.
diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h
index 9563d0811cf5..7c025e3acbec 100644
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@@ -459,7 +459,6 @@ namespace llvm {
const TargetLowering *TLI; // Target lowering info
MachineFunction &MF; // Machine function
MachineRegisterInfo &MRI; // Virtual/real register map
- MachineConstantPool *ConstPool; // Target constant pool
std::vector<SUnit*> Sequence; // The schedule. Null SUnit*'s
// represent noop instructions.
std::vector<SUnit> SUnits; // The scheduling units.
@@ -478,8 +477,7 @@ namespace llvm {
/// EmitSchedule - Insert MachineInstrs into the MachineBasicBlock
/// according to the order specified in Sequence.
///
- virtual MachineBasicBlock*
- EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*>*) = 0;
+ virtual MachineBasicBlock *EmitSchedule() = 0;
void dumpSchedule() const;
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index 5dd0aa8705f3..ae15230c9612 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -62,8 +62,15 @@ private:
/// instead the info is kept off to the side in this structure. Each SDNode may
/// have one or more associated dbg_value entries. This information is kept in
/// DbgValMap.
+/// Byval parameters are handled separately because they don't use allocas,
+/// which busts the normal mechanism. There is good reason for handling all
+/// parameters separately: they may not have code generated for them, they
+/// should always go at the beginning of the function regardless of other code
+/// motion, and debug info for them is potentially useful even if the parameter
+/// is unused. Right now only byval parameters are handled separately.
class SDDbgInfo {
SmallVector<SDDbgValue*, 32> DbgValues;
+ SmallVector<SDDbgValue*, 32> ByvalParmDbgValues;
DenseMap<const SDNode*, SmallVector<SDDbgValue*, 2> > DbgValMap;
void operator=(const SDDbgInfo&); // Do not implement.
@@ -71,19 +78,22 @@ class SDDbgInfo {
public:
SDDbgInfo() {}
- void add(SDDbgValue *V, const SDNode *Node = 0) {
+ void add(SDDbgValue *V, const SDNode *Node, bool isParameter) {
+ if (isParameter) {
+ ByvalParmDbgValues.push_back(V);
+ } else DbgValues.push_back(V);
if (Node)
DbgValMap[Node].push_back(V);
- DbgValues.push_back(V);
}
void clear() {
DbgValMap.clear();
DbgValues.clear();
+ ByvalParmDbgValues.clear();
}
bool empty() const {
- return DbgValues.empty();
+ return DbgValues.empty() && ByvalParmDbgValues.empty();
}
SmallVector<SDDbgValue*,2> &getSDDbgValues(const SDNode *Node) {
@@ -93,6 +103,8 @@ public:
typedef SmallVector<SDDbgValue*,32>::iterator DbgIterator;
DbgIterator DbgBegin() { return DbgValues.begin(); }
DbgIterator DbgEnd() { return DbgValues.end(); }
+ DbgIterator ByvalParmDbgBegin() { return ByvalParmDbgValues.begin(); }
+ DbgIterator ByvalParmDbgEnd() { return ByvalParmDbgValues.end(); }
};
enum CombineLevel {
@@ -117,7 +129,8 @@ void checkForCycles(const SelectionDAG *DAG);
/// linear form.
///
class SelectionDAG {
- TargetLowering &TLI;
+ const TargetMachine &TM;
+ const TargetLowering &TLI;
MachineFunction *MF;
FunctionLoweringInfo &FLI;
LLVMContext *Context;
@@ -172,7 +185,7 @@ class SelectionDAG {
SelectionDAG(const SelectionDAG&); // Do not implement.
public:
- SelectionDAG(TargetLowering &tli, FunctionLoweringInfo &fli);
+ SelectionDAG(const TargetMachine &TM, FunctionLoweringInfo &fli);
~SelectionDAG();
/// init - Prepare this SelectionDAG to process code in the given
@@ -186,8 +199,8 @@ public:
void clear();
MachineFunction &getMachineFunction() const { return *MF; }
- const TargetMachine &getTarget() const;
- TargetLowering &getTargetLoweringInfo() const { return TLI; }
+ const TargetMachine &getTarget() const { return TM; }
+ const TargetLowering &getTargetLoweringInfo() const { return TLI; }
FunctionLoweringInfo &getFunctionLoweringInfo() const { return FLI; }
LLVMContext *getContext() const {return Context; }
@@ -350,10 +363,10 @@ public:
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned char TargetFlags = 0) {
return getJumpTable(JTI, VT, true, TargetFlags);
}
- SDValue getConstantPool(Constant *C, EVT VT,
+ SDValue getConstantPool(const Constant *C, EVT VT,
unsigned Align = 0, int Offs = 0, bool isT=false,
unsigned char TargetFlags = 0);
- SDValue getTargetConstantPool(Constant *C, EVT VT,
+ SDValue getTargetConstantPool(const Constant *C, EVT VT,
unsigned Align = 0, int Offset = 0,
unsigned char TargetFlags = 0) {
return getConstantPool(C, VT, Align, Offset, true, TargetFlags);
@@ -377,7 +390,7 @@ public:
SDValue getValueType(EVT);
SDValue getRegister(unsigned Reg, EVT VT);
SDValue getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label);
- SDValue getBlockAddress(BlockAddress *BA, EVT VT,
+ SDValue getBlockAddress(const BlockAddress *BA, EVT VT,
bool isTarget = false, unsigned char TargetFlags = 0);
SDValue getCopyToReg(SDValue Chain, DebugLoc dl, unsigned Reg, SDValue N) {
@@ -650,6 +663,9 @@ public:
/// getSrcValue - Construct a node to track a Value* through the backend.
SDValue getSrcValue(const Value *v);
+ /// getMDNode - Return an MDNodeSDNode which holds an MDNode.
+ SDValue getMDNode(const MDNode *MD);
+
/// getShiftAmountOperand - Return the specified value casted to
/// the target's desired shift amount type.
SDValue getShiftAmountOperand(SDValue Op);
@@ -764,7 +780,7 @@ public:
///
SDDbgValue *getDbgValue(MDNode *MDPtr, SDNode *N, unsigned R, uint64_t Off,
DebugLoc DL, unsigned O);
- SDDbgValue *getDbgValue(MDNode *MDPtr, Value *C, uint64_t Off,
+ SDDbgValue *getDbgValue(MDNode *MDPtr, const Value *C, uint64_t Off,
DebugLoc DL, unsigned O);
SDDbgValue *getDbgValue(MDNode *MDPtr, unsigned FI, uint64_t Off,
DebugLoc DL, unsigned O);
@@ -873,7 +889,7 @@ public:
/// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the
/// value is produced by SD.
- void AddDbgValue(SDDbgValue *DB, SDNode *SD = 0);
+ void AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter);
/// GetDbgValues - Get the debug values which reference the given SDNode.
SmallVector<SDDbgValue*,2> &GetDbgValues(const SDNode* SD) {
@@ -886,6 +902,12 @@ public:
SDDbgInfo::DbgIterator DbgBegin() { return DbgInfo->DbgBegin(); }
SDDbgInfo::DbgIterator DbgEnd() { return DbgInfo->DbgEnd(); }
+ SDDbgInfo::DbgIterator ByvalParmDbgBegin() {
+ return DbgInfo->ByvalParmDbgBegin();
+ }
+ SDDbgInfo::DbgIterator ByvalParmDbgEnd() {
+ return DbgInfo->ByvalParmDbgEnd();
+ }
void dump() const;
diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h
index 3c000f032c97..38175808ab41 100644
--- a/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/include/llvm/CodeGen/SelectionDAGISel.h
@@ -17,7 +17,6 @@
#include "llvm/BasicBlock.h"
#include "llvm/Pass.h"
-#include "llvm/Constant.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -41,31 +40,28 @@ namespace llvm {
class SelectionDAGISel : public MachineFunctionPass {
public:
const TargetMachine &TM;
- TargetLowering &TLI;
+ const TargetLowering &TLI;
FunctionLoweringInfo *FuncInfo;
MachineFunction *MF;
MachineRegisterInfo *RegInfo;
SelectionDAG *CurDAG;
SelectionDAGBuilder *SDB;
- MachineBasicBlock *BB;
AliasAnalysis *AA;
GCFunctionInfo *GFI;
CodeGenOpt::Level OptLevel;
static char ID;
- explicit SelectionDAGISel(TargetMachine &tm,
+ explicit SelectionDAGISel(const TargetMachine &tm,
CodeGenOpt::Level OL = CodeGenOpt::Default);
virtual ~SelectionDAGISel();
- TargetLowering &getTargetLowering() { return TLI; }
+ const TargetLowering &getTargetLowering() { return TLI; }
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
virtual bool runOnMachineFunction(MachineFunction &MF);
- unsigned MakeReg(EVT VT);
-
- virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) {}
+ virtual void EmitFunctionEntryCode() {}
/// PreprocessISelDAG - This hook allows targets to hack on the graph before
/// instruction selection starts.
@@ -95,8 +91,11 @@ public:
/// IsLegalToFold - Returns true if the specific operand node N of
/// U can be folded during instruction selection that starts at Root.
- bool IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
- bool IgnoreChains = false) const;
+ /// FIXME: This is a static member function because the PIC16 target
+ /// uses it during lowering.
+ static bool IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
+ CodeGenOpt::Level OptLevel,
+ bool IgnoreChains = false);
/// CreateTargetHazardRecognizer - Return a newly allocated hazard recognizer
/// to use for this target when scheduling the DAG.
@@ -281,24 +280,21 @@ private:
SDNode *MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTs,
const SDValue *Ops, unsigned NumOps, unsigned EmitNodeInfo);
- void SelectAllBasicBlocks(Function &Fn, MachineFunction &MF,
- const TargetInstrInfo &TII);
- void FinishBasicBlock();
+ void PrepareEHLandingPad(MachineBasicBlock *BB);
+ void SelectAllBasicBlocks(const Function &Fn);
+ void FinishBasicBlock(MachineBasicBlock *BB);
- void SelectBasicBlock(BasicBlock *LLVMBB,
- BasicBlock::iterator Begin,
- BasicBlock::iterator End,
- bool &HadTailCall);
- void CodeGenAndEmitDAG();
- void LowerArguments(BasicBlock *BB);
+ MachineBasicBlock *SelectBasicBlock(MachineBasicBlock *BB,
+ const BasicBlock *LLVMBB,
+ BasicBlock::const_iterator Begin,
+ BasicBlock::const_iterator End,
+ bool &HadTailCall);
+ MachineBasicBlock *CodeGenAndEmitDAG(MachineBasicBlock *BB);
+ void LowerArguments(const BasicBlock *BB);
void ShrinkDemandedOps();
void ComputeLiveOutVRegInfo();
- void HandlePHINodesInSuccessorBlocks(BasicBlock *LLVMBB);
-
- bool HandlePHINodesInSuccessorBlocksFast(BasicBlock *LLVMBB, FastISel *F);
-
/// Create the scheduler. If a specific scheduler was specified
/// via the SchedulerRegistry, use it, otherwise select the
/// one preferred by the target.
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index 782d354bdfe7..fd529b62ce63 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -25,6 +25,7 @@
#include "llvm/ADT/ilist_node.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/Support/MathExtras.h"
@@ -56,568 +57,7 @@ struct SDVTList {
unsigned int NumVTs;
};
-/// ISD namespace - This namespace contains an enum which represents all of the
-/// SelectionDAG node types and value types.
-///
namespace ISD {
-
- //===--------------------------------------------------------------------===//
- /// ISD::NodeType enum - This enum defines the target-independent operators
- /// for a SelectionDAG.
- ///
- /// Targets may also define target-dependent operator codes for SDNodes. For
- /// example, on x86, these are the enum values in the X86ISD namespace.
- /// Targets should aim to use target-independent operators to model their
- /// instruction sets as much as possible, and only use target-dependent
- /// operators when they have special requirements.
- ///
- /// Finally, during and after selection proper, SNodes may use special
- /// operator codes that correspond directly with MachineInstr opcodes. These
- /// are used to represent selected instructions. See the isMachineOpcode()
- /// and getMachineOpcode() member functions of SDNode.
- ///
- enum NodeType {
- // DELETED_NODE - This is an illegal value that is used to catch
- // errors. This opcode is not a legal opcode for any node.
- DELETED_NODE,
-
- // EntryToken - This is the marker used to indicate the start of the region.
- EntryToken,
-
- // TokenFactor - This node takes multiple tokens as input and produces a
- // single token result. This is used to represent the fact that the operand
- // operators are independent of each other.
- TokenFactor,
-
- // AssertSext, AssertZext - These nodes record if a register contains a
- // value that has already been zero or sign extended from a narrower type.
- // These nodes take two operands. The first is the node that has already
- // been extended, and the second is a value type node indicating the width
- // of the extension
- AssertSext, AssertZext,
-
- // Various leaf nodes.
- BasicBlock, VALUETYPE, CONDCODE, Register,
- Constant, ConstantFP,
- GlobalAddress, GlobalTLSAddress, FrameIndex,
- JumpTable, ConstantPool, ExternalSymbol, BlockAddress,
-
- // The address of the GOT
- GLOBAL_OFFSET_TABLE,
-
- // FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and
- // llvm.returnaddress on the DAG. These nodes take one operand, the index
- // of the frame or return address to return. An index of zero corresponds
- // to the current function's frame or return address, an index of one to the
- // parent's frame or return address, and so on.
- FRAMEADDR, RETURNADDR,
-
- // FRAME_TO_ARGS_OFFSET - This node represents offset from frame pointer to
- // first (possible) on-stack argument. This is needed for correct stack
- // adjustment during unwind.
- FRAME_TO_ARGS_OFFSET,
-
- // RESULT, OUTCHAIN = EXCEPTIONADDR(INCHAIN) - This node represents the
- // address of the exception block on entry to a landing pad block.
- EXCEPTIONADDR,
-
- // RESULT, OUTCHAIN = LSDAADDR(INCHAIN) - This node represents the
- // address of the Language Specific Data Area for the enclosing function.
- LSDAADDR,
-
- // RESULT, OUTCHAIN = EHSELECTION(INCHAIN, EXCEPTION) - This node represents
- // the selection index of the exception thrown.
- EHSELECTION,
-
- // OUTCHAIN = EH_RETURN(INCHAIN, OFFSET, HANDLER) - This node represents
- // 'eh_return' gcc dwarf builtin, which is used to return from
- // exception. The general meaning is: adjust stack by OFFSET and pass
- // execution to HANDLER. Many platform-related details also :)
- EH_RETURN,
-
- // TargetConstant* - Like Constant*, but the DAG does not do any folding or
- // simplification of the constant.
- TargetConstant,
- TargetConstantFP,
-
- // TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or
- // anything else with this node, and this is valid in the target-specific
- // dag, turning into a GlobalAddress operand.
- TargetGlobalAddress,
- TargetGlobalTLSAddress,
- TargetFrameIndex,
- TargetJumpTable,
- TargetConstantPool,
- TargetExternalSymbol,
- TargetBlockAddress,
-
- /// RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...)
- /// This node represents a target intrinsic function with no side effects.
- /// The first operand is the ID number of the intrinsic from the
- /// llvm::Intrinsic namespace. The operands to the intrinsic follow. The
- /// node returns the result of the intrinsic.
- INTRINSIC_WO_CHAIN,
-
- /// RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...)
- /// This node represents a target intrinsic function with side effects that
- /// returns a result. The first operand is a chain pointer. The second is
- /// the ID number of the intrinsic from the llvm::Intrinsic namespace. The
- /// operands to the intrinsic follow. The node has two results, the result
- /// of the intrinsic and an output chain.
- INTRINSIC_W_CHAIN,
-
- /// OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...)
- /// This node represents a target intrinsic function with side effects that
- /// does not return a result. The first operand is a chain pointer. The
- /// second is the ID number of the intrinsic from the llvm::Intrinsic
- /// namespace. The operands to the intrinsic follow.
- INTRINSIC_VOID,
-
- // CopyToReg - This node has three operands: a chain, a register number to
- // set to this value, and a value.
- CopyToReg,
-
- // CopyFromReg - This node indicates that the input value is a virtual or
- // physical register that is defined outside of the scope of this
- // SelectionDAG. The register is available from the RegisterSDNode object.
- CopyFromReg,
-
- // UNDEF - An undefined node
- UNDEF,
-
- // EXTRACT_ELEMENT - This is used to get the lower or upper (determined by
- // a Constant, which is required to be operand #1) half of the integer or
- // float value specified as operand #0. This is only for use before
- // legalization, for values that will be broken into multiple registers.
- EXTRACT_ELEMENT,
-
- // BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways. Given
- // two values of the same integer value type, this produces a value twice as
- // big. Like EXTRACT_ELEMENT, this can only be used before legalization.
- BUILD_PAIR,
-
- // MERGE_VALUES - This node takes multiple discrete operands and returns
- // them all as its individual results. This node has exactly the same
- // number of inputs and outputs. This node is useful for some pieces of the
- // code generator that want to think about a single node with multiple
- // results, not multiple nodes.
- MERGE_VALUES,
-
- // Simple integer binary arithmetic operators.
- ADD, SUB, MUL, SDIV, UDIV, SREM, UREM,
-
- // SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing
- // a signed/unsigned value of type i[2*N], and return the full value as
- // two results, each of type iN.
- SMUL_LOHI, UMUL_LOHI,
-
- // SDIVREM/UDIVREM - Divide two integers and produce both a quotient and
- // remainder result.
- SDIVREM, UDIVREM,
-
- // CARRY_FALSE - This node is used when folding other nodes,
- // like ADDC/SUBC, which indicate the carry result is always false.
- CARRY_FALSE,
-
- // Carry-setting nodes for multiple precision addition and subtraction.
- // These nodes take two operands of the same value type, and produce two
- // results. The first result is the normal add or sub result, the second
- // result is the carry flag result.
- ADDC, SUBC,
-
- // Carry-using nodes for multiple precision addition and subtraction. These
- // nodes take three operands: The first two are the normal lhs and rhs to
- // the add or sub, and the third is the input carry flag. These nodes
- // produce two results; the normal result of the add or sub, and the output
- // carry flag. These nodes both read and write a carry flag to allow them
- // to be chained together for add and sub of arbitrarily large
- // values.
- ADDE, SUBE,
-
- // RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
- // These nodes take two operands: the normal LHS and RHS to the add. They
- // produce two results: the normal result of the add, and a boolean that
- // indicates if an overflow occurred (*not* a flag, because it may be stored
- // to memory, etc.). If the type of the boolean is not i1 then the high
- // bits conform to getBooleanContents.
- // These nodes are generated from the llvm.[su]add.with.overflow intrinsics.
- SADDO, UADDO,
-
- // Same for subtraction
- SSUBO, USUBO,
-
- // Same for multiplication
- SMULO, UMULO,
-
- // Simple binary floating point operators.
- FADD, FSUB, FMUL, FDIV, FREM,
-
- // FCOPYSIGN(X, Y) - Return the value of X with the sign of Y. NOTE: This
- // DAG node does not require that X and Y have the same type, just that they
- // are both floating point. X and the result must have the same type.
- // FCOPYSIGN(f32, f64) is allowed.
- FCOPYSIGN,
-
- // INT = FGETSIGN(FP) - Return the sign bit of the specified floating point
- // value as an integer 0/1 value.
- FGETSIGN,
-
- /// BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a vector with the
- /// specified, possibly variable, elements. The number of elements is
- /// required to be a power of two. The types of the operands must all be
- /// the same and must match the vector element type, except that integer
- /// types are allowed to be larger than the element type, in which case
- /// the operands are implicitly truncated.
- BUILD_VECTOR,
-
- /// INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element
- /// at IDX replaced with VAL. If the type of VAL is larger than the vector
- /// element type then VAL is truncated before replacement.
- INSERT_VECTOR_ELT,
-
- /// EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR
- /// identified by the (potentially variable) element number IDX. If the
- /// return type is an integer type larger than the element type of the
- /// vector, the result is extended to the width of the return type.
- EXTRACT_VECTOR_ELT,
-
- /// CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of
- /// vector type with the same length and element type, this produces a
- /// concatenated vector result value, with length equal to the sum of the
- /// lengths of the input vectors.
- CONCAT_VECTORS,
-
- /// EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR (a
- /// vector value) starting with the (potentially variable) element number
- /// IDX, which must be a multiple of the result vector length.
- EXTRACT_SUBVECTOR,
-
- /// VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as
- /// VEC1/VEC2. A VECTOR_SHUFFLE node also contains an array of constant int
- /// values that indicate which value (or undef) each result element will
- /// get. These constant ints are accessible through the
- /// ShuffleVectorSDNode class. This is quite similar to the Altivec
- /// 'vperm' instruction, except that the indices must be constants and are
- /// in terms of the element size of VEC1/VEC2, not in terms of bytes.
- VECTOR_SHUFFLE,
-
- /// SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a
- /// scalar value into element 0 of the resultant vector type. The top
- /// elements 1 to N-1 of the N-element vector are undefined. The type
- /// of the operand must match the vector element type, except when they
- /// are integer types. In this case the operand is allowed to be wider
- /// than the vector element type, and is implicitly truncated to it.
- SCALAR_TO_VECTOR,
-
- // MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing
- // an unsigned/signed value of type i[2*N], then return the top part.
- MULHU, MULHS,
-
- // Bitwise operators - logical and, logical or, logical xor, shift left,
- // shift right algebraic (shift in sign bits), shift right logical (shift in
- // zeroes), rotate left, rotate right, and byteswap.
- AND, OR, XOR, SHL, SRA, SRL, ROTL, ROTR, BSWAP,
-
- // Counting operators
- CTTZ, CTLZ, CTPOP,
-
- // Select(COND, TRUEVAL, FALSEVAL). If the type of the boolean COND is not
- // i1 then the high bits must conform to getBooleanContents.
- SELECT,
-
- // Select with condition operator - This selects between a true value and
- // a false value (ops #2 and #3) based on the boolean result of comparing
- // the lhs and rhs (ops #0 and #1) of a conditional expression with the
- // condition code in op #4, a CondCodeSDNode.
- SELECT_CC,
-
- // SetCC operator - This evaluates to a true value iff the condition is
- // true. If the result value type is not i1 then the high bits conform
- // to getBooleanContents. The operands to this are the left and right
- // operands to compare (ops #0, and #1) and the condition code to compare
- // them with (op #2) as a CondCodeSDNode.
- SETCC,
-
- // RESULT = VSETCC(LHS, RHS, COND) operator - This evaluates to a vector of
- // integer elements with all bits of the result elements set to true if the
- // comparison is true or all cleared if the comparison is false. The
- // operands to this are the left and right operands to compare (LHS/RHS) and
- // the condition code to compare them with (COND) as a CondCodeSDNode.
- VSETCC,
-
- // SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded
- // integer shift operations, just like ADD/SUB_PARTS. The operation
- // ordering is:
- // [Lo,Hi] = op [LoLHS,HiLHS], Amt
- SHL_PARTS, SRA_PARTS, SRL_PARTS,
-
- // Conversion operators. These are all single input single output
- // operations. For all of these, the result type must be strictly
- // wider or narrower (depending on the operation) than the source
- // type.
-
- // SIGN_EXTEND - Used for integer types, replicating the sign bit
- // into new bits.
- SIGN_EXTEND,
-
- // ZERO_EXTEND - Used for integer types, zeroing the new bits.
- ZERO_EXTEND,
-
- // ANY_EXTEND - Used for integer types. The high bits are undefined.
- ANY_EXTEND,
-
- // TRUNCATE - Completely drop the high bits.
- TRUNCATE,
-
- // [SU]INT_TO_FP - These operators convert integers (whose interpreted sign
- // depends on the first letter) to floating point.
- SINT_TO_FP,
- UINT_TO_FP,
-
- // SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to
- // sign extend a small value in a large integer register (e.g. sign
- // extending the low 8 bits of a 32-bit register to fill the top 24 bits
- // with the 7th bit). The size of the smaller type is indicated by the
- // second operand, a ValueType node.
- SIGN_EXTEND_INREG,
-
- /// FP_TO_[US]INT - Convert a floating point value to a signed or unsigned
- /// integer.
- FP_TO_SINT,
- FP_TO_UINT,
-
- /// X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type
- /// down to the precision of the destination VT. TRUNC is a flag, which is
- /// always an integer that is zero or one. If TRUNC is 0, this is a
- /// normal rounding, if it is 1, this FP_ROUND is known to not change the
- /// value of Y.
- ///
- /// The TRUNC = 1 case is used in cases where we know that the value will
- /// not be modified by the node, because Y is not using any of the extra
- /// precision of source type. This allows certain transformations like
- /// FP_EXTEND(FP_ROUND(X,1)) -> X which are not safe for
- /// FP_EXTEND(FP_ROUND(X,0)) because the extra bits aren't removed.
- FP_ROUND,
-
- // FLT_ROUNDS_ - Returns current rounding mode:
- // -1 Undefined
- // 0 Round to 0
- // 1 Round to nearest
- // 2 Round to +inf
- // 3 Round to -inf
- FLT_ROUNDS_,
-
- /// X = FP_ROUND_INREG(Y, VT) - This operator takes an FP register, and
- /// rounds it to a floating point value. It then promotes it and returns it
- /// in a register of the same size. This operation effectively just
- /// discards excess precision. The type to round down to is specified by
- /// the VT operand, a VTSDNode.
- FP_ROUND_INREG,
-
- /// X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
- FP_EXTEND,
-
- // BIT_CONVERT - This operator converts between integer, vector and FP
- // values, as if the value was stored to memory with one type and loaded
- // from the same address with the other type (or equivalently for vector
- // format conversions, etc). The source and result are required to have
- // the same bit size (e.g. f32 <-> i32). This can also be used for
- // int-to-int or fp-to-fp conversions, but that is a noop, deleted by
- // getNode().
- BIT_CONVERT,
-
- // CONVERT_RNDSAT - This operator is used to support various conversions
- // between various types (float, signed, unsigned and vectors of those
- // types) with rounding and saturation. NOTE: Avoid using this operator as
- // most targets don't support it and the operator might be removed in the
- // future. It takes the following arguments:
- // 0) value
- // 1) dest type (type to convert to)
- // 2) src type (type to convert from)
- // 3) rounding imm
- // 4) saturation imm
- // 5) ISD::CvtCode indicating the type of conversion to do
- CONVERT_RNDSAT,
-
- // FP16_TO_FP32, FP32_TO_FP16 - These operators are used to perform
- // promotions and truncation for half-precision (16 bit) floating point
- // numbers. We need special nodes since FP16 is a storage-only type with
- // special semantics of operations.
- FP16_TO_FP32, FP32_TO_FP16,
-
- // FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW,
- // FLOG, FLOG2, FLOG10, FEXP, FEXP2,
- // FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR - Perform various unary floating
- // point operations. These are inspired by libm.
- FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW,
- FLOG, FLOG2, FLOG10, FEXP, FEXP2,
- FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR,
-
- // LOAD and STORE have token chains as their first operand, then the same
- // operands as an LLVM load/store instruction, then an offset node that
- // is added / subtracted from the base pointer to form the address (for
- // indexed memory ops).
- LOAD, STORE,
-
- // DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned
- // to a specified boundary. This node always has two return values: a new
- // stack pointer value and a chain. The first operand is the token chain,
- // the second is the number of bytes to allocate, and the third is the
- // alignment boundary. The size is guaranteed to be a multiple of the stack
- // alignment, and the alignment is guaranteed to be bigger than the stack
- // alignment (if required) or 0 to get standard stack alignment.
- DYNAMIC_STACKALLOC,
-
- // Control flow instructions. These all have token chains.
-
- // BR - Unconditional branch. The first operand is the chain
- // operand, the second is the MBB to branch to.
- BR,
-
- // BRIND - Indirect branch. The first operand is the chain, the second
- // is the value to branch to, which must be of the same type as the target's
- // pointer type.
- BRIND,
-
- // BR_JT - Jumptable branch. The first operand is the chain, the second
- // is the jumptable index, the last one is the jumptable entry index.
- BR_JT,
-
- // BRCOND - Conditional branch. The first operand is the chain, the
- // second is the condition, the third is the block to branch to if the
- // condition is true. If the type of the condition is not i1, then the
- // high bits must conform to getBooleanContents.
- BRCOND,
-
- // BR_CC - Conditional branch. The behavior is like that of SELECT_CC, in
- // that the condition is represented as condition code, and two nodes to
- // compare, rather than as a combined SetCC node. The operands in order are
- // chain, cc, lhs, rhs, block to branch to if condition is true.
- BR_CC,
-
- // INLINEASM - Represents an inline asm block. This node always has two
- // return values: a chain and a flag result. The inputs are as follows:
- // Operand #0 : Input chain.
- // Operand #1 : an ExternalSymbolSDNode with a pointer to the asm string.
- // Operand #2n+2: A RegisterNode.
- // Operand #2n+3: A TargetConstant, indicating if the reg is a use/def
- // Operand #last: Optional, an incoming flag.
- INLINEASM,
-
- // EH_LABEL - Represents a label in mid basic block used to track
- // locations needed for debug and exception handling tables. These nodes
- // take a chain as input and return a chain.
- EH_LABEL,
-
- // STACKSAVE - STACKSAVE has one operand, an input chain. It produces a
- // value, the same type as the pointer type for the system, and an output
- // chain.
- STACKSAVE,
-
- // STACKRESTORE has two operands, an input chain and a pointer to restore
- // to; it returns an output chain.
- STACKRESTORE,
-
- // CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of
- // a call sequence, and carry arbitrary information that target might want
- // to know. The first operand is a chain, the rest are specified by the
- // target and not touched by the DAG optimizers.
- // CALLSEQ_START..CALLSEQ_END pairs may not be nested.
- CALLSEQ_START, // Beginning of a call sequence
- CALLSEQ_END, // End of a call sequence
-
- // VAARG - VAARG has three operands: an input chain, a pointer, and a
- // SRCVALUE. It returns a pair of values: the vaarg value and a new chain.
- VAARG,
-
- // VACOPY - VACOPY has five operands: an input chain, a destination pointer,
- // a source pointer, a SRCVALUE for the destination, and a SRCVALUE for the
- // source.
- VACOPY,
-
- // VAEND, VASTART - VAEND and VASTART have three operands: an input chain, a
- // pointer, and a SRCVALUE.
- VAEND, VASTART,
-
- // SRCVALUE - This is a node type that holds a Value* that is used to
- // make reference to a value in the LLVM IR.
- SRCVALUE,
-
- // PCMARKER - This corresponds to the pcmarker intrinsic.
- PCMARKER,
-
- // READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
- // The only operand is a chain; a value and a chain are produced. The
- // value is the contents of the architecture-specific cycle-counter-like
- // register (or other high accuracy, low latency clock source).
- READCYCLECOUNTER,
-
- // HANDLENODE node - Used as a handle for various purposes.
- HANDLENODE,
-
- // TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
- // It takes as input a token chain, the pointer to the trampoline,
- // the pointer to the nested function, the pointer to pass for the
- // 'nest' parameter, a SRCVALUE for the trampoline and another for
- // the nested function (allowing targets to access the original
- // Function*). It produces the result of the intrinsic and a token
- // chain as output.
- TRAMPOLINE,
-
- // TRAP - Trapping instruction
- TRAP,
-
- // PREFETCH - This corresponds to a prefetch intrinsic. It takes a chain
- // as its first operand. The other operands are the address to prefetch,
- // read / write specifier, and locality specifier.
- PREFETCH,
-
- // OUTCHAIN = MEMBARRIER(INCHAIN, load-load, load-store, store-load,
- // store-store, device)
- // This corresponds to the memory.barrier intrinsic.
- // It takes an input chain, 4 operands to specify the type of barrier, an
- // operand specifying if the barrier applies to device and uncached memory
- // and produces an output chain.
- MEMBARRIER,
-
- // Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap)
- // This corresponds to the atomic.lcs intrinsic.
- // cmp is compared to *ptr, and if equal, swap is stored in *ptr.
- // The return is always the original value in *ptr.
- ATOMIC_CMP_SWAP,
-
- // Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt)
- // This corresponds to the atomic.swap intrinsic.
- // amt is stored to *ptr atomically.
- // The return is always the original value in *ptr.
- ATOMIC_SWAP,
-
- // Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN, ptr, amt)
- // This corresponds to the atomic.load.[OpName] intrinsic.
- // op(*ptr, amt) is stored to *ptr atomically.
- // The return is always the original value in *ptr.
- ATOMIC_LOAD_ADD,
- ATOMIC_LOAD_SUB,
- ATOMIC_LOAD_AND,
- ATOMIC_LOAD_OR,
- ATOMIC_LOAD_XOR,
- ATOMIC_LOAD_NAND,
- ATOMIC_LOAD_MIN,
- ATOMIC_LOAD_MAX,
- ATOMIC_LOAD_UMIN,
- ATOMIC_LOAD_UMAX,
-
- /// BUILTIN_OP_END - This must be the last enum value in this list.
- /// The target-specific pre-isel opcode values start here.
- BUILTIN_OP_END
- };
-
- /// FIRST_TARGET_MEMORY_OPCODE - Target-specific pre-isel operations
- /// which do not reference a specific memory location should be less than
- /// this value. Those that do must not be less than this value, and can
- /// be used with SelectionDAG::getMemIntrinsicNode.
- static const int FIRST_TARGET_MEMORY_OPCODE = BUILTIN_OP_END+100;
-
/// Node predicates
/// isBuildVectorAllOnes - Return true if the specified node is a
@@ -632,174 +72,7 @@ namespace ISD {
/// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low
/// element is not an undef.
bool isScalarToVector(const SDNode *N);
-
- //===--------------------------------------------------------------------===//
- /// MemIndexedMode enum - This enum defines the load / store indexed
- /// addressing modes.
- ///
- /// UNINDEXED "Normal" load / store. The effective address is already
- /// computed and is available in the base pointer. The offset
- /// operand is always undefined. In addition to producing a
- /// chain, an unindexed load produces one value (result of the
- /// load); an unindexed store does not produce a value.
- ///
- /// PRE_INC Similar to the unindexed mode where the effective address is
- /// PRE_DEC the value of the base pointer plus / minus the offset.
- /// It considers the computation as being folded into the load /
- /// store operation (i.e. the load / store does the address
- /// computation as well as performing the memory transaction).
- /// The base operand is always undefined. In addition to
- /// producing a chain, pre-indexed load produces two values
- /// (result of the load and the result of the address
- /// computation); a pre-indexed store produces one value (result
- /// of the address computation).
- ///
- /// POST_INC The effective address is the value of the base pointer. The
- /// POST_DEC value of the offset operand is then added to / subtracted
- /// from the base after memory transaction. In addition to
- /// producing a chain, post-indexed load produces two values
- /// (the result of the load and the result of the base +/- offset
- /// computation); a post-indexed store produces one value (the
- /// result of the base +/- offset computation).
- ///
- enum MemIndexedMode {
- UNINDEXED = 0,
- PRE_INC,
- PRE_DEC,
- POST_INC,
- POST_DEC,
- LAST_INDEXED_MODE
- };
-
- //===--------------------------------------------------------------------===//
- /// LoadExtType enum - This enum defines the three variants of LOADEXT
- /// (load with extension).
- ///
- /// SEXTLOAD loads the integer operand and sign extends it to a larger
- /// integer result type.
- /// ZEXTLOAD loads the integer operand and zero extends it to a larger
- /// integer result type.
- /// EXTLOAD is used for three things: floating point extending loads,
- /// integer extending loads [the top bits are undefined], and vector
- /// extending loads [load into low elt].
- ///
- enum LoadExtType {
- NON_EXTLOAD = 0,
- EXTLOAD,
- SEXTLOAD,
- ZEXTLOAD,
- LAST_LOADEXT_TYPE
- };
-
- //===--------------------------------------------------------------------===//
- /// ISD::CondCode enum - These are ordered carefully to make the bitfields
- /// below work out, when considering SETFALSE (something that never exists
- /// dynamically) as 0. "U" -> Unsigned (for integer operands) or Unordered
- /// (for floating point), "L" -> Less than, "G" -> Greater than, "E" -> Equal
- /// to. If the "N" column is 1, the result of the comparison is undefined if
- /// the input is a NAN.
- ///
- /// All of these (except for the 'always folded ops') should be handled for
- /// floating point. For integer, only the SETEQ,SETNE,SETLT,SETLE,SETGT,
- /// SETGE,SETULT,SETULE,SETUGT, and SETUGE opcodes are used.
- ///
- /// Note that these are laid out in a specific order to allow bit-twiddling
- /// to transform conditions.
- enum CondCode {
- // Opcode N U L G E Intuitive operation
- SETFALSE, // 0 0 0 0 Always false (always folded)
- SETOEQ, // 0 0 0 1 True if ordered and equal
- SETOGT, // 0 0 1 0 True if ordered and greater than
- SETOGE, // 0 0 1 1 True if ordered and greater than or equal
- SETOLT, // 0 1 0 0 True if ordered and less than
- SETOLE, // 0 1 0 1 True if ordered and less than or equal
- SETONE, // 0 1 1 0 True if ordered and operands are unequal
- SETO, // 0 1 1 1 True if ordered (no nans)
- SETUO, // 1 0 0 0 True if unordered: isnan(X) | isnan(Y)
- SETUEQ, // 1 0 0 1 True if unordered or equal
- SETUGT, // 1 0 1 0 True if unordered or greater than
- SETUGE, // 1 0 1 1 True if unordered, greater than, or equal
- SETULT, // 1 1 0 0 True if unordered or less than
- SETULE, // 1 1 0 1 True if unordered, less than, or equal
- SETUNE, // 1 1 1 0 True if unordered or not equal
- SETTRUE, // 1 1 1 1 Always true (always folded)
- // Don't care operations: undefined if the input is a nan.
- SETFALSE2, // 1 X 0 0 0 Always false (always folded)
- SETEQ, // 1 X 0 0 1 True if equal
- SETGT, // 1 X 0 1 0 True if greater than
- SETGE, // 1 X 0 1 1 True if greater than or equal
- SETLT, // 1 X 1 0 0 True if less than
- SETLE, // 1 X 1 0 1 True if less than or equal
- SETNE, // 1 X 1 1 0 True if not equal
- SETTRUE2, // 1 X 1 1 1 Always true (always folded)
-
- SETCC_INVALID // Marker value.
- };
-
- /// isSignedIntSetCC - Return true if this is a setcc instruction that
- /// performs a signed comparison when used with integer operands.
- inline bool isSignedIntSetCC(CondCode Code) {
- return Code == SETGT || Code == SETGE || Code == SETLT || Code == SETLE;
- }
-
- /// isUnsignedIntSetCC - Return true if this is a setcc instruction that
- /// performs an unsigned comparison when used with integer operands.
- inline bool isUnsignedIntSetCC(CondCode Code) {
- return Code == SETUGT || Code == SETUGE || Code == SETULT || Code == SETULE;
- }
-
- /// isTrueWhenEqual - Return true if the specified condition returns true if
- /// the two operands to the condition are equal. Note that if one of the two
- /// operands is a NaN, this value is meaningless.
- inline bool isTrueWhenEqual(CondCode Cond) {
- return ((int)Cond & 1) != 0;
- }
-
- /// getUnorderedFlavor - This function returns 0 if the condition is always
- /// false if an operand is a NaN, 1 if the condition is always true if the
- /// operand is a NaN, and 2 if the condition is undefined if the operand is a
- /// NaN.
- inline unsigned getUnorderedFlavor(CondCode Cond) {
- return ((int)Cond >> 3) & 3;
- }
-
- /// getSetCCInverse - Return the operation corresponding to !(X op Y), where
- /// 'op' is a valid SetCC operation.
- CondCode getSetCCInverse(CondCode Operation, bool isInteger);
-
- /// getSetCCSwappedOperands - Return the operation corresponding to (Y op X)
- /// when given the operation for (X op Y).
- CondCode getSetCCSwappedOperands(CondCode Operation);
-
- /// getSetCCOrOperation - Return the result of a logical OR between different
- /// comparisons of identical values: ((X op1 Y) | (X op2 Y)). This
- /// function returns SETCC_INVALID if it is not possible to represent the
- /// resultant comparison.
- CondCode getSetCCOrOperation(CondCode Op1, CondCode Op2, bool isInteger);
-
- /// getSetCCAndOperation - Return the result of a logical AND between
- /// different comparisons of identical values: ((X op1 Y) & (X op2 Y)). This
- /// function returns SETCC_INVALID if it is not possible to represent the
- /// resultant comparison.
- CondCode getSetCCAndOperation(CondCode Op1, CondCode Op2, bool isInteger);
-
- //===--------------------------------------------------------------------===//
- /// CvtCode enum - This enum defines the various converts CONVERT_RNDSAT
- /// supports.
- enum CvtCode {
- CVT_FF, // Float from Float
- CVT_FS, // Float from Signed
- CVT_FU, // Float from Unsigned
- CVT_SF, // Signed from Float
- CVT_UF, // Unsigned from Float
- CVT_SS, // Signed from Signed
- CVT_SU, // Signed from Unsigned
- CVT_US, // Unsigned from Signed
- CVT_UU, // Unsigned from Unsigned
- CVT_INVALID // Marker - Invalid opcode
- };
-} // end llvm::ISD namespace
-
+} // end llvm::ISD namespace
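
The CondCode helpers that moved out with this block rely on the bit layout
documented above (E, G, L, U from bit 0 upward). As a reference sketch,
swapping the operands of a comparison only exchanges the L and G bits:

    ISD::CondCode getSetCCSwappedOperands(ISD::CondCode Operation) {
      unsigned OldL = (Operation >> 2) & 1;    // "less than" bit
      unsigned OldG = (Operation >> 1) & 1;    // "greater than" bit
      return ISD::CondCode((Operation & ~6) |  // keep the U and E bits
                           (OldL << 1) |       // old L becomes new G
                           (OldG << 2));       // old G becomes new L
    }
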
//===----------------------------------------------------------------------===//
/// SDValue - Unlike LLVM values, Selection DAG nodes may return multiple
@@ -1564,7 +837,7 @@ class HandleSDNode : public SDNode {
public:
// FIXME: Remove the "noinline" attribute once <rdar://problem/5852746> is
// fixed.
-#ifdef __GNUC__
+#if __GNUC__==4 && __GNUC_MINOR__==2 && defined(__APPLE__) && !defined(__llvm__)
explicit __attribute__((__noinline__)) HandleSDNode(SDValue X)
#else
explicit HandleSDNode(SDValue X)
@@ -1867,7 +1140,7 @@ public:
};
class GlobalAddressSDNode : public SDNode {
- GlobalValue *TheGlobal;
+ const GlobalValue *TheGlobal;
int64_t Offset;
unsigned char TargetFlags;
friend class SelectionDAG;
@@ -1875,7 +1148,7 @@ class GlobalAddressSDNode : public SDNode {
int64_t o, unsigned char TargetFlags);
public:
- GlobalValue *getGlobal() const { return TheGlobal; }
+ const GlobalValue *getGlobal() const { return TheGlobal; }
int64_t getOffset() const { return Offset; }
unsigned char getTargetFlags() const { return TargetFlags; }
// Return the address space this GlobalAddress belongs to.
@@ -1930,15 +1203,15 @@ public:
class ConstantPoolSDNode : public SDNode {
union {
- Constant *ConstVal;
+ const Constant *ConstVal;
MachineConstantPoolValue *MachineCPVal;
} Val;
int Offset; // It's a MachineConstantPoolValue if top bit is set.
unsigned Alignment; // Minimum alignment requirement of CP (not log2 value).
unsigned char TargetFlags;
friend class SelectionDAG;
- ConstantPoolSDNode(bool isTarget, Constant *c, EVT VT, int o, unsigned Align,
- unsigned char TF)
+ ConstantPoolSDNode(bool isTarget, const Constant *c, EVT VT, int o,
+ unsigned Align, unsigned char TF)
: SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool,
DebugLoc(),
getSDVTList(VT)), Offset(o), Alignment(Align), TargetFlags(TF) {
@@ -1961,7 +1234,7 @@ public:
return (int)Offset < 0;
}
- Constant *getConstVal() const {
+ const Constant *getConstVal() const {
assert(!isMachineConstantPoolEntry() && "Wrong constantpool type");
return Val.ConstVal;
}
@@ -2053,6 +1326,21 @@ public:
return N->getOpcode() == ISD::SRCVALUE;
}
};
+
+class MDNodeSDNode : public SDNode {
+ const MDNode *MD;
+ friend class SelectionDAG;
+ explicit MDNodeSDNode(const MDNode *md)
+ : SDNode(ISD::MDNODE_SDNODE, DebugLoc(), getSDVTList(MVT::Other)), MD(md) {}
+public:
+
+ const MDNode *getMD() const { return MD; }
+
+ static bool classof(const MDNodeSDNode *) { return true; }
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == ISD::MDNODE_SDNODE;
+ }
+};
class RegisterSDNode : public SDNode {
@@ -2072,16 +1360,16 @@ public:
};
class BlockAddressSDNode : public SDNode {
- BlockAddress *BA;
+ const BlockAddress *BA;
unsigned char TargetFlags;
friend class SelectionDAG;
- BlockAddressSDNode(unsigned NodeTy, EVT VT, BlockAddress *ba,
+ BlockAddressSDNode(unsigned NodeTy, EVT VT, const BlockAddress *ba,
unsigned char Flags)
: SDNode(NodeTy, DebugLoc(), getSDVTList(VT)),
BA(ba), TargetFlags(Flags) {
}
public:
- BlockAddress *getBlockAddress() const { return BA; }
+ const BlockAddress *getBlockAddress() const { return BA; }
unsigned char getTargetFlags() const { return TargetFlags; }
static bool classof(const BlockAddressSDNode *) { return true; }
@@ -2588,7 +1876,6 @@ namespace ISD {
}
}
-
} // end llvm namespace
#endif
diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h
index caefdf4489df..3c56d0d67dd9 100644
--- a/include/llvm/CodeGen/SlotIndexes.h
+++ b/include/llvm/CodeGen/SlotIndexes.h
@@ -28,7 +28,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/Allocator.h"
-#include "llvm/Support/ErrorHandling.h"
namespace llvm {
@@ -37,8 +36,6 @@ namespace llvm {
/// SlotIndex & SlotIndexes classes for the public interface to this
/// information.
class IndexListEntry {
- private:
-
static const unsigned EMPTY_KEY_INDEX = ~0U & ~3U,
TOMBSTONE_KEY_INDEX = ~0U & ~7U;
@@ -66,10 +63,9 @@ namespace llvm {
public:
IndexListEntry(MachineInstr *mi, unsigned index) : mi(mi), index(index) {
- if (index == EMPTY_KEY_INDEX || index == TOMBSTONE_KEY_INDEX) {
- llvm_report_error("Attempt to create invalid index. "
- "Available indexes may have been exhausted?.");
- }
+ assert(index != EMPTY_KEY_INDEX && index != TOMBSTONE_KEY_INDEX &&
+ "Attempt to create invalid index. "
+ "Available indexes may have been exhausted?.");
}
bool isValid() const {
diff --git a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
index b34ac21cf1ee..90905f51e377 100644
--- a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
+++ b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
@@ -33,7 +33,6 @@ namespace llvm {
class TargetLoweringObjectFileELF : public TargetLoweringObjectFile {
- mutable void *UniquingMap;
protected:
/// TLSDataSection - Section directive for Thread Local data.
///
@@ -52,14 +51,9 @@ protected:
const MCSection *MergeableConst4Section;
const MCSection *MergeableConst8Section;
const MCSection *MergeableConst16Section;
-
-protected:
- const MCSection *getELFSection(StringRef Section, unsigned Type,
- unsigned Flags, SectionKind Kind,
- bool IsExplicit = false) const;
public:
- TargetLoweringObjectFileELF() : UniquingMap(0) {}
- ~TargetLoweringObjectFileELF();
+ TargetLoweringObjectFileELF() {}
+ ~TargetLoweringObjectFileELF() {}
virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
@@ -90,8 +84,6 @@ public:
class TargetLoweringObjectFileMachO : public TargetLoweringObjectFile {
- mutable void *UniquingMap;
-
const MCSection *CStringSection;
const MCSection *UStringSection;
const MCSection *TextCoalSection;
@@ -108,8 +100,8 @@ class TargetLoweringObjectFileMachO : public TargetLoweringObjectFile {
const MCSection *LazySymbolPointerSection;
const MCSection *NonLazySymbolPointerSection;
public:
- TargetLoweringObjectFileMachO() : UniquingMap(0) {}
- ~TargetLoweringObjectFileMachO();
+ TargetLoweringObjectFileMachO() {}
+ ~TargetLoweringObjectFileMachO() {}
virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
@@ -129,20 +121,6 @@ public:
virtual bool shouldEmitUsedDirectiveFor(const GlobalValue *GV,
Mangler *) const;
- /// getMachOSection - Return the MCSection for the specified mach-o section.
- /// This requires the operands to be valid.
- const MCSectionMachO *getMachOSection(StringRef Segment,
- StringRef Section,
- unsigned TypeAndAttributes,
- SectionKind K) const {
- return getMachOSection(Segment, Section, TypeAndAttributes, 0, K);
- }
- const MCSectionMachO *getMachOSection(StringRef Segment,
- StringRef Section,
- unsigned TypeAndAttributes,
- unsigned Reserved2,
- SectionKind K) const;
-
/// getTextCoalSection - Return the "__TEXT,__textcoal_nt" section we put weak
/// text symbols into.
const MCSection *getTextCoalSection() const {
diff --git a/include/llvm/Config/config.h.in b/include/llvm/Config/config.h.in
index 5caf778a2d7a..c2ab23e3e321 100644
--- a/include/llvm/Config/config.h.in
+++ b/include/llvm/Config/config.h.in
@@ -264,6 +264,9 @@
/* Define to 1 if you have the `opendir' function. */
#undef HAVE_OPENDIR
+/* Define to 1 if you have the `posix_spawn' function. */
+#undef HAVE_POSIX_SPAWN
+
/* Define to 1 if you have the `powf' function. */
#undef HAVE_POWF
diff --git a/include/llvm/InlineAsm.h b/include/llvm/InlineAsm.h
index 2ac0fcab7960..f4d125b4dc39 100644
--- a/include/llvm/InlineAsm.h
+++ b/include/llvm/InlineAsm.h
@@ -146,6 +146,49 @@ public:
return V->getValueID() == Value::InlineAsmVal;
}
+
+ // These are helper methods for dealing with flags in the INLINEASM SDNode
+ // in the backend.
+
+ enum {
+ Op_InputChain = 0,
+ Op_AsmString = 1,
+ Op_MDNode = 2,
+ Op_FirstOperand = 3,
+
+ Kind_RegUse = 1,
+ Kind_RegDef = 2,
+ Kind_Imm = 3,
+ Kind_Mem = 4,
+ Kind_RegDefEarlyClobber = 6,
+
+ Flag_MatchingOperand = 0x80000000
+ };
+
+ static unsigned getFlagWord(unsigned Kind, unsigned NumOps) {
+ assert(((NumOps << 3) & ~0xffff) == 0 && "Too many inline asm operands!");
+ return Kind | (NumOps << 3);
+ }
+
+ /// getFlagWordForMatchingOp - Augment an existing flag word returned by
+ /// getFlagWord with information indicating that this input operand is tied
+ /// to a previous output operand.
+ static unsigned getFlagWordForMatchingOp(unsigned InputFlag,
+ unsigned MatchedOperandNo) {
+ return InputFlag | Flag_MatchingOperand | (MatchedOperandNo << 16);
+ }
+
+ static unsigned getKind(unsigned Flags) {
+ return Flags & 7;
+ }
+
+ static bool isRegDefKind(unsigned Flag){ return getKind(Flag) == Kind_RegDef;}
+ static bool isImmKind(unsigned Flag) { return getKind(Flag) == Kind_Imm; }
+ static bool isMemKind(unsigned Flag) { return getKind(Flag) == Kind_Mem; }
+ static bool isRegDefEarlyClobberKind(unsigned Flag) {
+ return getKind(Flag) == Kind_RegDefEarlyClobber;
+ }
+
/// getNumOperandRegisters - Extract the number of registers field from the
/// inline asm operand flag.
static unsigned getNumOperandRegisters(unsigned Flag) {
@@ -155,9 +198,9 @@ public:
/// isUseOperandTiedToDef - Return true if the flag of the inline asm
/// operand indicates it is an use operand that's matched to a def operand.
static bool isUseOperandTiedToDef(unsigned Flag, unsigned &Idx) {
- if ((Flag & 0x80000000) == 0)
+ if ((Flag & Flag_MatchingOperand) == 0)
return false;
- Idx = (Flag & ~0x80000000) >> 16;
+ Idx = (Flag & ~Flag_MatchingOperand) >> 16;
return true;
}
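
With the named constants, the INLINEASM flag-word encoding is explicit: the
kind in bits 0-2, the register count in bits 3-15, and, when
Flag_MatchingOperand is set, the matched operand number from bit 16 up. A
small round-trip sketch using only the helpers above:

    #include "llvm/InlineAsm.h"
    #include <cassert>
    using namespace llvm;

    void roundTripInlineAsmFlag() {
      // A register-use operand covering two registers...
      unsigned Flag = InlineAsm::getFlagWord(InlineAsm::Kind_RegUse, 2);
      // ...tied to output (def) operand #1.
      Flag = InlineAsm::getFlagWordForMatchingOp(Flag, 1);

      assert(InlineAsm::getKind(Flag) == InlineAsm::Kind_RegUse);
      assert(InlineAsm::getNumOperandRegisters(Flag) == 2);
      unsigned Idx = 0;
      bool Tied = InlineAsm::isUseOperandTiedToDef(Flag, Idx);
      assert(Tied && Idx == 1);
      (void)Tied; (void)Idx;
    }
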
diff --git a/include/llvm/IntrinsicInst.h b/include/llvm/IntrinsicInst.h
index bd8a8c4e9d39..5b0e90f5cc07 100644
--- a/include/llvm/IntrinsicInst.h
+++ b/include/llvm/IntrinsicInst.h
@@ -105,8 +105,7 @@ namespace llvm {
return cast<ConstantInt>(
const_cast<Value*>(getOperand(2)))->getZExtValue();
}
- const MDNode *getVariable() const { return cast<MDNode>(getOperand(3)); }
- MDNode *getVariable() { return cast<MDNode>(getOperand(3)); }
+ MDNode *getVariable() const { return cast<MDNode>(getOperand(3)); }
// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const DbgValueInst *) { return true; }
diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td
index a48a1ed27741..3ca9cb444191 100644
--- a/include/llvm/IntrinsicsX86.td
+++ b/include/llvm/IntrinsicsX86.td
@@ -669,16 +669,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
}
-// Align ops
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_ssse3_palign_r :
- Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty,
- llvm_v1i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_ssse3_palign_r_128 :
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
- llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
-}
-
//===----------------------------------------------------------------------===//
// SSE4.1
diff --git a/include/llvm/LLVMContext.h b/include/llvm/LLVMContext.h
index 43548407f681..afae08b07daf 100644
--- a/include/llvm/LLVMContext.h
+++ b/include/llvm/LLVMContext.h
@@ -19,6 +19,7 @@ namespace llvm {
class LLVMContextImpl;
class StringRef;
+class Instruction;
template <typename T> class SmallVectorImpl;
/// This is an important class for using LLVM in a threaded context. It
@@ -68,6 +69,15 @@ public:
/// setInlineAsmDiagnosticHandler.
void *getInlineAsmDiagnosticContext() const;
+
+ /// emitError - Emit an error message to the currently installed error handler
+ /// with optional location information. This function returns, so code should
+ /// be prepared to drop the erroneous construct on the floor and "not crash".
+ /// The generated code need not be correct. The error message will be
+ /// implicitly prefixed with "error: " and should not end with a ".".
+ void emitError(unsigned LocCookie, StringRef ErrorStr);
+ void emitError(const Instruction *I, StringRef ErrorStr);
+ void emitError(StringRef ErrorStr);
};
/// getGlobalContext - Returns a global context. This is for LLVM clients that
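
A sketch of the intended use of the new hook (hypothetical caller; the point
of the contract above is that compilation continues after the call):

    // Report an unsupported construct and keep going; the caller then
    // drops the instruction on the floor rather than crashing.
    void rejectInstruction(const Instruction *I) {
      I->getContext().emitError(I, "unsupported instruction for this target");
    }
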
diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h
index ae538518ceb3..1e2c37a24e9a 100644
--- a/include/llvm/LinkAllPasses.h
+++ b/include/llvm/LinkAllPasses.h
@@ -23,6 +23,7 @@
#include "llvm/Analysis/PointerTracking.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/Lint.h"
#include "llvm/Assembly/PrintModulePass.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Function.h"
@@ -135,8 +136,8 @@ namespace {
(void) llvm::createSSIPass();
(void) llvm::createSSIEverythingPass();
(void) llvm::createGEPSplitterPass();
- (void) llvm::createSCCVNPass();
(void) llvm::createABCDPass();
+ (void) llvm::createLintPass();
(void)new llvm::IntervalPartition();
(void)new llvm::FindUsedTypes();
diff --git a/include/llvm/MC/EDInstInfo.h b/include/llvm/MC/EDInstInfo.h
new file mode 100644
index 000000000000..dded25521a27
--- /dev/null
+++ b/include/llvm/MC/EDInstInfo.h
@@ -0,0 +1,29 @@
+//===-- llvm/MC/EDInstInfo.h - EDis instruction info ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#ifndef EDINSTINFO_H
+#define EDINSTINFO_H
+
+#include "llvm/System/DataTypes.h"
+
+namespace llvm {
+
+#define EDIS_MAX_OPERANDS 13
+#define EDIS_MAX_SYNTAXES 2
+
+struct EDInstInfo {
+ uint8_t instructionType;
+ uint8_t numOperands;
+ uint8_t operandTypes[EDIS_MAX_OPERANDS];
+ uint8_t operandFlags[EDIS_MAX_OPERANDS];
+ const char operandOrders[EDIS_MAX_SYNTAXES][EDIS_MAX_OPERANDS];
+};
+
+} // namespace llvm
+
+#endif
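
The tables behind this struct are meant to be emitted by a TableGen backend;
a hand-written entry (all values purely illustrative) shows the intended
shape:

    // Purely illustrative; real tables come from TableGen.
    static EDInstInfo DemoInfo = {
      /*instructionType*/ 0,
      /*numOperands*/     2,
      /*operandTypes*/    {0},             // trailing elements zero-filled
      /*operandFlags*/    {0},
      /*operandOrders*/   {{0, 1}, {1, 0}} // one order per assembly syntax
    };
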
diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h
index 33def8618990..f57f6426e0ac 100644
--- a/include/llvm/MC/MCAsmInfo.h
+++ b/include/llvm/MC/MCAsmInfo.h
@@ -97,7 +97,11 @@ namespace llvm {
/// AllowNameToStartWithDigit - This is true if the assembler allows symbol
/// names to start with a digit (e.g., "0x0021"). This defaults to false.
bool AllowNameToStartWithDigit;
-
+
+ /// AllowPeriodsInName - This is true if the assembler allows periods in
+ /// symbol names. This defaults to true.
+ bool AllowPeriodsInName;
+
//===--- Data Emission Directives -------------------------------------===//
/// ZeroDirective - this should be set to the directive used to get some
@@ -280,7 +284,7 @@ namespace llvm {
/// getNonexecutableStackSection - Targets can implement this method to
/// specify a section to switch to if the translation unit doesn't have any
/// trampolines that require an executable stack.
- virtual MCSection *getNonexecutableStackSection(MCContext &Ctx) const {
+ virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const{
return 0;
}
@@ -341,6 +345,9 @@ namespace llvm {
bool doesAllowNameToStartWithDigit() const {
return AllowNameToStartWithDigit;
}
+ bool doesAllowPeriodsInName() const {
+ return AllowPeriodsInName;
+ }
const char *getZeroDirective() const {
return ZeroDirective;
}
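
A hedged sketch of a consumer (hypothetical helper; in-tree, flags like this
are consulted when deciding whether a symbol name can be emitted verbatim):

    #include "llvm/MC/MCAsmInfo.h"
    #include <cctype>
    using namespace llvm;

    // Whether C may appear unmangled in a symbol name for this assembler.
    static bool isAcceptableNameChar(char C, const MCAsmInfo &MAI) {
      if (C == '.')
        return MAI.doesAllowPeriodsInName();
      return isalnum(static_cast<unsigned char>(C)) || C == '_' || C == '$';
    }
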
diff --git a/include/llvm/MC/MCContext.h b/include/llvm/MC/MCContext.h
index 968d55f214c7..4434341f5295 100644
--- a/include/llvm/MC/MCContext.h
+++ b/include/llvm/MC/MCContext.h
@@ -10,6 +10,7 @@
#ifndef LLVM_MC_MCCONTEXT_H
#define LLVM_MC_MCCONTEXT_H
+#include "llvm/MC/SectionKind.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/Allocator.h"
@@ -21,6 +22,7 @@ namespace llvm {
class MCSymbol;
class StringRef;
class Twine;
+ class MCSectionMachO;
/// MCContext - Context object for machine code objects. This class owns all
/// of the sections that it creates.
@@ -47,6 +49,8 @@ namespace llvm {
/// We use a bump pointer allocator to avoid the need to track all allocated
/// objects.
BumpPtrAllocator Allocator;
+
+ void *MachOUniquingMap, *ELFUniquingMap;
public:
explicit MCContext(const MCAsmInfo &MAI);
~MCContext();
@@ -72,6 +76,29 @@ namespace llvm {
MCSymbol *LookupSymbol(StringRef Name) const;
/// @}
+
+ /// @name Section Management
+ /// @{
+
+ /// getMachOSection - Return the MCSection for the specified mach-o section.
+ /// This requires the operands to be valid.
+ const MCSectionMachO *getMachOSection(StringRef Segment,
+ StringRef Section,
+ unsigned TypeAndAttributes,
+ unsigned Reserved2,
+ SectionKind K);
+ const MCSectionMachO *getMachOSection(StringRef Segment,
+ StringRef Section,
+ unsigned TypeAndAttributes,
+ SectionKind K) {
+ return getMachOSection(Segment, Section, TypeAndAttributes, 0, K);
+ }
+
+ const MCSection *getELFSection(StringRef Section, unsigned Type,
+ unsigned Flags, SectionKind Kind,
+ bool IsExplicit = false);
+
+ /// @}
void *Allocate(unsigned Size, unsigned Align = 8) {
return Allocator.Allocate(Size, Align);
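
With the uniquing maps owned by the context, identical requests hand back
the same section object. A sketch (the section attributes are just an
example):

    #include "llvm/MC/MCContext.h"
    #include "llvm/MC/MCSectionMachO.h"
    #include <cassert>
    using namespace llvm;

    void textSectionIsUniqued(MCContext &Ctx) {
      const MCSectionMachO *A =
        Ctx.getMachOSection("__TEXT", "__text",
                            MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
                            SectionKind::getText());
      const MCSectionMachO *B =
        Ctx.getMachOSection("__TEXT", "__text",
                            MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
                            SectionKind::getText());
      assert(A == B && "sections are uniqued by the owning MCContext");
    }
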
diff --git a/include/llvm/MC/MCDisassembler.h b/include/llvm/MC/MCDisassembler.h
index ffa0e419cc52..dfb8ed5e8a10 100644
--- a/include/llvm/MC/MCDisassembler.h
+++ b/include/llvm/MC/MCDisassembler.h
@@ -16,6 +16,8 @@ namespace llvm {
class MCInst;
class MemoryObject;
class raw_ostream;
+
+struct EDInstInfo;
/// MCDisassembler - Superclass for all disassemblers. Consumes a memory region
/// and provides an array of assembly instructions.
@@ -43,7 +45,15 @@ public:
const MemoryObject &region,
uint64_t address,
raw_ostream &vStream) const = 0;
-};
+
+ /// getEDInfo - Returns the enhanced instruction information corresponding to
+ /// the disassembler.
+ ///
+ /// @return - An array of instruction information, with one entry for
+ /// each MCInst opcode this disassembler returns.
+ /// NULL if there is no info for this target.
+ virtual EDInstInfo *getEDInfo() const { return (EDInstInfo*)0; }
+};
} // namespace llvm
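
A sketch of a target wiring up the new hook (the table name is made up; real
tables are TableGen-emitted):

    #include "llvm/MC/MCDisassembler.h"
    #include "llvm/MC/EDInstInfo.h"
    using namespace llvm;

    extern EDInstInfo MyTargetEDInstInfo[]; // hypothetical TableGen output

    class MyTargetDisassembler : public MCDisassembler {
    public:
      virtual bool getInstruction(MCInst &instr, uint64_t &size,
                                  const MemoryObject &region,
                                  uint64_t address,
                                  raw_ostream &vStream) const; // elsewhere
      virtual EDInstInfo *getEDInfo() const { return MyTargetEDInstInfo; }
    };
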
diff --git a/include/llvm/MC/MCObjectWriter.h b/include/llvm/MC/MCObjectWriter.h
index f70a3d1851ea..522ee77826f8 100644
--- a/include/llvm/MC/MCObjectWriter.h
+++ b/include/llvm/MC/MCObjectWriter.h
@@ -49,7 +49,7 @@ protected: // Can only create subclasses.
public:
virtual ~MCObjectWriter();
- bool isLittleEndian() { return IsLittleEndian; }
+ bool isLittleEndian() const { return IsLittleEndian; }
raw_ostream &getStream() { return OS; }
diff --git a/include/llvm/MC/MCParser/AsmParser.h b/include/llvm/MC/MCParser/AsmParser.h
index 23f4a1fb72a2..7a78906733af 100644
--- a/include/llvm/MC/MCParser/AsmParser.h
+++ b/include/llvm/MC/MCParser/AsmParser.h
@@ -14,7 +14,6 @@
#ifndef ASMPARSER_H
#define ASMPARSER_H
-#include <vector>
#include "llvm/MC/MCParser/AsmLexer.h"
#include "llvm/MC/MCParser/AsmCond.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
@@ -22,6 +21,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/ADT/StringMap.h"
+#include <vector>
namespace llvm {
class AsmCond;
@@ -50,10 +50,6 @@ private:
AsmCond TheCondState;
std::vector<AsmCond> TheCondStack;
- // FIXME: Figure out where this should live; the code is a copy of that which
- // is also used by TargetLoweringObjectFile.
- mutable void *SectionUniquingMap;
-
/// DirectiveMap - This is a table handlers for directives. Each handler is
/// invoked after the directive identifier is read and is responsible for
/// parsing and validating the rest of the directive. The handler is passed
@@ -97,13 +93,6 @@ public:
private:
MCSymbol *CreateSymbol(StringRef Name);
- // FIXME: See comment on SectionUniquingMap.
- const MCSection *getMachOSection(const StringRef &Segment,
- const StringRef &Section,
- unsigned TypeAndAttributes,
- unsigned Reserved2,
- SectionKind Kind) const;
-
bool ParseStatement();
bool TokError(const char *Msg);
@@ -113,8 +102,6 @@ private:
/// EnterIncludeFile - Enter the specified file. This returns true on failure.
bool EnterIncludeFile(const std::string &Filename);
- bool ParseConditionalAssemblyDirectives(StringRef Directive,
- SMLoc DirectiveLoc);
void EatToEndOfStatement();
bool ParseAssignment(const StringRef &Name);
diff --git a/include/llvm/MC/MCParser/MCAsmLexer.h b/include/llvm/MC/MCParser/MCAsmLexer.h
index 043c36330862..075b69b628a5 100644
--- a/include/llvm/MC/MCParser/MCAsmLexer.h
+++ b/include/llvm/MC/MCParser/MCAsmLexer.h
@@ -42,7 +42,7 @@ public:
Plus, Minus, Tilde,
Slash, // '/'
LParen, RParen, LBrac, RBrac, LCurly, RCurly,
- Star, Comma, Dollar, Equal, EqualEqual,
+ Star, Dot, Comma, Dollar, Equal, EqualEqual,
Pipe, PipePipe, Caret,
Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash,
diff --git a/include/llvm/MC/MCSectionELF.h b/include/llvm/MC/MCSectionELF.h
index e550cd2c1111..7054668eb14e 100644
--- a/include/llvm/MC/MCSectionELF.h
+++ b/include/llvm/MC/MCSectionELF.h
@@ -36,16 +36,14 @@ class MCSectionELF : public MCSection {
/// explicit section specified.
bool IsExplicit;
-protected:
+private:
+ friend class MCContext;
MCSectionELF(StringRef Section, unsigned type, unsigned flags,
SectionKind K, bool isExplicit)
: MCSection(K), SectionName(Section), Type(type), Flags(flags),
IsExplicit(isExplicit) {}
+ ~MCSectionELF();
public:
-
- static MCSectionELF *Create(StringRef Section, unsigned Type,
- unsigned Flags, SectionKind K, bool isExplicit,
- MCContext &Ctx);
/// ShouldOmitSectionDirective - Decides whether a '.section' directive
/// should be printed before the section name
@@ -153,40 +151,33 @@ public:
// This section holds Thread-Local Storage.
SHF_TLS = 0x400U,
+
+
+ // Start of target-specific flags.
+
+ /// XCORE_SHF_CP_SECTION - All sections with the "c" flag are grouped
+ /// together by the linker to form the constant pool and the cp register is
+ /// set to the start of the constant pool by the boot code.
+ XCORE_SHF_CP_SECTION = 0x800U,
- /// FIRST_TARGET_DEP_FLAG - This is the first flag that subclasses are
- /// allowed to specify.
- FIRST_TARGET_DEP_FLAG = 0x800U,
-
- /// TARGET_INDEP_SHF - This is the bitmask for all the target independent
- /// section flags. Targets can define their own target flags above these.
- /// If they do that, they should implement their own MCSectionELF subclasses
- /// and implement the virtual method hooks below to handle printing needs.
- TARGET_INDEP_SHF = FIRST_TARGET_DEP_FLAG-1U
+ /// XCORE_SHF_DP_SECTION - All sections with the "d" flag are grouped
+ /// together by the linker to form the data section and the dp register is
+ /// set to the start of the section by the boot code.
+ XCORE_SHF_DP_SECTION = 0x1000U
};
StringRef getSectionName() const { return SectionName; }
unsigned getType() const { return Type; }
unsigned getFlags() const { return Flags; }
- virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
- raw_ostream &OS) const;
+ void PrintSwitchToSection(const MCAsmInfo &MAI,
+ raw_ostream &OS) const;
/// isBaseAddressKnownZero - We know that non-allocatable sections (like
/// debug info) have a base of zero.
virtual bool isBaseAddressKnownZero() const {
return (getFlags() & SHF_ALLOC) == 0;
}
-
- /// PrintTargetSpecificSectionFlags - Targets that define their own
- /// MCSectionELF subclasses with target specific section flags should
- /// implement this method if they end up adding letters to the attributes
- /// list.
- virtual void PrintTargetSpecificSectionFlags(const MCAsmInfo &MAI,
- raw_ostream &OS) const {
- }
-
-
};
} // end namespace llvm
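
With the XCore flags folded into the shared enum, target-specific attribute
letters become plain bit tests instead of a virtual hook. A hypothetical
helper, not the in-tree printer:

    void appendXCoreSectionFlags(unsigned Flags, raw_ostream &OS) {
      if (Flags & MCSectionELF::XCORE_SHF_CP_SECTION)
        OS << 'c'; // grouped into the constant pool, addressed via cp
      if (Flags & MCSectionELF::XCORE_SHF_DP_SECTION)
        OS << 'd'; // grouped into the data section, addressed via dp
    }
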
diff --git a/include/llvm/MC/MCSectionMachO.h b/include/llvm/MC/MCSectionMachO.h
index 5839c281ed6f..f3bc8edd84ef 100644
--- a/include/llvm/MC/MCSectionMachO.h
+++ b/include/llvm/MC/MCSectionMachO.h
@@ -34,30 +34,10 @@ class MCSectionMachO : public MCSection {
unsigned Reserved2;
MCSectionMachO(StringRef Segment, StringRef Section,
- unsigned TAA, unsigned reserved2, SectionKind K)
- : MCSection(K), TypeAndAttributes(TAA), Reserved2(reserved2) {
- assert(Segment.size() <= 16 && Section.size() <= 16 &&
- "Segment or section string too long");
- for (unsigned i = 0; i != 16; ++i) {
- if (i < Segment.size())
- SegmentName[i] = Segment[i];
- else
- SegmentName[i] = 0;
-
- if (i < Section.size())
- SectionName[i] = Section[i];
- else
- SectionName[i] = 0;
- }
- }
+ unsigned TAA, unsigned reserved2, SectionKind K);
+ friend class MCContext;
public:
- static MCSectionMachO *Create(StringRef Segment,
- StringRef Section,
- unsigned TypeAndAttributes,
- unsigned Reserved2,
- SectionKind K, MCContext &Ctx);
-
/// These are the section type and attributes fields. A MachO section can
/// have only one Type, but can have any of the attributes specified.
enum {
diff --git a/include/llvm/Metadata.h b/include/llvm/Metadata.h
index ff3e03e96fd8..64ab6b7583ed 100644
--- a/include/llvm/Metadata.h
+++ b/include/llvm/Metadata.h
@@ -91,7 +91,7 @@ class MDNode : public Value, public FoldingSetNode {
FunctionLocalBit = 1 << 0,
/// NotUniquedBit - This is set on MDNodes that are not uniqued because they
- /// have a null perand.
+ /// have a null operand.
NotUniquedBit = 1 << 1,
/// DestroyFlag - This bit is set by destroy() so the destructor can assert
diff --git a/include/llvm/Support/Allocator.h b/include/llvm/Support/Allocator.h
index 148d47e785c7..4a7251fa1ef3 100644
--- a/include/llvm/Support/Allocator.h
+++ b/include/llvm/Support/Allocator.h
@@ -236,4 +236,6 @@ inline void *operator new(size_t Size, llvm::BumpPtrAllocator &Allocator) {
offsetof(S, x)));
}
+inline void operator delete(void *, llvm::BumpPtrAllocator &) {}
+
#endif // LLVM_SUPPORT_ALLOCATOR_H
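
The no-op placement delete pairs with the placement new above it: if a
constructor throws inside "new (Allocator) T(...)", the compiler now has a
matching operator delete to call. Usage is unchanged:

    #include "llvm/Support/Allocator.h"
    using namespace llvm;

    struct Node {
      int Value;
      explicit Node(int V) : Value(V) {}
    };

    void makeNode() {
      BumpPtrAllocator Alloc;
      // Placement new into the bump allocator; if Node's constructor
      // threw, the new no-op operator delete would be selected.
      Node *N = new (Alloc) Node(42);
      (void)N; // objects are never freed individually; Alloc reclaims all
    }
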
diff --git a/include/llvm/Support/CFG.h b/include/llvm/Support/CFG.h
index 57699c708890..f07c719f194d 100644
--- a/include/llvm/Support/CFG.h
+++ b/include/llvm/Support/CFG.h
@@ -25,28 +25,29 @@ namespace llvm {
// BasicBlock pred_iterator definition
//===----------------------------------------------------------------------===//
-template <class _Ptr, class _USE_iterator> // Predecessor Iterator
+template <class Ptr, class USE_iterator> // Predecessor Iterator
class PredIterator : public std::iterator<std::forward_iterator_tag,
- _Ptr, ptrdiff_t> {
- typedef std::iterator<std::forward_iterator_tag, _Ptr, ptrdiff_t> super;
- _USE_iterator It;
-public:
- typedef PredIterator<_Ptr,_USE_iterator> _Self;
- typedef typename super::pointer pointer;
+ Ptr, ptrdiff_t> {
+ typedef std::iterator<std::forward_iterator_tag, Ptr, ptrdiff_t> super;
+ typedef PredIterator<Ptr, USE_iterator> Self;
+ USE_iterator It;
inline void advancePastNonTerminators() {
- // Loop to ignore non terminator uses (for example PHI nodes)...
+ // Loop to ignore non terminator uses (for example PHI nodes).
while (!It.atEnd() && !isa<TerminatorInst>(*It))
++It;
}
- inline PredIterator(_Ptr *bb) : It(bb->use_begin()) {
+public:
+ typedef typename super::pointer pointer;
+
+ explicit inline PredIterator(Ptr *bb) : It(bb->use_begin()) {
advancePastNonTerminators();
}
- inline PredIterator(_Ptr *bb, bool) : It(bb->use_end()) {}
+ inline PredIterator(Ptr *bb, bool) : It(bb->use_end()) {}
- inline bool operator==(const _Self& x) const { return It == x.It; }
- inline bool operator!=(const _Self& x) const { return !operator==(x); }
+ inline bool operator==(const Self& x) const { return It == x.It; }
+ inline bool operator!=(const Self& x) const { return !operator==(x); }
inline pointer operator*() const {
assert(!It.atEnd() && "pred_iterator out of range!");
@@ -54,14 +55,14 @@ public:
}
inline pointer *operator->() const { return &(operator*()); }
- inline _Self& operator++() { // Preincrement
+ inline Self& operator++() { // Preincrement
assert(!It.atEnd() && "pred_iterator out of range!");
++It; advancePastNonTerminators();
return *this;
}
- inline _Self operator++(int) { // Postincrement
- _Self tmp = *this; ++*this; return tmp;
+ inline Self operator++(int) { // Postincrement
+ Self tmp = *this; ++*this; return tmp;
}
};
@@ -90,12 +91,17 @@ class SuccIterator : public std::iterator<std::bidirectional_iterator_tag,
const Term_ Term;
unsigned idx;
typedef std::iterator<std::bidirectional_iterator_tag, BB_, ptrdiff_t> super;
+ typedef SuccIterator<Term_, BB_> Self;
+
+ inline bool index_is_valid(int idx) {
+ return idx >= 0 && (unsigned) idx < Term->getNumSuccessors();
+ }
+
public:
- typedef SuccIterator<Term_, BB_> _Self;
typedef typename super::pointer pointer;
// TODO: This can be random access iterator, only operator[] missing.
- inline SuccIterator(Term_ T) : Term(T), idx(0) { // begin iterator
+ explicit inline SuccIterator(Term_ T) : Term(T), idx(0) {// begin iterator
assert(T && "getTerminator returned null!");
}
inline SuccIterator(Term_ T, bool) // end iterator
@@ -103,78 +109,74 @@ public:
assert(T && "getTerminator returned null!");
}
- inline const _Self &operator=(const _Self &I) {
+ inline const Self &operator=(const Self &I) {
assert(Term == I.Term &&"Cannot assign iterators to two different blocks!");
idx = I.idx;
return *this;
}
- inline bool index_is_valid (int idx) {
- return idx >= 0 && (unsigned) idx < Term->getNumSuccessors();
- }
-
/// getSuccessorIndex - This is used to interface with code that wants to
/// operate on terminator instructions directly.
unsigned getSuccessorIndex() const { return idx; }
- inline bool operator==(const _Self& x) const { return idx == x.idx; }
- inline bool operator!=(const _Self& x) const { return !operator==(x); }
+ inline bool operator==(const Self& x) const { return idx == x.idx; }
+ inline bool operator!=(const Self& x) const { return !operator==(x); }
inline pointer operator*() const { return Term->getSuccessor(idx); }
inline pointer operator->() const { return operator*(); }
- inline _Self& operator++() { ++idx; return *this; } // Preincrement
+ inline Self& operator++() { ++idx; return *this; } // Preincrement
- inline _Self operator++(int) { // Postincrement
- _Self tmp = *this; ++*this; return tmp;
+ inline Self operator++(int) { // Postincrement
+ Self tmp = *this; ++*this; return tmp;
}
- inline _Self& operator--() { --idx; return *this; } // Predecrement
- inline _Self operator--(int) { // Postdecrement
- _Self tmp = *this; --*this; return tmp;
+ inline Self& operator--() { --idx; return *this; } // Predecrement
+ inline Self operator--(int) { // Postdecrement
+ Self tmp = *this; --*this; return tmp;
}
- inline bool operator<(const _Self& x) const {
+ inline bool operator<(const Self& x) const {
assert(Term == x.Term && "Cannot compare iterators of different blocks!");
return idx < x.idx;
}
- inline bool operator<=(const _Self& x) const {
+ inline bool operator<=(const Self& x) const {
assert(Term == x.Term && "Cannot compare iterators of different blocks!");
return idx <= x.idx;
}
- inline bool operator>=(const _Self& x) const {
+ inline bool operator>=(const Self& x) const {
assert(Term == x.Term && "Cannot compare iterators of different blocks!");
return idx >= x.idx;
}
- inline bool operator>(const _Self& x) const {
+ inline bool operator>(const Self& x) const {
assert(Term == x.Term && "Cannot compare iterators of different blocks!");
return idx > x.idx;
}
- inline _Self& operator+=(int Right) {
+ inline Self& operator+=(int Right) {
unsigned new_idx = idx + Right;
assert(index_is_valid(new_idx) && "Iterator index out of bounds");
idx = new_idx;
return *this;
}
- inline _Self operator+(int Right) {
- _Self tmp = *this;
+ inline Self operator+(int Right) {
+ Self tmp = *this;
tmp += Right;
return tmp;
}
- inline _Self& operator-=(int Right) {
+ inline Self& operator-=(int Right) {
return operator+=(-Right);
}
- inline _Self operator-(int Right) {
+ inline Self operator-(int Right) {
return operator+(-Right);
}
- inline int operator-(const _Self& x) {
+ inline int operator-(const Self& x) {
assert(Term == x.Term && "Cannot work on iterators of different blocks!");
int distance = idx - x.idx;
return distance;
@@ -185,14 +187,14 @@ public:
// be modified are not available.
//
// inline pointer operator[](int offset) {
- // _Self tmp = *this;
+ // Self tmp = *this;
// tmp += offset;
// return tmp.operator*();
// }
/// Get the source BB of this iterator.
inline BB_ *getSource() {
- return Term->getParent();
+ return Term->getParent();
}
};
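
Beyond tidiness, the rename away from _Ptr/_USE_iterator/_Self has a standards basis: identifiers beginning with an underscore followed by an uppercase letter are reserved to the implementation in C++, so user code (LLVM included) should not define them. The iterators are normally reached through their typedef helpers, e.g.:

    #include "llvm/BasicBlock.h"
    #include "llvm/Support/CFG.h"
    using namespace llvm;

    // Count the predecessors of a block via the pred_begin/pred_end helpers
    // that wrap the (renamed) PredIterator.
    unsigned countPreds(BasicBlock *BB) {
      unsigned N = 0;
      for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
        ++N;  // *PI is a predecessor BasicBlock*
      return N;
    }
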
diff --git a/include/llvm/Support/CallSite.h b/include/llvm/Support/CallSite.h
index 9f527e2df6cf..0650b61fbcfa 100644
--- a/include/llvm/Support/CallSite.h
+++ b/include/llvm/Support/CallSite.h
@@ -30,7 +30,7 @@
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/BasicBlock.h"
#include "llvm/CallingConv.h"
-#include "llvm/Instruction.h"
+#include "llvm/Instructions.h"
namespace llvm {
@@ -150,6 +150,108 @@ public:
bool arg_empty() const { return arg_end() == arg_begin(); }
unsigned arg_size() const { return unsigned(arg_end() - arg_begin()); }
+ /// getType - Return the type of the instruction that generated this call site
+ ///
+ const Type *getType() const { return (*this)->getType(); }
+
+ /// getCaller - Return the caller function for this call site
+ ///
+ FunTy *getCaller() const { return (*this)->getParent()->getParent(); }
+
+#define CALLSITE_DELEGATE_GETTER(METHOD) \
+ InstrTy *II = getInstruction(); \
+ return isCall() \
+ ? cast<CallInst>(II)->METHOD \
+ : cast<InvokeInst>(II)->METHOD
+
+#define CALLSITE_DELEGATE_SETTER(METHOD) \
+ InstrTy *II = getInstruction(); \
+ if (isCall()) \
+ cast<CallInst>(II)->METHOD; \
+ else \
+ cast<InvokeInst>(II)->METHOD
+
+ /// getCallingConv/setCallingConv - get or set the calling convention of the
+ /// call.
+ CallingConv::ID getCallingConv() const {
+ CALLSITE_DELEGATE_GETTER(getCallingConv());
+ }
+ void setCallingConv(CallingConv::ID CC) {
+ CALLSITE_DELEGATE_SETTER(setCallingConv(CC));
+ }
+
+ /// getAttributes/setAttributes - get or set the parameter attributes of
+ /// the call.
+ const AttrListPtr &getAttributes() const {
+ CALLSITE_DELEGATE_GETTER(getAttributes());
+ }
+ void setAttributes(const AttrListPtr &PAL) {
+ CALLSITE_DELEGATE_SETTER(setAttributes(PAL));
+ }
+
+ /// paramHasAttr - whether the call or the callee has the given attribute.
+ bool paramHasAttr(uint16_t i, Attributes attr) const {
+ CALLSITE_DELEGATE_GETTER(paramHasAttr(i, attr));
+ }
+
+ /// @brief Extract the alignment for a call or parameter (0=unknown).
+ uint16_t getParamAlignment(uint16_t i) const {
+ CALLSITE_DELEGATE_GETTER(getParamAlignment(i));
+ }
+
+ /// @brief Return true if the call should not be inlined.
+ bool isNoInline() const {
+ CALLSITE_DELEGATE_GETTER(isNoInline());
+ }
+ void setIsNoInline(bool Value = true) {
+ CALLSITE_DELEGATE_SETTER(setIsNoInline(Value));
+ }
+
+ /// @brief Determine if the call does not access memory.
+ bool doesNotAccessMemory() const {
+ CALLSITE_DELEGATE_GETTER(doesNotAccessMemory());
+ }
+ void setDoesNotAccessMemory(bool doesNotAccessMemory = true) {
+ CALLSITE_DELEGATE_SETTER(setDoesNotAccessMemory(doesNotAccessMemory));
+ }
+
+ /// @brief Determine if the call does not access or only reads memory.
+ bool onlyReadsMemory() const {
+ CALLSITE_DELEGATE_GETTER(onlyReadsMemory());
+ }
+ void setOnlyReadsMemory(bool onlyReadsMemory = true) {
+ CALLSITE_DELEGATE_SETTER(setOnlyReadsMemory(onlyReadsMemory));
+ }
+
+ /// @brief Determine if the call cannot return.
+ bool doesNotReturn() const {
+ CALLSITE_DELEGATE_GETTER(doesNotReturn());
+ }
+ void setDoesNotReturn(bool doesNotReturn = true) {
+ CALLSITE_DELEGATE_SETTER(setDoesNotReturn(doesNotReturn));
+ }
+
+ /// @brief Determine if the call cannot unwind.
+ bool doesNotThrow() const {
+ CALLSITE_DELEGATE_GETTER(doesNotThrow());
+ }
+ void setDoesNotThrow(bool doesNotThrow = true) {
+ CALLSITE_DELEGATE_SETTER(setDoesNotThrow(doesNotThrow));
+ }
+
+#undef CALLSITE_DELEGATE_GETTER
+#undef CALLSITE_DELEGATE_SETTER
+
+ /// hasArgument - Returns true if this CallSite passes the given Value* as an
+ /// argument to the called function.
+ bool hasArgument(const Value *Arg) const {
+ for (arg_iterator AI = this->arg_begin(), E = this->arg_end(); AI != E;
+ ++AI)
+ if (AI->get() == Arg)
+ return true;
+ return false;
+ }
+
private:
/// Returns the operand number of the first argument
unsigned getArgumentOffset() const {
@@ -179,24 +281,24 @@ private:
/// ImmutableCallSite - establish a view to a call site for examination
class ImmutableCallSite : public CallSiteBase<> {
- typedef CallSiteBase<> _Base;
+ typedef CallSiteBase<> Base;
public:
- ImmutableCallSite(const Value* V) : _Base(V) {}
- ImmutableCallSite(const CallInst *CI) : _Base(CI) {}
- ImmutableCallSite(const InvokeInst *II) : _Base(II) {}
- ImmutableCallSite(const Instruction *II) : _Base(II) {}
+ ImmutableCallSite(const Value* V) : Base(V) {}
+ ImmutableCallSite(const CallInst *CI) : Base(CI) {}
+ ImmutableCallSite(const InvokeInst *II) : Base(II) {}
+ ImmutableCallSite(const Instruction *II) : Base(II) {}
};
class CallSite : public CallSiteBase<Function, Value, User, Instruction,
CallInst, InvokeInst, User::op_iterator> {
typedef CallSiteBase<Function, Value, User, Instruction,
- CallInst, InvokeInst, User::op_iterator> _Base;
+ CallInst, InvokeInst, User::op_iterator> Base;
public:
CallSite() {}
- CallSite(_Base B) : _Base(B) {}
- CallSite(CallInst *CI) : _Base(CI) {}
- CallSite(InvokeInst *II) : _Base(II) {}
- CallSite(Instruction *II) : _Base(II) {}
+ CallSite(Base B) : Base(B) {}
+ CallSite(CallInst *CI) : Base(CI) {}
+ CallSite(InvokeInst *II) : Base(II) {}
+ CallSite(Instruction *II) : Base(II) {}
bool operator==(const CallSite &CS) const { return I == CS.I; }
bool operator!=(const CallSite &CS) const { return I != CS.I; }
@@ -207,57 +309,9 @@ public:
/// NOT a call site.
///
static CallSite get(Value *V) {
- return _Base::get(V);
+ return Base::get(V);
}
- /// getCallingConv/setCallingConv - get or set the calling convention of the
- /// call.
- CallingConv::ID getCallingConv() const;
- void setCallingConv(CallingConv::ID CC);
-
- /// getAttributes/setAttributes - get or set the parameter attributes of
- /// the call.
- const AttrListPtr &getAttributes() const;
- void setAttributes(const AttrListPtr &PAL);
-
- /// paramHasAttr - whether the call or the callee has the given attribute.
- bool paramHasAttr(uint16_t i, Attributes attr) const;
-
- /// @brief Extract the alignment for a call or parameter (0=unknown).
- uint16_t getParamAlignment(uint16_t i) const;
-
- /// @brief Return true if the call should not be inlined.
- bool isNoInline() const;
- void setIsNoInline(bool Value = true);
-
- /// @brief Determine if the call does not access memory.
- bool doesNotAccessMemory() const;
- void setDoesNotAccessMemory(bool doesNotAccessMemory = true);
-
- /// @brief Determine if the call does not access or only reads memory.
- bool onlyReadsMemory() const;
- void setOnlyReadsMemory(bool onlyReadsMemory = true);
-
- /// @brief Determine if the call cannot return.
- bool doesNotReturn() const;
- void setDoesNotReturn(bool doesNotReturn = true);
-
- /// @brief Determine if the call cannot unwind.
- bool doesNotThrow() const;
- void setDoesNotThrow(bool doesNotThrow = true);
-
- /// getType - Return the type of the instruction that generated this call site
- ///
- const Type *getType() const { return (*this)->getType(); }
-
- /// getCaller - Return the caller function for this call site
- ///
- Function *getCaller() const { return (*this)->getParent()->getParent(); }
-
- /// hasArgument - Returns true if this CallSite passes the given Value* as an
- /// argument to the called function.
- bool hasArgument(const Value *Arg) const;
-
bool operator<(const CallSite &CS) const {
return getInstruction() < CS.getInstruction();
}
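
The net effect of this hunk is that the attribute accessors move from out-of-line CallSite methods into the CallSiteBase template, so the read-only ImmutableCallSite gets them for free and the per-call CallInst/InvokeInst dispatch lives in one pair of macros. A hedged usage sketch:

    #include "llvm/Instructions.h"
    #include "llvm/Support/CallSite.h"
    using namespace llvm;

    void markNoUnwind(Instruction *I) {
      CallSite CS = CallSite::get(I);  // empty if I is not a call/invoke
      if (!CS.getInstruction())
        return;
      // Each accessor expands CALLSITE_DELEGATE_* and dispatches to either
      // CallInst or InvokeInst, whichever the call site wraps.
      if (CS.getCallingConv() == CallingConv::Fast)
        CS.setDoesNotThrow();
    }
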
diff --git a/include/llvm/Support/Dwarf.h b/include/llvm/Support/Dwarf.h
index 31f4fd223dc5..3d25e0368b5a 100644
--- a/include/llvm/Support/Dwarf.h
+++ b/include/llvm/Support/Dwarf.h
@@ -230,6 +230,7 @@ enum dwarf_constants {
DW_AT_APPLE_block = 0x3fe4,
DW_AT_APPLE_major_runtime_vers = 0x3fe5,
DW_AT_APPLE_runtime_class = 0x3fe6,
+ DW_AT_APPLE_omit_frame_ptr = 0x3fe7,
// Attribute form encodings
DW_FORM_addr = 0x01,
diff --git a/include/llvm/Support/ELF.h b/include/llvm/Support/ELF.h
index e747c7a74eb9..d09db3998c45 100644
--- a/include/llvm/Support/ELF.h
+++ b/include/llvm/Support/ELF.h
@@ -99,6 +99,12 @@ enum {
ET_HIPROC = 0xffff // Processor-specific
};
+// Versioning
+enum {
+ EV_NONE = 0,
+ EV_CURRENT = 1
+};
+
// Machine architectures
enum {
EM_NONE = 0, // No machine
@@ -129,6 +135,11 @@ enum {
ELFDATA2MSB = 2 // Big-endian object file
};
+// OS ABI identification -- unused.
+enum {
+ ELFOSABI_NONE = 0
+};
+
// Section header.
struct Elf32_Shdr {
Elf32_Word sh_name; // Section name (index into string table)
diff --git a/include/llvm/Support/ErrorHandling.h b/include/llvm/Support/ErrorHandling.h
index 4d24ada48ebd..ffcb482f75c5 100644
--- a/include/llvm/Support/ErrorHandling.h
+++ b/include/llvm/Support/ErrorHandling.h
@@ -1,4 +1,4 @@
-//===- llvm/Support/ErrorHandling.h - Callbacks for errors ------*- C++ -*-===//
+//===- llvm/Support/ErrorHandling.h - Fatal error handling ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines an API used to indicate error conditions.
-// Callbacks can be registered for these errors through this API.
+// This file defines an API used to indicate fatal error conditions. Non-fatal
+// errors (most of them) should be handled through LLVMContext.
//
//===----------------------------------------------------------------------===//
@@ -22,10 +22,10 @@ namespace llvm {
class Twine;
/// An error handler callback.
- typedef void (*llvm_error_handler_t)(void *user_data,
- const std::string& reason);
+ typedef void (*fatal_error_handler_t)(void *user_data,
+ const std::string& reason);
- /// llvm_instal_error_handler - Installs a new error handler to be used
+ /// install_fatal_error_handler - Installs a new error handler to be used
/// whenever a serious (non-recoverable) error is encountered by LLVM.
///
/// If you are using llvm_start_multithreaded, you should register the handler
@@ -44,13 +44,13 @@ namespace llvm {
///
/// \param user_data - An argument which will be passed to the install error
/// handler.
- void llvm_install_error_handler(llvm_error_handler_t handler,
- void *user_data = 0);
+ void install_fatal_error_handler(fatal_error_handler_t handler,
+ void *user_data = 0);
/// Restores default error handling behaviour.
/// This must not be called between llvm_start_multithreaded() and
/// llvm_stop_multithreaded().
- void llvm_remove_error_handler();
+ void remove_fatal_error_handler();
/// Reports a serious error, calling any installed error handler. These
/// functions are intended to be used for error conditions which are outside
@@ -60,9 +60,9 @@ namespace llvm {
/// standard error, followed by a newline.
/// After the error handler is called, this function will call exit(1); it
/// does not return.
- NORETURN void llvm_report_error(const char *reason);
- NORETURN void llvm_report_error(const std::string &reason);
- NORETURN void llvm_report_error(const Twine &reason);
+ NORETURN void report_fatal_error(const char *reason);
+ NORETURN void report_fatal_error(const std::string &reason);
+ NORETURN void report_fatal_error(const Twine &reason);
/// This function calls abort(), and prints the optional message to stderr.
/// Use the llvm_unreachable macro (that adds location info), instead of
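
Under the new names the machinery is unchanged: install_fatal_error_handler registers a callback that report_fatal_error invokes before exiting. A minimal sketch (the handler body and tag are illustrative):

    #include "llvm/Support/ErrorHandling.h"
    #include <cstdio>
    #include <string>

    static void dieHandler(void *UserData, const std::string &Reason) {
      std::fprintf(stderr, "[%s] fatal: %s\n",
                   static_cast<const char *>(UserData), Reason.c_str());
      // report_fatal_error calls exit(1) after the handler returns.
    }

    int main() {
      static const char Tag[] = "mytool";
      llvm::install_fatal_error_handler(dieHandler, (void *)Tag);
      // ... anything that hits report_fatal_error now goes through dieHandler.
      llvm::remove_fatal_error_handler();
      return 0;
    }
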
diff --git a/include/llvm/Support/GraphWriter.h b/include/llvm/Support/GraphWriter.h
index 28fa92f99e08..13e6682ebfd6 100644
--- a/include/llvm/Support/GraphWriter.h
+++ b/include/llvm/Support/GraphWriter.h
@@ -174,7 +174,8 @@ public:
unsigned i = 0, e = DTraits.numEdgeDestLabels(Node);
for (; i != e && i != 64; ++i) {
if (i) O << "|";
- O << "<d" << i << ">" << DTraits.getEdgeDestLabel(Node, i);
+ O << "<d" << i << ">"
+ << DOT::EscapeString(DTraits.getEdgeDestLabel(Node, i));
}
if (i != e)
@@ -230,7 +231,7 @@ public:
for (unsigned i = 0; i != NumEdgeSources; ++i) {
if (i) O << "|";
O << "<s" << i << ">";
- if (EdgeSourceLabels) O << (*EdgeSourceLabels)[i];
+ if (EdgeSourceLabels) O << DOT::EscapeString((*EdgeSourceLabels)[i]);
}
O << "}}";
}
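
Both hunks route user-controlled label text through DOT::EscapeString before splicing it into a record label, since characters such as '"', '{', '}' and '|' are structural in DOT syntax. For example:

    #include "llvm/Support/GraphWriter.h"
    #include <string>
    using namespace llvm;

    std::string makeDotLabel(const std::string &Raw) {
      // Escape the characters that would otherwise corrupt a DOT record
      // label -- the same fix these hunks apply to edge labels.
      return DOT::EscapeString(Raw);
    }
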
diff --git a/include/llvm/Support/IRBuilder.h b/include/llvm/Support/IRBuilder.h
index 660c9f8001b2..1fd965d3e773 100644
--- a/include/llvm/Support/IRBuilder.h
+++ b/include/llvm/Support/IRBuilder.h
@@ -432,12 +432,19 @@ public:
return Folder.CreateFRem(LC, RC);
return Insert(BinaryOperator::CreateFRem(LHS, RHS), Name);
}
+
Value *CreateShl(Value *LHS, Value *RHS, const Twine &Name = "") {
if (Constant *LC = dyn_cast<Constant>(LHS))
if (Constant *RC = dyn_cast<Constant>(RHS))
return Folder.CreateShl(LC, RC);
return Insert(BinaryOperator::CreateShl(LHS, RHS), Name);
}
+ Value *CreateShl(Value *LHS, const APInt &RHS, const Twine &Name = "") {
+ Constant *RHSC = ConstantInt::get(LHS->getType(), RHS);
+ if (Constant *LC = dyn_cast<Constant>(LHS))
+ return Folder.CreateShl(LC, RHSC);
+ return Insert(BinaryOperator::CreateShl(LHS, RHSC), Name);
+ }
Value *CreateShl(Value *LHS, uint64_t RHS, const Twine &Name = "") {
Constant *RHSC = ConstantInt::get(LHS->getType(), RHS);
if (Constant *LC = dyn_cast<Constant>(LHS))
@@ -451,23 +458,35 @@ public:
return Folder.CreateLShr(LC, RC);
return Insert(BinaryOperator::CreateLShr(LHS, RHS), Name);
}
+ Value *CreateLShr(Value *LHS, const APInt &RHS, const Twine &Name = "") {
+ Constant *RHSC = ConstantInt::get(LHS->getType(), RHS);
+ if (Constant *LC = dyn_cast<Constant>(LHS))
+ return Folder.CreateLShr(LC, RHSC);
+ return Insert(BinaryOperator::CreateLShr(LHS, RHSC), Name);
+ }
Value *CreateLShr(Value *LHS, uint64_t RHS, const Twine &Name = "") {
Constant *RHSC = ConstantInt::get(LHS->getType(), RHS);
if (Constant *LC = dyn_cast<Constant>(LHS))
return Folder.CreateLShr(LC, RHSC);
return Insert(BinaryOperator::CreateLShr(LHS, RHSC), Name);
}
-
+
Value *CreateAShr(Value *LHS, Value *RHS, const Twine &Name = "") {
if (Constant *LC = dyn_cast<Constant>(LHS))
if (Constant *RC = dyn_cast<Constant>(RHS))
return Folder.CreateAShr(LC, RC);
return Insert(BinaryOperator::CreateAShr(LHS, RHS), Name);
}
+ Value *CreateAShr(Value *LHS, const APInt &RHS, const Twine &Name = "") {
+ Constant *RHSC = ConstantInt::get(LHS->getType(), RHS);
+ if (Constant *LC = dyn_cast<Constant>(LHS))
+ return Folder.CreateAShr(LC, RHSC);
+ return Insert(BinaryOperator::CreateAShr(LHS, RHSC), Name);
+ }
Value *CreateAShr(Value *LHS, uint64_t RHS, const Twine &Name = "") {
Constant *RHSC = ConstantInt::get(LHS->getType(), RHS);
if (Constant *LC = dyn_cast<Constant>(LHS))
- return Folder.CreateSShr(LC, RHSC);
+ return Folder.CreateAShr(LC, RHSC);
return Insert(BinaryOperator::CreateAShr(LHS, RHSC), Name);
}
@@ -480,6 +499,19 @@ public:
}
return Insert(BinaryOperator::CreateAnd(LHS, RHS), Name);
}
+ Value *CreateAnd(Value *LHS, const APInt &RHS, const Twine &Name = "") {
+ Constant *RHSC = ConstantInt::get(LHS->getType(), RHS);
+ if (Constant *LC = dyn_cast<Constant>(LHS))
+ return Folder.CreateAnd(LC, RHSC);
+ return Insert(BinaryOperator::CreateAnd(LHS, RHSC), Name);
+ }
+ Value *CreateAnd(Value *LHS, uint64_t RHS, const Twine &Name = "") {
+ Constant *RHSC = ConstantInt::get(LHS->getType(), RHS);
+ if (Constant *LC = dyn_cast<Constant>(LHS))
+ return Folder.CreateAnd(LC, RHSC);
+ return Insert(BinaryOperator::CreateAnd(LHS, RHSC), Name);
+ }
+
Value *CreateOr(Value *LHS, Value *RHS, const Twine &Name = "") {
if (Constant *RC = dyn_cast<Constant>(RHS)) {
if (RC->isNullValue())
@@ -489,12 +521,37 @@ public:
}
return Insert(BinaryOperator::CreateOr(LHS, RHS), Name);
}
+ Value *CreateOr(Value *LHS, const APInt &RHS, const Twine &Name = "") {
+ Constant *RHSC = ConstantInt::get(LHS->getType(), RHS);
+ if (Constant *LC = dyn_cast<Constant>(LHS))
+ return Folder.CreateOr(LC, RHSC);
+ return Insert(BinaryOperator::CreateOr(LHS, RHSC), Name);
+ }
+ Value *CreateOr(Value *LHS, uint64_t RHS, const Twine &Name = "") {
+ Constant *RHSC = ConstantInt::get(LHS->getType(), RHS);
+ if (Constant *LC = dyn_cast<Constant>(LHS))
+ return Folder.CreateOr(LC, RHSC);
+ return Insert(BinaryOperator::CreateOr(LHS, RHSC), Name);
+ }
+
Value *CreateXor(Value *LHS, Value *RHS, const Twine &Name = "") {
if (Constant *LC = dyn_cast<Constant>(LHS))
if (Constant *RC = dyn_cast<Constant>(RHS))
return Folder.CreateXor(LC, RC);
return Insert(BinaryOperator::CreateXor(LHS, RHS), Name);
}
+ Value *CreateXor(Value *LHS, const APInt &RHS, const Twine &Name = "") {
+ Constant *RHSC = ConstantInt::get(LHS->getType(), RHS);
+ if (Constant *LC = dyn_cast<Constant>(LHS))
+ return Folder.CreateXor(LC, RHSC);
+ return Insert(BinaryOperator::CreateXor(LHS, RHSC), Name);
+ }
+ Value *CreateXor(Value *LHS, uint64_t RHS, const Twine &Name = "") {
+ Constant *RHSC = ConstantInt::get(LHS->getType(), RHS);
+ if (Constant *LC = dyn_cast<Constant>(LHS))
+ return Folder.CreateXor(LC, RHSC);
+ return Insert(BinaryOperator::CreateXor(LHS, RHSC), Name);
+ }
Value *CreateBinOp(Instruction::BinaryOps Opc,
Value *LHS, Value *RHS, const Twine &Name = "") {
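
Each of the new overloads folds the ConstantInt::get boilerplate into the builder, so callers already holding an APInt (a common case after mask computations) no longer materialize the constant themselves. A sketch, assuming V has integer type and Bits is less than its width:

    #include "llvm/ADT/APInt.h"
    #include "llvm/Support/IRBuilder.h"
    using namespace llvm;

    // Clear the low Bits bits of V using the new CreateAnd(Value*, APInt)
    // overload, instead of wrapping the mask in a ConstantInt by hand.
    Value *clearLowBits(IRBuilder<> &B, Value *V, unsigned Bits) {
      unsigned W = V->getType()->getPrimitiveSizeInBits();
      APInt Mask = APInt::getHighBitsSet(W, W - Bits);
      return B.CreateAnd(V, Mask, "cleared");
    }
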
diff --git a/include/llvm/Support/IRReader.h b/include/llvm/Support/IRReader.h
index 2a43c5fa906a..0dfc302e242a 100644
--- a/include/llvm/Support/IRReader.h
+++ b/include/llvm/Support/IRReader.h
@@ -38,8 +38,7 @@ namespace llvm {
std::string ErrMsg;
Module *M = getLazyBitcodeModule(Buffer, Context, &ErrMsg);
if (M == 0) {
- Err = SMDiagnostic(SMLoc(), Buffer->getBufferIdentifier(), -1, -1,
- ErrMsg, "");
+ Err = SMDiagnostic(Buffer->getBufferIdentifier(), ErrMsg);
// ParseBitcodeFile does not take ownership of the Buffer in the
// case of an error.
delete Buffer;
@@ -60,8 +59,8 @@ namespace llvm {
std::string ErrMsg;
MemoryBuffer *F = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), &ErrMsg);
if (F == 0) {
- Err = SMDiagnostic(SMLoc(), Filename, -1, -1,
- "Could not open input file '" + Filename + "'", "");
+ Err = SMDiagnostic(Filename,
+ "Could not open input file '" + Filename + "'");
return 0;
}
@@ -82,8 +81,7 @@ namespace llvm {
// ParseBitcodeFile does not take ownership of the Buffer.
delete Buffer;
if (M == 0)
- Err = SMDiagnostic(SMLoc(), Buffer->getBufferIdentifier(),
- -1, -1, ErrMsg, "");
+ Err = SMDiagnostic(Buffer->getBufferIdentifier(), ErrMsg);
return M;
}
@@ -99,8 +97,8 @@ namespace llvm {
std::string ErrMsg;
MemoryBuffer *F = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), &ErrMsg);
if (F == 0) {
- Err = SMDiagnostic(SMLoc(), Filename, -1, -1,
- "Could not open input file '" + Filename + "'", "");
+ Err = SMDiagnostic(Filename,
+ "Could not open input file '" + Filename + "'");
return 0;
}
diff --git a/include/llvm/Support/RecyclingAllocator.h b/include/llvm/Support/RecyclingAllocator.h
index 49f77537188d..34ab874778c9 100644
--- a/include/llvm/Support/RecyclingAllocator.h
+++ b/include/llvm/Support/RecyclingAllocator.h
@@ -63,4 +63,11 @@ inline void *operator new(size_t,
return Allocator.Allocate();
}
+template<class AllocatorType, class T, size_t Size, size_t Align>
+inline void operator delete(void *E,
+ llvm::RecyclingAllocator<AllocatorType,
+ T, Size, Align> &A) {
+ A.Deallocate(E);
+}
+
#endif
diff --git a/include/llvm/Support/SourceMgr.h b/include/llvm/Support/SourceMgr.h
index 3e66762ae349..9cd35d1f9311 100644
--- a/include/llvm/Support/SourceMgr.h
+++ b/include/llvm/Support/SourceMgr.h
@@ -148,6 +148,7 @@ private:
/// SMDiagnostic - Instances of this class encapsulate one diagnostic report,
/// allowing printing to a raw_ostream as a caret diagnostic.
class SMDiagnostic {
+ const SourceMgr *SM;
SMLoc Loc;
std::string Filename;
int LineNo, ColumnNo;
@@ -155,15 +156,25 @@ class SMDiagnostic {
unsigned ShowLine : 1;
public:
- SMDiagnostic() : LineNo(0), ColumnNo(0), ShowLine(0) {}
- SMDiagnostic(SMLoc L, const std::string &FN, int Line, int Col,
+ // Null diagnostic.
+ SMDiagnostic() : SM(0), LineNo(0), ColumnNo(0), ShowLine(0) {}
+ // Diagnostic with no location (e.g. file not found, command line arg error).
+ SMDiagnostic(const std::string &filename, const std::string &Msg,
+ bool showline = true)
+ : SM(0), Loc(), Filename(filename), LineNo(-1), ColumnNo(-1),
+ Message(Msg), LineContents(""), ShowLine(showline) {}
+
+ // Diagnostic with a location.
+ SMDiagnostic(const SourceMgr &sm, SMLoc L, const std::string &FN,
+ int Line, int Col,
const std::string &Msg, const std::string &LineStr,
bool showline = true)
- : Loc(L), Filename(FN), LineNo(Line), ColumnNo(Col), Message(Msg),
+ : SM(&sm), Loc(L), Filename(FN), LineNo(Line), ColumnNo(Col), Message(Msg),
LineContents(LineStr), ShowLine(showline) {}
+ const SourceMgr *getSourceMgr() const { return SM; }
SMLoc getLoc() const { return Loc; }
- const std::string getFilename() { return Filename; }
+ const std::string &getFilename() { return Filename; }
int getLineNo() const { return LineNo; }
int getColumnNo() const { return ColumnNo; }
const std::string &getMessage() const { return Message; }
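
The constructor split matters for callers such as IRReader above: errors that occur before any buffer is parsed (file not found, bad command-line input) have no SMLoc or line contents to report, so they use the new two-argument form, while located diagnostics now also carry their SourceMgr so printers can recover context. For instance:

    #include "llvm/Support/SourceMgr.h"
    using namespace llvm;

    SMDiagnostic couldNotOpen(const std::string &Path) {
      // New two-argument form: no SourceMgr, no line/column (both -1),
      // suitable for errors that precede any parsing.
      return SMDiagnostic(Path, "Could not open input file '" + Path + "'");
    }
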
diff --git a/include/llvm/Support/ValueHandle.h b/include/llvm/Support/ValueHandle.h
index 130a620ab2fa..c0cdc35e99bf 100644
--- a/include/llvm/Support/ValueHandle.h
+++ b/include/llvm/Support/ValueHandle.h
@@ -315,7 +315,7 @@ class TrackingVH : public ValueHandleBase {
public:
TrackingVH() : ValueHandleBase(Tracking) {}
- TrackingVH(ValueTy *P) : ValueHandleBase(Tracking, P) {}
+ TrackingVH(ValueTy *P) : ValueHandleBase(Tracking, GetAsValue(P)) {}
TrackingVH(const TrackingVH &RHS) : ValueHandleBase(Tracking, RHS) {}
operator ValueTy*() const {
diff --git a/include/llvm/Support/raw_ostream.h b/include/llvm/Support/raw_ostream.h
index e0de80f1d435..90eaeea12dd8 100644
--- a/include/llvm/Support/raw_ostream.h
+++ b/include/llvm/Support/raw_ostream.h
@@ -96,7 +96,7 @@ public:
/// clear_error - Set the flag read by has_error() to false. If the error
/// flag is set at the time when this raw_ostream's destructor is called,
- /// llvm_report_error is called to report the error. Use clear_error()
+ /// report_fatal_error is called to report the error. Use clear_error()
/// after handling the error to avoid this behavior.
void clear_error() {
Error = false;
diff --git a/include/llvm/System/DataTypes.h.in b/include/llvm/System/DataTypes.h.in
index 1f8ce79f4eb2..6537f3010fae 100644
--- a/include/llvm/System/DataTypes.h.in
+++ b/include/llvm/System/DataTypes.h.in
@@ -15,7 +15,7 @@
|* [u]int(32|64)_t : typedefs for signed and unsigned 32/64 bit system types*|
|* [U]INT(8|16|32|64)_(MIN|MAX) : Constants for the min and max values. *|
|* *|
-|* No library is required when using these functinons. *|
+|* No library is required when using these functions. *|
|* *|
|*===----------------------------------------------------------------------===*/
diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td
index 462c38f03d27..cc19e0de8eb4 100644
--- a/include/llvm/Target/Target.td
+++ b/include/llvm/Target/Target.td
@@ -473,13 +473,22 @@ def DBG_VALUE : Instruction {
let Namespace = "TargetOpcode";
let isAsCheapAsAMove = 1;
}
+
+def REG_SEQUENCE : Instruction {
+ let OutOperandList = (outs unknown:$dst);
+ let InOperandList = (ins variable_ops);
+ let AsmString = "";
+ let Namespace = "TargetOpcode";
+ let neverHasSideEffects = 1;
+ let isAsCheapAsAMove = 1;
+}
}
//===----------------------------------------------------------------------===//
-// AsmParser - This class can be implemented by targets that wish to implement
+// AsmParser - This class can be implemented by targets that wish to implement
// .s file parsing.
//
-// Subtargets can have multiple different assembly parsers (e.g. AT&T vs Intel
+// Subtargets can have multiple different assembly parsers (e.g. AT&T vs Intel
// syntax on X86 for example).
//
class AsmParser {
@@ -492,9 +501,13 @@ class AsmParser {
// AsmParser class to call on every matched instruction. This can be used to
// perform target specific instruction post-processing.
string AsmParserInstCleanup = "";
-
+
+ // MatchInstructionName - The name of the instruction matching function to
+ // generate.
+ string MatchInstructionName = "MatchInstruction";
+
// Variant - AsmParsers can be of multiple different variants. Variants are
- // used to support targets that need to parser multiple formats for the
+ // used to support targets that need to parse multiple formats for the
// assembly language.
int Variant = 0;
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h
index 4b26beb82cef..143dbcc6f5fe 100644
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -23,6 +23,8 @@ class CalleeSavedInfo;
class LiveVariables;
class MCAsmInfo;
class MachineMemOperand;
+class MDNode;
+class MCInst;
class SDNode;
class SelectionDAG;
class TargetRegisterClass;
@@ -182,7 +184,7 @@ public:
/// store to a stack slot, return true along with the FrameIndex of
/// the loaded stack slot and the machine mem operand containing the
/// reference. If not, return false. Unlike isStoreToStackSlot,
- /// this returns true for any instructions that loads from the
+ /// this returns true for any instructions that stores to the
/// stack. This is just a hint, as some cases may be missed.
virtual bool hasStoreToStackSlot(const MachineInstr *MI,
const MachineMemOperand *&MMO,
@@ -361,6 +363,22 @@ public:
return false;
}
+ /// emitFrameIndexDebugValue - Emit a target-dependent form of
+ /// DBG_VALUE encoding the address of a frame index. Addresses would
+ /// normally be lowered the same way as other addresses on the target,
+ /// e.g. in load instructions. For targets that do not support this,
+ /// the debug info is simply lost.
+ /// If you add this for a target, you should handle this DBG_VALUE in the
+ /// target-specific AsmPrinter code as well; you will probably get invalid
+ /// assembly output if you don't.
+ virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx,
+ uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc dl) const {
+ return 0;
+ }
+
/// foldMemoryOperand - Attempt to fold a load or store of the specified stack
/// slot into the specified machine instruction for the specified operand(s).
/// If this is possible, a new instruction is returned with the specified
@@ -473,6 +491,13 @@ public:
virtual void insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const;
+
+ /// getNoopForMachoTarget - Return the target's noop instruction as an
+ /// MCInst, for use where a noop must be emitted.
+ virtual void getNoopForMachoTarget(MCInst &NopInst) const {
+ // The default leaves NopInst untouched; callers fall back to a 'nop' string.
+ }
+
+
/// isPredicated - Returns true if the instruction is already predicated.
///
virtual bool isPredicated(const MachineInstr *MI) const {
diff --git a/include/llvm/Target/TargetInstrItineraries.h b/include/llvm/Target/TargetInstrItineraries.h
index 420fa94ce76b..3dfa8bc10bfe 100644
--- a/include/llvm/Target/TargetInstrItineraries.h
+++ b/include/llvm/Target/TargetInstrItineraries.h
@@ -47,10 +47,24 @@ namespace llvm {
/// indicate that the instruction requires multiple stages at the
/// same time.
///
+/// An FU reservation can be of two kinds:
+/// - FUs that the instruction actually requires
+/// - FUs that the instruction merely reserves. A reserved unit is not
+///   available for the execution of other instructions, but several
+///   instructions can reserve the same unit multiple times.
+/// These two kinds of reservation are used to model instruction domain
+/// change stalls, FUs sharing the same resource (e.g. the same register
+/// file), etc.
+
struct InstrStage {
+ enum ReservationKinds {
+ Required = 0,
+ Reserved = 1
+ };
+
unsigned Cycles_; ///< Length of stage in machine cycles
unsigned Units_; ///< Choice of functional units
- int NextCycles_; ///< Number of machine cycles to next stage
+ int NextCycles_; ///< Number of machine cycles to next stage
+ ReservationKinds Kind_; ///< Kind of the FU reservation
/// getCycles - returns the number of cycles the stage is occupied
unsigned getCycles() const {
@@ -62,6 +76,10 @@ struct InstrStage {
return Units_;
}
+ ReservationKinds getReservationKind() const {
+ return Kind_;
+ }
+
/// getNextCycles - returns the number of cycles from the start of
/// this stage to the start of the next stage in the itinerary
unsigned getNextCycles() const {
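
A consumer distinguishing the two kinds might look like this sketch (the helper is hypothetical; the accessors are the ones added above):

    #include "llvm/Target/TargetInstrItineraries.h"
    using namespace llvm;

    // Walk an instruction's stages: treat Required units as execution
    // resources and skip Reserved units, which are blocked but not used.
    unsigned requiredCycles(const InstrStage *Begin, const InstrStage *End) {
      unsigned Cycles = 0;
      for (const InstrStage *S = Begin; S != End; ++S)
        if (S->getReservationKind() == InstrStage::Required)
          Cycles += S->getCycles();
      return Cycles;
    }
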
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 1a94b4448ae4..4ea6c94f3b4a 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -104,12 +104,13 @@ public:
};
/// NOTE: The constructor takes ownership of TLOF.
- explicit TargetLowering(TargetMachine &TM, TargetLoweringObjectFile *TLOF);
+ explicit TargetLowering(const TargetMachine &TM,
+ const TargetLoweringObjectFile *TLOF);
virtual ~TargetLowering();
- TargetMachine &getTargetMachine() const { return TM; }
+ const TargetMachine &getTargetMachine() const { return TM; }
const TargetData *getTargetData() const { return TD; }
- TargetLoweringObjectFile &getObjFileLowering() const { return TLOF; }
+ const TargetLoweringObjectFile &getObjFileLowering() const { return TLOF; }
bool isBigEndian() const { return !IsLittleEndian; }
bool isLittleEndian() const { return IsLittleEndian; }
@@ -172,6 +173,13 @@ public:
return VT.isSimple() && RegClassForVT[VT.getSimpleVT().SimpleTy] != 0;
}
+ /// isTypeSynthesizable - Return true if it's OK for the compiler to create
+ /// new operations of this type. All Legal types are synthesizable except
+ /// MMX vector types on X86. Non-Legal types are not synthesizable.
+ bool isTypeSynthesizable(EVT VT) const {
+ return isTypeLegal(VT) && Synthesizable[VT.getSimpleVT().SimpleTy];
+ }
+
class ValueTypeActionImpl {
/// ValueTypeActions - This is a bitvector that contains two bits for each
/// value type, where the two bits correspond to the LegalizeAction enum.
@@ -180,16 +188,8 @@ public:
uint32_t ValueTypeActions[(MVT::MAX_ALLOWED_VALUETYPE/32)*2];
public:
ValueTypeActionImpl() {
- ValueTypeActions[0] = ValueTypeActions[1] = 0;
- ValueTypeActions[2] = ValueTypeActions[3] = 0;
+ std::fill(ValueTypeActions, array_endof(ValueTypeActions), 0);
}
- ValueTypeActionImpl(const ValueTypeActionImpl &RHS) {
- ValueTypeActions[0] = RHS.ValueTypeActions[0];
- ValueTypeActions[1] = RHS.ValueTypeActions[1];
- ValueTypeActions[2] = RHS.ValueTypeActions[2];
- ValueTypeActions[3] = RHS.ValueTypeActions[3];
- }
-
LegalizeAction getTypeAction(LLVMContext &Context, EVT VT) const {
if (VT.isExtended()) {
if (VT.isVector()) {
@@ -317,7 +317,7 @@ public:
};
virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info,
- CallInst &I, unsigned Intrinsic) {
+ const CallInst &I, unsigned Intrinsic) const {
return false;
}
@@ -638,12 +638,14 @@ public:
/// probably because the source does not need to be loaded. If
/// 'NonScalarIntSafe' is true, that means it's safe to return a
/// non-scalar-integer type, e.g. empty string source, constant, or loaded
- /// from memory. It returns EVT::Other if SelectionDAG should be responsible
- /// for determining it.
+ /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
+ /// constant so it does not need to be loaded.
+ /// It returns EVT::Other if the type should be determined using generic
+ /// target-independent logic.
virtual EVT getOptimalMemOpType(uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
- bool NonScalarIntSafe,
- SelectionDAG &DAG) const {
+ bool NonScalarIntSafe, bool MemcpyStrSrc,
+ MachineFunction &MF) const {
return MVT::Other;
}
@@ -773,12 +775,19 @@ public:
/// that want to combine
struct TargetLoweringOpt {
SelectionDAG &DAG;
+ bool LegalTys;
+ bool LegalOps;
bool ShrinkOps;
SDValue Old;
SDValue New;
- explicit TargetLoweringOpt(SelectionDAG &InDAG, bool Shrink = false) :
- DAG(InDAG), ShrinkOps(Shrink) {}
+ explicit TargetLoweringOpt(SelectionDAG &InDAG,
+ bool LT, bool LO,
+ bool Shrink = false) :
+ DAG(InDAG), LegalTys(LT), LegalOps(LO), ShrinkOps(Shrink) {}
+
+ bool LegalTypes() const { return LegalTys; }
+ bool LegalOperations() const { return LegalOps; }
bool CombineTo(SDValue O, SDValue N) {
Old = O;
@@ -862,7 +871,7 @@ public:
/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
/// node is a GlobalAddress + offset.
virtual bool
- isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) const;
+ isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const;
/// PerformDAGCombine - This method will be invoked for all target nodes and
/// for any target-independent nodes that the target has registered with
@@ -878,6 +887,22 @@ public:
/// more complex transformations.
///
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ /// isTypeDesirableForOp - Return true if the target has native support for
+ /// the specified value type and it is 'desirable' to use the type for the
+ /// given node type. e.g. on x86, i16 is legal, but undesirable since i16
+ /// instruction encodings are longer and some i16 instructions are slow.
+ virtual bool isTypeDesirableForOp(unsigned Opc, EVT VT) const {
+ // By default, assume all legal types are desirable.
+ return isTypeLegal(VT);
+ }
+
+ /// IsDesirableToPromoteOp - This method queries the target about whether it
+ /// is beneficial for the dag combiner to promote the specified node. If
+ /// true, it should return the desired promotion type by reference.
+ virtual bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
+ return false;
+ }
//===--------------------------------------------------------------------===//
// TargetLowering Configuration Methods - These methods should be invoked by
@@ -950,10 +975,12 @@ protected:
/// addRegisterClass - Add the specified register class as an available
/// regclass for the specified value type. This indicates the selector can
/// handle values of that class natively.
- void addRegisterClass(EVT VT, TargetRegisterClass *RC) {
+ void addRegisterClass(EVT VT, TargetRegisterClass *RC,
+ bool isSynthesizable = true) {
assert((unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegClassForVT));
AvailableRegClasses.push_back(std::make_pair(VT, RC));
RegClassForVT[VT.getSimpleVT().SimpleTy] = RC;
+ Synthesizable[VT.getSimpleVT().SimpleTy] = isSynthesizable;
}
/// computeRegisterProperties - Once all of the register classes are added,
@@ -1077,7 +1104,7 @@ protected:
public:
- virtual const TargetSubtarget *getSubtarget() {
+ virtual const TargetSubtarget *getSubtarget() const {
assert(0 && "Not Implemented");
return NULL; // this is here to silence compiler errors
}
@@ -1098,7 +1125,7 @@ public:
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals) const {
assert(0 && "Not Implemented");
return SDValue(); // this is here to silence compiler errors
}
@@ -1128,7 +1155,7 @@ public:
bool isVarArg, bool isInreg, unsigned NumFixedArgs,
CallingConv::ID CallConv, bool isTailCall,
bool isReturnValueUsed, SDValue Callee, ArgListTy &Args,
- SelectionDAG &DAG, DebugLoc dl);
+ SelectionDAG &DAG, DebugLoc dl) const;
/// LowerCall - This hook must be implemented to lower calls into the
/// the specified DAG. The outgoing arguments to the call are described
@@ -1142,7 +1169,7 @@ public:
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals) const {
assert(0 && "Not Implemented");
return SDValue(); // this is here to silence compiler errors
}
@@ -1154,11 +1181,12 @@ public:
virtual bool CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<EVT> &OutTys,
const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags,
- SelectionDAG &DAG)
+ SelectionDAG &DAG) const
{
// Return true by default to get preexisting behavior.
return true;
}
+
/// LowerReturn - This hook must be implemented to lower outgoing
/// return values, described by the Outs array, into the specified
/// DAG. The implementation should return the resulting token chain
@@ -1167,7 +1195,7 @@ public:
virtual SDValue
LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
- DebugLoc dl, SelectionDAG &DAG) {
+ DebugLoc dl, SelectionDAG &DAG) const {
assert(0 && "Not Implemented");
return SDValue(); // this is here to silence compiler errors
}
@@ -1192,7 +1220,7 @@ public:
SDValue Op3, unsigned Align, bool isVolatile,
bool AlwaysInline,
const Value *DstSV, uint64_t DstOff,
- const Value *SrcSV, uint64_t SrcOff) {
+ const Value *SrcSV, uint64_t SrcOff) const {
return SDValue();
}
@@ -1208,7 +1236,7 @@ public:
SDValue Op1, SDValue Op2,
SDValue Op3, unsigned Align, bool isVolatile,
const Value *DstSV, uint64_t DstOff,
- const Value *SrcSV, uint64_t SrcOff) {
+ const Value *SrcSV, uint64_t SrcOff) const {
return SDValue();
}
@@ -1223,7 +1251,7 @@ public:
SDValue Chain,
SDValue Op1, SDValue Op2,
SDValue Op3, unsigned Align, bool isVolatile,
- const Value *DstSV, uint64_t DstOff) {
+ const Value *DstSV, uint64_t DstOff) const {
return SDValue();
}
@@ -1241,14 +1269,14 @@ public:
/// The default implementation calls LowerOperation.
virtual void LowerOperationWrapper(SDNode *N,
SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG);
+ SelectionDAG &DAG) const;
/// LowerOperation - This callback is invoked for operations that are
/// unsupported by the target, which are registered to use 'custom' lowering,
/// and whose defined values are all legal.
/// If the target has no operations that require custom lowering, it need not
/// implement this. The default implementation of this aborts.
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
/// ReplaceNodeResults - This callback is invoked when a node result type is
/// illegal for the target, and the operation was registered to use 'custom'
@@ -1260,7 +1288,7 @@ public:
/// If the target has no operations that require custom lowering, it need not
/// implement this. The default implementation aborts.
virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
assert(0 && "ReplaceNodeResults not implemented for this target!");
}
@@ -1274,11 +1302,12 @@ public:
createFastISel(MachineFunction &,
DenseMap<const Value *, unsigned> &,
DenseMap<const BasicBlock *, MachineBasicBlock *> &,
- DenseMap<const AllocaInst *, int> &
+ DenseMap<const AllocaInst *, int> &,
+ std::vector<std::pair<MachineInstr*, unsigned> > &
#ifndef NDEBUG
- , SmallSet<Instruction*, 8> &CatchInfoLost
+ , SmallSet<const Instruction *, 8> &CatchInfoLost
#endif
- ) {
+ ) const {
return 0;
}
@@ -1398,12 +1427,8 @@ public:
// insert. The specified MachineInstr is created but not inserted into any
// basic blocks, and this method is called to expand it into a sequence of
// instructions, potentially also creating new basic blocks and control flow.
- // When new basic blocks are inserted and the edges from MBB to its successors
- // are modified, the method should insert pairs of <OldSucc, NewSucc> into the
- // DenseMap.
- virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *MBB,
- DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const;
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
//===--------------------------------------------------------------------===//
// Addressing mode description hooks (used by LSR etc).
@@ -1524,9 +1549,9 @@ public:
}
private:
- TargetMachine &TM;
+ const TargetMachine &TM;
const TargetData *TD;
- TargetLoweringObjectFile &TLOF;
+ const TargetLoweringObjectFile &TLOF;
/// PointerTy - The type to use for pointers, usually i32 or i64.
///
@@ -1611,6 +1636,11 @@ private:
unsigned char NumRegistersForVT[MVT::LAST_VALUETYPE];
EVT RegisterTypeForVT[MVT::LAST_VALUETYPE];
+ /// Synthesizable indicates whether it is OK for the compiler to create new
+ /// operations using this type. All Legal types are Synthesizable except
+ /// MMX types on X86. Non-Legal types are not Synthesizable.
+ bool Synthesizable[MVT::LAST_VALUETYPE];
+
/// TransformToType - For any value types we are promoting or expanding, this
/// contains the value type that we are changing to. For Expanded types, this
/// contains one step of the expand (e.g. i64 -> i32), even if there are
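
To see how the two new promotion hooks cooperate, here is a hypothetical target lowering; MyTargetLowering and its i16 policy are invented for illustration (x86 implements a similar policy for slow 16-bit operations):

    #include "llvm/CodeGen/SelectionDAGNodes.h"
    #include "llvm/Target/TargetLowering.h"
    using namespace llvm;

    class MyTargetLowering : public TargetLowering {
    public:
      MyTargetLowering(const TargetMachine &TM,
                       const TargetLoweringObjectFile *TLOF)
        : TargetLowering(TM, TLOF) {}

      virtual bool isTypeDesirableForOp(unsigned Opc, EVT VT) const {
        if (VT == MVT::i16 && Opc == ISD::MUL)
          return false;              // legal but slow: steer combines away
        return isTypeLegal(VT);
      }

      virtual bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
        if (Op.getValueType() == MVT::i16) {
          PVT = MVT::i32;            // ask the DAG combiner to widen to i32
          return true;
        }
        return false;
      }
    };
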
diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h
index d1d665f870ff..c734cf4ee062 100644
--- a/include/llvm/Target/TargetMachine.h
+++ b/include/llvm/Target/TargetMachine.h
@@ -28,6 +28,7 @@ class TargetInstrInfo;
class TargetIntrinsicInfo;
class TargetJITInfo;
class TargetLowering;
+class TargetSelectionDAGInfo;
class TargetFrameInfo;
class JITCodeEmitter;
class MCContext;
@@ -105,7 +106,8 @@ public:
//
virtual const TargetInstrInfo *getInstrInfo() const { return 0; }
virtual const TargetFrameInfo *getFrameInfo() const { return 0; }
- virtual TargetLowering *getTargetLowering() const { return 0; }
+ virtual const TargetLowering *getTargetLowering() const { return 0; }
+ virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const{ return 0; }
virtual const TargetData *getTargetData() const { return 0; }
/// getMCAsmInfo - Return target specific asm information.
@@ -171,6 +173,21 @@ public:
/// is false.
static void setAsmVerbosityDefault(bool);
+ /// getDataSections - Return true if data objects should be emitted into their
+ /// own section, corresponding to -fdata-sections.
+ static bool getDataSections();
+
+ /// getFunctionSections - Return true if functions should be emitted into
+ /// their own section, corresponding to -ffunction-sections.
+ static bool getFunctionSections();
+
+ /// setDataSections - Set whether data objects are emitted into separate
+ /// sections.
+ static void setDataSections(bool);
+
+ /// setFunctionSections - Set whether functions are emitted into separate
+ /// sections.
+ static void setFunctionSections(bool);
+
/// CodeGenFileType - These enums are meant to be passed into
/// addPassesToEmitFile to indicate what type of file to emit, and returned by
/// it to indicate what type of file could actually be made.
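
The four statics simply expose the -ffunction-sections/-fdata-sections style knobs programmatically; for example, a tool that wants per-symbol sections (for gc-sections-like stripping) could do:

    #include "llvm/Target/TargetMachine.h"
    using namespace llvm;

    void enablePerSymbolSections() {
      // Mirrors passing -ffunction-sections -fdata-sections to the compiler.
      TargetMachine::setFunctionSections(true);
      TargetMachine::setDataSections(true);
    }
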
diff --git a/include/llvm/Target/TargetOpcodes.h b/include/llvm/Target/TargetOpcodes.h
index 48665b7431f2..c4deaa8fbc19 100644
--- a/include/llvm/Target/TargetOpcodes.h
+++ b/include/llvm/Target/TargetOpcodes.h
@@ -62,9 +62,17 @@ namespace TargetOpcode {
/// instructions are insufficient. The actual MachineInstrs to perform
/// the copy are emitted with the TargetInstrInfo::copyRegToReg hook.
COPY_TO_REGCLASS = 10,
-
+
/// DBG_VALUE - a mapping of the llvm.dbg.value intrinsic
- DBG_VALUE = 11
+ DBG_VALUE = 11,
+
+ /// REG_SEQUENCE - This variadic instruction is used to form a register that
+ /// represents a consecutive sequence of sub-registers. It's used as a
+ /// register coalescing / allocation aid and must be eliminated before code
+ /// emission.
+ /// e.g. v1027 = REG_SEQUENCE v1024, 3, v1025, 4, v1026, 5
+ /// After register coalescing, references to v1024 should be replaced with
+ /// v1027:3, v1025 with v1027:4, etc.
+ REG_SEQUENCE = 12
};
} // end namespace TargetOpcode
} // end namespace llvm
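
A sketch of how a pass might materialize the documented example; the register numbers and sub-register indices are made up, and the BuildMI usage follows this era's MachineInstrBuilder API:

    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineInstrBuilder.h"
    #include "llvm/Target/TargetInstrInfo.h"
    #include "llvm/Target/TargetOpcodes.h"
    using namespace llvm;

    // v1027 = REG_SEQUENCE v1024, 3, v1025, 4, v1026, 5
    void buildRegSequence(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator I, DebugLoc DL,
                          const TargetInstrInfo *TII, unsigned DstReg,
                          unsigned R0, unsigned R1, unsigned R2) {
      BuildMI(MBB, I, DL, TII->get(TargetOpcode::REG_SEQUENCE), DstReg)
        .addReg(R0).addImm(3)   // v1024 becomes DstReg:3
        .addReg(R1).addImm(4)   // v1025 becomes DstReg:4
        .addReg(R2).addImm(5);  // v1026 becomes DstReg:5
    }
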
diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h
index a01a67f14dae..a316c701b542 100644
--- a/include/llvm/Target/TargetOptions.h
+++ b/include/llvm/Target/TargetOptions.h
@@ -16,6 +16,8 @@
#define LLVM_TARGET_TARGETOPTIONS_H
namespace llvm {
+ class MachineFunction;
+
// Possible float ABI settings. Used with FloatABIType in TargetOptions.h.
namespace FloatABI {
enum ABIType {
@@ -35,6 +37,16 @@ namespace llvm {
/// elimination optimization, this option should disable it.
extern bool NoFramePointerElim;
+ /// NoFramePointerElimNonLeaf - This flag is enabled when the
+ /// -disable-non-leaf-fp-elim is specified on the command line. If the target
+ /// supports the frame pointer elimination optimization, this option should
+ /// disable it for non-leaf functions.
+ extern bool NoFramePointerElimNonLeaf;
+
+ /// DisableFramePointerElim - This returns true if frame pointer elimination
+ /// optimization should be disabled for the given machine function.
+ extern bool DisableFramePointerElim(const MachineFunction &MF);
+
/// LessPreciseFPMAD - This flag is enabled when the
/// -enable-fp-mad is specified on the command line. When this flag is off
/// (the default), the code generator is not allowed to generate mad
@@ -95,13 +107,9 @@ namespace llvm {
/// crt*.o compiling).
extern bool NoZerosInBSS;
- /// DwarfExceptionHandling - This flag indicates that Dwarf exception
- /// information should be emitted.
- extern bool DwarfExceptionHandling;
-
- /// SjLjExceptionHandling - This flag indicates that SJLJ exception
- /// information should be emitted.
- extern bool SjLjExceptionHandling;
+ /// JITExceptionHandling - This flag indicates that the JIT should emit
+ /// exception handling information.
+ extern bool JITExceptionHandling;
/// JITEmitDebugInfo - This flag indicates that the JIT should try to emit
/// debug information and notify a debugger about it.
diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h
index c19744588496..29b862aa02fe 100644
--- a/include/llvm/Target/TargetRegisterInfo.h
+++ b/include/llvm/Target/TargetRegisterInfo.h
@@ -109,7 +109,7 @@ public:
}
/// contains - Return true if the specified register is included in this
- /// register class.
+ /// register class. This does not include virtual registers.
bool contains(unsigned Reg) const {
return RegSet.count(Reg);
}
diff --git a/include/llvm/Target/TargetSchedule.td b/include/llvm/Target/TargetSchedule.td
index dcc09921d994..96c83674cb03 100644
--- a/include/llvm/Target/TargetSchedule.td
+++ b/include/llvm/Target/TargetSchedule.td
@@ -22,6 +22,13 @@
//
class FuncUnit;
+class ReservationKind<bits<1> val> {
+ int Value = val;
+}
+
+def Required : ReservationKind<0>;
+def Reserved : ReservationKind<1>;
+
//===----------------------------------------------------------------------===//
// Instruction stage - These values represent a non-pipelined step in
// the execution of an instruction. Cycles represents the number of
@@ -36,10 +43,14 @@ class FuncUnit;
// InstrStage<1, [FU_x, FU_y]> - TimeInc defaults to Cycles
// InstrStage<1, [FU_x, FU_y], 0> - TimeInc explicit
//
-class InstrStage<int cycles, list<FuncUnit> units, int timeinc = -1> {
+
+class InstrStage<int cycles, list<FuncUnit> units,
+ int timeinc = -1,
+ ReservationKind kind = Required> {
int Cycles = cycles; // length of stage in machine cycles
list<FuncUnit> Units = units; // choice of functional units
int TimeInc = timeinc; // cycles till start of next stage
+ int Kind = kind.Value; // kind of FU reservation
}
//===----------------------------------------------------------------------===//
@@ -73,11 +84,12 @@ class InstrItinData<InstrItinClass Class, list<InstrStage> stages,
// Processor itineraries - These values represent the set of all itinerary
// classes for a given chip set.
//
-class ProcessorItineraries<list<InstrItinData> iid> {
+class ProcessorItineraries<list<FuncUnit> fu, list<InstrItinData> iid> {
+ list<FuncUnit> FU = fu;
list<InstrItinData> IID = iid;
}
// NoItineraries - A marker that can be used by processors without schedule
// info.
-def NoItineraries : ProcessorItineraries<[]>;
+def NoItineraries : ProcessorItineraries<[], []>;
diff --git a/include/llvm/Target/TargetSelectionDAGInfo.h b/include/llvm/Target/TargetSelectionDAGInfo.h
new file mode 100644
index 000000000000..943bdea797bf
--- /dev/null
+++ b/include/llvm/Target/TargetSelectionDAGInfo.h
@@ -0,0 +1,36 @@
+//==-- llvm/Target/TargetSelectionDAGInfo.h - SelectionDAG Info --*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the TargetSelectionDAGInfo class, which targets can
+// subclass to parameterize the SelectionDAG lowering and instruction
+// selection process.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_TARGETSELECTIONDAGINFO_H
+#define LLVM_TARGET_TARGETSELECTIONDAGINFO_H
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+/// TargetSelectionDAGInfo - Targets can subclass this to parameterize the
+/// SelectionDAG lowering and instruction selection process.
+///
+class TargetSelectionDAGInfo {
+ TargetSelectionDAGInfo(const TargetSelectionDAGInfo &); // DO NOT IMPLEMENT
+ void operator=(const TargetSelectionDAGInfo &); // DO NOT IMPLEMENT
+
+public:
+ TargetSelectionDAGInfo();
+ virtual ~TargetSelectionDAGInfo();
+};
+
+} // end llvm namespace
+
+#endif
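
At this point the class is an empty hook: it only fixes the lifetime protocol (non-copyable, virtual destructor), presumably so target-specific SelectionDAG knowledge can migrate into it later. A hypothetical target subclass therefore needs nothing beyond construction and teardown:

    #include "llvm/Target/TargetSelectionDAGInfo.h"

    namespace llvm {
    // Illustrative only: nothing to override yet.
    class MyTargetSelectionDAGInfo : public TargetSelectionDAGInfo {
    public:
      MyTargetSelectionDAGInfo() {}
      virtual ~MyTargetSelectionDAGInfo() {}
    };
    } // end llvm namespace
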
diff --git a/include/llvm/Transforms/IPO/InlinerPass.h b/include/llvm/Transforms/IPO/InlinerPass.h
index c5be59a8cd7f..6af7ed7bdbfc 100644
--- a/include/llvm/Transforms/IPO/InlinerPass.h
+++ b/include/llvm/Transforms/IPO/InlinerPass.h
@@ -40,7 +40,7 @@ struct Inliner : public CallGraphSCCPass {
// Main run interface method, this implements the interface required by the
// Pass class.
- virtual bool runOnSCC(std::vector<CallGraphNode *> &SCC);
+ virtual bool runOnSCC(CallGraphSCC &SCC);
// doFinalization - Remove now-dead linkonce functions at the end of
// processing to avoid breaking the SCC traversal.
@@ -77,7 +77,7 @@ struct Inliner : public CallGraphSCCPass {
/// growCachedCostInfo - update the cached cost info for Caller after Callee
/// has been inlined.
- virtual void growCachedCostInfo(Function* Caller, Function* Callee) = 0;
+ virtual void growCachedCostInfo(Function *Caller, Function *Callee) = 0;
/// removeDeadFunctions - Remove dead functions that are not included in
/// DNR (Do Not Remove) list.
diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h
index 6893badfc239..a8c9c6a084df 100644
--- a/include/llvm/Transforms/Scalar.h
+++ b/include/llvm/Transforms/Scalar.h
@@ -241,6 +241,8 @@ extern const PassInfo *const LowerSwitchID;
// lowering pass.
//
FunctionPass *createLowerInvokePass(const TargetLowering *TLI = 0);
+FunctionPass *createLowerInvokePass(const TargetLowering *TLI,
+ bool useExpensiveEHSupport);
extern const PassInfo *const LowerInvokePassID;
//===----------------------------------------------------------------------===//
@@ -326,12 +328,6 @@ FunctionPass *createGEPSplitterPass();
//===----------------------------------------------------------------------===//
//
-// SCCVN - Aggressively eliminate redundant scalar values
-//
-FunctionPass *createSCCVNPass();
-
-//===----------------------------------------------------------------------===//
-//
// ABCD - Elimination of Array Bounds Checks on Demand
//
FunctionPass *createABCDPass();
diff --git a/include/llvm/Transforms/Utils/BuildLibCalls.h b/include/llvm/Transforms/Utils/BuildLibCalls.h
index 8e76f50bb21e..6df3469ec064 100644
--- a/include/llvm/Transforms/Utils/BuildLibCalls.h
+++ b/include/llvm/Transforms/Utils/BuildLibCalls.h
@@ -42,7 +42,7 @@ namespace llvm {
/// EmitStrNCpy - Emit a call to the strncpy function to the builder, for the
/// specified pointer arguments and length.
Value *EmitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B,
- const TargetData *TD);
+ const TargetData *TD, StringRef Name = "strncpy");
/// EmitMemCpy - Emit a call to the memcpy function to the builder. This
/// always expects that the size has type 'intptr_t' and Dst/Src are pointers.
diff --git a/include/llvm/Transforms/Utils/Cloning.h b/include/llvm/Transforms/Utils/Cloning.h
index 5f494fb50d39..22bdc99ac18d 100644
--- a/include/llvm/Transforms/Utils/Cloning.h
+++ b/include/llvm/Transforms/Utils/Cloning.h
@@ -19,7 +19,9 @@
#define LLVM_TRANSFORMS_UTILS_CLONING_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Support/ValueHandle.h"
namespace llvm {
@@ -40,7 +42,6 @@ class TargetData;
class Loop;
class LoopInfo;
class AllocaInst;
-template <typename T> class SmallVectorImpl;
/// CloneModule - Return an exact copy of the specified module
///
@@ -158,6 +159,33 @@ void CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
const TargetData *TD = 0,
Instruction *TheCall = 0);
+
+/// InlineFunctionInfo - This class captures the data input to the
+/// InlineFunction call, and records the auxiliary results produced by it.
+class InlineFunctionInfo {
+public:
+ explicit InlineFunctionInfo(CallGraph *cg = 0, const TargetData *td = 0)
+ : CG(cg), TD(td) {}
+
+ /// CG - If non-null, InlineFunction will update the callgraph to reflect the
+ /// changes it makes.
+ CallGraph *CG;
+ const TargetData *TD;
+
+ /// StaticAllocas - InlineFunction fills this in with all static allocas that
+ /// get copied into the caller.
+ SmallVector<AllocaInst*, 4> StaticAllocas;
+
+ /// InlinedCalls - InlineFunction fills this in with callsites that were
+ /// inlined from the callee. This is only filled in if CG is non-null.
+ SmallVector<WeakVH, 8> InlinedCalls;
+
+ void reset() {
+ StaticAllocas.clear();
+ InlinedCalls.clear();
+ }
+};
+
/// InlineFunction - This function inlines the called function into the basic
/// block of the caller. This returns false if it is not possible to inline
/// this call. The program is still in a well defined state if this occurs
@@ -168,18 +196,9 @@ void CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
/// exists in the instruction stream. Similarly this will inline a recursive
/// function by one level.
///
-/// If a non-null callgraph pointer is provided, these functions update the
-/// CallGraph to represent the program after inlining.
-///
-/// If StaticAllocas is non-null, InlineFunction populates it with all of the
-/// static allocas that it inlines into the caller.
-///
-bool InlineFunction(CallInst *C, CallGraph *CG = 0, const TargetData *TD = 0,
- SmallVectorImpl<AllocaInst*> *StaticAllocas = 0);
-bool InlineFunction(InvokeInst *II, CallGraph *CG = 0, const TargetData *TD = 0,
- SmallVectorImpl<AllocaInst*> *StaticAllocas = 0);
-bool InlineFunction(CallSite CS, CallGraph *CG = 0, const TargetData *TD = 0,
- SmallVectorImpl<AllocaInst*> *StaticAllocas = 0);
+bool InlineFunction(CallInst *C, InlineFunctionInfo &IFI);
+bool InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI);
+bool InlineFunction(CallSite CS, InlineFunctionInfo &IFI);
} // End llvm namespace
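
For illustration, a minimal sketch (not from this patch) of the consolidated interface in use; the helper name inlineAndCollect and the follow-up processing are illustrative assumptions.

    #include "llvm/Instructions.h"
    #include "llvm/Transforms/Utils/Cloning.h"
    using namespace llvm;

    // Illustrative only: inline one call, then visit the static allocas
    // the inliner copied into the caller.
    bool inlineAndCollect(CallInst *CI, CallGraph *CG, const TargetData *TD) {
      InlineFunctionInfo IFI(CG, TD);     // CG and TD may both be null
      if (!InlineFunction(CI, IFI))
        return false;                     // the call was not inlinable
      for (unsigned i = 0, e = IFI.StaticAllocas.size(); i != e; ++i)
        (void)IFI.StaticAllocas[i];       // e.g. queue for mem2reg/SROA
      IFI.reset();                        // the same object can be reused
      return true;
    }
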
diff --git a/include/llvm/Transforms/Utils/SSAUpdater.h b/include/llvm/Transforms/Utils/SSAUpdater.h
index 927e156abfb5..5b77ed660fcb 100644
--- a/include/llvm/Transforms/Utils/SSAUpdater.h
+++ b/include/llvm/Transforms/Utils/SSAUpdater.h
@@ -21,28 +21,31 @@ namespace llvm {
class PHINode;
template<typename T>
class SmallVectorImpl;
+ class BumpPtrAllocator;
/// SSAUpdater - This class updates SSA form for a set of values defined in
/// multiple blocks. This is used when code duplication or another unstructured
/// transformation wants to rewrite a set of uses of one value with uses of a
/// set of values.
class SSAUpdater {
+public:
+ class BBInfo;
+ typedef SmallVectorImpl<BBInfo*> BlockListTy;
+
+private:
/// AvailableVals - This keeps track of which value to use on a per-block
- /// basis. When we insert PHI nodes, we keep track of them here. We use
- /// TrackingVH's for the value of the map because we RAUW PHI nodes when we
- /// eliminate them, and want the TrackingVH's to track this.
- //typedef DenseMap<BasicBlock*, TrackingVH<Value> > AvailableValsTy;
+ /// basis. When we insert PHI nodes, we keep track of them here.
+ //typedef DenseMap<BasicBlock*, Value*> AvailableValsTy;
void *AV;
/// PrototypeValue is an arbitrary representative value from which we derive
/// names and a type for PHI nodes.
Value *PrototypeValue;
- /// IncomingPredInfo - We use this as scratch space when doing our recursive
- /// walk. This should only be used in GetValueInBlockInternal, normally it
- /// should be empty.
- //std::vector<std::pair<BasicBlock*, TrackingVH<Value> > > IncomingPredInfo;
- void *IPI;
+ /// BBMap - The GetValueAtEndOfBlock method maintains this mapping from
+ /// basic blocks to BBInfo structures.
+ /// typedef DenseMap<BasicBlock*, BBInfo*> BBMapTy;
+ void *BM;
/// InsertedPHIs - If this is non-null, the SSAUpdater adds all PHI nodes that
/// it creates to the vector.
@@ -99,6 +102,15 @@ public:
private:
Value *GetValueAtEndOfBlockInternal(BasicBlock *BB);
+ void BuildBlockList(BasicBlock *BB, BlockListTy *BlockList,
+ BumpPtrAllocator *Allocator);
+ void FindDominators(BlockListTy *BlockList);
+ void FindPHIPlacement(BlockListTy *BlockList);
+ void FindAvailableVals(BlockListTy *BlockList);
+ void FindExistingPHI(BasicBlock *BB, BlockListTy *BlockList);
+ bool CheckIfPHIMatches(PHINode *PHI);
+ void RecordMatchingPHI(PHINode *PHI);
+
void operator=(const SSAUpdater&); // DO NOT IMPLEMENT
SSAUpdater(const SSAUpdater&); // DO NOT IMPLEMENT
};
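
A sketch (not from this patch) of the public workflow these private helpers serve, assuming this era's entry points (Initialize, AddAvailableValue, RewriteUse); the local names are illustrative.

    #include "llvm/Instructions.h"
    #include "llvm/Transforms/Utils/SSAUpdater.h"
    using namespace llvm;

    // Illustrative only: rewrite a use when a value is available in two
    // blocks; BBInfo/BlockListTy above are implementation details.
    void rewriteOneUse(Value *Proto, BasicBlock *BB1, Value *V1,
                       BasicBlock *BB2, Value *V2, Use &U) {
      SSAUpdater SSA;
      SSA.Initialize(Proto);            // prototype for new PHI names/types
      SSA.AddAvailableValue(BB1, V1);   // V1 is live out of BB1
      SSA.AddAvailableValue(BB2, V2);   // V2 is live out of BB2
      SSA.RewriteUse(U);                // inserts PHI nodes only as needed
    }
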
diff --git a/include/llvm/Type.h b/include/llvm/Type.h
index df3a16c59261..52229acb3238 100644
--- a/include/llvm/Type.h
+++ b/include/llvm/Type.h
@@ -406,6 +406,7 @@ public:
static const Type *getX86_FP80Ty(LLVMContext &C);
static const Type *getFP128Ty(LLVMContext &C);
static const Type *getPPC_FP128Ty(LLVMContext &C);
+ static const IntegerType *getIntNTy(LLVMContext &C, unsigned N);
static const IntegerType *getInt1Ty(LLVMContext &C);
static const IntegerType *getInt8Ty(LLVMContext &C);
static const IntegerType *getInt16Ty(LLVMContext &C);
@@ -421,6 +422,8 @@ public:
static const PointerType *getX86_FP80PtrTy(LLVMContext &C, unsigned AS = 0);
static const PointerType *getFP128PtrTy(LLVMContext &C, unsigned AS = 0);
static const PointerType *getPPC_FP128PtrTy(LLVMContext &C, unsigned AS = 0);
+ static const PointerType *getIntNPtrTy(LLVMContext &C, unsigned N,
+ unsigned AS = 0);
static const PointerType *getInt1PtrTy(LLVMContext &C, unsigned AS = 0);
static const PointerType *getInt8PtrTy(LLVMContext &C, unsigned AS = 0);
static const PointerType *getInt16PtrTy(LLVMContext &C, unsigned AS = 0);
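
A small sketch (not from this patch) showing that the new accessors generalize the fixed-width ones; the function name is illustrative.

    #include "llvm/DerivedTypes.h"
    #include "llvm/LLVMContext.h"
    #include <cassert>
    using namespace llvm;

    // Illustrative only: getIntNTy/getIntNPtrTy subsume the fixed widths.
    void intNTyDemo(LLVMContext &Ctx) {
      assert(Type::getIntNTy(Ctx, 32) == Type::getInt32Ty(Ctx) &&
             "integer types are uniqued per context");
      const PointerType *P = Type::getIntNPtrTy(Ctx, 24); // i24* in AS 0
      (void)P;
    }
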
diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp
index 308b9e3f640d..bfa3ff1f9e6c 100644
--- a/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -108,6 +108,11 @@ PrintModRefResults(const char *Msg, bool P, Instruction *I, Value *Ptr,
}
}
+static inline bool isInterestingPointer(Value *V) {
+ return V->getType()->isPointerTy()
+ && !isa<ConstantPointerNull>(V);
+}
+
bool AAEval::runOnFunction(Function &F) {
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
@@ -115,21 +120,31 @@ bool AAEval::runOnFunction(Function &F) {
SetVector<CallSite> CallSites;
for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I)
- if (I->getType()->isPointerTy()) // Add all pointer arguments
+ if (I->getType()->isPointerTy()) // Add all pointer arguments.
Pointers.insert(I);
for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
- if (I->getType()->isPointerTy()) // Add all pointer instructions
+ if (I->getType()->isPointerTy()) // Add all pointer instructions.
Pointers.insert(&*I);
Instruction &Inst = *I;
- User::op_iterator OI = Inst.op_begin();
CallSite CS = CallSite::get(&Inst);
- if (CS.getInstruction() &&
- isa<Function>(CS.getCalledValue()))
- ++OI; // Skip actual functions for direct function calls.
- for (; OI != Inst.op_end(); ++OI)
- if ((*OI)->getType()->isPointerTy() && !isa<ConstantPointerNull>(*OI))
- Pointers.insert(*OI);
+ if (CS) {
+ Value *Callee = CS.getCalledValue();
+ // Skip actual functions for direct function calls.
+ if (!isa<Function>(Callee) && isInterestingPointer(Callee))
+ Pointers.insert(Callee);
+ // Consider formals.
+ for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
+ AI != AE; ++AI)
+ if (isInterestingPointer(*AI))
+ Pointers.insert(*AI);
+ } else {
+ // Consider all operands.
+ for (Instruction::op_iterator OI = Inst.op_begin(), OE = Inst.op_end();
+ OI != OE; ++OI)
+ if (isInterestingPointer(*OI))
+ Pointers.insert(*OI);
+ }
if (CS.getInstruction()) CallSites.insert(CS);
}
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 31a649d5cc48..cfe7a1c0ca2d 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -655,6 +655,11 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, unsigned PNSize,
AliasAnalysis::AliasResult
BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size,
const Value *V2, unsigned V2Size) {
+ // If either of the memory references is empty, it doesn't matter what the
+ // pointer values are.
+ if (V1Size == 0 || V2Size == 0)
+ return NoAlias;
+
// Strip off any casts if they exist.
V1 = V1->stripPointerCasts();
V2 = V2->stripPointerCasts();
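
A sketch (not from this patch) of the query behavior the new early exit establishes; the helper name is illustrative.

    #include "llvm/Analysis/AliasAnalysis.h"
    using namespace llvm;

    // Illustrative only: a zero-sized reference cannot overlap anything,
    // so this now returns NoAlias no matter what P and Q are.
    AliasAnalysis::AliasResult zeroSizeQuery(AliasAnalysis &AA,
                                             const Value *P, const Value *Q) {
      return AA.alias(P, /*V1Size=*/0, Q, /*V2Size=*/8);
    }
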
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index 17c9b86c1c49..ad05dd99940b 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -21,6 +21,7 @@ add_llvm_library(LLVMAnalysis
LazyValueInfo.cpp
LibCallAliasAnalysis.cpp
LibCallSemantics.cpp
+ Lint.cpp
LiveValues.cpp
LoopDependenceAnalysis.cpp
LoopInfo.cpp
@@ -38,6 +39,7 @@ add_llvm_library(LLVMAnalysis
ScalarEvolution.cpp
ScalarEvolutionAliasAnalysis.cpp
ScalarEvolutionExpander.cpp
+ ScalarEvolutionNormalization.cpp
SparsePropagation.cpp
Trace.cpp
ValueTracking.cpp
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index dda1fbad64f0..37cda0221026 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -401,7 +401,7 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
APInt ResultVal = APInt(IntType->getBitWidth(), RawBytes[BytesLoaded-1]);
for (unsigned i = 1; i != BytesLoaded; ++i) {
ResultVal <<= 8;
- ResultVal |= APInt(IntType->getBitWidth(), RawBytes[BytesLoaded-1-i]);
+ ResultVal |= RawBytes[BytesLoaded-1-i];
}
return ConstantInt::get(IntType->getContext(), ResultVal);
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index 8ba19020b099..141b181ab716 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -402,6 +402,17 @@ uint64_t DIDerivedType::getOriginalTypeSize() const {
return getSizeInBits();
}
+/// isInlinedFnArgument - Return true if this variable provides debugging
+/// information for an inlined function's arguments.
+bool DIVariable::isInlinedFnArgument(const Function *CurFn) {
+ assert(CurFn && "Invalid function");
+ if (!getContext().isSubprogram())
+ return false;
+  // This variable is not an inlined function argument if its scope
+  // does not describe the current function.
+ return !(DISubprogram(getContext().getNode()).describes(CurFn));
+}
+
/// describes - Return true if this subprogram provides debugging
/// information for the function F.
bool DISubprogram::describes(const Function *F) {
@@ -414,6 +425,13 @@ bool DISubprogram::describes(const Function *F) {
return false;
}
+unsigned DISubprogram::isOptimized() const {
+  assert(DbgNode && "Invalid subprogram descriptor!");
+ if (DbgNode->getNumOperands() == 16)
+ return getUnsignedField(15);
+ return 0;
+}
+
StringRef DIScope::getFilename() const {
if (!DbgNode)
return StringRef();
@@ -901,7 +919,8 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
bool isDefinition,
unsigned VK, unsigned VIndex,
DIType ContainingType,
- bool isArtificial) {
+ bool isArtificial,
+ bool isOptimized) {
Value *Elts[] = {
GetTagConstant(dwarf::DW_TAG_subprogram),
@@ -918,9 +937,10 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
ConstantInt::get(Type::getInt32Ty(VMContext), (unsigned)VK),
ConstantInt::get(Type::getInt32Ty(VMContext), VIndex),
ContainingType.getNode(),
- ConstantInt::get(Type::getInt1Ty(VMContext), isArtificial)
+ ConstantInt::get(Type::getInt1Ty(VMContext), isArtificial),
+ ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized)
};
- return DISubprogram(MDNode::get(VMContext, &Elts[0], 15));
+ return DISubprogram(MDNode::get(VMContext, &Elts[0], 16));
}
/// CreateSubprogramDefinition - Create new subprogram descriptor for the
@@ -945,9 +965,10 @@ DISubprogram DIFactory::CreateSubprogramDefinition(DISubprogram &SPDeclaration)
DeclNode->getOperand(11), // Virtuality
DeclNode->getOperand(12), // VIndex
DeclNode->getOperand(13), // Containing Type
- DeclNode->getOperand(14) // isArtificial
+ DeclNode->getOperand(14), // isArtificial
+ DeclNode->getOperand(15) // isOptimized
};
- return DISubprogram(MDNode::get(VMContext, &Elts[0], 15));
+ return DISubprogram(MDNode::get(VMContext, &Elts[0], 16));
}
/// CreateGlobalVariable - Create a new descriptor for the specified global.
@@ -1150,10 +1171,8 @@ void DebugInfoFinder::processModule(Module &M) {
for (Function::iterator FI = (*I).begin(), FE = (*I).end(); FI != FE; ++FI)
for (BasicBlock::iterator BI = (*FI).begin(), BE = (*FI).end(); BI != BE;
++BI) {
- if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) {
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI))
processDeclare(DDI);
- continue;
- }
DebugLoc Loc = BI->getDebugLoc();
if (Loc.isUnknown())
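
A sketch (not from this patch) combining the two new DebugInfo queries; the predicate name and its use are illustrative assumptions.

    #include "llvm/Analysis/DebugInfo.h"
    using namespace llvm;

    // Illustrative only: older 15-operand subprogram descriptors simply
    // report isOptimized() == 0, so the check degrades gracefully.
    bool inlinedArgOfOptimizedFn(DIVariable Var, DISubprogram SP,
                                 const Function *CurFn) {
      return Var.isInlinedFnArgument(CurFn) && SP.isOptimized();
    }
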
diff --git a/lib/Analysis/DomPrinter.cpp b/lib/Analysis/DomPrinter.cpp
index 3af687af1849..a1676e5bb613 100644
--- a/lib/Analysis/DomPrinter.cpp
+++ b/lib/Analysis/DomPrinter.cpp
@@ -83,31 +83,6 @@ struct DOTGraphTraits<PostDominatorTree*>
}
namespace {
-template <class Analysis, bool OnlyBBS>
-struct GenericGraphViewer : public FunctionPass {
- std::string Name;
-
- GenericGraphViewer(std::string GraphName, const void *ID) : FunctionPass(ID) {
- Name = GraphName;
- }
-
- virtual bool runOnFunction(Function &F) {
- Analysis *Graph;
- std::string Title, GraphName;
- Graph = &getAnalysis<Analysis>();
- GraphName = DOTGraphTraits<Analysis*>::getGraphName(Graph);
- Title = GraphName + " for '" + F.getNameStr() + "' function";
- ViewGraph(Graph, Name, OnlyBBS, Title);
-
- return false;
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- AU.addRequired<Analysis>();
- }
-};
-
struct DomViewer
: public DOTGraphTraitsViewer<DominatorTree, false> {
static char ID;
diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp
index 8c43aa14885d..2bde56d71889 100644
--- a/lib/Analysis/IPA/CallGraph.cpp
+++ b/lib/Analysis/IPA/CallGraph.cpp
@@ -158,8 +158,11 @@ private:
// destroy - Release memory for the call graph
virtual void destroy() {
/// CallsExternalNode is not in the function map, delete it explicitly.
- delete CallsExternalNode;
- CallsExternalNode = 0;
+ if (CallsExternalNode) {
+ CallsExternalNode->allReferencesDropped();
+ delete CallsExternalNode;
+ CallsExternalNode = 0;
+ }
CallGraph::destroy();
}
};
@@ -181,6 +184,14 @@ void CallGraph::initialize(Module &M) {
void CallGraph::destroy() {
if (FunctionMap.empty()) return;
+  // Reset all nodes' use counts to zero before deleting them to prevent an
+ // assertion from firing.
+#ifndef NDEBUG
+ for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end();
+ I != E; ++I)
+ I->second->allReferencesDropped();
+#endif
+
for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end();
I != E; ++I)
delete I->second;
@@ -233,14 +244,16 @@ void CallGraphNode::print(raw_ostream &OS) const {
else
OS << "Call graph node <<null function>>";
- OS << "<<0x" << this << ">> #uses=" << getNumReferences() << '\n';
+ OS << "<<" << this << ">> #uses=" << getNumReferences() << '\n';
- for (const_iterator I = begin(), E = end(); I != E; ++I)
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ OS << " CS<" << I->first << "> calls ";
if (Function *FI = I->second->getFunction())
- OS << " Calls function '" << FI->getName() <<"'\n";
- else
- OS << " Calls external node\n";
- OS << "\n";
+ OS << "function '" << FI->getName() <<"'\n";
+ else
+ OS << "external node\n";
+ }
+ OS << '\n';
}
void CallGraphNode::dump() const { print(dbgs()); }
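
A sketch (not from this patch) of the teardown order it establishes; the helper name is illustrative.

    #include "llvm/Analysis/CallGraph.h"
    #include <vector>
    using namespace llvm;

    // Illustrative only: drop every node's references before deleting any
    // of them, so the debug-build use-count assertions hold even on
    // cyclic call graphs.
    void destroyNodes(std::vector<CallGraphNode*> &Nodes) {
      for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
        Nodes[i]->allReferencesDropped();  // zero the use counts up front
      for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
        delete Nodes[i];                   // now safe in any order
    }
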
diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp
index fb0804190ac1..0c01ee5b8284 100644
--- a/lib/Analysis/IPA/CallGraphSCCPass.cpp
+++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -22,11 +22,18 @@
#include "llvm/PassManagers.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+static cl::opt<unsigned>
+MaxIterations("max-cg-scc-iterations", cl::ReallyHidden, cl::init(4));
+
+STATISTIC(MaxSCCIterations, "Maximum CGSCCPassMgr iterations on one SCC");
+
//===----------------------------------------------------------------------===//
// CGPassManager
//
@@ -81,55 +88,31 @@ public:
}
private:
- bool RunPassOnSCC(Pass *P, std::vector<CallGraphNode*> &CurSCC,
- CallGraph &CG, bool &CallGraphUpToDate);
- void RefreshCallGraph(std::vector<CallGraphNode*> &CurSCC, CallGraph &CG,
+ bool RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG,
+ bool &DevirtualizedCall);
+
+ bool RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
+ CallGraph &CG, bool &CallGraphUpToDate,
+ bool &DevirtualizedCall);
+ bool RefreshCallGraph(CallGraphSCC &CurSCC, CallGraph &CG,
bool IsCheckingMode);
};
-/// PrintCallGraphPass - Print a Module corresponding to a call graph.
-///
-class PrintCallGraphPass : public CallGraphSCCPass {
-private:
- std::string Banner;
- raw_ostream &Out; // raw_ostream to print on.
-
-public:
- static char ID;
- PrintCallGraphPass() : CallGraphSCCPass(&ID), Out(dbgs()) {}
- PrintCallGraphPass(const std::string &B, raw_ostream &o)
- : CallGraphSCCPass(&ID), Banner(B), Out(o) {}
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- }
-
- bool runOnSCC(std::vector<CallGraphNode *> &SCC) {
- Out << Banner;
- for (std::vector<CallGraphNode *>::iterator n = SCC.begin(), ne = SCC.end();
- n != ne;
- ++n) {
- (*n)->getFunction()->print(Out);
- }
- return false;
- }
-};
-
} // end anonymous namespace.
char CGPassManager::ID = 0;
-char PrintCallGraphPass::ID = 0;
-bool CGPassManager::RunPassOnSCC(Pass *P, std::vector<CallGraphNode*> &CurSCC,
- CallGraph &CG, bool &CallGraphUpToDate) {
+bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
+ CallGraph &CG, bool &CallGraphUpToDate,
+ bool &DevirtualizedCall) {
bool Changed = false;
PMDataManager *PM = P->getAsPMDataManager();
if (PM == 0) {
CallGraphSCCPass *CGSP = (CallGraphSCCPass*)P;
if (!CallGraphUpToDate) {
- RefreshCallGraph(CurSCC, CG, false);
+ DevirtualizedCall |= RefreshCallGraph(CurSCC, CG, false);
CallGraphUpToDate = true;
}
@@ -154,8 +137,9 @@ bool CGPassManager::RunPassOnSCC(Pass *P, std::vector<CallGraphNode*> &CurSCC,
FPPassManager *FPP = (FPPassManager*)P;
// Run pass P on all functions in the current SCC.
- for (unsigned i = 0, e = CurSCC.size(); i != e; ++i) {
- if (Function *F = CurSCC[i]->getFunction()) {
+ for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end();
+ I != E; ++I) {
+ if (Function *F = (*I)->getFunction()) {
dumpPassInfo(P, EXECUTION_MSG, ON_FUNCTION_MSG, F->getName());
TimeRegion PassTimer(getPassTimer(FPP));
Changed |= FPP->runOnFunction(*F);
@@ -178,26 +162,39 @@ bool CGPassManager::RunPassOnSCC(Pass *P, std::vector<CallGraphNode*> &CurSCC,
/// FunctionPasses have potentially munged the callgraph, and can be used after
/// CallGraphSCC passes to verify that they correctly updated the callgraph.
///
-void CGPassManager::RefreshCallGraph(std::vector<CallGraphNode*> &CurSCC,
+/// This function returns true if it devirtualized an existing function call,
+/// meaning it turned an indirect call into a direct call. This happens when
+/// a function pass like GVN optimizes away the computation feeding the
+/// indirect call. This never happens in checking mode.
+///
+bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
CallGraph &CG, bool CheckingMode) {
DenseMap<Value*, CallGraphNode*> CallSites;
DEBUG(dbgs() << "CGSCCPASSMGR: Refreshing SCC with " << CurSCC.size()
<< " nodes:\n";
- for (unsigned i = 0, e = CurSCC.size(); i != e; ++i)
- CurSCC[i]->dump();
+ for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end();
+ I != E; ++I)
+ (*I)->dump();
);
bool MadeChange = false;
+ bool DevirtualizedCall = false;
// Scan all functions in the SCC.
- for (unsigned sccidx = 0, e = CurSCC.size(); sccidx != e; ++sccidx) {
- CallGraphNode *CGN = CurSCC[sccidx];
+ unsigned FunctionNo = 0;
+ for (CallGraphSCC::iterator SCCIdx = CurSCC.begin(), E = CurSCC.end();
+ SCCIdx != E; ++SCCIdx, ++FunctionNo) {
+ CallGraphNode *CGN = *SCCIdx;
Function *F = CGN->getFunction();
if (F == 0 || F->isDeclaration()) continue;
// Walk the function body looking for call sites. Sync up the call sites in
// CGN with those actually in the function.
+
+ // Keep track of the number of direct and indirect calls that were
+ // invalidated and removed.
+ unsigned NumDirectRemoved = 0, NumIndirectRemoved = 0;
// Get the set of call sites currently in the function.
for (CallGraphNode::iterator I = CGN->begin(), E = CGN->end(); I != E; ) {
@@ -216,6 +213,12 @@ void CGPassManager::RefreshCallGraph(std::vector<CallGraphNode*> &CurSCC,
assert(!CheckingMode &&
"CallGraphSCCPass did not update the CallGraph correctly!");
+ // If this was an indirect call site, count it.
+ if (I->second->getFunction() == 0)
+ ++NumIndirectRemoved;
+ else
+ ++NumDirectRemoved;
+
// Just remove the edge from the set of callees, keep track of whether
// I points to the last element of the vector.
bool WasLast = I + 1 == E;
@@ -237,6 +240,9 @@ void CGPassManager::RefreshCallGraph(std::vector<CallGraphNode*> &CurSCC,
}
// Loop over all of the instructions in the function, getting the callsites.
+ // Keep track of the number of direct/indirect calls added.
+ unsigned NumDirectAdded = 0, NumIndirectAdded = 0;
+
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
CallSite CS = CallSite::get(I);
@@ -271,19 +277,21 @@ void CGPassManager::RefreshCallGraph(std::vector<CallGraphNode*> &CurSCC,
// If not, we either went from a direct call to indirect, indirect to
// direct, or direct to different direct.
CallGraphNode *CalleeNode;
- if (Function *Callee = CS.getCalledFunction())
+ if (Function *Callee = CS.getCalledFunction()) {
CalleeNode = CG.getOrInsertFunction(Callee);
- else
+ // Keep track of whether we turned an indirect call into a direct
+ // one.
+ if (ExistingNode->getFunction() == 0) {
+ DevirtualizedCall = true;
+ DEBUG(dbgs() << " CGSCCPASSMGR: Devirtualized call to '"
+ << Callee->getName() << "'\n");
+ }
+ } else {
CalleeNode = CG.getCallsExternalNode();
+ }
// Update the edge target in CGN.
- for (CallGraphNode::iterator I = CGN->begin(); ; ++I) {
- assert(I != CGN->end() && "Didn't find call entry");
- if (I->first == CS.getInstruction()) {
- I->second = CalleeNode;
- break;
- }
- }
+ CGN->replaceCallEdge(CS, CS, CalleeNode);
MadeChange = true;
continue;
}
@@ -291,19 +299,34 @@ void CGPassManager::RefreshCallGraph(std::vector<CallGraphNode*> &CurSCC,
assert(!CheckingMode &&
"CallGraphSCCPass did not update the CallGraph correctly!");
- // If the call site didn't exist in the CGN yet, add it. We assume that
- // newly introduced call sites won't be indirect. This could be fixed
- // in the future.
+ // If the call site didn't exist in the CGN yet, add it.
CallGraphNode *CalleeNode;
- if (Function *Callee = CS.getCalledFunction())
+ if (Function *Callee = CS.getCalledFunction()) {
CalleeNode = CG.getOrInsertFunction(Callee);
- else
+ ++NumDirectAdded;
+ } else {
CalleeNode = CG.getCallsExternalNode();
+ ++NumIndirectAdded;
+ }
CGN->addCalledFunction(CS, CalleeNode);
MadeChange = true;
}
+ // We scanned the old callgraph node, removing invalidated call sites and
+ // then added back newly found call sites. One thing that can happen is
+ // that an old indirect call site was deleted and replaced with a new direct
+ // call. In this case, we have devirtualized a call, and CGSCCPM would like
+ // to iteratively optimize the new code. Unfortunately, we don't really
+ // have a great way to detect when this happens. As an approximation, we
+ // just look at whether the number of indirect calls is reduced and the
+  // number of direct calls is increased. There are many ways to fool this
+ // (e.g. DCE'ing an indirect call and duplicating an unrelated block with a
+ // direct call) but this is close enough.
+ if (NumIndirectRemoved > NumIndirectAdded &&
+ NumDirectRemoved < NumDirectAdded)
+ DevirtualizedCall = true;
+
// After scanning this function, if we still have entries in callsites, then
// they are dangling pointers. WeakVH should save us for this, so abort if
// this happens.
@@ -311,18 +334,85 @@ void CGPassManager::RefreshCallGraph(std::vector<CallGraphNode*> &CurSCC,
// Periodically do an explicit clear to remove tombstones when processing
// large scc's.
- if ((sccidx & 15) == 0)
+ if ((FunctionNo & 15) == 15)
CallSites.clear();
}
DEBUG(if (MadeChange) {
dbgs() << "CGSCCPASSMGR: Refreshed SCC is now:\n";
- for (unsigned i = 0, e = CurSCC.size(); i != e; ++i)
- CurSCC[i]->dump();
+ for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end();
+ I != E; ++I)
+ (*I)->dump();
+ if (DevirtualizedCall)
+ dbgs() << "CGSCCPASSMGR: Refresh devirtualized a call!\n";
+
} else {
dbgs() << "CGSCCPASSMGR: SCC Refresh didn't change call graph.\n";
}
);
+
+ return DevirtualizedCall;
+}
+
+/// RunAllPassesOnSCC - Execute the body of the entire pass manager on the
+/// specified SCC. This keeps track of whether a function pass devirtualizes
+/// any calls and returns it in DevirtualizedCall.
+bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG,
+ bool &DevirtualizedCall) {
+ bool Changed = false;
+
+ // CallGraphUpToDate - Keep track of whether the callgraph is known to be
+  // up-to-date or not. The CGSCC pass manager runs two types of passes:
+ // CallGraphSCC Passes and other random function passes. Because other
+ // random function passes are not CallGraph aware, they may clobber the
+ // call graph by introducing new calls or deleting other ones. This flag
+ // is set to false when we run a function pass so that we know to clean up
+ // the callgraph when we need to run a CGSCCPass again.
+ bool CallGraphUpToDate = true;
+
+ // Run all passes on current SCC.
+ for (unsigned PassNo = 0, e = getNumContainedPasses();
+ PassNo != e; ++PassNo) {
+ Pass *P = getContainedPass(PassNo);
+
+ // If we're in -debug-pass=Executions mode, construct the SCC node list,
+ // otherwise avoid constructing this string as it is expensive.
+ if (isPassDebuggingExecutionsOrMore()) {
+ std::string Functions;
+ #ifndef NDEBUG
+ raw_string_ostream OS(Functions);
+ for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end();
+ I != E; ++I) {
+ if (I != CurSCC.begin()) OS << ", ";
+ (*I)->print(OS);
+ }
+ OS.flush();
+ #endif
+ dumpPassInfo(P, EXECUTION_MSG, ON_CG_MSG, Functions);
+ }
+ dumpRequiredSet(P);
+
+ initializeAnalysisImpl(P);
+
+ // Actually run this pass on the current SCC.
+ Changed |= RunPassOnSCC(P, CurSCC, CG,
+ CallGraphUpToDate, DevirtualizedCall);
+
+ if (Changed)
+ dumpPassInfo(P, MODIFICATION_MSG, ON_CG_MSG, "");
+ dumpPreservedSet(P);
+
+ verifyPreservedAnalysis(P);
+ removeNotPreservedAnalysis(P);
+ recordAvailableAnalysis(P);
+ removeDeadPasses(P, "", ON_CG_MSG);
+ }
+
+ // If the callgraph was left out of date (because the last pass run was a
+ // functionpass), refresh it before we move on to the next SCC.
+ if (!CallGraphUpToDate)
+ DevirtualizedCall |= RefreshCallGraph(CurSCC, CG, false);
+ return Changed;
}
/// run - Execute all of the passes scheduled for execution. Keep track of
@@ -330,72 +420,53 @@ void CGPassManager::RefreshCallGraph(std::vector<CallGraphNode*> &CurSCC,
bool CGPassManager::runOnModule(Module &M) {
CallGraph &CG = getAnalysis<CallGraph>();
bool Changed = doInitialization(CG);
-
- std::vector<CallGraphNode*> CurSCC;
// Walk the callgraph in bottom-up SCC order.
- for (scc_iterator<CallGraph*> CGI = scc_begin(&CG), E = scc_end(&CG);
- CGI != E;) {
+ scc_iterator<CallGraph*> CGI = scc_begin(&CG);
+
+ CallGraphSCC CurSCC(&CGI);
+ while (!CGI.isAtEnd()) {
// Copy the current SCC and increment past it so that the pass can hack
// on the SCC if it wants to without invalidating our iterator.
- CurSCC = *CGI;
+ std::vector<CallGraphNode*> &NodeVec = *CGI;
+ CurSCC.initialize(&NodeVec[0], &NodeVec[0]+NodeVec.size());
++CGI;
+ // At the top level, we run all the passes in this pass manager on the
+ // functions in this SCC. However, we support iterative compilation in the
+ // case where a function pass devirtualizes a call to a function. For
+ // example, it is very common for a function pass (often GVN or instcombine)
+ // to eliminate the addressing that feeds into a call. With that improved
+ // information, we would like the call to be an inline candidate, infer
+ // mod-ref information etc.
+ //
+ // Because of this, we allow iteration up to a specified iteration count.
+ // This only happens in the case of a devirtualized call, so we only burn
+ // compile time in the case that we're making progress. We also have a hard
+    // iteration count limit in case there is pathological code.
+ unsigned Iteration = 0;
+ bool DevirtualizedCall = false;
+ do {
+ DEBUG(if (Iteration)
+ dbgs() << " SCCPASSMGR: Re-visiting SCC, iteration #"
+ << Iteration << '\n');
+ DevirtualizedCall = false;
+ Changed |= RunAllPassesOnSCC(CurSCC, CG, DevirtualizedCall);
+ } while (Iteration++ < MaxIterations && DevirtualizedCall);
- // CallGraphUpToDate - Keep track of whether the callgraph is known to be
- // up-to-date or not. The CGSSC pass manager runs two types of passes:
- // CallGraphSCC Passes and other random function passes. Because other
- // random function passes are not CallGraph aware, they may clobber the
- // call graph by introducing new calls or deleting other ones. This flag
- // is set to false when we run a function pass so that we know to clean up
- // the callgraph when we need to run a CGSCCPass again.
- bool CallGraphUpToDate = true;
+ if (DevirtualizedCall)
+ DEBUG(dbgs() << " CGSCCPASSMGR: Stopped iteration after " << Iteration
+ << " times, due to -max-cg-scc-iterations\n");
- // Run all passes on current SCC.
- for (unsigned PassNo = 0, e = getNumContainedPasses();
- PassNo != e; ++PassNo) {
- Pass *P = getContainedPass(PassNo);
-
- // If we're in -debug-pass=Executions mode, construct the SCC node list,
- // otherwise avoid constructing this string as it is expensive.
- if (isPassDebuggingExecutionsOrMore()) {
- std::string Functions;
-#ifndef NDEBUG
- raw_string_ostream OS(Functions);
- for (unsigned i = 0, e = CurSCC.size(); i != e; ++i) {
- if (i) OS << ", ";
- CurSCC[i]->print(OS);
- }
- OS.flush();
-#endif
- dumpPassInfo(P, EXECUTION_MSG, ON_CG_MSG, Functions);
- }
- dumpRequiredSet(P);
-
- initializeAnalysisImpl(P);
-
- // Actually run this pass on the current SCC.
- Changed |= RunPassOnSCC(P, CurSCC, CG, CallGraphUpToDate);
-
- if (Changed)
- dumpPassInfo(P, MODIFICATION_MSG, ON_CG_MSG, "");
- dumpPreservedSet(P);
-
- verifyPreservedAnalysis(P);
- removeNotPreservedAnalysis(P);
- recordAvailableAnalysis(P);
- removeDeadPasses(P, "", ON_CG_MSG);
- }
+ if (Iteration > MaxSCCIterations)
+ MaxSCCIterations = Iteration;
- // If the callgraph was left out of date (because the last pass run was a
- // functionpass), refresh it before we move on to the next SCC.
- if (!CallGraphUpToDate)
- RefreshCallGraph(CurSCC, CG, false);
}
Changed |= doFinalization(CG);
return Changed;
}
+
/// Initialize CG
bool CGPassManager::doInitialization(CallGraph &CG) {
bool Changed = false;
@@ -426,11 +497,32 @@ bool CGPassManager::doFinalization(CallGraph &CG) {
return Changed;
}
-Pass *CallGraphSCCPass::createPrinterPass(raw_ostream &O,
- const std::string &Banner) const {
- return new PrintCallGraphPass(Banner, O);
+//===----------------------------------------------------------------------===//
+// CallGraphSCC Implementation
+//===----------------------------------------------------------------------===//
+
+/// ReplaceNode - This informs the SCC and the pass manager that the specified
+/// Old node has been deleted, and New is to be used in its place.
+void CallGraphSCC::ReplaceNode(CallGraphNode *Old, CallGraphNode *New) {
+ assert(Old != New && "Should not replace node with self");
+ for (unsigned i = 0; ; ++i) {
+ assert(i != Nodes.size() && "Node not in SCC");
+ if (Nodes[i] != Old) continue;
+ Nodes[i] = New;
+ break;
+ }
+
+ // Update the active scc_iterator so that it doesn't contain dangling
+ // pointers to the old CallGraphNode.
+ scc_iterator<CallGraph*> *CGI = (scc_iterator<CallGraph*>*)Context;
+ CGI->ReplaceNode(Old, New);
}
+
+//===----------------------------------------------------------------------===//
+// CallGraphSCCPass Implementation
+//===----------------------------------------------------------------------===//
+
/// Assign pass manager to manage this pass.
void CallGraphSCCPass::assignPassManager(PMStack &PMS,
PassManagerType PreferredType) {
@@ -475,3 +567,43 @@ void CallGraphSCCPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<CallGraph>();
AU.addPreserved<CallGraph>();
}
+
+
+//===----------------------------------------------------------------------===//
+// PrintCallGraphPass Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// PrintCallGraphPass - Print a Module corresponding to a call graph.
+ ///
+ class PrintCallGraphPass : public CallGraphSCCPass {
+ std::string Banner;
+ raw_ostream &Out; // raw_ostream to print on.
+
+ public:
+ static char ID;
+ PrintCallGraphPass() : CallGraphSCCPass(&ID), Out(dbgs()) {}
+ PrintCallGraphPass(const std::string &B, raw_ostream &o)
+ : CallGraphSCCPass(&ID), Banner(B), Out(o) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ bool runOnSCC(CallGraphSCC &SCC) {
+ Out << Banner;
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
+ (*I)->getFunction()->print(Out);
+ return false;
+ }
+ };
+
+} // end anonymous namespace.
+
+char PrintCallGraphPass::ID = 0;
+
+Pass *CallGraphSCCPass::createPrinterPass(raw_ostream &O,
+ const std::string &Banner) const {
+ return new PrintCallGraphPass(Banner, O);
+}
+
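
For illustration, a sketch (not from this patch) of what a client pass looks like against the new CallGraphSCC interface; the pass name and its counting logic are illustrative assumptions.

    #include "llvm/CallGraphSCCPass.h"
    #include "llvm/Analysis/CallGraph.h"
    using namespace llvm;

    namespace {
      // Illustrative only: iterate the SCC with the new iterator interface
      // instead of indexing a std::vector<CallGraphNode*>.
      struct SCCFunctionCounter : public CallGraphSCCPass {
        static char ID;
        SCCFunctionCounter() : CallGraphSCCPass(&ID) {}

        virtual bool runOnSCC(CallGraphSCC &SCC) {
          unsigned NumFns = 0;
          for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end();
               I != E; ++I)
            if ((*I)->getFunction())   // external nodes have no Function
              ++NumFns;
          (void)NumFns;                // e.g. feed a STATISTIC
          return false;                // the call graph is unchanged
        }
      };
    }
    char SCCFunctionCounter::ID = 0;
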
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index 47b5d4a0f08d..2c997dae5859 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -36,146 +36,34 @@ Pass *llvm::createIVUsersPass() {
return new IVUsers();
}
-/// CollectSubexprs - Split S into subexpressions which can be pulled out into
-/// separate registers.
-static void CollectSubexprs(const SCEV *S,
- SmallVectorImpl<const SCEV *> &Ops,
- ScalarEvolution &SE) {
- if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
- // Break out add operands.
- for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
- I != E; ++I)
- CollectSubexprs(*I, Ops, SE);
- return;
- } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
- // Split a non-zero base out of an addrec.
- if (!AR->getStart()->isZero()) {
- CollectSubexprs(AR->getStart(), Ops, SE);
- CollectSubexprs(SE.getAddRecExpr(SE.getIntegerSCEV(0, AR->getType()),
- AR->getStepRecurrence(SE),
- AR->getLoop()), Ops, SE);
- return;
- }
- }
-
- // Otherwise use the value itself.
- Ops.push_back(S);
-}
+/// isInteresting - Test whether the given expression is "interesting" when
+/// used by the given expression, within the context of analyzing the
+/// given loop.
+static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L) {
+ // Anything loop-invariant is interesting.
+ if (!isa<SCEVUnknown>(S) && S->isLoopInvariant(L))
+ return true;
-/// getSCEVStartAndStride - Compute the start and stride of this expression,
-/// returning false if the expression is not a start/stride pair, or true if it
-/// is. The stride must be a loop invariant expression, but the start may be
-/// a mix of loop invariant and loop variant expressions. The start cannot,
-/// however, contain an AddRec from a different loop, unless that loop is an
-/// outer loop of the current loop.
-static bool getSCEVStartAndStride(const SCEV *&SH, Loop *L, Loop *UseLoop,
- const SCEV *&Start, const SCEV *&Stride,
- ScalarEvolution *SE, DominatorTree *DT) {
- const SCEV *TheAddRec = Start; // Initialize to zero.
-
- // If the outer level is an AddExpr, the operands are all start values except
- // for a nested AddRecExpr.
- if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(SH)) {
- for (unsigned i = 0, e = AE->getNumOperands(); i != e; ++i)
- if (const SCEVAddRecExpr *AddRec =
- dyn_cast<SCEVAddRecExpr>(AE->getOperand(i)))
- TheAddRec = SE->getAddExpr(AddRec, TheAddRec);
- else
- Start = SE->getAddExpr(Start, AE->getOperand(i));
- } else if (isa<SCEVAddRecExpr>(SH)) {
- TheAddRec = SH;
- } else {
- return false; // not analyzable.
+ // An addrec is interesting if it's affine or if it has an interesting start.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ // Keep things simple. Don't touch loop-variant strides.
+ if (AR->getLoop() == L)
+ return AR->isAffine() || !L->contains(I);
+ // Otherwise recurse to see if the start value is interesting.
+ return isInteresting(AR->getStart(), I, L);
}
- // Break down TheAddRec into its component parts.
- SmallVector<const SCEV *, 4> Subexprs;
- CollectSubexprs(TheAddRec, Subexprs, *SE);
-
- // Look for an addrec on the current loop among the parts.
- const SCEV *AddRecStride = 0;
- for (SmallVectorImpl<const SCEV *>::iterator I = Subexprs.begin(),
- E = Subexprs.end(); I != E; ++I) {
- const SCEV *S = *I;
- if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
- if (AR->getLoop() == L) {
- *I = AR->getStart();
- AddRecStride = AR->getStepRecurrence(*SE);
- break;
- }
- }
- if (!AddRecStride)
+ // An add is interesting if any of its operands is.
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ for (SCEVAddExpr::op_iterator OI = Add->op_begin(), OE = Add->op_end();
+ OI != OE; ++OI)
+ if (isInteresting(*OI, I, L))
+ return true;
return false;
-
- // Add up everything else into a start value (which may not be
- // loop-invariant).
- const SCEV *AddRecStart = SE->getAddExpr(Subexprs);
-
- // Use getSCEVAtScope to attempt to simplify other loops out of
- // the picture.
- AddRecStart = SE->getSCEVAtScope(AddRecStart, UseLoop);
-
- Start = SE->getAddExpr(Start, AddRecStart);
-
- // If stride is an instruction, make sure it properly dominates the header.
- // Otherwise we could end up with a use before def situation.
- if (!isa<SCEVConstant>(AddRecStride)) {
- BasicBlock *Header = L->getHeader();
- if (!AddRecStride->properlyDominates(Header, DT))
- return false;
-
- DEBUG(dbgs() << "[";
- WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false);
- dbgs() << "] Variable stride: " << *AddRecStride << "\n");
}
- Stride = AddRecStride;
- return true;
-}
-
-/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression
-/// and now we need to decide whether the user should use the preinc or post-inc
-/// value. If this user should use the post-inc version of the IV, return true.
-///
-/// Choosing wrong here can break dominance properties (if we choose to use the
-/// post-inc value when we cannot) or it can end up adding extra live-ranges to
-/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we
-/// should use the post-inc value).
-static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV,
- const Loop *L, DominatorTree *DT) {
- // If the user is in the loop, use the preinc value.
- if (L->contains(User)) return false;
-
- BasicBlock *LatchBlock = L->getLoopLatch();
- if (!LatchBlock)
- return false;
-
- // Ok, the user is outside of the loop. If it is dominated by the latch
- // block, use the post-inc value.
- if (DT->dominates(LatchBlock, User->getParent()))
- return true;
-
- // There is one case we have to be careful of: PHI nodes. These little guys
- // can live in blocks that are not dominated by the latch block, but (since
- // their uses occur in the predecessor block, not the block the PHI lives in)
- // should still use the post-inc value. Check for this case now.
- PHINode *PN = dyn_cast<PHINode>(User);
- if (!PN) return false; // not a phi, not dominated by latch block.
-
- // Look at all of the uses of IV by the PHI node. If any use corresponds to
- // a block that is not dominated by the latch block, give up and use the
- // preincremented value.
- unsigned NumUses = 0;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (PN->getIncomingValue(i) == IV) {
- ++NumUses;
- if (!DT->dominates(LatchBlock, PN->getIncomingBlock(i)))
- return false;
- }
-
- // Okay, all uses of IV by PN are in predecessor blocks that really are
- // dominated by the latch block. Use the post-incremented value.
- return true;
+ // Nothing else is interesting here.
+ return false;
}
/// AddUsersIfInteresting - Inspect the specified instruction. If it is a
@@ -194,18 +82,10 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
// Get the symbolic expression for this instruction.
const SCEV *ISE = SE->getSCEV(I);
- if (isa<SCEVCouldNotCompute>(ISE)) return false;
- // Get the start and stride for this expression.
- Loop *UseLoop = LI->getLoopFor(I->getParent());
- const SCEV *Start = SE->getIntegerSCEV(0, ISE->getType());
- const SCEV *Stride = Start;
-
- if (!getSCEVStartAndStride(ISE, L, UseLoop, Start, Stride, SE, DT))
- return false; // Non-reducible symbolic expression, bail out.
-
- // Keep things simple. Don't touch loop-variant strides.
- if (!Stride->isLoopInvariant(L) && L->contains(I))
+ // If we've come to an uninteresting expression, stop the traversal and
+ // call this a user.
+ if (!isInteresting(ISE, I, L))
return false;
SmallPtrSet<Instruction *, 4> UniqueUsers;
@@ -241,27 +121,22 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
}
if (AddUserToIVUsers) {
- // Okay, we found a user that we cannot reduce. Analyze the instruction
- // and decide what to do with it. If we are a use inside of the loop, use
- // the value before incrementation, otherwise use it after incrementation.
- if (IVUseShouldUsePostIncValue(User, I, L, DT)) {
- // The value used will be incremented by the stride more than we are
- // expecting, so subtract this off.
- const SCEV *NewStart = SE->getMinusSCEV(Start, Stride);
- IVUses.push_back(new IVStrideUse(this, Stride, NewStart, User, I));
- IVUses.back().setIsUseOfPostIncrementedValue(true);
- DEBUG(dbgs() << " USING POSTINC SCEV, START=" << *NewStart<< "\n");
- } else {
- IVUses.push_back(new IVStrideUse(this, Stride, Start, User, I));
- }
+ // Okay, we found a user that we cannot reduce.
+ IVUses.push_back(new IVStrideUse(this, User, I));
+ IVStrideUse &NewUse = IVUses.back();
+ // Transform the expression into a normalized form.
+ ISE = TransformForPostIncUse(NormalizeAutodetect,
+ ISE, User, I,
+ NewUse.PostIncLoops,
+ *SE, *DT);
+ DEBUG(dbgs() << " NORMALIZED TO: " << *ISE << '\n');
}
}
return true;
}
-IVStrideUse &IVUsers::AddUser(const SCEV *Stride, const SCEV *Offset,
- Instruction *User, Value *Operand) {
- IVUses.push_back(new IVStrideUse(this, Stride, Offset, User, Operand));
+IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand) {
+ IVUses.push_back(new IVStrideUse(this, User, Operand));
return IVUses.back();
}
@@ -287,40 +162,11 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
// them by stride. Start by finding all of the PHI nodes in the header for
// this loop. If they are induction variables, inspect their uses.
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I)
- AddUsersIfInteresting(I);
+ (void)AddUsersIfInteresting(I);
return false;
}
-/// getReplacementExpr - Return a SCEV expression which computes the
-/// value of the OperandValToReplace of the given IVStrideUse.
-const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &U) const {
- // Start with zero.
- const SCEV *RetVal = SE->getIntegerSCEV(0, U.getStride()->getType());
- // Create the basic add recurrence.
- RetVal = SE->getAddRecExpr(RetVal, U.getStride(), L);
- // Add the offset in a separate step, because it may be loop-variant.
- RetVal = SE->getAddExpr(RetVal, U.getOffset());
- // For uses of post-incremented values, add an extra stride to compute
- // the actual replacement value.
- if (U.isUseOfPostIncrementedValue())
- RetVal = SE->getAddExpr(RetVal, U.getStride());
- return RetVal;
-}
-
-/// getCanonicalExpr - Return a SCEV expression which computes the
-/// value of the SCEV of the given IVStrideUse, ignoring the
-/// isUseOfPostIncrementedValue flag.
-const SCEV *IVUsers::getCanonicalExpr(const IVStrideUse &U) const {
- // Start with zero.
- const SCEV *RetVal = SE->getIntegerSCEV(0, U.getStride()->getType());
- // Create the basic add recurrence.
- RetVal = SE->getAddRecExpr(RetVal, U.getStride(), L);
- // Add the offset in a separate step, because it may be loop-variant.
- RetVal = SE->getAddExpr(RetVal, U.getOffset());
- return RetVal;
-}
-
void IVUsers::print(raw_ostream &OS, const Module *M) const {
OS << "IV Users for loop ";
WriteAsOperand(OS, L->getHeader(), false);
@@ -337,10 +183,14 @@ void IVUsers::print(raw_ostream &OS, const Module *M) const {
E = IVUses.end(); UI != E; ++UI) {
OS << " ";
WriteAsOperand(OS, UI->getOperandValToReplace(), false);
- OS << " = "
- << *getReplacementExpr(*UI);
- if (UI->isUseOfPostIncrementedValue())
- OS << " (post-inc)";
+ OS << " = " << *getReplacementExpr(*UI);
+ for (PostIncLoopSet::const_iterator
+ I = UI->PostIncLoops.begin(),
+ E = UI->PostIncLoops.end(); I != E; ++I) {
+ OS << " (post-inc with loop ";
+ WriteAsOperand(OS, (*I)->getHeader(), false);
+ OS << ")";
+ }
OS << " in ";
UI->getUser()->print(OS, &Annotator);
OS << '\n';
@@ -356,6 +206,49 @@ void IVUsers::releaseMemory() {
IVUses.clear();
}
+/// getReplacementExpr - Return a SCEV expression which computes the
+/// value of the OperandValToReplace.
+const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &IU) const {
+ return SE->getSCEV(IU.getOperandValToReplace());
+}
+
+/// getExpr - Return the expression for the use.
+const SCEV *IVUsers::getExpr(const IVStrideUse &IU) const {
+ return
+ TransformForPostIncUse(Normalize, getReplacementExpr(IU),
+ IU.getUser(), IU.getOperandValToReplace(),
+ const_cast<PostIncLoopSet &>(IU.getPostIncLoops()),
+ *SE, *DT);
+}
+
+static const SCEVAddRecExpr *findAddRecForLoop(const SCEV *S, const Loop *L) {
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ if (AR->getLoop() == L)
+ return AR;
+ return findAddRecForLoop(AR->getStart(), L);
+ }
+
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+ I != E; ++I)
+ if (const SCEVAddRecExpr *AR = findAddRecForLoop(*I, L))
+ return AR;
+ return 0;
+ }
+
+ return 0;
+}
+
+const SCEV *IVUsers::getStride(const IVStrideUse &IU, const Loop *L) const {
+ if (const SCEVAddRecExpr *AR = findAddRecForLoop(getExpr(IU), L))
+ return AR->getStepRecurrence(*SE);
+ return 0;
+}
+
+void IVStrideUse::transformToPostInc(const Loop *L) {
+ PostIncLoops.insert(L);
+}
+
void IVStrideUse::deleted() {
// Remove this user from the list.
Parent->IVUses.erase(this);
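
A sketch (not from this patch) of the on-demand stride recovery the new accessors provide; the wrapper name is illustrative.

    #include "llvm/Analysis/IVUsers.h"
    using namespace llvm;

    // Illustrative only: strides are no longer cached per use; ask for the
    // step of the addrec for a particular loop when it is needed.
    const SCEV *strideFor(const IVUsers &IU, const IVStrideUse &Use,
                          const Loop *L) {
      return IU.getStride(Use, L);   // null if no addrec for L is found
    }
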
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index c599e907eb60..627137159706 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -24,28 +24,29 @@ using namespace llvm;
unsigned InlineCostAnalyzer::FunctionInfo::
CountCodeReductionForConstant(Value *V) {
unsigned Reduction = 0;
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
- if (isa<BranchInst>(*UI) || isa<SwitchInst>(*UI)) {
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
+ User *U = *UI;
+ if (isa<BranchInst>(U) || isa<SwitchInst>(U)) {
// We will be able to eliminate all but one of the successors.
- const TerminatorInst &TI = cast<TerminatorInst>(**UI);
+ const TerminatorInst &TI = cast<TerminatorInst>(*U);
const unsigned NumSucc = TI.getNumSuccessors();
unsigned Instrs = 0;
for (unsigned I = 0; I != NumSucc; ++I)
Instrs += Metrics.NumBBInsts[TI.getSuccessor(I)];
// We don't know which blocks will be eliminated, so use the average size.
Reduction += InlineConstants::InstrCost*Instrs*(NumSucc-1)/NumSucc;
- } else if (CallInst *CI = dyn_cast<CallInst>(*UI)) {
+ } else if (CallInst *CI = dyn_cast<CallInst>(U)) {
// Turning an indirect call into a direct call is a BIG win
if (CI->getCalledValue() == V)
Reduction += InlineConstants::IndirectCallBonus;
- } else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) {
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(U)) {
// Turning an indirect call into a direct call is a BIG win
if (II->getCalledValue() == V)
Reduction += InlineConstants::IndirectCallBonus;
} else {
// Figure out if this instruction will be removed due to simple constant
// propagation.
- Instruction &Inst = cast<Instruction>(**UI);
+ Instruction &Inst = cast<Instruction>(*U);
// We can't constant propagate instructions which have effects or
// read memory.
@@ -74,7 +75,7 @@ CountCodeReductionForConstant(Value *V) {
Reduction += CountCodeReductionForConstant(&Inst);
}
}
-
+ }
return Reduction;
}
@@ -107,10 +108,10 @@ unsigned InlineCostAnalyzer::FunctionInfo::
return Reduction;
}
-// callIsSmall - If a call is likely to lower to a single target instruction, or
-// is otherwise deemed small return true.
-// TODO: Perhaps calls like memcpy, strcpy, etc?
-static bool callIsSmall(const Function *F) {
+/// callIsSmall - If a call is likely to lower to a single target instruction,
+/// or is otherwise deemed small return true.
+/// TODO: Perhaps calls like memcpy, strcpy, etc?
+bool llvm::callIsSmall(const Function *F) {
if (!F) return false;
if (F->hasLocalLinkage()) return false;
@@ -158,10 +159,18 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
// it. This is a hack because we depend on the user marking their local
// variables as volatile if they are live across a setjmp call, and they
// probably won't do this in callers.
- if (Function *F = CS.getCalledFunction())
+ if (Function *F = CS.getCalledFunction()) {
if (F->isDeclaration() &&
(F->getName() == "setjmp" || F->getName() == "_setjmp"))
NeverInline = true;
+
+      // If this call is to the function itself, the function is recursive.
+ // Inlining it into other functions is a bad idea, because this is
+ // basically just a form of loop peeling, and our metrics aren't useful
+ // for that case.
+ if (F == BB->getParent())
+ NeverInline = true;
+ }
if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction())) {
// Each argument to a call takes on average one instruction to set up.
@@ -249,10 +258,16 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
// function call or not.
//
InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
- SmallPtrSet<const Function *, 16> &NeverInline) {
+ SmallPtrSet<const Function*, 16> &NeverInline) {
+ return getInlineCost(CS, CS.getCalledFunction(), NeverInline);
+}
+
+InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
+ Function *Callee,
+ SmallPtrSet<const Function*, 16> &NeverInline) {
Instruction *TheCall = CS.getInstruction();
- Function *Callee = CS.getCalledFunction();
Function *Caller = TheCall->getParent()->getParent();
+ bool isDirectCall = CS.getCalledFunction() == Callee;
// Don't inline functions which can be redefined at link-time to mean
// something else. Don't inline functions marked noinline or call sites
@@ -267,11 +282,11 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
// be inlined. This value may go negative.
//
int InlineCost = 0;
-
+
// If there is only one call of the function, and it has internal linkage,
// make it almost guaranteed to be inlined.
//
- if (Callee->hasLocalLinkage() && Callee->hasOneUse())
+ if (Callee->hasLocalLinkage() && Callee->hasOneUse() && isDirectCall)
InlineCost += InlineConstants::LastCallToStaticBonus;
// If this function uses the coldcc calling convention, prefer not to inline
@@ -288,31 +303,36 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
} else if (isa<UnreachableInst>(++BasicBlock::iterator(TheCall)))
InlineCost += InlineConstants::NoreturnPenalty;
- // Get information about the callee...
- FunctionInfo &CalleeFI = CachedFunctionInfo[Callee];
+ // Get information about the callee.
+ FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
// If we haven't calculated this information yet, do so now.
- if (CalleeFI.Metrics.NumBlocks == 0)
- CalleeFI.analyzeFunction(Callee);
+ if (CalleeFI->Metrics.NumBlocks == 0)
+ CalleeFI->analyzeFunction(Callee);
// If we should never inline this, return a huge cost.
- if (CalleeFI.Metrics.NeverInline)
+ if (CalleeFI->Metrics.NeverInline)
return InlineCost::getNever();
- // FIXME: It would be nice to kill off CalleeFI.NeverInline. Then we
+ // FIXME: It would be nice to kill off CalleeFI->NeverInline. Then we
// could move this up and avoid computing the FunctionInfo for
// things we are going to just return always inline for. This
// requires handling setjmp somewhere else, however.
if (!Callee->isDeclaration() && Callee->hasFnAttr(Attribute::AlwaysInline))
return InlineCost::getAlways();
- if (CalleeFI.Metrics.usesDynamicAlloca) {
- // Get infomation about the caller...
+ if (CalleeFI->Metrics.usesDynamicAlloca) {
+    // Get information about the caller.
FunctionInfo &CallerFI = CachedFunctionInfo[Caller];
// If we haven't calculated this information yet, do so now.
- if (CallerFI.Metrics.NumBlocks == 0)
+ if (CallerFI.Metrics.NumBlocks == 0) {
CallerFI.analyzeFunction(Caller);
+
+ // Recompute the CalleeFI pointer, getting Caller could have invalidated
+ // it.
+ CalleeFI = &CachedFunctionInfo[Callee];
+ }
// Don't inline a callee with dynamic alloca into a caller without them.
// Functions containing dynamic alloca's are inefficient in various ways;
@@ -339,15 +359,15 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
// scalarization), so encourage the inlining of the function.
//
if (isa<AllocaInst>(I)) {
- if (ArgNo < CalleeFI.ArgumentWeights.size())
- InlineCost -= CalleeFI.ArgumentWeights[ArgNo].AllocaWeight;
+ if (ArgNo < CalleeFI->ArgumentWeights.size())
+ InlineCost -= CalleeFI->ArgumentWeights[ArgNo].AllocaWeight;
// If this is a constant being passed into the function, use the argument
// weights calculated for the callee to determine how much will be folded
// away with this information.
} else if (isa<Constant>(I)) {
- if (ArgNo < CalleeFI.ArgumentWeights.size())
- InlineCost -= CalleeFI.ArgumentWeights[ArgNo].ConstantWeight;
+ if (ArgNo < CalleeFI->ArgumentWeights.size())
+ InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight;
}
}
@@ -355,10 +375,10 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
// likely to be inlined, look at factors that make us not want to inline it.
// Calls usually take a long time, so they make the inlining gain smaller.
- InlineCost += CalleeFI.Metrics.NumCalls * InlineConstants::CallPenalty;
+ InlineCost += CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty;
// Look at the size of the callee. Each instruction counts as 5.
- InlineCost += CalleeFI.Metrics.NumInsts*InlineConstants::InstrCost;
+ InlineCost += CalleeFI->Metrics.NumInsts*InlineConstants::InstrCost;
return llvm::InlineCost::get(InlineCost);
}
@@ -368,7 +388,7 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) {
Function *Callee = CS.getCalledFunction();
- // Get information about the callee...
+ // Get information about the callee.
FunctionInfo &CalleeFI = CachedFunctionInfo[Callee];
// If we haven't calculated this information yet, do so now.
@@ -392,41 +412,49 @@ float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) {
/// growCachedCostInfo - update the cached cost info for Caller after Callee has
/// been inlined.
void
-InlineCostAnalyzer::growCachedCostInfo(Function* Caller, Function* Callee) {
- FunctionInfo &CallerFI = CachedFunctionInfo[Caller];
+InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function *Callee) {
+ CodeMetrics &CallerMetrics = CachedFunctionInfo[Caller].Metrics;
// For small functions we prefer to recalculate the cost for better accuracy.
- if (CallerFI.Metrics.NumBlocks < 10 || CallerFI.Metrics.NumInsts < 1000) {
+ if (CallerMetrics.NumBlocks < 10 || CallerMetrics.NumInsts < 1000) {
resetCachedCostInfo(Caller);
return;
}
// For large functions, we can save a lot of computation time by skipping
// recalculations.
- if (CallerFI.Metrics.NumCalls > 0)
- --CallerFI.Metrics.NumCalls;
-
- if (Callee) {
- FunctionInfo &CalleeFI = CachedFunctionInfo[Callee];
- if (!CalleeFI.Metrics.NumBlocks) {
- resetCachedCostInfo(Caller);
- return;
- }
- CallerFI.Metrics.NeverInline |= CalleeFI.Metrics.NeverInline;
- CallerFI.Metrics.usesDynamicAlloca |= CalleeFI.Metrics.usesDynamicAlloca;
-
- CallerFI.Metrics.NumInsts += CalleeFI.Metrics.NumInsts;
- CallerFI.Metrics.NumBlocks += CalleeFI.Metrics.NumBlocks;
- CallerFI.Metrics.NumCalls += CalleeFI.Metrics.NumCalls;
- CallerFI.Metrics.NumVectorInsts += CalleeFI.Metrics.NumVectorInsts;
- CallerFI.Metrics.NumRets += CalleeFI.Metrics.NumRets;
-
- // analyzeBasicBlock counts each function argument as an inst.
- if (CallerFI.Metrics.NumInsts >= Callee->arg_size())
- CallerFI.Metrics.NumInsts -= Callee->arg_size();
- else
- CallerFI.Metrics.NumInsts = 0;
+ if (CallerMetrics.NumCalls > 0)
+ --CallerMetrics.NumCalls;
+
+ if (Callee == 0) return;
+
+ CodeMetrics &CalleeMetrics = CachedFunctionInfo[Callee].Metrics;
+
+ // If we don't have metrics for the callee, don't recalculate them just to
+ // update an approximation in the caller. Instead, just recalculate the
+ // caller info from scratch.
+ if (CalleeMetrics.NumBlocks == 0) {
+ resetCachedCostInfo(Caller);
+ return;
}
+
+  // Since CalleeMetrics was already calculated, we know the CallerMetrics
+  // reference isn't invalidated: both entries were already in the DenseMap.
+ CallerMetrics.NeverInline |= CalleeMetrics.NeverInline;
+ CallerMetrics.usesDynamicAlloca |= CalleeMetrics.usesDynamicAlloca;
+
+ CallerMetrics.NumInsts += CalleeMetrics.NumInsts;
+ CallerMetrics.NumBlocks += CalleeMetrics.NumBlocks;
+ CallerMetrics.NumCalls += CalleeMetrics.NumCalls;
+ CallerMetrics.NumVectorInsts += CalleeMetrics.NumVectorInsts;
+ CallerMetrics.NumRets += CalleeMetrics.NumRets;
+
+ // analyzeBasicBlock counts each function argument as an inst.
+ if (CallerMetrics.NumInsts >= Callee->arg_size())
+ CallerMetrics.NumInsts -= Callee->arg_size();
+ else
+ CallerMetrics.NumInsts = 0;
+
  // We are not updating the argument weights. We have already determined that
// Caller is a fairly large function, so we accept the loss of precision.
}
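
The pointer recomputation above guards against a general DenseMap hazard:
operator[] may insert a new entry, growing and rehashing the underlying
table and invalidating every previously obtained reference or pointer into
the map. A minimal standalone sketch of the hazard; the names and the int
key are illustrative, not from this patch:

  #include "llvm/ADT/DenseMap.h"

  struct InfoSketch { unsigned NumBlocks; };

  void lookupSketch(llvm::DenseMap<int, InfoSketch> &Cache,
                    int Callee, int Caller) {
    InfoSketch *CalleeFI = &Cache[Callee]; // pointer into the map
    (void)Cache[Caller];                   // may insert and rehash, leaving
                                           // CalleeFI dangling
    CalleeFI = &Cache[Callee];             // so it must be re-fetched
  }
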
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 8288e96eb775..dbefc2dedb2b 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -314,6 +314,35 @@ Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return 0;
}
+/// SimplifySelectInst - Given operands for a SelectInst, see if we can fold
+/// the result. If not, this returns null.
+Value *llvm::SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *FalseVal,
+ const TargetData *TD) {
+ // select true, X, Y -> X
+ // select false, X, Y -> Y
+ if (ConstantInt *CB = dyn_cast<ConstantInt>(CondVal))
+ return CB->getZExtValue() ? TrueVal : FalseVal;
+
+ // select C, X, X -> X
+ if (TrueVal == FalseVal)
+ return TrueVal;
+
+ if (isa<UndefValue>(TrueVal)) // select C, undef, X -> X
+ return FalseVal;
+ if (isa<UndefValue>(FalseVal)) // select C, X, undef -> X
+ return TrueVal;
+ if (isa<UndefValue>(CondVal)) { // select undef, X, Y -> X or Y
+ if (isa<Constant>(TrueVal))
+ return TrueVal;
+ return FalseVal;
+ }
+
+ return 0;
+}
+
/// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can
/// fold the result. If not, this returns null.
Value *llvm::SimplifyGEPInst(Value *const *Ops, unsigned NumOps,
@@ -391,6 +420,9 @@ Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD) {
case Instruction::FCmp:
return SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(),
I->getOperand(0), I->getOperand(1), TD);
+ case Instruction::Select:
+ return SimplifySelectInst(I->getOperand(0), I->getOperand(1),
+ I->getOperand(2), TD);
case Instruction::GetElementPtr: {
SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end());
return SimplifyGEPInst(&Ops[0], Ops.size(), TD);
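
The new select folds can be exercised without creating any instructions. A
rough usage sketch follows; the helper name is illustrative, and only
SimplifySelectInst itself comes from this patch:

  #include "llvm/Analysis/InstructionSimplify.h"
  #include "llvm/Constants.h"

  using namespace llvm;

  // Folds "select true, X, Y" to X; returns null when no fold applies.
  Value *foldSelectSketch(Value *X, Value *Y) {
    Value *True = ConstantInt::getTrue(X->getContext());
    return SimplifySelectInst(True, X, Y, /*TD=*/0);
  }
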
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
new file mode 100644
index 000000000000..25d4f9571dab
--- /dev/null
+++ b/lib/Analysis/Lint.cpp
@@ -0,0 +1,495 @@
+//===-- Lint.cpp - Check for common errors in LLVM IR ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass statically checks for common and easily-identified constructs
+// which produce undefined or likely unintended behavior in LLVM IR.
+//
+// It is not a guarantee of correctness, for two reasons. First, it isn't
+// comprehensive. There are checks which could be done statically which are
+// not yet implemented. Some of these are indicated by TODO comments, but
+// those aren't comprehensive either. Second, many conditions cannot be
+// checked statically. This pass does no dynamic instrumentation, so it
+// can't check for all possible problems.
+//
+// Another limitation is that it assumes all code will be executed. A store
+// through a null pointer in a basic block which is never reached is harmless,
+// but this pass will warn about it anyway.
+//
+// Optimization passes may make conditions that this pass checks for more or
+// less obvious. If an optimization pass appears to be introducing a warning,
+// it may be that the optimization pass is merely exposing an existing
+// condition in the code.
+//
+// This code may be run before instcombine. In many cases, instcombine checks
+// for the same kinds of things and turns instructions with undefined behavior
+// into unreachable (or equivalent). Because of this, this pass makes some
+// effort to look through bitcasts and so on.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Lint.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Function.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+namespace {
+ namespace MemRef {
+ static unsigned Read = 1;
+ static unsigned Write = 2;
+ static unsigned Callee = 4;
+ static unsigned Branchee = 8;
+ }
+
+ class Lint : public FunctionPass, public InstVisitor<Lint> {
+ friend class InstVisitor<Lint>;
+
+ void visitFunction(Function &F);
+
+ void visitCallSite(CallSite CS);
+ void visitMemoryReference(Instruction &I, Value *Ptr, unsigned Align,
+ const Type *Ty, unsigned Flags);
+
+ void visitCallInst(CallInst &I);
+ void visitInvokeInst(InvokeInst &I);
+ void visitReturnInst(ReturnInst &I);
+ void visitLoadInst(LoadInst &I);
+ void visitStoreInst(StoreInst &I);
+ void visitXor(BinaryOperator &I);
+ void visitSub(BinaryOperator &I);
+ void visitLShr(BinaryOperator &I);
+ void visitAShr(BinaryOperator &I);
+ void visitShl(BinaryOperator &I);
+ void visitSDiv(BinaryOperator &I);
+ void visitUDiv(BinaryOperator &I);
+ void visitSRem(BinaryOperator &I);
+ void visitURem(BinaryOperator &I);
+ void visitAllocaInst(AllocaInst &I);
+ void visitVAArgInst(VAArgInst &I);
+ void visitIndirectBrInst(IndirectBrInst &I);
+ void visitExtractElementInst(ExtractElementInst &I);
+ void visitInsertElementInst(InsertElementInst &I);
+ void visitUnreachableInst(UnreachableInst &I);
+
+ public:
+ Module *Mod;
+ AliasAnalysis *AA;
+ TargetData *TD;
+
+ std::string Messages;
+ raw_string_ostream MessagesStr;
+
+ static char ID; // Pass identification, replacement for typeid
+ Lint() : FunctionPass(&ID), MessagesStr(Messages) {}
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<AliasAnalysis>();
+ }
+ virtual void print(raw_ostream &O, const Module *M) const {}
+
+ void WriteValue(const Value *V) {
+ if (!V) return;
+ if (isa<Instruction>(V)) {
+ MessagesStr << *V << '\n';
+ } else {
+ WriteAsOperand(MessagesStr, V, true, Mod);
+ MessagesStr << '\n';
+ }
+ }
+
+ void WriteType(const Type *T) {
+ if (!T) return;
+ MessagesStr << ' ';
+ WriteTypeSymbolic(MessagesStr, T, Mod);
+ }
+
+ // CheckFailed - A check failed, so print out the condition and the message
+ // that failed. This provides a nice place to put a breakpoint if you want
+ // to see why something is not correct.
+ void CheckFailed(const Twine &Message,
+ const Value *V1 = 0, const Value *V2 = 0,
+ const Value *V3 = 0, const Value *V4 = 0) {
+ MessagesStr << Message.str() << "\n";
+ WriteValue(V1);
+ WriteValue(V2);
+ WriteValue(V3);
+ WriteValue(V4);
+ }
+
+ void CheckFailed(const Twine &Message, const Value *V1,
+ const Type *T2, const Value *V3 = 0) {
+ MessagesStr << Message.str() << "\n";
+ WriteValue(V1);
+ WriteType(T2);
+ WriteValue(V3);
+ }
+
+ void CheckFailed(const Twine &Message, const Type *T1,
+ const Type *T2 = 0, const Type *T3 = 0) {
+ MessagesStr << Message.str() << "\n";
+ WriteType(T1);
+ WriteType(T2);
+ WriteType(T3);
+ }
+ };
+}
+
+char Lint::ID = 0;
+static RegisterPass<Lint>
+X("lint", "Statically lint-checks LLVM IR", false, true);
+
+// Assert - We know that cond should be true; if not, print an error message.
+#define Assert(C, M) \
+ do { if (!(C)) { CheckFailed(M); return; } } while (0)
+#define Assert1(C, M, V1) \
+ do { if (!(C)) { CheckFailed(M, V1); return; } } while (0)
+#define Assert2(C, M, V1, V2) \
+ do { if (!(C)) { CheckFailed(M, V1, V2); return; } } while (0)
+#define Assert3(C, M, V1, V2, V3) \
+ do { if (!(C)) { CheckFailed(M, V1, V2, V3); return; } } while (0)
+#define Assert4(C, M, V1, V2, V3, V4) \
+ do { if (!(C)) { CheckFailed(M, V1, V2, V3, V4); return; } } while (0)
+
+// Lint::runOnFunction - This is the main analysis entry point for a
+// function.
+//
+bool Lint::runOnFunction(Function &F) {
+ Mod = F.getParent();
+ AA = &getAnalysis<AliasAnalysis>();
+ TD = getAnalysisIfAvailable<TargetData>();
+ visit(F);
+ dbgs() << MessagesStr.str();
+ return false;
+}
+
+void Lint::visitFunction(Function &F) {
+  // This isn't undefined behavior; it's just a little unusual, and it's a
+ // fairly common mistake to neglect to name a function.
+ Assert1(F.hasName() || F.hasLocalLinkage(),
+ "Unusual: Unnamed function with non-local linkage", &F);
+}
+
+void Lint::visitCallSite(CallSite CS) {
+ Instruction &I = *CS.getInstruction();
+ Value *Callee = CS.getCalledValue();
+
+ // TODO: Check function alignment?
+ visitMemoryReference(I, Callee, 0, 0, MemRef::Callee);
+
+ if (Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) {
+ Assert1(CS.getCallingConv() == F->getCallingConv(),
+ "Undefined behavior: Caller and callee calling convention differ",
+ &I);
+
+ const FunctionType *FT = F->getFunctionType();
+ unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin());
+
+ Assert1(FT->isVarArg() ?
+ FT->getNumParams() <= NumActualArgs :
+ FT->getNumParams() == NumActualArgs,
+ "Undefined behavior: Call argument count mismatches callee "
+ "argument count", &I);
+
+ // TODO: Check argument types (in case the callee was casted)
+
+ // TODO: Check ABI-significant attributes.
+
+ // TODO: Check noalias attribute.
+
+ // TODO: Check sret attribute.
+ }
+
+ // TODO: Check the "tail" keyword constraints.
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
+ switch (II->getIntrinsicID()) {
+ default: break;
+
+ // TODO: Check more intrinsics
+
+ case Intrinsic::memcpy: {
+ MemCpyInst *MCI = cast<MemCpyInst>(&I);
+      visitMemoryReference(I, MCI->getSource(), MCI->getAlignment(), 0,
+                           MemRef::Read);
+      visitMemoryReference(I, MCI->getDest(), MCI->getAlignment(), 0,
+                           MemRef::Write);
+
+ // Check that the memcpy arguments don't overlap. The AliasAnalysis API
+ // isn't expressive enough for what we really want to do. Known partial
+ // overlap is not distinguished from the case where nothing is known.
+ unsigned Size = 0;
+ if (const ConstantInt *Len =
+ dyn_cast<ConstantInt>(MCI->getLength()->stripPointerCasts()))
+ if (Len->getValue().isIntN(32))
+ Size = Len->getValue().getZExtValue();
+ Assert1(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
+ AliasAnalysis::MustAlias,
+ "Undefined behavior: memcpy source and destination overlap", &I);
+ break;
+ }
+ case Intrinsic::memmove: {
+ MemMoveInst *MMI = cast<MemMoveInst>(&I);
+      visitMemoryReference(I, MMI->getSource(), MMI->getAlignment(), 0,
+                           MemRef::Read);
+      visitMemoryReference(I, MMI->getDest(), MMI->getAlignment(), 0,
+                           MemRef::Write);
+ break;
+ }
+ case Intrinsic::memset: {
+ MemSetInst *MSI = cast<MemSetInst>(&I);
+ visitMemoryReference(I, MSI->getDest(), MSI->getAlignment(), 0,
+ MemRef::Write);
+ break;
+ }
+
+ case Intrinsic::vastart:
+ Assert1(I.getParent()->getParent()->isVarArg(),
+ "Undefined behavior: va_start called in a non-varargs function",
+ &I);
+
+ visitMemoryReference(I, CS.getArgument(0), 0, 0,
+ MemRef::Read | MemRef::Write);
+ break;
+ case Intrinsic::vacopy:
+ visitMemoryReference(I, CS.getArgument(0), 0, 0, MemRef::Write);
+ visitMemoryReference(I, CS.getArgument(1), 0, 0, MemRef::Read);
+ break;
+ case Intrinsic::vaend:
+ visitMemoryReference(I, CS.getArgument(0), 0, 0,
+ MemRef::Read | MemRef::Write);
+ break;
+
+ case Intrinsic::stackrestore:
+ visitMemoryReference(I, CS.getArgument(0), 0, 0,
+ MemRef::Read);
+ break;
+ }
+}
+
+void Lint::visitCallInst(CallInst &I) {
+ return visitCallSite(&I);
+}
+
+void Lint::visitInvokeInst(InvokeInst &I) {
+ return visitCallSite(&I);
+}
+
+void Lint::visitReturnInst(ReturnInst &I) {
+ Function *F = I.getParent()->getParent();
+ Assert1(!F->doesNotReturn(),
+ "Unusual: Return statement in function with noreturn attribute",
+ &I);
+}
+
+// TODO: Add a length argument and check that the reference is in bounds
+void Lint::visitMemoryReference(Instruction &I,
+ Value *Ptr, unsigned Align, const Type *Ty,
+ unsigned Flags) {
+ Value *UnderlyingObject = Ptr->getUnderlyingObject();
+ Assert1(!isa<ConstantPointerNull>(UnderlyingObject),
+ "Undefined behavior: Null pointer dereference", &I);
+ Assert1(!isa<UndefValue>(UnderlyingObject),
+ "Undefined behavior: Undef pointer dereference", &I);
+
+ if (Flags & MemRef::Write) {
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(UnderlyingObject))
+ Assert1(!GV->isConstant(),
+ "Undefined behavior: Write to read-only memory", &I);
+ Assert1(!isa<Function>(UnderlyingObject) &&
+ !isa<BlockAddress>(UnderlyingObject),
+ "Undefined behavior: Write to text section", &I);
+ }
+ if (Flags & MemRef::Read) {
+ Assert1(!isa<Function>(UnderlyingObject),
+ "Unusual: Load from function body", &I);
+ Assert1(!isa<BlockAddress>(UnderlyingObject),
+ "Undefined behavior: Load from block address", &I);
+ }
+ if (Flags & MemRef::Callee) {
+ Assert1(!isa<BlockAddress>(UnderlyingObject),
+ "Undefined behavior: Call to block address", &I);
+ }
+ if (Flags & MemRef::Branchee) {
+ Assert1(!isa<Constant>(UnderlyingObject) ||
+ isa<BlockAddress>(UnderlyingObject),
+ "Undefined behavior: Branch to non-blockaddress", &I);
+ }
+
+ if (TD) {
+ if (Align == 0 && Ty) Align = TD->getABITypeAlignment(Ty);
+
+ if (Align != 0) {
+ unsigned BitWidth = TD->getTypeSizeInBits(Ptr->getType());
+ APInt Mask = APInt::getAllOnesValue(BitWidth),
+ KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ ComputeMaskedBits(Ptr, Mask, KnownZero, KnownOne, TD);
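+      // If any of the low bits that must be zero for Align-alignment are
+      // known to be one, the address is provably misaligned.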
+ Assert1(!(KnownOne & APInt::getLowBitsSet(BitWidth, Log2_32(Align))),
+ "Undefined behavior: Memory reference address is misaligned", &I);
+ }
+ }
+}
+
+void Lint::visitLoadInst(LoadInst &I) {
+ visitMemoryReference(I, I.getPointerOperand(), I.getAlignment(), I.getType(),
+ MemRef::Read);
+}
+
+void Lint::visitStoreInst(StoreInst &I) {
+ visitMemoryReference(I, I.getPointerOperand(), I.getAlignment(),
+ I.getOperand(0)->getType(), MemRef::Write);
+}
+
+void Lint::visitXor(BinaryOperator &I) {
+ Assert1(!isa<UndefValue>(I.getOperand(0)) ||
+ !isa<UndefValue>(I.getOperand(1)),
+ "Undefined result: xor(undef, undef)", &I);
+}
+
+void Lint::visitSub(BinaryOperator &I) {
+ Assert1(!isa<UndefValue>(I.getOperand(0)) ||
+ !isa<UndefValue>(I.getOperand(1)),
+ "Undefined result: sub(undef, undef)", &I);
+}
+
+void Lint::visitLShr(BinaryOperator &I) {
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(I.getOperand(1)->stripPointerCasts()))
+ Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
+ "Undefined result: Shift count out of range", &I);
+}
+
+void Lint::visitAShr(BinaryOperator &I) {
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(I.getOperand(1)->stripPointerCasts()))
+ Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
+ "Undefined result: Shift count out of range", &I);
+}
+
+void Lint::visitShl(BinaryOperator &I) {
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(I.getOperand(1)->stripPointerCasts()))
+ Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
+ "Undefined result: Shift count out of range", &I);
+}
+
+static bool isZero(Value *V, TargetData *TD) {
+ // Assume undef could be zero.
+ if (isa<UndefValue>(V)) return true;
+
+ unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
+ APInt Mask = APInt::getAllOnesValue(BitWidth),
+ KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD);
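+  // V is provably zero only if every bit is known to be zero.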
+ return KnownZero.isAllOnesValue();
+}
+
+void Lint::visitSDiv(BinaryOperator &I) {
+ Assert1(!isZero(I.getOperand(1), TD),
+ "Undefined behavior: Division by zero", &I);
+}
+
+void Lint::visitUDiv(BinaryOperator &I) {
+ Assert1(!isZero(I.getOperand(1), TD),
+ "Undefined behavior: Division by zero", &I);
+}
+
+void Lint::visitSRem(BinaryOperator &I) {
+ Assert1(!isZero(I.getOperand(1), TD),
+ "Undefined behavior: Division by zero", &I);
+}
+
+void Lint::visitURem(BinaryOperator &I) {
+ Assert1(!isZero(I.getOperand(1), TD),
+ "Undefined behavior: Division by zero", &I);
+}
+
+void Lint::visitAllocaInst(AllocaInst &I) {
+ if (isa<ConstantInt>(I.getArraySize()))
+    // This isn't undefined behavior; it's just an obvious pessimization.
+ Assert1(&I.getParent()->getParent()->getEntryBlock() == I.getParent(),
+ "Pessimization: Static alloca outside of entry block", &I);
+}
+
+void Lint::visitVAArgInst(VAArgInst &I) {
+ visitMemoryReference(I, I.getOperand(0), 0, 0,
+ MemRef::Read | MemRef::Write);
+}
+
+void Lint::visitIndirectBrInst(IndirectBrInst &I) {
+ visitMemoryReference(I, I.getAddress(), 0, 0, MemRef::Branchee);
+}
+
+void Lint::visitExtractElementInst(ExtractElementInst &I) {
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(I.getIndexOperand()->stripPointerCasts()))
+ Assert1(CI->getValue().ult(I.getVectorOperandType()->getNumElements()),
+ "Undefined result: extractelement index out of range", &I);
+}
+
+void Lint::visitInsertElementInst(InsertElementInst &I) {
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(I.getOperand(2)->stripPointerCasts()))
+ Assert1(CI->getValue().ult(I.getType()->getNumElements()),
+ "Undefined result: insertelement index out of range", &I);
+}
+
+void Lint::visitUnreachableInst(UnreachableInst &I) {
+  // This isn't undefined behavior; it's merely suspicious.
+ Assert1(&I == I.getParent()->begin() ||
+ prior(BasicBlock::iterator(&I))->mayHaveSideEffects(),
+ "Unusual: unreachable immediately preceded by instruction without "
+ "side effects", &I);
+}
+
+//===----------------------------------------------------------------------===//
+// Implement the public interfaces to this file...
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createLintPass() {
+ return new Lint();
+}
+
+/// lintFunction - Check a function for errors, printing messages on stderr.
+///
+void llvm::lintFunction(const Function &f) {
+ Function &F = const_cast<Function&>(f);
+ assert(!F.isDeclaration() && "Cannot lint external functions");
+
+ FunctionPassManager FPM(F.getParent());
+ Lint *V = new Lint();
+ FPM.add(V);
+ FPM.run(F);
+}
+
+/// lintModule - Check a module for errors, printing messages on stderr.
+/// If ErrorInfo is non-null, the lint messages are stored there as well.
+///
+void llvm::lintModule(const Module &M, std::string *ErrorInfo) {
+ PassManager PM;
+ Lint *V = new Lint();
+ PM.add(V);
+ PM.run(const_cast<Module&>(M));
+
+ if (ErrorInfo)
+ *ErrorInfo = V->MessagesStr.str();
+}
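
A minimal driver sketch for the interfaces above; the loop and the function
name are illustrative, not part of the patch. The isDeclaration() guard
matches the assertion inside lintFunction:

  #include "llvm/Analysis/Lint.h"
  #include "llvm/Module.h"

  using namespace llvm;

  void lintAllFunctions(Module &M) {
    for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
      if (!F->isDeclaration())
        lintFunction(*F); // messages go to the debug output stream
  }
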
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index 1001d2b54603..735e31f2524d 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -29,9 +29,9 @@ using namespace llvm;
// Always verify loopinfo if expensive checking is enabled.
#ifdef XDEBUG
-bool VerifyLoopInfo = true;
+static bool VerifyLoopInfo = true;
#else
-bool VerifyLoopInfo = false;
+static bool VerifyLoopInfo = false;
#endif
static cl::opt<bool,true>
VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo),
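
The cl::opt<bool, true> specialization stores the parsed value through
cl::location, so the backing bool needs no external linkage unless another
file references it; the patch makes it static on that basis. A standalone
sketch of the pattern, with illustrative names:

  #include "llvm/Support/CommandLine.h"

  static bool MyVerifyFlag = false;
  static llvm::cl::opt<bool, true> // value stored in the external bool
  MyVerifyFlagX("my-verify-flag", llvm::cl::location(MyVerifyFlag),
                llvm::cl::desc("Enable extra verification (sketch)"));
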
diff --git a/lib/Analysis/PointerTracking.cpp b/lib/Analysis/PointerTracking.cpp
index ce7ac899cd28..14df0b719879 100644
--- a/lib/Analysis/PointerTracking.cpp
+++ b/lib/Analysis/PointerTracking.cpp
@@ -183,17 +183,17 @@ enum SolverResult PointerTracking::isLoopGuardedBy(const Loop *L,
Predicate Pred,
const SCEV *A,
const SCEV *B) const {
- if (SE->isLoopGuardedByCond(L, Pred, A, B))
+ if (SE->isLoopEntryGuardedByCond(L, Pred, A, B))
return AlwaysTrue;
Pred = ICmpInst::getSwappedPredicate(Pred);
- if (SE->isLoopGuardedByCond(L, Pred, B, A))
+ if (SE->isLoopEntryGuardedByCond(L, Pred, B, A))
return AlwaysTrue;
Pred = ICmpInst::getInversePredicate(Pred);
- if (SE->isLoopGuardedByCond(L, Pred, B, A))
+ if (SE->isLoopEntryGuardedByCond(L, Pred, B, A))
return AlwaysFalse;
Pred = ICmpInst::getSwappedPredicate(Pred);
- if (SE->isLoopGuardedByCond(L, Pred, A, B))
+ if (SE->isLoopEntryGuardedByCond(L, Pred, A, B))
     return AlwaysFalse;
return Unknown;
}
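
The chain of checks above relies on two predicate identities: "A pred B" is
the same fact as "B swapped(pred) A", and "A inverse(pred) B" is its exact
negation. A standalone sketch; the function name is illustrative:

  #include "llvm/Instructions.h"

  using namespace llvm;

  void predicateIdentitiesSketch() {
    CmpInst::Predicate P = ICmpInst::ICMP_SLT;
    // "x <s y" holds iff "y >s x" holds.
    CmpInst::Predicate Swapped = ICmpInst::getSwappedPredicate(P); // ICMP_SGT
    // "x <s y" fails iff "x >=s y" holds.
    CmpInst::Predicate Inverse = ICmpInst::getInversePredicate(P); // ICMP_SGE
    (void)Swapped; (void)Inverse;
  }
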
diff --git a/lib/Analysis/PostDominators.cpp b/lib/Analysis/PostDominators.cpp
index c38e0503f931..f0f3a05db8a0 100644
--- a/lib/Analysis/PostDominators.cpp
+++ b/lib/Analysis/PostDominators.cpp
@@ -33,7 +33,6 @@ F("postdomtree", "Post-Dominator Tree Construction", true, true);
bool PostDominatorTree::runOnFunction(Function &F) {
DT->recalculate(F);
- DEBUG(DT->print(dbgs()));
return false;
}
diff --git a/lib/Analysis/README.txt b/lib/Analysis/README.txt
index c40109027299..0e96e4c950cc 100644
--- a/lib/Analysis/README.txt
+++ b/lib/Analysis/README.txt
@@ -16,3 +16,15 @@ In addition to being much more complicated, it involves i65 arithmetic,
which is very inefficient when expanded into code.
//===---------------------------------------------------------------------===//
+
+In formatValue in test/CodeGen/X86/lsr-delayed-fold.ll,
+
+ScalarEvolution is forming this expression:
+
+((trunc i64 (-1 * %arg5) to i32) + (trunc i64 %arg5 to i32) + (-1 * (trunc i64 undef to i32)))
+
+Since the first two operands cancel, this could be folded to
+
+(-1 * (trunc i64 undef to i32))
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 1af271a93eff..6870268e838b 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -188,8 +188,8 @@ const SCEV *ScalarEvolution::getConstant(const APInt& Val) {
const SCEV *
ScalarEvolution::getConstant(const Type *Ty, uint64_t V, bool isSigned) {
- return getConstant(
- ConstantInt::get(cast<IntegerType>(Ty), V, isSigned));
+ const IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty));
+ return getConstant(ConstantInt::get(ITy, V, isSigned));
}
const Type *SCEVConstant::getType() const { return V->getType(); }
@@ -247,11 +247,13 @@ void SCEVSignExtendExpr::print(raw_ostream &OS) const {
}
void SCEVCommutativeExpr::print(raw_ostream &OS) const {
- assert(NumOperands > 1 && "This plus expr shouldn't exist!");
const char *OpStr = getOperationStr();
- OS << "(" << *Operands[0];
- for (unsigned i = 1, e = NumOperands; i != e; ++i)
- OS << OpStr << *Operands[i];
+ OS << "(";
+ for (op_iterator I = op_begin(), E = op_end(); I != E; ++I) {
+ OS << **I;
+ if (next(I) != E)
+ OS << OpStr;
+ }
OS << ")";
}
@@ -759,7 +761,7 @@ static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
CalculationBits);
const SCEV *Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy);
for (unsigned i = 1; i != K; ++i) {
- const SCEV *S = SE.getMinusSCEV(It, SE.getIntegerSCEV(i, It->getType()));
+ const SCEV *S = SE.getMinusSCEV(It, SE.getConstant(It->getType(), i));
Dividend = SE.getMulExpr(Dividend,
SE.getTruncateOrZeroExtend(S, CalculationTy));
}
@@ -955,7 +957,7 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
const SCEV *N = getConstant(APInt::getMinValue(BitWidth) -
getUnsignedRange(Step).getUnsignedMax());
if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) ||
- (isLoopGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) &&
+ (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) &&
isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT,
AR->getPostIncExpr(*this), N)))
// Return the expression with the addrec on the outside.
@@ -965,8 +967,8 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
} else if (isKnownNegative(Step)) {
const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) -
getSignedRange(Step).getSignedMin());
- if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) &&
- (isLoopGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) ||
+ if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) ||
+ (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) &&
isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT,
AR->getPostIncExpr(*this), N)))
// Return the expression with the addrec on the outside.
@@ -1090,7 +1092,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
const SCEV *N = getConstant(APInt::getSignedMinValue(BitWidth) -
getSignedRange(Step).getSignedMax());
if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT, AR, N) ||
- (isLoopGuardedByCond(L, ICmpInst::ICMP_SLT, Start, N) &&
+ (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, Start, N) &&
isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT,
AR->getPostIncExpr(*this), N)))
// Return the expression with the addrec on the outside.
@@ -1101,7 +1103,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
const SCEV *N = getConstant(APInt::getSignedMaxValue(BitWidth) -
getSignedRange(Step).getSignedMin());
if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT, AR, N) ||
- (isLoopGuardedByCond(L, ICmpInst::ICMP_SGT, Start, N) &&
+ (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGT, Start, N) &&
isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT,
AR->getPostIncExpr(*this), N)))
// Return the expression with the addrec on the outside.
@@ -1237,7 +1239,7 @@ CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
}
} else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
// Pull a buried constant out to the outside.
- if (Scale != 1 || AccumulatedConstant != 0 || C->isZero())
+ if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero())
Interesting = true;
AccumulatedConstant += Scale * C->getValue()->getValue();
} else {
@@ -1308,13 +1310,13 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
}
// If we are left with a constant zero being added, strip it off.
- if (cast<SCEVConstant>(Ops[0])->getValue()->isZero()) {
+ if (LHSC->getValue()->isZero()) {
Ops.erase(Ops.begin());
--Idx;
}
- }
- if (Ops.size() == 1) return Ops[0];
+ if (Ops.size() == 1) return Ops[0];
+ }
// Okay, check to see if the same value occurs in the operand list twice. If
// so, merge them together into an multiply expression. Since we sorted the
@@ -1324,7 +1326,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
if (Ops[i] == Ops[i+1]) { // X + Y + Y --> X + Y*2
// Found a match, merge the two values into a multiply, and add any
// remaining values to the result.
- const SCEV *Two = getIntegerSCEV(2, Ty);
+ const SCEV *Two = getConstant(Ty, 2);
const SCEV *Mul = getMulExpr(Ops[i], Two);
if (Ops.size() == 2)
return Mul;
@@ -1353,9 +1355,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
}
LargeOps.push_back(T->getOperand());
} else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
- // This could be either sign or zero extension, but sign extension
- // is much more likely to be foldable here.
- LargeOps.push_back(getSignExtendExpr(C, SrcType));
+ LargeOps.push_back(getAnyExtendExpr(C, SrcType));
} else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) {
SmallVector<const SCEV *, 8> LargeMulOps;
for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) {
@@ -1368,9 +1368,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
LargeMulOps.push_back(T->getOperand());
} else if (const SCEVConstant *C =
dyn_cast<SCEVConstant>(M->getOperand(j))) {
- // This could be either sign or zero extension, but sign extension
- // is much more likely to be foldable here.
- LargeMulOps.push_back(getSignExtendExpr(C, SrcType));
+ LargeMulOps.push_back(getAnyExtendExpr(C, SrcType));
} else {
Ok = false;
break;
@@ -1445,7 +1443,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
Ops.push_back(getMulExpr(getConstant(I->first),
getAddExpr(I->second)));
if (Ops.empty())
- return getIntegerSCEV(0, Ty);
+ return getConstant(Ty, 0);
if (Ops.size() == 1)
return Ops[0];
return getAddExpr(Ops);
@@ -1470,7 +1468,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
MulOps.erase(MulOps.begin()+MulOp);
InnerMul = getMulExpr(MulOps);
}
- const SCEV *One = getIntegerSCEV(1, Ty);
+ const SCEV *One = getConstant(Ty, 1);
const SCEV *AddOne = getAddExpr(InnerMul, One);
const SCEV *OuterMul = getMulExpr(AddOne, Ops[AddOp]);
if (Ops.size() == 2) return OuterMul;
@@ -1534,8 +1532,9 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
// they are loop invariant w.r.t. the recurrence.
SmallVector<const SCEV *, 8> LIOps;
const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
+ const Loop *AddRecLoop = AddRec->getLoop();
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- if (Ops[i]->isLoopInvariant(AddRec->getLoop())) {
+ if (Ops[i]->isLoopInvariant(AddRecLoop)) {
LIOps.push_back(Ops[i]);
Ops.erase(Ops.begin()+i);
--i; --e;
@@ -1552,7 +1551,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
// It's tempting to propagate NUW/NSW flags here, but nuw/nsw addition
// is not associative so this isn't necessarily safe.
- const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRec->getLoop());
+ const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop);
// If all of the other operands were loop invariant, we are done.
if (Ops.size() == 1) return NewRec;
@@ -1573,7 +1572,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);++OtherIdx)
if (OtherIdx != Idx) {
const SCEVAddRecExpr *OtherAddRec = cast<SCEVAddRecExpr>(Ops[OtherIdx]);
- if (AddRec->getLoop() == OtherAddRec->getLoop()) {
+ if (AddRecLoop == OtherAddRec->getLoop()) {
// Other + {A,+,B} + {C,+,D} --> Other + {A+C,+,B+D}
SmallVector<const SCEV *, 4> NewOps(AddRec->op_begin(),
AddRec->op_end());
@@ -1585,7 +1584,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
}
NewOps[i] = getAddExpr(NewOps[i], OtherAddRec->getOperand(i));
}
- const SCEV *NewAddRec = getAddRecExpr(NewOps, AddRec->getLoop());
+ const SCEV *NewAddRec = getAddRecExpr(NewOps, AddRecLoop);
if (Ops.size() == 2) return NewAddRec;
@@ -1697,15 +1696,15 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
return getAddExpr(NewOps);
}
}
+
+ if (Ops.size() == 1)
+ return Ops[0];
}
// Skip over the add expression until we get to a multiply.
while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
++Idx;
- if (Ops.size() == 1)
- return Ops[0];
-
// If there are mul operands inline them all into this expression.
if (Idx < Ops.size()) {
bool DeletedMul = false;
@@ -1843,77 +1842,81 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
if (RHSC->getValue()->equalsInt(1))
return LHS; // X udiv 1 --> x
- if (RHSC->isZero())
- return getIntegerSCEV(0, LHS->getType()); // value is undefined
-
- // Determine if the division can be folded into the operands of
- // its operands.
- // TODO: Generalize this to non-constants by using known-bits information.
- const Type *Ty = LHS->getType();
- unsigned LZ = RHSC->getValue()->getValue().countLeadingZeros();
- unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ;
- // For non-power-of-two values, effectively round the value up to the
- // nearest power of two.
- if (!RHSC->getValue()->getValue().isPowerOf2())
- ++MaxShiftAmt;
- const IntegerType *ExtTy =
- IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt);
- // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded.
- if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
- if (const SCEVConstant *Step =
- dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this)))
- if (!Step->getValue()->getValue()
- .urem(RHSC->getValue()->getValue()) &&
- getZeroExtendExpr(AR, ExtTy) ==
- getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
- getZeroExtendExpr(Step, ExtTy),
- AR->getLoop())) {
- SmallVector<const SCEV *, 4> Operands;
- for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i)
- Operands.push_back(getUDivExpr(AR->getOperand(i), RHS));
- return getAddRecExpr(Operands, AR->getLoop());
- }
- // (A*B)/C --> A*(B/C) if safe and B/C can be folded.
- if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) {
- SmallVector<const SCEV *, 4> Operands;
- for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i)
- Operands.push_back(getZeroExtendExpr(M->getOperand(i), ExtTy));
- if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands))
- // Find an operand that's safely divisible.
- for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
- const SCEV *Op = M->getOperand(i);
- const SCEV *Div = getUDivExpr(Op, RHSC);
- if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) {
- Operands = SmallVector<const SCEV *, 4>(M->op_begin(), M->op_end());
- Operands[i] = Div;
- return getMulExpr(Operands);
+ // If the denominator is zero, the result of the udiv is undefined. Don't
+ // try to analyze it, because the resolution chosen here may differ from
+ // the resolution chosen in other parts of the compiler.
+ if (!RHSC->getValue()->isZero()) {
+      // Determine if the division can be folded into the operands of
+      // its left-hand operand.
+ // TODO: Generalize this to non-constants by using known-bits information.
+ const Type *Ty = LHS->getType();
+ unsigned LZ = RHSC->getValue()->getValue().countLeadingZeros();
+ unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ;
+ // For non-power-of-two values, effectively round the value up to the
+ // nearest power of two.
+ if (!RHSC->getValue()->getValue().isPowerOf2())
+ ++MaxShiftAmt;
+ const IntegerType *ExtTy =
+ IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt);
+ // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
+ if (const SCEVConstant *Step =
+ dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this)))
+ if (!Step->getValue()->getValue()
+ .urem(RHSC->getValue()->getValue()) &&
+ getZeroExtendExpr(AR, ExtTy) ==
+ getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
+ getZeroExtendExpr(Step, ExtTy),
+ AR->getLoop())) {
+ SmallVector<const SCEV *, 4> Operands;
+ for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i)
+ Operands.push_back(getUDivExpr(AR->getOperand(i), RHS));
+ return getAddRecExpr(Operands, AR->getLoop());
}
+ // (A*B)/C --> A*(B/C) if safe and B/C can be folded.
+ if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) {
+ SmallVector<const SCEV *, 4> Operands;
+ for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i)
+ Operands.push_back(getZeroExtendExpr(M->getOperand(i), ExtTy));
+ if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands))
+ // Find an operand that's safely divisible.
+ for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
+ const SCEV *Op = M->getOperand(i);
+ const SCEV *Div = getUDivExpr(Op, RHSC);
+ if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) {
+ Operands = SmallVector<const SCEV *, 4>(M->op_begin(),
+ M->op_end());
+ Operands[i] = Div;
+ return getMulExpr(Operands);
+ }
+ }
+ }
+ // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded.
+ if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(LHS)) {
+ SmallVector<const SCEV *, 4> Operands;
+ for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i)
+ Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy));
+ if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) {
+ Operands.clear();
+ for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) {
+ const SCEV *Op = getUDivExpr(A->getOperand(i), RHS);
+ if (isa<SCEVUDivExpr>(Op) ||
+ getMulExpr(Op, RHS) != A->getOperand(i))
+ break;
+ Operands.push_back(Op);
+ }
+ if (Operands.size() == A->getNumOperands())
+ return getAddExpr(Operands);
}
- }
- // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded.
- if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(LHS)) {
- SmallVector<const SCEV *, 4> Operands;
- for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i)
- Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy));
- if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) {
- Operands.clear();
- for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) {
- const SCEV *Op = getUDivExpr(A->getOperand(i), RHS);
- if (isa<SCEVUDivExpr>(Op) || getMulExpr(Op, RHS) != A->getOperand(i))
- break;
- Operands.push_back(Op);
- }
- if (Operands.size() == A->getNumOperands())
- return getAddExpr(Operands);
}
- }
- // Fold if both operands are constant.
- if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
- Constant *LHSCV = LHSC->getValue();
- Constant *RHSCV = RHSC->getValue();
- return getConstant(cast<ConstantInt>(ConstantExpr::getUDiv(LHSCV,
- RHSCV)));
+ // Fold if both operands are constant.
+ if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
+ Constant *LHSCV = LHSC->getValue();
+ Constant *RHSCV = RHSC->getValue();
+ return getConstant(cast<ConstantInt>(ConstantExpr::getUDiv(LHSCV,
+ RHSCV)));
+ }
}
}
@@ -2090,9 +2093,9 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
// maximum-int.
return Ops[0];
}
- }
- if (Ops.size() == 1) return Ops[0];
+ if (Ops.size() == 1) return Ops[0];
+ }
// Find the first SMax
while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scSMaxExpr)
@@ -2116,7 +2119,13 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
// so, delete one. Since we sorted the list, these values are required to
// be adjacent.
for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
- if (Ops[i] == Ops[i+1]) { // X smax Y smax Y --> X smax Y
+ // X smax Y smax Y --> X smax Y
+      // X smax Y --> X, if X is known to be greater than or equal to Y
+ if (Ops[i] == Ops[i+1] ||
+ isKnownPredicate(ICmpInst::ICMP_SGE, Ops[i], Ops[i+1])) {
+ Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
+ --i; --e;
+ } else if (isKnownPredicate(ICmpInst::ICMP_SLE, Ops[i], Ops[i+1])) {
Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
--i; --e;
}
@@ -2189,9 +2198,9 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
// maximum-int.
return Ops[0];
}
- }
- if (Ops.size() == 1) return Ops[0];
+ if (Ops.size() == 1) return Ops[0];
+ }
// Find the first UMax
while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scUMaxExpr)
@@ -2215,7 +2224,13 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
// so, delete one. Since we sorted the list, these values are required to
// be adjacent.
for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
- if (Ops[i] == Ops[i+1]) { // X umax Y umax Y --> X umax Y
+ // X umax Y umax Y --> X umax Y
+      // X umax Y --> X, if X is known to be greater than or equal to Y
+ if (Ops[i] == Ops[i+1] ||
+ isKnownPredicate(ICmpInst::ICMP_UGE, Ops[i], Ops[i+1])) {
+ Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
+ --i; --e;
+ } else if (isKnownPredicate(ICmpInst::ICMP_ULE, Ops[i], Ops[i+1])) {
Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
--i; --e;
}
@@ -2254,6 +2269,13 @@ const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
}
const SCEV *ScalarEvolution::getSizeOfExpr(const Type *AllocTy) {
+ // If we have TargetData, we can bypass creating a target-independent
+ // constant expression and then folding it back into a ConstantInt.
+ // This is just a compile-time optimization.
+ if (TD)
+ return getConstant(TD->getIntPtrType(getContext()),
+ TD->getTypeAllocSize(AllocTy));
+
Constant *C = ConstantExpr::getSizeOf(AllocTy);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
C = ConstantFoldConstantExpression(CE, TD);
@@ -2271,6 +2293,13 @@ const SCEV *ScalarEvolution::getAlignOfExpr(const Type *AllocTy) {
const SCEV *ScalarEvolution::getOffsetOfExpr(const StructType *STy,
unsigned FieldNo) {
+ // If we have TargetData, we can bypass creating a target-independent
+ // constant expression and then folding it back into a ConstantInt.
+ // This is just a compile-time optimization.
+ if (TD)
+ return getConstant(TD->getIntPtrType(getContext()),
+ TD->getStructLayout(STy)->getElementOffset(FieldNo));
+
Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
C = ConstantFoldConstantExpression(CE, TD);
@@ -2597,14 +2626,29 @@ ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) {
/// a loop header, making it a potential recurrence, or it doesn't.
///
const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
- if (PN->getNumIncomingValues() == 2) // The loops have been canonicalized.
- if (const Loop *L = LI->getLoopFor(PN->getParent()))
- if (L->getHeader() == PN->getParent()) {
- // If it lives in the loop header, it has two incoming values, one
- // from outside the loop, and one from inside.
- unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
- unsigned BackEdge = IncomingEdge^1;
-
+ if (const Loop *L = LI->getLoopFor(PN->getParent()))
+ if (L->getHeader() == PN->getParent()) {
+ // The loop may have multiple entrances or multiple exits; we can analyze
+ // this phi as an addrec if it has a unique entry value and a unique
+ // backedge value.
+ Value *BEValueV = 0, *StartValueV = 0;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *V = PN->getIncomingValue(i);
+ if (L->contains(PN->getIncomingBlock(i))) {
+ if (!BEValueV) {
+ BEValueV = V;
+ } else if (BEValueV != V) {
+ BEValueV = 0;
+ break;
+ }
+ } else if (!StartValueV) {
+ StartValueV = V;
+ } else if (StartValueV != V) {
+ StartValueV = 0;
+ break;
+ }
+ }
+ if (BEValueV && StartValueV) {
// While we are analyzing this PHI node, handle its value symbolically.
const SCEV *SymbolicName = getUnknown(PN);
assert(Scalars.find(PN) == Scalars.end() &&
@@ -2613,7 +2657,6 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
// Using this symbolic name for the PHI, analyze the value coming around
// the back-edge.
- Value *BEValueV = PN->getIncomingValue(BackEdge);
const SCEV *BEValue = getSCEV(BEValueV);
// NOTE: If BEValue is loop invariant, we know that the PHI node just
@@ -2657,8 +2700,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
HasNSW = true;
}
- const SCEV *StartVal =
- getSCEV(PN->getIncomingValue(IncomingEdge));
+ const SCEV *StartVal = getSCEV(StartValueV);
const SCEV *PHISCEV =
getAddRecExpr(StartVal, Accum, L, HasNUW, HasNSW);
@@ -2684,12 +2726,12 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
// Because the other in-value of i (0) fits the evolution of BEValue
// i really is an addrec evolution.
if (AddRec->getLoop() == L && AddRec->isAffine()) {
- const SCEV *StartVal = getSCEV(PN->getIncomingValue(IncomingEdge));
+ const SCEV *StartVal = getSCEV(StartValueV);
// If StartVal = j.start - j.stride, we can use StartVal as the
// initial step of the addrec evolution.
if (StartVal == getMinusSCEV(AddRec->getOperand(0),
- AddRec->getOperand(1))) {
+ AddRec->getOperand(1))) {
const SCEV *PHISCEV =
getAddRecExpr(StartVal, AddRec->getOperand(1), L);
@@ -2702,9 +2744,8 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
}
}
}
-
- return SymbolicName;
}
+ }
// If the PHI has a single incoming value, follow that value, unless the
// PHI's incoming blocks are in a different loop, in which case doing so
@@ -2737,7 +2778,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
// Don't attempt to analyze GEPs over unsized objects.
if (!cast<PointerType>(Base->getType())->getElementType()->isSized())
return getUnknown(GEP);
- const SCEV *TotalOffset = getIntegerSCEV(0, IntPtrTy);
+ const SCEV *TotalOffset = getConstant(IntPtrTy, 0);
gep_type_iterator GTI = gep_type_begin(GEP);
for (GetElementPtrInst::op_iterator I = next(GEP->op_begin()),
E = GEP->op_end();
@@ -2920,9 +2961,9 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
// initial value.
if (AddRec->hasNoUnsignedWrap())
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart()))
- ConservativeResult =
- ConstantRange(C->getValue()->getValue(),
- APInt(getTypeSizeInBits(C->getType()), 0));
+ if (!C->getValue()->isZero())
+ ConservativeResult =
+ ConstantRange(C->getValue()->getValue(), APInt(BitWidth, 0));
// TODO: non-affine addrec
if (AddRec->isAffine()) {
@@ -2933,14 +2974,26 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
const SCEV *Start = AddRec->getStart();
- const SCEV *End = AddRec->evaluateAtIteration(MaxBECount, *this);
+ const SCEV *Step = AddRec->getStepRecurrence(*this);
- // Check for overflow.
- if (!AddRec->hasNoUnsignedWrap())
+ ConstantRange StartRange = getUnsignedRange(Start);
+ ConstantRange StepRange = getSignedRange(Step);
+ ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
+ ConstantRange EndRange =
+ StartRange.add(MaxBECountRange.multiply(StepRange));
+
+ // Check for overflow. This must be done with ConstantRange arithmetic
+ // because we could be called from within the ScalarEvolution overflow
+ // checking code.
+ ConstantRange ExtStartRange = StartRange.zextOrTrunc(BitWidth*2+1);
+ ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1);
+ ConstantRange ExtMaxBECountRange =
+ MaxBECountRange.zextOrTrunc(BitWidth*2+1);
+ ConstantRange ExtEndRange = EndRange.zextOrTrunc(BitWidth*2+1);
+ if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) !=
+ ExtEndRange)
return ConservativeResult;
- ConstantRange StartRange = getUnsignedRange(Start);
- ConstantRange EndRange = getUnsignedRange(End);
APInt Min = APIntOps::umin(StartRange.getUnsignedMin(),
EndRange.getUnsignedMin());
APInt Max = APIntOps::umax(StartRange.getUnsignedMax(),
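
The overflow test above recomputes Start + MaxBECount * Step in a width
wide enough that the arithmetic cannot wrap (2*BitWidth+1 bits) and
compares it with the narrow-width result; any difference means the narrow
computation wrapped. A standalone sketch of the same computation; the
function name is illustrative:

  #include "llvm/Support/ConstantRange.h"

  using namespace llvm;

  bool endRangeOverflowedSketch(const ConstantRange &Start,      // unsigned
                                const ConstantRange &Step,       // signed
                                const ConstantRange &MaxBECount, // unsigned
                                unsigned BitWidth) {
    ConstantRange End = Start.add(MaxBECount.multiply(Step));
    unsigned W = BitWidth * 2 + 1;
    ConstantRange WideEnd =
      Start.zextOrTrunc(W).add(
        MaxBECount.zextOrTrunc(W).multiply(Step.sextOrTrunc(W)));
    return WideEnd != End.zextOrTrunc(W);
  }
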
@@ -3064,14 +3117,26 @@ ScalarEvolution::getSignedRange(const SCEV *S) {
MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
const SCEV *Start = AddRec->getStart();
- const SCEV *End = AddRec->evaluateAtIteration(MaxBECount, *this);
+ const SCEV *Step = AddRec->getStepRecurrence(*this);
- // Check for overflow.
- if (!AddRec->hasNoSignedWrap())
+ ConstantRange StartRange = getSignedRange(Start);
+ ConstantRange StepRange = getSignedRange(Step);
+ ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
+ ConstantRange EndRange =
+ StartRange.add(MaxBECountRange.multiply(StepRange));
+
+ // Check for overflow. This must be done with ConstantRange arithmetic
+ // because we could be called from within the ScalarEvolution overflow
+ // checking code.
+ ConstantRange ExtStartRange = StartRange.sextOrTrunc(BitWidth*2+1);
+ ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1);
+ ConstantRange ExtMaxBECountRange =
+ MaxBECountRange.zextOrTrunc(BitWidth*2+1);
+ ConstantRange ExtEndRange = EndRange.sextOrTrunc(BitWidth*2+1);
+ if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) !=
+ ExtEndRange)
return ConservativeResult;
- ConstantRange StartRange = getSignedRange(Start);
- ConstantRange EndRange = getSignedRange(End);
APInt Min = APIntOps::smin(StartRange.getSignedMin(),
EndRange.getSignedMin());
APInt Max = APIntOps::smax(StartRange.getSignedMax(),
@@ -3122,9 +3187,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
else if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
return getConstant(CI);
else if (isa<ConstantPointerNull>(V))
- return getIntegerSCEV(0, V->getType());
- else if (isa<UndefValue>(V))
- return getIntegerSCEV(0, V->getType());
+ return getConstant(V->getType(), 0);
else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
return GA->mayBeOverridden() ? getUnknown(V) : getSCEV(GA->getAliasee());
else
@@ -3256,8 +3319,16 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
// Turn shift left of a constant amount into a multiply.
if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) {
uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth();
+
+ // If the shift count is not less than the bitwidth, the result of
+ // the shift is undefined. Don't try to analyze it, because the
+ // resolution chosen here may differ from the resolution chosen in
+ // other parts of the compiler.
+ if (SA->getValue().uge(BitWidth))
+ break;
+
Constant *X = ConstantInt::get(getContext(),
- APInt(BitWidth, 1).shl(SA->getLimitedValue(BitWidth)));
+ APInt(BitWidth, 1).shl(SA->getZExtValue()));
return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X));
}
break;
@@ -3266,8 +3337,16 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
// Turn logical shift right of a constant into a unsigned divide.
if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) {
uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth();
+
+ // If the shift count is not less than the bitwidth, the result of
+ // the shift is undefined. Don't try to analyze it, because the
+ // resolution chosen here may differ from the resolution chosen in
+ // other parts of the compiler.
+ if (SA->getValue().uge(BitWidth))
+ break;
+
Constant *X = ConstantInt::get(getContext(),
- APInt(BitWidth, 1).shl(SA->getLimitedValue(BitWidth)));
+ APInt(BitWidth, 1).shl(SA->getZExtValue()));
return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(X));
}
break;
@@ -3275,19 +3354,26 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
case Instruction::AShr:
// For a two-shift sext-inreg, use sext(trunc(x)) as the SCEV expression.
if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1)))
- if (Instruction *L = dyn_cast<Instruction>(U->getOperand(0)))
+ if (Operator *L = dyn_cast<Operator>(U->getOperand(0)))
if (L->getOpcode() == Instruction::Shl &&
L->getOperand(1) == U->getOperand(1)) {
- unsigned BitWidth = getTypeSizeInBits(U->getType());
+ uint64_t BitWidth = getTypeSizeInBits(U->getType());
+
+ // If the shift count is not less than the bitwidth, the result of
+ // the shift is undefined. Don't try to analyze it, because the
+ // resolution chosen here may differ from the resolution chosen in
+ // other parts of the compiler.
+ if (CI->getValue().uge(BitWidth))
+ break;
+
uint64_t Amt = BitWidth - CI->getZExtValue();
if (Amt == BitWidth)
return getSCEV(L->getOperand(0)); // shift by zero --> noop
- if (Amt > BitWidth)
- return getIntegerSCEV(0, U->getType()); // value is undefined
return
getSignExtendExpr(getTruncateExpr(getSCEV(L->getOperand(0)),
- IntegerType::get(getContext(), Amt)),
- U->getType());
+ IntegerType::get(getContext(),
+ Amt)),
+ U->getType());
}
break;
@@ -3330,10 +3416,22 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
// fall through
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE:
- if (LHS == U->getOperand(1) && RHS == U->getOperand(2))
- return getSMaxExpr(getSCEV(LHS), getSCEV(RHS));
- else if (LHS == U->getOperand(2) && RHS == U->getOperand(1))
- return getSMinExpr(getSCEV(LHS), getSCEV(RHS));
+ // a >s b ? a+x : b+x -> smax(a, b)+x
+ // a >s b ? b+x : a+x -> smin(a, b)+x
+ if (LHS->getType() == U->getType()) {
+ const SCEV *LS = getSCEV(LHS);
+ const SCEV *RS = getSCEV(RHS);
+ const SCEV *LA = getSCEV(U->getOperand(1));
+ const SCEV *RA = getSCEV(U->getOperand(2));
+ const SCEV *LDiff = getMinusSCEV(LA, LS);
+ const SCEV *RDiff = getMinusSCEV(RA, RS);
+ if (LDiff == RDiff)
+ return getAddExpr(getSMaxExpr(LS, RS), LDiff);
+ LDiff = getMinusSCEV(LA, RS);
+ RDiff = getMinusSCEV(RA, LS);
+ if (LDiff == RDiff)
+ return getAddExpr(getSMinExpr(LS, RS), LDiff);
+ }
break;
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_ULE:
@@ -3341,28 +3439,52 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
// fall through
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_UGE:
- if (LHS == U->getOperand(1) && RHS == U->getOperand(2))
- return getUMaxExpr(getSCEV(LHS), getSCEV(RHS));
- else if (LHS == U->getOperand(2) && RHS == U->getOperand(1))
- return getUMinExpr(getSCEV(LHS), getSCEV(RHS));
+ // a >u b ? a+x : b+x -> umax(a, b)+x
+ // a >u b ? b+x : a+x -> umin(a, b)+x
+ if (LHS->getType() == U->getType()) {
+ const SCEV *LS = getSCEV(LHS);
+ const SCEV *RS = getSCEV(RHS);
+ const SCEV *LA = getSCEV(U->getOperand(1));
+ const SCEV *RA = getSCEV(U->getOperand(2));
+ const SCEV *LDiff = getMinusSCEV(LA, LS);
+ const SCEV *RDiff = getMinusSCEV(RA, RS);
+ if (LDiff == RDiff)
+ return getAddExpr(getUMaxExpr(LS, RS), LDiff);
+ LDiff = getMinusSCEV(LA, RS);
+ RDiff = getMinusSCEV(RA, LS);
+ if (LDiff == RDiff)
+ return getAddExpr(getUMinExpr(LS, RS), LDiff);
+ }
break;
case ICmpInst::ICMP_NE:
- // n != 0 ? n : 1 -> umax(n, 1)
- if (LHS == U->getOperand(1) &&
- isa<ConstantInt>(U->getOperand(2)) &&
- cast<ConstantInt>(U->getOperand(2))->isOne() &&
+ // n != 0 ? n+x : 1+x -> umax(n, 1)+x
+ if (LHS->getType() == U->getType() &&
isa<ConstantInt>(RHS) &&
- cast<ConstantInt>(RHS)->isZero())
- return getUMaxExpr(getSCEV(LHS), getSCEV(U->getOperand(2)));
+ cast<ConstantInt>(RHS)->isZero()) {
+ const SCEV *One = getConstant(LHS->getType(), 1);
+ const SCEV *LS = getSCEV(LHS);
+ const SCEV *LA = getSCEV(U->getOperand(1));
+ const SCEV *RA = getSCEV(U->getOperand(2));
+ const SCEV *LDiff = getMinusSCEV(LA, LS);
+ const SCEV *RDiff = getMinusSCEV(RA, One);
+ if (LDiff == RDiff)
+ return getAddExpr(getUMaxExpr(LS, One), LDiff);
+ }
break;
case ICmpInst::ICMP_EQ:
- // n == 0 ? 1 : n -> umax(n, 1)
- if (LHS == U->getOperand(2) &&
- isa<ConstantInt>(U->getOperand(1)) &&
- cast<ConstantInt>(U->getOperand(1))->isOne() &&
+ // n == 0 ? 1+x : n+x -> umax(n, 1)+x
+ if (LHS->getType() == U->getType() &&
isa<ConstantInt>(RHS) &&
- cast<ConstantInt>(RHS)->isZero())
- return getUMaxExpr(getSCEV(LHS), getSCEV(U->getOperand(1)));
+ cast<ConstantInt>(RHS)->isZero()) {
+ const SCEV *One = getConstant(LHS->getType(), 1);
+ const SCEV *LS = getSCEV(LHS);
+ const SCEV *LA = getSCEV(U->getOperand(1));
+ const SCEV *RA = getSCEV(U->getOperand(2));
+ const SCEV *LDiff = getMinusSCEV(LA, One);
+ const SCEV *RDiff = getMinusSCEV(RA, LS);
+ if (LDiff == RDiff)
+ return getAddExpr(getUMaxExpr(LS, One), LDiff);
+ }
break;
default:
break;
@@ -3739,7 +3861,7 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L,
return getCouldNotCompute();
else
// The backedge is never taken.
- return getIntegerSCEV(0, CI->getType());
+ return getConstant(CI->getType(), 0);
}
// If it's not an integer or pointer comparison then compute it the hard way.
@@ -3786,6 +3908,9 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
Cond = ICmpInst::getSwappedPredicate(Cond);
}
+ // Simplify the operands before analyzing them.
+ (void)SimplifyICmpOperands(Cond, LHS, RHS);
+
// If we have a comparison of a chrec against a constant, try to use value
// ranges to answer this query.
if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS))
@@ -4067,7 +4192,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
if (I != ConstantEvolutionLoopExitValue.end())
return I->second;
- if (BEs.ugt(APInt(BEs.getBitWidth(),MaxBruteForceIterations)))
+ if (BEs.ugt(MaxBruteForceIterations))
return ConstantEvolutionLoopExitValue[PN] = 0; // Not going to evaluate it.
Constant *&RetVal = ConstantEvolutionLoopExitValue[PN];
@@ -4562,7 +4687,7 @@ ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) {
// already. If so, the backedge will execute zero times.
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
if (!C->getValue()->isNullValue())
- return getIntegerSCEV(0, C->getType());
+ return getConstant(C->getType(), 0);
return getCouldNotCompute(); // Otherwise it will loop infinitely.
}
@@ -4573,6 +4698,8 @@ ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) {
/// getLoopPredecessor - If the given loop's header has exactly one unique
/// predecessor outside the loop, return it. Otherwise return null.
+/// This is less strict than the loop "preheader" concept, which requires
+/// the predecessor to have exactly one successor.
///
BasicBlock *ScalarEvolution::getLoopPredecessor(const Loop *L) {
BasicBlock *Header = L->getHeader();
@@ -4591,21 +4718,21 @@ BasicBlock *ScalarEvolution::getLoopPredecessor(const Loop *L) {
/// successor from which BB is reachable, or null if no such block is
/// found.
///
-BasicBlock *
+std::pair<BasicBlock *, BasicBlock *>
ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) {
// If the block has a unique predecessor, then there is no path from the
// predecessor to the block that does not go through the direct edge
// from the predecessor to the block.
if (BasicBlock *Pred = BB->getSinglePredecessor())
- return Pred;
+ return std::make_pair(Pred, BB);
// A loop's header is defined to be a block that dominates the loop.
// If the header has a unique predecessor outside the loop, it must be
// a block that has exactly one successor that can reach the loop.
if (Loop *L = LI->getLoopFor(BB))
- return getLoopPredecessor(L);
+ return std::make_pair(getLoopPredecessor(L), L->getHeader());
- return 0;
+ return std::pair<BasicBlock *, BasicBlock *>();
}
/// HasSameValue - SCEV structural equivalence is usually sufficient for
@@ -4631,6 +4758,266 @@ static bool HasSameValue(const SCEV *A, const SCEV *B) {
return false;
}
+/// SimplifyICmpOperands - Simplify LHS and RHS in a comparison with
+/// predicate Pred. Return true iff any changes were made.
+///
+bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
+ const SCEV *&LHS, const SCEV *&RHS) {
+ bool Changed = false;
+
+ // Canonicalize a constant to the right side.
+ if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
+ // Check for both operands constant.
+ if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
+ if (ConstantExpr::getICmp(Pred,
+ LHSC->getValue(),
+ RHSC->getValue())->isNullValue())
+ goto trivially_false;
+ else
+ goto trivially_true;
+ }
+ // Otherwise swap the operands to put the constant on the right.
+ std::swap(LHS, RHS);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ Changed = true;
+ }
+
+ // If we're comparing an addrec with a value that is loop-invariant in the
+ // addrec's loop, put the addrec on the left. Also perform a dominance
+ // check, since both operands could be addrecs that are loop-invariant in
+ // each other's loop.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS)) {
+ const Loop *L = AR->getLoop();
+ if (LHS->isLoopInvariant(L) && LHS->properlyDominates(L->getHeader(), DT)) {
+ std::swap(LHS, RHS);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ Changed = true;
+ }
+ }
+
+ // If there's a constant operand, canonicalize comparisons with boundary
+ // cases, and canonicalize *-or-equal comparisons to regular comparisons.
+ if (const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS)) {
+ const APInt &RA = RC->getValue()->getValue();
+ switch (Pred) {
+ default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_NE:
+ break;
+ case ICmpInst::ICMP_UGE:
+ if ((RA - 1).isMinValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ RHS = getConstant(RA - 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMaxValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ Changed = true;
+ break;
+ }
+ if (RA.isMinValue()) goto trivially_true;
+
+ Pred = ICmpInst::ICMP_UGT;
+ RHS = getConstant(RA - 1);
+ Changed = true;
+ break;
+ case ICmpInst::ICMP_ULE:
+ if ((RA + 1).isMaxValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ RHS = getConstant(RA + 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMinValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ Changed = true;
+ break;
+ }
+ if (RA.isMaxValue()) goto trivially_true;
+
+ Pred = ICmpInst::ICMP_ULT;
+ RHS = getConstant(RA + 1);
+ Changed = true;
+ break;
+ case ICmpInst::ICMP_SGE:
+ if ((RA - 1).isMinSignedValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ RHS = getConstant(RA - 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMaxSignedValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ Changed = true;
+ break;
+ }
+ if (RA.isMinSignedValue()) goto trivially_true;
+
+ Pred = ICmpInst::ICMP_SGT;
+ RHS = getConstant(RA - 1);
+ Changed = true;
+ break;
+ case ICmpInst::ICMP_SLE:
+ if ((RA + 1).isMaxSignedValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ RHS = getConstant(RA + 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMinSignedValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ Changed = true;
+ break;
+ }
+ if (RA.isMaxSignedValue()) goto trivially_true;
+
+ Pred = ICmpInst::ICMP_SLT;
+ RHS = getConstant(RA + 1);
+ Changed = true;
+ break;
+ case ICmpInst::ICMP_UGT:
+ if (RA.isMinValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ Changed = true;
+ break;
+ }
+ if ((RA + 1).isMaxValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ RHS = getConstant(RA + 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMaxValue()) goto trivially_false;
+ break;
+ case ICmpInst::ICMP_ULT:
+ if (RA.isMaxValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ Changed = true;
+ break;
+ }
+ if ((RA - 1).isMinValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ RHS = getConstant(RA - 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMinValue()) goto trivially_false;
+ break;
+ case ICmpInst::ICMP_SGT:
+ if (RA.isMinSignedValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ Changed = true;
+ break;
+ }
+ if ((RA + 1).isMaxSignedValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ RHS = getConstant(RA + 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMaxSignedValue()) goto trivially_false;
+ break;
+ case ICmpInst::ICMP_SLT:
+ if (RA.isMaxSignedValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ Changed = true;
+ break;
+ }
+ if ((RA - 1).isMinSignedValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ RHS = getConstant(RA - 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMinSignedValue()) goto trivially_false;
+ break;
+ }
+ }
+
+ // Check for obvious equality.
+ if (HasSameValue(LHS, RHS)) {
+ if (ICmpInst::isTrueWhenEqual(Pred))
+ goto trivially_true;
+ if (ICmpInst::isFalseWhenEqual(Pred))
+ goto trivially_false;
+ }
+
+ // If possible, canonicalize GE/LE comparisons to GT/LT comparisons, by
+ // adding or subtracting 1 from one of the operands.
+ switch (Pred) {
+ case ICmpInst::ICMP_SLE:
+ if (!getSignedRange(RHS).getSignedMax().isMaxSignedValue()) {
+ RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
+ /*HasNUW=*/false, /*HasNSW=*/true);
+ Pred = ICmpInst::ICMP_SLT;
+ Changed = true;
+ } else if (!getSignedRange(LHS).getSignedMin().isMinSignedValue()) {
+ LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS,
+ /*HasNUW=*/false, /*HasNSW=*/true);
+ Pred = ICmpInst::ICMP_SLT;
+ Changed = true;
+ }
+ break;
+ case ICmpInst::ICMP_SGE:
+ if (!getSignedRange(RHS).getSignedMin().isMinSignedValue()) {
+ RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS,
+ /*HasNUW=*/false, /*HasNSW=*/true);
+ Pred = ICmpInst::ICMP_SGT;
+ Changed = true;
+ } else if (!getSignedRange(LHS).getSignedMax().isMaxSignedValue()) {
+ LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
+ /*HasNUW=*/false, /*HasNSW=*/true);
+ Pred = ICmpInst::ICMP_SGT;
+ Changed = true;
+ }
+ break;
+ case ICmpInst::ICMP_ULE:
+ if (!getUnsignedRange(RHS).getUnsignedMax().isMaxValue()) {
+ RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
+ /*HasNUW=*/true, /*HasNSW=*/false);
+ Pred = ICmpInst::ICMP_ULT;
+ Changed = true;
+ } else if (!getUnsignedRange(LHS).getUnsignedMin().isMinValue()) {
+ LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS,
+ /*HasNUW=*/true, /*HasNSW=*/false);
+ Pred = ICmpInst::ICMP_ULT;
+ Changed = true;
+ }
+ break;
+ case ICmpInst::ICMP_UGE:
+ if (!getUnsignedRange(RHS).getUnsignedMin().isMinValue()) {
+ RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS,
+ /*HasNUW=*/true, /*HasNSW=*/false);
+ Pred = ICmpInst::ICMP_UGT;
+ Changed = true;
+ } else if (!getUnsignedRange(LHS).getUnsignedMax().isMaxValue()) {
+ LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
+ /*HasNUW=*/true, /*HasNSW=*/false);
+ Pred = ICmpInst::ICMP_UGT;
+ Changed = true;
+ }
+ break;
+ default:
+ break;
+ }
+
+ // TODO: More simplifications are possible here.
+
+ return Changed;
+
+trivially_true:
+ // Return 0 == 0.
+ LHS = RHS = getConstant(Type::getInt1Ty(getContext()), 0);
+ Pred = ICmpInst::ICMP_EQ;
+ return true;
+
+trivially_false:
+ // Return 0 != 0.
+ LHS = RHS = getConstant(Type::getInt1Ty(getContext()), 0);
+ Pred = ICmpInst::ICMP_NE;
+ return true;
+}
+
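A minimal caller's-eye sketch of the boundary-case rules above, assuming a ScalarEvolution instance SE and an i8 SCEV X (these names are illustrative, not from the patch):

    ICmpInst::Predicate Pred = ICmpInst::ICMP_ULE;  // X <=u 7
    const SCEV *LHS = X;
    const SCEV *RHS = SE.getConstant(APInt(8, 7));
    // The ULE boundary rules rewrite this to the strict form X <u 8, so
    // downstream code only has to reason about <, >, ==, and !=.
    bool Changed = SE.SimplifyICmpOperands(Pred, LHS, RHS);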
bool ScalarEvolution::isKnownNegative(const SCEV *S) {
return getSignedRange(S).getSignedMax().isNegative();
}
@@ -4653,10 +5040,36 @@ bool ScalarEvolution::isKnownNonZero(const SCEV *S) {
bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS) {
+ // Canonicalize the inputs first.
+ (void)SimplifyICmpOperands(Pred, LHS, RHS);
+
+ // If LHS or RHS is an addrec, check to see if the condition is true in
+ // every iteration of the loop.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
+ if (isLoopEntryGuardedByCond(
+ AR->getLoop(), Pred, AR->getStart(), RHS) &&
+ isLoopBackedgeGuardedByCond(
+ AR->getLoop(), Pred, AR->getPostIncExpr(*this), RHS))
+ return true;
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS))
+ if (isLoopEntryGuardedByCond(
+ AR->getLoop(), Pred, LHS, AR->getStart()) &&
+ isLoopBackedgeGuardedByCond(
+ AR->getLoop(), Pred, LHS, AR->getPostIncExpr(*this)))
+ return true;
+
+ // Otherwise see what can be done with known constant ranges.
+ return isKnownPredicateWithRanges(Pred, LHS, RHS);
+}
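The addrec path above splits an invariant claim into a loop-entry check plus an inductive backedge step. A hedged illustration, assuming SE analyzes a loop L with an i32 IV (names illustrative):

    const Type *Ty = Type::getInt32Ty(SE.getContext());
    const SCEV *AR = SE.getAddRecExpr(SE.getConstant(Ty, 1),
                                      SE.getConstant(Ty, 1), L);  // {1,+,1}<L>
    // Proving {1,+,1} s> 0 reduces to two questions:
    //   entry:    is Start (= 1) s> 0 guaranteed when the loop is entered?
    //   backedge: is the post-inc value {2,+,1} s> 0 on every backedge?
    bool Known = SE.isKnownPredicate(ICmpInst::ICMP_SGT, AR,
                                     SE.getConstant(Ty, 0));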
+bool
+ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS) {
if (HasSameValue(LHS, RHS))
return ICmpInst::isTrueWhenEqual(Pred);
+ // This code is split out from isKnownPredicate because it is called from
+ // within isLoopEntryGuardedByCond.
switch (Pred) {
default:
llvm_unreachable("Unexpected ICmpInst::Predicate value!");
@@ -4753,35 +5166,33 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
LoopContinuePredicate->getSuccessor(0) != L->getHeader());
}
-/// isLoopGuardedByCond - Test whether entry to the loop is protected
+/// isLoopEntryGuardedByCond - Test whether entry to the loop is protected
/// by a conditional between LHS and RHS. This is used to help avoid max
/// expressions in loop trip counts, and to eliminate casts.
bool
-ScalarEvolution::isLoopGuardedByCond(const Loop *L,
- ICmpInst::Predicate Pred,
- const SCEV *LHS, const SCEV *RHS) {
+ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
+ ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS) {
// Interpret a null as meaning no loop, where there is obviously no guard
// (interprocedural conditions notwithstanding).
if (!L) return false;
- BasicBlock *Predecessor = getLoopPredecessor(L);
- BasicBlock *PredecessorDest = L->getHeader();
-
// Starting at the loop predecessor, climb up the predecessor chain, as long
// as there are predecessors that can be found that have unique successors
// leading to the original header.
- for (; Predecessor;
- PredecessorDest = Predecessor,
- Predecessor = getPredecessorWithUniqueSuccessorForBB(Predecessor)) {
+ for (std::pair<BasicBlock *, BasicBlock *>
+ Pair(getLoopPredecessor(L), L->getHeader());
+ Pair.first;
+ Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) {
BranchInst *LoopEntryPredicate =
- dyn_cast<BranchInst>(Predecessor->getTerminator());
+ dyn_cast<BranchInst>(Pair.first->getTerminator());
if (!LoopEntryPredicate ||
LoopEntryPredicate->isUnconditional())
continue;
if (isImpliedCond(LoopEntryPredicate->getCondition(), Pred, LHS, RHS,
- LoopEntryPredicate->getSuccessor(0) != PredecessorDest))
+ LoopEntryPredicate->getSuccessor(0) != Pair.second))
return true;
}
@@ -4845,117 +5256,12 @@ bool ScalarEvolution::isImpliedCond(Value *CondValue,
// Canonicalize the query to match the way instcombine will have
// canonicalized the comparison.
- // First, put a constant operand on the right.
- if (isa<SCEVConstant>(LHS)) {
- std::swap(LHS, RHS);
- Pred = ICmpInst::getSwappedPredicate(Pred);
- }
- // Then, canonicalize comparisons with boundary cases.
- if (const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS)) {
- const APInt &RA = RC->getValue()->getValue();
- switch (Pred) {
- default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
- case ICmpInst::ICMP_EQ:
- case ICmpInst::ICMP_NE:
- break;
- case ICmpInst::ICMP_UGE:
- if ((RA - 1).isMinValue()) {
- Pred = ICmpInst::ICMP_NE;
- RHS = getConstant(RA - 1);
- break;
- }
- if (RA.isMaxValue()) {
- Pred = ICmpInst::ICMP_EQ;
- break;
- }
- if (RA.isMinValue()) return true;
- break;
- case ICmpInst::ICMP_ULE:
- if ((RA + 1).isMaxValue()) {
- Pred = ICmpInst::ICMP_NE;
- RHS = getConstant(RA + 1);
- break;
- }
- if (RA.isMinValue()) {
- Pred = ICmpInst::ICMP_EQ;
- break;
- }
- if (RA.isMaxValue()) return true;
- break;
- case ICmpInst::ICMP_SGE:
- if ((RA - 1).isMinSignedValue()) {
- Pred = ICmpInst::ICMP_NE;
- RHS = getConstant(RA - 1);
- break;
- }
- if (RA.isMaxSignedValue()) {
- Pred = ICmpInst::ICMP_EQ;
- break;
- }
- if (RA.isMinSignedValue()) return true;
- break;
- case ICmpInst::ICMP_SLE:
- if ((RA + 1).isMaxSignedValue()) {
- Pred = ICmpInst::ICMP_NE;
- RHS = getConstant(RA + 1);
- break;
- }
- if (RA.isMinSignedValue()) {
- Pred = ICmpInst::ICMP_EQ;
- break;
- }
- if (RA.isMaxSignedValue()) return true;
- break;
- case ICmpInst::ICMP_UGT:
- if (RA.isMinValue()) {
- Pred = ICmpInst::ICMP_NE;
- break;
- }
- if ((RA + 1).isMaxValue()) {
- Pred = ICmpInst::ICMP_EQ;
- RHS = getConstant(RA + 1);
- break;
- }
- if (RA.isMaxValue()) return false;
- break;
- case ICmpInst::ICMP_ULT:
- if (RA.isMaxValue()) {
- Pred = ICmpInst::ICMP_NE;
- break;
- }
- if ((RA - 1).isMinValue()) {
- Pred = ICmpInst::ICMP_EQ;
- RHS = getConstant(RA - 1);
- break;
- }
- if (RA.isMinValue()) return false;
- break;
- case ICmpInst::ICMP_SGT:
- if (RA.isMinSignedValue()) {
- Pred = ICmpInst::ICMP_NE;
- break;
- }
- if ((RA + 1).isMaxSignedValue()) {
- Pred = ICmpInst::ICMP_EQ;
- RHS = getConstant(RA + 1);
- break;
- }
- if (RA.isMaxSignedValue()) return false;
- break;
- case ICmpInst::ICMP_SLT:
- if (RA.isMaxSignedValue()) {
- Pred = ICmpInst::ICMP_NE;
- break;
- }
- if ((RA - 1).isMinSignedValue()) {
- Pred = ICmpInst::ICMP_EQ;
- RHS = getConstant(RA - 1);
- break;
- }
- if (RA.isMinSignedValue()) return false;
- break;
- }
- }
+ if (SimplifyICmpOperands(Pred, LHS, RHS))
+ if (LHS == RHS)
+ return CmpInst::isTrueWhenEqual(Pred);
+ if (SimplifyICmpOperands(FoundPred, FoundLHS, FoundRHS))
+ if (FoundLHS == FoundRHS)
+ return CmpInst::isFalseWhenEqual(Pred);
// Check to see if we can make the LHS or RHS match.
if (LHS == FoundRHS || RHS == FoundLHS) {
@@ -5028,26 +5334,26 @@ ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
break;
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE:
- if (isKnownPredicate(ICmpInst::ICMP_SLE, LHS, FoundLHS) &&
- isKnownPredicate(ICmpInst::ICMP_SGE, RHS, FoundRHS))
+ if (isKnownPredicateWithRanges(ICmpInst::ICMP_SLE, LHS, FoundLHS) &&
+ isKnownPredicateWithRanges(ICmpInst::ICMP_SGE, RHS, FoundRHS))
return true;
break;
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE:
- if (isKnownPredicate(ICmpInst::ICMP_SGE, LHS, FoundLHS) &&
- isKnownPredicate(ICmpInst::ICMP_SLE, RHS, FoundRHS))
+ if (isKnownPredicateWithRanges(ICmpInst::ICMP_SGE, LHS, FoundLHS) &&
+ isKnownPredicateWithRanges(ICmpInst::ICMP_SLE, RHS, FoundRHS))
return true;
break;
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_ULE:
- if (isKnownPredicate(ICmpInst::ICMP_ULE, LHS, FoundLHS) &&
- isKnownPredicate(ICmpInst::ICMP_UGE, RHS, FoundRHS))
+ if (isKnownPredicateWithRanges(ICmpInst::ICMP_ULE, LHS, FoundLHS) &&
+ isKnownPredicateWithRanges(ICmpInst::ICMP_UGE, RHS, FoundRHS))
return true;
break;
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_UGE:
- if (isKnownPredicate(ICmpInst::ICMP_UGE, LHS, FoundLHS) &&
- isKnownPredicate(ICmpInst::ICMP_ULE, RHS, FoundRHS))
+ if (isKnownPredicateWithRanges(ICmpInst::ICMP_UGE, LHS, FoundLHS) &&
+ isKnownPredicateWithRanges(ICmpInst::ICMP_ULE, RHS, FoundRHS))
return true;
break;
}
@@ -5066,7 +5372,7 @@ const SCEV *ScalarEvolution::getBECount(const SCEV *Start,
"This code doesn't handle negative strides yet!");
const Type *Ty = Start->getType();
- const SCEV *NegOne = getIntegerSCEV(-1, Ty);
+ const SCEV *NegOne = getConstant(Ty, (uint64_t)-1);
const SCEV *Diff = getMinusSCEV(End, Start);
const SCEV *RoundUp = getAddExpr(Step, NegOne);
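The expression being assembled is the round-up division BECount = (End - Start + (Step - 1)) /u Step. For example, Start = 0, End = 10, Step = 3 gives (10 + 2) /u 3 = 4, matching the four values 0, 3, 6, 9 for which the backedge condition holds.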
@@ -5122,7 +5428,7 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
// behavior, so if wrap does occur, the loop could either terminate or
// loop infinitely, but in either case, the loop is guaranteed to
// iterate at least until the iteration where the wrapping occurs.
- const SCEV *One = getIntegerSCEV(1, Step->getType());
+ const SCEV *One = getConstant(Step->getType(), 1);
if (isSigned) {
APInt Max = APInt::getSignedMaxValue(BitWidth);
if ((Max - getSignedRange(getMinusSCEV(Step, One)).getSignedMax())
@@ -5156,10 +5462,10 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
// only know that it will execute (max(m,n)-n)/s times. In both cases,
// the division must round up.
const SCEV *End = RHS;
- if (!isLoopGuardedByCond(L,
- isSigned ? ICmpInst::ICMP_SLT :
- ICmpInst::ICMP_ULT,
- getMinusSCEV(Start, Step), RHS))
+ if (!isLoopEntryGuardedByCond(L,
+ isSigned ? ICmpInst::ICMP_SLT :
+ ICmpInst::ICMP_ULT,
+ getMinusSCEV(Start, Step), RHS))
End = isSigned ? getSMaxExpr(RHS, Start)
: getUMaxExpr(RHS, Start);
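Clamping End to smax/umax(RHS, Start) is what makes the m < n case safe: if the loop is not known to be entered, say Start = 5 and RHS = 3 unsigned, then max(3, 5) - 5 = 0 and the computed count correctly degenerates to zero trips.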
@@ -5173,7 +5479,7 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
// This allows the subsequent ceiling division of (N+(step-1))/step to
// compute the correct value.
const SCEV *StepMinusOne = getMinusSCEV(Step,
- getIntegerSCEV(1, Step->getType()));
+ getConstant(Step->getType(), 1));
MaxEnd = isSigned ?
getSMinExpr(MaxEnd,
getMinusSCEV(getConstant(APInt::getSignedMaxValue(BitWidth)),
@@ -5210,7 +5516,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart()))
if (!SC->getValue()->isZero()) {
SmallVector<const SCEV *, 4> Operands(op_begin(), op_end());
- Operands[0] = SE.getIntegerSCEV(0, SC->getType());
+ Operands[0] = SE.getConstant(SC->getType(), 0);
const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop());
if (const SCEVAddRecExpr *ShiftedAddRec =
dyn_cast<SCEVAddRecExpr>(Shifted))
@@ -5234,7 +5540,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
// iteration exits.
unsigned BitWidth = SE.getTypeSizeInBits(getType());
if (!Range.contains(APInt(BitWidth, 0)))
- return SE.getIntegerSCEV(0, getType());
+ return SE.getConstant(getType(), 0);
if (isAffine()) {
// If this is an affine expression then we have this situation:
@@ -5459,7 +5765,7 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
WriteAsOperand(OS, F, /*PrintType=*/false);
OS << "\n";
for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
- if (isSCEVable(I->getType())) {
+ if (isSCEVable(I->getType()) && !isa<CmpInst>(*I)) {
OS << *I << '\n';
OS << " --> ";
const SCEV *SV = SE.getSCEV(&*I);
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 2e18ceac525e..0012b84e1f08 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -192,7 +192,7 @@ static bool FactorOutConstant(const SCEV *&S,
// x/x == 1.
if (S == Factor) {
- S = SE.getIntegerSCEV(1, S->getType());
+ S = SE.getConstant(S->getType(), 1);
return true;
}
@@ -244,7 +244,7 @@ static bool FactorOutConstant(const SCEV *&S,
// Mul's operands. If so, we can just remove it.
for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
const SCEV *SOp = M->getOperand(i);
- const SCEV *Remainder = SE.getIntegerSCEV(0, SOp->getType());
+ const SCEV *Remainder = SE.getConstant(SOp->getType(), 0);
if (FactorOutConstant(SOp, Remainder, Factor, SE, TD) &&
Remainder->isZero()) {
SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
@@ -259,7 +259,7 @@ static bool FactorOutConstant(const SCEV *&S,
// In an AddRec, check if both start and step are divisible.
if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
const SCEV *Step = A->getStepRecurrence(SE);
- const SCEV *StepRem = SE.getIntegerSCEV(0, Step->getType());
+ const SCEV *StepRem = SE.getConstant(Step->getType(), 0);
if (!FactorOutConstant(Step, StepRem, Factor, SE, TD))
return false;
if (!StepRem->isZero())
@@ -289,7 +289,7 @@ static void SimplifyAddOperands(SmallVectorImpl<const SCEV *> &Ops,
SmallVector<const SCEV *, 8> AddRecs(Ops.end() - NumAddRecs, Ops.end());
// Let ScalarEvolution sort and simplify the non-addrecs list.
const SCEV *Sum = NoAddRecs.empty() ?
- SE.getIntegerSCEV(0, Ty) :
+ SE.getConstant(Ty, 0) :
SE.getAddExpr(NoAddRecs);
// If it returned an add, use the operands. Otherwise it simplified
// the sum into a single value, so just use that.
@@ -316,7 +316,7 @@ static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops,
while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Ops[i])) {
const SCEV *Start = A->getStart();
if (Start->isZero()) break;
- const SCEV *Zero = SE.getIntegerSCEV(0, Ty);
+ const SCEV *Zero = SE.getConstant(Ty, 0);
AddRecs.push_back(SE.getAddRecExpr(Zero,
A->getStepRecurrence(SE),
A->getLoop()));
@@ -392,7 +392,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
SmallVector<const SCEV *, 8> NewOps;
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
const SCEV *Op = Ops[i];
- const SCEV *Remainder = SE.getIntegerSCEV(0, Ty);
+ const SCEV *Remainder = SE.getConstant(Ty, 0);
if (FactorOutConstant(Op, Remainder, ElSize, SE, SE.TD)) {
// Op now has ElSize factored out.
ScaledOps.push_back(Op);
@@ -642,6 +642,8 @@ static const Loop *GetRelevantLoop(const SCEV *S, LoopInfo &LI,
llvm_unreachable("Unexpected SCEV type!");
}
+namespace {
+
/// LoopCompare - Compare loops by PickMostRelevantLoop.
class LoopCompare {
DominatorTree &DT;
@@ -668,6 +670,8 @@ public:
}
};
+}
+
Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
const Type *Ty = SE.getEffectiveSCEVType(S->getType());
@@ -705,9 +709,11 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum);
} else if (const PointerType *PTy = dyn_cast<PointerType>(Op->getType())) {
// The running sum is an integer, and there's a pointer at this level.
- // Try to form a getelementptr.
+ // Try to form a getelementptr. If the running sum is an instruction,
+ // wrap it in a SCEVUnknown to avoid re-analyzing it.
SmallVector<const SCEV *, 4> NewOps;
- NewOps.push_back(SE.getUnknown(Sum));
+ NewOps.push_back(isa<Instruction>(Sum) ? SE.getUnknown(Sum) :
+ SE.getSCEV(Sum));
for (++I; I != E && I->first == CurLoop; ++I)
NewOps.push_back(I->second);
Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, expand(Op));
@@ -797,7 +803,7 @@ static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest,
while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Base)) {
Base = A->getStart();
Rest = SE.getAddExpr(Rest,
- SE.getAddRecExpr(SE.getIntegerSCEV(0, A->getType()),
+ SE.getAddRecExpr(SE.getConstant(A->getType(), 0),
A->getStepRecurrence(SE),
A->getLoop()));
}
@@ -966,9 +972,12 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
// Determine a normalized form of this expression, which is the expression
// before any post-inc adjustment is made.
const SCEVAddRecExpr *Normalized = S;
- if (L == PostIncLoop) {
- const SCEV *Step = S->getStepRecurrence(SE);
- Normalized = cast<SCEVAddRecExpr>(SE.getMinusSCEV(S, Step));
+ if (PostIncLoops.count(L)) {
+ PostIncLoopSet Loops;
+ Loops.insert(L);
+ Normalized =
+ cast<SCEVAddRecExpr>(TransformForPostIncUse(Normalize, S, 0, 0,
+ Loops, SE, *SE.DT));
}
// Strip off any non-loop-dominating component from the addrec start.
@@ -976,7 +985,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
const SCEV *PostLoopOffset = 0;
if (!Start->properlyDominates(L->getHeader(), SE.DT)) {
PostLoopOffset = Start;
- Start = SE.getIntegerSCEV(0, Normalized->getType());
+ Start = SE.getConstant(Normalized->getType(), 0);
Normalized =
cast<SCEVAddRecExpr>(SE.getAddRecExpr(Start,
Normalized->getStepRecurrence(SE),
@@ -986,10 +995,9 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
// Strip off any non-loop-dominating component from the addrec step.
const SCEV *Step = Normalized->getStepRecurrence(SE);
const SCEV *PostLoopScale = 0;
- if (!Step->hasComputableLoopEvolution(L) &&
- !Step->dominates(L->getHeader(), SE.DT)) {
+ if (!Step->dominates(L->getHeader(), SE.DT)) {
PostLoopScale = Step;
- Step = SE.getIntegerSCEV(1, Normalized->getType());
+ Step = SE.getConstant(Normalized->getType(), 1);
Normalized =
cast<SCEVAddRecExpr>(SE.getAddRecExpr(Start, Step,
Normalized->getLoop()));
@@ -1002,7 +1010,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
// Accommodate post-inc mode, if necessary.
Value *Result;
- if (L != PostIncLoop)
+ if (!PostIncLoops.count(L))
Result = PN;
else {
// In PostInc mode, use the post-incremented value.
@@ -1072,7 +1080,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
// {X,+,F} --> X + {0,+,F}
if (!S->getStart()->isZero()) {
SmallVector<const SCEV *, 4> NewOps(S->op_begin(), S->op_end());
- NewOps[0] = SE.getIntegerSCEV(0, Ty);
+ NewOps[0] = SE.getConstant(Ty, 0);
const SCEV *Rest = SE.getAddRecExpr(NewOps, L);
// Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the
@@ -1100,7 +1108,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
// {0,+,1} --> Insert a canonical induction variable into the loop!
if (S->isAffine() &&
- S->getOperand(1) == SE.getIntegerSCEV(1, Ty)) {
+ S->getOperand(1) == SE.getConstant(Ty, 1)) {
// If there's a canonical IV, just use it.
if (CanonicalIV) {
assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
@@ -1274,7 +1282,7 @@ Value *SCEVExpander::expand(const SCEV *S) {
// If the SCEV is computable at this level, insert it into the header
// after the PHIs (and after any other instructions that we've inserted
// there) so that it is guaranteed to dominate any user inside the loop.
- if (L && S->hasComputableLoopEvolution(L) && L != PostIncLoop)
+ if (L && S->hasComputableLoopEvolution(L) && !PostIncLoops.count(L))
InsertPt = L->getHeader()->getFirstNonPHI();
while (isInsertedInstruction(InsertPt) || isa<DbgInfoIntrinsic>(InsertPt))
InsertPt = llvm::next(BasicBlock::iterator(InsertPt));
@@ -1296,7 +1304,7 @@ Value *SCEVExpander::expand(const SCEV *S) {
Value *V = visit(S);
// Remember the expanded value for this SCEV at this location.
- if (!PostIncLoop)
+ if (PostIncLoops.empty())
InsertedExpressions[std::make_pair(S, InsertPt)] = V;
restoreInsertPoint(SaveInsertBB, SaveInsertPt);
@@ -1304,7 +1312,7 @@ Value *SCEVExpander::expand(const SCEV *S) {
}
void SCEVExpander::rememberInstruction(Value *I) {
- if (!PostIncLoop)
+ if (PostIncLoops.empty())
InsertedValues.insert(I);
// If we just claimed an existing instruction and that instruction had
@@ -1334,8 +1342,8 @@ Value *
SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
const Type *Ty) {
assert(Ty->isIntegerTy() && "Can only insert integer induction variables!");
- const SCEV *H = SE.getAddRecExpr(SE.getIntegerSCEV(0, Ty),
- SE.getIntegerSCEV(1, Ty), L);
+ const SCEV *H = SE.getAddRecExpr(SE.getConstant(Ty, 0),
+ SE.getConstant(Ty, 1), L);
BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
Value *V = expandCodeFor(H, 0, L->getHeader()->begin());
diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp
new file mode 100644
index 000000000000..75c381d5efab
--- /dev/null
+++ b/lib/Analysis/ScalarEvolutionNormalization.cpp
@@ -0,0 +1,150 @@
+//===- ScalarEvolutionNormalization.cpp - See below -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements utilities for working with "normalized" expressions.
+// See the comments at the top of ScalarEvolutionNormalization.h for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ScalarEvolutionNormalization.h"
+using namespace llvm;
+
+/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression
+/// and now we need to decide whether the user should use the pre-inc or post-inc
+/// value. If this user should use the post-inc version of the IV, return true.
+///
+/// Choosing wrong here can break dominance properties (if we choose to use the
+/// post-inc value when we cannot) or it can end up adding extra live-ranges to
+/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we
+/// should use the post-inc value).
+static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV,
+ const Loop *L, DominatorTree *DT) {
+ // If the user is in the loop, use the pre-inc value.
+ if (L->contains(User)) return false;
+
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ if (!LatchBlock)
+ return false;
+
+ // Ok, the user is outside of the loop. If it is dominated by the latch
+ // block, use the post-inc value.
+ if (DT->dominates(LatchBlock, User->getParent()))
+ return true;
+
+ // There is one case we have to be careful of: PHI nodes. These little guys
+ // can live in blocks that are not dominated by the latch block, but (since
+ // their uses occur in the predecessor block, not the block the PHI lives in)
+ // should still use the post-inc value. Check for this case now.
+ PHINode *PN = dyn_cast<PHINode>(User);
+ if (!PN) return false; // not a phi, not dominated by latch block.
+
+ // Look at all of the uses of IV by the PHI node. If any use corresponds to
+ // a block that is not dominated by the latch block, give up and use the
+ // preincremented value.
+ unsigned NumUses = 0;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == IV) {
+ ++NumUses;
+ if (!DT->dominates(LatchBlock, PN->getIncomingBlock(i)))
+ return false;
+ }
+
+ // Okay, all uses of IV by PN are in predecessor blocks that really are
+ // dominated by the latch block. Use the post-incremented value.
+ return true;
+}
+
+const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
+ const SCEV *S,
+ Instruction *User,
+ Value *OperandValToReplace,
+ PostIncLoopSet &Loops,
+ ScalarEvolution &SE,
+ DominatorTree &DT) {
+ if (isa<SCEVConstant>(S) || isa<SCEVUnknown>(S))
+ return S;
+ if (const SCEVCastExpr *X = dyn_cast<SCEVCastExpr>(S)) {
+ const SCEV *O = X->getOperand();
+ const SCEV *N = TransformForPostIncUse(Kind, O, User, OperandValToReplace,
+ Loops, SE, DT);
+ if (O != N)
+ switch (S->getSCEVType()) {
+ case scZeroExtend: return SE.getZeroExtendExpr(N, S->getType());
+ case scSignExtend: return SE.getSignExtendExpr(N, S->getType());
+ case scTruncate: return SE.getTruncateExpr(N, S->getType());
+ default: llvm_unreachable("Unexpected SCEVCastExpr kind!");
+ }
+ return S;
+ }
+ if (const SCEVNAryExpr *X = dyn_cast<SCEVNAryExpr>(S)) {
+ SmallVector<const SCEV *, 8> Operands;
+ bool Changed = false;
+ for (SCEVNAryExpr::op_iterator I = X->op_begin(), E = X->op_end();
+ I != E; ++I) {
+ const SCEV *O = *I;
+ const SCEV *N = TransformForPostIncUse(Kind, O, User, OperandValToReplace,
+ Loops, SE, DT);
+ Changed |= N != O;
+ Operands.push_back(N);
+ }
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ // An addrec. This is the interesting part.
+ const Loop *L = AR->getLoop();
+ const SCEV *Result = SE.getAddRecExpr(Operands, L);
+ switch (Kind) {
+ default: llvm_unreachable("Unexpected transform name!");
+ case NormalizeAutodetect:
+ if (Instruction *OI = dyn_cast<Instruction>(OperandValToReplace))
+ if (IVUseShouldUsePostIncValue(User, OI, L, &DT)) {
+ Result = SE.getMinusSCEV(Result, AR->getStepRecurrence(SE));
+ Loops.insert(L);
+ }
+ break;
+ case Normalize:
+ if (Loops.count(L))
+ Result = SE.getMinusSCEV(Result, AR->getStepRecurrence(SE));
+ break;
+ case Denormalize:
+ if (Loops.count(L)) {
+ const SCEV *TransformedStep =
+ TransformForPostIncUse(Kind, AR->getStepRecurrence(SE),
+ User, OperandValToReplace, Loops, SE, DT);
+ Result = SE.getAddExpr(Result, TransformedStep);
+ }
+ break;
+ }
+ return Result;
+ }
+ if (Changed)
+ switch (S->getSCEVType()) {
+ case scAddExpr: return SE.getAddExpr(Operands);
+ case scMulExpr: return SE.getMulExpr(Operands);
+ case scSMaxExpr: return SE.getSMaxExpr(Operands);
+ case scUMaxExpr: return SE.getUMaxExpr(Operands);
+ default: llvm_unreachable("Unexpected SCEVNAryExpr kind!");
+ }
+ return S;
+ }
+ if (const SCEVUDivExpr *X = dyn_cast<SCEVUDivExpr>(S)) {
+ const SCEV *LO = X->getLHS();
+ const SCEV *RO = X->getRHS();
+ const SCEV *LN = TransformForPostIncUse(Kind, LO, User, OperandValToReplace,
+ Loops, SE, DT);
+ const SCEV *RN = TransformForPostIncUse(Kind, RO, User, OperandValToReplace,
+ Loops, SE, DT);
+ if (LO != LN || RO != RN)
+ return SE.getUDivExpr(LN, RN);
+ return S;
+ }
+ llvm_unreachable("Unexpected SCEV kind!");
+ return 0;
+}
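A hedged round-trip sketch of the new interface, assuming a ScalarEvolution SE, a DominatorTree DT, a loop L, and an integer type Ty (all illustrative):

    PostIncLoopSet Loops;
    Loops.insert(L);
    const SCEV *S = SE.getAddRecExpr(SE.getConstant(Ty, 0),
                                     SE.getConstant(Ty, 1), L);  // {0,+,1}<L>
    // Normalize subtracts the step for loops in the set: {0,+,1} -> {-1,+,1}.
    const SCEV *N = TransformForPostIncUse(Normalize, S, 0, 0, Loops, SE, DT);
    // Denormalize adds the (transformed) step back, recovering {0,+,1}.
    const SCEV *D = TransformForPostIncUse(Denormalize, N, 0, 0, Loops, SE, DT);
    // SCEVs are uniqued, so the round trip yields the same pointer.
    assert(D == S && "Denormalize should invert Normalize here");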
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 5ae72f7aba9a..7e8ec2e06115 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -1342,22 +1342,23 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
/// GetConstantStringInfo - This function extracts the contents of the
/// null-terminated C string pointed to by V. If successful, it returns true
/// and returns the string in Str. If unsuccessful, it returns false.
-bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset,
+bool llvm::GetConstantStringInfo(const Value *V, std::string &Str,
+ uint64_t Offset,
bool StopAtNul) {
// If V is NULL, return false.
if (V == NULL) return false;
// Look through bitcast instructions.
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(V))
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V))
return GetConstantStringInfo(BCI->getOperand(0), Str, Offset, StopAtNul);
// If the value is not a GEP instruction nor a constant expression with a
// GEP instruction, then return false because ConstantArray can't occur
// any other way
- User *GEP = 0;
- if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) {
+ const User *GEP = 0;
+ if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) {
GEP = GEPI;
- } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
if (CE->getOpcode() == Instruction::BitCast)
return GetConstantStringInfo(CE->getOperand(0), Str, Offset, StopAtNul);
if (CE->getOpcode() != Instruction::GetElementPtr)
@@ -1378,7 +1379,7 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset,
// Check to make sure that the first operand of the GEP is an integer and
// has value 0 so that we are sure we're indexing into the initializer.
- ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
+ const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
if (FirstIdx == 0 || !FirstIdx->isZero())
return false;
@@ -1386,7 +1387,7 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset,
// into the array. If this occurs, we can't say anything meaningful about
// the string.
uint64_t StartIdx = 0;
- if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2)))
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2)))
StartIdx = CI->getZExtValue();
else
return false;
@@ -1397,10 +1398,10 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset,
// The GEP instruction, constant or instruction, must reference a global
// variable that is a constant and is initialized. The referenced constant
// initializer is the array that we'll use for optimization.
- GlobalVariable* GV = dyn_cast<GlobalVariable>(V);
+ const GlobalVariable* GV = dyn_cast<GlobalVariable>(V);
if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
return false;
- Constant *GlobalInit = GV->getInitializer();
+ const Constant *GlobalInit = GV->getInitializer();
// Handle the ConstantAggregateZero case
if (isa<ConstantAggregateZero>(GlobalInit)) {
@@ -1411,7 +1412,7 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset,
}
// Must be a Constant Array
- ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
+ const ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
if (Array == 0 || !Array->getType()->getElementType()->isIntegerTy(8))
return false;
@@ -1425,8 +1426,8 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset,
// to in the array.
Str.reserve(NumElts-Offset);
for (unsigned i = Offset; i != NumElts; ++i) {
- Constant *Elt = Array->getOperand(i);
- ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
+ const Constant *Elt = Array->getOperand(i);
+ const ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
if (!CI) // This array isn't suitable, non-int initializer.
return false;
if (StopAtNul && CI->isZero())
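For orientation, a sketch of a call site against the now const-correct signature, assuming V ultimately points at a constant global such as [6 x i8] c"hello\00" (illustrative, not from this patch):

    std::string Str;
    if (GetConstantStringInfo(V, Str, /*Offset=*/0, /*StopAtNul=*/true)) {
      // Str == "hello"; the NUL terminator is not copied into Str.
    }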
diff --git a/lib/Archive/Archive.cpp b/lib/Archive/Archive.cpp
index f4f8a4349ef8..54c715c604d2 100644
--- a/lib/Archive/Archive.cpp
+++ b/lib/Archive/Archive.cpp
@@ -233,15 +233,14 @@ bool llvm::GetBitcodeSymbols(const sys::Path& fName,
}
Module*
-llvm::GetBitcodeSymbols(const unsigned char *BufPtr, unsigned Length,
+llvm::GetBitcodeSymbols(const char *BufPtr, unsigned Length,
const std::string& ModuleID,
LLVMContext& Context,
std::vector<std::string>& symbols,
std::string* ErrMsg) {
// Get the module.
std::auto_ptr<MemoryBuffer> Buffer(
- MemoryBuffer::getNewMemBuffer(Length, ModuleID.c_str()));
- memcpy((char*)Buffer->getBufferStart(), BufPtr, Length);
+ MemoryBuffer::getMemBufferCopy(StringRef(BufPtr, Length),ModuleID.c_str()));
Module *M = ParseBitcodeFile(Buffer.get(), Context, ErrMsg);
if (!M)
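The pattern being replaced throughout these files, getNewMemBuffer followed by a memcpy through a cast-away-const pointer, collapses into one call. A hedged sketch, with Data and Len standing in for any in-memory bitcode image:

    MemoryBuffer *Buf =
      MemoryBuffer::getMemBufferCopy(StringRef(Data, Len), "member.bc");
    // One allocation-and-copy, with no writable alias of the buffer exposed.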
diff --git a/lib/Archive/ArchiveInternals.h b/lib/Archive/ArchiveInternals.h
index baea544d20b8..08f20e74811e 100644
--- a/lib/Archive/ArchiveInternals.h
+++ b/lib/Archive/ArchiveInternals.h
@@ -77,7 +77,7 @@ namespace llvm {
std::vector<std::string>& symbols,
std::string* ErrMsg);
- Module* GetBitcodeSymbols(const unsigned char*Buffer,unsigned Length,
+ Module* GetBitcodeSymbols(const char *Buffer, unsigned Length,
const std::string& ModuleID,
LLVMContext& Context,
std::vector<std::string>& symbols,
diff --git a/lib/Archive/ArchiveReader.cpp b/lib/Archive/ArchiveReader.cpp
index 3ef15d25d690..eef6fe0b1c1d 100644
--- a/lib/Archive/ArchiveReader.cpp
+++ b/lib/Archive/ArchiveReader.cpp
@@ -121,7 +121,7 @@ Archive::parseMemberHeader(const char*& At, const char* End, std::string* error)
if (isdigit(Hdr->name[3])) {
unsigned len = atoi(&Hdr->name[3]);
const char *nulp = (const char *)memchr(At, '\0', len);
- pathname.assign(At, nulp != 0 ? nulp - At : len);
+ pathname.assign(At, nulp != 0 ? (uintptr_t)(nulp - At) : len);
At += len;
MemberSize -= len;
flags |= ArchiveMember::HasLongFilenameFlag;
@@ -348,8 +348,8 @@ Archive::getAllModules(std::vector<Module*>& Modules,
std::string FullMemberName = archPath.str() +
"(" + I->getPath().str() + ")";
MemoryBuffer *Buffer =
- MemoryBuffer::getNewMemBuffer(I->getSize(), FullMemberName.c_str());
- memcpy((char*)Buffer->getBufferStart(), I->getData(), I->getSize());
+ MemoryBuffer::getMemBufferCopy(StringRef(I->getData(), I->getSize()),
+ FullMemberName.c_str());
Module *M = ParseBitcodeFile(Buffer, Context, ErrMessage);
delete Buffer;
@@ -487,9 +487,9 @@ Archive::findModuleDefiningSymbol(const std::string& symbol,
// Now, load the bitcode module to get the Module.
std::string FullMemberName = archPath.str() + "(" +
mbr->getPath().str() + ")";
- MemoryBuffer *Buffer =MemoryBuffer::getNewMemBuffer(mbr->getSize(),
- FullMemberName.c_str());
- memcpy((char*)Buffer->getBufferStart(), mbr->getData(), mbr->getSize());
+ MemoryBuffer *Buffer =
+ MemoryBuffer::getMemBufferCopy(StringRef(mbr->getData(), mbr->getSize()),
+ FullMemberName.c_str());
Module *m = getLazyBitcodeModule(Buffer, Context, ErrMsg);
if (!m)
@@ -538,8 +538,8 @@ Archive::findModulesDefiningSymbols(std::set<std::string>& symbols,
std::string FullMemberName = archPath.str() + "(" +
mbr->getPath().str() + ")";
Module* M =
- GetBitcodeSymbols((const unsigned char*)At, mbr->getSize(),
- FullMemberName, Context, symbols, error);
+ GetBitcodeSymbols(At, mbr->getSize(), FullMemberName, Context,
+ symbols, error);
if (M) {
// Insert the module's symbols into the symbol table
@@ -616,8 +616,8 @@ bool Archive::isBitcodeArchive() {
archPath.str() + "(" + I->getPath().str() + ")";
MemoryBuffer *Buffer =
- MemoryBuffer::getNewMemBuffer(I->getSize(), FullMemberName.c_str());
- memcpy((char*)Buffer->getBufferStart(), I->getData(), I->getSize());
+ MemoryBuffer::getMemBufferCopy(StringRef(I->getData(), I->getSize()),
+ FullMemberName.c_str());
Module *M = ParseBitcodeFile(Buffer, Context);
delete Buffer;
if (!M)
diff --git a/lib/Archive/ArchiveWriter.cpp b/lib/Archive/ArchiveWriter.cpp
index a02601a4aadc..21d4f6566c56 100644
--- a/lib/Archive/ArchiveWriter.cpp
+++ b/lib/Archive/ArchiveWriter.cpp
@@ -226,8 +226,7 @@ Archive::writeMember(
std::string FullMemberName = archPath.str() + "(" + member.getPath().str()
+ ")";
Module* M =
- GetBitcodeSymbols((const unsigned char*)data,fSize,
- FullMemberName, Context, symbols, ErrMsg);
+ GetBitcodeSymbols(data, fSize, FullMemberName, Context, symbols, ErrMsg);
// If the bitcode parsed successfully
if ( M ) {
diff --git a/lib/Archive/CMakeLists.txt b/lib/Archive/CMakeLists.txt
index 27698cb17182..7ff478a41a59 100644
--- a/lib/Archive/CMakeLists.txt
+++ b/lib/Archive/CMakeLists.txt
@@ -2,4 +2,4 @@ add_llvm_library(LLVMArchive
Archive.cpp
ArchiveReader.cpp
ArchiveWriter.cpp
- ) \ No newline at end of file
+ )
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index cdad0770a38a..3b08ca18053c 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -322,9 +322,8 @@ bool LLParser::ParseUnnamedType() {
return true;
}
- assert(Lex.getKind() == lltok::kw_type);
LocTy TypeLoc = Lex.getLoc();
- Lex.Lex(); // eat kw_type
+ if (ParseToken(lltok::kw_type, "expected 'type' after '='")) return true;
PATypeHolder Ty(Type::getVoidTy(Context));
if (ParseType(Ty)) return true;
@@ -1171,10 +1170,10 @@ bool LLParser::ParseOptionalCommaAlign(unsigned &Alignment,
return false;
}
- if (Lex.getKind() == lltok::kw_align) {
- if (ParseOptionalAlignment(Alignment)) return true;
- } else
- return true;
+ if (Lex.getKind() != lltok::kw_align)
+ return Error(Lex.getLoc(), "expected metadata or 'align'");
+
+ if (ParseOptionalAlignment(Alignment)) return true;
}
return false;
@@ -2352,11 +2351,28 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
if (NSW)
return Error(ModifierLoc, "nsw only applies to integer operations");
}
- // API compatibility: Accept either integer or floating-point types with
- // add, sub, and mul.
- if (!Val0->getType()->isIntOrIntVectorTy() &&
- !Val0->getType()->isFPOrFPVectorTy())
- return Error(ID.Loc,"constexpr requires integer, fp, or vector operands");
+ // Check that the type is valid for the operator.
+ switch (Opc) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ if (!Val0->getType()->isIntOrIntVectorTy())
+ return Error(ID.Loc, "constexpr requires integer operands");
+ break;
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ if (!Val0->getType()->isFPOrFPVectorTy())
+ return Error(ID.Loc, "constexpr requires fp operands");
+ break;
+ default: llvm_unreachable("Unknown binary operator!");
+ }
unsigned Flags = 0;
if (NUW) Flags |= OverflowingBinaryOperator::NoUnsignedWrap;
if (NSW) Flags |= OverflowingBinaryOperator::NoSignedWrap;
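Concretely, under the new check a constant expression such as add (double 1.0, double 2.0) is now rejected with "constexpr requires integer operands"; the floating-point form must be spelled fadd. The old code accepted either type class for both opcodes as an API-compatibility shim.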
@@ -2788,6 +2804,10 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
ForwardRefVals.find(FunctionName);
if (FRVI != ForwardRefVals.end()) {
Fn = M->getFunction(FunctionName);
+ if (Fn->getType() != PFT)
+ return Error(FRVI->second.second, "invalid forward reference to "
+ "function '" + FunctionName + "' with wrong type!");
+
ForwardRefVals.erase(FRVI);
} else if ((Fn = M->getFunction(FunctionName))) {
// If this function already exists in the symbol table, then it is
@@ -2933,6 +2953,8 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
default: assert(0 && "Unknown ParseInstruction result!");
case InstError: return true;
case InstNormal:
+ BB->getInstList().push_back(Inst);
+
// With a normal result, we check to see if the instruction is followed by
// a comma and metadata.
if (EatIfPresent(lltok::comma))
@@ -2940,6 +2962,8 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
return true;
break;
case InstExtraComma:
+ BB->getInstList().push_back(Inst);
+
// If the instruction parser ate an extra comma at the end of it, it
// *must* be followed by metadata.
if (ParseInstructionMetadata(Inst))
@@ -2947,8 +2971,6 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
break;
}
- BB->getInstList().push_back(Inst);
-
// Set the name on the instruction.
if (PFS.SetInstName(NameID, NameStr, NameLoc, Inst)) return true;
} while (!isa<TerminatorInst>(Inst));
@@ -2995,8 +3017,7 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
if (EatIfPresent(lltok::kw_nuw))
NUW = true;
}
- // API compatibility: Accept either integer or floating-point types.
- bool Result = ParseArithmetic(Inst, PFS, KeywordVal, 0);
+ bool Result = ParseArithmetic(Inst, PFS, KeywordVal, 1);
if (!Result) {
if (!Inst->getType()->isIntOrIntVectorTy()) {
if (NUW)
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index ae460bbb57b3..c8f669f641a8 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -74,7 +74,7 @@ namespace llvm {
public:
typedef LLLexer::LocTy LocTy;
private:
- LLVMContext& Context;
+ LLVMContext &Context;
LLLexer Lex;
Module *M;
diff --git a/lib/AsmParser/Parser.cpp b/lib/AsmParser/Parser.cpp
index 7280cf479f22..e511cbe29c75 100644
--- a/lib/AsmParser/Parser.cpp
+++ b/lib/AsmParser/Parser.cpp
@@ -44,9 +44,9 @@ Module *llvm::ParseAssemblyFile(const std::string &Filename, SMDiagnostic &Err,
std::string ErrorStr;
MemoryBuffer *F = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), &ErrorStr);
if (F == 0) {
- Err = SMDiagnostic(SMLoc(), "", -1, -1,
+ Err = SMDiagnostic(Filename,
"Could not open input file '" + Filename + "': " +
- ErrorStr, "");
+ ErrorStr);
return 0;
}
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 8840622f9ae5..4008a6a63cf8 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -31,12 +31,12 @@ using namespace llvm;
// If DebugDiv > 0 then only break antidep with (ID % DebugDiv) == DebugMod
static cl::opt<int>
DebugDiv("agg-antidep-debugdiv",
- cl::desc("Debug control for aggressive anti-dep breaker"),
- cl::init(0), cl::Hidden);
+ cl::desc("Debug control for aggressive anti-dep breaker"),
+ cl::init(0), cl::Hidden);
static cl::opt<int>
DebugMod("agg-antidep-debugmod",
- cl::desc("Debug control for aggressive anti-dep breaker"),
- cl::init(0), cl::Hidden);
+ cl::desc("Debug control for aggressive anti-dep breaker"),
+ cl::init(0), cl::Hidden);
AggressiveAntiDepState::AggressiveAntiDepState(const unsigned TargetRegs,
MachineBasicBlock *BB) :
@@ -210,7 +210,7 @@ void AggressiveAntiDepBreaker::FinishBlock() {
}
void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
- unsigned InsertPosIndex) {
+ unsigned InsertPosIndex) {
assert(Count < InsertPosIndex && "Instruction index out of expected range!");
std::set<unsigned> PassthruRegs;
@@ -244,7 +244,7 @@ void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
}
bool AggressiveAntiDepBreaker::IsImplicitDefUse(MachineInstr *MI,
- MachineOperand& MO)
+ MachineOperand& MO)
{
if (!MO.isReg() || !MO.isImplicit())
return false;
@@ -281,9 +281,9 @@ void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI,
/// AntiDepEdges - Return in Edges the anti- and output-dependencies
/// in SU that we want to consider for breaking.
-static void AntiDepEdges(SUnit *SU, std::vector<SDep*>& Edges) {
+static void AntiDepEdges(const SUnit *SU, std::vector<const SDep*>& Edges) {
SmallSet<unsigned, 4> RegSet;
- for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
+ for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
P != PE; ++P) {
if ((P->getKind() == SDep::Anti) || (P->getKind() == SDep::Output)) {
unsigned Reg = P->getReg();
@@ -297,14 +297,14 @@ static void AntiDepEdges(SUnit *SU, std::vector<SDep*>& Edges) {
/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
/// critical path.
-static SUnit *CriticalPathStep(SUnit *SU) {
- SDep *Next = 0;
+static const SUnit *CriticalPathStep(const SUnit *SU) {
+ const SDep *Next = 0;
unsigned NextDepth = 0;
// Find the predecessor edge with the greatest depth.
if (SU != 0) {
- for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
+ for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
P != PE; ++P) {
- SUnit *PredSU = P->getSUnit();
+ const SUnit *PredSU = P->getSUnit();
unsigned PredLatency = P->getLatency();
unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
// In the case of a latency tie, prefer an anti-dependency edge over
@@ -359,8 +359,7 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
unsigned Count,
- std::set<unsigned>& PassthruRegs)
-{
+ std::set<unsigned>& PassthruRegs) {
unsigned *DefIndices = State->GetDefIndices();
std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
RegRefs = State->GetRegRefs();
@@ -439,7 +438,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
}
void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
- unsigned Count) {
+ unsigned Count) {
DEBUG(dbgs() << "\tUse Groups:");
std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
RegRefs = State->GetRegRefs();
@@ -704,9 +703,9 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
/// ScheduleDAG and break them by renaming registers.
///
unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
- std::vector<SUnit>& SUnits,
- MachineBasicBlock::iterator& Begin,
- MachineBasicBlock::iterator& End,
+ const std::vector<SUnit>& SUnits,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
unsigned InsertPosIndex) {
unsigned *KillIndices = State->GetKillIndices();
unsigned *DefIndices = State->GetDefIndices();
@@ -721,20 +720,21 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
RenameOrderType RenameOrder;
// ...need a map from MI to SUnit.
- std::map<MachineInstr *, SUnit *> MISUnitMap;
+ std::map<MachineInstr *, const SUnit *> MISUnitMap;
for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
- SUnit *SU = &SUnits[i];
- MISUnitMap.insert(std::pair<MachineInstr *, SUnit *>(SU->getInstr(), SU));
+ const SUnit *SU = &SUnits[i];
+ MISUnitMap.insert(std::pair<MachineInstr *, const SUnit *>(SU->getInstr(),
+ SU));
}
// Track progress along the critical path through the SUnit graph as
// we walk the instructions. This is needed for regclasses that only
// break critical-path anti-dependencies.
- SUnit *CriticalPathSU = 0;
+ const SUnit *CriticalPathSU = 0;
MachineInstr *CriticalPathMI = 0;
if (CriticalPathSet.any()) {
for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
- SUnit *SU = &SUnits[i];
+ const SUnit *SU = &SUnits[i];
if (!CriticalPathSU ||
((SU->getDepth() + SU->Latency) >
(CriticalPathSU->getDepth() + CriticalPathSU->Latency))) {
@@ -775,8 +775,8 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
// The dependence edges that represent anti- and output-
// dependencies that are candidates for breaking.
- std::vector<SDep*> Edges;
- SUnit *PathSU = MISUnitMap[MI];
+ std::vector<const SDep *> Edges;
+ const SUnit *PathSU = MISUnitMap[MI];
AntiDepEdges(PathSU, Edges);
// If MI is not on the critical path, then we don't rename
@@ -794,7 +794,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
if (!MI->isKill()) {
// Attempt to break each anti-dependency...
for (unsigned i = 0, e = Edges.size(); i != e; ++i) {
- SDep *Edge = Edges[i];
+ const SDep *Edge = Edges[i];
SUnit *NextSU = Edge->getSUnit();
if ((Edge->getKind() != SDep::Anti) &&
@@ -838,7 +838,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
// Also, if there are dependencies on other SUnits with the
// same register as the anti-dependency, don't attempt to
// break it.
- for (SUnit::pred_iterator P = PathSU->Preds.begin(),
+ for (SUnit::const_pred_iterator P = PathSU->Preds.begin(),
PE = PathSU->Preds.end(); P != PE; ++P) {
if (P->getSUnit() == NextSU ?
(P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
@@ -847,7 +847,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
break;
}
}
- for (SUnit::pred_iterator P = PathSU->Preds.begin(),
+ for (SUnit::const_pred_iterator P = PathSU->Preds.begin(),
PE = PathSU->Preds.end(); P != PE; ++P) {
if ((P->getSUnit() == NextSU) && (P->getKind() != SDep::Anti) &&
(P->getKind() != SDep::Output)) {
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h
index a62d68c2a834..506d43e7f3fc 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -142,9 +142,9 @@ namespace llvm {
/// path
/// of the ScheduleDAG and break them by renaming registers.
///
- unsigned BreakAntiDependencies(std::vector<SUnit>& SUnits,
- MachineBasicBlock::iterator& Begin,
- MachineBasicBlock::iterator& End,
+ unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
unsigned InsertPosIndex);
/// Observe - Update liveness information to account for the current
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
new file mode 100644
index 000000000000..f71eee5d01b8
--- /dev/null
+++ b/lib/CodeGen/Analysis.cpp
@@ -0,0 +1,285 @@
+//===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines several CodeGen-specific LLVM IR analysis utilities.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
+/// of insertvalue or extractvalue indices that identify a member, return
+/// the linearized index of the start of the member.
+///
+unsigned llvm::ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty,
+ const unsigned *Indices,
+ const unsigned *IndicesEnd,
+ unsigned CurIndex) {
+ // Base case: We're done.
+ if (Indices && Indices == IndicesEnd)
+ return CurIndex;
+
+ // Given a struct type, recursively traverse the elements.
+ if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ for (StructType::element_iterator EB = STy->element_begin(),
+ EI = EB,
+ EE = STy->element_end();
+ EI != EE; ++EI) {
+ if (Indices && *Indices == unsigned(EI - EB))
+ return ComputeLinearIndex(TLI, *EI, Indices+1, IndicesEnd, CurIndex);
+ CurIndex = ComputeLinearIndex(TLI, *EI, 0, 0, CurIndex);
+ }
+ return CurIndex;
+ }
+ // Given an array type, recursively traverse the elements.
+ else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ const Type *EltTy = ATy->getElementType();
+ for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) {
+ if (Indices && *Indices == i)
+ return ComputeLinearIndex(TLI, EltTy, Indices+1, IndicesEnd, CurIndex);
+ CurIndex = ComputeLinearIndex(TLI, EltTy, 0, 0, CurIndex);
+ }
+ return CurIndex;
+ }
+ // We haven't found the type we're looking for, so keep searching.
+ return CurIndex + 1;
+}
+
+/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of
+/// EVTs that represent all the individual underlying
+/// non-aggregate types that comprise it.
+///
+/// If Offsets is non-null, it points to a vector to be filled in
+/// with the in-memory offsets of each of the individual values.
+///
+void llvm::ComputeValueVTs(const TargetLowering &TLI, const Type *Ty,
+ SmallVectorImpl<EVT> &ValueVTs,
+ SmallVectorImpl<uint64_t> *Offsets,
+ uint64_t StartingOffset) {
+ // Given a struct type, recursively traverse the elements.
+ if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ const StructLayout *SL = TLI.getTargetData()->getStructLayout(STy);
+ for (StructType::element_iterator EB = STy->element_begin(),
+ EI = EB,
+ EE = STy->element_end();
+ EI != EE; ++EI)
+ ComputeValueVTs(TLI, *EI, ValueVTs, Offsets,
+ StartingOffset + SL->getElementOffset(EI - EB));
+ return;
+ }
+ // Given an array type, recursively traverse the elements.
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ const Type *EltTy = ATy->getElementType();
+ uint64_t EltSize = TLI.getTargetData()->getTypeAllocSize(EltTy);
+ for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
+ ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets,
+ StartingOffset + i * EltSize);
+ return;
+ }
+ // Interpret void as zero return values.
+ if (Ty->isVoidTy())
+ return;
+ // Base case: we can get an EVT for this LLVM IR type.
+ ValueVTs.push_back(TLI.getValueType(Ty));
+ if (Offsets)
+ Offsets->push_back(StartingOffset);
+}
+
+/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
+GlobalVariable *llvm::ExtractTypeInfo(Value *V) {
+ V = V->stripPointerCasts();
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
+
+ if (GV && GV->getName() == ".llvm.eh.catch.all.value") {
+ assert(GV->hasInitializer() &&
+ "The EH catch-all value must have an initializer");
+ Value *Init = GV->getInitializer();
+ GV = dyn_cast<GlobalVariable>(Init);
+ if (!GV) V = cast<ConstantPointerNull>(Init);
+ }
+
+ assert((GV || isa<ConstantPointerNull>(V)) &&
+ "TypeInfo must be a global variable or NULL");
+ return GV;
+}
+
+/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being
+/// processed uses a memory 'm' constraint.
+bool
+llvm::hasInlineAsmMemConstraint(std::vector<InlineAsm::ConstraintInfo> &CInfos,
+ const TargetLowering &TLI) {
+ for (unsigned i = 0, e = CInfos.size(); i != e; ++i) {
+ InlineAsm::ConstraintInfo &CI = CInfos[i];
+ for (unsigned j = 0, ee = CI.Codes.size(); j != ee; ++j) {
+ TargetLowering::ConstraintType CType = TLI.getConstraintType(CI.Codes[j]);
+ if (CType == TargetLowering::C_Memory)
+ return true;
+ }
+
+ // Indirect operand accesses access memory.
+ if (CI.isIndirect)
+ return true;
+ }
+
+ return false;
+}
+
+/// getFCmpCondCode - Return the ISD condition code corresponding to
+/// the given LLVM IR floating-point condition code. This includes
+/// consideration of global floating-point math flags.
+///
+ISD::CondCode llvm::getFCmpCondCode(FCmpInst::Predicate Pred) {
+ ISD::CondCode FPC, FOC;
+ switch (Pred) {
+ case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break;
+ case FCmpInst::FCMP_OEQ: FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break;
+ case FCmpInst::FCMP_OGT: FOC = ISD::SETGT; FPC = ISD::SETOGT; break;
+ case FCmpInst::FCMP_OGE: FOC = ISD::SETGE; FPC = ISD::SETOGE; break;
+ case FCmpInst::FCMP_OLT: FOC = ISD::SETLT; FPC = ISD::SETOLT; break;
+ case FCmpInst::FCMP_OLE: FOC = ISD::SETLE; FPC = ISD::SETOLE; break;
+ case FCmpInst::FCMP_ONE: FOC = ISD::SETNE; FPC = ISD::SETONE; break;
+ case FCmpInst::FCMP_ORD: FOC = FPC = ISD::SETO; break;
+ case FCmpInst::FCMP_UNO: FOC = FPC = ISD::SETUO; break;
+ case FCmpInst::FCMP_UEQ: FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break;
+ case FCmpInst::FCMP_UGT: FOC = ISD::SETGT; FPC = ISD::SETUGT; break;
+ case FCmpInst::FCMP_UGE: FOC = ISD::SETGE; FPC = ISD::SETUGE; break;
+ case FCmpInst::FCMP_ULT: FOC = ISD::SETLT; FPC = ISD::SETULT; break;
+ case FCmpInst::FCMP_ULE: FOC = ISD::SETLE; FPC = ISD::SETULE; break;
+ case FCmpInst::FCMP_UNE: FOC = ISD::SETNE; FPC = ISD::SETUNE; break;
+ case FCmpInst::FCMP_TRUE: FOC = FPC = ISD::SETTRUE; break;
+ default:
+ llvm_unreachable("Invalid FCmp predicate opcode!");
+ FOC = FPC = ISD::SETFALSE;
+ break;
+ }
+ if (FiniteOnlyFPMath())
+ return FOC;
+ else
+ return FPC;
+}
+
+/// getICmpCondCode - Return the ISD condition code corresponding to
+/// the given LLVM IR integer condition code.
+///
+ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) {
+ switch (Pred) {
+ case ICmpInst::ICMP_EQ: return ISD::SETEQ;
+ case ICmpInst::ICMP_NE: return ISD::SETNE;
+ case ICmpInst::ICMP_SLE: return ISD::SETLE;
+ case ICmpInst::ICMP_ULE: return ISD::SETULE;
+ case ICmpInst::ICMP_SGE: return ISD::SETGE;
+ case ICmpInst::ICMP_UGE: return ISD::SETUGE;
+ case ICmpInst::ICMP_SLT: return ISD::SETLT;
+ case ICmpInst::ICMP_ULT: return ISD::SETULT;
+ case ICmpInst::ICMP_SGT: return ISD::SETGT;
+ case ICmpInst::ICMP_UGT: return ISD::SETUGT;
+ default:
+ llvm_unreachable("Invalid ICmp predicate opcode!");
+ return ISD::SETNE;
+ }
+}
+
+/// Test if the given instruction is in a position to be optimized
+/// with a tail-call. This roughly means that it's in a block with
+/// a return and there's nothing that needs to be scheduled
+/// between it and the return.
+///
+/// This function only tests target-independent requirements.
+bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
+ const TargetLowering &TLI) {
+ const Instruction *I = CS.getInstruction();
+ const BasicBlock *ExitBB = I->getParent();
+ const TerminatorInst *Term = ExitBB->getTerminator();
+ const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);
+ const Function *F = ExitBB->getParent();
+
+ // The block must end in a return statement or unreachable.
+ //
+ // FIXME: Decline tailcall if it's not guaranteed and if the block ends in
+ // an unreachable, for now. The way tailcall optimization is currently
+ // implemented means it will add an epilogue followed by a jump. That is
+ // not profitable. Also, if the callee is a special function (e.g.
+ // longjmp on x86), it can end up causing miscompilation that has not
+ // been fully understood.
+ if (!Ret &&
+ (!GuaranteedTailCallOpt || !isa<UnreachableInst>(Term))) return false;
+
+ // If I will have a chain, make sure no other instruction that will have a
+ // chain interposes between I and the return.
+ if (I->mayHaveSideEffects() || I->mayReadFromMemory() ||
+ !I->isSafeToSpeculativelyExecute())
+ for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ;
+ --BBI) {
+ if (&*BBI == I)
+ break;
+ // Debug info intrinsics do not get in the way of tail call optimization.
+ if (isa<DbgInfoIntrinsic>(BBI))
+ continue;
+ if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
+ !BBI->isSafeToSpeculativelyExecute())
+ return false;
+ }
+
+ // If the block ends with a void return or unreachable, it doesn't matter
+ // what the call's return type is.
+ if (!Ret || Ret->getNumOperands() == 0) return true;
+
+ // If the return value is undef, it doesn't matter what the call's
+ // return type is.
+ if (isa<UndefValue>(Ret->getOperand(0))) return true;
+
+ // Conservatively require the attributes of the call to match those of
+ // the return. Ignore noalias because it doesn't affect the call sequence.
+ unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
+ if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
+ return false;
+
+ // It's not safe to eliminate the sign / zero extension of the return value.
+ if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
+ return false;
+
+ // Otherwise, make sure the unmodified return value of I is the return value.
+ for (const Instruction *U = dyn_cast<Instruction>(Ret->getOperand(0)); ;
+ U = dyn_cast<Instruction>(U->getOperand(0))) {
+ if (!U)
+ return false;
+ if (!U->hasOneUse())
+ return false;
+ if (U == I)
+ break;
+ // Check for a truly no-op truncate.
+ if (isa<TruncInst>(U) &&
+ TLI.isTruncateFree(U->getOperand(0)->getType(), U->getType()))
+ continue;
+ // Check for a truly no-op bitcast.
+ if (isa<BitCastInst>(U) &&
+ (U->getOperand(0)->getType() == U->getType() ||
+ (U->getOperand(0)->getType()->isPointerTy() &&
+ U->getType()->isPointerTy())))
+ continue;
+ // Otherwise it's not a true no-op.
+ return false;
+ }
+
+ return true;
+}
+
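
The ComputeLinearIndex recursion above flattens an insertvalue/extractvalue index path into a single scalar slot number by counting the leaves of every sibling skipped along the way. Below is a minimal standalone sketch of the same walk over a toy aggregate model rather than LLVM's Type hierarchy; AggTy, countLeaves, and linearIndex are illustrative names, not LLVM API:

// Toy aggregate model: a node with children; an empty node is a scalar leaf.
#include <cstddef>
#include <iostream>
#include <vector>

struct AggTy {
  std::vector<AggTy> Kids;   // empty => scalar leaf
};

// Count the scalar leaves under Ty; a leaf itself occupies one slot.
static unsigned countLeaves(const AggTy &Ty) {
  if (Ty.Kids.empty())
    return 1;
  unsigned N = 0;
  for (const AggTy &K : Ty.Kids)
    N += countLeaves(K);
  return N;
}

// Mirror of the ComputeLinearIndex walk: follow the index path, adding the
// leaf count of every sibling that precedes the selected child at each level.
static unsigned linearIndex(const AggTy &Ty, const unsigned *I,
                            const unsigned *E, unsigned Cur = 0) {
  if (I == E)
    return Cur;                       // path consumed: Cur is the slot
  for (unsigned i = 0; i != Ty.Kids.size(); ++i) {
    if (i == *I)
      return linearIndex(Ty.Kids[i], I + 1, E, Cur);
    Cur += countLeaves(Ty.Kids[i]);
  }
  return Cur;                         // degenerate path: nothing selected
}

int main() {
  // {{s,s}, s, {s,s,s}} -- member path {2,1} is the fifth leaf, index 4.
  AggTy S{{AggTy{{AggTy{}, AggTy{}}}, AggTy{},
           AggTy{{AggTy{}, AggTy{}, AggTy{}}}}};
  unsigned Path[] = {2, 1};
  std::cout << linearIndex(S, Path, Path + 2) << "\n";   // prints 4
}
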
diff --git a/lib/CodeGen/AntiDepBreaker.h b/lib/CodeGen/AntiDepBreaker.h
index 3ee30c6a18e3..086b75795638 100644
--- a/lib/CodeGen/AntiDepBreaker.h
+++ b/lib/CodeGen/AntiDepBreaker.h
@@ -39,9 +39,9 @@ public:
/// basic-block region and break them by renaming registers. Return
/// the number of anti-dependencies broken.
///
- virtual unsigned BreakAntiDependencies(std::vector<SUnit>& SUnits,
- MachineBasicBlock::iterator& Begin,
- MachineBasicBlock::iterator& End,
+ virtual unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
unsigned InsertPosIndex) =0;
/// Observe - Update liveness information to account for the current
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index c86e2411d303..ded4b3f18bbc 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -42,8 +42,13 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/Timer.h"
using namespace llvm;
+static const char *DWARFGroupName = "DWARF Emission";
+static const char *DbgTimerName = "DWARF Debug Writer";
+static const char *EHTimerName = "DWARF Exception Writer";
+
STATISTIC(EmittedInsts, "Number of machine instrs printed");
char AsmPrinter::ID = 0;
@@ -56,6 +61,35 @@ static gcp_map_type &getGCMap(void *&P) {
}
+/// getGVAlignmentLog2 - Return the alignment to use for the specified global
+/// value in log2 form. This rounds up to the preferred alignment if possible
+/// and legal.
+static unsigned getGVAlignmentLog2(const GlobalValue *GV, const TargetData &TD,
+ unsigned InBits = 0) {
+ unsigned NumBits = 0;
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ NumBits = TD.getPreferredAlignmentLog(GVar);
+
+ // If InBits is specified, round it to it.
+ if (InBits > NumBits)
+ NumBits = InBits;
+
+ // If the GV has a specified alignment, take it into account.
+ if (GV->getAlignment() == 0)
+ return NumBits;
+
+ unsigned GVAlign = Log2_32(GV->getAlignment());
+
+ // If the GVAlign is larger than NumBits, or if we are required to obey
+ // NumBits because the GV has an assigned section, obey it.
+ if (GVAlign > NumBits || GV->hasSection())
+ NumBits = GVAlign;
+ return NumBits;
+}
+
+
AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer)
: MachineFunctionPass(&ID),
TM(tm), MAI(tm.getMCAsmInfo()),
@@ -88,7 +122,7 @@ unsigned AsmPrinter::getFunctionNumber() const {
return MF->getFunctionNumber();
}
-TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const {
+const TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const {
return TM.getTargetLowering()->getObjFileLowering();
}
@@ -222,8 +256,12 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
const TargetData *TD = TM.getTargetData();
- unsigned Size = TD->getTypeAllocSize(GV->getType()->getElementType());
- unsigned AlignLog = TD->getPreferredAlignmentLog(GV);
+ uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
+
+ // If the alignment is specified, we *must* obey it. Overaligning a global
+ // with a specified alignment is a prompt way to break globals emitted to
+ // sections and expected to be contiguous (e.g. ObjC metadata).
+ unsigned AlignLog = getGVAlignmentLog2(GV, *TD);
// Handle common and BSS local symbols (.lcomm).
if (GVKind.isCommon() || GVKind.isBSSLocal()) {
@@ -270,6 +308,8 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// Handle the zerofill directive on darwin, which is a special form of BSS
// emission.
if (GVKind.isBSSExtern() && MAI->hasMachoZeroFillDirective()) {
+ if (Size == 0) Size = 1; // zerofill of 0 bytes is undefined.
+
// .globl _foo
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
// .zerofill __DATA, __common, _foo, 400, 5
@@ -347,8 +387,22 @@ void AsmPrinter::EmitFunctionHeader() {
}
// Emit pre-function debug and/or EH information.
- if (DE) DE->BeginFunction(MF);
- if (DD) DD->beginFunction(MF);
+ if (DE) {
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T(EHTimerName, DWARFGroupName);
+ DE->BeginFunction(MF);
+ } else {
+ DE->BeginFunction(MF);
+ }
+ }
+ if (DD) {
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName);
+ DD->beginFunction(MF);
+ } else {
+ DD->beginFunction(MF);
+ }
+ }
}
/// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the
@@ -434,7 +488,58 @@ static void EmitKill(const MachineInstr *MI, AsmPrinter &AP) {
AP.OutStreamer.AddBlankLine();
}
+/// EmitDebugValueComment - This method handles the target-independent form
+/// of DBG_VALUE, returning true if it was able to do so. A false return
+/// means the target will need to handle MI in EmitInstruction.
+static bool EmitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
+ // This code handles only the 3-operand target-independent form.
+ if (MI->getNumOperands() != 3)
+ return false;
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ OS << '\t' << AP.MAI->getCommentString() << "DEBUG_VALUE: ";
+
+  // Cast away const; the DI* classes do not take const operands for some reason.
+ DIVariable V(const_cast<MDNode*>(MI->getOperand(2).getMetadata()));
+ if (V.getContext().isSubprogram())
+ OS << DISubprogram(V.getContext().getNode()).getDisplayName() << ":";
+ OS << V.getName() << " <- ";
+
+ // Register or immediate value. Register 0 means undef.
+ if (MI->getOperand(0).isFPImm()) {
+ APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF());
+ if (MI->getOperand(0).getFPImm()->getType()->isFloatTy()) {
+ OS << (double)APF.convertToFloat();
+ } else if (MI->getOperand(0).getFPImm()->getType()->isDoubleTy()) {
+ OS << APF.convertToDouble();
+ } else {
+ // There is no good way to print long double. Convert a copy to
+ // double. Ah well, it's only a comment.
+ bool ignored;
+ APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
+ &ignored);
+ OS << "(long double) " << APF.convertToDouble();
+ }
+ } else if (MI->getOperand(0).isImm()) {
+ OS << MI->getOperand(0).getImm();
+ } else {
+ assert(MI->getOperand(0).isReg() && "Unknown operand type");
+ if (MI->getOperand(0).getReg() == 0) {
+ // Suppress offset, it is not meaningful here.
+ OS << "undef";
+ // NOTE: Want this comment at start of line, don't emit with AddComment.
+ AP.OutStreamer.EmitRawText(OS.str());
+ return true;
+ }
+ OS << AP.TM.getRegisterInfo()->getName(MI->getOperand(0).getReg());
+ }
+
+ OS << '+' << MI->getOperand(1).getImm();
+ // NOTE: Want this comment at start of line, don't emit with AddComment.
+ AP.OutStreamer.EmitRawText(OS.str());
+ return true;
+}
/// EmitFunctionBody - This method emits the body and trailer for a
/// function.
@@ -453,13 +558,20 @@ void AsmPrinter::EmitFunctionBody() {
for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
II != IE; ++II) {
// Print the assembly for the instruction.
- if (!II->isLabel())
+ if (!II->isLabel() && !II->isImplicitDef() && !II->isKill() &&
+ !II->isDebugValue()) {
HasAnyRealCode = true;
-
- ++EmittedInsts;
-
- if (ShouldPrintDebugScopes)
- DD->beginScope(II);
+ ++EmittedInsts;
+ }
+
+ if (ShouldPrintDebugScopes) {
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName);
+ DD->beginScope(II);
+ } else {
+ DD->beginScope(II);
+ }
+ }
if (isVerbose())
EmitComments(*II, OutStreamer.GetCommentOS());
@@ -473,6 +585,12 @@ void AsmPrinter::EmitFunctionBody() {
case TargetOpcode::INLINEASM:
EmitInlineAsm(II);
break;
+ case TargetOpcode::DBG_VALUE:
+ if (isVerbose()) {
+ if (!EmitDebugValueComment(II, *this))
+ EmitInstruction(II);
+ }
+ break;
case TargetOpcode::IMPLICIT_DEF:
if (isVerbose()) EmitImplicitDef(II, *this);
break;
@@ -484,16 +602,29 @@ void AsmPrinter::EmitFunctionBody() {
break;
}
- if (ShouldPrintDebugScopes)
- DD->endScope(II);
+ if (ShouldPrintDebugScopes) {
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName);
+ DD->endScope(II);
+ } else {
+ DD->endScope(II);
+ }
+ }
}
}
// If the function is empty and the object file uses .subsections_via_symbols,
// then we need to emit *something* to the function body to prevent the
- // labels from collapsing together. Just emit a 0 byte.
- if (MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode)
- OutStreamer.EmitIntValue(0, 1, 0/*addrspace*/);
+ // labels from collapsing together. Just emit a noop.
+ if (MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode) {
+ MCInst Noop;
+ TM.getInstrInfo()->getNoopForMachoTarget(Noop);
+ if (Noop.getOpcode()) {
+ OutStreamer.AddComment("avoids zero-length function");
+ OutStreamer.EmitInstruction(Noop);
+ } else // Target not mc-ized yet.
+ OutStreamer.EmitRawText(StringRef("\tnop\n"));
+ }
// Emit target-specific gunk after the function body.
EmitFunctionBodyEnd();
@@ -514,8 +645,22 @@ void AsmPrinter::EmitFunctionBody() {
}
// Emit post-function debug information.
- if (DD) DD->endFunction(MF);
- if (DE) DE->EndFunction();
+ if (DD) {
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName);
+ DD->endFunction(MF);
+ } else {
+ DD->endFunction(MF);
+ }
+ }
+ if (DE) {
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T(EHTimerName, DWARFGroupName);
+ DE->EndFunction();
+ } else {
+ DE->EndFunction();
+ }
+ }
MMI->EndFunction();
// Print out jump tables referenced by the function.
@@ -524,6 +669,12 @@ void AsmPrinter::EmitFunctionBody() {
OutStreamer.AddBlankLine();
}
+/// getDebugValueLocation - Get location information encoded by DBG_VALUE
+/// operands.
+MachineLocation AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const {
+ // Target specific DBG_VALUE instructions are handled by each target.
+ return MachineLocation();
+}
bool AsmPrinter::doFinalization(Module &M) {
// Emit global variables.
@@ -533,11 +684,21 @@ bool AsmPrinter::doFinalization(Module &M) {
// Finalize debug and EH information.
if (DE) {
- DE->EndModule();
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T(EHTimerName, DWARFGroupName);
+ DE->EndModule();
+ } else {
+ DE->EndModule();
+ }
delete DE; DE = 0;
}
if (DD) {
- DD->endModule();
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName);
+ DD->endModule();
+ } else {
+ DD->endModule();
+ }
delete DD; DD = 0;
}
@@ -595,7 +756,7 @@ bool AsmPrinter::doFinalization(Module &M) {
// to be executable. Some targets have a directive to declare this.
Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline");
if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty())
- if (MCSection *S = MAI->getNonexecutableStackSection(OutContext))
+ if (const MCSection *S = MAI->getNonexecutableStackSection(OutContext))
OutStreamer.SwitchSection(S);
// Allow the target to emit any magic that it wants at the end of the file,
@@ -877,7 +1038,7 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
unsigned Align = Log2_32(TD->getPointerPrefAlignment());
if (GV->getName() == "llvm.global_ctors") {
OutStreamer.SwitchSection(getObjFileLowering().getStaticCtorSection());
- EmitAlignment(Align, 0);
+ EmitAlignment(Align);
EmitXXStructorList(GV->getInitializer());
if (TM.getRelocationModel() == Reloc::Static &&
@@ -891,7 +1052,7 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
if (GV->getName() == "llvm.global_dtors") {
OutStreamer.SwitchSection(getObjFileLowering().getStaticDtorSection());
- EmitAlignment(Align, 0);
+ EmitAlignment(Align);
EmitXXStructorList(GV->getInitializer());
if (TM.getRelocationModel() == Reloc::Static &&
@@ -984,30 +1145,49 @@ void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
OutStreamer.EmitSymbolValue(SetLabel, Size, 0/*AddrSpace*/);
}
+/// EmitLabelOffsetDifference - Emit something like ".long Hi+Offset-Lo"
+/// where the size in bytes of the directive is specified by Size and Hi/Lo
+/// specify the labels. This implicitly uses .set if it is available.
+void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset,
+ const MCSymbol *Lo, unsigned Size)
+ const {
+
+ // Emit Hi+Offset - Lo
+ // Get the Hi+Offset expression.
+ const MCExpr *Plus =
+ MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Hi, OutContext),
+ MCConstantExpr::Create(Offset, OutContext),
+ OutContext);
+
+ // Get the Hi+Offset-Lo expression.
+ const MCExpr *Diff =
+ MCBinaryExpr::CreateSub(Plus,
+ MCSymbolRefExpr::Create(Lo, OutContext),
+ OutContext);
+
+ if (!MAI->hasSetDirective())
+ OutStreamer.EmitValue(Diff, 4, 0/*AddrSpace*/);
+ else {
+ // Otherwise, emit with .set (aka assignment).
+ MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++);
+ OutStreamer.EmitAssignment(SetLabel, Diff);
+ OutStreamer.EmitSymbolValue(SetLabel, 4, 0/*AddrSpace*/);
+ }
+}
+
//===----------------------------------------------------------------------===//
// EmitAlignment - Emit an alignment directive to the specified power of
// two boundary. For example, if you pass in 3 here, you will get an 8
// byte alignment. If a global value is specified, and if that global has
-// an explicit alignment requested, it will unconditionally override the
-// alignment request. However, if ForcedAlignBits is specified, this value
-// has final say: the ultimate alignment will be the max of ForcedAlignBits
-// and the alignment computed with NumBits and the global.
+// an explicit alignment requested, it will override the alignment request
+// if required for correctness.
//
-// The algorithm is:
-// Align = NumBits;
-// if (GV && GV->hasalignment) Align = GV->getalignment();
-// Align = std::max(Align, ForcedAlignBits);
-//
-void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV,
- unsigned ForcedAlignBits,
- bool UseFillExpr) const {
- if (GV && GV->getAlignment())
- NumBits = Log2_32(GV->getAlignment());
- NumBits = std::max(NumBits, ForcedAlignBits);
+void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV) const {
+ if (GV) NumBits = getGVAlignmentLog2(GV, *TM.getTargetData(), NumBits);
- if (NumBits == 0) return; // No need to emit alignment.
+ if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment.
if (getCurrentSection()->getKind().isText())
OutStreamer.EmitCodeAlignment(1 << NumBits);
@@ -1015,6 +1195,10 @@ void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV,
OutStreamer.EmitValueToAlignment(1 << NumBits, 0, 1, 0);
}
+//===----------------------------------------------------------------------===//
+// Constant emission.
+//===----------------------------------------------------------------------===//
+
/// LowerConstant - Lower the specified LLVM Constant to an MCExpr.
///
static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) {
@@ -1142,12 +1326,15 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) {
}
}
+static void EmitGlobalConstantImpl(const Constant *C, unsigned AddrSpace,
+ AsmPrinter &AP);
+
static void EmitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace,
AsmPrinter &AP) {
if (AddrSpace != 0 || !CA->isString()) {
// Not a string. Print the values in successive locations
for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
- AP.EmitGlobalConstant(CA->getOperand(i), AddrSpace);
+ EmitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP);
return;
}
@@ -1163,7 +1350,7 @@ static void EmitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace,
static void EmitGlobalConstantVector(const ConstantVector *CV,
unsigned AddrSpace, AsmPrinter &AP) {
for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
- AP.EmitGlobalConstant(CV->getOperand(i), AddrSpace);
+ EmitGlobalConstantImpl(CV->getOperand(i), AddrSpace, AP);
}
static void EmitGlobalConstantStruct(const ConstantStruct *CS,
@@ -1183,7 +1370,7 @@ static void EmitGlobalConstantStruct(const ConstantStruct *CS,
SizeSoFar += FieldSize + PadSize;
// Now print the actual field value.
- AP.EmitGlobalConstant(Field, AddrSpace);
+ EmitGlobalConstantImpl(Field, AddrSpace, AP);
// Insert padding - this may include padding to increase the size of the
// current field up to the ABI size (if the struct is not packed) as well
@@ -1203,7 +1390,7 @@ static void EmitGlobalConstantUnion(const ConstantUnion *CU,
unsigned FilledSize = TD->getTypeAllocSize(Contents->getType());
// Print the actually filled part
- AP.EmitGlobalConstant(Contents, AddrSpace);
+ EmitGlobalConstantImpl(Contents, AddrSpace, AP);
// And pad with enough zeroes
AP.OutStreamer.EmitZeros(Size-FilledSize, AddrSpace);
@@ -1236,7 +1423,7 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
if (CFP->getType()->isX86_FP80Ty()) {
// all long double variants are printed as hex
- // api needed to prevent premature destruction
+ // API needed to prevent premature destruction
APInt API = CFP->getValueAPF().bitcastToAPInt();
const uint64_t *p = API.getRawData();
if (AP.isVerbose()) {
@@ -1266,8 +1453,8 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
assert(CFP->getType()->isPPC_FP128Ty() &&
"Floating point constant type not handled");
- // All long double variants are printed as hex api needed to prevent
- // premature destruction.
+ // All long double variants are printed as hex
+ // API needed to prevent premature destruction.
APInt API = CFP->getValueAPF().bitcastToAPInt();
const uint64_t *p = API.getRawData();
if (AP.TM.getTargetData()->isBigEndian()) {
@@ -1295,57 +1482,68 @@ static void EmitGlobalConstantLargeInt(const ConstantInt *CI,
}
}
-/// EmitGlobalConstant - Print a general LLVM constant to the .s file.
-void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) {
+static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
+ AsmPrinter &AP) {
if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV)) {
- uint64_t Size = TM.getTargetData()->getTypeAllocSize(CV->getType());
- if (Size == 0) Size = 1; // An empty "_foo:" followed by a section is undef.
- return OutStreamer.EmitZeros(Size, AddrSpace);
+ uint64_t Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType());
+ return AP.OutStreamer.EmitZeros(Size, AddrSpace);
}
if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
- unsigned Size = TM.getTargetData()->getTypeAllocSize(CV->getType());
+ unsigned Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType());
switch (Size) {
case 1:
case 2:
case 4:
case 8:
- if (isVerbose())
- OutStreamer.GetCommentOS() << format("0x%llx\n", CI->getZExtValue());
- OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace);
+ if (AP.isVerbose())
+ AP.OutStreamer.GetCommentOS() << format("0x%llx\n", CI->getZExtValue());
+ AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace);
return;
default:
- EmitGlobalConstantLargeInt(CI, AddrSpace, *this);
+ EmitGlobalConstantLargeInt(CI, AddrSpace, AP);
return;
}
}
if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV))
- return EmitGlobalConstantArray(CVA, AddrSpace, *this);
+ return EmitGlobalConstantArray(CVA, AddrSpace, AP);
if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV))
- return EmitGlobalConstantStruct(CVS, AddrSpace, *this);
+ return EmitGlobalConstantStruct(CVS, AddrSpace, AP);
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV))
- return EmitGlobalConstantFP(CFP, AddrSpace, *this);
+ return EmitGlobalConstantFP(CFP, AddrSpace, AP);
if (isa<ConstantPointerNull>(CV)) {
- unsigned Size = TM.getTargetData()->getTypeAllocSize(CV->getType());
- OutStreamer.EmitIntValue(0, Size, AddrSpace);
+ unsigned Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType());
+ AP.OutStreamer.EmitIntValue(0, Size, AddrSpace);
return;
}
if (const ConstantUnion *CVU = dyn_cast<ConstantUnion>(CV))
- return EmitGlobalConstantUnion(CVU, AddrSpace, *this);
+ return EmitGlobalConstantUnion(CVU, AddrSpace, AP);
if (const ConstantVector *V = dyn_cast<ConstantVector>(CV))
- return EmitGlobalConstantVector(V, AddrSpace, *this);
+ return EmitGlobalConstantVector(V, AddrSpace, AP);
// Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it
// thread the streamer with EmitValue.
- OutStreamer.EmitValue(LowerConstant(CV, *this),
- TM.getTargetData()->getTypeAllocSize(CV->getType()),
- AddrSpace);
+ AP.OutStreamer.EmitValue(LowerConstant(CV, AP),
+ AP.TM.getTargetData()->getTypeAllocSize(CV->getType()),
+ AddrSpace);
+}
+
+/// EmitGlobalConstant - Print a general LLVM constant to the .s file.
+void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) {
+ uint64_t Size = TM.getTargetData()->getTypeAllocSize(CV->getType());
+ if (Size)
+ EmitGlobalConstantImpl(CV, AddrSpace, *this);
+ else if (MAI->hasSubsectionsViaSymbols()) {
+ // If the global has zero size, emit a single byte so that two labels don't
+ // look like they are at the same location.
+ OutStreamer.EmitIntValue(0, 1, AddrSpace);
+ }
}
void AsmPrinter::EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
@@ -1613,7 +1811,7 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) {
return GMP;
}
- llvm_report_error("no GCMetadataPrinter registered for GC: " + Twine(Name));
+ report_fatal_error("no GCMetadataPrinter registered for GC: " + Twine(Name));
return 0;
}
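
The repeated shape above -- if (TimePassesIsEnabled) { NamedRegionTimer T(...); work(); } else { work(); } -- exists because the RAII timer object must only be constructed when timing is on. A freestanding sketch of the pattern, with a hypothetical ScopedTimer standing in for LLVM's NamedRegionTimer:

#include <chrono>
#include <cstdio>

struct ScopedTimer {                       // RAII: times the enclosing scope
  const char *Name;
  std::chrono::steady_clock::time_point Start;
  explicit ScopedTimer(const char *N)
      : Name(N), Start(std::chrono::steady_clock::now()) {}
  ~ScopedTimer() {
    auto US = std::chrono::duration_cast<std::chrono::microseconds>(
        std::chrono::steady_clock::now() - Start).count();
    std::printf("%s: %lld us\n", Name, static_cast<long long>(US));
  }
};

static bool TimingEnabled = true;          // stands in for TimePassesIsEnabled

static void emitDebugInfo() { /* ...the timed work... */ }

int main() {
  // The timer must only exist when timing is on, hence the duplicated call
  // in both branches -- exactly the shape used throughout the diff above.
  if (TimingEnabled) {
    ScopedTimer T("DWARF Debug Writer");
    emitDebugInfo();
  } else {
    emitDebugInfo();
  }
}
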
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 255bcd413f22..37d10e5c4ccd 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -13,6 +13,7 @@
#define DEBUG_TYPE "asm-printer"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Constants.h"
#include "llvm/InlineAsm.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
@@ -74,15 +75,15 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const {
AsmParser Parser(SrcMgr, OutContext, OutStreamer, *MAI);
OwningPtr<TargetAsmParser> TAP(TM.getTarget().createAsmParser(Parser));
if (!TAP)
- llvm_report_error("Inline asm not supported by this streamer because"
- " we don't have an asm parser for this target\n");
+ report_fatal_error("Inline asm not supported by this streamer because"
+ " we don't have an asm parser for this target\n");
Parser.setTargetParser(*TAP.get());
// Don't implicitly switch to the text section before the asm.
int Res = Parser.Run(/*NoInitialTextSection*/ true,
/*NoFinalize*/ true);
if (Res && !HasDiagHandler)
- llvm_report_error("Error parsing inline asm\n");
+ report_fatal_error("Error parsing inline asm\n");
}
@@ -97,7 +98,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
unsigned NumDefs = 0;
for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef();
++NumDefs)
- assert(NumDefs != NumOperands-1 && "No asm string?");
+ assert(NumDefs != NumOperands-2 && "No asm string?");
assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?");
@@ -123,6 +124,20 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
MAI->getInlineAsmStart());
+ // Get the !srcloc metadata node if we have it, and decode the loc cookie from
+ // it.
+ unsigned LocCookie = 0;
+ for (unsigned i = MI->getNumOperands(); i != 0; --i) {
+ if (MI->getOperand(i-1).isMetadata())
+ if (const MDNode *SrcLoc = MI->getOperand(i-1).getMetadata())
+ if (SrcLoc->getNumOperands() != 0)
+ if (const ConstantInt *CI =
+ dyn_cast<ConstantInt>(SrcLoc->getOperand(0))) {
+ LocCookie = CI->getZExtValue();
+ break;
+ }
+ }
+
// Emit the inline asm to a temporary string so we can emit it through
// EmitInlineAsm.
SmallString<256> StringData;
@@ -167,10 +182,9 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
break;
case '(': // $( -> same as GCC's { character.
++LastEmitted; // Consume '(' character.
- if (CurVariant != -1) {
- llvm_report_error("Nested variants found in inline asm string: '"
- + std::string(AsmStr) + "'");
- }
+ if (CurVariant != -1)
+ report_fatal_error("Nested variants found in inline asm string: '" +
+ Twine(AsmStr) + "'");
CurVariant = 0; // We're in the first variant now.
break;
case '|':
@@ -204,8 +218,8 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
const char *StrStart = LastEmitted;
const char *StrEnd = strchr(StrStart, '}');
if (StrEnd == 0)
- llvm_report_error(Twine("Unterminated ${:foo} operand in inline asm"
- " string: '") + Twine(AsmStr) + "'");
+ report_fatal_error("Unterminated ${:foo} operand in inline asm"
+ " string: '" + Twine(AsmStr) + "'");
std::string Val(StrStart, StrEnd);
PrintSpecial(MI, OS, Val.c_str());
@@ -219,8 +233,8 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
unsigned Val;
if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val))
- llvm_report_error("Bad $ operand number in inline asm string: '"
- + std::string(AsmStr) + "'");
+ report_fatal_error("Bad $ operand number in inline asm string: '" +
+ Twine(AsmStr) + "'");
LastEmitted = IDEnd;
char Modifier[2] = { 0, 0 };
@@ -231,22 +245,22 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
if (*LastEmitted == ':') {
++LastEmitted; // Consume ':' character.
if (*LastEmitted == 0)
- llvm_report_error("Bad ${:} expression in inline asm string: '" +
- std::string(AsmStr) + "'");
+ report_fatal_error("Bad ${:} expression in inline asm string: '" +
+ Twine(AsmStr) + "'");
Modifier[0] = *LastEmitted;
++LastEmitted; // Consume modifier character.
}
if (*LastEmitted != '}')
- llvm_report_error("Bad ${} expression in inline asm string: '"
- + std::string(AsmStr) + "'");
+ report_fatal_error("Bad ${} expression in inline asm string: '" +
+ Twine(AsmStr) + "'");
++LastEmitted; // Consume '}' character.
}
if (Val >= NumOperands-1)
- llvm_report_error("Invalid $ operand number in inline asm string: '"
- + std::string(AsmStr) + "'");
+ report_fatal_error("Invalid $ operand number in inline asm string: '" +
+ Twine(AsmStr) + "'");
// Okay, we finally have a value number. Ask the target to print this
// operand!
@@ -273,7 +287,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
OS << *MI->getOperand(OpNo).getMBB()->getSymbol();
else {
AsmPrinter *AP = const_cast<AsmPrinter*>(this);
- if ((OpFlags & 7) == 4) {
+ if (InlineAsm::isMemKind(OpFlags)) {
Error = AP->PrintAsmMemoryOperand(MI, OpNo, AsmPrinterVariant,
Modifier[0] ? Modifier : 0,
OS);
@@ -286,9 +300,8 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
if (Error) {
std::string msg;
raw_string_ostream Msg(msg);
- Msg << "Invalid operand found in inline asm: '" << AsmStr << "'\n";
- MI->print(Msg);
- llvm_report_error(Msg.str());
+ Msg << "invalid operand in inline asm: '" << AsmStr << "'";
+ MMI->getModule()->getContext().emitError(LocCookie, Msg.str());
}
}
break;
@@ -296,7 +309,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
}
}
OS << '\n' << (char)0; // null terminate string.
- EmitInlineAsm(OS.str(), 0/*no loc cookie*/);
+ EmitInlineAsm(OS.str(), LocCookie);
// Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't
// enabled, so we use EmitRawText.
@@ -334,7 +347,7 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
raw_string_ostream Msg(msg);
Msg << "Unknown special formatter '" << Code
<< "' for machine instr: " << *MI;
- llvm_report_error(Msg.str());
+ report_fatal_error(Msg.str());
}
}
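
The operand scanning in EmitInlineAsm above walks the asm template for $N and ${N:modifier} references, rejecting malformed ${} expressions and out-of-range operand numbers. A self-contained sketch of that scan under a simplified grammar; parseAsmTemplate is an illustrative helper, not an LLVM API, and errors are reported by return value rather than report_fatal_error:

#include <cctype>
#include <cstdio>
#include <string>

// Scan Asm for operand references; return false on a malformed template.
static bool parseAsmTemplate(const std::string &Asm, unsigned NumOperands) {
  for (size_t i = 0; i < Asm.size(); ++i) {
    if (Asm[i] != '$')
      continue;
    if (i + 1 < Asm.size() && Asm[i + 1] == '$') { ++i; continue; } // "$$"
    bool Braced = i + 1 < Asm.size() && Asm[i + 1] == '{';
    size_t P = i + (Braced ? 2 : 1), Start = P;
    while (P < Asm.size() && std::isdigit((unsigned char)Asm[P]))
      ++P;
    if (P == Start)
      return false;                                  // no operand number
    unsigned Val = (unsigned)std::stoul(Asm.substr(Start, P - Start));
    char Modifier = 0;
    if (Braced) {
      if (P < Asm.size() && Asm[P] == ':') {         // ${N:m} modifier
        Modifier = Asm[P + 1];
        P += 2;
      }
      if (P >= Asm.size() || Asm[P] != '}')
        return false;                                // bad ${} expression
      ++P;
    }
    if (Val >= NumOperands)
      return false;                                  // invalid operand number
    std::printf("operand %u, modifier '%c'\n", Val, Modifier ? Modifier : '-');
    i = P - 1;
  }
  return true;
}

int main() {
  std::printf("ok=%d\n", parseAsmTemplate("mov $0, ${1:w}", 2));
}
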
diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt
index afc482dd15bf..ca8b8436c11f 100644
--- a/lib/CodeGen/AsmPrinter/CMakeLists.txt
+++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt
@@ -7,3 +7,5 @@ add_llvm_library(LLVMAsmPrinter
DwarfException.cpp
OcamlGCPrinter.cpp
)
+
+target_link_libraries (LLVMAsmPrinter LLVMMCParser)
diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h
index 454326db0ea1..9cb8314bd959 100644
--- a/lib/CodeGen/AsmPrinter/DIE.h
+++ b/lib/CodeGen/AsmPrinter/DIE.h
@@ -261,11 +261,12 @@ namespace llvm {
///
virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+ uint64_t getValue() const { return Integer; }
+
/// SizeOf - Determine size of integer value in bytes.
///
virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
-
// Implement isa/cast/dyncast.
static bool classof(const DIEInteger *) { return true; }
static bool classof(const DIEValue *I) { return I->getType() == isInteger; }
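
The classof hooks shown for DIEInteger are what drive LLVM's hand-rolled isa<>/dyn_cast<> machinery: each subclass stores a kind tag and classof tests it. A freestanding sketch of the idiom under that assumption, where DIEValueBase and IntVal are illustrative stand-ins for the real classes:

#include <cstdint>
#include <cstdio>

struct DIEValueBase {
  enum Kind { isInteger, isLabel } Ty;
  explicit DIEValueBase(Kind K) : Ty(K) {}
  Kind getType() const { return Ty; }
};

struct IntVal : DIEValueBase {
  uint64_t Integer;
  explicit IntVal(uint64_t I) : DIEValueBase(isInteger), Integer(I) {}
  uint64_t getValue() const { return Integer; }
  // The kind check that isa<IntVal>/dyn_cast<IntVal> rely on:
  static bool classof(const DIEValueBase *V) {
    return V->getType() == isInteger;
  }
};

// Minimal dyn_cast: downcast only when the kind tag matches.
template <typename To> To *dyn_cast(DIEValueBase *V) {
  return To::classof(V) ? static_cast<To *>(V) : nullptr;
}

int main() {
  IntVal I(42);
  DIEValueBase *V = &I;
  if (IntVal *IV = dyn_cast<IntVal>(V))          // succeeds: kind matches
    std::printf("%llu\n", (unsigned long long)IV->getValue());
}
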
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index b472d1e5335c..e9e9ba55db13 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -28,9 +28,11 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetOptions.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ValueHandle.h"
@@ -39,6 +41,17 @@
#include "llvm/System/Path.h"
using namespace llvm;
+static cl::opt<bool> PrintDbgScope("print-dbgscope", cl::Hidden,
+ cl::desc("Print DbgScope information for each machine instruction"));
+
+static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden,
+ cl::desc("Disable debug info printing"));
+
+namespace {
+ const char *DWARFGroupName = "DWARF Emission";
+ const char *DbgTimerName = "DWARF Debug Writer";
+} // end anonymous namespace
+
//===----------------------------------------------------------------------===//
/// Configuration values for initial hash set sizes (log2).
@@ -179,6 +192,12 @@ public:
};
//===----------------------------------------------------------------------===//
+/// DbgRange - This is used to track a range of instructions that share the
+/// same debug info scope.
+///
+typedef std::pair<const MachineInstr *, const MachineInstr *> DbgRange;
+
+//===----------------------------------------------------------------------===//
/// DbgScope - This class is used to track scope information.
///
class DbgScope {
@@ -187,22 +206,21 @@ class DbgScope {
// Location at which this scope is inlined.
AssertingVH<MDNode> InlinedAtLocation;
bool AbstractScope; // Abstract Scope
- MCSymbol *StartLabel; // Label ID of the beginning of scope.
- MCSymbol *EndLabel; // Label ID of the end of scope.
const MachineInstr *LastInsn; // Last instruction of this scope.
const MachineInstr *FirstInsn; // First instruction of this scope.
+ unsigned DFSIn, DFSOut;
// Scopes defined in scope. Contents not owned.
SmallVector<DbgScope *, 4> Scopes;
// Variables declared in scope. Contents owned.
SmallVector<DbgVariable *, 8> Variables;
-
+ SmallVector<DbgRange, 4> Ranges;
// Private state for dump()
mutable unsigned IndentLevel;
public:
DbgScope(DbgScope *P, DIDescriptor D, MDNode *I = 0)
: Parent(P), Desc(D), InlinedAtLocation(I), AbstractScope(false),
- StartLabel(0), EndLabel(0),
- LastInsn(0), FirstInsn(0), IndentLevel(0) {}
+ LastInsn(0), FirstInsn(0),
+ DFSIn(0), DFSOut(0), IndentLevel(0) {}
virtual ~DbgScope();
// Accessors.
@@ -211,18 +229,57 @@ public:
DIDescriptor getDesc() const { return Desc; }
MDNode *getInlinedAt() const { return InlinedAtLocation; }
MDNode *getScopeNode() const { return Desc.getNode(); }
- MCSymbol *getStartLabel() const { return StartLabel; }
- MCSymbol *getEndLabel() const { return EndLabel; }
const SmallVector<DbgScope *, 4> &getScopes() { return Scopes; }
const SmallVector<DbgVariable *, 8> &getVariables() { return Variables; }
- void setStartLabel(MCSymbol *S) { StartLabel = S; }
- void setEndLabel(MCSymbol *E) { EndLabel = E; }
- void setLastInsn(const MachineInstr *MI) { LastInsn = MI; }
- const MachineInstr *getLastInsn() { return LastInsn; }
- void setFirstInsn(const MachineInstr *MI) { FirstInsn = MI; }
+ const SmallVector<DbgRange, 4> &getRanges() { return Ranges; }
+
+  /// openInsnRange - This scope covers an instruction range starting from MI.
+ void openInsnRange(const MachineInstr *MI) {
+ if (!FirstInsn)
+ FirstInsn = MI;
+
+ if (Parent)
+ Parent->openInsnRange(MI);
+ }
+
+ /// extendInsnRange - Extend the current instruction range covered by
+ /// this scope.
+ void extendInsnRange(const MachineInstr *MI) {
+ assert (FirstInsn && "MI Range is not open!");
+ LastInsn = MI;
+ if (Parent)
+ Parent->extendInsnRange(MI);
+ }
+
+ /// closeInsnRange - Create a range based on FirstInsn and LastInsn collected
+ /// until now. This is used when a new scope is encountered while walking
+ /// machine instructions.
+ void closeInsnRange(DbgScope *NewScope = NULL) {
+ assert (LastInsn && "Last insn missing!");
+ Ranges.push_back(DbgRange(FirstInsn, LastInsn));
+ FirstInsn = NULL;
+ LastInsn = NULL;
+ // If Parent dominates NewScope then do not close Parent's instruction
+ // range.
+ if (Parent && (!NewScope || !Parent->dominates(NewScope)))
+ Parent->closeInsnRange(NewScope);
+ }
+
void setAbstractScope() { AbstractScope = true; }
bool isAbstractScope() const { return AbstractScope; }
- const MachineInstr *getFirstInsn() { return FirstInsn; }
+
+  // Depth-first search support to walk and manipulate the DbgScope hierarchy.
+ unsigned getDFSOut() const { return DFSOut; }
+ void setDFSOut(unsigned O) { DFSOut = O; }
+ unsigned getDFSIn() const { return DFSIn; }
+ void setDFSIn(unsigned I) { DFSIn = I; }
+ bool dominates(const DbgScope *S) {
+ if (S == this)
+ return true;
+ if (DFSIn < S->getDFSIn() && DFSOut > S->getDFSOut())
+ return true;
+ return false;
+ }
/// addScope - Add a scope to the scope.
///
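
The dominates check added above uses the standard DFS-interval trick: number every scope on DFS entry (DFSIn) and exit (DFSOut); a scope then dominates another in the tree iff its interval strictly encloses the other's. A small self-contained illustration, where Node and assignDFS are illustrative names:

#include <cstdio>
#include <vector>

struct Node {
  std::vector<Node *> Kids;
  unsigned In = 0, Out = 0;
  // Same shape as DbgScope::dominates above.
  bool dominates(const Node *S) const {
    return S == this || (In < S->In && Out > S->Out);
  }
};

// Assign entry/exit numbers in one depth-first walk.
static void assignDFS(Node *N, unsigned &Clock) {
  N->In = Clock++;
  for (Node *K : N->Kids)
    assignDFS(K, Clock);
  N->Out = Clock++;
}

int main() {
  Node Root, A, B;                   // Root -> A -> B
  Root.Kids = {&A};
  A.Kids = {&B};
  unsigned Clock = 0;
  assignDFS(&Root, Clock);
  std::printf("%d %d\n", Root.dominates(&B), B.dominates(&A)); // 1 0
}
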
@@ -232,48 +289,11 @@ public:
///
void addVariable(DbgVariable *V) { Variables.push_back(V); }
- void fixInstructionMarkers(DenseMap<const MachineInstr *,
- unsigned> &MIIndexMap) {
- assert(getFirstInsn() && "First instruction is missing!");
-
- // Use the end of last child scope as end of this scope.
- const SmallVector<DbgScope *, 4> &Scopes = getScopes();
- const MachineInstr *LastInsn = getFirstInsn();
- unsigned LIndex = 0;
- if (Scopes.empty()) {
- assert(getLastInsn() && "Inner most scope does not have last insn!");
- return;
- }
- for (SmallVector<DbgScope *, 4>::const_iterator SI = Scopes.begin(),
- SE = Scopes.end(); SI != SE; ++SI) {
- DbgScope *DS = *SI;
- DS->fixInstructionMarkers(MIIndexMap);
- const MachineInstr *DSLastInsn = DS->getLastInsn();
- unsigned DSI = MIIndexMap[DSLastInsn];
- if (DSI > LIndex) {
- LastInsn = DSLastInsn;
- LIndex = DSI;
- }
- }
-
- unsigned CurrentLastInsnIndex = 0;
- if (const MachineInstr *CL = getLastInsn())
- CurrentLastInsnIndex = MIIndexMap[CL];
- unsigned FIndex = MIIndexMap[getFirstInsn()];
-
- // Set LastInsn as the last instruction for this scope only if
- // it follows
- // 1) this scope's first instruction and
- // 2) current last instruction for this scope, if any.
- if (LIndex >= CurrentLastInsnIndex && LIndex >= FIndex)
- setLastInsn(LastInsn);
- }
-
#ifndef NDEBUG
void dump() const;
#endif
};
-
+
} // end llvm namespace
#ifndef NDEBUG
@@ -282,7 +302,6 @@ void DbgScope::dump() const {
err.indent(IndentLevel);
MDNode *N = Desc.getNode();
N->dump();
- err << " [" << StartLabel << ", " << EndLabel << "]\n";
if (AbstractScope)
err << "Abstract Scope\n";
@@ -305,22 +324,23 @@ DbgScope::~DbgScope() {
DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
: Asm(A), MMI(Asm->MMI), ModuleCU(0),
AbbreviationsSet(InitAbbreviationsSetSize),
- CurrentFnDbgScope(0), DebugTimer(0) {
+ CurrentFnDbgScope(0), PrevLabel(NULL) {
NextStringPoolNumber = 0;
DwarfFrameSectionSym = DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0;
DwarfStrSectionSym = TextSectionSym = 0;
-
- if (TimePassesIsEnabled)
- DebugTimer = new Timer("Dwarf Debug Writer");
-
- beginModule(M);
+ DwarfDebugRangeSectionSym = 0;
+ FunctionBeginSym = 0;
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName);
+ beginModule(M);
+ } else {
+ beginModule(M);
+ }
}
DwarfDebug::~DwarfDebug() {
for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j)
DIEBlocks[j]->~DIEBlock();
-
- delete DebugTimer;
}
MCSymbol *DwarfDebug::getStringPoolEntry(StringRef Str) {
@@ -792,6 +812,64 @@ void DwarfDebug::addAddress(DIE *Die, unsigned Attribute,
addBlock(Die, Attribute, 0, Block);
}
+/// addRegisterAddress - Add register location entry in variable DIE.
+bool DwarfDebug::addRegisterAddress(DIE *Die, DbgVariable *DV,
+ const MachineOperand &MO) {
+ assert (MO.isReg() && "Invalid machine operand!");
+ if (!MO.getReg())
+ return false;
+ MachineLocation Location;
+ Location.set(MO.getReg());
+ addAddress(Die, dwarf::DW_AT_location, Location);
+ if (MCSymbol *VS = DV->getDbgValueLabel())
+ addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS);
+ return true;
+}
+
+/// addConstantValue - Add constant value entry in variable DIE.
+bool DwarfDebug::addConstantValue(DIE *Die, DbgVariable *DV,
+ const MachineOperand &MO) {
+ assert (MO.isImm() && "Invalid machine operand!");
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+ unsigned Imm = MO.getImm();
+ addUInt(Block, 0, dwarf::DW_FORM_udata, Imm);
+ addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
+ if (MCSymbol *VS = DV->getDbgValueLabel())
+ addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS);
+ return true;
+}
+
+/// addConstantFPValue - Add constant value entry in variable DIE.
+bool DwarfDebug::addConstantFPValue(DIE *Die, DbgVariable *DV,
+ const MachineOperand &MO) {
+ assert (MO.isFPImm() && "Invalid machine operand!");
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+ APFloat FPImm = MO.getFPImm()->getValueAPF();
+
+ // Get the raw data form of the floating point.
+ const APInt FltVal = FPImm.bitcastToAPInt();
+ const char *FltPtr = (const char*)FltVal.getRawData();
+
+ int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte.
+ bool LittleEndian = Asm->getTargetData().isLittleEndian();
+ int Incr = (LittleEndian ? 1 : -1);
+ int Start = (LittleEndian ? 0 : NumBytes - 1);
+ int Stop = (LittleEndian ? NumBytes : -1);
+
+ // Output the constant to DWARF one byte at a time.
+ for (; Start != Stop; Start += Incr)
+ addUInt(Block, 0, dwarf::DW_FORM_data1,
+ (unsigned char)0xFF & FltPtr[Start]);
+
+ addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
+
+ if (MCSymbol *VS = DV->getDbgValueLabel())
+ addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr,
+ VS);
+ return true;
+}
+
+
/// addToContextOwner - Add Die into the list of its context owner's children.
void DwarfDebug::addToContextOwner(DIE *Die, DIDescriptor Context) {
if (Context.isType()) {
@@ -1250,59 +1328,16 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) {
if (SP.isArtificial())
addUInt(SPDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
+ if (!SP.isLocalToUnit())
+ addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+
+ if (SP.isOptimized())
+ addUInt(SPDie, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1);
+
// DW_TAG_inlined_subroutine may refer to this DIE.
ModuleCU->insertDIE(SP.getNode(), SPDie);
- return SPDie;
-}
-
-/// getUpdatedDbgScope - Find DbgScope associated with the instruction.
-/// Update scope hierarchy. Create abstract scope if required.
-DbgScope *DwarfDebug::getUpdatedDbgScope(MDNode *N, const MachineInstr *MI,
- MDNode *InlinedAt) {
- assert(N && "Invalid Scope encoding!");
- assert(MI && "Missing machine instruction!");
- bool isAConcreteScope = InlinedAt != 0;
-
- DbgScope *NScope = NULL;
-
- if (InlinedAt)
- NScope = DbgScopeMap.lookup(InlinedAt);
- else
- NScope = DbgScopeMap.lookup(N);
- assert(NScope && "Unable to find working scope!");
-
- if (NScope->getFirstInsn())
- return NScope;
- DbgScope *Parent = NULL;
- if (isAConcreteScope) {
- DILocation IL(InlinedAt);
- Parent = getUpdatedDbgScope(IL.getScope().getNode(), MI,
- IL.getOrigLocation().getNode());
- assert(Parent && "Unable to find Parent scope!");
- NScope->setParent(Parent);
- Parent->addScope(NScope);
- } else if (DIDescriptor(N).isLexicalBlock()) {
- DILexicalBlock DB(N);
- Parent = getUpdatedDbgScope(DB.getContext().getNode(), MI, InlinedAt);
- NScope->setParent(Parent);
- Parent->addScope(NScope);
- }
-
- NScope->setFirstInsn(MI);
-
- if (!Parent && !InlinedAt) {
- StringRef SPName = DISubprogram(N).getLinkageName();
- if (SPName == Asm->MF->getFunction()->getName())
- CurrentFnDbgScope = NScope;
- }
-
- if (isAConcreteScope) {
- ConcreteScopes[InlinedAt] = NScope;
- getOrCreateAbstractScope(N);
- }
-
- return NScope;
+ return SPDie;
}
DbgScope *DwarfDebug::getOrCreateAbstractScope(MDNode *N) {
@@ -1332,6 +1367,19 @@ DbgScope *DwarfDebug::getOrCreateAbstractScope(MDNode *N) {
return AScope;
}
+/// isSubprogramContext - Return true if Context is either a subprogram
+/// or another context nested inside a subprogram.
+static bool isSubprogramContext(MDNode *Context) {
+ if (!Context)
+ return false;
+ DIDescriptor D(Context);
+ if (D.isSubprogram())
+ return true;
+ if (D.isType())
+ return isSubprogramContext(DIType(Context).getContext().getNode());
+ return false;
+}
+
/// updateSubprogramScopeDIE - Find DIE for the given subprogram and
/// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes.
/// If there are global variables in this scope then create and insert
@@ -1347,7 +1395,8 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) {
// expect specification DIE in parent function. So avoid creating
// specification DIE for a function defined inside a function.
if (SP.isDefinition() && !SP.getContext().isCompileUnit() &&
- !SP.getContext().isFile() && !SP.getContext().isSubprogram()) {
+ !SP.getContext().isFile() &&
+ !isSubprogramContext(SP.getContext().getNode())) {
addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
// Add arguments.
@@ -1378,31 +1427,48 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) {
MachineLocation Location(RI->getFrameRegister(*Asm->MF));
addAddress(SPDie, dwarf::DW_AT_frame_base, Location);
- if (!DISubprogram(SPNode).isLocalToUnit())
- addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
-
return SPDie;
}
/// constructLexicalScope - Construct new DW_TAG_lexical_block
/// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels.
DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) {
- MCSymbol *Start = Scope->getStartLabel();
- MCSymbol *End = Scope->getEndLabel();
- if (Start == 0 || End == 0) return 0;
- assert(Start->isDefined() && "Invalid starting label for an inlined scope!");
- assert(End->isDefined() && "Invalid end label for an inlined scope!");
-
DIE *ScopeDIE = new DIE(dwarf::DW_TAG_lexical_block);
if (Scope->isAbstractScope())
return ScopeDIE;
- addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
- Start ? Start : Asm->GetTempSymbol("func_begin",
- Asm->getFunctionNumber()));
- addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
- End ? End : Asm->GetTempSymbol("func_end",Asm->getFunctionNumber()));
+ const SmallVector<DbgRange, 4> &Ranges = Scope->getRanges();
+ if (Ranges.empty())
+ return 0;
+
+ SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin();
+ if (Ranges.size() > 1) {
+ // .debug_range section has not been laid out yet. Emit offset in
+ // .debug_range as a uint, size 4, for now. emitDIE will handle
+ // DW_AT_ranges appropriately.
+ addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4,
+ DebugRangeSymbols.size() * Asm->getTargetData().getPointerSize());
+ for (SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(),
+ RE = Ranges.end(); RI != RE; ++RI) {
+ DebugRangeSymbols.push_back(LabelsBeforeInsn.lookup(RI->first));
+ DebugRangeSymbols.push_back(LabelsAfterInsn.lookup(RI->second));
+ }
+ DebugRangeSymbols.push_back(NULL);
+ DebugRangeSymbols.push_back(NULL);
+ return ScopeDIE;
+ }
+
+ MCSymbol *Start = LabelsBeforeInsn.lookup(RI->first);
+ MCSymbol *End = LabelsAfterInsn.lookup(RI->second);
+
+ if (Start == 0 || End == 0) return 0;
+
+ assert(Start->isDefined() && "Invalid starting label for an inlined scope!");
+ assert(End->isDefined() && "Invalid end label for an inlined scope!");
+
+ addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, Start);
+ addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, End);
return ScopeDIE;
}
@@ -1411,14 +1477,28 @@ DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) {
/// a function. Construct DIE to represent this concrete inlined copy
/// of the function.
DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) {
- MCSymbol *StartLabel = Scope->getStartLabel();
- MCSymbol *EndLabel = Scope->getEndLabel();
- if (StartLabel == 0 || EndLabel == 0) return 0;
-
+
+ const SmallVector<DbgRange, 4> &Ranges = Scope->getRanges();
+  assert(!Ranges.empty() &&
+         "DbgScope does not have instruction markers!");
+
+ // FIXME : .debug_inlined section specification does not clearly state how
+ // to emit inlined scope that is split into multiple instruction ranges.
+ // For now, use first instruction range and emit low_pc/high_pc pair and
+ // corresponding .debug_inlined section entry for this pair.
+ SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin();
+ MCSymbol *StartLabel = LabelsBeforeInsn.lookup(RI->first);
+ MCSymbol *EndLabel = LabelsAfterInsn.lookup(RI->second);
+
+ if (StartLabel == 0 || EndLabel == 0) {
+    assert(0 && "Unexpected Start and End labels for an inlined scope!");
+ return 0;
+ }
assert(StartLabel->isDefined() &&
"Invalid starting label for an inlined scope!");
assert(EndLabel->isDefined() &&
"Invalid end label for an inlined scope!");
+
if (!Scope->getScopeNode())
return NULL;
DIScope DS(Scope->getScopeNode());
@@ -1512,59 +1592,34 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
// Add variable address.
if (!Scope->isAbstractScope()) {
// Check if variable is described by DBG_VALUE instruction.
- if (const MachineInstr *DbgValueInsn = DV->getDbgValue()) {
- if (DbgValueInsn->getNumOperands() == 3) {
- // FIXME : Handle getNumOperands != 3
- if (DbgValueInsn->getOperand(0).getType()
- == MachineOperand::MO_Register
- && DbgValueInsn->getOperand(0).getReg()) {
- MachineLocation Location;
- Location.set(DbgValueInsn->getOperand(0).getReg());
+ if (const MachineInstr *DVInsn = DV->getDbgValue()) {
+ bool updated = false;
+ // FIXME : Handle getNumOperands != 3
+ if (DVInsn->getNumOperands() == 3) {
+ if (DVInsn->getOperand(0).isReg())
+ updated = addRegisterAddress(VariableDie, DV, DVInsn->getOperand(0));
+ else if (DVInsn->getOperand(0).isImm())
+ updated = addConstantValue(VariableDie, DV, DVInsn->getOperand(0));
+ else if (DVInsn->getOperand(0).isFPImm())
+ updated = addConstantFPValue(VariableDie, DV, DVInsn->getOperand(0));
+ } else {
+ MachineLocation Location = Asm->getDebugValueLocation(DVInsn);
+ if (Location.getReg()) {
addAddress(VariableDie, dwarf::DW_AT_location, Location);
if (MCSymbol *VS = DV->getDbgValueLabel())
addLabel(VariableDie, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr,
VS);
- } else if (DbgValueInsn->getOperand(0).getType() ==
- MachineOperand::MO_Immediate) {
- DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
- unsigned Imm = DbgValueInsn->getOperand(0).getImm();
- addUInt(Block, 0, dwarf::DW_FORM_udata, Imm);
- addBlock(VariableDie, dwarf::DW_AT_const_value, 0, Block);
- if (MCSymbol *VS = DV->getDbgValueLabel())
- addLabel(VariableDie, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr,
- VS);
- } else if (DbgValueInsn->getOperand(0).getType() ==
- MachineOperand::MO_FPImmediate) {
- DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
- APFloat FPImm = DbgValueInsn->getOperand(0).getFPImm()->getValueAPF();
-
- // Get the raw data form of the floating point.
- const APInt FltVal = FPImm.bitcastToAPInt();
- const char *FltPtr = (const char*)FltVal.getRawData();
-
- unsigned NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte.
- bool LittleEndian = Asm->getTargetData().isLittleEndian();
- int Incr = (LittleEndian ? 1 : -1);
- int Start = (LittleEndian ? 0 : NumBytes - 1);
- int Stop = (LittleEndian ? NumBytes : -1);
-
- // Output the constant to DWARF one byte at a time.
- for (; Start != Stop; Start += Incr)
- addUInt(Block, 0, dwarf::DW_FORM_data1,
- (unsigned char)0xFF & FltPtr[Start]);
-
- addBlock(VariableDie, dwarf::DW_AT_const_value, 0, Block);
-
- if (MCSymbol *VS = DV->getDbgValueLabel())
- addLabel(VariableDie, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr,
- VS);
- } else {
- //FIXME : Handle other operand types.
- delete VariableDie;
- return NULL;
+ updated = true;
}
- }
- } else {
+ }
+ if (!updated) {
+ // If VariableDie was not updated then the DBG_VALUE instruction
+ // does not encode valid variable info.
+ delete VariableDie;
+ return NULL;
+ }
+ }
+ else {
MachineLocation Location;
unsigned FrameReg;
const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
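
The rewritten hunk above replaces the open-coded register/immediate/FP-immediate handling with three helpers selected by the kind of the first DBG_VALUE operand. A minimal, self-contained sketch of that dispatch shape; the enum and function names are illustrative stand-ins, not the LLVM API:

#include <cstdio>

// Illustrative stand-in for MachineOperand kinds (hypothetical names).
enum class OperandKind { Register, Immediate, FPImmediate, Other };

// Mirrors the addRegisterAddress / addConstantValue / addConstantFPValue
// split: returns false when the operand cannot be described, in which
// case the caller deletes the half-built variable DIE.
static bool describeOperand(OperandKind K) {
  switch (K) {
  case OperandKind::Register:    std::puts("register location"); return true;
  case OperandKind::Immediate:   std::puts("constant value");    return true;
  case OperandKind::FPImmediate: std::puts("constant FP value"); return true;
  default:                       return false;
  }
}

int main() {
  if (!describeOperand(OperandKind::Other))
    std::puts("variable DIE dropped");
  return 0;
}
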
@@ -1600,7 +1655,8 @@ void DwarfDebug::addPubTypes(DISubprogram SP) {
if (!ATy.isValid())
continue;
DICompositeType CATy = getDICompositeType(ATy);
- if (DIDescriptor(CATy.getNode()).Verify() && !CATy.getName().empty()) {
+ if (DIDescriptor(CATy.getNode()).Verify() && !CATy.getName().empty()
+ && !CATy.isForwardDecl()) {
if (DIEEntry *Entry = ModuleCU->getDIEEntry(CATy.getNode()))
ModuleCU->addGlobalType(CATy.getName(), Entry->getEntry());
}
@@ -1716,9 +1772,9 @@ void DwarfDebug::constructCompileUnit(MDNode *N) {
addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data1,
DIUnit.getLanguage());
addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN);
- addLabel(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, TextSectionSym);
- addLabel(Die, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
- Asm->GetTempSymbol("text_end"));
+ // Use DW_AT_entry_pc instead of the DW_AT_low_pc/DW_AT_high_pc pair.
+ // This simplifies debug range entries.
+ addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_data4, 0);
// DW_AT_stmt_list is a offset of line number information for this
// compile unit in debug_line section. It is always zero when only one
// compile unit is emitted in one object file.
@@ -1766,7 +1822,8 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
// Do not create specification DIE if context is either compile unit
// or a subprogram.
if (DI_GV.isDefinition() && !GVContext.isCompileUnit() &&
- !GVContext.isFile() && !GVContext.isSubprogram()) {
+ !GVContext.isFile() &&
+ !isSubprogramContext(GVContext.getNode())) {
// Create specification DIE.
DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable);
addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification,
@@ -1791,7 +1848,8 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
ModuleCU->addGlobal(DI_GV.getName(), VariableDie);
DIType GTy = DI_GV.getType();
- if (GTy.isCompositeType() && !GTy.getName().empty()) {
+ if (GTy.isCompositeType() && !GTy.getName().empty()
+ && !GTy.isForwardDecl()) {
DIEEntry *Entry = ModuleCU->getDIEEntry(GTy.getNode());
assert(Entry && "Missing global type!");
ModuleCU->addGlobalType(GTy.getName(), Entry->getEntry());
@@ -1829,7 +1887,8 @@ void DwarfDebug::constructSubprogramDIE(MDNode *N) {
/// content. Create global DIEs and emit initial debug info sections.
/// This is invoked by the target AsmPrinter.
void DwarfDebug::beginModule(Module *M) {
- TimeRegion Timer(DebugTimer);
+ if (DisableDebugInfoPrinting)
+ return;
DebugInfoFinder DbgFinder;
DbgFinder.processModule(*M);
@@ -1893,10 +1952,7 @@ void DwarfDebug::beginModule(Module *M) {
/// endModule - Emit all Dwarf sections that should come after the content.
///
void DwarfDebug::endModule() {
- if (!ModuleCU)
- return;
-
- TimeRegion Timer(DebugTimer);
+ if (!ModuleCU) return;
// Attach DW_AT_inline attribute with inlined subprogram DIEs.
for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(),
@@ -1905,11 +1961,6 @@ void DwarfDebug::endModule() {
addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
}
- // Insert top level DIEs.
- for (SmallVector<DIE *, 4>::iterator TI = TopLevelDIEsVector.begin(),
- TE = TopLevelDIEsVector.end(); TI != TE; ++TI)
- ModuleCU->getCUDie()->addChild(*TI);
-
for (DenseMap<DIE *, MDNode *>::iterator CI = ContainingTypeMap.begin(),
CE = ContainingTypeMap.end(); CI != CE; ++CI) {
DIE *SPDie = CI->first;
@@ -1918,8 +1969,6 @@ void DwarfDebug::endModule() {
DIE *NDie = ModuleCU->getDIE(N);
if (!NDie) continue;
addDIEEntry(SPDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie);
- // FIXME - This is not the correct approach.
- //addDIEEntry(NDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie
}
// Standard sections final addresses.
@@ -2062,11 +2111,13 @@ void DwarfDebug::collectVariableInfo() {
if (!MInsn->isDebugValue())
continue;
- // FIXME : Lift this restriction.
- if (MInsn->getNumOperands() != 3)
+ // Ignore undef values, i.e. a register operand of 0.
+ if (MInsn->getOperand(0).isReg() && !MInsn->getOperand(0).getReg())
continue;
- DIVariable DV((MDNode*)(MInsn->getOperand(MInsn->getNumOperands()
- - 1).getMetadata()));
+
+ DIVariable DV(
+ const_cast<MDNode *>(MInsn->getOperand(MInsn->getNumOperands() - 1)
+ .getMetadata()));
if (DV.getTag() == dwarf::DW_TAG_arg_variable) {
// FIXME Handle inlined subroutine arguments.
DbgVariable *ArgVar = new DbgVariable(DV, MInsn, NULL);
@@ -2102,10 +2153,6 @@ void DwarfDebug::beginScope(const MachineInstr *MI) {
if (DL.isUnknown())
return;
- // Check and update last known location info.
- if (DL == PrevInstLoc)
- return;
-
MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext());
// FIXME: Should only verify each scope once!
@@ -2117,78 +2164,174 @@ void DwarfDebug::beginScope(const MachineInstr *MI) {
DenseMap<const MachineInstr *, DbgVariable *>::iterator DI
= DbgValueStartMap.find(MI);
if (DI != DbgValueStartMap.end()) {
- MCSymbol *Label = recordSourceLine(DL.getLine(), DL.getCol(), Scope);
- PrevInstLoc = DL;
+ MCSymbol *Label = NULL;
+ if (DL == PrevInstLoc)
+ Label = PrevLabel;
+ else {
+ Label = recordSourceLine(DL.getLine(), DL.getCol(), Scope);
+ PrevInstLoc = DL;
+ PrevLabel = Label;
+ }
DI->second->setDbgValueLabel(Label);
}
return;
}
// Emit a label to indicate location change. This is used for line
- // table even if this instruction does start a new scope.
- MCSymbol *Label = recordSourceLine(DL.getLine(), DL.getCol(), Scope);
- PrevInstLoc = DL;
-
- // update DbgScope if this instruction starts a new scope.
- InsnToDbgScopeMapTy::iterator I = DbgScopeBeginMap.find(MI);
- if (I == DbgScopeBeginMap.end())
- return;
+ // table even if this instruction does not start a new scope.
+ MCSymbol *Label = NULL;
+ if (DL == PrevInstLoc)
+ Label = PrevLabel;
+ else {
+ Label = recordSourceLine(DL.getLine(), DL.getCol(), Scope);
+ PrevInstLoc = DL;
+ PrevLabel = Label;
+ }
- ScopeVector &SD = I->second;
- for (ScopeVector::iterator SDI = SD.begin(), SDE = SD.end();
- SDI != SDE; ++SDI)
- (*SDI)->setStartLabel(Label);
+ // If this instruction begins a scope then note down the corresponding label.
+ if (InsnsBeginScopeSet.count(MI) != 0)
+ LabelsBeforeInsn[MI] = Label;
}
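
beginScope now memoizes the last emitted location label through PrevInstLoc/PrevLabel: consecutive instructions that share a DebugLoc reuse one MCSymbol instead of adding redundant line-table entries. A stripped-down sketch of the caching, using plain C++ stand-ins for DebugLoc and MCSymbol:

#include <string>
#include <utility>

typedef std::pair<unsigned, unsigned> Loc; // stand-in: (line, column)

static Loc PrevLoc(0, 0);
static std::string PrevLabel;
static unsigned Counter = 0;

// Only mint a fresh label (i.e. a new line-table entry) when the
// location actually changes; otherwise hand back the cached one.
std::string labelFor(Loc L) {
  if (L == PrevLoc && !PrevLabel.empty())
    return PrevLabel;
  PrevLoc = L;
  PrevLabel = "Ltmp" + std::to_string(++Counter);
  return PrevLabel;
}
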
/// endScope - Process end of a scope.
void DwarfDebug::endScope(const MachineInstr *MI) {
- // Ignore DBG_VALUE instruction.
- if (MI->isDebugValue())
- return;
-
- // Check location.
- DebugLoc DL = MI->getDebugLoc();
- if (DL.isUnknown())
- return;
-
- // Emit a label and update DbgScope if this instruction ends a scope.
- InsnToDbgScopeMapTy::iterator I = DbgScopeEndMap.find(MI);
- if (I == DbgScopeEndMap.end())
- return;
-
- MCSymbol *Label = MMI->getContext().CreateTempSymbol();
- Asm->OutStreamer.EmitLabel(Label);
-
- SmallVector<DbgScope*, 2> &SD = I->second;
- for (SmallVector<DbgScope *, 2>::iterator SDI = SD.begin(), SDE = SD.end();
- SDI != SDE; ++SDI)
- (*SDI)->setEndLabel(Label);
- return;
+ if (InsnsEndScopeSet.count(MI) != 0) {
+ // Emit a label if this instruction ends a scope.
+ MCSymbol *Label = MMI->getContext().CreateTempSymbol();
+ Asm->OutStreamer.EmitLabel(Label);
+ LabelsAfterInsn[MI] = Label;
+ }
}
-/// createDbgScope - Create DbgScope for the scope.
-void DwarfDebug::createDbgScope(MDNode *Scope, MDNode *InlinedAt) {
+/// getOrCreateDbgScope - Find or create a DbgScope for the given scope.
+DbgScope *DwarfDebug::getOrCreateDbgScope(MDNode *Scope, MDNode *InlinedAt) {
if (!InlinedAt) {
DbgScope *WScope = DbgScopeMap.lookup(Scope);
if (WScope)
- return;
+ return WScope;
WScope = new DbgScope(NULL, DIDescriptor(Scope), NULL);
DbgScopeMap.insert(std::make_pair(Scope, WScope));
- if (DIDescriptor(Scope).isLexicalBlock())
- createDbgScope(DILexicalBlock(Scope).getContext().getNode(), NULL);
- return;
+ if (DIDescriptor(Scope).isLexicalBlock()) {
+ DbgScope *Parent =
+ getOrCreateDbgScope(DILexicalBlock(Scope).getContext().getNode(), NULL);
+ WScope->setParent(Parent);
+ Parent->addScope(WScope);
+ }
+
+ if (!WScope->getParent()) {
+ StringRef SPName = DISubprogram(Scope).getLinkageName();
+ if (SPName == Asm->MF->getFunction()->getName())
+ CurrentFnDbgScope = WScope;
+ }
+
+ return WScope;
}
DbgScope *WScope = DbgScopeMap.lookup(InlinedAt);
if (WScope)
- return;
+ return WScope;
WScope = new DbgScope(NULL, DIDescriptor(Scope), InlinedAt);
DbgScopeMap.insert(std::make_pair(InlinedAt, WScope));
DILocation DL(InlinedAt);
- createDbgScope(DL.getScope().getNode(), DL.getOrigLocation().getNode());
+ DbgScope *Parent =
+ getOrCreateDbgScope(DL.getScope().getNode(), DL.getOrigLocation().getNode());
+ WScope->setParent(Parent);
+ Parent->addScope(WScope);
+
+ ConcreteScopes[InlinedAt] = WScope;
+ getOrCreateAbstractScope(Scope);
+
+ return WScope;
}
+/// hasValidLocation - Return true if the debug location entry attached to
+/// the machine instruction encodes valid location info.
+static bool hasValidLocation(LLVMContext &Ctx,
+ const MachineInstr *MInsn,
+ MDNode *&Scope, MDNode *&InlinedAt) {
+ if (MInsn->isDebugValue())
+ return false;
+ DebugLoc DL = MInsn->getDebugLoc();
+ if (DL.isUnknown()) return false;
+
+ MDNode *S = DL.getScope(Ctx);
+
+ // A location scoped directly to the compile unit needs no DbgScope (and
+ // hence no scope DIE), so do not treat it as a valid location here.
+ if (DIScope(S).isCompileUnit()) return false;
+
+ Scope = S;
+ InlinedAt = DL.getInlinedAt(Ctx);
+ return true;
+}
+
+/// calculateDominanceGraph - Calculate dominance graph for DbgScope
+/// hierarchy.
+static void calculateDominanceGraph(DbgScope *Scope) {
+ assert(Scope && "Unable to calculate scope dominance graph!");
+ SmallVector<DbgScope *, 4> WorkStack;
+ WorkStack.push_back(Scope);
+ unsigned Counter = 0;
+ while (!WorkStack.empty()) {
+ DbgScope *WS = WorkStack.back();
+ const SmallVector<DbgScope *, 4> &Children = WS->getScopes();
+ bool visitedChildren = false;
+ for (SmallVector<DbgScope *, 4>::const_iterator SI = Children.begin(),
+ SE = Children.end(); SI != SE; ++SI) {
+ DbgScope *ChildScope = *SI;
+ if (!ChildScope->getDFSOut()) {
+ WorkStack.push_back(ChildScope);
+ visitedChildren = true;
+ ChildScope->setDFSIn(++Counter);
+ break;
+ }
+ }
+ if (!visitedChildren) {
+ WorkStack.pop_back();
+ WS->setDFSOut(++Counter);
+ }
+ }
+}
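
calculateDominanceGraph assigns DFS entry/exit numbers so that ancestor queries on the scope tree reduce to interval containment. One plausible formulation of the dominates() test those numbers support, as a sketch:

// With DFS in/out numbering on a tree, "A dominates B" is an interval
// check: A was entered no later than B and left no earlier.
struct ScopeNode {
  unsigned DFSIn = 0, DFSOut = 0;
  bool dominates(const ScopeNode &Other) const {
    return DFSIn <= Other.DFSIn && Other.DFSOut <= DFSOut;
  }
};
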
+
+/// printDbgScopeInfo - Print DbgScope info for each machine instruction.
+static
+void printDbgScopeInfo(LLVMContext &Ctx, const MachineFunction *MF,
+ DenseMap<const MachineInstr *, DbgScope *> &MI2ScopeMap)
+{
+#ifndef NDEBUG
+ unsigned PrevDFSIn = 0;
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+ II != IE; ++II) {
+ const MachineInstr *MInsn = II;
+ MDNode *Scope = NULL;
+ MDNode *InlinedAt = NULL;
+
+ // Check if instruction has valid location information.
+ if (hasValidLocation(Ctx, MInsn, Scope, InlinedAt)) {
+ dbgs() << " [ ";
+ if (InlinedAt)
+ dbgs() << "*";
+ DenseMap<const MachineInstr *, DbgScope *>::iterator DI =
+ MI2ScopeMap.find(MInsn);
+ if (DI != MI2ScopeMap.end()) {
+ DbgScope *S = DI->second;
+ dbgs() << S->getDFSIn();
+ PrevDFSIn = S->getDFSIn();
+ } else
+ dbgs() << PrevDFSIn;
+ } else
+ dbgs() << " [ x" << PrevDFSIn;
+ dbgs() << " ]";
+ MInsn->dump();
+ }
+ dbgs() << "\n";
+ }
+#endif
+}
/// extractScopeInformation - Scan machine instructions in this function
/// and collect DbgScopes. Return true if at least one scope was found.
bool DwarfDebug::extractScopeInformation() {
@@ -2197,71 +2340,100 @@ bool DwarfDebug::extractScopeInformation() {
if (!DbgScopeMap.empty())
return false;
- DenseMap<const MachineInstr *, unsigned> MIIndexMap;
- unsigned MIIndex = 0;
- LLVMContext &Ctx = Asm->MF->getFunction()->getContext();
-
// Scan each instruction and create scopes. First build working set of scopes.
+ LLVMContext &Ctx = Asm->MF->getFunction()->getContext();
+ SmallVector<DbgRange, 4> MIRanges;
+ DenseMap<const MachineInstr *, DbgScope *> MI2ScopeMap;
+ MDNode *PrevScope = NULL;
+ MDNode *PrevInlinedAt = NULL;
+ const MachineInstr *RangeBeginMI = NULL;
+ const MachineInstr *PrevMI = NULL;
for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end();
I != E; ++I) {
for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
II != IE; ++II) {
const MachineInstr *MInsn = II;
- // FIXME : Remove DBG_VALUE check.
- if (MInsn->isDebugValue()) continue;
- MIIndexMap[MInsn] = MIIndex++;
-
- DebugLoc DL = MInsn->getDebugLoc();
- if (DL.isUnknown()) continue;
-
- MDNode *Scope = DL.getScope(Ctx);
-
- // There is no need to create another DIE for compile unit. For all
- // other scopes, create one DbgScope now. This will be translated
- // into a scope DIE at the end.
- if (DIScope(Scope).isCompileUnit()) continue;
- createDbgScope(Scope, DL.getInlinedAt(Ctx));
- }
- }
+ MDNode *Scope = NULL;
+ MDNode *InlinedAt = NULL;
+ // Check if instruction has valid location information.
+ if (!hasValidLocation(Ctx, MInsn, Scope, InlinedAt)) {
+ PrevMI = MInsn;
+ continue;
+ }
+
+ // If the scope has not changed then skip this instruction.
+ if (Scope == PrevScope && PrevInlinedAt == InlinedAt) {
+ PrevMI = MInsn;
+ continue;
+ }
- // Build scope hierarchy using working set of scopes.
- for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end();
- I != E; ++I) {
- for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
- II != IE; ++II) {
- const MachineInstr *MInsn = II;
- // FIXME : Remove DBG_VALUE check.
- if (MInsn->isDebugValue()) continue;
- DebugLoc DL = MInsn->getDebugLoc();
- if (DL.isUnknown()) continue;
+ if (RangeBeginMI) {
+ // We have already seen the beginning of an instruction range and the
+ // current instruction's scope does not match it, so close off the
+ // pending range before starting a new one.
+ DbgRange R(RangeBeginMI, PrevMI);
+ MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope, PrevInlinedAt);
+ MIRanges.push_back(R);
+ }
- MDNode *Scope = DL.getScope(Ctx);
- if (Scope == 0) continue;
+ // This is the beginning of a new instruction range.
+ RangeBeginMI = MInsn;
- // There is no need to create another DIE for compile unit. For all
- // other scopes, create one DbgScope now. This will be translated
- // into a scope DIE at the end.
- if (DIScope(Scope).isCompileUnit()) continue;
- DbgScope *DScope = getUpdatedDbgScope(Scope, MInsn, DL.getInlinedAt(Ctx));
- DScope->setLastInsn(MInsn);
+ // Reset previous markers.
+ PrevMI = MInsn;
+ PrevScope = Scope;
+ PrevInlinedAt = InlinedAt;
}
}
+ // Create last instruction range.
+ if (RangeBeginMI && PrevMI && PrevScope) {
+ DbgRange R(RangeBeginMI, PrevMI);
+ MIRanges.push_back(R);
+ MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope, PrevInlinedAt);
+ }
+
if (!CurrentFnDbgScope)
return false;
- CurrentFnDbgScope->fixInstructionMarkers(MIIndexMap);
+ calculateDominanceGraph(CurrentFnDbgScope);
+ if (PrintDbgScope)
+ printDbgScopeInfo(Ctx, Asm->MF, MI2ScopeMap);
+
+ // Find the ranges of instructions covered by each DbgScope.
+ DbgScope *PrevDbgScope = NULL;
+ for (SmallVector<DbgRange, 4>::const_iterator RI = MIRanges.begin(),
+ RE = MIRanges.end(); RI != RE; ++RI) {
+ const DbgRange &R = *RI;
+ DbgScope *S = MI2ScopeMap.lookup(R.first);
+ assert(S && "Lost DbgScope for a machine instruction!");
+ if (PrevDbgScope && !PrevDbgScope->dominates(S))
+ PrevDbgScope->closeInsnRange(S);
+ S->openInsnRange(R.first);
+ S->extendInsnRange(R.second);
+ PrevDbgScope = S;
+ }
- // Each scope has first instruction and last instruction to mark beginning
- // and end of a scope respectively. Create an inverse map that list scopes
- // starts (and ends) with an instruction. One instruction may start (or end)
- // multiple scopes. Ignore scopes that are not reachable.
+ if (PrevDbgScope)
+ PrevDbgScope->closeInsnRange();
+
+ identifyScopeMarkers();
+
+ return !DbgScopeMap.empty();
+}
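
The coverage loop above walks MIRanges in program order, closing the pending range of any previous scope that does not dominate the new one. A toy model of the open/extend/close bookkeeping on [first, last] instruction pairs (simplified; the real DbgScope also folds ranges into its parents):

#include <utility>
#include <vector>

typedef const void *Insn; // stand-in for const MachineInstr *
typedef std::pair<Insn, Insn> Range;

struct ScopeRanges {
  std::vector<Range> Ranges;
  // Start a new [First, First] range for this scope.
  void openInsnRange(Insn First) { Ranges.push_back(Range(First, First)); }
  // Grow the currently open range to end at Last.
  void extendInsnRange(Insn Last) { Ranges.back().second = Last; }
  // Nothing left to extend; the last pair is final.
  void closeInsnRange() {}
};
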
+
+/// identifyScopeMarkers() -
+/// Each DbgScope has a first and a last instruction marking the beginning
+/// and end of the scope respectively. Record which instructions begin (and
+/// end) scopes; one instruction may start (or end) multiple scopes. Ignore
+/// scopes that are not reachable.
+void DwarfDebug::identifyScopeMarkers() {
SmallVector<DbgScope *, 4> WorkList;
WorkList.push_back(CurrentFnDbgScope);
while (!WorkList.empty()) {
DbgScope *S = WorkList.pop_back_val();
-
+
const SmallVector<DbgScope *, 4> &Children = S->getScopes();
if (!Children.empty())
for (SmallVector<DbgScope *, 4>::const_iterator SI = Children.begin(),
@@ -2270,45 +2442,51 @@ bool DwarfDebug::extractScopeInformation() {
if (S->isAbstractScope())
continue;
- const MachineInstr *MI = S->getFirstInsn();
- assert(MI && "DbgScope does not have first instruction!");
-
- InsnToDbgScopeMapTy::iterator IDI = DbgScopeBeginMap.find(MI);
- if (IDI != DbgScopeBeginMap.end())
- IDI->second.push_back(S);
- else
- DbgScopeBeginMap[MI].push_back(S);
-
- MI = S->getLastInsn();
- assert(MI && "DbgScope does not have last instruction!");
- IDI = DbgScopeEndMap.find(MI);
- if (IDI != DbgScopeEndMap.end())
- IDI->second.push_back(S);
- else
- DbgScopeEndMap[MI].push_back(S);
+
+ const SmallVector<DbgRange, 4> &Ranges = S->getRanges();
+ if (Ranges.empty())
+ continue;
+ for (SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(),
+ RE = Ranges.end(); RI != RE; ++RI) {
+ assert(RI->first && "DbgRange does not have first instruction!");
+ assert(RI->second && "DbgRange does not have second instruction!");
+ InsnsBeginScopeSet.insert(RI->first);
+ InsnsEndScopeSet.insert(RI->second);
+ }
}
+}
- return !DbgScopeMap.empty();
+/// FindFirstDebugLoc - Find the first debug location in the function. This
+/// is intended to be an approximation of the source position of the
+/// beginning of the function.
+static DebugLoc FindFirstDebugLoc(const MachineFunction *MF) {
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I)
+ for (MachineBasicBlock::const_iterator MBBI = I->begin(), MBBE = I->end();
+ MBBI != MBBE; ++MBBI) {
+ DebugLoc DL = MBBI->getDebugLoc();
+ if (!DL.isUnknown())
+ return DL;
+ }
+ return DebugLoc();
}
/// beginFunction - Gather pre-function debug information. Assumes being
/// emitted immediately after the function entry point.
void DwarfDebug::beginFunction(const MachineFunction *MF) {
if (!MMI->hasDebugInfo()) return;
-
- TimeRegion Timer(DebugTimer);
- if (!extractScopeInformation())
- return;
+ if (!extractScopeInformation()) return;
collectVariableInfo();
+ FunctionBeginSym = Asm->GetTempSymbol("func_begin",
+ Asm->getFunctionNumber());
// Assumes in correct section after the entry point.
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("func_begin",
- Asm->getFunctionNumber()));
+ Asm->OutStreamer.EmitLabel(FunctionBeginSym);
// Emit label for the implicitly defined dbg.stoppoint at the start of the
// function.
- DebugLoc FDL = MF->getDefaultDebugLoc();
+ DebugLoc FDL = FindFirstDebugLoc(MF);
if (FDL.isUnknown()) return;
MDNode *Scope = FDL.getScope(MF->getFunction()->getContext());
@@ -2329,10 +2507,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
/// endFunction - Gather and emit post-function debug information.
///
void DwarfDebug::endFunction(const MachineFunction *MF) {
- if (!MMI->hasDebugInfo() ||
- DbgScopeMap.empty()) return;
-
- TimeRegion Timer(DebugTimer);
+ if (!MMI->hasDebugInfo() || DbgScopeMap.empty()) return;
if (CurrentFnDbgScope) {
// Define end label for subprogram.
@@ -2355,8 +2530,13 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
AE = AbstractScopesList.end(); AI != AE; ++AI)
constructScopeDIE(*AI);
- constructScopeDIE(CurrentFnDbgScope);
+ DIE *CurFnDIE = constructScopeDIE(CurrentFnDbgScope);
+ if (!DisableFramePointerElim(*MF))
+ addUInt(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr,
+ dwarf::DW_FORM_flag, 1);
+
+
DebugFrames.push_back(FunctionDebugFrameInfo(Asm->getFunctionNumber(),
MMI->getFrameMoves()));
}
@@ -2364,22 +2544,23 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
// Clear debug info
CurrentFnDbgScope = NULL;
DeleteContainerSeconds(DbgScopeMap);
- DbgScopeBeginMap.clear();
- DbgScopeEndMap.clear();
+ InsnsBeginScopeSet.clear();
+ InsnsEndScopeSet.clear();
DbgValueStartMap.clear();
ConcreteScopes.clear();
DeleteContainerSeconds(AbstractScopes);
AbstractScopesList.clear();
AbstractVariables.clear();
+ LabelsBeforeInsn.clear();
+ LabelsAfterInsn.clear();
Lines.clear();
+ PrevLabel = NULL;
}
/// recordSourceLine - Register a source line with debug info. Returns the
/// unique label that was emitted and which provides correspondence to
/// the source line list.
MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, MDNode *S) {
- TimeRegion Timer(DebugTimer);
-
StringRef Dir;
StringRef Fn;
@@ -2407,17 +2588,6 @@ MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, MDNode *S) {
return Label;
}
-/// getOrCreateSourceID - Public version of GetOrCreateSourceID. This can be
-/// timed. Look up the source id with the given directory and source file
-/// names. If none currently exists, create a new id and insert it in the
-/// SourceIds map. This can update DirectoryNames and SourceFileNames maps as
-/// well.
-unsigned DwarfDebug::getOrCreateSourceID(const std::string &DirName,
- const std::string &FileName) {
- TimeRegion Timer(DebugTimer);
- return GetOrCreateSourceID(DirName.c_str(), FileName.c_str());
-}
-
//===----------------------------------------------------------------------===//
// Emit Methods
//===----------------------------------------------------------------------===//
@@ -2481,7 +2651,6 @@ void DwarfDebug::computeSizeAndOffsets() {
sizeof(int8_t); // Pointer Size (in bytes)
computeSizeAndOffset(ModuleCU->getCUDie(), Offset, true);
- CompileUnitOffsets[ModuleCU] = 0;
}
/// EmitSectionSym - Switch to the specified MCSection and emit an assembler
@@ -2522,7 +2691,8 @@ void DwarfDebug::EmitSectionLabels() {
EmitSectionSym(Asm, TLOF.getDwarfPubTypesSection());
DwarfStrSectionSym =
EmitSectionSym(Asm, TLOF.getDwarfStrSection(), "section_str");
- EmitSectionSym(Asm, TLOF.getDwarfRangesSection());
+ DwarfDebugRangeSectionSym = EmitSectionSym(Asm, TLOF.getDwarfRangesSection(),
+ "debug_range");
TextSectionSym = EmitSectionSym(Asm, TLOF.getTextSection(), "text_begin");
EmitSectionSym(Asm, TLOF.getDataSection());
@@ -2566,6 +2736,15 @@ void DwarfDebug::emitDIE(DIE *Die) {
Asm->EmitInt32(Addr);
break;
}
+ case dwarf::DW_AT_ranges: {
+ // A DW_AT_ranges value encodes an offset into the debug_range section.
+ DIEInteger *V = cast<DIEInteger>(Values[i]);
+ Asm->EmitLabelOffsetDifference(DwarfDebugRangeSectionSym,
+ V->getValue(),
+ DwarfDebugRangeSectionSym,
+ 4);
+ break;
+ }
default:
// Emit an attribute using the defined form.
Values[i]->EmitValue(Asm, Form);
@@ -2900,7 +3079,7 @@ void DwarfDebug::emitCommonDebugFrame() {
Asm->EmitFrameMoves(Moves, 0, false);
- Asm->EmitAlignment(2, 0, 0, false);
+ Asm->EmitAlignment(2);
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_frame_common_end"));
}
@@ -2942,7 +3121,7 @@ emitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo) {
Asm->EmitFrameMoves(DebugFrameInfo.Moves, FuncBeginSym, false);
- Asm->EmitAlignment(2, 0, 0, false);
+ Asm->EmitAlignment(2);
Asm->OutStreamer.EmitLabel(DebugFrameEnd);
}
@@ -3087,7 +3266,16 @@ void DwarfDebug::EmitDebugARanges() {
void DwarfDebug::emitDebugRanges() {
// Start the dwarf ranges section.
Asm->OutStreamer.SwitchSection(
- Asm->getObjFileLowering().getDwarfRangesSection());
+ Asm->getObjFileLowering().getDwarfRangesSection());
+ unsigned char Size = Asm->getTargetData().getPointerSize();
+ for (SmallVector<const MCSymbol *, 8>::iterator
+ I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end();
+ I != E; ++I) {
+ if (*I)
+ Asm->OutStreamer.EmitSymbolValue(const_cast<MCSymbol*>(*I), Size, 0);
+ else
+ Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0);
+ }
}
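
Each entry in DebugRangeSymbols is either a label (an address) or a null acting as a terminator word; per DWARF, a begin/end pair of zeros ends one range list. A small model of what the loop emits, printed as assembler directives (illustrative addresses only):

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  // Two [begin, end) address pairs followed by the 0,0 end-of-list marker.
  std::vector<uint64_t> Entries;
  Entries.push_back(0x1000); Entries.push_back(0x1040);
  Entries.push_back(0x1100); Entries.push_back(0x1180);
  Entries.push_back(0);      Entries.push_back(0);
  for (size_t i = 0; i < Entries.size(); ++i)
    std::printf(".quad 0x%llx\n", (unsigned long long)Entries[i]);
  return 0;
}
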
/// emitDebugMacInfo - Emit visible names into a debug macinfo section.
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index c7baf5f5d38d..b964b23fe6f8 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -32,8 +32,8 @@ class DbgVariable;
class MachineFrameInfo;
class MachineLocation;
class MachineModuleInfo;
+class MachineOperand;
class MCAsmInfo;
-class Timer;
class DIEAbbrev;
class DIE;
class DIEBlock;
@@ -174,24 +174,14 @@ class DwarfDebug {
/// (at the end of the module) as DW_AT_inline.
SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs;
+ /// ContainingTypeMap - This map is used to keep track of subprogram DIEs
+ /// that need the DW_AT_containing_type attribute. This attribute points to
+ /// a DIE that corresponds to the MDNode mapped to the subprogram DIE.
DenseMap<DIE *, MDNode *> ContainingTypeMap;
- /// AbstractSubprogramDIEs - Collection of abstruct subprogram DIEs.
- SmallPtrSet<DIE *, 4> AbstractSubprogramDIEs;
-
- /// TopLevelDIEs - Collection of top level DIEs.
- SmallPtrSet<DIE *, 4> TopLevelDIEs;
- SmallVector<DIE *, 4> TopLevelDIEsVector;
-
typedef SmallVector<DbgScope *, 2> ScopeVector;
- typedef DenseMap<const MachineInstr *, ScopeVector>
- InsnToDbgScopeMapTy;
-
- /// DbgScopeBeginMap - Maps instruction with a list of DbgScopes it starts.
- InsnToDbgScopeMapTy DbgScopeBeginMap;
-
- /// DbgScopeEndMap - Maps instruction with a list DbgScopes it ends.
- InsnToDbgScopeMapTy DbgScopeEndMap;
+ SmallPtrSet<const MachineInstr *, 8> InsnsBeginScopeSet;
+ SmallPtrSet<const MachineInstr *, 8> InsnsEndScopeSet;
/// InlineInfo - Keep track of inlined functions and their location. This
/// information is used to populate debug_inlined section.
@@ -199,18 +189,21 @@ class DwarfDebug {
DenseMap<MDNode*, SmallVector<InlineInfoLabels, 4> > InlineInfo;
SmallVector<MDNode *, 4> InlinedSPNodes;
- /// CompileUnitOffsets - A vector of the offsets of the compile units. This is
- /// used when calculating the "origin" of a concrete instance of an inlined
- /// function.
- DenseMap<CompileUnit *, unsigned> CompileUnitOffsets;
+ /// LabelsBeforeInsn - Maps an instruction to the label emitted before
+ /// that instruction.
+ DenseMap<const MachineInstr *, MCSymbol *> LabelsBeforeInsn;
+
+ /// LabelsAfterInsn - Maps an instruction to the label emitted after
+ /// that instruction.
+ DenseMap<const MachineInstr *, MCSymbol *> LabelsAfterInsn;
+
+ SmallVector<const MCSymbol *, 8> DebugRangeSymbols;
/// Previous instruction's location information. This is used to determine
/// label location to indicate scope boundaries in dwarf debug info.
DebugLoc PrevInstLoc;
+ MCSymbol *PrevLabel;
- /// DebugTimer - Timer for the Dwarf debug writer.
- Timer *DebugTimer;
-
struct FunctionDebugFrameInfo {
unsigned Number;
std::vector<MachineMove> Moves;
@@ -225,8 +218,9 @@ class DwarfDebug {
// the beginning of each supported dwarf section. These are used to form
// section offsets and are created by EmitSectionLabels.
MCSymbol *DwarfFrameSectionSym, *DwarfInfoSectionSym, *DwarfAbbrevSectionSym;
- MCSymbol *DwarfStrSectionSym, *TextSectionSym;
-
+ MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym;
+
+ MCSymbol *FunctionBeginSym;
private:
/// getSourceDirectoryAndFileIds - Return the directory and file ids that
@@ -311,6 +305,15 @@ private:
void addAddress(DIE *Die, unsigned Attribute,
const MachineLocation &Location);
+ /// addRegisterAddress - Add register location entry in variable DIE.
+ bool addRegisterAddress(DIE *Die, DbgVariable *DV, const MachineOperand &MO);
+
+ /// addConstantValue - Add constant value entry in variable DIE.
+ bool addConstantValue(DIE *Die, DbgVariable *DV, const MachineOperand &MO);
+
+ /// addConstantFPValue - Add constant FP value entry in variable DIE.
+ bool addConstantFPValue(DIE *Die, DbgVariable *DV, const MachineOperand &MO);
+
/// addComplexAddress - Start with the address based on the location provided,
/// and generate the DWARF information necessary to find the actual variable
/// (navigating the extra location information encoded in the type) based on
@@ -376,13 +379,8 @@ private:
/// createSubprogramDIE - Create new DIE using SP.
DIE *createSubprogramDIE(const DISubprogram &SP, bool MakeDecl = false);
- /// getUpdatedDbgScope - Find or create DbgScope assicated with
- /// the instruction. Initialize scope and update scope hierarchy.
- DbgScope *getUpdatedDbgScope(MDNode *N, const MachineInstr *MI,
- MDNode *InlinedAt);
-
- /// createDbgScope - Create DbgScope for the scope.
- void createDbgScope(MDNode *Scope, MDNode *InlinedAt);
+ /// getOrCreateDbgScope - Find or create a DbgScope for the given scope.
+ DbgScope *getOrCreateDbgScope(MDNode *Scope, MDNode *InlinedAt);
DbgScope *getOrCreateAbstractScope(MDNode *N);
@@ -531,14 +529,10 @@ private:
return Lines.size();
}
- /// getOrCreateSourceID - Public version of GetOrCreateSourceID. This can be
- /// timed. Look up the source id with the given directory and source file
- /// names. If none currently exists, create a new id and insert it in the
- /// SourceIds map. This can update DirectoryNames and SourceFileNames maps as
- /// well.
- unsigned getOrCreateSourceID(const std::string &DirName,
- const std::string &FileName);
-
+ /// identifyScopeMarkers() - Identify instructions that mark the beginning
+ /// or end of a scope.
+ void identifyScopeMarkers();
+
/// extractScopeInformation - Scan machine instructions in this function
/// and collect DbgScopes. Return true if at least one scope was found.
bool extractScopeInformation();
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index 72c97a43085c..0ff1036e89e2 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -33,7 +33,6 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/Timer.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
@@ -41,15 +40,9 @@ using namespace llvm;
DwarfException::DwarfException(AsmPrinter *A)
: Asm(A), MMI(Asm->MMI), shouldEmitTable(false), shouldEmitMoves(false),
- shouldEmitTableModule(false), shouldEmitMovesModule(false),
- ExceptionTimer(0) {
- if (TimePassesIsEnabled)
- ExceptionTimer = new Timer("DWARF Exception Writer");
-}
+ shouldEmitTableModule(false), shouldEmitMovesModule(false) {}
-DwarfException::~DwarfException() {
- delete ExceptionTimer;
-}
+DwarfException::~DwarfException() {}
/// EmitCIE - Emit a Common Information Entry (CIE). This holds information that
/// is shared among many Frame Description Entries. There is at least one CIE
@@ -159,8 +152,7 @@ void DwarfException::EmitCIE(const Function *PersonalityFn, unsigned Index) {
// On Darwin the linker honors the alignment of eh_frame, which means it must
// be 8-byte on 64-bit targets to match what gcc does. Otherwise you get
// holes which confuse readers of eh_frame.
- Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3,
- 0, 0, false);
+ Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3);
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common_end", Index));
}
@@ -262,8 +254,7 @@ void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) {
// On Darwin the linker honors the alignment of eh_frame, which means it
// must be 8-byte on 64-bit targets to match what gcc does. Otherwise you
// get holes which confuse readers of eh_frame.
- Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3,
- 0, 0, false);
+ Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3);
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_end",
EHFrameInfo.Number));
@@ -432,7 +423,7 @@ bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) {
if (!MO.isGlobal()) continue;
- Function *F = dyn_cast<Function>(MO.getGlobal());
+ const Function *F = dyn_cast<Function>(MO.getGlobal());
if (F == 0) continue;
if (SawFunc) {
@@ -586,7 +577,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
/// 3. Type ID table contains references to all the C++ typeinfo for all
/// catches in the function. This tables is reverse indexed base 1.
void DwarfException::EmitExceptionTable() {
- const std::vector<GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+ const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
@@ -692,7 +683,7 @@ void DwarfException::EmitExceptionTable() {
// Begin the exception table.
Asm->OutStreamer.SwitchSection(LSDASection);
- Asm->EmitAlignment(2, 0, 0, false);
+ Asm->EmitAlignment(2);
// Emit the LSDA.
MCSymbol *GCCETSym =
@@ -868,7 +859,7 @@ void DwarfException::EmitExceptionTable() {
Asm->OutStreamer.AddComment("-- Catch TypeInfos --");
Asm->OutStreamer.AddBlankLine();
}
- for (std::vector<GlobalVariable *>::const_reverse_iterator
+ for (std::vector<const GlobalVariable *>::const_reverse_iterator
I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) {
const GlobalVariable *GV = *I;
@@ -891,7 +882,7 @@ void DwarfException::EmitExceptionTable() {
Asm->EmitULEB128(TypeID, TypeID != 0 ? "Exception specification" : 0);
}
- Asm->EmitAlignment(2, 0, 0, false);
+ Asm->EmitAlignment(2);
}
/// EndModule - Emit all exception information that should come after the
@@ -903,9 +894,7 @@ void DwarfException::EndModule() {
if (!shouldEmitMovesModule && !shouldEmitTableModule)
return;
- TimeRegion Timer(ExceptionTimer);
-
- const std::vector<Function *> Personalities = MMI->getPersonalities();
+ const std::vector<const Function *> Personalities = MMI->getPersonalities();
for (unsigned I = 0, E = Personalities.size(); I < E; ++I)
EmitCIE(Personalities[I], I);
@@ -918,7 +907,6 @@ void DwarfException::EndModule() {
/// BeginFunction - Gather pre-function exception information. Assumes it's
/// being emitted immediately after the function entry point.
void DwarfException::BeginFunction(const MachineFunction *MF) {
- TimeRegion Timer(ExceptionTimer);
shouldEmitTable = shouldEmitMoves = false;
// If any landing pads survive, we need an EH table.
@@ -942,7 +930,6 @@ void DwarfException::BeginFunction(const MachineFunction *MF) {
void DwarfException::EndFunction() {
if (!shouldEmitMoves && !shouldEmitTable) return;
- TimeRegion Timer(ExceptionTimer);
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
Asm->getFunctionNumber()));
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
index f35c0b616c1f..5839f8c0d4aa 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -28,7 +28,6 @@ class MachineFunction;
class MCAsmInfo;
class MCExpr;
class MCSymbol;
-class Timer;
class Function;
class AsmPrinter;
@@ -82,9 +81,6 @@ class DwarfException {
/// should be emitted.
bool shouldEmitMovesModule;
- /// ExceptionTimer - Timer for the Dwarf exception writer.
- Timer *ExceptionTimer;
-
/// EmitCIE - Emit a Common Information Entry (CIE). This holds information
/// that is shared among many Frame Description Entries. There is at least
/// one CIE in every non-empty .debug_frame section.
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index 1db178f020ec..a8c3c7b21765 100644
--- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -109,13 +109,11 @@ void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) {
uint64_t FrameSize = FI.getFrameSize();
if (FrameSize >= 1<<16) {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "Function '" << FI.getFunction().getName()
- << "' is too large for the ocaml GC! "
- << "Frame size " << FrameSize << " >= 65536.\n";
- Msg << "(" << uintptr_t(&FI) << ")";
- llvm_report_error(Msg.str()); // Very rude!
+ // Very rude!
+ report_fatal_error("Function '" + FI.getFunction().getName() +
+ "' is too large for the ocaml GC! "
+ "Frame size " + Twine(FrameSize) + ">= 65536.\n"
+ "(" + Twine(uintptr_t(&FI)) + ")");
}
AP.OutStreamer.AddComment("live roots for " +
@@ -125,12 +123,10 @@ void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) {
for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) {
size_t LiveCount = FI.live_size(J);
if (LiveCount >= 1<<16) {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "Function '" << FI.getFunction().getName()
- << "' is too large for the ocaml GC! "
- << "Live root count " << LiveCount << " >= 65536.";
- llvm_report_error(Msg.str()); // Very rude!
+ // Very rude!
+ report_fatal_error("Function '" + FI.getFunction().getName() +
+ "' is too large for the ocaml GC! "
+ "Live root count "+Twine(LiveCount)+" >= 65536.");
}
AP.OutStreamer.EmitSymbolValue(J->Label, IntPtrSize, 0);
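
Both diagnostics now build their message as a Twine, which concatenates lazily when report_fatal_error consumes it, instead of materializing a std::string through raw_string_ostream. A minimal usage sketch against the LLVM headers; treat the exact include paths and helper name as assumptions for this era of the tree:

#include "llvm/ADT/Twine.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
using namespace llvm;

// The Twine tree only renders when consumed (here by errs(); in the
// patch, by report_fatal_error). warnFrameSize is a hypothetical helper.
static void warnFrameSize(StringRef FnName, uint64_t FrameSize) {
  errs() << "Function '" + FnName + "' frame size " + Twine(FrameSize) +
            " >= 65536.\n";
}
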
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 8f519407ccd4..9dec22ec78a3 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -264,14 +264,8 @@ static unsigned HashMachineInstr(const MachineInstr *MI) {
return Hash;
}
-/// HashEndOfMBB - Hash the last few instructions in the MBB. For blocks
-/// with no successors, we hash two instructions, because cross-jumping
-/// only saves code when at least two instructions are removed (since a
-/// branch must be inserted). For blocks with a successor, one of the
-/// two blocks to be tail-merged will end with a branch already, so
-/// it gains to cross-jump even for one instruction.
-static unsigned HashEndOfMBB(const MachineBasicBlock *MBB,
- unsigned minCommonTailLength) {
+/// HashEndOfMBB - Hash the last instruction in the MBB.
+static unsigned HashEndOfMBB(const MachineBasicBlock *MBB) {
MachineBasicBlock::const_iterator I = MBB->end();
if (I == MBB->begin())
return 0; // Empty MBB.
@@ -283,20 +277,8 @@ static unsigned HashEndOfMBB(const MachineBasicBlock *MBB,
return 0; // MBB empty except for debug info.
--I;
}
- unsigned Hash = HashMachineInstr(I);
- if (I == MBB->begin() || minCommonTailLength == 1)
- return Hash; // Single instr MBB.
-
- --I;
- while (I->isDebugValue()) {
- if (I==MBB->begin())
- return Hash; // MBB with single non-debug instr.
- --I;
- }
- // Hash in the second-to-last instruction.
- Hash ^= HashMachineInstr(I) << 2;
- return Hash;
+ return HashMachineInstr(I);
}
/// ComputeCommonTailLength - Given two machine basic blocks, compute the number
@@ -811,7 +793,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
MergePotentials.clear();
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
if (I->succ_empty())
- MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(I, 2U), I));
+ MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(I), I));
}
// See if we can do any tail merging on those.
@@ -897,8 +879,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
// reinsert conditional branch only, for now
TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond);
}
- MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB, 1U),
- *P));
+ MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P));
}
}
if (MergePotentials.size() >= 2)
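
With the hash reduced to the last real instruction, single-instruction tails become merge candidates; the helper still needs to skip trailing DBG_VALUEs. The backward scan in isolation, as a generic sketch:

#include <vector>

struct Instr { bool IsDebug; unsigned Opcode; };

// Opcode of the last non-debug instruction, or 0 for a block that is
// empty (or empty except for debug info), matching the contract the
// rewritten HashEndOfMBB follows before hashing.
unsigned lastRealOpcode(const std::vector<Instr> &Block) {
  for (std::vector<Instr>::const_reverse_iterator I = Block.rbegin(),
       E = Block.rend(); I != E; ++I)
    if (!I->IsDebug)
      return I->Opcode;
  return 0;
}
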
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 62d18836c93e..3e38872a36d6 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -1,4 +1,5 @@
add_llvm_library(LLVMCodeGen
+ Analysis.cpp
AggressiveAntiDepBreaker.cpp
BranchFolding.cpp
CalcSpillWeights.cpp
@@ -49,6 +50,7 @@ add_llvm_library(LLVMCodeGen
ProcessImplicitDefs.cpp
PrologEpilogInserter.cpp
PseudoSourceValue.cpp
+ RegAllocFast.cpp
RegAllocLinearScan.cpp
RegAllocLocal.cpp
RegAllocPBQP.cpp
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 7d3de89ada2a..759fbaaef30a 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -143,13 +143,13 @@ void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
/// critical path.
-static SDep *CriticalPathStep(SUnit *SU) {
- SDep *Next = 0;
+static const SDep *CriticalPathStep(const SUnit *SU) {
+ const SDep *Next = 0;
unsigned NextDepth = 0;
// Find the predecessor edge with the greatest depth.
- for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
+ for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
P != PE; ++P) {
- SUnit *PredSU = P->getSUnit();
+ const SUnit *PredSU = P->getSUnit();
unsigned PredLatency = P->getLatency();
unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
// In the case of a latency tie, prefer an anti-dependency edge over
@@ -326,18 +326,18 @@ CriticalAntiDepBreaker::findSuitableFreeRegister(MachineInstr *MI,
}
unsigned CriticalAntiDepBreaker::
-BreakAntiDependencies(std::vector<SUnit>& SUnits,
- MachineBasicBlock::iterator& Begin,
- MachineBasicBlock::iterator& End,
+BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
unsigned InsertPosIndex) {
// The code below assumes that there is at least one instruction,
// so just duck out immediately if the block is empty.
if (SUnits.empty()) return 0;
// Find the node at the bottom of the critical path.
- SUnit *Max = 0;
+ const SUnit *Max = 0;
for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
- SUnit *SU = &SUnits[i];
+ const SUnit *SU = &SUnits[i];
if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
Max = SU;
}
@@ -357,7 +357,7 @@ BreakAntiDependencies(std::vector<SUnit>& SUnits,
// Track progress along the critical path through the SUnit graph as we walk
// the instructions.
- SUnit *CriticalPathSU = Max;
+ const SUnit *CriticalPathSU = Max;
MachineInstr *CriticalPathMI = CriticalPathSU->getInstr();
// Consider this pattern:
@@ -429,8 +429,8 @@ BreakAntiDependencies(std::vector<SUnit>& SUnits,
// the anti-dependencies in an instruction in order to be effective.
unsigned AntiDepReg = 0;
if (MI == CriticalPathMI) {
- if (SDep *Edge = CriticalPathStep(CriticalPathSU)) {
- SUnit *NextSU = Edge->getSUnit();
+ if (const SDep *Edge = CriticalPathStep(CriticalPathSU)) {
+ const SUnit *NextSU = Edge->getSUnit();
// Only consider anti-dependence edges.
if (Edge->getKind() == SDep::Anti) {
@@ -452,7 +452,7 @@ BreakAntiDependencies(std::vector<SUnit>& SUnits,
// Also, if there are dependencies on other SUnits with the
// same register as the anti-dependency, don't attempt to
// break it.
- for (SUnit::pred_iterator P = CriticalPathSU->Preds.begin(),
+ for (SUnit::const_pred_iterator P = CriticalPathSU->Preds.begin(),
PE = CriticalPathSU->Preds.end(); P != PE; ++P)
if (P->getSUnit() == NextSU ?
(P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index 9e8db022621a..cc42dd2b8e32 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -72,9 +72,9 @@ namespace llvm {
/// path
/// of the ScheduleDAG and break them by renaming registers.
///
- unsigned BreakAntiDependencies(std::vector<SUnit>& SUnits,
- MachineBasicBlock::iterator& Begin,
- MachineBasicBlock::iterator& End,
+ unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
unsigned InsertPosIndex);
/// Observe - Update liveness information to account for the current
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index 7dbfd7d168bd..f6739f434044 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -34,6 +34,7 @@ STATISTIC(NumStackTempsIntroduced, "Number of stack temporaries introduced");
namespace {
class DwarfEHPrepare : public FunctionPass {
+ const TargetMachine *TM;
const TargetLowering *TLI;
bool CompileFast;
@@ -84,7 +85,7 @@ namespace {
}
/// CleanupSelectors - Any remaining eh.selector intrinsic calls which still
- /// use the ".llvm.eh.catch.all.value" call need to convert to using it's
+ /// use the ".llvm.eh.catch.all.value" call need to convert to using its
/// initializer instead.
bool CleanupSelectors();
@@ -154,8 +155,9 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid.
- DwarfEHPrepare(const TargetLowering *tli, bool fast) :
- FunctionPass(&ID), TLI(tli), CompileFast(fast),
+ DwarfEHPrepare(const TargetMachine *tm, bool fast) :
+ FunctionPass(&ID), TM(tm), TLI(TM->getTargetLowering()),
+ CompileFast(fast),
ExceptionValueIntrinsic(0), SelectorIntrinsic(0),
URoR(0), EHCatchAllValue(0), RewindFunction(0) {}
@@ -180,8 +182,8 @@ namespace {
char DwarfEHPrepare::ID = 0;
-FunctionPass *llvm::createDwarfEHPass(const TargetLowering *tli, bool fast) {
- return new DwarfEHPrepare(tli, fast);
+FunctionPass *llvm::createDwarfEHPass(const TargetMachine *tm, bool fast) {
+ return new DwarfEHPrepare(tm, fast);
}
/// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups.
@@ -218,7 +220,7 @@ FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes) {
}
/// CleanupSelectors - Any remaining eh.selector intrinsic calls which still use
-/// the ".llvm.eh.catch.all.value" call need to convert to using it's
+/// the ".llvm.eh.catch.all.value" call need to convert to using its
/// initializer instead.
bool DwarfEHPrepare::CleanupSelectors() {
if (!EHCatchAllValue) return false;
@@ -421,7 +423,7 @@ bool DwarfEHPrepare::HandleURoRInvokes() {
bool DwarfEHPrepare::NormalizeLandingPads() {
bool Changed = false;
- const MCAsmInfo *MAI = TLI->getTargetMachine().getMCAsmInfo();
+ const MCAsmInfo *MAI = TM->getMCAsmInfo();
bool usingSjLjEH = MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp
index eda167cb54a5..b644ebeb4be5 100644
--- a/lib/CodeGen/ELFWriter.cpp
+++ b/lib/CodeGen/ELFWriter.cpp
@@ -208,7 +208,7 @@ ELFSection &ELFWriter::getDtorSection() {
}
// getTextSection - Get the text section for the specified function
-ELFSection &ELFWriter::getTextSection(Function *F) {
+ELFSection &ELFWriter::getTextSection(const Function *F) {
const MCSectionELF *Text =
(const MCSectionELF *)TLOF.SectionForGlobal(F, Mang, TM);
return getSection(Text->getSectionName(), Text->getType(), Text->getFlags());
@@ -507,7 +507,7 @@ void ELFWriter::EmitGlobalConstant(const Constant *CV, ELFSection &GblS) {
std::string msg;
raw_string_ostream ErrorMsg(msg);
ErrorMsg << "Constant unimp for type: " << *CV->getType();
- llvm_report_error(ErrorMsg.str());
+ report_fatal_error(ErrorMsg.str());
}
// ResolveConstantExpr - Resolve the constant expression until it stop
@@ -572,10 +572,8 @@ CstExprResTy ELFWriter::ResolveConstantExpr(const Constant *CV) {
}
}
- std::string msg(CE->getOpcodeName());
- raw_string_ostream ErrorMsg(msg);
- ErrorMsg << ": Unsupported ConstantExpr type";
- llvm_report_error(ErrorMsg.str());
+ report_fatal_error(CE->getOpcodeName() +
+ StringRef(": Unsupported ConstantExpr type"));
return std::make_pair(CV, 0); // silence warning
}
diff --git a/lib/CodeGen/ELFWriter.h b/lib/CodeGen/ELFWriter.h
index b61b4848b654..db66ecc6dd83 100644
--- a/lib/CodeGen/ELFWriter.h
+++ b/lib/CodeGen/ELFWriter.h
@@ -191,7 +191,7 @@ namespace llvm {
ELFSection &getDtorSection();
ELFSection &getJumpTableSection();
ELFSection &getConstantPoolSection(MachineConstantPoolEntry &CPE);
- ELFSection &getTextSection(Function *F);
+ ELFSection &getTextSection(const Function *F);
ELFSection &getRelocSection(ELFSection &S);
// Helpers for obtaining ELF specific info.
diff --git a/lib/CodeGen/ExactHazardRecognizer.cpp b/lib/CodeGen/ExactHazardRecognizer.cpp
index 61959bba0e23..af5f2892c2f0 100644
--- a/lib/CodeGen/ExactHazardRecognizer.cpp
+++ b/lib/CodeGen/ExactHazardRecognizer.cpp
@@ -29,7 +29,7 @@ ExactHazardRecognizer(const InstrItineraryData &LItinData) :
// Determine the maximum depth of any itinerary. This determines the
// depth of the scoreboard. We always make the scoreboard at least 1
// cycle deep to avoid dealing with the boundary condition.
- ScoreboardDepth = 1;
+ unsigned ScoreboardDepth = 1;
if (!ItinData.isEmpty()) {
for (unsigned idx = 0; ; ++idx) {
if (ItinData.isEndMarker(idx))
@@ -45,35 +45,27 @@ ExactHazardRecognizer(const InstrItineraryData &LItinData) :
}
}
- Scoreboard = new unsigned[ScoreboardDepth];
- ScoreboardHead = 0;
+ ReservedScoreboard.reset(ScoreboardDepth);
+ RequiredScoreboard.reset(ScoreboardDepth);
DEBUG(dbgs() << "Using exact hazard recognizer: ScoreboardDepth = "
<< ScoreboardDepth << '\n');
}
-ExactHazardRecognizer::~ExactHazardRecognizer() {
- delete [] Scoreboard;
-}
-
void ExactHazardRecognizer::Reset() {
- memset(Scoreboard, 0, ScoreboardDepth * sizeof(unsigned));
- ScoreboardHead = 0;
+ RequiredScoreboard.reset();
+ ReservedScoreboard.reset();
}
-unsigned ExactHazardRecognizer::getFutureIndex(unsigned offset) {
- return (ScoreboardHead + offset) % ScoreboardDepth;
-}
-
-void ExactHazardRecognizer::dumpScoreboard() {
+void ExactHazardRecognizer::ScoreBoard::dump() const {
dbgs() << "Scoreboard:\n";
-
- unsigned last = ScoreboardDepth - 1;
- while ((last > 0) && (Scoreboard[getFutureIndex(last)] == 0))
+
+ unsigned last = Depth - 1;
+ while ((last > 0) && ((*this)[last] == 0))
last--;
for (unsigned i = 0; i <= last; i++) {
- unsigned FUs = Scoreboard[getFutureIndex(i)];
+ unsigned FUs = (*this)[i];
dbgs() << "\t";
for (int j = 31; j >= 0; j--)
dbgs() << ((FUs & (1 << j)) ? '1' : '0');
@@ -96,11 +88,23 @@ ExactHazardRecognizer::HazardType ExactHazardRecognizer::getHazardType(SUnit *SU
// stage is occupied. FIXME it would be more accurate to find the
// same unit free in all the cycles.
for (unsigned int i = 0; i < IS->getCycles(); ++i) {
- assert(((cycle + i) < ScoreboardDepth) &&
+ assert(((cycle + i) < RequiredScoreboard.getDepth()) &&
"Scoreboard depth exceeded!");
-
- unsigned index = getFutureIndex(cycle + i);
- unsigned freeUnits = IS->getUnits() & ~Scoreboard[index];
+
+ unsigned freeUnits = IS->getUnits();
+ switch (IS->getReservationKind()) {
+ default:
+ assert(0 && "Invalid FU reservation");
+ case InstrStage::Required:
+ // Required FUs conflict with both reserved and required ones
+ freeUnits &= ~ReservedScoreboard[cycle + i];
+ // FALLTHROUGH
+ case InstrStage::Reserved:
+ // Reserved FUs can conflict only with required ones.
+ freeUnits &= ~RequiredScoreboard[cycle + i];
+ break;
+ }
+
if (!freeUnits) {
DEBUG(dbgs() << "*** Hazard in cycle " << (cycle + i) << ", ");
DEBUG(dbgs() << "SU(" << SU->NodeNum << "): ");
@@ -108,14 +112,14 @@ ExactHazardRecognizer::HazardType ExactHazardRecognizer::getHazardType(SUnit *SU
return Hazard;
}
}
-
+
// Advance the cycle to the next stage.
cycle += IS->getNextCycles();
}
return NoHazard;
}
-
+
void ExactHazardRecognizer::EmitInstruction(SUnit *SU) {
if (ItinData.isEmpty())
return;
@@ -125,37 +129,52 @@ void ExactHazardRecognizer::EmitInstruction(SUnit *SU) {
// Use the itinerary for the underlying instruction to reserve FU's
// in the scoreboard at the appropriate future cycles.
unsigned idx = SU->getInstr()->getDesc().getSchedClass();
- for (const InstrStage *IS = ItinData.beginStage(idx),
+ for (const InstrStage *IS = ItinData.beginStage(idx),
*E = ItinData.endStage(idx); IS != E; ++IS) {
// We must reserve one of the stage's units for every cycle the
// stage is occupied. FIXME it would be more accurate to reserve
// the same unit free in all the cycles.
for (unsigned int i = 0; i < IS->getCycles(); ++i) {
- assert(((cycle + i) < ScoreboardDepth) &&
+ assert(((cycle + i) < RequiredScoreboard.getDepth()) &&
"Scoreboard depth exceeded!");
-
- unsigned index = getFutureIndex(cycle + i);
- unsigned freeUnits = IS->getUnits() & ~Scoreboard[index];
-
+
+ unsigned freeUnits = IS->getUnits();
+ switch (IS->getReservationKind()) {
+ default:
+ assert(0 && "Invalid FU reservation");
+ case InstrStage::Required:
+ // Required FUs conflict with both reserved and required ones
+ freeUnits &= ~ReservedScoreboard[cycle + i];
+ // FALLTHROUGH
+ case InstrStage::Reserved:
+ // Reserved FUs can conflict only with required ones.
+ freeUnits &= ~RequiredScoreboard[cycle + i];
+ break;
+ }
+
// reduce to a single unit
unsigned freeUnit = 0;
do {
freeUnit = freeUnits;
freeUnits = freeUnit & (freeUnit - 1);
} while (freeUnits);
-
+
assert(freeUnit && "No function unit available!");
- Scoreboard[index] |= freeUnit;
+ if (IS->getReservationKind() == InstrStage::Required)
+ RequiredScoreboard[cycle + i] |= freeUnit;
+ else
+ ReservedScoreboard[cycle + i] |= freeUnit;
}
-
+
// Advance the cycle to the next stage.
cycle += IS->getNextCycles();
}
-
- DEBUG(dumpScoreboard());
+
+ DEBUG(ReservedScoreboard.dump());
+ DEBUG(RequiredScoreboard.dump());
}
-
+
void ExactHazardRecognizer::AdvanceCycle() {
- Scoreboard[ScoreboardHead] = 0;
- ScoreboardHead = getFutureIndex(1);
+ ReservedScoreboard[0] = 0; ReservedScoreboard.advance();
+ RequiredScoreboard[0] = 0; RequiredScoreboard.advance();
}
diff --git a/lib/CodeGen/ExactHazardRecognizer.h b/lib/CodeGen/ExactHazardRecognizer.h
index 71ac979e6cd8..91c81a970fa5 100644
--- a/lib/CodeGen/ExactHazardRecognizer.h
+++ b/lib/CodeGen/ExactHazardRecognizer.h
@@ -22,35 +22,60 @@
namespace llvm {
class ExactHazardRecognizer : public ScheduleHazardRecognizer {
- // Itinerary data for the target.
- const InstrItineraryData &ItinData;
-
- // Scoreboard to track function unit usage. Scoreboard[0] is a
+ // ScoreBoard to track function unit usage. ScoreBoard[0] is a
// mask of the FUs in use in the cycle currently being
- // schedule. Scoreboard[1] is a mask for the next cycle. The
- // Scoreboard is used as a circular buffer with the current cycle
- // indicated by ScoreboardHead.
- unsigned *Scoreboard;
+ // scheduled. ScoreBoard[1] is a mask for the next cycle. The
+ // ScoreBoard is used as a circular buffer with the current cycle
+ // indicated by Head.
+ class ScoreBoard {
+ unsigned *Data;
+
+ // The maximum number of cycles monitored by the Scoreboard. This
+ // value is determined based on the target itineraries to ensure
+ // that all hazards can be tracked.
+ size_t Depth;
+ // Index into the Scoreboard that represents the current cycle.
+ size_t Head;
+ public:
+ ScoreBoard() : Data(NULL), Depth(0), Head(0) { }
+ ~ScoreBoard() {
+ delete[] Data;
+ }
+
+ size_t getDepth() const { return Depth; }
+ unsigned& operator[](size_t idx) const {
+ assert(Depth && "ScoreBoard was not initialized properly!");
+
+ return Data[(Head + idx) % Depth];
+ }
- // The maximum number of cycles monitored by the Scoreboard. This
- // value is determined based on the target itineraries to ensure
- // that all hazards can be tracked.
- unsigned ScoreboardDepth;
+ void reset(size_t d = 1) {
+ if (Data == NULL) {
+ Depth = d;
+ Data = new unsigned[Depth];
+ }
- // Indices into the Scoreboard that represent the current cycle.
- unsigned ScoreboardHead;
+ memset(Data, 0, Depth * sizeof(Data[0]));
+ Head = 0;
+ }
- // Return the scoreboard index to use for 'offset' cycles in the
- // future. 'offset' of 0 returns ScoreboardHead.
- unsigned getFutureIndex(unsigned offset);
+ void advance() {
+ Head = (Head + 1) % Depth;
+ }
- // Print the scoreboard.
- void dumpScoreboard();
+ // Print the scoreboard.
+ void dump() const;
+ };
+
+ // Itinerary data for the target.
+ const InstrItineraryData &ItinData;
+
+ ScoreBoard ReservedScoreboard;
+ ScoreBoard RequiredScoreboard;
public:
ExactHazardRecognizer(const InstrItineraryData &ItinData);
- ~ExactHazardRecognizer();
-
+
virtual HazardType getHazardType(SUnit *SU);
virtual void Reset();
virtual void EmitInstruction(SUnit *SU);
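
The ScoreBoard class hides the circular-buffer arithmetic that the old getFutureIndex helper exposed: operator[] indexes relative to Head, and advance() rotates the window by one cycle. A standalone sketch of the same indexing behaviour (simplified; the real class also supports deferred sizing via reset):

#include <cassert>
#include <cstdio>
#include <vector>

class CircularBoard {
  std::vector<unsigned> Data;
  size_t Head;
public:
  explicit CircularBoard(size_t Depth) : Data(Depth, 0), Head(0) {}
  unsigned &operator[](size_t Idx) {
    assert(!Data.empty() && "board was not initialized");
    return Data[(Head + Idx) % Data.size()];
  }
  void advance() { Head = (Head + 1) % Data.size(); }
};

int main() {
  CircularBoard B(4);
  B[0] = 1;                  // mark a unit busy in the current cycle
  B.advance();               // retire the cycle: old B[1] is the new B[0]
  std::printf("%u\n", B[3]); // prints 1: the old cycle wrapped around
  return 0;
}
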
diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp
index 6d7cc51547d2..790cb2164897 100644
--- a/lib/CodeGen/GCStrategy.cpp
+++ b/lib/CodeGen/GCStrategy.cpp
@@ -181,9 +181,10 @@ bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots,
for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I)
if (!InitedRoots.count(*I)) {
- new StoreInst(ConstantPointerNull::get(cast<PointerType>(
- cast<PointerType>((*I)->getType())->getElementType())),
- *I, IP);
+ StoreInst* SI = new StoreInst(ConstantPointerNull::get(cast<PointerType>(
+ cast<PointerType>((*I)->getType())->getElementType())),
+ *I);
+ SI->insertAfter(*I);
MadeChange = true;
}
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index 87ab7ef52e54..e1c52f72ac42 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -331,15 +331,15 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
IRBuilder<> Builder(CI->getParent(), CI);
LLVMContext &Context = CI->getContext();
- Function *Callee = CI->getCalledFunction();
+ const Function *Callee = CI->getCalledFunction();
assert(Callee && "Cannot lower an indirect call!");
switch (Callee->getIntrinsicID()) {
case Intrinsic::not_intrinsic:
- llvm_report_error("Cannot lower a call to a non-intrinsic function '"+
+ report_fatal_error("Cannot lower a call to a non-intrinsic function '"+
Callee->getName() + "'!");
default:
- llvm_report_error("Code generator does not support intrinsic function '"+
+ report_fatal_error("Code generator does not support intrinsic function '"+
Callee->getName()+"'!");
// The setjmp/longjmp intrinsics should only exist in the code if it was
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index ed57f4cb101f..331dc7d4af6e 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -50,6 +50,9 @@ static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden,
cl::desc("Disable Stack Slot Coloring"));
static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden,
cl::desc("Disable Machine LICM"));
+static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm",
+ cl::Hidden,
+ cl::desc("Disable Machine LICM"));
static cl::opt<bool> DisableMachineSink("disable-machine-sink", cl::Hidden,
cl::desc("Disable Machine Sinking"));
static cl::opt<bool> DisableLSR("disable-lsr", cl::Hidden,
@@ -245,10 +248,10 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
// pad is shared by multiple invokes and is also a target of a normal
// edge from elsewhere.
PM.add(createSjLjEHPass(getTargetLowering()));
- PM.add(createDwarfEHPass(getTargetLowering(), OptLevel==CodeGenOpt::None));
+ PM.add(createDwarfEHPass(this, OptLevel==CodeGenOpt::None));
break;
case ExceptionHandling::Dwarf:
- PM.add(createDwarfEHPass(getTargetLowering(), OptLevel==CodeGenOpt::None));
+ PM.add(createDwarfEHPass(this, OptLevel==CodeGenOpt::None));
break;
case ExceptionHandling::None:
PM.add(createLowerInvokePass(getTargetLowering()));
@@ -337,12 +340,18 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
PM.add(createRegisterAllocator());
printAndVerify(PM, "After Register Allocation");
- // Perform stack slot coloring.
- if (OptLevel != CodeGenOpt::None && !DisableSSC) {
+ // Perform stack slot coloring and post-ra machine LICM.
+ if (OptLevel != CodeGenOpt::None) {
// FIXME: Re-enable coloring with register when it's capable of adding
// kill markers.
- PM.add(createStackSlotColoringPass(false));
- printAndVerify(PM, "After StackSlotColoring");
+ if (!DisableSSC)
+ PM.add(createStackSlotColoringPass(false));
+
+ // Run post-ra machine LICM to hoist reloads / remats.
+ if (!DisablePostRAMachineLICM)
+ PM.add(createMachineLICMPass(false));
+
+ printAndVerify(PM, "After StackSlotColoring and post-RA Machine LICM");
}
// Run post-ra passes.
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 23cff0786a5e..26a7190110f9 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -665,10 +665,11 @@ void LiveIntervals::computeIntervals() {
// Track the index of the current machine instr.
SlotIndex MIIndex = getMBBStartIdx(MBB);
- DEBUG(dbgs() << MBB->getName() << ":\n");
+ DEBUG(dbgs() << "BB#" << MBB->getNumber()
+ << ":\t\t# derived from " << MBB->getName() << "\n");
// Create intervals for live-ins to this BB first.
- for (MachineBasicBlock::const_livein_iterator LI = MBB->livein_begin(),
+ for (MachineBasicBlock::livein_iterator LI = MBB->livein_begin(),
LE = MBB->livein_end(); LI != LE; ++LI) {
handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*LI));
// Multiple live-ins can alias the same register.
@@ -1296,9 +1297,26 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
MachineOperand &O = ri.getOperand();
++ri;
if (MI->isDebugValue()) {
- // Remove debug info for now.
- O.setReg(0U);
+ // Modify DBG_VALUE now that the value is in a spill slot.
+ if (Slot != VirtRegMap::MAX_STACK_SLOT || isLoadSS) {
+ uint64_t Offset = MI->getOperand(1).getImm();
+ const MDNode *MDPtr = MI->getOperand(2).getMetadata();
+ DebugLoc DL = MI->getDebugLoc();
+ int FI = isLoadSS ? LdSlot : (int)Slot;
+ if (MachineInstr *NewDV = tii_->emitFrameIndexDebugValue(*mf_, FI,
+ Offset, MDPtr, DL)) {
+ DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI);
+ ReplaceMachineInstrInMaps(MI, NewDV);
+ MachineBasicBlock *MBB = MI->getParent();
+ MBB->insert(MBB->erase(MI), NewDV);
+ continue;
+ }
+ }
+
DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI);
+ RemoveMachineInstrFromMaps(MI);
+ vrm.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
continue;
}
assert(!O.isImplicit() && "Spilling register that's used as implicit use?");
@@ -2085,7 +2103,7 @@ bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li,
<< "constraints:\n";
MI->print(Msg, tm_);
}
- llvm_report_error(Msg.str());
+ report_fatal_error(Msg.str());
}
for (const unsigned* AS = tri_->getSubRegisters(PReg); *AS; ++AS) {
if (!hasInterval(*AS))
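The DBG_VALUE handling in the spill-rewriting hunk above boils down to a two-way policy; restated as a sketch using the same calls the hunk introduces, where MI is a DBG_VALUE whose value now lives in frame index FI:

    if (MachineInstr *NewDV =
            tii_->emitFrameIndexDebugValue(*mf_, FI, Offset, MDPtr, DL)) {
      // The target can describe the variable via its stack slot: swap in
      // the new DBG_VALUE at the same position.
      ReplaceMachineInstrInMaps(MI, NewDV);
      MachineBasicBlock *MBB = MI->getParent();
      MBB->insert(MBB->erase(MI), NewDV);
    } else {
      // No frame-index form available: drop the debug value rather than
      // keep a stale register reference.
      RemoveMachineInstrFromMaps(MI);
      vrm.RemoveMachineInstrFromMaps(MI);
      MI->eraseFromParent();
    }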
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index ca8ecff8e61d..079684eea079 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -531,7 +531,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
// Mark live-in registers as live-in.
SmallVector<unsigned, 4> Defs;
- for (MachineBasicBlock::const_livein_iterator II = MBB->livein_begin(),
+ for (MachineBasicBlock::livein_iterator II = MBB->livein_begin(),
EE = MBB->livein_end(); II != EE; ++II) {
assert(TargetRegisterInfo::isPhysicalRegister(*II) &&
"Cannot have a live-in virtual register!");
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index bd0ccb40f53b..eaaa1f85b563 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -191,7 +191,7 @@ void MachineBasicBlock::print(raw_ostream &OS) const {
const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
if (!livein_empty()) {
OS << " Live Ins:";
- for (const_livein_iterator I = livein_begin(),E = livein_end(); I != E; ++I)
+ for (livein_iterator I = livein_begin(),E = livein_end(); I != E; ++I)
OutputReg(OS, *I, TRI);
OS << '\n';
}
@@ -218,13 +218,14 @@ void MachineBasicBlock::print(raw_ostream &OS) const {
}
void MachineBasicBlock::removeLiveIn(unsigned Reg) {
- livein_iterator I = std::find(livein_begin(), livein_end(), Reg);
- assert(I != livein_end() && "Not a live in!");
+ std::vector<unsigned>::iterator I =
+ std::find(LiveIns.begin(), LiveIns.end(), Reg);
+ assert(I != LiveIns.end() && "Not a live in!");
LiveIns.erase(I);
}
bool MachineBasicBlock::isLiveIn(unsigned Reg) const {
- const_livein_iterator I = std::find(livein_begin(), livein_end(), Reg);
+ livein_iterator I = std::find(livein_begin(), livein_end(), Reg);
return I != livein_end();
}
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 597d51d4f370..84c3d717f7f9 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Debug.h"
@@ -51,9 +52,12 @@ namespace {
}
private:
- unsigned CurrVN;
+ typedef ScopedHashTableScope<MachineInstr*, unsigned,
+ MachineInstrExpressionTrait> ScopeType;
+ DenseMap<MachineBasicBlock*, ScopeType*> ScopeMap;
ScopedHashTable<MachineInstr*, unsigned, MachineInstrExpressionTrait> VNT;
SmallVector<MachineInstr*, 64> Exps;
+ unsigned CurrVN;
bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB);
bool isPhysDefTriviallyDead(unsigned Reg,
@@ -63,7 +67,13 @@ namespace {
bool isCSECandidate(MachineInstr *MI);
bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
MachineInstr *CSMI, MachineInstr *MI);
- bool ProcessBlock(MachineDomTreeNode *Node);
+ void EnterScope(MachineBasicBlock *MBB);
+ void ExitScope(MachineBasicBlock *MBB);
+ bool ProcessBlock(MachineBasicBlock *MBB);
+ void ExitScopeIfDone(MachineDomTreeNode *Node,
+ DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
+ DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap);
+ bool PerformCSE(MachineDomTreeNode *Node);
};
} // end anonymous namespace
@@ -277,13 +287,24 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
return CSBBs.count(MI->getParent());
}
-bool MachineCSE::ProcessBlock(MachineDomTreeNode *Node) {
+void MachineCSE::EnterScope(MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
+ ScopeType *Scope = new ScopeType(VNT);
+ ScopeMap[MBB] = Scope;
+}
+
+void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
+ DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);
+ assert(SI != ScopeMap.end());
+ delete SI->second;
+ ScopeMap.erase(SI);
+}
+
+bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
bool Changed = false;
SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
- ScopedHashTableScope<MachineInstr*, unsigned,
- MachineInstrExpressionTrait> VNTS(VNT);
- MachineBasicBlock *MBB = Node->getBlock();
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
MachineInstr *MI = &*I;
++I;
@@ -356,10 +377,63 @@ bool MachineCSE::ProcessBlock(MachineDomTreeNode *Node) {
CSEPairs.clear();
}
- // Recursively call ProcessBlock with childred.
- const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
- for (unsigned i = 0, e = Children.size(); i != e; ++i)
- Changed |= ProcessBlock(Children[i]);
+ return Changed;
+}
+
+/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
+/// dominator tree node if it's a leaf or all of its children are done. Walk
+/// up the dominator tree to destroy ancestors which are now done.
+void
+MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
+ DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
+ DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) {
+ if (OpenChildren[Node])
+ return;
+
+ // Pop scope.
+ ExitScope(Node->getBlock());
+
+ // Now traverse upwards to pop ancestors whose offspring are all done.
+ while (MachineDomTreeNode *Parent = ParentMap[Node]) {
+ unsigned Left = --OpenChildren[Parent];
+ if (Left != 0)
+ break;
+ ExitScope(Parent->getBlock());
+ Node = Parent;
+ }
+}
+
+bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
+ SmallVector<MachineDomTreeNode*, 32> Scopes;
+ SmallVector<MachineDomTreeNode*, 8> WorkList;
+ DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
+ DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
+
+ // Perform a DFS walk to determine the order of visit.
+ WorkList.push_back(Node);
+ do {
+ Node = WorkList.pop_back_val();
+ Scopes.push_back(Node);
+ const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
+ unsigned NumChildren = Children.size();
+ OpenChildren[Node] = NumChildren;
+ for (unsigned i = 0; i != NumChildren; ++i) {
+ MachineDomTreeNode *Child = Children[i];
+ ParentMap[Child] = Node;
+ WorkList.push_back(Child);
+ }
+ } while (!WorkList.empty());
+
+ // Now perform CSE.
+ bool Changed = false;
+ for (unsigned i = 0, e = Scopes.size(); i != e; ++i) {
+ MachineDomTreeNode *Node = Scopes[i];
+ MachineBasicBlock *MBB = Node->getBlock();
+ EnterScope(MBB);
+ Changed |= ProcessBlock(MBB);
+ // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
+ ExitScopeIfDone(Node, OpenChildren, ParentMap);
+ }
return Changed;
}
@@ -370,5 +444,5 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
AA = &getAnalysis<AliasAnalysis>();
DT = &getAnalysis<MachineDominatorTree>();
- return ProcessBlock(DT->getRootNode());
+ return PerformCSE(DT->getRootNode());
}
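PerformCSE above replaces recursion with an explicit preorder walk plus per-node open-child counts, so hash-table scopes close exactly when a subtree is finished. A generic, self-contained sketch of that bookkeeping, where Node, Enter, and Exit are hypothetical stand-ins:

    #include <map>
    #include <vector>

    struct Node { std::vector<Node*> Children; };

    void visitWithScopes(Node *Root, void (*Enter)(Node*), void (*Exit)(Node*)) {
      std::vector<Node*> Order, Work;
      std::map<Node*, Node*> Parent;
      std::map<Node*, unsigned> Open;

      // Pass 1: record a DFS preorder and each node's child count.
      Work.push_back(Root);
      while (!Work.empty()) {
        Node *N = Work.back(); Work.pop_back();
        Order.push_back(N);
        Open[N] = N->Children.size();
        for (size_t i = 0; i != N->Children.size(); ++i) {
          Parent[N->Children[i]] = N;
          Work.push_back(N->Children[i]);
        }
      }

      // Pass 2: process in that order; close a node's scope once it is a
      // leaf or all of its children are done, then pop finished ancestors.
      for (size_t i = 0; i != Order.size(); ++i) {
        Node *N = Order[i];
        Enter(N);
        // ... process N here ...
        Node *Cur = N;
        while (Open[Cur] == 0) {
          Exit(Cur);
          std::map<Node*, Node*>::iterator P = Parent.find(Cur);
          if (P == Parent.end() || --Open[P->second] != 0)
            break;
          Cur = P->second;
        }
      }
    }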
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index e4ed7dbac4e7..3cf10b3ac65d 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -51,7 +51,7 @@ void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) {
MBB->getParent()->DeleteMachineBasicBlock(MBB);
}
-MachineFunction::MachineFunction(Function *F, const TargetMachine &TM,
+MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
unsigned FunctionNum, MachineModuleInfo &mmi)
: Fn(F), Target(TM), Ctx(mmi.getContext()), MMI(mmi) {
if (TM.getRegisterInfo())
@@ -630,7 +630,7 @@ MachineConstantPool::~MachineConstantPool() {
/// CanShareConstantPoolEntry - Test whether the given two constants
/// can be allocated the same constant pool entry.
-static bool CanShareConstantPoolEntry(Constant *A, Constant *B,
+static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
const TargetData *TD) {
// Handle the trivial case quickly.
if (A == B) return true;
@@ -645,17 +645,17 @@ static bool CanShareConstantPoolEntry(Constant *A, Constant *B,
// If a floating-point value and an integer value have the same encoding,
// they can share a constant-pool entry.
- if (ConstantFP *AFP = dyn_cast<ConstantFP>(A))
- if (ConstantInt *BI = dyn_cast<ConstantInt>(B))
+ if (const ConstantFP *AFP = dyn_cast<ConstantFP>(A))
+ if (const ConstantInt *BI = dyn_cast<ConstantInt>(B))
return AFP->getValueAPF().bitcastToAPInt() == BI->getValue();
- if (ConstantFP *BFP = dyn_cast<ConstantFP>(B))
- if (ConstantInt *AI = dyn_cast<ConstantInt>(A))
+ if (const ConstantFP *BFP = dyn_cast<ConstantFP>(B))
+ if (const ConstantInt *AI = dyn_cast<ConstantInt>(A))
return BFP->getValueAPF().bitcastToAPInt() == AI->getValue();
// Two vectors can share an entry if each pair of corresponding
// elements could.
- if (ConstantVector *AV = dyn_cast<ConstantVector>(A))
- if (ConstantVector *BV = dyn_cast<ConstantVector>(B)) {
+ if (const ConstantVector *AV = dyn_cast<ConstantVector>(A))
+ if (const ConstantVector *BV = dyn_cast<ConstantVector>(B)) {
if (AV->getType()->getNumElements() != BV->getType()->getNumElements())
return false;
for (unsigned i = 0, e = AV->getType()->getNumElements(); i != e; ++i)
@@ -674,7 +674,7 @@ static bool CanShareConstantPoolEntry(Constant *A, Constant *B,
/// an existing one. User must specify the log2 of the minimum required
/// alignment for the object.
///
-unsigned MachineConstantPool::getConstantPoolIndex(Constant *C,
+unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C,
unsigned Alignment) {
assert(Alignment && "Alignment must be specified!");
if (Alignment > PoolAlignment) PoolAlignment = Alignment;
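The FP/integer case above shares a constant-pool slot exactly when the two constants have the same bit pattern. A standalone illustration of that criterion, away from the LLVM types:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // True when the float's IEEE-754 encoding equals the 32-bit integer,
    // which is what comparing bitcastToAPInt() against getValue() checks.
    static bool sameBits(float F, uint32_t I) {
      uint32_t Bits;
      std::memcpy(&Bits, &F, sizeof(Bits));
      return Bits == I;
    }

    int main() {
      assert(sameBits(1.0f, 0x3F800000u)); // 1.0f and this i32 could share
      assert(!sameBits(1.0f, 1u));         // integer 1 encodes differently
      return 0;
    }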
diff --git a/lib/CodeGen/MachineFunctionAnalysis.cpp b/lib/CodeGen/MachineFunctionAnalysis.cpp
index 3b2eb6d388b4..07a0f45c0f48 100644
--- a/lib/CodeGen/MachineFunctionAnalysis.cpp
+++ b/lib/CodeGen/MachineFunctionAnalysis.cpp
@@ -44,7 +44,8 @@ bool MachineFunctionAnalysis::doInitialization(Module &M) {
MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>();
assert(MMI && "MMI not around yet??");
MMI->setModule(&M);
- NextFnNum = 1; return false;
+ NextFnNum = 0;
+ return false;
}
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 39b7fb507f8b..99b5beb13656 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -192,6 +192,8 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
return getBlockAddress() == Other.getBlockAddress();
case MachineOperand::MO_MCSymbol:
return getMCSymbol() == Other.getMCSymbol();
+ case MachineOperand::MO_Metadata:
+ return getMetadata() == Other.getMetadata();
}
}
@@ -409,19 +411,14 @@ void MachineInstr::addImplicitDefUseOperands() {
addOperand(MachineOperand::CreateReg(*ImpUses, false, true));
}
-/// MachineInstr ctor - This constructor create a MachineInstr and add the
-/// implicit operands. It reserves space for number of operands specified by
-/// TargetInstrDesc or the numOperands if it is not zero. (for
-/// instructions with variable number of operands).
+/// MachineInstr ctor - This constructor creates a MachineInstr and adds the
+/// implicit operands. It reserves space for the number of operands specified by
+/// the TargetInstrDesc.
MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp)
: TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0),
MemRefs(0), MemRefsEnd(0), Parent(0) {
- if (!NoImp && TID->getImplicitDefs())
- for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
- NumImplicitOps++;
- if (!NoImp && TID->getImplicitUses())
- for (const unsigned *ImpUses = TID->getImplicitUses(); *ImpUses; ++ImpUses)
- NumImplicitOps++;
+ if (!NoImp)
+ NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses();
Operands.reserve(NumImplicitOps + TID->getNumOperands());
if (!NoImp)
addImplicitDefUseOperands();
@@ -434,12 +431,8 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl,
bool NoImp)
: TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0),
Parent(0), debugLoc(dl) {
- if (!NoImp && TID->getImplicitDefs())
- for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
- NumImplicitOps++;
- if (!NoImp && TID->getImplicitUses())
- for (const unsigned *ImpUses = TID->getImplicitUses(); *ImpUses; ++ImpUses)
- NumImplicitOps++;
+ if (!NoImp)
+ NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses();
Operands.reserve(NumImplicitOps + TID->getNumOperands());
if (!NoImp)
addImplicitDefUseOperands();
@@ -450,17 +443,11 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl,
/// MachineInstr ctor - Works exactly the same as the ctor two above, except
/// that the MachineInstr is created and added to the end of the specified
/// basic block.
-///
MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid)
: TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0),
MemRefs(0), MemRefsEnd(0), Parent(0) {
assert(MBB && "Cannot use inserting ctor with null basic block!");
- if (TID->ImplicitDefs)
- for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
- NumImplicitOps++;
- if (TID->ImplicitUses)
- for (const unsigned *ImpUses = TID->getImplicitUses(); *ImpUses; ++ImpUses)
- NumImplicitOps++;
+ NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses();
Operands.reserve(NumImplicitOps + TID->getNumOperands());
addImplicitDefUseOperands();
// Make sure that we get added to a machine basicblock
@@ -475,12 +462,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
: TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0),
Parent(0), debugLoc(dl) {
assert(MBB && "Cannot use inserting ctor with null basic block!");
- if (TID->ImplicitDefs)
- for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
- NumImplicitOps++;
- if (TID->ImplicitUses)
- for (const unsigned *ImpUses = TID->getImplicitUses(); *ImpUses; ++ImpUses)
- NumImplicitOps++;
+ NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses();
Operands.reserve(NumImplicitOps + TID->getNumOperands());
addImplicitDefUseOperands();
// Make sure that we get added to a machine basicblock
@@ -1123,6 +1105,19 @@ unsigned MachineInstr::isConstantValuePHI() const {
return Reg;
}
+/// allDefsAreDead - Return true if all the defs of this instruction are dead.
+///
+bool MachineInstr::allDefsAreDead() const {
+ for (unsigned i = 0, e = getNumOperands(); i < e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || MO.isUse())
+ continue;
+ if (!MO.isDead())
+ return false;
+ }
+ return true;
+}
+
void MachineInstr::dump() const {
dbgs() << " " << *this;
}
@@ -1192,7 +1187,15 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
if (TOI.isOptionalDef())
OS << "opt:";
}
- MO.print(OS, TM);
+ if (isDebugValue() && MO.isMetadata()) {
+ // Pretty print DBG_VALUE instructions.
+ const MDNode *MD = MO.getMetadata();
+ if (const MDString *MDS = dyn_cast<MDString>(MD->getOperand(2)))
+ OS << "!\"" << MDS->getString() << '\"';
+ else
+ MO.print(OS, TM);
+ } else
+ MO.print(OS, TM);
}
// Briefly indicate whether any call clobbers were omitted.
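The constructor cleanups above replace hand-rolled walks over null-terminated implicit-def/use arrays with the precomputed counts from TargetInstrDesc. What the deleted loops computed, as a tiny hypothetical helper:

    // Length of a null-terminated register list (0 marks the end), which is
    // what getNumImplicitDefs()/getNumImplicitUses() now return directly.
    static unsigned countRegList(const unsigned *List) {
      unsigned N = 0;
      if (List)
        while (*List++)
          ++N;
      return N;
    }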
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 0361694d6636..b2e757d8d65d 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -22,8 +22,8 @@
#define DEBUG_TYPE "machine-licm"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -33,6 +33,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -41,32 +42,41 @@ using namespace llvm;
STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops");
STATISTIC(NumCSEed, "Number of hoisted machine instructions CSEed");
+STATISTIC(NumPostRAHoisted,
+ "Number of machine instructions hoisted out of loops post regalloc");
namespace {
class MachineLICM : public MachineFunctionPass {
- MachineConstantPool *MCP;
+ bool PreRegAlloc;
+
const TargetMachine *TM;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
- BitVector AllocatableSet;
+ const MachineFrameInfo *MFI;
+ MachineRegisterInfo *RegInfo;
// Various analyses that we use...
AliasAnalysis *AA; // Alias analysis info.
- MachineLoopInfo *LI; // Current MachineLoopInfo
+ MachineLoopInfo *MLI; // Current MachineLoopInfo
MachineDominatorTree *DT; // Machine dominator tree for the cur loop
- MachineRegisterInfo *RegInfo; // Machine register information
// State that is updated as we process loops
bool Changed; // True if a loop is changed.
- bool FirstInLoop; // True if it's the first LICM in the loop.
MachineLoop *CurLoop; // The current loop we are working on.
MachineBasicBlock *CurPreheader; // The preheader for CurLoop.
+ BitVector AllocatableSet;
+
// For each opcode, keep a list of potential CSE instructions.
DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap;
+
public:
static char ID; // Pass identification, replacement for typeid
- MachineLICM() : MachineFunctionPass(&ID) {}
+ MachineLICM() :
+ MachineFunctionPass(&ID), PreRegAlloc(true) {}
+
+ explicit MachineLICM(bool PreRA) :
+ MachineFunctionPass(&ID), PreRegAlloc(PreRA) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -88,6 +98,39 @@ namespace {
}
private:
+ /// CandidateInfo - Keep track of information about hoisting candidates.
+ struct CandidateInfo {
+ MachineInstr *MI;
+ unsigned Def;
+ int FI;
+ CandidateInfo(MachineInstr *mi, unsigned def, int fi)
+ : MI(mi), Def(def), FI(fi) {}
+ };
+
+ /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
+ /// invariants out to the preheader.
+ void HoistRegionPostRA();
+
+ /// HoistPostRA - When an instruction is found to use only loop-invariant
+ /// operands that are safe to hoist, this function is called to do the
+ /// dirty work.
+ void HoistPostRA(MachineInstr *MI, unsigned Def);
+
+ /// ProcessMI - Examine the instruction as a potential LICM candidate. Also
+ /// gather register def and frame object update information.
+ void ProcessMI(MachineInstr *MI, unsigned *PhysRegDefs,
+ SmallSet<int, 32> &StoredFIs,
+ SmallVector<CandidateInfo, 32> &Candidates);
+
+ /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the
+ /// current loop.
+ void AddToLiveIns(unsigned Reg);
+
+ /// IsLICMCandidate - Returns true if the instruction may be a suitable
+ /// candidate for LICM, e.g. if the instruction is a call, then it's obviously
+ /// not safe to hoist it.
+ bool IsLICMCandidate(MachineInstr &I);
+
/// IsLoopInvariantInst - Returns true if the instruction is loop
/// invariant. I.e., all virtual register operands are defined outside of
/// the loop, physical registers aren't accessed (explicitly or implicitly),
@@ -145,7 +188,9 @@ char MachineLICM::ID = 0;
static RegisterPass<MachineLICM>
X("machinelicm", "Machine Loop Invariant Code Motion");
-FunctionPass *llvm::createMachineLICMPass() { return new MachineLICM(); }
+FunctionPass *llvm::createMachineLICMPass(bool PreRegAlloc) {
+ return new MachineLICM(PreRegAlloc);
+}
/// LoopIsOuterMostWithPreheader - Test if the given loop is the outer-most
/// loop that has a preheader.
@@ -156,31 +201,31 @@ static bool LoopIsOuterMostWithPreheader(MachineLoop *CurLoop) {
return true;
}
-/// Hoist expressions out of the specified loop. Note, alias info for inner loop
-/// is not preserved so it is not a good idea to run LICM multiple times on one
-/// loop.
-///
bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
- DEBUG(dbgs() << "******** Machine LICM ********\n");
+ if (PreRegAlloc)
+ DEBUG(dbgs() << "******** Pre-regalloc Machine LICM ********\n");
+ else
+ DEBUG(dbgs() << "******** Post-regalloc Machine LICM ********\n");
- Changed = FirstInLoop = false;
- MCP = MF.getConstantPool();
+ Changed = false;
TM = &MF.getTarget();
TII = TM->getInstrInfo();
TRI = TM->getRegisterInfo();
+ MFI = MF.getFrameInfo();
RegInfo = &MF.getRegInfo();
AllocatableSet = TRI->getAllocatableSet(MF);
// Get our Loop information...
- LI = &getAnalysis<MachineLoopInfo>();
- DT = &getAnalysis<MachineDominatorTree>();
- AA = &getAnalysis<AliasAnalysis>();
+ MLI = &getAnalysis<MachineLoopInfo>();
+ DT = &getAnalysis<MachineDominatorTree>();
+ AA = &getAnalysis<AliasAnalysis>();
- for (MachineLoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) {
+ for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I){
CurLoop = *I;
- // Only visit outer-most preheader-sporting loops.
- if (!LoopIsOuterMostWithPreheader(CurLoop))
+ // If this is done before regalloc, only visit outer-most preheader-sporting
+ // loops.
+ if (PreRegAlloc && !LoopIsOuterMostWithPreheader(CurLoop))
continue;
// Determine the block to which to hoist instructions. If we can't find a
@@ -193,16 +238,230 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
if (!CurPreheader)
continue;
- // CSEMap is initialized for loop header when the first instruction is
- // being hoisted.
- FirstInLoop = true;
- HoistRegion(DT->getNode(CurLoop->getHeader()));
- CSEMap.clear();
+ if (!PreRegAlloc)
+ HoistRegionPostRA();
+ else {
+ // CSEMap is initialized for loop header when the first instruction is
+ // being hoisted.
+ MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader());
+ HoistRegion(N);
+ CSEMap.clear();
+ }
}
return Changed;
}
+/// InstructionStoresToFI - Return true if instruction stores to the
+/// specified frame.
+static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
+ for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
+ oe = MI->memoperands_end(); o != oe; ++o) {
+ if (!(*o)->isStore() || !(*o)->getValue())
+ continue;
+ if (const FixedStackPseudoSourceValue *Value =
+ dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
+ if (Value->getFrameIndex() == FI)
+ return true;
+ }
+ }
+ return false;
+}
+
+/// ProcessMI - Examine the instruction as a potential LICM candidate. Also
+/// gather register def and frame object update information.
+void MachineLICM::ProcessMI(MachineInstr *MI,
+ unsigned *PhysRegDefs,
+ SmallSet<int, 32> &StoredFIs,
+ SmallVector<CandidateInfo, 32> &Candidates) {
+ bool RuledOut = false;
+ bool HasNonInvariantUse = false;
+ unsigned Def = 0;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isFI()) {
+ // Remember if the instruction stores to the frame index.
+ int FI = MO.getIndex();
+ if (!StoredFIs.count(FI) &&
+ MFI->isSpillSlotObjectIndex(FI) &&
+ InstructionStoresToFI(MI, FI))
+ StoredFIs.insert(FI);
+ HasNonInvariantUse = true;
+ continue;
+ }
+
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ "Not expecting virtual register!");
+
+ if (!MO.isDef()) {
+ if (Reg && PhysRegDefs[Reg])
+ // If it's using a non-loop-invariant register, then it's obviously not
+ // safe to hoist.
+ HasNonInvariantUse = true;
+ continue;
+ }
+
+ if (MO.isImplicit()) {
+ ++PhysRegDefs[Reg];
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ ++PhysRegDefs[*AS];
+ if (!MO.isDead())
+ // Non-dead implicit def? This cannot be hoisted.
+ RuledOut = true;
+ // No need to check if a dead implicit def is also defined by
+ // another instruction.
+ continue;
+ }
+
+ // FIXME: For now, avoid instructions with multiple defs, unless
+ // it's a dead implicit def.
+ if (Def)
+ RuledOut = true;
+ else
+ Def = Reg;
+
+ // If we have already seen another instruction that defines the same
+ // register, then this is not safe.
+ if (++PhysRegDefs[Reg] > 1)
+ // MI defined register is seen defined by another instruction in
+ // the loop, it cannot be a LICM candidate.
+ RuledOut = true;
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ if (++PhysRegDefs[*AS] > 1)
+ RuledOut = true;
+ }
+
+ // Only consider reloads for now and remats which do not have register
+ // operands. FIXME: Consider unfold load folding instructions.
+ if (Def && !RuledOut) {
+ int FI = INT_MIN;
+ if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) ||
+ (TII->isLoadFromStackSlot(MI, FI) && MFI->isSpillSlotObjectIndex(FI)))
+ Candidates.push_back(CandidateInfo(MI, Def, FI));
+ }
+}
+
+/// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
+/// invariants out to the preheader.
+void MachineLICM::HoistRegionPostRA() {
+ unsigned NumRegs = TRI->getNumRegs();
+ unsigned *PhysRegDefs = new unsigned[NumRegs];
+ std::fill(PhysRegDefs, PhysRegDefs + NumRegs, 0);
+
+ SmallVector<CandidateInfo, 32> Candidates;
+ SmallSet<int, 32> StoredFIs;
+
+ // Walk the entire region, count number of defs for each register, and
+ // collect potential LICM candidates.
+ const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks();
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+ MachineBasicBlock *BB = Blocks[i];
+ // Conservatively treat live-in's as an external def.
+ // FIXME: That means a reload that's reused in successor block(s) will not
+ // be LICM'ed.
+ for (MachineBasicBlock::livein_iterator I = BB->livein_begin(),
+ E = BB->livein_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ ++PhysRegDefs[Reg];
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ ++PhysRegDefs[*AS];
+ }
+
+ for (MachineBasicBlock::iterator
+ MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
+ MachineInstr *MI = &*MII;
+ ProcessMI(MI, PhysRegDefs, StoredFIs, Candidates);
+ }
+ }
+
+ // Now evaluate whether the potential candidates qualify.
+ // 1. Check if the candidate defined register is defined by another
+ // instruction in the loop.
+ // 2. If the candidate is a load from stack slot (always true for now),
+ // check if the slot is stored anywhere in the loop.
+ for (unsigned i = 0, e = Candidates.size(); i != e; ++i) {
+ if (Candidates[i].FI != INT_MIN &&
+ StoredFIs.count(Candidates[i].FI))
+ continue;
+
+ if (PhysRegDefs[Candidates[i].Def] == 1) {
+ bool Safe = true;
+ MachineInstr *MI = Candidates[i].MI;
+ for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
+ const MachineOperand &MO = MI->getOperand(j);
+ if (!MO.isReg() || MO.isDef() || !MO.getReg())
+ continue;
+ if (PhysRegDefs[MO.getReg()]) {
+ // If it's using a non-loop-invariant register, then it's obviously
+ // not safe to hoist.
+ Safe = false;
+ break;
+ }
+ }
+ if (Safe)
+ HoistPostRA(MI, Candidates[i].Def);
+ }
+ }
+
+ delete[] PhysRegDefs;
+}
+
+/// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current
+/// loop, and make sure it is not killed by any instructions in the loop.
+void MachineLICM::AddToLiveIns(unsigned Reg) {
+ const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks();
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+ MachineBasicBlock *BB = Blocks[i];
+ if (!BB->isLiveIn(Reg))
+ BB->addLiveIn(Reg);
+ for (MachineBasicBlock::iterator
+ MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
+ MachineInstr *MI = &*MII;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue;
+ if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg()))
+ MO.setIsKill(false);
+ }
+ }
+ }
+}
+
+/// HoistPostRA - When an instruction is found to use only loop-invariant
+/// operands that are safe to hoist, this function is called to do the
+/// dirty work.
+void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
+ // Now move the instructions to the predecessor, inserting it before any
+ // terminator instructions.
+ DEBUG({
+ dbgs() << "Hoisting " << *MI;
+ if (CurPreheader->getBasicBlock())
+ dbgs() << " to MachineBasicBlock "
+ << CurPreheader->getName();
+ if (MI->getParent()->getBasicBlock())
+ dbgs() << " from MachineBasicBlock "
+ << MI->getParent()->getName();
+ dbgs() << "\n";
+ });
+
+ // Splice the instruction to the preheader.
+ MachineBasicBlock *MBB = MI->getParent();
+ CurPreheader->splice(CurPreheader->getFirstTerminator(), MBB, MI);
+
+ // Add register to livein list to all the BBs in the current loop since a
+ // loop invariant must be kept live throughout the whole loop. This is
+ // important to ensure later passes do not scavenge the def register.
+ AddToLiveIns(Def);
+
+ ++NumPostRAHoisted;
+ Changed = true;
+}
+
/// HoistRegion - Walk the specified region of the CFG (defined by all blocks
/// dominated by the specified block, and that are in the current loop) in depth
/// first order w.r.t the DominatorTree. This allows us to visit definitions
@@ -223,17 +482,17 @@ void MachineLICM::HoistRegion(MachineDomTreeNode *N) {
}
const std::vector<MachineDomTreeNode*> &Children = N->getChildren();
-
for (unsigned I = 0, E = Children.size(); I != E; ++I)
HoistRegion(Children[I]);
}
-/// IsLoopInvariantInst - Returns true if the instruction is loop
-/// invariant. I.e., all virtual register operands are defined outside of the
-/// loop, physical registers aren't accessed explicitly, and there are no side
-/// effects that aren't captured by the operands or other flags.
-///
-bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
+/// IsLICMCandidate - Returns true if the instruction may be a suitable
+/// candidate for LICM, e.g. if the instruction is a call, then it's obviously
+/// not safe to hoist it.
+bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
+ if (I.isImplicitDef())
+ return false;
+
const TargetInstrDesc &TID = I.getDesc();
// Ignore stuff that we obviously can't hoist.
@@ -251,6 +510,17 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
// This is a trivial form of alias analysis.
return false;
}
+ return true;
+}
+
+/// IsLoopInvariantInst - Returns true if the instruction is loop
+/// invariant. I.e., all virtual register operands are defined outside of the
+/// loop, physical registers aren't accessed explicitly, and there are no side
+/// effects that aren't captured by the operands or other flags.
+///
+bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
+ if (!IsLICMCandidate(I))
+ return false;
// The instruction is loop invariant if all of its operands are.
for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
@@ -341,9 +611,6 @@ bool MachineLICM::isLoadFromConstantMemory(MachineInstr *MI) {
/// IsProfitableToHoist - Return true if it is potentially profitable to hoist
/// the given loop invariant.
bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
- if (MI.isImplicitDef())
- return false;
-
// FIXME: For now, only hoist re-materilizable instructions. LICM will
// increase register pressure. We want to make sure it doesn't increase
// spilling.
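The post-regalloc qualification step above reduces to two checks against the per-register def counts gathered by ProcessMI: the candidate's def must be the only def of that physical register in the loop, and none of its register uses may be defined anywhere in the loop. Distilled into a self-contained sketch, with hypothetical stand-ins for the machine-level structures:

    #include <vector>

    struct Candidate {
      unsigned Def;                 // the single register the candidate defines
      std::vector<unsigned> Uses;   // registers the candidate reads
    };

    // DefCount[R] = number of defs of physical register R inside the loop,
    // including the candidate's own def.
    static bool safeToHoist(const Candidate &C,
                            const std::vector<unsigned> &DefCount) {
      if (DefCount[C.Def] != 1)       // redefined elsewhere in the loop
        return false;
      for (size_t i = 0; i != C.Uses.size(); ++i)
        if (DefCount[C.Uses[i]] != 0) // operand is not loop-invariant
          return false;
      return true;
    }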
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index f813a553670b..25284d6f5fcf 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -12,8 +12,6 @@
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Instructions.h"
#include "llvm/Module.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -315,18 +313,18 @@ void MachineModuleInfo::EndFunction() {
/// AnalyzeModule - Scan the module for global debug information.
///
-void MachineModuleInfo::AnalyzeModule(Module &M) {
+void MachineModuleInfo::AnalyzeModule(const Module &M) {
// Insert functions in the llvm.used array (but not llvm.compiler.used) into
// UsedFunctions.
- GlobalVariable *GV = M.getGlobalVariable("llvm.used");
+ const GlobalVariable *GV = M.getGlobalVariable("llvm.used");
if (!GV || !GV->hasInitializer()) return;
// Should be an array of 'i8*'.
- ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+ const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
if (InitList == 0) return;
for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
- if (Function *F =
+ if (const Function *F =
dyn_cast<Function>(InitList->getOperand(i)->stripPointerCasts()))
UsedFunctions.insert(F);
}
@@ -407,7 +405,7 @@ MCSymbol *MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) {
/// addPersonality - Provide the personality function for the exception
/// information.
void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad,
- Function *Personality) {
+ const Function *Personality) {
LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
LP.Personality = Personality;
@@ -426,7 +424,7 @@ void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad,
/// addCatchTypeInfo - Provide the catch typeinfo for a landing pad.
///
void MachineModuleInfo::addCatchTypeInfo(MachineBasicBlock *LandingPad,
- std::vector<GlobalVariable *> &TyInfo) {
+ std::vector<const GlobalVariable *> &TyInfo) {
LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
for (unsigned N = TyInfo.size(); N; --N)
LP.TypeIds.push_back(getTypeIDFor(TyInfo[N - 1]));
@@ -435,7 +433,7 @@ void MachineModuleInfo::addCatchTypeInfo(MachineBasicBlock *LandingPad,
/// addFilterTypeInfo - Provide the filter typeinfo for a landing pad.
///
void MachineModuleInfo::addFilterTypeInfo(MachineBasicBlock *LandingPad,
- std::vector<GlobalVariable *> &TyInfo) {
+ std::vector<const GlobalVariable *> &TyInfo) {
LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
std::vector<unsigned> IdsInFilter(TyInfo.size());
for (unsigned I = 0, E = TyInfo.size(); I != E; ++I)
@@ -452,10 +450,12 @@ void MachineModuleInfo::addCleanup(MachineBasicBlock *LandingPad) {
/// TidyLandingPads - Remap landing pad labels and remove any deleted landing
/// pads.
-void MachineModuleInfo::TidyLandingPads() {
+void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) {
for (unsigned i = 0; i != LandingPads.size(); ) {
LandingPadInfo &LandingPad = LandingPads[i];
- if (LandingPad.LandingPadLabel && !LandingPad.LandingPadLabel->isDefined())
+ if (LandingPad.LandingPadLabel &&
+ !LandingPad.LandingPadLabel->isDefined() &&
+ (!LPMap || (*LPMap)[LandingPad.LandingPadLabel] == 0))
LandingPad.LandingPadLabel = 0;
// Special case: we *should* emit LPs with null LP MBB. This indicates
@@ -468,7 +468,10 @@ void MachineModuleInfo::TidyLandingPads() {
for (unsigned j = 0, e = LandingPads[i].BeginLabels.size(); j != e; ++j) {
MCSymbol *BeginLabel = LandingPad.BeginLabels[j];
MCSymbol *EndLabel = LandingPad.EndLabels[j];
- if (BeginLabel->isDefined() && EndLabel->isDefined()) continue;
+ if ((BeginLabel->isDefined() ||
+ (LPMap && (*LPMap)[BeginLabel] != 0)) &&
+ (EndLabel->isDefined() ||
+ (LPMap && (*LPMap)[EndLabel] != 0))) continue;
LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j);
LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j);
@@ -492,7 +495,7 @@ void MachineModuleInfo::TidyLandingPads() {
/// getTypeIDFor - Return the type id for the specified typeinfo. This is
/// function wide.
-unsigned MachineModuleInfo::getTypeIDFor(GlobalVariable *TI) {
+unsigned MachineModuleInfo::getTypeIDFor(const GlobalVariable *TI) {
for (unsigned i = 0, N = TypeInfos.size(); i != N; ++i)
if (TypeInfos[i] == TI) return i + 1;
@@ -532,7 +535,7 @@ try_next:;
}
/// getPersonality - Return the personality function for the current function.
-Function *MachineModuleInfo::getPersonality() const {
+const Function *MachineModuleInfo::getPersonality() const {
// FIXME: Until PR1414 will be fixed, we're using 1 personality function per
// function
return !LandingPads.empty() ? LandingPads[0].Personality : NULL;
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index d9ab6773a53a..ea5ca0cad8a1 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -12,6 +12,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) {
@@ -130,6 +133,138 @@ bool MachineRegisterInfo::hasOneNonDBGUse(unsigned RegNo) const {
return ++UI == use_nodbg_end();
}
+bool MachineRegisterInfo::isLiveIn(unsigned Reg) const {
+ for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I)
+ if (I->first == Reg || I->second == Reg)
+ return true;
+ return false;
+}
+
+bool MachineRegisterInfo::isLiveOut(unsigned Reg) const {
+ for (liveout_iterator I = liveout_begin(), E = liveout_end(); I != E; ++I)
+ if (*I == Reg)
+ return true;
+ return false;
+}
+
+/// getLiveInPhysReg - If VReg is a live-in virtual register, return the
+/// corresponding live-in physical register.
+unsigned MachineRegisterInfo::getLiveInPhysReg(unsigned VReg) const {
+ for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I)
+ if (I->second == VReg)
+ return I->first;
+ return 0;
+}
+
+static cl::opt<bool>
+SchedLiveInCopies("schedule-livein-copies", cl::Hidden,
+ cl::desc("Schedule copies of livein registers"),
+ cl::init(false));
+
+/// EmitLiveInCopy - Emit a copy for a live-in physical register. If the
+/// physical register has only a single copy use, then coalesce the copy
+/// if possible.
+static void EmitLiveInCopy(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &InsertPos,
+ unsigned VirtReg, unsigned PhysReg,
+ const TargetRegisterClass *RC,
+ DenseMap<MachineInstr*, unsigned> &CopyRegMap,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ const TargetInstrInfo &TII) {
+ unsigned NumUses = 0;
+ MachineInstr *UseMI = NULL;
+ for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(VirtReg),
+ UE = MRI.use_end(); UI != UE; ++UI) {
+ UseMI = &*UI;
+ if (++NumUses > 1)
+ break;
+ }
+
+ // If the number of uses is not one, or the use is not a move instruction,
+ // don't coalesce. Also, only coalesce away a virtual register to virtual
+ // register copy.
+ bool Coalesced = false;
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+ if (NumUses == 1 &&
+ TII.isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
+ TargetRegisterInfo::isVirtualRegister(DstReg)) {
+ VirtReg = DstReg;
+ Coalesced = true;
+ }
+
+ // Now find an ideal location to insert the copy.
+ MachineBasicBlock::iterator Pos = InsertPos;
+ while (Pos != MBB->begin()) {
+ MachineInstr *PrevMI = prior(Pos);
+ DenseMap<MachineInstr*, unsigned>::iterator RI = CopyRegMap.find(PrevMI);
+ // copyRegToReg might emit multiple instructions to do a copy.
+ unsigned CopyDstReg = (RI == CopyRegMap.end()) ? 0 : RI->second;
+ if (CopyDstReg && !TRI.regsOverlap(CopyDstReg, PhysReg))
+ // This is what the BB looks like right now:
+ // r1024 = mov r0
+ // ...
+ // r1 = mov r1024
+ //
+ // We want to insert "r1025 = mov r1". Inserting this copy below the
+ // move to r1024 makes it impossible for that move to be coalesced.
+ //
+ // r1025 = mov r1
+ // r1024 = mov r0
+ // ...
+ // r1 = mov r1024
+ // r2 = mov r1025
+ break; // Woot! Found a good location.
+ --Pos;
+ }
+
+ bool Emitted = TII.copyRegToReg(*MBB, Pos, VirtReg, PhysReg, RC, RC);
+ assert(Emitted && "Unable to issue a live-in copy instruction!\n");
+ (void) Emitted;
+
+ CopyRegMap.insert(std::make_pair(prior(Pos), VirtReg));
+ if (Coalesced) {
+ if (&*InsertPos == UseMI) ++InsertPos;
+ MBB->erase(UseMI);
+ }
+}
+
+/// EmitLiveInCopies - Emit copies to initialize livein virtual registers
+/// into the given entry block.
+void
+MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB,
+ const TargetRegisterInfo &TRI,
+ const TargetInstrInfo &TII) {
+ if (SchedLiveInCopies) {
+ // Emit the copies at a heuristically-determined location in the block.
+ DenseMap<MachineInstr*, unsigned> CopyRegMap;
+ MachineBasicBlock::iterator InsertPos = EntryMBB->begin();
+ for (MachineRegisterInfo::livein_iterator LI = livein_begin(),
+ E = livein_end(); LI != E; ++LI)
+ if (LI->second) {
+ const TargetRegisterClass *RC = getRegClass(LI->second);
+ EmitLiveInCopy(EntryMBB, InsertPos, LI->second, LI->first,
+ RC, CopyRegMap, *this, TRI, TII);
+ }
+ } else {
+ // Emit the copies into the top of the block.
+ for (MachineRegisterInfo::livein_iterator LI = livein_begin(),
+ E = livein_end(); LI != E; ++LI)
+ if (LI->second) {
+ const TargetRegisterClass *RC = getRegClass(LI->second);
+ bool Emitted = TII.copyRegToReg(*EntryMBB, EntryMBB->begin(),
+ LI->second, LI->first, RC, RC);
+ assert(Emitted && "Unable to issue a live-in copy instruction!\n");
+ (void) Emitted;
+ }
+ }
+
+ // Add function live-ins to entry block live-in set.
+ for (MachineRegisterInfo::livein_iterator I = livein_begin(),
+ E = livein_end(); I != E; ++I)
+ EntryMBB->addLiveIn(I->first);
+}
+
#ifndef NDEBUG
void MachineRegisterInfo::dumpUses(unsigned Reg) const {
for (use_iterator I = use_begin(Reg), E = use_end(); I != E; ++I)
diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp
index b79cdbb660de..b8996d46f940 100644
--- a/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/lib/CodeGen/MachineSSAUpdater.cpp
@@ -21,34 +21,50 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/AlignOf.h"
+#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-typedef DenseMap<MachineBasicBlock*, unsigned> AvailableValsTy;
-typedef std::vector<std::pair<MachineBasicBlock*, unsigned> >
- IncomingPredInfoTy;
+/// BBInfo - Per-basic block information used internally by MachineSSAUpdater.
+class MachineSSAUpdater::BBInfo {
+public:
+ MachineBasicBlock *BB; // Back-pointer to the corresponding block.
+ unsigned AvailableVal; // Value to use in this block.
+ BBInfo *DefBB; // Block that defines the available value.
+ int BlkNum; // Postorder number.
+ BBInfo *IDom; // Immediate dominator.
+ unsigned NumPreds; // Number of predecessor blocks.
+ BBInfo **Preds; // Array[NumPreds] of predecessor blocks.
+ MachineInstr *PHITag; // Marker for existing PHIs that match.
+
+ BBInfo(MachineBasicBlock *ThisBB, unsigned V)
+ : BB(ThisBB), AvailableVal(V), DefBB(V ? this : 0), BlkNum(0), IDom(0),
+ NumPreds(0), Preds(0), PHITag(0) { }
+};
+
+typedef DenseMap<MachineBasicBlock*, MachineSSAUpdater::BBInfo*> BBMapTy;
+typedef DenseMap<MachineBasicBlock*, unsigned> AvailableValsTy;
static AvailableValsTy &getAvailableVals(void *AV) {
return *static_cast<AvailableValsTy*>(AV);
}
-static IncomingPredInfoTy &getIncomingPredInfo(void *IPI) {
- return *static_cast<IncomingPredInfoTy*>(IPI);
+static BBMapTy *getBBMap(void *BM) {
+ return static_cast<BBMapTy*>(BM);
}
-
MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF,
SmallVectorImpl<MachineInstr*> *NewPHI)
- : AV(0), IPI(0), InsertedPHIs(NewPHI) {
+ : AV(0), BM(0), InsertedPHIs(NewPHI) {
TII = MF.getTarget().getInstrInfo();
MRI = &MF.getRegInfo();
}
MachineSSAUpdater::~MachineSSAUpdater() {
delete &getAvailableVals(AV);
- delete &getIncomingPredInfo(IPI);
}
/// Initialize - Reset this object to get ready for a new set of SSA
@@ -59,11 +75,6 @@ void MachineSSAUpdater::Initialize(unsigned V) {
else
getAvailableVals(AV).clear();
- if (IPI == 0)
- IPI = new IncomingPredInfoTy();
- else
- getIncomingPredInfo(IPI).clear();
-
VR = V;
VRC = MRI->getRegClass(VR);
}
@@ -127,7 +138,7 @@ MachineInstr *InsertNewDef(unsigned Opcode,
unsigned NewVR = MRI->createVirtualRegister(RC);
return BuildMI(*BB, I, DebugLoc(), TII->get(Opcode), NewVR);
}
-
+
/// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that
/// is live in the middle of the specified block.
///
@@ -150,7 +161,7 @@ MachineInstr *InsertNewDef(unsigned Opcode,
unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) {
// If there is no definition of the renamed variable in this block, just use
// GetValueAtEndOfBlock to do our work.
- if (!getAvailableVals(AV).count(BB))
+ if (!HasValueForBlock(BB))
return GetValueAtEndOfBlockInternal(BB);
// If there are no predecessors, just return undef.
@@ -254,141 +265,436 @@ void MachineSSAUpdater::ReplaceRegWith(unsigned OldReg, unsigned NewReg) {
/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry
/// for the specified BB and if so, return it. If not, construct SSA form by
-/// walking predecessors inserting PHI nodes as needed until we get to a block
-/// where the value is available.
-///
+/// first calculating the required placement of PHIs and then inserting new
+/// PHIs where needed.
unsigned MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){
AvailableValsTy &AvailableVals = getAvailableVals(AV);
+ if (unsigned V = AvailableVals[BB])
+ return V;
- // Query AvailableVals by doing an insertion of null.
- std::pair<AvailableValsTy::iterator, bool> InsertRes =
- AvailableVals.insert(std::make_pair(BB, 0));
-
- // Handle the case when the insertion fails because we have already seen BB.
- if (!InsertRes.second) {
- // If the insertion failed, there are two cases. The first case is that the
- // value is already available for the specified block. If we get this, just
- // return the value.
- if (InsertRes.first->second != 0)
- return InsertRes.first->second;
-
- // Otherwise, if the value we find is null, then this is the value is not
- // known but it is being computed elsewhere in our recursion. This means
- // that we have a cycle. Handle this by inserting a PHI node and returning
- // it. When we get back to the first instance of the recursion we will fill
- // in the PHI node.
- MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front();
- MachineInstr *NewPHI = InsertNewDef(TargetOpcode::PHI, BB, Loc,
- VRC, MRI,TII);
- unsigned NewVR = NewPHI->getOperand(0).getReg();
- InsertRes.first->second = NewVR;
- return NewVR;
- }
+ // Pool allocation used internally by GetValueAtEndOfBlock.
+ BumpPtrAllocator Allocator;
+ BBMapTy BBMapObj;
+ BM = &BBMapObj;
- // If there are no predecessors, then we must have found an unreachable block
- // just return 'undef'. Since there are no predecessors, InsertRes must not
- // be invalidated.
- if (BB->pred_empty()) {
+ SmallVector<BBInfo*, 100> BlockList;
+ BuildBlockList(BB, &BlockList, &Allocator);
+
+ // Special case: bail out if BB is unreachable.
+ if (BlockList.size() == 0) {
+ BM = 0;
// Insert an implicit_def to represent an undef value.
MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF,
BB, BB->getFirstTerminator(),
VRC, MRI, TII);
- return InsertRes.first->second = NewDef->getOperand(0).getReg();
+ unsigned V = NewDef->getOperand(0).getReg();
+ AvailableVals[BB] = V;
+ return V;
}
- // Okay, the value isn't in the map and we just inserted a null in the entry
- // to indicate that we're processing the block. Since we have no idea what
- // value is in this block, we have to recurse through our predecessors.
- //
- // While we're walking our predecessors, we keep track of them in a vector,
- // then insert a PHI node in the end if we actually need one. We could use a
- // smallvector here, but that would take a lot of stack space for every level
- // of the recursion, just use IncomingPredInfo as an explicit stack.
- IncomingPredInfoTy &IncomingPredInfo = getIncomingPredInfo(IPI);
- unsigned FirstPredInfoEntry = IncomingPredInfo.size();
-
- // As we're walking the predecessors, keep track of whether they are all
- // producing the same value. If so, this value will capture it, if not, it
- // will get reset to null. We distinguish the no-predecessor case explicitly
- // below.
- unsigned SingularValue = 0;
- bool isFirstPred = true;
+ FindDominators(&BlockList);
+ FindPHIPlacement(&BlockList);
+ FindAvailableVals(&BlockList);
+
+ BM = 0;
+ return BBMapObj[BB]->DefBB->AvailableVal;
+}
+
+/// FindPredecessorBlocks - Put the predecessors of Info->BB into the Preds
+/// vector, set Info->NumPreds, and allocate space in Info->Preds.
+static void FindPredecessorBlocks(MachineSSAUpdater::BBInfo *Info,
+ SmallVectorImpl<MachineBasicBlock*> *Preds,
+ BumpPtrAllocator *Allocator) {
+ MachineBasicBlock *BB = Info->BB;
for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
- E = BB->pred_end(); PI != E; ++PI) {
- MachineBasicBlock *PredBB = *PI;
- unsigned PredVal = GetValueAtEndOfBlockInternal(PredBB);
- IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal));
+ E = BB->pred_end(); PI != E; ++PI)
+ Preds->push_back(*PI);
- // Compute SingularValue.
- if (isFirstPred) {
- SingularValue = PredVal;
- isFirstPred = false;
- } else if (PredVal != SingularValue)
- SingularValue = 0;
+ Info->NumPreds = Preds->size();
+ Info->Preds = static_cast<MachineSSAUpdater::BBInfo**>
+ (Allocator->Allocate(Info->NumPreds * sizeof(MachineSSAUpdater::BBInfo*),
+ AlignOf<MachineSSAUpdater::BBInfo*>::Alignment));
+}
+
+/// BuildBlockList - Starting from the specified basic block, traverse back
+/// through its predecessors until reaching blocks with known values. Create
+/// BBInfo structures for the blocks and append them to the block list.
+void MachineSSAUpdater::BuildBlockList(MachineBasicBlock *BB,
+ BlockListTy *BlockList,
+ BumpPtrAllocator *Allocator) {
+ AvailableValsTy &AvailableVals = getAvailableVals(AV);
+ BBMapTy *BBMap = getBBMap(BM);
+ SmallVector<BBInfo*, 10> RootList;
+ SmallVector<BBInfo*, 64> WorkList;
+
+ BBInfo *Info = new (*Allocator) BBInfo(BB, 0);
+ (*BBMap)[BB] = Info;
+ WorkList.push_back(Info);
+
+ // Search backward from BB, creating BBInfos along the way and stopping when
+ // reaching blocks that define the value. Record those defining blocks on
+ // the RootList.
+ SmallVector<MachineBasicBlock*, 10> Preds;
+ while (!WorkList.empty()) {
+ Info = WorkList.pop_back_val();
+ Preds.clear();
+ FindPredecessorBlocks(Info, &Preds, Allocator);
+
+ // Treat an unreachable predecessor as a definition with 'undef'.
+ if (Info->NumPreds == 0) {
+ // Insert an implicit_def to represent an undef value.
+ MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF,
+ Info->BB,
+ Info->BB->getFirstTerminator(),
+ VRC, MRI, TII);
+ Info->AvailableVal = NewDef->getOperand(0).getReg();
+ Info->DefBB = Info;
+ RootList.push_back(Info);
+ continue;
+ }
+
+ for (unsigned p = 0; p != Info->NumPreds; ++p) {
+ MachineBasicBlock *Pred = Preds[p];
+ // Check if BBMap already has a BBInfo for the predecessor block.
+ BBMapTy::value_type &BBMapBucket = BBMap->FindAndConstruct(Pred);
+ if (BBMapBucket.second) {
+ Info->Preds[p] = BBMapBucket.second;
+ continue;
+ }
+
+ // Create a new BBInfo for the predecessor.
+ unsigned PredVal = AvailableVals.lookup(Pred);
+ BBInfo *PredInfo = new (*Allocator) BBInfo(Pred, PredVal);
+ BBMapBucket.second = PredInfo;
+ Info->Preds[p] = PredInfo;
+
+ if (PredInfo->AvailableVal) {
+ RootList.push_back(PredInfo);
+ continue;
+ }
+ WorkList.push_back(PredInfo);
+ }
+ }
+
+ // Now that we know what blocks are backwards-reachable from the starting
+ // block, do a forward depth-first traversal to assign postorder numbers
+ // to those blocks.
+ BBInfo *PseudoEntry = new (*Allocator) BBInfo(0, 0);
+ unsigned BlkNum = 1;
+
+ // Initialize the worklist with the roots from the backward traversal.
+ while (!RootList.empty()) {
+ Info = RootList.pop_back_val();
+ Info->IDom = PseudoEntry;
+ Info->BlkNum = -1;
+ WorkList.push_back(Info);
+ }
+
+ while (!WorkList.empty()) {
+ Info = WorkList.back();
+
+ if (Info->BlkNum == -2) {
+ // All the successors have been handled; assign the postorder number.
+ Info->BlkNum = BlkNum++;
+ // If not a root, put it on the BlockList.
+ if (!Info->AvailableVal)
+ BlockList->push_back(Info);
+ WorkList.pop_back();
+ continue;
+ }
+
+ // Leave this entry on the worklist, but set its BlkNum to mark that its
+ // successors have been put on the worklist. When it returns to the top
+ of the list, after handling its successors, it will be assigned a number.
+ Info->BlkNum = -2;
+
+ // Add unvisited successors to the work list.
+ for (MachineBasicBlock::succ_iterator SI = Info->BB->succ_begin(),
+ E = Info->BB->succ_end(); SI != E; ++SI) {
+ BBInfo *SuccInfo = (*BBMap)[*SI];
+ if (!SuccInfo || SuccInfo->BlkNum)
+ continue;
+ SuccInfo->BlkNum = -1;
+ WorkList.push_back(SuccInfo);
+ }
}
+ PseudoEntry->BlkNum = BlkNum;
+}
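
The -1 and -2 values threaded through BlkNum implement an iterative depth-first postorder: -1 marks a block as queued, -2 marks it as expanded and waiting for its number. A minimal standalone sketch of the same protocol, with plain STL types standing in for the LLVM ones:

#include <map>
#include <vector>

struct Node { std::vector<Node*> Succs; };

// State per node: 0 = unvisited, -1 = pushed, -2 = successors pushed,
// positive = final postorder number.
std::map<Node*, int> postorder(const std::vector<Node*> &Roots) {
  std::map<Node*, int> Num;
  std::vector<Node*> Work(Roots.begin(), Roots.end());
  for (Node *R : Roots)
    Num[R] = -1;
  int Next = 1;
  while (!Work.empty()) {
    Node *N = Work.back();
    if (Num[N] == -2) {        // everything below N is numbered: number N
      Num[N] = Next++;
      Work.pop_back();
      continue;
    }
    Num[N] = -2;               // keep N on the stack, expand it once
    for (Node *S : N->Succs)
      if (Num[S] == 0) {       // only queue unvisited successors
        Num[S] = -1;
        Work.push_back(S);
      }
  }
  return Num;
}

int main() {
  Node A, B, C;                // A -> B, A -> C, B -> C
  A.Succs = {&B, &C};
  B.Succs = {&C};
  auto Num = postorder({&A});
  return (Num[&C] < Num[&B] && Num[&B] < Num[&A]) ? 0 : 1; // postorder holds
}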
- /// Look up BB's entry in AvailableVals. 'InsertRes' may be invalidated. If
- /// this block is involved in a loop, a no-entry PHI node will have been
- /// inserted as InsertedVal. Otherwise, we'll still have the null we inserted
- /// above.
- unsigned &InsertedVal = AvailableVals[BB];
-
- // If all the predecessor values are the same then we don't need to insert a
- // PHI. This is the simple and common case.
- if (SingularValue) {
- // If a PHI node got inserted, replace it with the singular value and delete
- // it.
- if (InsertedVal) {
- MachineInstr *OldVal = MRI->getVRegDef(InsertedVal);
- // Be careful about dead loops. These RAUW's also update InsertedVal.
- assert(InsertedVal != SingularValue && "Dead loop?");
- ReplaceRegWith(InsertedVal, SingularValue);
- OldVal->eraseFromParent();
+/// IntersectDominators - This is the dataflow lattice "meet" operation for
+/// finding dominators. Given two basic blocks, it walks up the dominator
+/// tree until it finds a common dominator of both. It uses the postorder
+/// number of the blocks to determine how to do that.
+static MachineSSAUpdater::BBInfo *
+IntersectDominators(MachineSSAUpdater::BBInfo *Blk1,
+ MachineSSAUpdater::BBInfo *Blk2) {
+ while (Blk1 != Blk2) {
+ while (Blk1->BlkNum < Blk2->BlkNum) {
+ Blk1 = Blk1->IDom;
+ if (!Blk1)
+ return Blk2;
}
+ while (Blk2->BlkNum < Blk1->BlkNum) {
+ Blk2 = Blk2->IDom;
+ if (!Blk2)
+ return Blk1;
+ }
+ }
+ return Blk1;
+}
- InsertedVal = SingularValue;
+/// FindDominators - Calculate the dominator tree for the subset of the CFG
+/// corresponding to the basic blocks on the BlockList. This uses the
+/// algorithm from: "A Simple, Fast Dominance Algorithm" by Cooper, Harvey and
+/// Kennedy, published in Software--Practice and Experience, 2001, 4:1-10.
+/// Because the CFG subset does not include any edges leading into blocks that
+/// define the value, the results are not the usual dominator tree. The CFG
+/// subset has a single pseudo-entry node with edges to a set of root nodes
+/// for blocks that define the value. The dominators for this subset CFG are
+/// not the standard dominators but they are adequate for placing PHIs within
+/// the subset CFG.
+void MachineSSAUpdater::FindDominators(BlockListTy *BlockList) {
+ bool Changed;
+ do {
+ Changed = false;
+ // Iterate over the list in reverse order, i.e., forward on CFG edges.
+ for (BlockListTy::reverse_iterator I = BlockList->rbegin(),
+ E = BlockList->rend(); I != E; ++I) {
+ BBInfo *Info = *I;
+
+ // Start with the first predecessor.
+ assert(Info->NumPreds > 0 && "unreachable block");
+ BBInfo *NewIDom = Info->Preds[0];
+
+ // Iterate through the block's other predecessors.
+ for (unsigned p = 1; p != Info->NumPreds; ++p) {
+ BBInfo *Pred = Info->Preds[p];
+ NewIDom = IntersectDominators(NewIDom, Pred);
+ }
- // Drop the entries we added in IncomingPredInfo to restore the stack.
- IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry,
- IncomingPredInfo.end());
- return InsertedVal;
+ // Check if the IDom value has changed.
+ if (NewIDom != Info->IDom) {
+ Info->IDom = NewIDom;
+ Changed = true;
+ }
+ }
+ } while (Changed);
+}
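
To see IntersectDominators and the fixpoint loop in action, here is a self-contained sketch of the Cooper-Harvey-Kennedy step on a diamond CFG. The types and the hand-assigned postorder numbers are illustrative; letting the entry be its own IDom stands in for the pseudo-entry, so the null checks of the real code are not needed:

#include <cstddef>
#include <cstdio>
#include <vector>

struct Blk {
  const char *Name;
  unsigned BlkNum;             // postorder number; the entry has the largest
  Blk *IDom;
  std::vector<Blk*> Preds;
};

// The "meet": climb whichever block has the smaller postorder number up
// its IDom chain until both sides agree.
static Blk *intersect(Blk *B1, Blk *B2) {
  while (B1 != B2) {
    while (B1->BlkNum < B2->BlkNum) B1 = B1->IDom;
    while (B2->BlkNum < B1->BlkNum) B2 = B2->IDom;
  }
  return B1;
}

int main() {
  // A branches to B and C, which rejoin at D.
  Blk A{"A", 4, nullptr, {}};
  Blk B{"B", 3, &A, {&A}};
  Blk C{"C", 2, &A, {&A}};
  Blk D{"D", 1, &B /*arbitrary initial guess*/, {&B, &C}};
  A.IDom = &A;                 // the entry dominates itself

  std::vector<Blk*> RPO = {&B, &C, &D};  // reverse postorder, entry excluded
  bool Changed;
  do {
    Changed = false;
    for (Blk *I : RPO) {
      Blk *NewIDom = I->Preds[0];
      for (std::size_t p = 1; p != I->Preds.size(); ++p)
        NewIDom = intersect(NewIDom, I->Preds[p]);
      if (NewIDom != I->IDom) { I->IDom = NewIDom; Changed = true; }
    }
  } while (Changed);

  std::printf("idom(D) = %s\n", D.IDom->Name);  // prints: idom(D) = A
  return 0;
}

For the join D, intersect(B, C) climbs C up to A (postorder 2 < 3) and then B up to A (3 < 4), so A comes out as the common dominator, which is exactly the walk IntersectDominators performs.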
+
+/// IsDefInDomFrontier - Search up the dominator tree from Pred to IDom for
+/// any blocks containing definitions of the value. If one is found, then the
+/// successor of Pred is in the dominance frontier for the definition, and
+/// this function returns true.
+static bool IsDefInDomFrontier(const MachineSSAUpdater::BBInfo *Pred,
+ const MachineSSAUpdater::BBInfo *IDom) {
+ for (; Pred != IDom; Pred = Pred->IDom) {
+ if (Pred->DefBB == Pred)
+ return true;
}
+ return false;
+}
+/// FindPHIPlacement - PHIs are needed in the iterated dominance frontiers of
+/// the known definitions. Iteratively add PHIs in the dom frontiers until
+/// nothing changes. Along the way, keep track of the nearest dominating
+/// definitions for non-PHI blocks.
+void MachineSSAUpdater::FindPHIPlacement(BlockListTy *BlockList) {
+ bool Changed;
+ do {
+ Changed = false;
+ // Iterate over the list in reverse order, i.e., forward on CFG edges.
+ for (BlockListTy::reverse_iterator I = BlockList->rbegin(),
+ E = BlockList->rend(); I != E; ++I) {
+ BBInfo *Info = *I;
+
+ // If this block already needs a PHI, there is nothing to do here.
+ if (Info->DefBB == Info)
+ continue;
+
+ // Default to use the same def as the immediate dominator.
+ BBInfo *NewDefBB = Info->IDom->DefBB;
+ for (unsigned p = 0; p != Info->NumPreds; ++p) {
+ if (IsDefInDomFrontier(Info->Preds[p], Info->IDom)) {
+ // Need a PHI here.
+ NewDefBB = Info;
+ break;
+ }
+ }
- // Otherwise, we do need a PHI: insert one now if we don't already have one.
- MachineInstr *InsertedPHI;
- if (InsertedVal == 0) {
- MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front();
- InsertedPHI = InsertNewDef(TargetOpcode::PHI, BB, Loc,
- VRC, MRI, TII);
- InsertedVal = InsertedPHI->getOperand(0).getReg();
- } else {
- InsertedPHI = MRI->getVRegDef(InsertedVal);
+ // Check if anything changed.
+ if (NewDefBB != Info->DefBB) {
+ Info->DefBB = NewDefBB;
+ Changed = true;
+ }
+ }
+ } while (Changed);
+}
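
A concrete instance of the frontier test: in a diamond where both arms define the value, each predecessor path of the join contains a definition strictly below the join's IDom, so the join lands in the iterated dominance frontier and needs a PHI. A hypothetical miniature (not MachineSSAUpdater's real types):

#include <vector>

struct Blk {
  Blk *IDom = nullptr;
  Blk *DefBB = nullptr;        // nearest dominating def; self if the block defines
  std::vector<Blk*> Preds;
};

// Mirrors IsDefInDomFrontier: walk Pred up the IDom chain to IDom,
// reporting whether a defining block is passed along the way.
static bool defInFrontier(const Blk *Pred, const Blk *IDom) {
  for (; Pred != IDom; Pred = Pred->IDom)
    if (Pred->DefBB == Pred)
      return true;
  return false;
}

int main() {
  Blk Entry, A, B, D;          // A and B both define the value; D is the join.
  A.IDom = &Entry; A.DefBB = &A;
  B.IDom = &Entry; B.DefBB = &B;
  D.IDom = &Entry; D.Preds = {&A, &B};

  bool NeedsPHI = false;
  for (Blk *P : D.Preds)
    if (defInFrontier(P, D.IDom)) { NeedsPHI = true; break; }
  return NeedsPHI ? 0 : 1;     // NeedsPHI is true: D gets a PHI.
}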
+
+/// FindAvailableVal - If this block requires a PHI, first check if an existing
+/// PHI matches the PHI placement and reaching definitions computed earlier,
+/// and if not, create a new PHI. Visit all the block's predecessors to
+/// calculate the available value for each one and fill in the incoming values
+/// for a new PHI.
+void MachineSSAUpdater::FindAvailableVals(BlockListTy *BlockList) {
+ AvailableValsTy &AvailableVals = getAvailableVals(AV);
+
+ // Go through the worklist in forward order (i.e., backward through the CFG)
+ // and check if existing PHIs can be used. If not, create empty PHIs where
+ // they are needed.
+ for (BlockListTy::iterator I = BlockList->begin(), E = BlockList->end();
+ I != E; ++I) {
+ BBInfo *Info = *I;
+ // Check if there needs to be a PHI in BB.
+ if (Info->DefBB != Info)
+ continue;
+
+ // Look for an existing PHI.
+ FindExistingPHI(Info->BB, BlockList);
+ if (Info->AvailableVal)
+ continue;
+
+ MachineBasicBlock::iterator Loc =
+ Info->BB->empty() ? Info->BB->end() : Info->BB->front();
+ MachineInstr *InsertedPHI = InsertNewDef(TargetOpcode::PHI, Info->BB, Loc,
+ VRC, MRI, TII);
+ unsigned PHI = InsertedPHI->getOperand(0).getReg();
+ Info->AvailableVal = PHI;
+ AvailableVals[Info->BB] = PHI;
}
- // Fill in all the predecessors of the PHI.
- MachineInstrBuilder MIB(InsertedPHI);
- for (IncomingPredInfoTy::iterator I =
- IncomingPredInfo.begin()+FirstPredInfoEntry,
- E = IncomingPredInfo.end(); I != E; ++I)
- MIB.addReg(I->second).addMBB(I->first);
+ // Now go back through the worklist in reverse order to fill in the arguments
+ // for any new PHIs added in the forward traversal.
+ for (BlockListTy::reverse_iterator I = BlockList->rbegin(),
+ E = BlockList->rend(); I != E; ++I) {
+ BBInfo *Info = *I;
+
+ if (Info->DefBB != Info) {
+ // Record the available value at join nodes to speed up subsequent
+ // uses of this SSAUpdater for the same value.
+ if (Info->NumPreds > 1)
+ AvailableVals[Info->BB] = Info->DefBB->AvailableVal;
+ continue;
+ }
- // Drop the entries we added in IncomingPredInfo to restore the stack.
- IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry,
- IncomingPredInfo.end());
+ // Check if this block contains a newly added PHI.
+ unsigned PHI = Info->AvailableVal;
+ MachineInstr *InsertedPHI = MRI->getVRegDef(PHI);
+ if (!InsertedPHI->isPHI() || InsertedPHI->getNumOperands() > 1)
+ continue;
+
+ // Iterate through the block's predecessors.
+ MachineInstrBuilder MIB(InsertedPHI);
+ for (unsigned p = 0; p != Info->NumPreds; ++p) {
+ BBInfo *PredInfo = Info->Preds[p];
+ MachineBasicBlock *Pred = PredInfo->BB;
+ // Skip to the nearest preceding definition.
+ if (PredInfo->DefBB != PredInfo)
+ PredInfo = PredInfo->DefBB;
+ MIB.addReg(PredInfo->AvailableVal).addMBB(Pred);
+ }
- // See if the PHI node can be merged to a single value. This can happen in
- // loop cases when we get a PHI of itself and one other value.
- if (unsigned ConstVal = InsertedPHI->isConstantValuePHI()) {
- MRI->replaceRegWith(InsertedVal, ConstVal);
- InsertedPHI->eraseFromParent();
- InsertedVal = ConstVal;
- } else {
DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n");
// If the client wants to know about all new instructions, tell it.
if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
}
+}
- return InsertedVal;
+/// FindExistingPHI - Look through the PHI nodes in a block to see if any of
+/// them match what is needed.
+void MachineSSAUpdater::FindExistingPHI(MachineBasicBlock *BB,
+ BlockListTy *BlockList) {
+ for (MachineBasicBlock::iterator BBI = BB->begin(), BBE = BB->end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ if (CheckIfPHIMatches(BBI)) {
+ RecordMatchingPHI(BBI);
+ break;
+ }
+ // Match failed: clear all the PHITag values.
+ for (BlockListTy::iterator I = BlockList->begin(), E = BlockList->end();
+ I != E; ++I)
+ (*I)->PHITag = 0;
+ }
+}
+
+/// CheckIfPHIMatches - Check if a PHI node matches the placement and values
+/// in the BBMap.
+bool MachineSSAUpdater::CheckIfPHIMatches(MachineInstr *PHI) {
+ BBMapTy *BBMap = getBBMap(BM);
+ SmallVector<MachineInstr*, 20> WorkList;
+ WorkList.push_back(PHI);
+
+ // Mark that the block containing this PHI has been visited.
+ (*BBMap)[PHI->getParent()]->PHITag = PHI;
+
+ while (!WorkList.empty()) {
+ PHI = WorkList.pop_back_val();
+
+ // Iterate through the PHI's incoming values.
+ for (unsigned i = 1, e = PHI->getNumOperands(); i != e; i += 2) {
+ unsigned IncomingVal = PHI->getOperand(i).getReg();
+ BBInfo *PredInfo = (*BBMap)[PHI->getOperand(i+1).getMBB()];
+ // Skip to the nearest preceding definition.
+ if (PredInfo->DefBB != PredInfo)
+ PredInfo = PredInfo->DefBB;
+
+ // Check if it matches the expected value.
+ if (PredInfo->AvailableVal) {
+ if (IncomingVal == PredInfo->AvailableVal)
+ continue;
+ return false;
+ }
+
+ // Check if the value is a PHI in the correct block.
+ MachineInstr *IncomingPHIVal = MRI->getVRegDef(IncomingVal);
+ if (!IncomingPHIVal->isPHI() ||
+ IncomingPHIVal->getParent() != PredInfo->BB)
+ return false;
+
+ // If this block has already been visited, check if this PHI matches.
+ if (PredInfo->PHITag) {
+ if (IncomingPHIVal == PredInfo->PHITag)
+ continue;
+ return false;
+ }
+ PredInfo->PHITag = IncomingPHIVal;
+
+ WorkList.push_back(IncomingPHIVal);
+ }
+ }
+ return true;
+}
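
The PHITag protocol is what keeps this check finite when PHIs refer to each other in cycles: a block commits to one candidate PHI on first visit, and every later visit must agree with that commitment. A toy analogue on plain graphs (illustrative; like PHITag it tags only the searched side, so it accepts any consistent matching):

#include <cstddef>
#include <map>
#include <vector>

struct Node { std::vector<Node*> Succs; };

// Can the possibly-cyclic graph at A be matched onto the one at B?
// Tag plays the role of BBInfo::PHITag: the first visit commits, and
// revisits must return the same counterpart or the match fails.
static bool sameShape(Node *A, Node *B, std::map<Node*, Node*> &Tag) {
  std::map<Node*, Node*>::iterator It = Tag.find(A);
  if (It != Tag.end())
    return It->second == B;
  Tag[A] = B;
  if (A->Succs.size() != B->Succs.size())
    return false;
  for (std::size_t i = 0; i != A->Succs.size(); ++i)
    if (!sameShape(A->Succs[i], B->Succs[i], Tag))
      return false;
  return true;
}

int main() {
  Node X, Y, P, Q, Leaf;       // X <-> Y and P <-> Q are two-node cycles.
  X.Succs = {&Y}; Y.Succs = {&X};
  P.Succs = {&Q}; Q.Succs = {&P};
  std::map<Node*, Node*> T1, T2;
  bool OK = sameShape(&X, &P, T1)      // the cycles match
         && !sameShape(&X, &Leaf, T2); // a cycle cannot match a leaf
  return OK ? 0 : 1;
}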
+
+/// RecordMatchingPHI - For a PHI node that matches, record it and its input
+/// PHIs in both the BBMap and the AvailableVals mapping.
+void MachineSSAUpdater::RecordMatchingPHI(MachineInstr *PHI) {
+ BBMapTy *BBMap = getBBMap(BM);
+ AvailableValsTy &AvailableVals = getAvailableVals(AV);
+ SmallVector<MachineInstr*, 20> WorkList;
+ WorkList.push_back(PHI);
+
+ // Record this PHI.
+ MachineBasicBlock *BB = PHI->getParent();
+ AvailableVals[BB] = PHI->getOperand(0).getReg();
+ (*BBMap)[BB]->AvailableVal = PHI->getOperand(0).getReg();
+
+ while (!WorkList.empty()) {
+ PHI = WorkList.pop_back_val();
+
+ // Iterate through the PHI's incoming values.
+ for (unsigned i = 1, e = PHI->getNumOperands(); i != e; i += 2) {
+ unsigned IncomingVal = PHI->getOperand(i).getReg();
+ MachineInstr *IncomingPHIVal = MRI->getVRegDef(IncomingVal);
+ if (!IncomingPHIVal->isPHI()) continue;
+ BB = IncomingPHIVal->getParent();
+ BBInfo *Info = (*BBMap)[BB];
+ if (!Info || Info->AvailableVal)
+ continue;
+
+ // Record the PHI and add it to the worklist.
+ AvailableVals[BB] = IncomingVal;
+ Info->AvailableVal = IncomingVal;
+ WorkList.push_back(IncomingPHIVal);
+ }
+ }
}
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index e65961901578..ef489dc55603 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -37,6 +38,7 @@ namespace {
const TargetRegisterInfo *TRI;
MachineRegisterInfo *RegInfo; // Machine register information
MachineDominatorTree *DT; // Machine dominator tree
+ MachineLoopInfo *LI;
AliasAnalysis *AA;
BitVector AllocatableSet; // Which physregs are allocatable?
@@ -51,7 +53,9 @@ namespace {
MachineFunctionPass::getAnalysisUsage(AU);
AU.addRequired<AliasAnalysis>();
AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
AU.addPreserved<MachineDominatorTree>();
+ AU.addPreserved<MachineLoopInfo>();
}
private:
bool ProcessBlock(MachineBasicBlock &MBB);
@@ -102,6 +106,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
TRI = TM.getRegisterInfo();
RegInfo = &MF.getRegInfo();
DT = &getAnalysis<MachineDominatorTree>();
+ LI = &getAnalysis<MachineLoopInfo>();
AA = &getAnalysis<AliasAnalysis>();
AllocatableSet = TRI->getAllocatableSet(MF);
@@ -276,8 +281,29 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// but for now we just punt.
// FIXME: Split critical edges if not backedges.
if (SuccToSinkTo->pred_size() > 1) {
- DEBUG(dbgs() << " *** PUNTING: Critical edge found\n");
- return false;
+ // We cannot sink a load across a critical edge - there may be stores in
+ // other code paths.
+ bool store = true;
+ if (!MI->isSafeToMove(TII, AA, store)) {
+ DEBUG(dbgs() << " *** PUNTING: Won't sink load along critical edge.\n");
+ return false;
+ }
+
+ // We don't want to sink across a critical edge if we don't dominate the
+ // successor. We could be introducing calculations to new code paths.
+ if (!DT->dominates(ParentBlock, SuccToSinkTo)) {
+ DEBUG(dbgs() << " *** PUNTING: Critical edge found\n");
+ return false;
+ }
+
+ // Don't sink instructions into a loop.
+ if (LI->isLoopHeader(SuccToSinkTo)) {
+ DEBUG(dbgs() << " *** PUNTING: Loop header found\n");
+ return false;
+ }
+
+ // Otherwise we are OK with sinking along a critical edge.
+ DEBUG(dbgs() << "Sinking along critical edge.\n");
}
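
Distilled, the new policy is three ordered checks. The booleans below stand for the MI->isSafeToMove(...), DT->dominates(...) and LI->isLoopHeader(...) queries made above; the helper itself is illustrative, not part of MachineSinking:

// Sink along a critical edge only when all three conditions hold.
static bool canSinkAlongCriticalEdge(bool SafeToMove, bool DominatesSucc,
                                     bool SuccIsLoopHeader) {
  if (!SafeToMove)      return false; // e.g. a load; other paths may store
  if (!DominatesSucc)   return false; // would add work to unrelated paths
  if (SuccIsLoopHeader) return false; // never sink computation into a loop
  return true;
}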
// Determine where to insert into. Skip phi nodes.
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 434a1e87246e..0b75c559827b 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -279,7 +279,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
if (OutFile)
delete OutFile;
else if (foundErrors)
- llvm_report_error("Found "+Twine(foundErrors)+" machine code errors.");
+ report_fatal_error("Found "+Twine(foundErrors)+" machine code errors.");
// Clean up.
regsLive.clear();
@@ -351,8 +351,8 @@ void MachineVerifier::visitMachineFunctionBefore() {
}
// Does iterator point to a and b as the first two elements?
-bool matchPair(MachineBasicBlock::const_succ_iterator i,
- const MachineBasicBlock *a, const MachineBasicBlock *b) {
+static bool matchPair(MachineBasicBlock::const_succ_iterator i,
+ const MachineBasicBlock *a, const MachineBasicBlock *b) {
if (*i == a)
return *++i == b;
if (*i == b)
@@ -470,7 +470,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
}
regsLive.clear();
- for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(),
+ for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
E = MBB->livein_end(); I != E; ++I) {
if (!TargetRegisterInfo::isPhysicalRegister(*I)) {
report("MBB live-in list contains non-physical register", MBB);
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index 424181c02549..d3e1295df3a3 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -46,7 +46,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/Statistic.h"
-#include <map>
#include <set>
using namespace llvm;
@@ -266,6 +265,17 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
// Initialize register live-range state for scheduling in this block.
Scheduler.StartBlock(MBB);
+ // FIXME: Temporary workaround for <rdar://problem/7759363>: The post-RA
+ // scheduler has some sort of problem with DebugValue instructions that
+ // causes an assertion in LeaksContext.h to fail occasionally. Just
+ // remove all those instructions for now.
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ) {
+ MachineInstr *MI = &*I++;
+ if (MI->isDebugValue())
+ MI->eraseFromParent();
+ }
+
// Schedule each sequence of instructions not interrupted by a label
// or anything else that effectively needs to shut down scheduling.
MachineBasicBlock::iterator Current = MBB->end();
@@ -274,7 +284,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
MachineInstr *MI = prior(I);
if (isSchedulingBoundary(MI, Fn)) {
Scheduler.Run(MBB, I, Current, CurrentCount);
- Scheduler.EmitSchedule(0);
+ Scheduler.EmitSchedule();
Current = MI;
CurrentCount = Count - 1;
Scheduler.Observe(MI, CurrentCount);
@@ -286,7 +296,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
assert((MBB->begin() == Current || CurrentCount != 0) &&
"Instruction count mismatch!");
Scheduler.Run(MBB, MBB->begin(), Current, CurrentCount);
- Scheduler.EmitSchedule(0);
+ Scheduler.EmitSchedule();
// Clean up register live-range state.
Scheduler.FinishBlock();
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 27cb566d9512..a454b627afd0 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -131,10 +131,10 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
/// pseudo instructions.
void PEI::calculateCallsInformation(MachineFunction &Fn) {
const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
- MachineFrameInfo *FFI = Fn.getFrameInfo();
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
unsigned MaxCallFrameSize = 0;
- bool HasCalls = FFI->hasCalls();
+ bool HasCalls = MFI->hasCalls();
// Get the function call frame set-up and tear-down instruction opcode
int FrameSetupOpcode = RegInfo->getCallFrameSetupOpcode();
@@ -162,8 +162,8 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) {
HasCalls = true;
}
- FFI->setHasCalls(HasCalls);
- FFI->setMaxCallFrameSize(MaxCallFrameSize);
+ MFI->setHasCalls(HasCalls);
+ MFI->setMaxCallFrameSize(MaxCallFrameSize);
for (std::vector<MachineBasicBlock::iterator>::iterator
i = FrameSDOps.begin(), e = FrameSDOps.end(); i != e; ++i) {
@@ -184,7 +184,7 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) {
void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo();
- MachineFrameInfo *FFI = Fn.getFrameInfo();
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
// Get the callee saved register list...
const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn);
@@ -197,6 +197,10 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
if (CSRegs == 0 || CSRegs[0] == 0)
return;
+ // In Naked functions we aren't going to save any registers.
+ if (Fn.getFunction()->hasFnAttr(Attribute::Naked))
+ return;
+
// Figure out which *callee saved* registers are modified by the current
// function, thus needing to be saved and restored in the prolog/epilog.
const TargetRegisterClass * const *CSRegClasses =
@@ -255,19 +259,19 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
// the TargetRegisterClass if the stack alignment is smaller. Use the
// min.
Align = std::min(Align, StackAlign);
- FrameIdx = FFI->CreateStackObject(RC->getSize(), Align, true);
+ FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true);
if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
} else {
// Spill it to the stack where we must.
- FrameIdx = FFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset,
+ FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset,
true, false);
}
I->setFrameIdx(FrameIdx);
}
- FFI->setCalleeSavedInfo(CSI);
+ MFI->setCalleeSavedInfo(CSI);
}
/// insertCSRSpillsAndRestores - Insert spill and restore code for
@@ -275,10 +279,10 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
///
void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
// Get callee saved register information.
- MachineFrameInfo *FFI = Fn.getFrameInfo();
- const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo();
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
- FFI->setCalleeSavedInfoValid(true);
+ MFI->setCalleeSavedInfoValid(true);
// Early exit if no callee saved registers are modified!
if (CSI.empty())
@@ -436,14 +440,14 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
static inline void
-AdjustStackOffset(MachineFrameInfo *FFI, int FrameIdx,
+AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
bool StackGrowsDown, int64_t &Offset,
unsigned &MaxAlign) {
// If the stack grows down, add the object size to find the lowest address.
if (StackGrowsDown)
- Offset += FFI->getObjectSize(FrameIdx);
+ Offset += MFI->getObjectSize(FrameIdx);
- unsigned Align = FFI->getObjectAlignment(FrameIdx);
+ unsigned Align = MFI->getObjectAlignment(FrameIdx);
// If the alignment of this object is greater than that of the stack, then
// increase the stack alignment to match.
@@ -453,10 +457,10 @@ AdjustStackOffset(MachineFrameInfo *FFI, int FrameIdx,
Offset = (Offset + Align - 1) / Align * Align;
if (StackGrowsDown) {
- FFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset
+ MFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset
} else {
- FFI->setObjectOffset(FrameIdx, Offset);
- Offset += FFI->getObjectSize(FrameIdx);
+ MFI->setObjectOffset(FrameIdx, Offset);
+ Offset += MFI->getObjectSize(FrameIdx);
}
}
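
The expression (Offset + Align - 1) / Align * Align is the round-up-to-a-multiple idiom: with Offset = 13 and Align = 8 it yields (13 + 7) / 8 * 8 = 16, and exact multiples of Align are left unchanged. A minimal check (illustrative):

#include <cassert>

static long long roundUp(long long Offset, unsigned Align) {
  return (Offset + Align - 1) / Align * Align;
}

int main() {
  assert(roundUp(13, 8) == 16);
  assert(roundUp(16, 8) == 16);
  return 0;
}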
@@ -470,7 +474,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
// Loop over all of the stack objects, assigning sequential addresses...
- MachineFrameInfo *FFI = Fn.getFrameInfo();
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
// Start at the beginning of the local area.
// The Offset is the distance from the stack top in the direction
@@ -487,17 +491,17 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// We currently don't support filling in holes in between fixed sized
// objects, so we adjust 'Offset' to point to the end of last fixed sized
// preallocated object.
- for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
+ for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
int64_t FixedOff;
if (StackGrowsDown) {
// The maximum distance from the stack pointer is at lower address of
// the object -- which is given by offset. For down growing stack
// the offset is negative, so we negate the offset to get the distance.
- FixedOff = -FFI->getObjectOffset(i);
+ FixedOff = -MFI->getObjectOffset(i);
} else {
// The maximum distance from the start pointer is at the upper
// address of the object.
- FixedOff = FFI->getObjectOffset(i) + FFI->getObjectSize(i);
+ FixedOff = MFI->getObjectOffset(i) + MFI->getObjectSize(i);
}
if (FixedOff > Offset) Offset = FixedOff;
}
@@ -506,29 +510,29 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// callee saved registers.
if (StackGrowsDown) {
for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) {
- // If stack grows down, we need to add size of find the lowest
+ // If the stack grows down, we need to add the size to find the lowest
// address of the object.
- Offset += FFI->getObjectSize(i);
+ Offset += MFI->getObjectSize(i);
- unsigned Align = FFI->getObjectAlignment(i);
+ unsigned Align = MFI->getObjectAlignment(i);
// Adjust to alignment boundary
Offset = (Offset+Align-1)/Align*Align;
- FFI->setObjectOffset(i, -Offset); // Set the computed offset
+ MFI->setObjectOffset(i, -Offset); // Set the computed offset
}
} else {
int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex;
for (int i = MaxCSFI; i >= MinCSFI ; --i) {
- unsigned Align = FFI->getObjectAlignment(i);
+ unsigned Align = MFI->getObjectAlignment(i);
// Adjust to alignment boundary
Offset = (Offset+Align-1)/Align*Align;
- FFI->setObjectOffset(i, Offset);
- Offset += FFI->getObjectSize(i);
+ MFI->setObjectOffset(i, Offset);
+ Offset += MFI->getObjectSize(i);
}
}
- unsigned MaxAlign = FFI->getMaxAlignment();
+ unsigned MaxAlign = MFI->getMaxAlignment();
// Make sure the special register scavenging spill slot is closest to the
// frame pointer if a frame pointer is required.
@@ -536,28 +540,28 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
if (RS && RegInfo->hasFP(Fn) && !RegInfo->needsStackRealignment(Fn)) {
int SFI = RS->getScavengingFrameIndex();
if (SFI >= 0)
- AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign);
+ AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
}
// Make sure that the stack protector comes before the local variables on the
// stack.
- if (FFI->getStackProtectorIndex() >= 0)
- AdjustStackOffset(FFI, FFI->getStackProtectorIndex(), StackGrowsDown,
+ if (MFI->getStackProtectorIndex() >= 0)
+ AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown,
Offset, MaxAlign);
// Then assign frame offsets to stack objects that are not used to spill
// callee saved registers.
- for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
continue;
if (RS && (int)i == RS->getScavengingFrameIndex())
continue;
- if (FFI->isDeadObjectIndex(i))
+ if (MFI->isDeadObjectIndex(i))
continue;
- if (FFI->getStackProtectorIndex() == (int)i)
+ if (MFI->getStackProtectorIndex() == (int)i)
continue;
- AdjustStackOffset(FFI, i, StackGrowsDown, Offset, MaxAlign);
+ AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign);
}
// Make sure the special register scavenging spill slot is closest to the
@@ -565,15 +569,15 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
if (RS && (!RegInfo->hasFP(Fn) || RegInfo->needsStackRealignment(Fn))) {
int SFI = RS->getScavengingFrameIndex();
if (SFI >= 0)
- AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign);
+ AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
}
if (!RegInfo->targetHandlesStackFrameRounding()) {
// If we have reserved argument space for call sites in the function
// immediately on entry to the current function, count it as part of the
// overall stack size.
- if (FFI->hasCalls() && RegInfo->hasReservedCallFrame(Fn))
- Offset += FFI->getMaxCallFrameSize();
+ if (MFI->hasCalls() && RegInfo->hasReservedCallFrame(Fn))
+ Offset += MFI->getMaxCallFrameSize();
// Round up the size to a multiple of the alignment. If the function has
// any calls or alloca's, align to the target's StackAlignment value to
@@ -581,8 +585,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// otherwise, for leaf functions, align to the TransientStackAlignment
// value.
unsigned StackAlign;
- if (FFI->hasCalls() || FFI->hasVarSizedObjects() ||
- (RegInfo->needsStackRealignment(Fn) && FFI->getObjectIndexEnd() != 0))
+ if (MFI->hasCalls() || MFI->hasVarSizedObjects() ||
+ (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0))
StackAlign = TFI.getStackAlignment();
else
StackAlign = TFI.getTransientStackAlignment();
@@ -594,7 +598,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
}
// Update frame info to pretend that this is part of the stack...
- FFI->setStackSize(Offset - LocalAreaOffset);
+ MFI->setStackSize(Offset - LocalAreaOffset);
}
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
new file mode 100644
index 000000000000..2caf1dfa38e7
--- /dev/null
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -0,0 +1,932 @@
+//===-- RegAllocFast.cpp - A fast register allocator for debug code -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This register allocator allocates registers to a basic block at a time,
+// attempting to keep values in registers and reusing registers as appropriate.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "llvm/BasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumStores, "Number of stores added");
+STATISTIC(NumLoads , "Number of loads added");
+
+static RegisterRegAlloc
+ fastRegAlloc("fast", "fast register allocator", createFastRegisterAllocator);
+
+namespace {
+ class RAFast : public MachineFunctionPass {
+ public:
+ static char ID;
+ RAFast() : MachineFunctionPass(&ID), StackSlotForVirtReg(-1) {}
+ private:
+ const TargetMachine *TM;
+ MachineFunction *MF;
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+
+ // StackSlotForVirtReg - Maps virtual regs to the frame index where these
+ // values are spilled.
+ IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;
+
+ // Virt2PhysRegMap - This map contains entries for each virtual register
+ // that is currently available in a physical register.
+ IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2PhysRegMap;
+
+ unsigned &getVirt2PhysRegMapSlot(unsigned VirtReg) {
+ return Virt2PhysRegMap[VirtReg];
+ }
+
+ // PhysRegsUsed - This array is effectively a map, containing entries for
+ // each physical register that currently has a value (i.e., it is in
+ // Virt2PhysRegMap). The value mapped to is the virtual register
+ // corresponding to the physical register (the inverse of the
+ // Virt2PhysRegMap), or 0. The value is set to 0 if this register is pinned
+ // because it is used by a future instruction, and to -2 if it is not
+ // allocatable. If the entry for a physical register is -1, then the
+ // physical register is "not in the map".
+ //
+ std::vector<int> PhysRegsUsed;
+
+ // UsedInInstr - BitVector of physregs that are used in the current
+ // instruction, and so cannot be allocated.
+ BitVector UsedInInstr;
+
+ // Virt2LastUseMap - This maps each virtual register to its last use
+ // (MachineInstr*, operand index pair).
+ IndexedMap<std::pair<MachineInstr*, unsigned>, VirtReg2IndexFunctor>
+ Virt2LastUseMap;
+
+ std::pair<MachineInstr*,unsigned>& getVirtRegLastUse(unsigned Reg) {
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
+ return Virt2LastUseMap[Reg];
+ }
+
+ // VirtRegModified - This bitset contains information about which virtual
+ // registers need to be spilled back to memory when their registers are
+ // scavenged. If a virtual register has simply been rematerialized, there
+ // is no reason to spill it to memory when we need the register back.
+ //
+ BitVector VirtRegModified;
+
+ // UsedInMultipleBlocks - Tracks whether a particular register is used in
+ // more than one block.
+ BitVector UsedInMultipleBlocks;
+
+ void markVirtRegModified(unsigned Reg, bool Val = true) {
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
+ Reg -= TargetRegisterInfo::FirstVirtualRegister;
+ if (Val)
+ VirtRegModified.set(Reg);
+ else
+ VirtRegModified.reset(Reg);
+ }
+
+ bool isVirtRegModified(unsigned Reg) const {
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
+ assert(Reg - TargetRegisterInfo::FirstVirtualRegister <
+ VirtRegModified.size() && "Illegal virtual register!");
+ return VirtRegModified[Reg - TargetRegisterInfo::FirstVirtualRegister];
+ }
+
+ public:
+ virtual const char *getPassName() const {
+ return "Fast Register Allocator";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<LiveVariables>();
+ AU.addRequiredID(PHIEliminationID);
+ AU.addRequiredID(TwoAddressInstructionPassID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ /// runOnMachineFunction - Register allocate the whole function
+ bool runOnMachineFunction(MachineFunction &Fn);
+
+ /// AllocateBasicBlock - Register allocate the specified basic block.
+ void AllocateBasicBlock(MachineBasicBlock &MBB);
+
+
+ /// areRegsEqual - This method returns true if the specified registers are
+ /// related to each other. To do this, it checks to see if they are equal
+ /// or if the first register is in the alias set of the second register.
+ ///
+ bool areRegsEqual(unsigned R1, unsigned R2) const {
+ if (R1 == R2) return true;
+ for (const unsigned *AliasSet = TRI->getAliasSet(R2);
+ *AliasSet; ++AliasSet) {
+ if (*AliasSet == R1) return true;
+ }
+ return false;
+ }
+
+ /// getStackSpaceFor - This returns the frame index of the specified virtual
+ /// register on the stack, allocating space if necessary.
+ int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC);
+
+ /// removePhysReg - This method marks the specified physical register as no
+ /// longer being in use.
+ ///
+ void removePhysReg(unsigned PhysReg);
+
+ /// spillVirtReg - This method spills the value specified by PhysReg into
+ /// the virtual register slot specified by VirtReg. It then updates the RA
+ /// data structures to indicate the fact that PhysReg is now available.
+ ///
+ void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ unsigned VirtReg, unsigned PhysReg);
+
+ /// spillPhysReg - This method spills the specified physical register into
+ /// the virtual register slot associated with it. If OnlyVirtRegs is set to
+ /// true, then the request is ignored if the physical register does not
+ /// contain a virtual register.
+ ///
+ void spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I,
+ unsigned PhysReg, bool OnlyVirtRegs = false);
+
+ /// assignVirtToPhysReg - This method updates local state so that we know
+ /// that PhysReg is the proper container for VirtReg now. The physical
+ /// register must not be used for anything else when this is called.
+ ///
+ void assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg);
+
+ /// isPhysRegAvailable - Return true if the specified physical register is
+ /// free and available for use. This also includes checking to see if
+ /// aliased registers are all free...
+ ///
+ bool isPhysRegAvailable(unsigned PhysReg) const;
+
+ /// isPhysRegSpillable - Can PhysReg be freed by spilling?
+ bool isPhysRegSpillable(unsigned PhysReg) const;
+
+ /// getFreeReg - Look to see if there is a free register available in the
+ /// specified register class. If not, return 0.
+ ///
+ unsigned getFreeReg(const TargetRegisterClass *RC);
+
+ /// getReg - Find a physical register to hold the specified virtual
+ /// register. If all compatible physical registers are used, this method
+ /// spills the last used virtual register to the stack, and uses that
+ /// register. If NoFree is true, the caller already knows there isn't
+ /// a free register, so getFreeReg() is not called.
+ unsigned getReg(MachineBasicBlock &MBB, MachineInstr *MI,
+ unsigned VirtReg, bool NoFree = false);
+
+ /// reloadVirtReg - This method transforms the specified virtual
+ /// register use to refer to a physical register. This method may do this
+ /// in one of several ways: if the register is available in a physical
+ /// register already, it uses that physical register. If the value is not
+ /// in a physical register, and if there are physical registers available,
+ /// it loads it into a register: PhysReg if that is an available physical
+ /// register, otherwise any physical register of the right class.
+ /// If register pressure is high, and it is possible, it tries to fold the
+ /// load of the virtual register into the instruction itself. It avoids
+ /// doing this if register pressure is low to improve the chance that
+ /// subsequent instructions can use the reloaded value. This method
+ /// returns the modified instruction.
+ ///
+ MachineInstr *reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
+ unsigned OpNum, SmallSet<unsigned, 4> &RRegs,
+ unsigned PhysReg);
+
+ void reloadPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I,
+ unsigned PhysReg);
+ };
+ char RAFast::ID = 0;
+}
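
The sentinel scheme in PhysRegsUsed is easiest to read as a four-way state. The decoder below is illustrative only; RAFast compares the raw values directly:

#include <cstdio>
#include <initializer_list>

static const char *physRegState(int Entry) {
  if (Entry == -2) return "not allocatable";
  if (Entry == -1) return "free (not in the map)";
  if (Entry == 0)  return "pinned: reserved, holds no virtual register";
  return "holds the virtual register whose number it stores";
}

int main() {
  for (int E : {-2, -1, 0, 1024})
    std::printf("%5d -> %s\n", E, physRegState(E));
  return 0;
}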
+
+/// getStackSpaceFor - This allocates space for the specified virtual register
+/// to be held on the stack.
+int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) {
+ // Find the location where Reg would belong...
+ int SS = StackSlotForVirtReg[VirtReg];
+ if (SS != -1)
+ return SS; // Already has space allocated?
+
+ // Allocate a new stack object for this spill location...
+ int FrameIdx = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
+ RC->getAlignment());
+
+ // Assign the slot.
+ StackSlotForVirtReg[VirtReg] = FrameIdx;
+ return FrameIdx;
+}
+
+
+/// removePhysReg - This method marks the specified physical register as no
+/// longer being in use.
+///
+void RAFast::removePhysReg(unsigned PhysReg) {
+ PhysRegsUsed[PhysReg] = -1; // PhysReg no longer used
+}
+
+
+/// spillVirtReg - This method spills the value specified by PhysReg into the
+/// virtual register slot specified by VirtReg. It then updates the RA data
+/// structures to indicate the fact that PhysReg is now available.
+///
+void RAFast::spillVirtReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned VirtReg, unsigned PhysReg) {
+ assert(VirtReg && "Spilling a physical register is illegal!"
+ " Must not have appropriate kill for the register or use exists beyond"
+ " the intended one.");
+ DEBUG(dbgs() << " Spilling register " << TRI->getName(PhysReg)
+ << " containing %reg" << VirtReg);
+
+ if (!isVirtRegModified(VirtReg)) {
+ DEBUG(dbgs() << " which has not been modified, so no store necessary!");
+ std::pair<MachineInstr*, unsigned> &LastUse = getVirtRegLastUse(VirtReg);
+ if (LastUse.first)
+ LastUse.first->getOperand(LastUse.second).setIsKill();
+ } else {
+ // Otherwise, there is a virtual register corresponding to this physical
+ // register. We only need to spill it into its stack slot if it has been
+ // modified.
+ const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
+ int FrameIndex = getStackSpaceFor(VirtReg, RC);
+ DEBUG(dbgs() << " to stack slot #" << FrameIndex);
+ // If the instruction reads the register that's spilled, (e.g. this can
+ // happen if it is a move to a physical register), then the spill
+ // instruction is not a kill.
+ bool isKill = !(I != MBB.end() && I->readsRegister(PhysReg));
+ TII->storeRegToStackSlot(MBB, I, PhysReg, isKill, FrameIndex, RC);
+ ++NumStores; // Update statistics
+ }
+
+ getVirt2PhysRegMapSlot(VirtReg) = 0; // VirtReg no longer available
+
+ DEBUG(dbgs() << '\n');
+ removePhysReg(PhysReg);
+}
+
+
+/// spillPhysReg - This method spills the specified physical register into the
+/// virtual register slot associated with it. If OnlyVirtRegs is set to true,
+/// then the request is ignored if the physical register does not contain a
+/// virtual register.
+///
+void RAFast::spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I,
+ unsigned PhysReg, bool OnlyVirtRegs) {
+ if (PhysRegsUsed[PhysReg] != -1) { // Only spill it if it's used!
+ assert(PhysRegsUsed[PhysReg] != -2 && "Non-allocatable reg used!");
+ if (PhysRegsUsed[PhysReg] || !OnlyVirtRegs)
+ spillVirtReg(MBB, I, PhysRegsUsed[PhysReg], PhysReg);
+ return;
+ }
+
+ // If the selected register aliases any other registers, we must make
+ // sure that one of the aliases isn't alive.
+ for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
+ *AliasSet; ++AliasSet) {
+ if (PhysRegsUsed[*AliasSet] == -1 || // Spill aliased register.
+ PhysRegsUsed[*AliasSet] == -2) // If allocatable.
+ continue;
+
+ if (PhysRegsUsed[*AliasSet])
+ spillVirtReg(MBB, I, PhysRegsUsed[*AliasSet], *AliasSet);
+ }
+}
+
+
+/// assignVirtToPhysReg - This method updates local state so that we know
+/// that PhysReg is the proper container for VirtReg now. The physical
+/// register must not be used for anything else when this is called.
+///
+void RAFast::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) {
+ assert(PhysRegsUsed[PhysReg] == -1 && "Phys reg already assigned!");
+ // Update information to note the fact that this register was just used, and
+ // it holds VirtReg.
+ PhysRegsUsed[PhysReg] = VirtReg;
+ getVirt2PhysRegMapSlot(VirtReg) = PhysReg;
+ UsedInInstr.set(PhysReg);
+}
+
+
+/// isPhysRegAvailable - Return true if the specified physical register is free
+/// and available for use. This also includes checking to see if aliased
+/// registers are all free...
+///
+bool RAFast::isPhysRegAvailable(unsigned PhysReg) const {
+ if (PhysRegsUsed[PhysReg] != -1) return false;
+
+ // If the selected register aliases any other allocated registers, it is
+ // not free!
+ for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
+ *AliasSet; ++AliasSet)
+ if (PhysRegsUsed[*AliasSet] >= 0) // Aliased register in use?
+ return false; // Can't use this reg then.
+ return true;
+}
+
+/// isPhysRegSpillable - Return true if the specified physical register can be
+/// spilled for use in the current instruction.
+///
+bool RAFast::isPhysRegSpillable(unsigned PhysReg) const {
+ // Test that PhysReg and all aliases are either free or assigned to a VirtReg
+ // that is not used in the instruction.
+ if (PhysRegsUsed[PhysReg] != -1 &&
+ (PhysRegsUsed[PhysReg] <= 0 || UsedInInstr.test(PhysReg)))
+ return false;
+
+ for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
+ *AliasSet; ++AliasSet)
+ if (PhysRegsUsed[*AliasSet] != -1 &&
+ (PhysRegsUsed[*AliasSet] <= 0 || UsedInInstr.test(*AliasSet)))
+ return false;
+ return true;
+}
+
+
+/// getFreeReg - Look to see if there is a free register available in the
+/// specified register class. If not, return 0.
+///
+unsigned RAFast::getFreeReg(const TargetRegisterClass *RC) {
+ // Get iterators defining the range of registers that are valid to allocate in
+ // this class, which also specifies the preferred allocation order.
+ TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF);
+ TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF);
+
+ for (; RI != RE; ++RI)
+ if (isPhysRegAvailable(*RI)) { // Is reg unused?
+ assert(*RI != 0 && "Cannot use register!");
+ return *RI; // Found an unused register!
+ }
+ return 0;
+}
+
+
+/// getReg - Find a physical register to hold the specified virtual
+/// register. If all compatible physical registers are used, this method spills
+/// the last used virtual register to the stack, and uses that register.
+///
+unsigned RAFast::getReg(MachineBasicBlock &MBB, MachineInstr *I,
+ unsigned VirtReg, bool NoFree) {
+ const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
+
+ // First check to see if we have a free register of the requested type...
+ unsigned PhysReg = NoFree ? 0 : getFreeReg(RC);
+
+ if (PhysReg != 0) {
+ // Assign the register.
+ assignVirtToPhysReg(VirtReg, PhysReg);
+ return PhysReg;
+ }
+
+ // If we didn't find an unused register, scavenge one now! Don't be fancy,
+ // just grab the first possible register.
+ TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF);
+ TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF);
+
+ for (; RI != RE; ++RI)
+ if (isPhysRegSpillable(*RI)) {
+ PhysReg = *RI;
+ break;
+ }
+
+ assert(PhysReg && "Physical register not assigned!?!?");
+ spillPhysReg(MBB, I, PhysReg);
+ assignVirtToPhysReg(VirtReg, PhysReg);
+ return PhysReg;
+}
+
+
+/// reloadVirtReg - This method transforms the specified virtual
+/// register use to refer to a physical register. This method may do this in
+/// one of several ways: if the register is available in a physical register
+/// already, it uses that physical register. If the value is not in a physical
+/// register, and if there are physical registers available, it loads it into a
+/// register: PhysReg if that is an available physical register, otherwise any
+/// register. If register pressure is high, and it is possible, it tries to
+/// fold the load of the virtual register into the instruction itself. It
+/// avoids doing this if register pressure is low to improve the chance that
+/// subsequent instructions can use the reloaded value. This method returns
+/// the modified instruction.
+///
+MachineInstr *RAFast::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
+ unsigned OpNum,
+ SmallSet<unsigned, 4> &ReloadedRegs,
+ unsigned PhysReg) {
+ unsigned VirtReg = MI->getOperand(OpNum).getReg();
+
+ // If the virtual register is already available, just update the instruction
+ // and return.
+ if (unsigned PR = getVirt2PhysRegMapSlot(VirtReg)) {
+ MI->getOperand(OpNum).setReg(PR); // Assign the input register
+ if (!MI->isDebugValue()) {
+ // Do not do these for DBG_VALUE as they can affect codegen.
+ UsedInInstr.set(PR);
+ getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum);
+ }
+ return MI;
+ }
+
+ // Otherwise, we need to fold it into the current instruction, or reload it.
+ // If we have registers available to hold the value, use them.
+ const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
+ // If we already have a PhysReg (this happens when the instruction is a
+ // reg-to-reg copy with a PhysReg destination) use that.
+ if (!PhysReg || !TargetRegisterInfo::isPhysicalRegister(PhysReg) ||
+ !isPhysRegAvailable(PhysReg))
+ PhysReg = getFreeReg(RC);
+ int FrameIndex = getStackSpaceFor(VirtReg, RC);
+
+ if (PhysReg) { // Register is available, allocate it!
+ assignVirtToPhysReg(VirtReg, PhysReg);
+ } else { // No registers available.
+ // Force some poor hapless value out of the register file to
+ // make room for the new register, and reload it.
+ PhysReg = getReg(MBB, MI, VirtReg, true);
+ }
+
+ markVirtRegModified(VirtReg, false); // Note that this reg was just reloaded
+
+ DEBUG(dbgs() << " Reloading %reg" << VirtReg << " into "
+ << TRI->getName(PhysReg) << "\n");
+
+ // Add move instruction(s)
+ TII->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC);
+ ++NumLoads; // Update statistics
+
+ MF->getRegInfo().setPhysRegUsed(PhysReg);
+ MI->getOperand(OpNum).setReg(PhysReg); // Assign the input register
+ getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum);
+
+ if (!ReloadedRegs.insert(PhysReg)) {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Ran out of registers during register allocation!";
+ if (MI->isInlineAsm()) {
+ Msg << "\nPlease check your inline asm statement for invalid "
+ << "constraints:\n";
+ MI->print(Msg, TM);
+ }
+ report_fatal_error(Msg.str());
+ }
+ for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg);
+ *SubRegs; ++SubRegs) {
+ if (ReloadedRegs.insert(*SubRegs)) continue;
+
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Ran out of registers during register allocation!";
+ if (MI->isInlineAsm()) {
+ Msg << "\nPlease check your inline asm statement for invalid "
+ << "constraints:\n";
+ MI->print(Msg, TM);
+ }
+ report_fatal_error(Msg.str());
+ }
+
+ return MI;
+}
+
+/// isReadModWriteImplicitKill - True if this is an implicit kill for a
+/// read/mod/write register, i.e. update partial register.
+static bool isReadModWriteImplicitKill(MachineInstr *MI, unsigned Reg) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() &&
+ MO.isDef() && !MO.isDead())
+ return true;
+ }
+ return false;
+}
+
+/// isReadModWriteImplicitDef - True if this is an implicit def for a
+/// read/mod/write register, i.e. update partial register.
+static bool isReadModWriteImplicitDef(MachineInstr *MI, unsigned Reg) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() &&
+ !MO.isDef() && MO.isKill())
+ return true;
+ }
+ return false;
+}
+
+void RAFast::AllocateBasicBlock(MachineBasicBlock &MBB) {
+ // Loop over each instruction.
+ MachineBasicBlock::iterator MII = MBB.begin();
+
+ DEBUG({
+ const BasicBlock *LBB = MBB.getBasicBlock();
+ if (LBB)
+ dbgs() << "\nStarting RegAlloc of BB: " << LBB->getName();
+ });
+
+ // Add live-in registers as active.
+ for (MachineBasicBlock::livein_iterator I = MBB.livein_begin(),
+ E = MBB.livein_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ MF->getRegInfo().setPhysRegUsed(Reg);
+ PhysRegsUsed[Reg] = 0; // It is free and reserved now
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ *SubRegs; ++SubRegs) {
+ if (PhysRegsUsed[*SubRegs] == -2) continue;
+ PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now
+ MF->getRegInfo().setPhysRegUsed(*SubRegs);
+ }
+ }
+
+ // Now, sequentially allocate each instruction in the MBB.
+ while (MII != MBB.end()) {
+ MachineInstr *MI = MII++;
+ const TargetInstrDesc &TID = MI->getDesc();
+ DEBUG({
+ dbgs() << "\nStarting RegAlloc of: " << *MI;
+ dbgs() << " Regs have values: ";
+ for (unsigned i = 0; i != TRI->getNumRegs(); ++i)
+ if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2)
+ dbgs() << "[" << TRI->getName(i)
+ << ",%reg" << PhysRegsUsed[i] << "] ";
+ dbgs() << '\n';
+ });
+
+ // Track registers used by instruction.
+ UsedInInstr.reset();
+
+ // Determine whether this is a copy instruction. The cases where the
+ // source or destination are phys regs are handled specially.
+ unsigned SrcCopyReg, DstCopyReg, SrcCopySubReg, DstCopySubReg;
+ unsigned SrcCopyPhysReg = 0U;
+ bool isCopy = TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg,
+ SrcCopySubReg, DstCopySubReg);
+ if (isCopy && TargetRegisterInfo::isVirtualRegister(SrcCopyReg))
+ SrcCopyPhysReg = getVirt2PhysRegMapSlot(SrcCopyReg);
+
+ // Loop over the implicit uses, making sure they don't get reallocated.
+ if (TID.ImplicitUses) {
+ for (const unsigned *ImplicitUses = TID.ImplicitUses;
+ *ImplicitUses; ++ImplicitUses)
+ UsedInInstr.set(*ImplicitUses);
+ }
+
+ SmallVector<unsigned, 8> Kills;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isKill()) continue;
+
+ if (!MO.isImplicit())
+ Kills.push_back(MO.getReg());
+ else if (!isReadModWriteImplicitKill(MI, MO.getReg()))
+ // These are extra physical register kills when a sub-register
+ // is defined (def of a sub-register is a read/mod/write of the
+ // larger registers). Ignore.
+ Kills.push_back(MO.getReg());
+ }
+
+ // If any physical regs are earlyclobber, spill any value they might
+ // have in them, then mark them unallocatable.
+ // If any virtual regs are earlyclobber, allocate them now (before
+ // freeing inputs that are killed).
+ if (MI->isInlineAsm()) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef() || !MO.isEarlyClobber() ||
+ !MO.getReg())
+ continue;
+
+ if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ unsigned DestVirtReg = MO.getReg();
+ unsigned DestPhysReg;
+
+ // If DestVirtReg already has a value, use it.
+ if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg)))
+ DestPhysReg = getReg(MBB, MI, DestVirtReg);
+ MF->getRegInfo().setPhysRegUsed(DestPhysReg);
+ markVirtRegModified(DestVirtReg);
+ getVirtRegLastUse(DestVirtReg) =
+ std::make_pair((MachineInstr*)0, 0);
+ DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg)
+ << " to %reg" << DestVirtReg << "\n");
+ MO.setReg(DestPhysReg); // Assign the earlyclobber register
+ } else {
+ unsigned Reg = MO.getReg();
+ if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP.
+ // These are extra physical register defs when a sub-register
+ // is defined (def of a sub-register is a read/mod/write of the
+ // larger registers). Ignore.
+ if (isReadModWriteImplicitDef(MI, MO.getReg())) continue;
+
+ MF->getRegInfo().setPhysRegUsed(Reg);
+ spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg
+ PhysRegsUsed[Reg] = 0; // It is free and reserved now
+
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ *SubRegs; ++SubRegs) {
+ if (PhysRegsUsed[*SubRegs] == -2) continue;
+ MF->getRegInfo().setPhysRegUsed(*SubRegs);
+ PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now
+ }
+ }
+ }
+ }
+
+ // If a DBG_VALUE says something is located in a spilled register,
+ // change the DBG_VALUE to be undef, which prevents the register
+ // from being reloaded here. Doing that would change the generated
+ // code, unless another use immediately follows this instruction.
+ if (MI->isDebugValue() &&
+ MI->getNumOperands()==3 && MI->getOperand(0).isReg()) {
+ unsigned VirtReg = MI->getOperand(0).getReg();
+ if (VirtReg && TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ !getVirt2PhysRegMapSlot(VirtReg))
+ MI->getOperand(0).setReg(0U);
+ }
+
+ // Get the used operands into registers. This has the potential to spill
+ // incoming values if we are out of registers. Note that we completely
+ // ignore physical register uses here. We assume that if an explicit
+ // physical register is referenced by the instruction, that it is guaranteed
+ // to be live-in, or the input is badly hosed.
+ //
+ SmallSet<unsigned, 4> ReloadedRegs;
+ for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ // Here we are looking only for use operands (never def&use).
+ if (MO.isReg() && !MO.isDef() && MO.getReg() && !MO.isImplicit() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ MI = reloadVirtReg(MBB, MI, i, ReloadedRegs,
+ isCopy ? DstCopyReg : 0);
+ }
+
+ // If this instruction is the last user of this register, kill the
+ // value, freeing the register being used, so it doesn't need to be
+ // spilled to memory.
+ //
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i) {
+ unsigned VirtReg = Kills[i];
+ unsigned PhysReg = VirtReg;
+ if (TargetRegisterInfo::isVirtualRegister(VirtReg)) {
+ // If the virtual register was never materialized into a register, it
+ // might not be in the map, but it won't hurt to zero it out anyway.
+ unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg);
+ PhysReg = PhysRegSlot;
+ PhysRegSlot = 0;
+ } else if (PhysRegsUsed[PhysReg] == -2) {
+ // Unallocatable register dead, ignore.
+ continue;
+ } else {
+ assert((!PhysRegsUsed[PhysReg] || PhysRegsUsed[PhysReg] == -1) &&
+ "Silently clearing a virtual register?");
+ }
+
+ if (!PhysReg) continue;
+
+ DEBUG(dbgs() << " Last use of " << TRI->getName(PhysReg)
+ << "[%reg" << VirtReg <<"], removing it from live set\n");
+ removePhysReg(PhysReg);
+ for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg);
+ *SubRegs; ++SubRegs) {
+ if (PhysRegsUsed[*SubRegs] != -2) {
+ DEBUG(dbgs() << " Last use of "
+ << TRI->getName(*SubRegs) << "[%reg" << VirtReg
+ <<"], removing it from live set\n");
+ removePhysReg(*SubRegs);
+ }
+ }
+ }
+
+ // Track registers defined by instruction.
+ UsedInInstr.reset();
+
+ // Loop over all of the operands of the instruction, spilling registers that
+ // are defined, and marking explicit destinations in the PhysRegsUsed map.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef() || MO.isImplicit() || !MO.getReg() ||
+ MO.isEarlyClobber() ||
+ !TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ continue;
+
+ unsigned Reg = MO.getReg();
+ if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP.
+ // These are extra physical register defs when a sub-register
+ // is defined (def of a sub-register is a read/mod/write of the
+ // larger registers). Ignore.
+ if (isReadModWriteImplicitDef(MI, MO.getReg())) continue;
+
+ MF->getRegInfo().setPhysRegUsed(Reg);
+ spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg
+ PhysRegsUsed[Reg] = 0; // It is free and reserved now
+
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ *SubRegs; ++SubRegs) {
+ if (PhysRegsUsed[*SubRegs] == -2) continue;
+
+ MF->getRegInfo().setPhysRegUsed(*SubRegs);
+ PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now
+ }
+ }
+
+ // Loop over the implicit defs, spilling them as well.
+ if (TID.ImplicitDefs) {
+ for (const unsigned *ImplicitDefs = TID.ImplicitDefs;
+ *ImplicitDefs; ++ImplicitDefs) {
+ unsigned Reg = *ImplicitDefs;
+ if (PhysRegsUsed[Reg] != -2) {
+ spillPhysReg(MBB, MI, Reg, true);
+ PhysRegsUsed[Reg] = 0; // It is free and reserved now
+ }
+ MF->getRegInfo().setPhysRegUsed(Reg);
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ *SubRegs; ++SubRegs) {
+ if (PhysRegsUsed[*SubRegs] == -2) continue;
+
+ PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now
+ MF->getRegInfo().setPhysRegUsed(*SubRegs);
+ }
+ }
+ }
+
+ SmallVector<unsigned, 8> DeadDefs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDead())
+ DeadDefs.push_back(MO.getReg());
+ }
+
+ // Okay, we have allocated all of the source operands and spilled any values
+ // that would be destroyed by defs of this instruction. Loop over the
+ // explicit defs and assign them to a register, spilling incoming values if
+ // we need to scavenge a register.
+ //
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef() || !MO.getReg() ||
+ MO.isEarlyClobber() ||
+ !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+
+ unsigned DestVirtReg = MO.getReg();
+ unsigned DestPhysReg;
+
+ // If DestVirtReg already has a value, use it.
+ if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) {
+ // If this is a copy try to reuse the input as the output;
+ // that will make the copy go away.
+ // If this is a copy, the source reg is a phys reg, and
+ // that reg is available, use that phys reg for DestPhysReg.
+ // If this is a copy, the source reg is a virtual reg, and
+ // the phys reg that was assigned to that virtual reg is now
+ // available, use that phys reg for DestPhysReg. (If it's now
+ // available that means this was the last use of the source.)
+ if (isCopy &&
+ TargetRegisterInfo::isPhysicalRegister(SrcCopyReg) &&
+ isPhysRegAvailable(SrcCopyReg)) {
+ DestPhysReg = SrcCopyReg;
+ assignVirtToPhysReg(DestVirtReg, DestPhysReg);
+ } else if (isCopy &&
+ TargetRegisterInfo::isVirtualRegister(SrcCopyReg) &&
+ SrcCopyPhysReg && isPhysRegAvailable(SrcCopyPhysReg) &&
+ MF->getRegInfo().getRegClass(DestVirtReg)->
+ contains(SrcCopyPhysReg)) {
+ DestPhysReg = SrcCopyPhysReg;
+ assignVirtToPhysReg(DestVirtReg, DestPhysReg);
+ } else
+ DestPhysReg = getReg(MBB, MI, DestVirtReg);
+ }
+ MF->getRegInfo().setPhysRegUsed(DestPhysReg);
+ markVirtRegModified(DestVirtReg);
+ getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0);
+ DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg)
+ << " to %reg" << DestVirtReg << "\n");
+ MO.setReg(DestPhysReg); // Assign the output register
+ UsedInInstr.set(DestPhysReg);
+ }
+
+ // If this instruction defines any registers that are immediately dead,
+ // kill them now.
+ //
+ for (unsigned i = 0, e = DeadDefs.size(); i != e; ++i) {
+ unsigned VirtReg = DeadDefs[i];
+ unsigned PhysReg = VirtReg;
+ if (TargetRegisterInfo::isVirtualRegister(VirtReg)) {
+ unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg);
+ PhysReg = PhysRegSlot;
+ assert(PhysReg != 0);
+ PhysRegSlot = 0;
+ } else if (PhysRegsUsed[PhysReg] == -2) {
+ // Unallocatable register dead, ignore.
+ continue;
+ } else if (!PhysReg)
+ continue;
+
+ DEBUG(dbgs() << " Register " << TRI->getName(PhysReg)
+ << " [%reg" << VirtReg
+ << "] is never used, removing it from live set\n");
+ removePhysReg(PhysReg);
+ for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
+ *AliasSet; ++AliasSet) {
+ if (PhysRegsUsed[*AliasSet] != -2) {
+ DEBUG(dbgs() << " Register " << TRI->getName(*AliasSet)
+ << " [%reg" << *AliasSet
+ << "] is never used, removing it from live set\n");
+ removePhysReg(*AliasSet);
+ }
+ }
+ }
+
+ // Finally, if this is a noop copy instruction, zap it. (Except that if
+ // the copy is dead, it must be kept to avoid messing up liveness info for
+ // the register scavenger. See pr4100.)
+ if (TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg,
+ SrcCopySubReg, DstCopySubReg) &&
+ SrcCopyReg == DstCopyReg && DeadDefs.empty())
+ MBB.erase(MI);
+ }
+
+ MachineBasicBlock::iterator MI = MBB.getFirstTerminator();
+
+ // Spill all physical registers holding virtual registers now.
+ for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i)
+ if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) {
+ if (unsigned VirtReg = PhysRegsUsed[i])
+ spillVirtReg(MBB, MI, VirtReg, i);
+ else
+ removePhysReg(i);
+ }
+}
+
+/// runOnMachineFunction - Register allocate the whole function
+///
+bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
+ DEBUG(dbgs() << "Machine Function\n");
+ MF = &Fn;
+ TM = &Fn.getTarget();
+ TRI = TM->getRegisterInfo();
+ TII = TM->getInstrInfo();
+
+ PhysRegsUsed.assign(TRI->getNumRegs(), -1);
+ UsedInInstr.resize(TRI->getNumRegs());
+
+ // At various places we want to efficiently check to see whether a register
+ // is allocatable. To handle this, we mark all unallocatable registers as
+ // being pinned down, permanently.
+ {
+ BitVector Allocable = TRI->getAllocatableSet(Fn);
+ for (unsigned i = 0, e = Allocable.size(); i != e; ++i)
+ if (!Allocable[i])
+ PhysRegsUsed[i] = -2; // Mark the reg unallocatable.
+ }
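+
+ // A sketch of the PhysRegsUsed encoding assumed throughout: -2 means
+ // the register is unallocatable, -1 means it is currently unused, 0
+ // means it is reserved but holds no virtual register, and a positive
+ // value is the number of the virtual register it currently holds.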
+
+ // Initialize the virtual->physical register map to have a 'null'
+ // mapping for all virtual registers.
+ unsigned LastVirtReg = MF->getRegInfo().getLastVirtReg();
+ StackSlotForVirtReg.grow(LastVirtReg);
+ Virt2PhysRegMap.grow(LastVirtReg);
+ Virt2LastUseMap.grow(LastVirtReg);
+ VirtRegModified.resize(LastVirtReg+1 -
+ TargetRegisterInfo::FirstVirtualRegister);
+ UsedInMultipleBlocks.resize(LastVirtReg+1 -
+ TargetRegisterInfo::FirstVirtualRegister);
+
+ // Loop over all of the basic blocks, eliminating virtual register references
+ for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+ MBB != MBBe; ++MBB)
+ AllocateBasicBlock(*MBB);
+
+ StackSlotForVirtReg.clear();
+ PhysRegsUsed.clear();
+ VirtRegModified.clear();
+ UsedInMultipleBlocks.clear();
+ Virt2PhysRegMap.clear();
+ Virt2LastUseMap.clear();
+ return true;
+}
+
+FunctionPass *llvm::createFastRegisterAllocator() {
+ return new RAFast();
+}
diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp
index 5c5a394cc018..6c8fc0c2a322 100644
--- a/lib/CodeGen/RegAllocLinearScan.cpp
+++ b/lib/CodeGen/RegAllocLinearScan.cpp
@@ -1177,7 +1177,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
assignRegOrStackSlotAtInterval(cur);
} else {
assert(false && "Ran out of registers during register allocation!");
- llvm_report_error("Ran out of registers during register allocation!");
+ report_fatal_error("Ran out of registers during register allocation!");
}
return;
}
diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp
index 0ef041e76b20..94456d143855 100644
--- a/lib/CodeGen/RegAllocLocal.cpp
+++ b/lib/CodeGen/RegAllocLocal.cpp
@@ -189,6 +189,9 @@ namespace {
///
void removePhysReg(unsigned PhysReg);
+ void storeVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned VirtReg, unsigned PhysReg, bool isKill);
+
/// spillVirtReg - This method spills the value specified by PhysReg into
/// the virtual register slot specified by VirtReg. It then updates the RA
/// data structures to indicate the fact that PhysReg is now available.
@@ -286,6 +289,17 @@ void RALocal::removePhysReg(unsigned PhysReg) {
PhysRegsUseOrder.erase(It);
}
+/// storeVirtReg - Store a virtual register to its assigned stack slot.
+void RALocal::storeVirtReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned VirtReg, unsigned PhysReg,
+ bool isKill) {
+ const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
+ int FrameIndex = getStackSpaceFor(VirtReg, RC);
+ DEBUG(dbgs() << " to stack slot #" << FrameIndex);
+ TII->storeRegToStackSlot(MBB, I, PhysReg, isKill, FrameIndex, RC);
+ ++NumStores; // Update statistics
+}
/// spillVirtReg - This method spills the value specified by PhysReg into the
/// virtual register slot specified by VirtReg. It then updates the RA data
@@ -309,15 +323,11 @@ void RALocal::spillVirtReg(MachineBasicBlock &MBB,
// Otherwise, there is a virtual register corresponding to this physical
// register. We only need to spill it into its stack slot if it has been
// modified.
- const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
- int FrameIndex = getStackSpaceFor(VirtReg, RC);
- DEBUG(dbgs() << " to stack slot #" << FrameIndex);
// If the instruction reads the register that's spilled, (e.g. this can
// happen if it is a move to a physical register), then the spill
// instruction is not a kill.
bool isKill = !(I != MBB.end() && I->readsRegister(PhysReg));
- TII->storeRegToStackSlot(MBB, I, PhysReg, isKill, FrameIndex, RC);
- ++NumStores; // Update statistics
+ storeVirtReg(MBB, I, VirtReg, PhysReg, isKill);
}
getVirt2PhysRegMapSlot(VirtReg) = 0; // VirtReg no longer available
@@ -549,7 +559,7 @@ MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
<< "constraints:\n";
MI->print(Msg, TM);
}
- llvm_report_error(Msg.str());
+ report_fatal_error(Msg.str());
}
for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg);
*SubRegs; ++SubRegs) {
@@ -563,7 +573,7 @@ MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
<< "constraints:\n";
MI->print(Msg, TM);
}
- llvm_report_error(Msg.str());
+ report_fatal_error(Msg.str());
}
return MI;
@@ -633,7 +643,10 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) {
// uses regs before it defs them.
if (!MO.isReg() || !MO.getReg() || !MO.isUse())
continue;
-
+
+ // Ignore helpful kill flags from earlier passes.
+ MO.setIsKill(false);
+
LastUseDef[MO.getReg()] = std::make_pair(I, i);
if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue;
@@ -801,9 +814,12 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
dbgs() << "\nStarting RegAlloc of: " << *MI;
dbgs() << " Regs have values: ";
for (unsigned i = 0; i != TRI->getNumRegs(); ++i)
- if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2)
+ if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) {
+ if (PhysRegsUsed[i] && isVirtRegModified(PhysRegsUsed[i]))
+ dbgs() << "*";
dbgs() << "[" << TRI->getName(i)
<< ",%reg" << PhysRegsUsed[i] << "] ";
+ }
dbgs() << '\n';
});
@@ -1092,6 +1108,20 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
}
}
+ // If this instruction is a call, make sure there are no dirty registers. The
+ // call might throw an exception, and the landing pad expects to find all
+ // virtual register values in their stack slots.
+ if (TID.isCall())
+ for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) {
+ if (PhysRegsUsed[i] <= 0) continue;
+ unsigned VirtReg = PhysRegsUsed[i];
+ if (!isVirtRegModified(VirtReg)) continue;
+ DEBUG(dbgs() << " Storing dirty %reg" << VirtReg);
+ storeVirtReg(MBB, MI, VirtReg, i, false);
+ markVirtRegModified(VirtReg, false);
+ DEBUG(dbgs() << " because the call might throw\n");
+ }
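+
+ // A sketch of the failure this prevents: given "%reg1024 = add ...;
+ // call @f" where @f may unwind, the landing pad reads %reg1024 from
+ // its stack slot; without the store above that slot could still hold
+ // a stale value even though the register copy was up to date.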
+
// Finally, if this is a noop copy instruction, zap it. (Except that if
// the copy is dead, it must be kept to avoid messing up liveness info for
// the register scavenger. See pr4100.)
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index 67bf209c7325..179984f2a998 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -64,7 +64,7 @@ void RegScavenger::initRegState() {
return;
// Live-in registers are in use.
- for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(),
+ for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
E = MBB->livein_end(); I != E; ++I)
setUsed(*I);
@@ -136,6 +136,9 @@ void RegScavenger::forward() {
ScavengeRestore = NULL;
}
+ if (MI->isDebugValue())
+ return;
+
// Find out which registers are early clobbered, killed, defined, and marked
// def-dead in this instruction.
BitVector EarlyClobberRegs(NumPhysRegs);
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 1f3e295fe979..587f001cc7bf 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -29,7 +29,6 @@ ScheduleDAG::ScheduleDAG(MachineFunction &mf)
TRI(TM.getRegisterInfo()),
TLI(TM.getTargetLowering()),
MF(mf), MRI(mf.getRegInfo()),
- ConstPool(MF.getConstantPool()),
EntrySU(), ExitSU() {
}
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index ecc49e2fc1b6..ca235c3179ac 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -272,8 +272,8 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
// perform its own adjustments.
const SDep& dep = SDep(SU, SDep::Data, LDataLatency, Reg);
if (!UnitLatencies) {
- ComputeOperandLatency(SU, UseSU, (SDep &)dep);
- ST.adjustSchedDependency(SU, UseSU, (SDep &)dep);
+ ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
+ ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
}
UseSU->addPred(dep);
}
@@ -285,8 +285,8 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
continue;
const SDep& dep = SDep(SU, SDep::Data, DataLatency, *Alias);
if (!UnitLatencies) {
- ComputeOperandLatency(SU, UseSU, (SDep &)dep);
- ST.adjustSchedDependency(SU, UseSU, (SDep &)dep);
+ ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
+ ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
}
UseSU->addPred(dep);
}
@@ -572,8 +572,7 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
}
// EmitSchedule - Emit the machine code in scheduled order.
-MachineBasicBlock *ScheduleDAGInstrs::
-EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
+MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() {
// For MachineInstr-based scheduling, we're rescheduling the instructions in
// the block, so start by removing them from the block.
while (Begin != InsertPos) {
diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h
index c9b44de85ec4..d70608f30498 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.h
+++ b/lib/CodeGen/ScheduleDAGInstrs.h
@@ -20,7 +20,6 @@
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include <map>
@@ -171,8 +170,7 @@ namespace llvm {
virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use,
SDep& dep) const;
- virtual MachineBasicBlock*
- EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*>*);
+ virtual MachineBasicBlock *EmitSchedule();
/// StartBlock - Prepare to perform scheduling in the given block.
///
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt
index 80c7d7c9eb9c..0cfd5e1d7e21 100644
--- a/lib/CodeGen/SelectionDAG/CMakeLists.txt
+++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt
@@ -20,6 +20,7 @@ add_llvm_library(LLVMSelectionDAG
SelectionDAGISel.cpp
SelectionDAGPrinter.cpp
TargetLowering.cpp
+ TargetSelectionDAGInfo.cpp
)
target_link_libraries (LLVMSelectionDAG LLVMAnalysis LLVMAsmPrinter LLVMCodeGen)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a336e0a01b56..3639f8047460 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -129,6 +129,14 @@ namespace {
bool CombineToPreIndexedLoadStore(SDNode *N);
bool CombineToPostIndexedLoadStore(SDNode *N);
+ void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
+ SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
+ SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
+ SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
+ SDValue PromoteIntBinOp(SDValue Op);
+ SDValue PromoteIntShiftOp(SDValue Op);
+ SDValue PromoteExtend(SDValue Op);
+ bool PromoteLoad(SDValue Op);
/// combine - call the node-specific routine that knows how to fold each
/// particular type of node. If that doesn't do anything, try the
@@ -254,24 +262,28 @@ namespace {
/// looking for a better chain (aliasing node.)
SDValue FindBetterChain(SDNode *N, SDValue Chain);
- /// getShiftAmountTy - Returns a type large enough to hold any valid
- /// shift amount - before type legalization these can be huge.
- EVT getShiftAmountTy() {
- return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy();
- }
-
-public:
+ public:
DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
- : DAG(D),
- TLI(D.getTargetLoweringInfo()),
- Level(Unrestricted),
- OptLevel(OL),
- LegalOperations(false),
- LegalTypes(false),
- AA(A) {}
+ : DAG(D), TLI(D.getTargetLoweringInfo()), Level(Unrestricted),
+ OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {}
/// Run - runs the dag combiner on all nodes in the work list
void Run(CombineLevel AtLevel);
+
+ SelectionDAG &getDAG() const { return DAG; }
+
+ /// getShiftAmountTy - Returns a type large enough to hold any valid
+ /// shift amount - before type legalization these can be huge.
+ EVT getShiftAmountTy() {
+ return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy();
+ }
+
+ /// isTypeLegal - This method returns true if we are running before type
+ /// legalization or if the specified VT is legal.
+ bool isTypeLegal(const EVT &VT) {
+ if (!LegalTypes) return true;
+ return TLI.isTypeLegal(VT);
+ }
};
}
@@ -577,9 +589,8 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
return SDValue(N, 0);
}
-void
-DAGCombiner::CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &
- TLO) {
+void DAGCombiner::
+CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
// Replace all uses. If any nodes become isomorphic to other nodes and
// are deleted, make sure to remove them from our worklist.
WorkListRemover DeadNodes(*this);
@@ -609,7 +620,7 @@ DAGCombiner::CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &
/// it can be simplified or if things it uses can be simplified by bit
/// propagation. If so, return true.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
- TargetLowering::TargetLoweringOpt TLO(DAG);
+ TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
APInt KnownZero, KnownOne;
if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
return false;
@@ -629,6 +640,274 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
return true;
}
+void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
+ DebugLoc dl = Load->getDebugLoc();
+ EVT VT = Load->getValueType(0);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0));
+
+ DEBUG(dbgs() << "\nReplacing.9 ";
+ Load->dump(&DAG);
+ dbgs() << "\nWith: ";
+ Trunc.getNode()->dump(&DAG);
+ dbgs() << '\n');
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc, &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1),
+ &DeadNodes);
+ removeFromWorkList(Load);
+ DAG.DeleteNode(Load);
+ AddToWorkList(Trunc.getNode());
+}
+
+SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
+ Replace = false;
+ DebugLoc dl = Op.getDebugLoc();
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
+ EVT MemVT = LD->getMemoryVT();
+ ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
+ ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD)
+ : LD->getExtensionType();
+ Replace = true;
+ return DAG.getExtLoad(ExtType, dl, PVT,
+ LD->getChain(), LD->getBasePtr(),
+ LD->getSrcValue(), LD->getSrcValueOffset(),
+ MemVT, LD->isVolatile(),
+ LD->isNonTemporal(), LD->getAlignment());
+ }
+
+ unsigned Opc = Op.getOpcode();
+ switch (Opc) {
+ default: break;
+ case ISD::AssertSext:
+ return DAG.getNode(ISD::AssertSext, dl, PVT,
+ SExtPromoteOperand(Op.getOperand(0), PVT),
+ Op.getOperand(1));
+ case ISD::AssertZext:
+ return DAG.getNode(ISD::AssertZext, dl, PVT,
+ ZExtPromoteOperand(Op.getOperand(0), PVT),
+ Op.getOperand(1));
+ case ISD::Constant: {
+ unsigned ExtOpc =
+ Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ return DAG.getNode(ExtOpc, dl, PVT, Op);
+ }
+ }
+
+ if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
+ return SDValue();
+ return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op);
+}
+
+SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
+ if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
+ return SDValue();
+ EVT OldVT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ bool Replace = false;
+ SDValue NewOp = PromoteOperand(Op, PVT, Replace);
+ if (NewOp.getNode() == 0)
+ return SDValue();
+ AddToWorkList(NewOp.getNode());
+
+ if (Replace)
+ ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp,
+ DAG.getValueType(OldVT));
+}
+
+SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
+ EVT OldVT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ bool Replace = false;
+ SDValue NewOp = PromoteOperand(Op, PVT, Replace);
+ if (NewOp.getNode() == 0)
+ return SDValue();
+ AddToWorkList(NewOp.getNode());
+
+ if (Replace)
+ ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
+ return DAG.getZeroExtendInReg(NewOp, dl, OldVT);
+}
+
+/// PromoteIntBinOp - Promote the specified integer binary operation if the
+/// target indicates it is beneficial, e.g. on x86, where it's usually better
+/// to promote i16 operations to i32 since i16 instructions are longer.
+SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+ if (VT.isVector() || !VT.isInteger())
+ return SDValue();
+
+ // If operation type is 'undesirable', e.g. i16 on x86, consider
+ // promoting it.
+ unsigned Opc = Op.getOpcode();
+ if (TLI.isTypeDesirableForOp(Opc, VT))
+ return SDValue();
+
+ EVT PVT = VT;
+ // Consult target whether it is a good idea to promote this operation and
+ // what's the right type to promote it to.
+ if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
+ assert(PVT != VT && "Don't know what type to promote to!");
+
+ bool Replace0 = false;
+ SDValue N0 = Op.getOperand(0);
+ SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
+ if (NN0.getNode() == 0)
+ return SDValue();
+
+ bool Replace1 = false;
+ SDValue N1 = Op.getOperand(1);
+ SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
+ if (NN1.getNode() == 0)
+ return SDValue();
+
+ AddToWorkList(NN0.getNode());
+ AddToWorkList(NN1.getNode());
+
+ if (Replace0)
+ ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
+ if (Replace1)
+ ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
+
+ DEBUG(dbgs() << "\nPromoting ";
+ Op.getNode()->dump(&DAG));
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(Opc, dl, PVT, NN0, NN1));
+ }
+ return SDValue();
+}
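+
+// A sketch of the transformation above, assuming the target prefers i32
+// over i16: (add i16 x, y) becomes (truncate i16 (add i32 x', y')),
+// where x' and y' are the promoted (e.g. any-extended) operands.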
+
+/// PromoteIntShiftOp - Promote the specified integer shift operation if the
+/// target indicates it is beneficial, e.g. on x86, where it's usually better
+/// to promote i16 operations to i32 since i16 instructions are longer.
+SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+ if (VT.isVector() || !VT.isInteger())
+ return SDValue();
+
+ // If operation type is 'undesirable', e.g. i16 on x86, consider
+ // promoting it.
+ unsigned Opc = Op.getOpcode();
+ if (TLI.isTypeDesirableForOp(Opc, VT))
+ return SDValue();
+
+ EVT PVT = VT;
+ // Consult target whether it is a good idea to promote this operation and
+ // what's the right type to promote it to.
+ if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
+ assert(PVT != VT && "Don't know what type to promote to!");
+
+ bool Replace = false;
+ SDValue N0 = Op.getOperand(0);
+ if (Opc == ISD::SRA)
+ N0 = SExtPromoteOperand(Op.getOperand(0), PVT);
+ else if (Opc == ISD::SRL)
+ N0 = ZExtPromoteOperand(Op.getOperand(0), PVT);
+ else
+ N0 = PromoteOperand(N0, PVT, Replace);
+ if (N0.getNode() == 0)
+ return SDValue();
+
+ AddToWorkList(N0.getNode());
+ if (Replace)
+ ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
+
+ DEBUG(dbgs() << "\nPromoting ";
+ Op.getNode()->dump(&DAG));
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1)));
+ }
+ return SDValue();
+}
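+
+// An illustrative case: (srl i16 x, 4) may become
+// (truncate i16 (srl i32 x', 4)) where x' is x zero-extended to i32;
+// for SRA the operand is sign-extended instead, so the bits shifted in
+// from the top remain correct.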
+
+SDValue DAGCombiner::PromoteExtend(SDValue Op) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+ if (VT.isVector() || !VT.isInteger())
+ return SDValue();
+
+ // If operation type is 'undesirable', e.g. i16 on x86, consider
+ // promoting it.
+ unsigned Opc = Op.getOpcode();
+ if (TLI.isTypeDesirableForOp(Opc, VT))
+ return SDValue();
+
+ EVT PVT = VT;
+ // Consult target whether it is a good idea to promote this operation and
+ // what's the right type to promote it to.
+ if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
+ assert(PVT != VT && "Don't know what type to promote to!");
+ // fold (aext (aext x)) -> (aext x)
+ // fold (aext (zext x)) -> (zext x)
+ // fold (aext (sext x)) -> (sext x)
+ DEBUG(dbgs() << "\nPromoting ";
+ Op.getNode()->dump(&DAG));
+ return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), VT, Op.getOperand(0));
+ }
+ return SDValue();
+}
+
+bool DAGCombiner::PromoteLoad(SDValue Op) {
+ if (!LegalOperations)
+ return false;
+
+ EVT VT = Op.getValueType();
+ if (VT.isVector() || !VT.isInteger())
+ return false;
+
+ // If operation type is 'undesirable', e.g. i16 on x86, consider
+ // promoting it.
+ unsigned Opc = Op.getOpcode();
+ if (TLI.isTypeDesirableForOp(Opc, VT))
+ return false;
+
+ EVT PVT = VT;
+ // Consult target whether it is a good idea to promote this operation and
+ // what's the right type to promote it to.
+ if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
+ assert(PVT != VT && "Don't know what type to promote to!");
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDNode *N = Op.getNode();
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ EVT MemVT = LD->getMemoryVT();
+ ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
+ ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD)
+ : LD->getExtensionType();
+ SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
+ LD->getChain(), LD->getBasePtr(),
+ LD->getSrcValue(), LD->getSrcValueOffset(),
+ MemVT, LD->isVolatile(),
+ LD->isNonTemporal(), LD->getAlignment());
+ SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD);
+
+ DEBUG(dbgs() << "\nPromoting ";
+ N->dump(&DAG);
+ dbgs() << "\nTo: ";
+ Result.getNode()->dump(&DAG);
+ dbgs() << '\n');
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result, &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1), &DeadNodes);
+ removeFromWorkList(N);
+ DAG.DeleteNode(N);
+ AddToWorkList(Result.getNode());
+ return true;
+ }
+ return false;
+}
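+
+// A sketch, assuming i16 loads are undesirable and i32 is preferred:
+//   t1: i16 = load p
+// is rewritten as
+//   t2: i32 = zextload p   (or an extload when ZEXTLOAD is not legal)
+//   t1: i16 = truncate t2
+// and both the value and the chain of the old load are replaced.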
+
+
//===----------------------------------------------------------------------===//
// Main DAG Combiner implementation
//===----------------------------------------------------------------------===//
@@ -732,7 +1011,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
}
SDValue DAGCombiner::visit(SDNode *N) {
- switch(N->getOpcode()) {
+ switch (N->getOpcode()) {
default: break;
case ISD::TokenFactor: return visitTokenFactor(N);
case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
@@ -817,6 +1096,35 @@ SDValue DAGCombiner::combine(SDNode *N) {
}
}
+ // If nothing happened still, try promoting the operation.
+ if (RV.getNode() == 0) {
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ RV = PromoteIntBinOp(SDValue(N, 0));
+ break;
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ RV = PromoteIntShiftOp(SDValue(N, 0));
+ break;
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ RV = PromoteExtend(SDValue(N, 0));
+ break;
+ case ISD::LOAD:
+ if (PromoteLoad(SDValue(N, 0)))
+ RV = SDValue(N, 0);
+ break;
+ }
+ }
+
// If N is a commutative binary node, try commuting it to enable more
// sdisel CSE.
if (RV.getNode() == 0 &&
@@ -1720,8 +2028,10 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
// into a vsetcc.
EVT Op0VT = N0.getOperand(0).getValueType();
if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
- N0.getOpcode() == ISD::ANY_EXTEND ||
N0.getOpcode() == ISD::SIGN_EXTEND ||
+ // Avoid infinite looping with PromoteIntBinOp.
+ (N0.getOpcode() == ISD::ANY_EXTEND &&
+ (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
(N0.getOpcode() == ISD::TRUNCATE && TLI.isTypeLegal(Op0VT))) &&
!VT.isVector() &&
Op0VT == N1.getOperand(0).getValueType() &&
@@ -2579,7 +2889,13 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
HiBitsMask);
}
- return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue();
+ if (N1C) {
+ SDValue NewSHL = visitShiftByConstant(N, N1C->getZExtValue());
+ if (NewSHL.getNode())
+ return NewSHL;
+ }
+
+ return SDValue();
}
SDValue DAGCombiner::visitSRA(SDNode *N) {
@@ -2693,7 +3009,13 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
if (DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, N1);
- return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue();
+ if (N1C) {
+ SDValue NewSRA = visitShiftByConstant(N, N1C->getZExtValue());
+ if (NewSRA.getNode())
+ return NewSRA;
+ }
+
+ return SDValue();
}
SDValue DAGCombiner::visitSRL(SDNode *N) {
@@ -2731,6 +3053,15 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0),
DAG.getConstant(c1 + c2, N1.getValueType()));
}
+
+ // fold (srl (shl x, c), c) -> (and x, cst2)
+ if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
+ N0.getValueSizeInBits() <= 64) {
+ uint64_t ShAmt = N1C->getZExtValue()+64-N0.getValueSizeInBits();
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(~0ULL >> ShAmt, VT));
+ }
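+ // (Worked example with illustrative values: for i32 x and c = 8,
+ // ShAmt = 8 + 64 - 32 = 40 and ~0ULL >> 40 = 0xFFFFFF, so the result
+ // is (and x, 0xFFFFFF).)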
+
// fold (srl (anyextend x), c) -> (anyextend (srl x, c))
if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
@@ -2739,10 +3070,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (N1C->getZExtValue() >= SmallVT.getSizeInBits())
return DAG.getUNDEF(VT);
- SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT,
- N0.getOperand(0), N1);
- AddToWorkList(SmallShift.getNode());
- return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift);
+ if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
+ SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT,
+ N0.getOperand(0), N1);
+ AddToWorkList(SmallShift.getNode());
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift);
+ }
}
// fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
@@ -3205,24 +3538,40 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
if (N0.getOpcode() == ISD::SETCC) {
// sext(setcc) -> sext_in_reg(vsetcc) for vectors.
- if (VT.isVector() &&
+ // Only do this before legalize for now.
+ if (VT.isVector() && !LegalOperations) {
+ EVT N0VT = N0.getOperand(0).getValueType();
// We know that the # elements of the results is the same as the
// # elements of the compare (and the # elements of the compare result
// for that matter). Check to see that they are the same size. If so,
// we know that the element size of the sext'd result matches the
// element size of the compare operands.
- VT.getSizeInBits() == N0.getOperand(0).getValueType().getSizeInBits() &&
-
- // Only do this before legalize for now.
- !LegalOperations) {
- return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
- N0.getOperand(1),
- cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ if (VT.getSizeInBits() == N0VT.getSizeInBits())
+ return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ // If the desired elements are smaller or larger than the source
+ // elements we can use a matching integer vector type and then
+ // truncate/sign extend
+ else {
+ EVT MatchingElementType =
+ EVT::getIntegerVT(*DAG.getContext(),
+ N0VT.getScalarType().getSizeInBits());
+ EVT MatchingVectorType =
+ EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
+ N0VT.getVectorNumElements());
+ SDValue VsetCC =
+ DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
+ }
}
-
+
// sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc)
+ unsigned ElementWidth = VT.getScalarType().getSizeInBits();
SDValue NegOne =
- DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT);
+ DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT);
SDValue SCC =
SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
NegOne, DAG.getConstant(0, VT),
@@ -3624,7 +3973,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
// Do not generate loads of non-round integer types since these can
// be expensive (and would be wrong if the type is not byte sized).
if (isa<LoadSDNode>(N0) && N0.hasOneUse() && ExtVT.isRound() &&
- cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() > EVTBits &&
+ cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() >= EVTBits &&
// Do not change the width of a volatile load.
!cast<LoadSDNode>(N0)->isVolatile()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
@@ -3694,7 +4043,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
// if x is small enough.
if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
SDValue N00 = N0.getOperand(0);
- if (N00.getValueType().getScalarType().getSizeInBits() < EVTBits)
+ if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, N1);
}
@@ -3779,7 +4129,8 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
if (N0.getOpcode() == ISD::TRUNCATE)
return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
// fold (truncate (ext x)) -> (ext x) or (truncate x) or x
- if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND||
+ if (N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND ||
N0.getOpcode() == ISD::ANY_EXTEND) {
if (N0.getOperand(0).getValueType().bitsLT(VT))
// if the source is smaller than the dest, we still need an extend
@@ -3805,7 +4156,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// fold (truncate (load x)) -> (smaller load x)
// fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
- return ReduceLoadWidth(N);
+ if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT))
+ return ReduceLoadWidth(N);
+ return SDValue();
}
static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
@@ -3949,7 +4302,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
VT.isInteger() && !VT.isVector()) {
unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
- if (TLI.isTypeLegal(IntXVT) || !LegalTypes) {
+ if (isTypeLegal(IntXVT)) {
SDValue X = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
IntXVT, N0.getOperand(1));
AddToWorkList(X.getNode());
@@ -4075,8 +4428,8 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
if (Op.getOpcode() == ISD::UNDEF) continue;
EltIsUndef = false;
- NewBits |= (APInt(cast<ConstantSDNode>(Op)->getAPIntValue()).
- zextOrTrunc(SrcBitSize).zext(DstBitSize));
+ NewBits |= APInt(cast<ConstantSDNode>(Op)->getAPIntValue()).
+ zextOrTrunc(SrcBitSize).zext(DstBitSize);
}
if (EltIsUndef)
@@ -4464,7 +4817,7 @@ SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
// fold (fp_round_inreg c1fp) -> c1fp
- if (N0CFP && (TLI.isTypeLegal(EVT) || !LegalTypes)) {
+ if (N0CFP && isTypeLegal(EVT)) {
SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT);
return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, Round);
}
@@ -4676,7 +5029,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
if (Op0.getOpcode() == Op1.getOpcode()) {
// Avoid missing important xor optimizations.
SDValue Tmp = visitXOR(TheXor);
- if (Tmp.getNode()) {
+ if (Tmp.getNode() && Tmp.getNode() != TheXor) {
DEBUG(dbgs() << "\nReplacing.8 ";
TheXor->dump(&DAG);
dbgs() << "\nWith: ";
@@ -5145,6 +5498,136 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
return SDValue();
}
+/// CheckForMaskedLoad - Check to see if V is (and (load ptr), imm), where the
+/// mask clears out specific bytes of the loaded value. If so, return the byte
+/// size being masked out and the shift amount.
+static std::pair<unsigned, unsigned>
+CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
+ std::pair<unsigned, unsigned> Result(0, 0);
+
+ // Check for the structure we're looking for.
+ if (V->getOpcode() != ISD::AND ||
+ !isa<ConstantSDNode>(V->getOperand(1)) ||
+ !ISD::isNormalLoad(V->getOperand(0).getNode()))
+ return Result;
+
+ // Check the chain and pointer.
+ LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
+ if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
+
+ // The store should be chained directly to the load or be an operand of a
+ // tokenfactor.
+ if (LD == Chain.getNode())
+ ; // ok.
+ else if (Chain->getOpcode() != ISD::TokenFactor)
+ return Result; // Fail.
+ else {
+ bool isOk = false;
+ for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i)
+ if (Chain->getOperand(i).getNode() == LD) {
+ isOk = true;
+ break;
+ }
+ if (!isOk) return Result;
+ }
+
+ // This only handles simple types.
+ if (V.getValueType() != MVT::i16 &&
+ V.getValueType() != MVT::i32 &&
+ V.getValueType() != MVT::i64)
+ return Result;
+
+ // Check the constant mask. Invert it so that the bits being masked out are
+ // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
+ // follow the sign bit for uniformity.
+ uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
+ unsigned NotMaskLZ = CountLeadingZeros_64(NotMask);
+ if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
+ unsigned NotMaskTZ = CountTrailingZeros_64(NotMask);
+ if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
+ if (NotMaskLZ == 64) return Result; // All zero mask.
+
+ // See if we have a contiguous run of bits, i.e. NotMask has the form 0*1+0*.
+ if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64)
+ return Result;
+
+ // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
+ if (V.getValueType() != MVT::i64 && NotMaskLZ)
+ NotMaskLZ -= 64-V.getValueSizeInBits();
+
+ unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
+ switch (MaskedBytes) {
+ case 1:
+ case 2:
+ case 4: break;
+ default: return Result; // All one mask, or 5-byte mask.
+ }
+
+ // Verify that the masked region starts at a byte offset that is a multiple
+ // of its width, so the narrowed access is aligned to the access width.
+ if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
+
+ Result.first = MaskedBytes;
+ Result.second = NotMaskTZ/8;
+ return Result;
+}
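+
+// A worked example with hypothetical constants: for an i32 value
+// V = (and (load p), 0xFFFF00FF), the inverted mask is 0xFF00, a single
+// contiguous masked-out byte at byte offset 1, so this returns
+// {MaskedBytes = 1, ByteShift = 1}.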
+
+
+/// ShrinkLoadReplaceStoreWithStore - Check to see if IVal is something that
+/// provides a value as specified by MaskInfo. If so, replace the specified
+/// store with a narrower store of truncated IVal.
+static SDNode *
+ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
+ SDValue IVal, StoreSDNode *St,
+ DAGCombiner *DC) {
+ unsigned NumBytes = MaskInfo.first;
+ unsigned ByteShift = MaskInfo.second;
+ SelectionDAG &DAG = DC->getDAG();
+
+ // Check to see if IVal is all zeros in the part being masked in by the 'or'
+ // that uses this. If not, we cannot do the replacement.
+ APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
+ ByteShift*8, (ByteShift+NumBytes)*8);
+ if (!DAG.MaskedValueIsZero(IVal, Mask)) return 0;
+
+ // Check that it is legal on the target to do this. It is legal if the new
+ // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
+ // legalization.
+ MVT VT = MVT::getIntegerVT(NumBytes*8);
+ if (!DC->isTypeLegal(VT))
+ return 0;
+
+ // Okay, we can do this! Replace the 'St' store with a store of IVal that is
+ // shifted by ByteShift and truncated down to NumBytes.
+ if (ByteShift)
+ IVal = DAG.getNode(ISD::SRL, IVal->getDebugLoc(), IVal.getValueType(), IVal,
+ DAG.getConstant(ByteShift*8, DC->getShiftAmountTy()));
+
+ // Figure out the offset for the store and the alignment of the access.
+ unsigned StOffset;
+ unsigned NewAlign = St->getAlignment();
+
+ if (DAG.getTargetLoweringInfo().isLittleEndian())
+ StOffset = ByteShift;
+ else
+ StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
+
+ SDValue Ptr = St->getBasePtr();
+ if (StOffset) {
+ Ptr = DAG.getNode(ISD::ADD, IVal->getDebugLoc(), Ptr.getValueType(),
+ Ptr, DAG.getConstant(StOffset, Ptr.getValueType()));
+ NewAlign = MinAlign(NewAlign, StOffset);
+ }
+
+ // Truncate down to the new size.
+ IVal = DAG.getNode(ISD::TRUNCATE, IVal->getDebugLoc(), VT, IVal);
+
+ ++OpsNarrowed;
+ return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr,
+ St->getSrcValue(), St->getSrcValueOffset()+StOffset,
+ false, false, NewAlign).getNode();
+}
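+
+// Continuing the example above: when IVal is known zero outside byte 1,
+// the load/or/store of the full i32 collapses to a single i8 store of
+// (truncate (srl IVal, 8)) at p+1 on a little-endian target.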
+
/// ReduceLoadOpStoreWidth - Look for sequence of load / op / store where op is
/// one of 'or', 'xor', and 'and' of immediates. If 'op' is only touching some
@@ -5164,6 +5647,28 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
return SDValue();
unsigned Opc = Value.getOpcode();
+
+ // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
+ // is a byte mask indicating a consecutive number of bytes, check to see if
+ // Y is known to provide just those bytes. If so, we try to replace the
+ // load + replace + store sequence with a single (narrower) store, which makes
+ // the load dead.
+ if (Opc == ISD::OR) {
+ std::pair<unsigned, unsigned> MaskedLoad;
+ MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
+ if (MaskedLoad.first)
+ if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
+ Value.getOperand(1), ST,this))
+ return SDValue(NewST, 0);
+
+ // Or is commutative, so try swapping X and Y.
+ MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
+ if (MaskedLoad.first)
+ if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
+ Value.getOperand(0), ST,this))
+ return SDValue(NewST, 0);
+ }
+
if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
Value.getOperand(1).getOpcode() != ISD::Constant)
return SDValue();
@@ -5211,8 +5716,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
- if (NewAlign <
- TLI.getTargetData()->getABITypeAlignment(NewVT.getTypeForEVT(*DAG.getContext())))
+ const Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
+ if (NewAlign < TLI.getTargetData()->getABITypeAlignment(NewVTTy))
return SDValue();
SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(),
@@ -5282,8 +5787,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
case MVT::ppcf128:
break;
case MVT::f32:
- if (((TLI.isTypeLegal(MVT::i32) || !LegalTypes) && !LegalOperations &&
- !ST->isVolatile()) ||
+ if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
bitcastToAPInt().getZExtValue(), MVT::i32);
@@ -5294,7 +5798,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
}
break;
case MVT::f64:
- if (((TLI.isTypeLegal(MVT::i64) || !LegalTypes) && !LegalOperations &&
+ if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
!ST->isVolatile()) ||
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
@@ -5551,7 +6055,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
InVec = InVec.getOperand(0);
if (ISD::isNormalLoad(InVec.getNode())) {
LN0 = cast<LoadSDNode>(InVec);
- Elt = (Idx < (int)NumElems) ? Idx : Idx - NumElems;
+ Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
}
}
@@ -5659,7 +6163,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
}
// Add count and size info.
- if (!TLI.isTypeLegal(VT) && LegalTypes)
+ if (!isTypeLegal(VT))
return SDValue();
// Return the new VECTOR_SHUFFLE node.
@@ -6287,7 +6791,7 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
/// FindBaseOffset - Return true if base is a frame index, which is known not
// to alias with anything but itself. Provides base object and offset as results.
static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
- GlobalValue *&GV, void *&CV) {
+ const GlobalValue *&GV, void *&CV) {
// Assume it is a primitive operation.
Base = Ptr; Offset = 0; GV = 0; CV = 0;
@@ -6335,7 +6839,7 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
// Gather base node and offset information.
SDValue Base1, Base2;
int64_t Offset1, Offset2;
- GlobalValue *GV1, *GV2;
+ const GlobalValue *GV1, *GV2;
void *CV1, *CV2;
bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1);
bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2);
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 4bf41f28c250..b4c38332691c 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1,4 +1,4 @@
-///===-- FastISel.cpp - Implementation of the FastISel class --------------===//
+//===-- FastISel.cpp - Implementation of the FastISel class ---------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -52,10 +52,11 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/ErrorHandling.h"
#include "FunctionLoweringInfo.h"
using namespace llvm;
-unsigned FastISel::getRegForValue(Value *V) {
+unsigned FastISel::getRegForValue(const Value *V) {
EVT RealVT = TLI.getValueType(V->getType(), /*AllowUnknown=*/true);
// Don't handle non-simple values in FastISel.
if (!RealVT.isSimple())
@@ -83,7 +84,16 @@ unsigned FastISel::getRegForValue(Value *V) {
if (Reg != 0)
return Reg;
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ return materializeRegForValue(V, VT);
+}
+
+/// materializeRegForValue - Helper for getRegForValue. This function is
+/// called when the value isn't already available in a register and must
+/// be materialized with new instructions.
+unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
+ unsigned Reg = 0;
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
if (CI->getValue().getActiveBits() <= 64)
Reg = FastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
} else if (isa<AllocaInst>(V)) {
@@ -93,10 +103,12 @@ unsigned FastISel::getRegForValue(Value *V) {
// local-CSE'd with actual integer zeros.
Reg =
getRegForValue(Constant::getNullValue(TD.getIntPtrType(V->getContext())));
- } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+ } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+ // Try to emit the constant directly.
Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF);
if (!Reg) {
+ // Try to emit the constant by using an integer constant with a cast.
const APFloat &Flt = CF->getValueAPF();
EVT IntVT = TLI.getPointerTy();
@@ -114,9 +126,9 @@ unsigned FastISel::getRegForValue(Value *V) {
Reg = FastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg);
}
}
- } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
- if (!SelectOperator(CE, CE->getOpcode())) return 0;
- Reg = LocalValueMap[CE];
+ } else if (const Operator *Op = dyn_cast<Operator>(V)) {
+ if (!SelectOperator(Op, Op->getOpcode())) return 0;
+ Reg = LocalValueMap[Op];
} else if (isa<UndefValue>(V)) {
Reg = createResultReg(TLI.getRegClassFor(VT));
BuildMI(MBB, DL, TII.get(TargetOpcode::IMPLICIT_DEF), Reg);
@@ -134,11 +146,11 @@ unsigned FastISel::getRegForValue(Value *V) {
return Reg;
}
-unsigned FastISel::lookUpRegForValue(Value *V) {
+unsigned FastISel::lookUpRegForValue(const Value *V) {
// Look up the value to see if we already have a register for it. We
// cache values defined by Instructions across blocks, and other values
// only locally. This is because Instructions already have the SSA
- // def-dominatess-use requirement enforced.
+ // def-dominates-use requirement enforced.
if (ValueMap.count(V))
return ValueMap[V];
return LocalValueMap[V];
@@ -150,7 +162,7 @@ unsigned FastISel::lookUpRegForValue(Value *V) {
/// NOTE: This is only necessary because we might select a block that uses
/// a value before we select the block that defines the value. It might be
/// possible to fix this by selecting blocks in reverse postorder.
-unsigned FastISel::UpdateValueMap(Value* I, unsigned Reg) {
+unsigned FastISel::UpdateValueMap(const Value *I, unsigned Reg) {
if (!isa<Instruction>(I)) {
LocalValueMap[I] = Reg;
return Reg;
@@ -167,7 +179,7 @@ unsigned FastISel::UpdateValueMap(Value* I, unsigned Reg) {
return AssignedReg;
}
-unsigned FastISel::getRegForGEPIndex(Value *Idx) {
+unsigned FastISel::getRegForGEPIndex(const Value *Idx) {
unsigned IdxN = getRegForValue(Idx);
if (IdxN == 0)
// Unhandled operand. Halt "fast" selection and bail.
@@ -186,7 +198,7 @@ unsigned FastISel::getRegForGEPIndex(Value *Idx) {
/// SelectBinaryOp - Select and emit code for a binary operator instruction,
/// which has an opcode which directly corresponds to the given ISD opcode.
///
-bool FastISel::SelectBinaryOp(User *I, unsigned ISDOpcode) {
+bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) {
EVT VT = EVT::getEVT(I->getType(), /*HandleUnknown=*/true);
if (VT == MVT::Other || !VT.isSimple())
// Unhandled type. Halt "fast" selection and bail.
@@ -252,7 +264,7 @@ bool FastISel::SelectBinaryOp(User *I, unsigned ISDOpcode) {
return true;
}
-bool FastISel::SelectGetElementPtr(User *I) {
+bool FastISel::SelectGetElementPtr(const User *I) {
unsigned N = getRegForValue(I->getOperand(0));
if (N == 0)
// Unhandled operand. Halt "fast" selection and bail.
@@ -260,9 +272,9 @@ bool FastISel::SelectGetElementPtr(User *I) {
const Type *Ty = I->getOperand(0)->getType();
MVT VT = TLI.getPointerTy();
- for (GetElementPtrInst::op_iterator OI = I->op_begin()+1, E = I->op_end();
- OI != E; ++OI) {
- Value *Idx = *OI;
+ for (GetElementPtrInst::const_op_iterator OI = I->op_begin()+1,
+ E = I->op_end(); OI != E; ++OI) {
+ const Value *Idx = *OI;
if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
if (Field) {
@@ -280,7 +292,7 @@ bool FastISel::SelectGetElementPtr(User *I) {
Ty = cast<SequentialType>(Ty)->getElementType();
// If this is a constant subscript, handle it quickly.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
if (CI->getZExtValue() == 0) continue;
uint64_t Offs =
TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
@@ -316,51 +328,56 @@ bool FastISel::SelectGetElementPtr(User *I) {
return true;
}
-bool FastISel::SelectCall(User *I) {
- Function *F = cast<CallInst>(I)->getCalledFunction();
+bool FastISel::SelectCall(const User *I) {
+ const Function *F = cast<CallInst>(I)->getCalledFunction();
if (!F) return false;
+ // Handle selected intrinsic function calls.
unsigned IID = F->getIntrinsicID();
switch (IID) {
default: break;
case Intrinsic::dbg_declare: {
- DbgDeclareInst *DI = cast<DbgDeclareInst>(I);
+ const DbgDeclareInst *DI = cast<DbgDeclareInst>(I);
if (!DIDescriptor::ValidDebugInfo(DI->getVariable(), CodeGenOpt::None) ||
!MF.getMMI().hasDebugInfo())
return true;
- Value *Address = DI->getAddress();
+ const Value *Address = DI->getAddress();
if (!Address)
return true;
- AllocaInst *AI = dyn_cast<AllocaInst>(Address);
+ if (isa<UndefValue>(Address))
+ return true;
+ const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
// Don't handle byval struct arguments or VLAs, for example.
- if (!AI) break;
- DenseMap<const AllocaInst*, int>::iterator SI =
- StaticAllocaMap.find(AI);
- if (SI == StaticAllocaMap.end()) break; // VLAs.
- int FI = SI->second;
- if (!DI->getDebugLoc().isUnknown())
- MF.getMMI().setVariableDbgInfo(DI->getVariable(), FI, DI->getDebugLoc());
-
- // Building the map above is target independent. Generating DBG_VALUE
- // inline is target dependent; do this now.
- (void)TargetSelectInstruction(cast<Instruction>(I));
+ // Note that if we have a byval struct argument, fast ISel is turned off;
+ // those are handled in SelectionDAGBuilder.
+ if (AI) {
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ StaticAllocaMap.find(AI);
+ if (SI == StaticAllocaMap.end()) break; // VLAs.
+ int FI = SI->second;
+ if (!DI->getDebugLoc().isUnknown())
+ MF.getMMI().setVariableDbgInfo(DI->getVariable(), FI, DI->getDebugLoc());
+ } else
+ // Building the map above is target independent. Generating DBG_VALUE
+ // inline is target dependent; do this now.
+ (void)TargetSelectInstruction(cast<Instruction>(I));
return true;
}
case Intrinsic::dbg_value: {
- // This requires target support, but right now X86 is the only Fast target.
- DbgValueInst *DI = cast<DbgValueInst>(I);
+ // This form of DBG_VALUE is target-independent.
+ const DbgValueInst *DI = cast<DbgValueInst>(I);
const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
- Value *V = DI->getValue();
+ const Value *V = DI->getValue();
if (!V) {
// Currently the optimizer can produce this; insert an undef to
// help debugging. Probably the optimizer should not do this.
BuildMI(MBB, DL, II).addReg(0U).addImm(DI->getOffset()).
addMetadata(DI->getVariable());
- } else if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
BuildMI(MBB, DL, II).addImm(CI->getZExtValue()).addImm(DI->getOffset()).
addMetadata(DI->getVariable());
- } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+ } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
BuildMI(MBB, DL, II).addFPImm(CF).addImm(DI->getOffset()).
addMetadata(DI->getVariable());
} else if (unsigned Reg = lookUpRegForValue(V)) {
@@ -438,10 +455,12 @@ bool FastISel::SelectCall(User *I) {
break;
}
}
+
+ // An arbitrary call. Bail.
return false;
}
-bool FastISel::SelectCast(User *I, unsigned Opcode) {
+bool FastISel::SelectCast(const User *I, unsigned Opcode) {
EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
EVT DstVT = TLI.getValueType(I->getType());
@@ -493,7 +512,7 @@ bool FastISel::SelectCast(User *I, unsigned Opcode) {
return true;
}
-bool FastISel::SelectBitCast(User *I) {
+bool FastISel::SelectBitCast(const User *I) {
// If the bitcast doesn't change the type, just use the operand value.
if (I->getType() == I->getOperand(0)->getType()) {
unsigned Reg = getRegForValue(I->getOperand(0));
@@ -544,15 +563,28 @@ bool FastISel::SelectBitCast(User *I) {
}
bool
-FastISel::SelectInstruction(Instruction *I) {
+FastISel::SelectInstruction(const Instruction *I) {
+ // Just before the terminator instruction, insert instructions to
+ // feed PHI nodes in successor blocks.
+ if (isa<TerminatorInst>(I))
+ if (!HandlePHINodesInSuccessorBlocks(I->getParent()))
+ return false;
+
+ DL = I->getDebugLoc();
+
// First, try doing target-independent selection.
- if (SelectOperator(I, I->getOpcode()))
+ if (SelectOperator(I, I->getOpcode())) {
+ DL = DebugLoc();
return true;
+ }
// Next, try calling the target to attempt to handle the instruction.
- if (TargetSelectInstruction(I))
+ if (TargetSelectInstruction(I)) {
+ DL = DebugLoc();
return true;
+ }
+ DL = DebugLoc();
return false;
}
@@ -573,7 +605,7 @@ FastISel::FastEmitBranch(MachineBasicBlock *MSucc) {
/// SelectFNeg - Emit an FNeg operation.
///
bool
-FastISel::SelectFNeg(User *I) {
+FastISel::SelectFNeg(const User *I) {
unsigned OpReg = getRegForValue(BinaryOperator::getFNegArgument(I));
if (OpReg == 0) return false;
@@ -614,7 +646,7 @@ FastISel::SelectFNeg(User *I) {
}
bool
-FastISel::SelectOperator(User *I, unsigned Opcode) {
+FastISel::SelectOperator(const User *I, unsigned Opcode) {
switch (Opcode) {
case Instruction::Add:
return SelectBinaryOp(I, ISD::ADD);
@@ -660,10 +692,10 @@ FastISel::SelectOperator(User *I, unsigned Opcode) {
return SelectGetElementPtr(I);
case Instruction::Br: {
- BranchInst *BI = cast<BranchInst>(I);
+ const BranchInst *BI = cast<BranchInst>(I);
if (BI->isUnconditional()) {
- BasicBlock *LLVMSucc = BI->getSuccessor(0);
+ const BasicBlock *LLVMSucc = BI->getSuccessor(0);
MachineBasicBlock *MSucc = MBBMap[LLVMSucc];
FastEmitBranch(MSucc);
return true;
@@ -678,10 +710,6 @@ FastISel::SelectOperator(User *I, unsigned Opcode) {
// Nothing to emit.
return true;
- case Instruction::PHI:
- // PHI nodes are already emitted.
- return true;
-
case Instruction::Alloca:
// FunctionLowering has the static-sized case covered.
if (StaticAllocaMap.count(cast<AllocaInst>(I)))
@@ -721,6 +749,9 @@ FastISel::SelectOperator(User *I, unsigned Opcode) {
return true;
}
+ case Instruction::PHI:
+ llvm_unreachable("FastISel shouldn't visit PHI nodes!");
+
default:
// Unhandled instruction. Halt "fast" selection and bail.
return false;
@@ -730,15 +761,17 @@ FastISel::SelectOperator(User *I, unsigned Opcode) {
FastISel::FastISel(MachineFunction &mf,
DenseMap<const Value *, unsigned> &vm,
DenseMap<const BasicBlock *, MachineBasicBlock *> &bm,
- DenseMap<const AllocaInst *, int> &am
+ DenseMap<const AllocaInst *, int> &am,
+ std::vector<std::pair<MachineInstr*, unsigned> > &pn
#ifndef NDEBUG
- , SmallSet<Instruction*, 8> &cil
+ , SmallSet<const Instruction *, 8> &cil
#endif
)
: MBB(0),
ValueMap(vm),
MBBMap(bm),
StaticAllocaMap(am),
+ PHINodesToUpdate(pn),
#ifndef NDEBUG
CatchInfoLost(cil),
#endif
@@ -775,7 +808,7 @@ unsigned FastISel::FastEmit_i(MVT, MVT, unsigned, uint64_t /*Imm*/) {
}
unsigned FastISel::FastEmit_f(MVT, MVT,
- unsigned, ConstantFP * /*FPImm*/) {
+ unsigned, const ConstantFP * /*FPImm*/) {
return 0;
}
@@ -787,7 +820,7 @@ unsigned FastISel::FastEmit_ri(MVT, MVT,
unsigned FastISel::FastEmit_rf(MVT, MVT,
unsigned, unsigned /*Op0*/,
- ConstantFP * /*FPImm*/) {
+ const ConstantFP * /*FPImm*/) {
return 0;
}
@@ -820,7 +853,7 @@ unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode,
/// FastEmit_rf. If that fails, it materializes the immediate into a register
/// and tries FastEmit_rr instead.
unsigned FastISel::FastEmit_rf_(MVT VT, unsigned Opcode,
- unsigned Op0, ConstantFP *FPImm,
+ unsigned Op0, const ConstantFP *FPImm,
MVT ImmType) {
// First check if immediate type is legal. If not, we can't use the rf form.
unsigned ResultReg = FastEmit_rf(VT, VT, Opcode, Op0, FPImm);
@@ -930,7 +963,7 @@ unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
- unsigned Op0, ConstantFP *FPImm) {
+ unsigned Op0, const ConstantFP *FPImm) {
unsigned ResultReg = createResultReg(RC);
const TargetInstrDesc &II = TII.get(MachineInstOpcode);
@@ -1006,3 +1039,67 @@ unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT,
unsigned FastISel::FastEmitZExtFromI1(MVT VT, unsigned Op) {
return FastEmit_ri(VT, VT, ISD::AND, Op, 1);
}
+
+/// HandlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks.
+/// Emit code to ensure constants are copied into registers when needed.
+/// Remember the virtual registers that need to be added to the Machine PHI
+/// nodes as input. We cannot just directly add them, because expansion
+/// might result in multiple MBBs for one BB. As such, the start of the
+/// BB might correspond to a different MBB than the end.
+bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
+ const TerminatorInst *TI = LLVMBB->getTerminator();
+
+ SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
+ unsigned OrigNumPHINodesToUpdate = PHINodesToUpdate.size();
+
+ // Check successor nodes' PHI nodes that expect a constant to be available
+ // from this block.
+ for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
+ const BasicBlock *SuccBB = TI->getSuccessor(succ);
+ if (!isa<PHINode>(SuccBB->begin())) continue;
+ MachineBasicBlock *SuccMBB = MBBMap[SuccBB];
+
+ // If this terminator has multiple identical successors (common for
+ // switches), only handle each succ once.
+ if (!SuccsHandled.insert(SuccMBB)) continue;
+
+ MachineBasicBlock::iterator MBBI = SuccMBB->begin();
+
+ // At this point we know that there is a 1-1 correspondence between LLVM PHI
+ // nodes and Machine PHI nodes, but the incoming operands have not been
+ // emitted yet.
+ for (BasicBlock::const_iterator I = SuccBB->begin();
+ const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ // Ignore dead PHIs.
+ if (PN->use_empty()) continue;
+
+ // Only handle legal types. Two interesting things to note here. First,
+ // by bailing out early, we may leave behind some dead instructions,
+ // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its
+ // own moves. Second, this check is necessary because FastISel doesn't
+ // use CreateRegForValue to create registers, so it always creates
+ // exactly one register for each non-void instruction.
+ EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true);
+ if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
+ // Promote MVT::i1.
+ if (VT == MVT::i1)
+ VT = TLI.getTypeToTransformTo(LLVMBB->getContext(), VT);
+ else {
+ PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+ return false;
+ }
+ }
+
+ const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+
+ unsigned Reg = getRegForValue(PHIOp);
+ if (Reg == 0) {
+ PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+ return false;
+ }
+ PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg));
+ }
+ }
+
+ return true;
+}
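
The record-and-roll-back pattern in HandlePHINodesInSuccessorBlocks — note the original length of PHINodesToUpdate, append speculatively, truncate back on any failure — is worth seeing on its own. A minimal sketch with generic types standing in for the machine-level ones:

#include <cstddef>
#include <cstdio>
#include <utility>
#include <vector>

// Speculatively append (phi, reg) updates; on any failure, restore the
// vector to its original length so no partial update leaks out.
static bool recordPhiUpdates(std::vector<std::pair<int, unsigned>> &Updates,
                             const std::vector<int> &Phis) {
  const std::size_t OrigSize = Updates.size();
  unsigned NextReg = 1;
  for (int Phi : Phis) {
    if (Phi < 0) {                // stand-in for "no register available"
      Updates.resize(OrigSize);   // roll back everything added by this call
      return false;
    }
    Updates.push_back({Phi, NextReg++});
  }
  return true;
}

int main() {
  std::vector<std::pair<int, unsigned>> Updates;
  recordPhiUpdates(Updates, {1, 2, -1});  // fails partway, rolls back
  std::printf("%zu\n", Updates.size());   // 0
  recordPhiUpdates(Updates, {4, 5, 6});   // succeeds
  std::printf("%zu\n", Updates.size());   // 3
}
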
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 4fb2aa299cbe..65c36c1289db 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -14,19 +14,18 @@
#define DEBUG_TYPE "function-lowering-info"
#include "FunctionLoweringInfo.h"
-#include "llvm/CallingConv.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameInfo.h"
@@ -34,99 +33,21 @@
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
#include <algorithm>
using namespace llvm;
-/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
-/// of insertvalue or extractvalue indices that identify a member, return
-/// the linearized index of the start of the member.
-///
-unsigned llvm::ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty,
- const unsigned *Indices,
- const unsigned *IndicesEnd,
- unsigned CurIndex) {
- // Base case: We're done.
- if (Indices && Indices == IndicesEnd)
- return CurIndex;
-
- // Given a struct type, recursively traverse the elements.
- if (const StructType *STy = dyn_cast<StructType>(Ty)) {
- for (StructType::element_iterator EB = STy->element_begin(),
- EI = EB,
- EE = STy->element_end();
- EI != EE; ++EI) {
- if (Indices && *Indices == unsigned(EI - EB))
- return ComputeLinearIndex(TLI, *EI, Indices+1, IndicesEnd, CurIndex);
- CurIndex = ComputeLinearIndex(TLI, *EI, 0, 0, CurIndex);
- }
- return CurIndex;
- }
- // Given an array type, recursively traverse the elements.
- else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
- const Type *EltTy = ATy->getElementType();
- for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) {
- if (Indices && *Indices == i)
- return ComputeLinearIndex(TLI, EltTy, Indices+1, IndicesEnd, CurIndex);
- CurIndex = ComputeLinearIndex(TLI, EltTy, 0, 0, CurIndex);
- }
- return CurIndex;
- }
- // We haven't found the type we're looking for, so keep searching.
- return CurIndex + 1;
-}
-
-/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of
-/// EVTs that represent all the individual underlying
-/// non-aggregate types that comprise it.
-///
-/// If Offsets is non-null, it points to a vector to be filled in
-/// with the in-memory offsets of each of the individual values.
-///
-void llvm::ComputeValueVTs(const TargetLowering &TLI, const Type *Ty,
- SmallVectorImpl<EVT> &ValueVTs,
- SmallVectorImpl<uint64_t> *Offsets,
- uint64_t StartingOffset) {
- // Given a struct type, recursively traverse the elements.
- if (const StructType *STy = dyn_cast<StructType>(Ty)) {
- const StructLayout *SL = TLI.getTargetData()->getStructLayout(STy);
- for (StructType::element_iterator EB = STy->element_begin(),
- EI = EB,
- EE = STy->element_end();
- EI != EE; ++EI)
- ComputeValueVTs(TLI, *EI, ValueVTs, Offsets,
- StartingOffset + SL->getElementOffset(EI - EB));
- return;
- }
- // Given an array type, recursively traverse the elements.
- if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
- const Type *EltTy = ATy->getElementType();
- uint64_t EltSize = TLI.getTargetData()->getTypeAllocSize(EltTy);
- for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
- ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets,
- StartingOffset + i * EltSize);
- return;
- }
- // Interpret void as zero return values.
- if (Ty->isVoidTy())
- return;
- // Base case: we can get an EVT for this LLVM IR type.
- ValueVTs.push_back(TLI.getValueType(Ty));
- if (Offsets)
- Offsets->push_back(StartingOffset);
-}
-
/// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by
/// PHI nodes or outside of the basic block that defines it, or used by a
/// switch or atomic instruction, which may expand to multiple basic blocks.
-static bool isUsedOutsideOfDefiningBlock(Instruction *I) {
+static bool isUsedOutsideOfDefiningBlock(const Instruction *I) {
+ if (I->use_empty()) return false;
if (isa<PHINode>(I)) return true;
- BasicBlock *BB = I->getParent();
- for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI)
+ const BasicBlock *BB = I->getParent();
+ for (Value::const_use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI)
if (cast<Instruction>(*UI)->getParent() != BB || isa<PHINode>(*UI))
return true;
return false;
@@ -135,26 +56,25 @@ static bool isUsedOutsideOfDefiningBlock(Instruction *I) {
/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
/// entry block, return true. This includes arguments used by switches, since
/// the switch may expand into multiple basic blocks.
-static bool isOnlyUsedInEntryBlock(Argument *A, bool EnableFastISel) {
+static bool isOnlyUsedInEntryBlock(const Argument *A, bool EnableFastISel) {
// With FastISel active, we may be splitting blocks, so force creation
// of virtual registers for all non-dead arguments.
- // Don't force virtual registers for byval arguments though, because
- // fast-isel can't handle those in all cases.
- if (EnableFastISel && !A->hasByValAttr())
+ if (EnableFastISel)
return A->use_empty();
- BasicBlock *Entry = A->getParent()->begin();
- for (Value::use_iterator UI = A->use_begin(), E = A->use_end(); UI != E; ++UI)
+ const BasicBlock *Entry = A->getParent()->begin();
+ for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end();
+ UI != E; ++UI)
if (cast<Instruction>(*UI)->getParent() != Entry || isa<SwitchInst>(*UI))
return false; // Use not in entry block.
return true;
}
-FunctionLoweringInfo::FunctionLoweringInfo(TargetLowering &tli)
+FunctionLoweringInfo::FunctionLoweringInfo(const TargetLowering &tli)
: TLI(tli) {
}
-void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf,
+void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
bool EnableFastISel) {
Fn = &fn;
MF = &mf;
@@ -162,7 +82,7 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf,
// Create a vreg for each argument register that is not dead and is used
// outside of the entry block for the function.
- for (Function::arg_iterator AI = Fn->arg_begin(), E = Fn->arg_end();
+ for (Function::const_arg_iterator AI = Fn->arg_begin(), E = Fn->arg_end();
AI != E; ++AI)
if (!isOnlyUsedInEntryBlock(AI, EnableFastISel))
InitializeRegForValue(AI);
@@ -170,10 +90,10 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf,
// Initialize the mapping of values to registers. This is only set up for
// instruction values that are used outside of the block that defines
// them.
- Function::iterator BB = Fn->begin(), EB = Fn->end();
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
- if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
- if (ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) {
+ Function::const_iterator BB = Fn->begin(), EB = Fn->end();
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(I))
+ if (const ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) {
const Type *Ty = AI->getAllocatedType();
uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
unsigned Align =
@@ -187,8 +107,8 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf,
}
for (; BB != EB; ++BB)
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
- if (!I->use_empty() && isUsedOutsideOfDefiningBlock(I))
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (isUsedOutsideOfDefiningBlock(I))
if (!isa<AllocaInst>(I) ||
!StaticAllocaMap.count(cast<AllocaInst>(I)))
InitializeRegForValue(I);
@@ -196,7 +116,7 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf,
// Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This
// also creates the initial PHI MachineInstrs, though none of the input
// operands are populated.
- for (BB = Fn->begin(), EB = Fn->end(); BB != EB; ++BB) {
+ for (BB = Fn->begin(); BB != EB; ++BB) {
MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(BB);
MBBMap[BB] = MBB;
MF->push_back(MBB);
@@ -209,14 +129,11 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf,
// Create Machine PHI nodes for LLVM PHI nodes, lowering them as
// appropriate.
- PHINode *PN;
- DebugLoc DL;
- for (BasicBlock::iterator
- I = BB->begin(), E = BB->end(); I != E; ++I) {
-
- PN = dyn_cast<PHINode>(I);
- if (!PN || PN->use_empty()) continue;
+ for (BasicBlock::const_iterator I = BB->begin();
+ const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ if (PN->use_empty()) continue;
+ DebugLoc DL = PN->getDebugLoc();
unsigned PHIReg = ValueMap[PN];
assert(PHIReg && "PHI node does not have an assigned virtual register!");
@@ -232,12 +149,20 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf,
}
}
}
+
+ // Mark landing pad blocks.
+ for (BB = Fn->begin(); BB != EB; ++BB)
+ if (const InvokeInst *Invoke = dyn_cast<InvokeInst>(BB->getTerminator()))
+ MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad();
}
/// clear - Clear out all the function-specific state. This returns this
/// FunctionLoweringInfo to an empty state, ready to be used for a
/// different function.
void FunctionLoweringInfo::clear() {
+ assert(CatchInfoFound.size() == CatchInfoLost.size() &&
+ "Not all catch info was assigned to a landing pad!");
+
MBBMap.clear();
ValueMap.clear();
StaticAllocaMap.clear();
@@ -246,6 +171,7 @@ void FunctionLoweringInfo::clear() {
CatchInfoFound.clear();
#endif
LiveOutRegInfo.clear();
+ ArgDbgValues.clear();
}
unsigned FunctionLoweringInfo::MakeReg(EVT VT) {
@@ -277,30 +203,12 @@ unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) {
return FirstReg;
}
-/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
-GlobalVariable *llvm::ExtractTypeInfo(Value *V) {
- V = V->stripPointerCasts();
- GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
-
- if (GV && GV->getName() == ".llvm.eh.catch.all.value") {
- assert(GV->hasInitializer() &&
- "The EH catch-all value must have an initializer");
- Value *Init = GV->getInitializer();
- GV = dyn_cast<GlobalVariable>(Init);
- if (!GV) V = cast<ConstantPointerNull>(Init);
- }
-
- assert((GV || isa<ConstantPointerNull>(V)) &&
- "TypeInfo must be a global variable or NULL");
- return GV;
-}
-
/// AddCatchInfo - Extract the personality and type infos from an eh.selector
/// call, and add them to the specified machine basic block.
-void llvm::AddCatchInfo(CallInst &I, MachineModuleInfo *MMI,
+void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI,
MachineBasicBlock *MBB) {
// Inform the MachineModuleInfo of the personality for this landing pad.
- ConstantExpr *CE = cast<ConstantExpr>(I.getOperand(2));
+ const ConstantExpr *CE = cast<ConstantExpr>(I.getOperand(2));
assert(CE->getOpcode() == Instruction::BitCast &&
isa<Function>(CE->getOperand(0)) &&
"Personality should be a function");
@@ -308,11 +216,11 @@ void llvm::AddCatchInfo(CallInst &I, MachineModuleInfo *MMI,
// Gather all the type infos for this landing pad and pass them along to
// MachineModuleInfo.
- std::vector<GlobalVariable *> TyInfo;
+ std::vector<const GlobalVariable *> TyInfo;
unsigned N = I.getNumOperands();
for (unsigned i = N - 1; i > 2; --i) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(i))) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(i))) {
unsigned FilterLength = CI->getZExtValue();
unsigned FirstCatch = i + FilterLength + !FilterLength;
assert (FirstCatch <= N && "Invalid filter length");
@@ -349,10 +257,11 @@ void llvm::AddCatchInfo(CallInst &I, MachineModuleInfo *MMI,
}
}
-void llvm::CopyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB,
+void llvm::CopyCatchInfo(const BasicBlock *SrcBB, const BasicBlock *DestBB,
MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) {
- for (BasicBlock::iterator I = SrcBB->begin(), E = --SrcBB->end(); I != E; ++I)
- if (EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) {
+ for (BasicBlock::const_iterator I = SrcBB->begin(), E = --SrcBB->end();
+ I != E; ++I)
+ if (const EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) {
// Apply the catch info to DestBB.
AddCatchInfo(*EHSel, MMI, FLI.MBBMap[DestBB]);
#ifndef NDEBUG
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h
index d851e6429c0c..4067a5b33044 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h
@@ -15,12 +15,17 @@
#ifndef FUNCTIONLOWERINGINFO_H
#define FUNCTIONLOWERINGINFO_H
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
#ifndef NDEBUG
#include "llvm/ADT/SmallSet.h"
#endif
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/Support/CallSite.h"
#include <vector>
namespace llvm {
@@ -31,6 +36,7 @@ class CallInst;
class Function;
class GlobalVariable;
class Instruction;
+class MachineInstr;
class MachineBasicBlock;
class MachineFunction;
class MachineModuleInfo;
@@ -44,8 +50,8 @@ class Value;
///
class FunctionLoweringInfo {
public:
- TargetLowering &TLI;
- Function *Fn;
+ const TargetLowering &TLI;
+ const Function *Fn;
MachineFunction *MF;
MachineRegisterInfo *RegInfo;
@@ -57,13 +63,6 @@ public:
/// allocated to hold a pointer to the hidden sret parameter.
unsigned DemoteRegister;
- explicit FunctionLoweringInfo(TargetLowering &TLI);
-
- /// set - Initialize this FunctionLoweringInfo with the given Function
- /// and its associated MachineFunction.
- ///
- void set(Function &Fn, MachineFunction &MF, bool EnableFastISel);
-
/// MBBMap - A mapping from LLVM basic blocks to their machine code entry.
DenseMap<const BasicBlock*, MachineBasicBlock *> MBBMap;
@@ -77,27 +76,15 @@ public:
/// anywhere in the function.
DenseMap<const AllocaInst*, int> StaticAllocaMap;
+ /// ArgDbgValues - A list of DBG_VALUE instructions created during isel for
+ /// function arguments that are inserted after scheduling is completed.
+ SmallVector<MachineInstr*, 8> ArgDbgValues;
+
#ifndef NDEBUG
- SmallSet<Instruction*, 8> CatchInfoLost;
- SmallSet<Instruction*, 8> CatchInfoFound;
+ SmallSet<const Instruction *, 8> CatchInfoLost;
+ SmallSet<const Instruction *, 8> CatchInfoFound;
#endif
- unsigned MakeReg(EVT VT);
-
- /// isExportedInst - Return true if the specified value is an instruction
- /// exported from its block.
- bool isExportedInst(const Value *V) {
- return ValueMap.count(V);
- }
-
- unsigned CreateRegForValue(const Value *V);
-
- unsigned InitializeRegForValue(const Value *V) {
- unsigned &R = ValueMap[V];
- assert(R == 0 && "Already initialized this value register!");
- return R = CreateRegForValue(V);
- }
-
struct LiveOutInfo {
unsigned NumSignBits;
APInt KnownOne, KnownZero;
@@ -108,42 +95,48 @@ public:
/// register number offset by 'FirstVirtualRegister'.
std::vector<LiveOutInfo> LiveOutRegInfo;
+ /// PHINodesToUpdate - A list of phi instructions whose operand list will
+ /// be updated after processing the current basic block.
+ /// TODO: This isn't per-function state, it's per-basic-block state. But
+ /// there's no other convenient place for it to live right now.
+ std::vector<std::pair<MachineInstr*, unsigned> > PHINodesToUpdate;
+
+ explicit FunctionLoweringInfo(const TargetLowering &TLI);
+
+ /// set - Initialize this FunctionLoweringInfo with the given Function
+ /// and its associated MachineFunction.
+ ///
+ void set(const Function &Fn, MachineFunction &MF, bool EnableFastISel);
+
/// clear - Clear out all the function-specific state. This returns this
/// FunctionLoweringInfo to an empty state, ready to be used for a
/// different function.
void clear();
-};
-/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
-/// of insertvalue or extractvalue indices that identify a member, return
-/// the linearized index of the start of the member.
-///
-unsigned ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty,
- const unsigned *Indices,
- const unsigned *IndicesEnd,
- unsigned CurIndex = 0);
-
-/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of
-/// EVTs that represent all the individual underlying
-/// non-aggregate types that comprise it.
-///
-/// If Offsets is non-null, it points to a vector to be filled in
-/// with the in-memory offsets of each of the individual values.
-///
-void ComputeValueVTs(const TargetLowering &TLI, const Type *Ty,
- SmallVectorImpl<EVT> &ValueVTs,
- SmallVectorImpl<uint64_t> *Offsets = 0,
- uint64_t StartingOffset = 0);
+ unsigned MakeReg(EVT VT);
+
+ /// isExportedInst - Return true if the specified value is an instruction
+ /// exported from its block.
+ bool isExportedInst(const Value *V) {
+ return ValueMap.count(V);
+ }
-/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
-GlobalVariable *ExtractTypeInfo(Value *V);
+ unsigned CreateRegForValue(const Value *V);
+
+ unsigned InitializeRegForValue(const Value *V) {
+ unsigned &R = ValueMap[V];
+ assert(R == 0 && "Already initialized this value register!");
+ return R = CreateRegForValue(V);
+ }
+};
/// AddCatchInfo - Extract the personality and type infos from an eh.selector
/// call, and add them to the specified machine basic block.
-void AddCatchInfo(CallInst &I, MachineModuleInfo *MMI, MachineBasicBlock *MBB);
+void AddCatchInfo(const CallInst &I,
+ MachineModuleInfo *MMI, MachineBasicBlock *MBB);
/// CopyCatchInfo - Copy catch information from SrcBB to DestBB.
-void CopyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB,
+void CopyCatchInfo(const BasicBlock *SrcBB, const BasicBlock *DestBB,
MachineModuleInfo *MMI, FunctionLoweringInfo &FLI);
} // end namespace llvm
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 28ba343e7fe6..c5dae826fff7 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -296,8 +296,19 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
}
}
+ // If this value has only one use, that use is a kill. This is a
+ // conservative approximation. Tied operands are never killed, so we need
+ // to check that. And that means we need to determine the index of the
+ // operand.
+ unsigned Idx = MI->getNumOperands();
+ while (Idx > 0 &&
+ MI->getOperand(Idx-1).isReg() && MI->getOperand(Idx-1).isImplicit())
+ --Idx;
+ bool isTied = MI->getDesc().getOperandConstraint(Idx, TOI::TIED_TO) != -1;
+ bool isKill = Op.hasOneUse() && !isTied && !IsDebug;
+
MI->addOperand(MachineOperand::CreateReg(VReg, isOptDef,
- false/*isImp*/, false/*isKill*/,
+ false/*isImp*/, isKill,
false/*isDead*/, false/*isUndef*/,
false/*isEarlyClobber*/,
0/*SubReg*/, IsDebug));
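
The operand-index scan in that hunk walks back over trailing implicit register operands so the TIED_TO query is asked about the slot the new operand will actually occupy. A standalone sketch of the same scan; Operand and the tied-to table here are simplified stand-ins, not MachineInstr's API:

#include <cstdio>
#include <vector>

// Simplified operand record: just the two properties the scan cares about.
struct Operand { bool IsReg; bool IsImplicit; };

// The new operand lands after the explicit operands but before any implicit
// ones already present, so walk back over trailing implicit register
// operands to find its eventual index.
static unsigned indexForNewOperand(const std::vector<Operand> &Ops) {
  unsigned Idx = Ops.size();
  while (Idx > 0 && Ops[Idx - 1].IsReg && Ops[Idx - 1].IsImplicit)
    --Idx;
  return Idx;
}

int main() {
  std::vector<Operand> Ops = {
    {true, false},  // explicit def
    {true, false},  // explicit use
    {true, true},   // implicit use appended by the builder
  };
  // A hypothetical tied-to table: -1 means "not tied".
  std::vector<int> TiedTo = {-1, 0, -1};
  unsigned Idx = indexForNewOperand(Ops);  // 2: before the implicit operand
  bool HasOneUse = true, IsDebug = false;
  bool IsTied = Idx < TiedTo.size() && TiedTo[Idx] != -1;
  bool IsKill = HasOneUse && !IsTied && !IsDebug;
  std::printf("Idx=%u kill=%d\n", Idx, IsKill ? 1 : 0);
}
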
@@ -440,8 +451,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
unsigned SubIdx = cast<ConstantSDNode>(N2)->getZExtValue();
const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
const TargetRegisterClass *SRC =
- getSuperRegisterRegClass(TRC, SubIdx,
- Node->getValueType(0));
+ getSuperRegisterRegClass(TRC, SubIdx, Node->getValueType(0));
// Figure out the register class to create for the destreg.
// Note that if we're going to directly use an existing register,
@@ -504,41 +514,83 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
assert(isNew && "Node emitted out of order - early");
}
+/// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes.
+///
+void InstrEmitter::EmitRegSequence(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ const TargetRegisterClass *RC = TLI->getRegClassFor(Node->getValueType(0));
+ unsigned NewVReg = MRI->createVirtualRegister(RC);
+ MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
+ TII->get(TargetOpcode::REG_SEQUENCE), NewVReg);
+ unsigned NumOps = Node->getNumOperands();
+ assert((NumOps & 1) == 0 &&
+ "REG_SEQUENCE must have an even number of operands!");
+ const TargetInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE);
+ for (unsigned i = 0; i != NumOps; ++i) {
+ SDValue Op = Node->getOperand(i);
+#ifndef NDEBUG
+ if (i & 1) {
+ unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue();
+ unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap);
+ const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
+ const TargetRegisterClass *SRC =
+ getSuperRegisterRegClass(TRC, SubIdx, Node->getValueType(0));
+ assert(SRC == RC && "Invalid subregister index in REG_SEQUENCE");
+ }
+#endif
+ AddOperand(MI, Op, i+1, &II, VRBaseMap);
+ }
+
+ MBB->insert(InsertPos, MI);
+ SDValue Op(Node, 0);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
+ isNew = isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
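
A REG_SEQUENCE node's operands alternate between a value and the sub-register index it occupies, which is why the emitter asserts an even operand count. A small sketch decoding such a flat pair list (the vreg numbers are made up):

#include <cassert>
#include <cstddef>
#include <cstdio>
#include <vector>

// A REG_SEQUENCE-style operand list: a value at each even position, the
// sub-register index it occupies at the following odd position.
int main() {
  std::vector<unsigned> Ops = {10, 1,   // vreg10 -> sub-register 1
                               11, 2};  // vreg11 -> sub-register 2
  assert(Ops.size() % 2 == 0 && "must have an even number of operands");
  for (std::size_t i = 0; i != Ops.size(); i += 2)
    std::printf("vreg%u -> sub%u\n", Ops[i], Ops[i + 1]);
}
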
+
/// EmitDbgValue - Generate machine instruction for a dbg_value node.
///
-MachineInstr *InstrEmitter::EmitDbgValue(SDDbgValue *SD,
- MachineBasicBlock *InsertBB,
- DenseMap<SDValue, unsigned> &VRBaseMap,
- DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
+MachineInstr *
+InstrEmitter::EmitDbgValue(SDDbgValue *SD,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
uint64_t Offset = SD->getOffset();
MDNode* MDPtr = SD->getMDPtr();
DebugLoc DL = SD->getDebugLoc();
+ if (SD->getKind() == SDDbgValue::FRAMEIX) {
+ // Stack address; this needs to be lowered in target-dependent fashion.
+ // EmitTargetCodeForFrameDebugValue is responsible for allocation.
+ unsigned FrameIx = SD->getFrameIx();
+ return TII->emitFrameIndexDebugValue(*MF, FrameIx, Offset, MDPtr, DL);
+ }
+ // Otherwise, we're going to create an instruction here.
const TargetInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE);
MachineInstrBuilder MIB = BuildMI(*MF, DL, II);
if (SD->getKind() == SDDbgValue::SDNODE) {
- AddOperand(&*MIB, SDValue(SD->getSDNode(), SD->getResNo()),
- (*MIB).getNumOperands(), &II, VRBaseMap, true /*IsDebug*/);
+ SDNode *Node = SD->getSDNode();
+ SDValue Op = SDValue(Node, SD->getResNo());
+ // It's possible we replaced this SDNode with other(s) and therefore
+ // didn't generate code for it. It's better to catch these cases where
+ // they happen and transfer the debug info, but trying to guarantee that
+ // in all cases would be very fragile; this is a safeguard for any
+ // that were missed.
+ DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op);
+ if (I==VRBaseMap.end())
+ MIB.addReg(0U); // undef
+ else
+ AddOperand(&*MIB, Op, (*MIB).getNumOperands(), &II, VRBaseMap,
+ true /*IsDebug*/);
} else if (SD->getKind() == SDDbgValue::CONST) {
- Value *V = SD->getConst();
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ const Value *V = SD->getConst();
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
MIB.addImm(CI->getSExtValue());
- } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+ } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
MIB.addFPImm(CF);
} else {
// Could be an Undef. In any case insert an Undef so we can see what we
// dropped.
MIB.addReg(0U);
}
- } else if (SD->getKind() == SDDbgValue::FRAMEIX) {
- unsigned FrameIx = SD->getFrameIx();
- // Stack address; this needs to be lowered in target-dependent fashion.
- // FIXME test that the target supports this somehow; if not emit Undef.
- // Create a pseudo for EmitInstrWithCustomInserter's consumption.
- MIB.addImm(FrameIx).addImm(Offset).addMetadata(MDPtr);
- abort();
- TLI->EmitInstrWithCustomInserter(&*MIB, InsertBB, EM);
- return 0;
} else {
// Insert an Undef so we can see what we dropped.
MIB.addReg(0U);
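
The VRBaseMap lookup above degrades to an undef operand instead of asserting when the node carrying a variable's value was replaced and never got a virtual register. The same fallback shape, sketched with a plain std::map in place of the DenseMap:

#include <cstdio>
#include <map>

// If the node that carried a variable's value was replaced during
// combining and never got a virtual register, fall back to "undef"
// (register 0) rather than asserting: losing one debug location is
// better than crashing.
static unsigned regForDebugValue(const std::map<int, unsigned> &VRBaseMap,
                                 int NodeId) {
  auto It = VRBaseMap.find(NodeId);
  return It == VRBaseMap.end() ? 0u /* undef */ : It->second;
}

int main() {
  std::map<int, unsigned> VRBaseMap = {{1, 32}, {2, 33}};
  std::printf("%u\n", regForDebugValue(VRBaseMap, 2)); // 33
  std::printf("%u\n", regForDebugValue(VRBaseMap, 9)); // 0: undef fallback
}
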
@@ -553,8 +605,7 @@ MachineInstr *InstrEmitter::EmitDbgValue(SDDbgValue *SD,
///
void InstrEmitter::
EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
- DenseMap<SDValue, unsigned> &VRBaseMap,
- DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
unsigned Opc = Node->getMachineOpcode();
// Handle subreg insert/extract specially
@@ -571,6 +622,12 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
return;
}
+ // Handle REG_SEQUENCE specially.
+ if (Opc == TargetOpcode::REG_SEQUENCE) {
+ EmitRegSequence(Node, VRBaseMap);
+ return;
+ }
+
if (Opc == TargetOpcode::IMPLICIT_DEF)
// We want a unique VR for each IMPLICIT_DEF use.
return;
@@ -615,7 +672,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
if (II.usesCustomInsertionHook()) {
// Insert this instruction into the basic block using a target
// specific inserter which may return a new basic block.
- MBB = TLI->EmitInstrWithCustomInserter(MI, MBB, EM);
+ MBB = TLI->EmitInstrWithCustomInserter(MI, MBB);
InsertPos = MBB->end();
return;
}
@@ -646,7 +703,6 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
i != e; ++i)
MI->addRegisterDead(IDList[i-II.getNumDefs()], TRI);
}
- return;
}
/// EmitSpecialNode - Generate machine code for a target-independent node and
@@ -720,12 +776,12 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
TII->get(TargetOpcode::INLINEASM));
// Add the asm string as an external symbol operand.
- const char *AsmStr =
- cast<ExternalSymbolSDNode>(Node->getOperand(1))->getSymbol();
+ SDValue AsmStrV = Node->getOperand(InlineAsm::Op_AsmString);
+ const char *AsmStr = cast<ExternalSymbolSDNode>(AsmStrV)->getSymbol();
MI->addOperand(MachineOperand::CreateES(AsmStr));
// Add all of the operand registers to the instruction.
- for (unsigned i = 2; i != NumOps;) {
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
unsigned Flags =
cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
@@ -733,24 +789,24 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
MI->addOperand(MachineOperand::CreateImm(Flags));
++i; // Skip the ID value.
- switch (Flags & 7) {
+ switch (InlineAsm::getKind(Flags)) {
default: llvm_unreachable("Bad flags!");
- case 2: // Def of register.
+ case InlineAsm::Kind_RegDef:
for (; NumVals; --NumVals, ++i) {
unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
MI->addOperand(MachineOperand::CreateReg(Reg, true));
}
break;
- case 6: // Def of earlyclobber register.
+ case InlineAsm::Kind_RegDefEarlyClobber:
for (; NumVals; --NumVals, ++i) {
unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
MI->addOperand(MachineOperand::CreateReg(Reg, true, false, false,
false, false, true));
}
break;
- case 1: // Use of register.
- case 3: // Immediate.
- case 4: // Addressing mode.
+ case InlineAsm::Kind_RegUse: // Use of register.
+ case InlineAsm::Kind_Imm: // Immediate.
+ case InlineAsm::Kind_Mem: // Addressing mode.
// The addressing mode has been selected, just add all of the
// operands to the machine instruction.
for (; NumVals; --NumVals, ++i)
@@ -758,6 +814,13 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
break;
}
}
+
+ // Get the mdnode from the asm if it exists and add it to the instruction.
+ SDValue MDV = Node->getOperand(InlineAsm::Op_MDNode);
+ const MDNode *MD = cast<MDNodeSDNode>(MDV)->getMD();
+ if (MD)
+ MI->addOperand(MachineOperand::CreateMetadata(MD));
+
MBB->insert(InsertPos, MI);
break;
}
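
The flag word decoded by that switch packs an operand kind and a register count into one immediate. A sketch of a decoder, assuming the layout implied by the old literal switch this hunk replaces — kind in the low three bits, count in the bits above it. The bit layout is an assumption here for illustration, not a quote of the InlineAsm header:

#include <cstdio>

// Assumed layout of an inline-asm operand flag word (per the old
// "Flags & 7" switch): kind in bits 0-2, operand register count in
// bits 3-15. Illustrative, not the authoritative encoding.
enum Kind { RegUse = 1, RegDef = 2, Imm = 3, Mem = 4, RegDefEarlyClobber = 6 };

static unsigned getKind(unsigned Flags) { return Flags & 7; }
static unsigned getNumOperandRegisters(unsigned Flags) {
  return (Flags & 0xffff) >> 3;
}

int main() {
  unsigned Flags = (2u << 3) | RegDef;  // a def group covering 2 registers
  std::printf("kind=%u nregs=%u\n", getKind(Flags),
              getNumOperandRegisters(Flags));  // kind=2 nregs=2
}
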
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h
index baabb7554b60..c7e7c71268a2 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -88,6 +88,9 @@ class InstrEmitter {
void EmitCopyToRegClassNode(SDNode *Node,
DenseMap<SDValue, unsigned> &VRBaseMap);
+ /// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes.
+ ///
+ void EmitRegSequence(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap);
public:
/// CountResults - The results of target nodes have register or immediate
/// operands first, then an optional chain, and optional flag operands
@@ -103,17 +106,14 @@ public:
/// EmitDbgValue - Generate machine instruction for a dbg_value node.
///
MachineInstr *EmitDbgValue(SDDbgValue *SD,
- MachineBasicBlock *InsertBB,
- DenseMap<SDValue, unsigned> &VRBaseMap,
- DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM);
+ DenseMap<SDValue, unsigned> &VRBaseMap);
/// EmitNode - Generate machine code for a node and needed dependencies.
///
void EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
- DenseMap<SDValue, unsigned> &VRBaseMap,
- DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
if (Node->isMachineOpcode())
- EmitMachineNode(Node, IsClone, IsCloned, VRBaseMap, EM);
+ EmitMachineNode(Node, IsClone, IsCloned, VRBaseMap);
else
EmitSpecialNode(Node, IsClone, IsCloned, VRBaseMap);
}
@@ -130,8 +130,7 @@ public:
private:
void EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
- DenseMap<SDValue, unsigned> &VRBaseMap,
- DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM);
+ DenseMap<SDValue, unsigned> &VRBaseMap);
void EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
DenseMap<SDValue, unsigned> &VRBaseMap);
};
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index d35f0da393e4..bedfa57b9fc9 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -32,13 +32,11 @@
#include "llvm/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include <map>
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -55,7 +53,8 @@ using namespace llvm;
///
namespace {
class SelectionDAGLegalize {
- TargetLowering &TLI;
+ const TargetMachine &TM;
+ const TargetLowering &TLI;
SelectionDAG &DAG;
CodeGenOpt::Level OptLevel;
@@ -213,7 +212,8 @@ SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag,
CodeGenOpt::Level ol)
- : TLI(dag.getTargetLoweringInfo()), DAG(dag), OptLevel(ol),
+ : TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()),
+ DAG(dag), OptLevel(ol),
ValueTypeActions(TLI.getValueTypeActions()) {
assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
"Too many value types for ValueTypeActions to hold!");
@@ -409,7 +409,9 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
// to the final destination using (unaligned) integer loads and stores.
EVT StoredVT = ST->getMemoryVT();
EVT RegVT =
- TLI.getRegisterType(*DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), StoredVT.getSizeInBits()));
+ TLI.getRegisterType(*DAG.getContext(),
+ EVT::getIntegerVT(*DAG.getContext(),
+ StoredVT.getSizeInBits()));
unsigned StoredBytes = StoredVT.getSizeInBits() / 8;
unsigned RegBytes = RegVT.getSizeInBits() / 8;
unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
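
The chunk count here is a ceiling division, and whatever does not fill a whole register becomes the narrower truncating-store tail handled further down in this hunk. The arithmetic in miniature:

#include <cstdio>

// Chunking arithmetic used when an unaligned store is expanded into
// register-sized pieces plus a narrower tail.
int main() {
  unsigned StoredBytes = 10; // e.g. an 80-bit value
  unsigned RegBytes = 4;     // e.g. 32-bit integer registers
  unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes; // ceil: 3
  unsigned Offset = (NumRegs - 1) * RegBytes;                 // 8
  unsigned TailBits = 8 * (StoredBytes - Offset);             // 16-bit tail
  std::printf("regs=%u tail=%u bits\n", NumRegs, TailBits);
}
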
@@ -445,7 +447,8 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
// The last store may be partial. Do a truncating store. On big-endian
// machines this requires an extending load from the stack slot to ensure
// that the bits are in the right place.
- EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
+ 8 * (StoredBytes - Offset));
// Load from the stack slot.
SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
@@ -548,7 +551,8 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
}
// The last copy may be partial. Do an extending load.
- EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), 8 * (LoadedBytes - Offset));
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
+ 8 * (LoadedBytes - Offset));
SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
LD->getSrcValue(), SVOffset + Offset,
MemVT, LD->isVolatile(),
@@ -968,11 +972,11 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Node->dump( &DAG);
dbgs() << "\n";
#endif
- llvm_unreachable("Do not know how to legalize this operator!");
+ assert(0 && "Do not know how to legalize this operator!");
case ISD::BUILD_VECTOR:
switch (TLI.getOperationAction(ISD::BUILD_VECTOR, Node->getValueType(0))) {
- default: llvm_unreachable("This action is not supported yet!");
+ default: assert(0 && "This action is not supported yet!");
case TargetLowering::Custom:
Tmp3 = TLI.LowerOperation(Result, DAG);
if (Tmp3.getNode()) {
@@ -1089,7 +1093,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Tmp4 = Result.getValue(1);
switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
- default: llvm_unreachable("This action is not supported yet!");
+ default: assert(0 && "This action is not supported yet!");
case TargetLowering::Legal:
// If this is an unaligned load and the target doesn't support it,
// expand it.
@@ -1259,7 +1263,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Tmp2 = LegalizeOp(Ch);
} else {
switch (TLI.getLoadExtAction(ExtType, SrcVT)) {
- default: llvm_unreachable("This action is not supported yet!");
+ default: assert(0 && "This action is not supported yet!");
case TargetLowering::Custom:
isCustom = true;
// FALLTHROUGH
@@ -1357,7 +1361,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
EVT VT = Tmp3.getValueType();
switch (TLI.getOperationAction(ISD::STORE, VT)) {
- default: llvm_unreachable("This action is not supported yet!");
+ default: assert(0 && "This action is not supported yet!");
case TargetLowering::Legal:
// If this is an unaligned store and the target doesn't support it,
// expand it.
@@ -1394,7 +1398,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
// Promote to a byte-sized store with upper bits zero if not
// storing an integral number of bytes. For example, promote
// TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
- EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StVT.getStoreSizeInBits());
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
+ StVT.getStoreSizeInBits());
Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT);
Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
SVOffset, NVT, isVolatile, isNonTemporal,
@@ -1459,7 +1464,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
ST->getOffset());
switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) {
- default: llvm_unreachable("This action is not supported yet!");
+ default: assert(0 && "This action is not supported yet!");
case TargetLowering::Legal:
// If this is an unaligned store and the target doesn't support it,
// expand it.
@@ -1658,8 +1663,7 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
Chain = SP.getValue(1);
unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
- unsigned StackAlign =
- TLI.getTargetMachine().getFrameInfo()->getStackAlignment();
+ unsigned StackAlign = TM.getFrameInfo()->getStackAlignment();
if (Align > StackAlign)
SP = DAG.getNode(ISD::AND, dl, VT, SP,
DAG.getConstant(-(uint64_t)Align, VT));
@@ -1683,7 +1687,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
EVT OpVT = LHS.getValueType();
ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
switch (TLI.getCondCodeAction(CCCode, OpVT)) {
- default: llvm_unreachable("Unknown condition code action!");
+ default: assert(0 && "Unknown condition code action!");
case TargetLowering::Legal:
// Nothing to do.
break;
@@ -1691,7 +1695,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
unsigned Opc = 0;
switch (CCCode) {
- default: llvm_unreachable("Don't know how to expand this condition!");
+ default: assert(0 && "Don't know how to expand this condition!");
case ISD::SETOEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETO; Opc = ISD::AND; break;
case ISD::SETOGT: CC1 = ISD::SETGT; CC2 = ISD::SETO; Opc = ISD::AND; break;
case ISD::SETOGE: CC1 = ISD::SETGE; CC2 = ISD::SETO; Opc = ISD::AND; break;
@@ -1738,8 +1742,8 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
unsigned SrcSize = SrcOp.getValueType().getSizeInBits();
unsigned SlotSize = SlotVT.getSizeInBits();
unsigned DestSize = DestVT.getSizeInBits();
- unsigned DestAlign =
- TLI.getTargetData()->getPrefTypeAlignment(DestVT.getTypeForEVT(*DAG.getContext()));
+ const Type *DestType = DestVT.getTypeForEVT(*DAG.getContext());
+ unsigned DestAlign = TLI.getTargetData()->getPrefTypeAlignment(DestType);
// Emit a store to the stack slot. Use a truncstore if the input value is
// later than DestVT.
@@ -1930,7 +1934,7 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
RTLIB::Libcall Call_PPCF128) {
RTLIB::Libcall LC;
switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unexpected request for libcall!");
+ default: assert(0 && "Unexpected request for libcall!");
case MVT::f32: LC = Call_F32; break;
case MVT::f64: LC = Call_F64; break;
case MVT::f80: LC = Call_F80; break;
@@ -1947,7 +1951,7 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
RTLIB::Libcall Call_I128) {
RTLIB::Libcall LC;
switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unexpected request for libcall!");
+ default: assert(0 && "Unexpected request for libcall!");
case MVT::i8: LC = Call_I8; break;
case MVT::i16: LC = Call_I16; break;
case MVT::i32: LC = Call_I32; break;
@@ -2062,7 +2066,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
// offset depending on the data type.
uint64_t FF;
switch (Op0.getValueType().getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unsupported integer type!");
+ default: assert(0 && "Unsupported integer type!");
case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float)
case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float)
case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float)
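
Each FF constant in this table is 2^N encoded as an IEEE-754 single: sign 0, biased exponent 127 + N, zero mantissa, i.e. (127 + N) << 23. A quick check that reproduces the table entries:

#include <cstdio>
#include <cstring>

// Each table entry above is 2^N as an IEEE-754 single: sign 0, biased
// exponent 127 + N, zero mantissa, i.e. (127 + N) << 23.
static unsigned powerOfTwoFloatBits(unsigned N) { return (127u + N) << 23; }

int main() {
  std::printf("0x%08X\n", powerOfTwoFloatBits(8));  // 0x43800000, 2^8
  std::printf("0x%08X\n", powerOfTwoFloatBits(16)); // 0x47800000, 2^16
  std::printf("0x%08X\n", powerOfTwoFloatBits(32)); // 0x4F800000, 2^32
  float F;
  unsigned Bits = powerOfTwoFloatBits(32);
  std::memcpy(&F, &Bits, sizeof F);                 // reinterpret the bits
  std::printf("%.1f\n", F);                         // 4294967296.0
}
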
@@ -2182,7 +2186,7 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) {
EVT SHVT = TLI.getShiftAmountTy();
SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
switch (VT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unhandled Expand type in BSWAP!");
+ default: assert(0 && "Unhandled Expand type in BSWAP!");
case MVT::i16:
Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT));
Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT));
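
The MVT::i16 case above is the ordinary two-byte swap: shift left eight, shift right eight, OR the halves. In plain C++:

#include <cstdint>
#include <cstdio>

// The MVT::i16 expansion above in plain C++: swap the two bytes with one
// shift left, one shift right, and an OR.
static uint16_t bswap16(uint16_t X) {
  return static_cast<uint16_t>((X << 8) | (X >> 8));
}

int main() {
  std::printf("0x%04X\n", static_cast<unsigned>(bswap16(0x1234))); // 0x3412
}
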
@@ -2227,7 +2231,7 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) {
SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
DebugLoc dl) {
switch (Opc) {
- default: llvm_unreachable("Cannot expand this yet!");
+ default: assert(0 && "Cannot expand this yet!");
case ISD::CTPOP: {
static const uint64_t mask[6] = {
0x5555555555555555ULL, 0x3333333333333333ULL,
@@ -2333,10 +2337,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
EVT VT = Node->getValueType(0);
if (VT.isInteger())
Results.push_back(DAG.getConstant(0, VT));
- else if (VT.isFloatingPoint())
+ else {
+ assert(VT.isFloatingPoint() && "Unknown value type!");
Results.push_back(DAG.getConstantFP(0, VT));
- else
- llvm_unreachable("Unknown value type!");
+ }
break;
}
case ISD::TRAP: {
@@ -2431,7 +2435,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
// Increment the pointer, VAList, to the next vaarg
Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList,
DAG.getConstant(TLI.getTargetData()->
- getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())),
+ getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())),
TLI.getPointerTy()));
// Store the incremented VAList to the legalized pointer
Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Tmp2, V, 0,
@@ -2842,7 +2846,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
RHS);
TopHalf = BottomHalf.getValue(1);
- } else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2))) {
+ } else {
+ // FIXME: We should be able to fall back to a libcall with an illegal
+ // type in some cases.
+ // Also, we can fall back to a division in some cases, but that's a big
+ // performance hit in the general case.
+ assert(TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(),
+ VT.getSizeInBits() * 2)) &&
+ "Don't know how to expand this operation yet!");
EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
@@ -2851,12 +2862,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
DAG.getIntPtrConstant(0));
TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1,
DAG.getIntPtrConstant(1));
- } else {
- // FIXME: We should be able to fall back to a libcall with an illegal
- // type in some cases.
- // Also, we can fall back to a division in some cases, but that's a big
- // performance hit in the general case.
- llvm_unreachable("Don't know how to expand this operation yet!");
}
if (isSigned) {
Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, TLI.getShiftAmountTy());
@@ -2916,7 +2921,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
PseudoSourceValue::getJumpTable(), 0, MemVT,
false, false, 0);
Addr = LD;
- if (TLI.getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+ if (TM.getRelocationModel() == Reloc::PIC_) {
// For PIC, the sequence is:
// BRIND(load(Jumptable + index) + RelocBase)
// RelocBase can be JumpTable, GOT or some sort of global base.
@@ -3078,11 +3083,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,
if (OVT.isVector()) {
ExtOp = ISD::BIT_CONVERT;
TruncOp = ISD::BIT_CONVERT;
- } else if (OVT.isInteger()) {
+ } else {
+ assert(OVT.isInteger() && "Cannot promote logic operation");
ExtOp = ISD::ANY_EXTEND;
TruncOp = ISD::TRUNCATE;
- } else {
- llvm_report_error("Cannot promote logic operation");
}
// Promote each of the values to the new type.
Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0));
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 665b21f5a6bc..e3eb949567a3 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -109,14 +109,16 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_BIT_CONVERT(SDNode *N) {
SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) {
// Convert the inputs to integers, and build a new pair out of them.
return DAG.getNode(ISD::BUILD_PAIR, N->getDebugLoc(),
- TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)),
+ TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)),
BitConvertToInteger(N->getOperand(0)),
BitConvertToInteger(N->getOperand(1)));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(ConstantFPSDNode *N) {
return DAG.getConstant(N->getValueAPF().bitcastToAPInt(),
- TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)));
+ TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) {
@@ -338,7 +340,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP32(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = N->getOperand(0);
- return MakeLibCall(RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false, N->getDebugLoc());
+ return MakeLibCall(RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false,
+ N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
@@ -489,7 +492,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatRes_UNDEF(SDNode *N) {
- return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)));
+ return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) {
@@ -531,7 +535,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
// Sign/zero extend the argument if the libcall takes a larger type.
SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
NVT, N->getOperand(0));
- return MakeLibCall(LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT), &Op, 1, false, dl);
+ return MakeLibCall(LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT),
+ &Op, 1, false, dl);
}
@@ -1403,7 +1408,8 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) {
SDValue Chain = ST->getChain();
SDValue Ptr = ST->getBasePtr();
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ST->getValue().getValueType());
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(),
+ ST->getValue().getValueType());
assert(NVT.isByteSized() && "Expanded type not byte sized!");
assert(ST->getMemoryVT().bitsLE(NVT) && "Float type not round?");
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 48f64c34d6d2..548454c633a5 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -204,7 +204,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) {
std::swap(Lo, Hi);
InOp = DAG.getNode(ISD::ANY_EXTEND, dl,
- EVT::getIntegerVT(*DAG.getContext(), NOutVT.getSizeInBits()),
+ EVT::getIntegerVT(*DAG.getContext(),
+ NOutVT.getSizeInBits()),
JoinIntegers(Lo, Hi));
return DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, InOp);
}
@@ -464,7 +465,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) {
return DAG.getNode(ISD::SHL, N->getDebugLoc(),
- TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)),
+ TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)),
GetPromotedInteger(N->getOperand(0)), N->getOperand(1));
}
@@ -555,7 +556,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) {
- return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)));
+ return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)));
}
SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {
@@ -1383,7 +1385,8 @@ void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N,
if (NVTBits < EVTBits) {
Hi = DAG.getNode(ISD::AssertSext, dl, NVT, Hi,
- DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), EVTBits - NVTBits)));
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+ EVTBits - NVTBits)));
} else {
Lo = DAG.getNode(ISD::AssertSext, dl, NVT, Lo, DAG.getValueType(EVT));
// The high part replicates the sign bit of Lo, make it explicit.
@@ -1403,7 +1406,8 @@ void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N,
if (NVTBits < EVTBits) {
Hi = DAG.getNode(ISD::AssertZext, dl, NVT, Hi,
- DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), EVTBits - NVTBits)));
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+ EVTBits - NVTBits)));
} else {
Lo = DAG.getNode(ISD::AssertZext, dl, NVT, Lo, DAG.getValueType(EVT));
// The high part must be zero, make it explicit.
@@ -1846,7 +1850,8 @@ void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N,
unsigned ExcessBits =
Op.getValueType().getSizeInBits() - NVT.getSizeInBits();
Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi,
- DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), ExcessBits)));
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+ ExcessBits)));
}
}
@@ -1968,7 +1973,8 @@ void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N,
SplitInteger(Res, Lo, Hi);
unsigned ExcessBits =
Op.getValueType().getSizeInBits() - NVT.getSizeInBits();
- Hi = DAG.getZeroExtendInReg(Hi, dl, EVT::getIntegerVT(*DAG.getContext(), ExcessBits));
+ Hi = DAG.getZeroExtendInReg(Hi, dl,
+ EVT::getIntegerVT(*DAG.getContext(), ExcessBits));
}
}
@@ -2269,7 +2275,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
unsigned EBytes = ExtVT.getStoreSize();
unsigned IncrementSize = NVT.getSizeInBits()/8;
unsigned ExcessBits = (EBytes - IncrementSize)*8;
- EVT HiVT = EVT::getIntegerVT(*DAG.getContext(), ExtVT.getSizeInBits() - ExcessBits);
+ EVT HiVT = EVT::getIntegerVT(*DAG.getContext(),
+ ExtVT.getSizeInBits() - ExcessBits);
if (ExcessBits < NVT.getSizeInBits()) {
// Transfer high bits from the top of Lo to the bottom of Hi.
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index f3e7ca4fa38f..17f131b21e4a 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -721,7 +721,8 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
}
void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
- assert(Result.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ assert(Result.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
"Invalid type for promoted integer");
AnalyzeNewValue(Result);
@@ -731,7 +732,8 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
}
void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
- assert(Result.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ assert(Result.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
"Invalid type for softened float");
AnalyzeNewValue(Result);
@@ -762,7 +764,8 @@ void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo,
void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo,
SDValue Hi) {
- assert(Lo.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ assert(Lo.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
Hi.getValueType() == Lo.getValueType() &&
"Invalid type for expanded integer");
// Lo/Hi may have been newly allocated, if so, add nodeid's as relevant.
@@ -788,7 +791,8 @@ void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo,
void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo,
SDValue Hi) {
- assert(Lo.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ assert(Lo.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
Hi.getValueType() == Lo.getValueType() &&
"Invalid type for expanded float");
// Lo/Hi may have been newly allocated, if so, add nodeid's as relevant.
@@ -832,7 +836,8 @@ void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo,
}
void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
- assert(Result.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ assert(Result.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
"Invalid type for widened vector");
AnalyzeNewValue(Result);
@@ -940,7 +945,8 @@ void DAGTypeLegalizer::GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT) {
} else {
unsigned NumElements = InVT.getVectorNumElements();
assert(!(NumElements & 1) && "Splitting vector, but not in half!");
- LoVT = HiVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), NumElements/2);
+ LoVT = HiVT = EVT::getVectorVT(*DAG.getContext(),
+ InVT.getVectorElementType(), NumElements/2);
}
}
@@ -980,7 +986,8 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) {
DebugLoc dlLo = Lo.getDebugLoc();
EVT LVT = Lo.getValueType();
EVT HVT = Hi.getValueType();
- EVT NVT = EVT::getIntegerVT(*DAG.getContext(), LVT.getSizeInBits() + HVT.getSizeInBits());
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
+ LVT.getSizeInBits() + HVT.getSizeInBits());
Lo = DAG.getNode(ISD::ZERO_EXTEND, dlLo, NVT, Lo);
Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi);
@@ -1082,7 +1089,8 @@ void DAGTypeLegalizer::SplitInteger(SDValue Op,
/// type half the size of Op's.
void DAGTypeLegalizer::SplitInteger(SDValue Op,
SDValue &Lo, SDValue &Hi) {
- EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Op.getValueType().getSizeInBits()/2);
+ EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(),
+ Op.getValueType().getSizeInBits()/2);
SplitInteger(Op, HalfVT, HalfVT, Lo, Hi);
}
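
Both hunks above are pure re-wraps, but the helpers they touch reward a concrete reading: JoinIntegers zero-extends Lo (its high bits must be clean before the final OR), any-extends Hi (its low bits get shifted out of range anyway), and, just past the hunk, shifts Hi up by Lo's width and ORs; SplitInteger is the inverse. A fixed-width sketch of that round trip, assuming the usual shift-and-or tail:

    // Toy model of JoinIntegers/SplitInteger at concrete widths: join two
    // i32 halves into an i64, then split the i64 back into halves.
    #include <cassert>
    #include <cstdint>

    static uint64_t joinIntegers(uint32_t Lo, uint32_t Hi) {
      // ZERO_EXTEND Lo, ANY_EXTEND Hi, SHL Hi by Lo's width, then OR.
      return (uint64_t)Lo | ((uint64_t)Hi << 32);
    }

    static void splitInteger(uint64_t Op, uint32_t &Lo, uint32_t &Hi) {
      Lo = (uint32_t)Op;           // TRUNCATE
      Hi = (uint32_t)(Op >> 32);   // SRL by half the width, then TRUNCATE
    }

    int main() {
      uint32_t Lo, Hi;
      splitInteger(joinIntegers(0xdeadbeefu, 0x12345678u), Lo, Hi);
      assert(Lo == 0xdeadbeefu && Hi == 0x12345678u);
      return 0;
    }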
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 9dd9796d0122..d60ad60017db 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -33,7 +33,7 @@ namespace llvm {
/// into small values.
///
class VISIBILITY_HIDDEN DAGTypeLegalizer {
- TargetLowering &TLI;
+ const TargetLowering &TLI;
SelectionDAG &DAG;
public:
// NodeIdFlags - This pass uses the NodeId on the SDNodes to hold information
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 5e83b4ba33d6..88e1e624ae32 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -173,8 +173,9 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo,
EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT);
SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl,
- EVT::getVectorVT(*DAG.getContext(), NewVT, 2*OldElts),
- OldVec);
+ EVT::getVectorVT(*DAG.getContext(),
+ NewVT, 2*OldElts),
+ OldVec);
// Extract the elements at 2 * Idx and 2 * Idx + 1 from the new vector.
SDValue Idx = N->getOperand(1);
@@ -268,7 +269,9 @@ SDValue DAGTypeLegalizer::ExpandOp_BIT_CONVERT(SDNode *N) {
// is no point, and it might create expansion loops). For example, on
// x86 this turns v1i64 = BIT_CONVERT i64 into v1i64 = BIT_CONVERT v2i32.
EVT OVT = N->getOperand(0).getValueType();
- EVT NVT = EVT::getVectorVT(*DAG.getContext(), TLI.getTypeToTransformTo(*DAG.getContext(), OVT), 2);
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(),
+ TLI.getTypeToTransformTo(*DAG.getContext(), OVT),
+ 2);
if (isTypeLegal(NVT)) {
SDValue Parts[2];
@@ -312,8 +315,9 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) {
}
SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,
- EVT::getVectorVT(*DAG.getContext(), NewVT, NewElts.size()),
- &NewElts[0], NewElts.size());
+ EVT::getVectorVT(*DAG.getContext(),
+ NewVT, NewElts.size()),
+ &NewElts[0], NewElts.size());
// Convert the new vector to the old vector type.
return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec);
@@ -380,7 +384,8 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
DebugLoc dl = N->getDebugLoc();
StoreSDNode *St = cast<StoreSDNode>(N);
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), St->getValue().getValueType());
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(),
+ St->getValue().getValueType());
SDValue Chain = St->getChain();
SDValue Ptr = St->getBasePtr();
int SVOffset = St->getSrcValueOffset();
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index b5f84c0abb23..0e2bd0233712 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -34,7 +34,7 @@ using namespace llvm;
namespace {
class VectorLegalizer {
SelectionDAG& DAG;
- TargetLowering& TLI;
+ const TargetLowering &TLI;
bool Changed; // Keep track of whether anything changed
/// LegalizedNodes - For nodes that are of legal width, and that have more
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index ed5f24c1cc95..7efeea1ddaf9 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -705,8 +705,9 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
// Store the new element. This may be larger than the vector element type,
// so use a truncating store.
SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
+ const Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
unsigned Alignment =
- TLI.getTargetData()->getPrefTypeAlignment(VecVT.getTypeForEVT(*DAG.getContext()));
+ TLI.getTargetData()->getPrefTypeAlignment(VecType);
Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, NULL, 0, EltVT,
false, false, 0);
@@ -1419,7 +1420,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) {
ShOp = GetWidenedVector(ShOp);
ShVT = ShOp.getValueType();
}
- EVT ShWidenVT = EVT::getVectorVT(*DAG.getContext(), ShVT.getVectorElementType(),
+ EVT ShWidenVT = EVT::getVectorVT(*DAG.getContext(),
+ ShVT.getVectorElementType(),
WidenVT.getVectorNumElements());
if (ShVT != ShWidenVT)
ShOp = ModifyToType(ShOp, ShWidenVT);
@@ -1493,7 +1495,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {
unsigned NewNumElts = WidenSize / InSize;
if (InVT.isVector()) {
EVT InEltVT = InVT.getVectorElementType();
- NewInVT= EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenSize / InEltVT.getSizeInBits());
+ NewInVT= EVT::getVectorVT(*DAG.getContext(), InEltVT,
+ WidenSize / InEltVT.getSizeInBits());
} else {
NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts);
}
@@ -1617,7 +1620,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
SDValue RndOp = N->getOperand(3);
SDValue SatOp = N->getOperand(4);
- EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0));
unsigned WidenNumElts = WidenVT.getVectorNumElements();
EVT InVT = InOp.getValueType();
@@ -1791,7 +1795,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
EVT CondVT = Cond1.getValueType();
if (CondVT.isVector()) {
EVT CondEltVT = CondVT.getVectorElementType();
- EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(), CondEltVT, WidenNumElts);
+ EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(),
+ CondEltVT, WidenNumElts);
if (getTypeAction(CondVT) == WidenVector)
Cond1 = GetWidenedVector(Cond1);
@@ -1859,7 +1864,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
SDValue InOp1 = N->getOperand(0);
EVT InVT = InOp1.getValueType();
assert(InVT.isVector() && "can not widen non vector type");
- EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), WidenNumElts);
+ EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(),
+ InVT.getVectorElementType(), WidenNumElts);
InOp1 = GetWidenedVector(InOp1);
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
@@ -2124,7 +2130,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
// The routine chops the vector into the largest vector loads with the same
// element type or scalar loads and then recombines it to the widened vector
// type.
- EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
unsigned WidenWidth = WidenVT.getSizeInBits();
EVT LdVT = LD->getMemoryVT();
DebugLoc dl = LD->getDebugLoc();
diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
index 9d1568f01f40..ac2d33884b26 100644
--- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -41,7 +41,7 @@ private:
SDNode *Node; // valid for expressions
unsigned ResNo; // valid for expressions
} s;
- Value *Const; // valid for constants
+ const Value *Const; // valid for constants
unsigned FrameIx; // valid for stack objects
} u;
MDNode *mdPtr;
@@ -60,7 +60,8 @@ public:
}
// Constructor for constants.
- SDDbgValue(MDNode *mdP, Value *C, uint64_t off, DebugLoc dl, unsigned O) :
+ SDDbgValue(MDNode *mdP, const Value *C, uint64_t off, DebugLoc dl,
+ unsigned O) :
mdPtr(mdP), Offset(off), DL(dl), Order(O), Invalid(false) {
kind = CONST;
u.Const = C;
@@ -86,7 +87,7 @@ public:
unsigned getResNo() { assert (kind==SDNODE); return u.s.ResNo; }
// Returns the Value* for a constant
- Value *getConst() { assert (kind==CONST); return u.Const; }
+ const Value *getConst() { assert (kind==CONST); return u.Const; }
// Returns the FrameIx for a stack object
unsigned getFrameIx() { assert (kind==FRAMEIX); return u.FrameIx; }
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 3f1766d9e9f2..da028500bd0a 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -17,18 +17,19 @@
#define DEBUG_TYPE "pre-RA-sched"
#include "ScheduleDAGSDNodes.h"
+#include "llvm/InlineAsm.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <climits>
using namespace llvm;
@@ -647,13 +648,14 @@ bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
--NumOps; // Ignore the flag operand.
- for (unsigned i = 2; i != NumOps;) {
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
unsigned Flags =
cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
- unsigned NumVals = (Flags & 0xffff) >> 3;
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
++i; // Skip the ID value.
- if ((Flags & 7) == 2 || (Flags & 7) == 6) {
+ if (InlineAsm::isRegDefKind(Flags) ||
+ InlineAsm::isRegDefEarlyClobberKind(Flags)) {
// Check for def of register or earlyclobber register.
for (; NumVals; --NumVals, ++i) {
unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
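
The magic numbers here get names: the loop start (previously the literal 2) becomes InlineAsm::Op_FirstOperand, and the flag-word decoding moves behind InlineAsm helpers. A standalone model of the word those helpers decode, assuming only the layout the removed expressions imply — operand kind in the low 3 bits, register count in bits 3..15; the kind values other than 2 and 6 are illustrative:

    #include <cassert>
    #include <cstdio>

    // Kind_RegDef and Kind_RegDefEarlyClobber match the removed literals
    // 2 and 6; the other enumerators are illustrative placeholders.
    enum OpKind { Kind_RegUse = 1, Kind_RegDef = 2, Kind_Imm = 3,
                  Kind_Mem = 4, Kind_RegDefEarlyClobber = 6 };

    static unsigned getNumOperandRegisters(unsigned Flags) {
      return (Flags & 0xffff) >> 3;  // exactly the old "(Flags & 0xffff) >> 3"
    }
    static bool isRegDefKind(unsigned Flags) {
      return (Flags & 7) == Kind_RegDef;              // old "(Flags & 7) == 2"
    }
    static bool isRegDefEarlyClobberKind(unsigned Flags) {
      return (Flags & 7) == Kind_RegDefEarlyClobber;  // old "(Flags & 7) == 6"
    }

    int main() {
      unsigned Flags = Kind_RegDef | (2 << 3);  // a def of two registers
      assert(getNumOperandRegisters(Flags) == 2);
      assert(isRegDefKind(Flags) && !isRegDefEarlyClobberKind(Flags));
      std::printf("kind=%u regs=%u\n", Flags & 7u, getNumOperandRegisters(Flags));
      return 0;
    }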
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index e7ab2f003963..76e47718f501 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -353,8 +353,8 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
const SDep& dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
OpSU->Latency, PhysReg);
if (!isChain && !UnitLatencies) {
- ComputeOperandLatency(OpSU, SU, (SDep &)dep);
- ST.adjustSchedDependency(OpSU, SU, (SDep &)dep);
+ ComputeOperandLatency(OpSU, SU, const_cast<SDep &>(dep));
+ ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep));
}
SU->addPred(dep);
@@ -422,7 +422,6 @@ namespace {
// instructions in the right order.
static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
InstrEmitter &Emitter,
- DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM,
DenseMap<SDValue, unsigned> &VRBaseMap,
SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders,
SmallSet<unsigned, 8> &Seen) {
@@ -449,9 +448,11 @@ static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
continue;
unsigned DVOrder = DVs[i]->getOrder();
if (DVOrder == ++Order) {
- MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], BB, VRBaseMap, EM);
- Orders.push_back(std::make_pair(DVOrder, DbgMI));
- BB->insert(InsertPos, DbgMI);
+ MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], VRBaseMap);
+ if (DbgMI) {
+ Orders.push_back(std::make_pair(DVOrder, DbgMI));
+ BB->insert(InsertPos, DbgMI);
+ }
DVs[i]->setIsInvalidated();
}
}
@@ -459,8 +460,7 @@ static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
/// EmitSchedule - Emit the machine code in scheduled order.
-MachineBasicBlock *ScheduleDAGSDNodes::
-EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
+MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
InstrEmitter Emitter(BB, InsertPos);
DenseMap<SDValue, unsigned> VRBaseMap;
DenseMap<SUnit*, unsigned> CopyVRBaseMap;
@@ -468,6 +468,17 @@ EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
SmallSet<unsigned, 8> Seen;
bool HasDbg = DAG->hasDebugValues();
+ // If this is the first BB, emit byval parameter dbg_value's.
+ if (HasDbg && BB->getParent()->begin() == MachineFunction::iterator(BB)) {
+ SDDbgInfo::DbgIterator PDI = DAG->ByvalParmDbgBegin();
+ SDDbgInfo::DbgIterator PDE = DAG->ByvalParmDbgEnd();
+ for (; PDI != PDE; ++PDI) {
+ MachineInstr *DbgMI= Emitter.EmitDbgValue(*PDI, VRBaseMap);
+ if (DbgMI)
+ BB->insert(BB->end(), DbgMI);
+ }
+ }
+
for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
SUnit *SU = Sequence[i];
if (!SU) {
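
The new block above handles byval parameters specially: their dbg_values describe incoming stack slots that are live from function entry, so they are emitted once at the top of the entry block instead of being threaded through the source-order merge below. A toy model of that split, with stand-in types; the IsParameter bit mirrors the flag AddDbgValue gains later in this patch:

    #include <cstddef>
    #include <vector>

    struct DbgVal { bool IsParameter; unsigned Order; };

    // Parameter debug values are pinned to the entry block; everything else
    // is interleaved with the schedule by source order.
    static void splitDbgValues(const std::vector<DbgVal> &All,
                               std::vector<DbgVal> &ByvalParm,
                               std::vector<DbgVal> &Ordered) {
      for (std::size_t I = 0, E = All.size(); I != E; ++I) {
        if (All[I].IsParameter)
          ByvalParm.push_back(All[I]);  // emitted once, at the top of entry
        else
          Ordered.push_back(All[I]);    // merged into the schedule by Order
      }
    }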
@@ -491,21 +502,21 @@ EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
while (!FlaggedNodes.empty()) {
SDNode *N = FlaggedNodes.back();
Emitter.EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, SU->isCloned,
- VRBaseMap, EM);
- // Remember the the source order of the inserted instruction.
+ VRBaseMap);
+ // Remember the source order of the inserted instruction.
if (HasDbg)
- ProcessSourceNode(N, DAG, Emitter, EM, VRBaseMap, Orders, Seen);
+ ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen);
FlaggedNodes.pop_back();
}
Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned,
- VRBaseMap, EM);
- // Remember the the source order of the inserted instruction.
+ VRBaseMap);
+ // Remember the source order of the inserted instruction.
if (HasDbg)
- ProcessSourceNode(SU->getNode(), DAG, Emitter, EM, VRBaseMap, Orders,
+ ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders,
Seen);
}
- // Insert all the dbg_value which have not already been inserted in source
+ // Insert all the dbg_values which have not already been inserted in source
// order sequence.
if (HasDbg) {
MachineBasicBlock::iterator BBBegin = BB->empty() ? BB->end() : BB->begin();
@@ -540,13 +551,15 @@ EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
#endif
if ((*DI)->isInvalidated())
continue;
- MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, MIBB, VRBaseMap, EM);
- if (!LastOrder)
- // Insert to start of the BB (after PHIs).
- BB->insert(BBBegin, DbgMI);
- else {
- MachineBasicBlock::iterator Pos = MI;
- MIBB->insert(llvm::next(Pos), DbgMI);
+ MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap);
+ if (DbgMI) {
+ if (!LastOrder)
+ // Insert to start of the BB (after PHIs).
+ BB->insert(BBBegin, DbgMI);
+ else {
+ MachineBasicBlock::iterator Pos = MI;
+ MIBB->insert(llvm::next(Pos), DbgMI);
+ }
}
}
LastOrder = Order;
@@ -558,8 +571,9 @@ EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
MachineBasicBlock *InsertBB = Emitter.getBlock();
MachineBasicBlock::iterator Pos= Emitter.getBlock()->getFirstTerminator();
if (!(*DI)->isInvalidated()) {
- MachineInstr *DbgMI= Emitter.EmitDbgValue(*DI, InsertBB, VRBaseMap, EM);
- InsertBB->insert(Pos, DbgMI);
+ MachineInstr *DbgMI= Emitter.EmitDbgValue(*DI, VRBaseMap);
+ if (DbgMI)
+ InsertBB->insert(Pos, DbgMI);
}
++DI;
}
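
The remaining EmitSchedule changes keep the existing interleaving scheme but add null checks, since EmitDbgValue can now decline to produce an instruction. The scheme itself, as a standalone toy: each emitted instruction carries a source order, and pending dbg_values are flushed next to the instruction whose order they trail.

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    struct Instr  { unsigned Order; const char *Name; };
    struct DbgVal { unsigned Order; const char *Var;  };

    static void emitInterleaved(const std::vector<Instr> &Sched,
                                const std::vector<DbgVal> &Dbgs) {
      std::size_t DI = 0;
      for (std::size_t I = 0, E = Sched.size(); I != E; ++I) {
        std::printf("%s\n", Sched[I].Name);
        // Flush every pending dbg_value whose source order has been reached,
        // so it lands next to the instruction it followed in the IR.
        while (DI != Dbgs.size() && Dbgs[DI].Order <= Sched[I].Order)
          std::printf("  dbg_value %s\n", Dbgs[DI++].Var);
      }
      while (DI != Dbgs.size())          // leftovers go before the terminator
        std::printf("  dbg_value %s\n", Dbgs[DI++].Var);
    }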
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 6b829b61066b..7ae8ec236fbf 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -59,7 +59,8 @@ namespace llvm {
if (isa<JumpTableSDNode>(Node)) return true;
if (isa<ExternalSymbolSDNode>(Node)) return true;
if (isa<BlockAddressSDNode>(Node)) return true;
- if (Node->getOpcode() == ISD::EntryToken) return true;
+ if (Node->getOpcode() == ISD::EntryToken ||
+ isa<MDNodeSDNode>(Node)) return true;
return false;
}
@@ -93,8 +94,7 @@ namespace llvm {
///
virtual void ComputeLatency(SUnit *SU);
- virtual MachineBasicBlock *
- EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM);
+ virtual MachineBasicBlock *EmitSchedule();
/// Schedule - Order nodes according to selected style, filling
/// in the Sequence member.
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 8c0554d186d9..e6df742bc336 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -304,10 +304,6 @@ ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2,
return Result;
}
-const TargetMachine &SelectionDAG::getTarget() const {
- return MF->getTarget();
-}
-
//===----------------------------------------------------------------------===//
// SDNode Profile Support
//===----------------------------------------------------------------------===//
@@ -792,8 +788,8 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
}
// EntryNode could meaningfully have debug info if we can find it...
-SelectionDAG::SelectionDAG(TargetLowering &tli, FunctionLoweringInfo &fli)
- : TLI(tli), FLI(fli),
+SelectionDAG::SelectionDAG(const TargetMachine &tm, FunctionLoweringInfo &fli)
+ : TM(tm), TLI(*tm.getTargetLowering()), FLI(fli),
EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)),
Root(getEntryNode()), Ordering(0) {
AllNodes.push_back(&EntryNode);
@@ -1048,7 +1044,7 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,
return SDValue(N, 0);
}
-SDValue SelectionDAG::getConstantPool(Constant *C, EVT VT,
+SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
unsigned Alignment, int Offset,
bool isTarget,
unsigned char TargetFlags) {
@@ -1319,7 +1315,7 @@ SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) {
}
-SDValue SelectionDAG::getBlockAddress(BlockAddress *BA, EVT VT,
+SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
bool isTarget,
unsigned char TargetFlags) {
unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress;
@@ -1356,6 +1352,23 @@ SDValue SelectionDAG::getSrcValue(const Value *V) {
return SDValue(N, 0);
}
+/// getMDNode - Return an MDNodeSDNode which holds an MDNode.
+SDValue SelectionDAG::getMDNode(const MDNode *MD) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), 0, 0);
+ ID.AddPointer(MD);
+
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) MDNodeSDNode(MD);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+
/// getShiftAmountOperand - Return the specified value casted to
/// the target's desired shift amount type.
SDValue SelectionDAG::getShiftAmountOperand(SDValue Op) {
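
getMDNode above is a textbook instance of the SelectionDAG uniquing pattern: profile the prospective node into a FoldingSetNodeID, return the existing node on a hit, otherwise allocate it, register it in the CSE map, and hand ownership to AllNodes. The same idiom with a plain std::map standing in for the FoldingSet:

    #include <map>
    #include <memory>
    #include <vector>

    struct Node { const void *Payload; };

    class Uniquer {
      std::map<const void *, Node *> CSEMap;        // FoldingSet stand-in
      std::vector<std::unique_ptr<Node>> AllNodes;  // owns every node
    public:
      Node *get(const void *Payload) {
        std::map<const void *, Node *>::iterator It = CSEMap.find(Payload);
        if (It != CSEMap.end())
          return It->second;              // CSE hit: reuse the existing node
        AllNodes.push_back(std::unique_ptr<Node>(new Node{Payload}));
        Node *N = AllNodes.back().get();
        CSEMap[Payload] = N;              // register for future lookups
        return N;
      }
    };

Calling get() twice with the same MDNode pointer therefore yields the same node, so repeated references to one piece of metadata fold to a single MDNodeSDNode in the DAG.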
@@ -1904,7 +1917,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
// Output known-0 bits are known if clear or set in both the low clear bits
// common to both LHS & RHS. For example, 8+(X<<3) is known to have the
// low 3 bits clear.
- APInt Mask2 = APInt::getLowBitsSet(BitWidth, Mask.countTrailingOnes());
+ APInt Mask2 = APInt::getLowBitsSet(BitWidth,
+ BitWidth - Mask.countLeadingZeros());
ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
unsigned KnownZeroOut = KnownZero2.countTrailingOnes();
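
The functional fix in this hunk: the old demanded-bits bound, Mask.countTrailingOnes(), collapses to zero whenever the demanded mask is not a solid run from bit 0, so the recursive query could skip operand bits that carries still flow through; the new bound, BitWidth - Mask.countLeadingZeros(), reaches up to the highest demanded bit. A standalone check of the two bounds on a mask with a hole:

    #include <cassert>
    #include <cstdint>

    static unsigned countTrailingOnes(uint32_t V) {
      unsigned N = 0;
      while (V & 1) { V >>= 1; ++N; }
      return N;
    }
    static unsigned countLeadingZeros(uint32_t V) {
      unsigned N = 0;
      for (int Bit = 31; Bit >= 0 && !((V >> Bit) & 1); --Bit) ++N;
      return N;
    }

    int main() {
      uint32_t Mask = 0xA;  // bits 1 and 3 demanded, bit 0 not
      assert(countTrailingOnes(Mask) == 0);       // old bound: asks for nothing
      assert(32 - countLeadingZeros(Mask) == 4);  // new bound: low 4 bits
      return 0;
    }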
@@ -2253,7 +2267,7 @@ bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const {
GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
if (!GA) return false;
if (GA->getOffset() != 0) return false;
- GlobalVariable *GV = dyn_cast<GlobalVariable>(GA->getGlobal());
+ const GlobalVariable *GV = dyn_cast<GlobalVariable>(GA->getGlobal());
if (!GV) return false;
return MF->getMMI().hasDebugInfo();
}
@@ -2778,14 +2792,19 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
// If the indices are the same, return the inserted element else
// if the indices are known different, extract the element from
// the original vector.
- if (N1.getOperand(2) == N2) {
- if (VT == N1.getOperand(1).getValueType())
- return N1.getOperand(1);
- else
- return getSExtOrTrunc(N1.getOperand(1), DL, VT);
- } else if (isa<ConstantSDNode>(N1.getOperand(2)) &&
- isa<ConstantSDNode>(N2))
+ SDValue N1Op2 = N1.getOperand(2);
+ ConstantSDNode *N1Op2C = dyn_cast<ConstantSDNode>(N1Op2.getNode());
+
+ if (N1Op2C && N2C) {
+ if (N1Op2C->getZExtValue() == N2C->getZExtValue()) {
+ if (VT == N1.getOperand(1).getValueType())
+ return N1.getOperand(1);
+ else
+ return getSExtOrTrunc(N1.getOperand(1), DL, VT);
+ }
+
return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2);
+ }
}
break;
case ISD::EXTRACT_ELEMENT:
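
The rewritten fold compares the insert and extract indices as constant values rather than as node identities, and only acts when both are known: equal indices yield the inserted element (extended or truncated if the types differ), unequal known indices read the original vector, and anything else falls through unfolded. A scalar model of the both-known case:

    #include <cassert>

    struct Vec4 { int Lane[4]; };

    // extract_elt(insert_elt(Vec, Elt, InsIdx), ExtIdx) with both indices
    // known constants: the inserted element if they match, the original
    // vector's lane otherwise.
    static int extractFromInsert(const Vec4 &Vec, int Elt,
                                 int InsIdx, int ExtIdx) {
      if (InsIdx == ExtIdx)
        return Elt;
      return Vec.Lane[ExtIdx];
    }

    int main() {
      Vec4 V = {{10, 11, 12, 13}};
      assert(extractFromInsert(V, 99, 2, 2) == 99);
      assert(extractFromInsert(V, 99, 2, 1) == 11);
      return 0;
    }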
@@ -3178,7 +3197,7 @@ static bool isMemSrcFromString(SDValue Src, std::string &Str) {
if (!G)
return false;
- GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal());
+ const GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal());
if (GV && GetConstantStringInfo(GV, Str, SrcDelta, false))
return true;
@@ -3193,6 +3212,7 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
unsigned Limit, uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
bool NonScalarIntSafe,
+ bool MemcpyStrSrc,
SelectionDAG &DAG,
const TargetLowering &TLI) {
assert((SrcAlign == 0 || SrcAlign >= DstAlign) &&
@@ -3201,9 +3221,12 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
// the value, i.e. memset or memcpy from constant string. Otherwise, it's
// the inferred alignment of the source. 'DstAlign', on the other hand, is the
// specified alignment of the memory operation. If it is zero, that means
- // it's possible to change the alignment of the destination.
+ // it's possible to change the alignment of the destination. 'MemcpyStrSrc'
+ // indicates whether the memcpy source is constant so it does not need to be
+ // loaded.
EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign,
- NonScalarIntSafe, DAG);
+ NonScalarIntSafe, MemcpyStrSrc,
+ DAG.getMachineFunction());
if (VT == MVT::Other) {
if (DstAlign >= TLI.getTargetData()->getPointerPrefAlignment() ||
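
The new MemcpyStrSrc flag tells the target hook that the copy's source bytes are a known constant string, so no loads are emitted and the source alignment need not constrain the chosen type. A hypothetical hook sketching that use; the enum, widths, and thresholds here are invented for illustration, not the real target logic:

    // Hypothetical getOptimalMemOpType-style hook. With a constant-string
    // source the values become store immediates, so SrcAlign is ignored.
    enum class MemVT { i8, i32, i64, Other };

    static MemVT pickMemOpType(unsigned long long Size, unsigned DstAlign,
                               unsigned SrcAlign, bool NonScalarIntSafe,
                               bool MemcpyStrSrc) {
      if (!NonScalarIntSafe)
        return MemVT::Other;
      if (MemcpyStrSrc || SrcAlign >= 8)  // no loads, or well-aligned loads
        return (Size >= 8 && DstAlign >= 8) ? MemVT::i64 : MemVT::i32;
      return MemVT::i8;  // conservative: the source may be misaligned
    }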
@@ -3269,9 +3292,6 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
// below a certain threshold.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
std::vector<EVT> MemOps;
- uint64_t Limit = -1ULL;
- if (!AlwaysInline)
- Limit = TLI.getMaxStoresPerMemcpy();
bool DstAlignCanChange = false;
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
@@ -3283,9 +3303,13 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
std::string Str;
bool CopyFromStr = isMemSrcFromString(Src, Str);
bool isZeroStr = CopyFromStr && Str.empty();
+ uint64_t Limit = -1ULL;
+ if (!AlwaysInline)
+ Limit = TLI.getMaxStoresPerMemcpy();
if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
(DstAlignCanChange ? 0 : Align),
- (isZeroStr ? 0 : SrcAlign), true, DAG, TLI))
+ (isZeroStr ? 0 : SrcAlign),
+ true, CopyFromStr, DAG, TLI))
return SDValue();
if (DstAlignCanChange) {
@@ -3373,7 +3397,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
(DstAlignCanChange ? 0 : Align),
- SrcAlign, true, DAG, TLI))
+ SrcAlign, true, false, DAG, TLI))
return SDValue();
if (DstAlignCanChange) {
@@ -3445,7 +3469,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(),
Size, (DstAlignCanChange ? 0 : Align), 0,
- NonScalarIntSafe, DAG, TLI))
+ NonScalarIntSafe, false, DAG, TLI))
return SDValue();
if (DstAlignCanChange) {
@@ -3571,8 +3595,10 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
if (Result.getNode())
return Result;
+ // FIXME: If the memmove is volatile, lowering it to plain libc memmove may
+ // not be safe. See memcpy above for more details.
+
// Emit a library call.
- assert(!isVol && "library memmove does not support volatile");
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext());
@@ -3620,8 +3646,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
if (Result.getNode())
return Result;
- // Emit a library call.
- assert(!isVol && "library memset does not support volatile");
+ // Emit a library call.
const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
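
Both asserts are gone because volatile memmove/memset now reach this point, and the new FIXME records the open question: libc routines promise only the final memory contents, so for volatile destinations they may legally widen, split, or re-touch accesses. A small illustration of why that matters; Device here is a hypothetical memory-mapped register window:

    #include <cstddef>

    volatile unsigned char Device[16];  // hypothetical MMIO window

    void drainToDevice(const unsigned char *Src, std::size_t N) {
      for (std::size_t I = 0; I < N; ++I)
        Device[I] = Src[I];  // exactly one byte store per device register
      // std::memmove((void *)Device, Src, N) could merge or replay these
      // stores, which is why falling back to it for a volatile op is
      // flagged as possibly unsafe.
    }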
@@ -4913,7 +4938,7 @@ SelectionDAG::getDbgValue(MDNode *MDPtr, SDNode *N, unsigned R, uint64_t Off,
}
SDDbgValue *
-SelectionDAG::getDbgValue(MDNode *MDPtr, Value *C, uint64_t Off,
+SelectionDAG::getDbgValue(MDNode *MDPtr, const Value *C, uint64_t Off,
DebugLoc DL, unsigned O) {
return new (Allocator) SDDbgValue(MDPtr, C, Off, DL, O);
}
@@ -5321,8 +5346,8 @@ unsigned SelectionDAG::GetOrdering(const SDNode *SD) const {
/// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the
/// value is produced by SD.
-void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD) {
- DbgInfo->add(DB, SD);
+void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) {
+ DbgInfo->add(DB, SD, isParameter);
if (SD)
SD->setHasDebugValue(true);
}
@@ -5338,7 +5363,7 @@ HandleSDNode::~HandleSDNode() {
GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, const GlobalValue *GA,
EVT VT, int64_t o, unsigned char TF)
: SDNode(Opc, DebugLoc(), getSDVTList(VT)), Offset(o), TargetFlags(TF) {
- TheGlobal = const_cast<GlobalValue*>(GA);
+ TheGlobal = GA;
}
MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt,
@@ -5558,6 +5583,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::PCMARKER: return "PCMarker";
case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
case ISD::SRCVALUE: return "SrcValue";
+ case ISD::MDNODE_SDNODE: return "MDNode";
case ISD::EntryToken: return "EntryToken";
case ISD::TokenFactor: return "TokenFactor";
case ISD::AssertSext: return "AssertSext";
@@ -5926,6 +5952,11 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << "<" << M->getValue() << ">";
else
OS << "<null>";
+ } else if (const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(this)) {
+ if (MD->getMD())
+ OS << "<" << MD->getMD() << ">";
+ else
+ OS << "<null>";
} else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) {
OS << ":" << N->getVT().getEVTString();
}
@@ -6063,7 +6094,7 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
unsigned i;
for (i= 0; i != NE; ++i) {
- for (unsigned j = 0; j != N->getNumOperands(); ++j) {
+ for (unsigned j = 0, e = N->getNumOperands(); j != e; ++j) {
SDValue Operand = N->getOperand(j);
EVT OperandVT = Operand.getValueType();
if (OperandVT.isVector()) {
@@ -6141,8 +6172,8 @@ bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
return true;
}
- GlobalValue *GV1 = NULL;
- GlobalValue *GV2 = NULL;
+ const GlobalValue *GV1 = NULL;
+ const GlobalValue *GV2 = NULL;
int64_t Offset1 = 0;
int64_t Offset2 = 0;
bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
@@ -6157,14 +6188,14 @@ bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
/// it cannot be inferred.
unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
// If this is a GlobalAddress + cst, return the alignment.
- GlobalValue *GV;
+ const GlobalValue *GV;
int64_t GVOffset = 0;
if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
// If GV has specified alignment, then use it. Otherwise, use the preferred
// alignment.
unsigned Align = GV->getAlignment();
if (!Align) {
- if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
if (GVar->hasInitializer()) {
const TargetData *TD = TLI.getTargetData();
Align = TD->getPreferredAlignment(GVar);
@@ -6326,8 +6357,8 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
if (OpVal.getOpcode() == ISD::UNDEF)
SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos + EltBitSize);
else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal))
- SplatValue |= (APInt(CN->getAPIntValue()).zextOrTrunc(EltBitSize).
- zextOrTrunc(sz) << BitPos);
+ SplatValue |= APInt(CN->getAPIntValue()).zextOrTrunc(EltBitSize).
+ zextOrTrunc(sz) << BitPos;
else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal))
SplatValue |= CN->getValueAPF().bitcastToAPInt().zextOrTrunc(sz) <<BitPos;
else
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 4bbb3dea92f4..a38b204b247e 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -28,7 +28,9 @@
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/GCMetadata.h"
@@ -168,7 +170,7 @@ namespace {
/// AddInlineAsmOperands - Add this value to the specified inlineasm node
/// operand list. This adds the code marker, matching input operand index
/// (if applicable), and includes the number of values added into it.
- void AddInlineAsmOperands(unsigned Code,
+ void AddInlineAsmOperands(unsigned Kind,
bool HasMatching, unsigned MatchingIdx,
SelectionDAG &DAG,
std::vector<SDValue> &Ops) const;
@@ -533,7 +535,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) {
TD = DAG.getTarget().getTargetData();
}
-/// clear - Clear out the curret SelectionDAG and the associated
+/// clear - Clear out the current SelectionDAG and the associated
/// state and prepare this SelectionDAGBuilder object to be used
/// for a new block. This doesn't clear out information about
/// additional blocks that are needed to complete switch lowering
@@ -543,8 +545,6 @@ void SelectionDAGBuilder::clear() {
NodeMap.clear();
PendingLoads.clear();
PendingExports.clear();
- EdgeMapping.clear();
- DAG.clear();
CurDebugLoc = DebugLoc();
HasTailCall = false;
}
@@ -612,11 +612,26 @@ void SelectionDAGBuilder::AssignOrderingToNode(const SDNode *Node) {
AssignOrderingToNode(Node->getOperand(I).getNode());
}
-void SelectionDAGBuilder::visit(Instruction &I) {
+void SelectionDAGBuilder::visit(const Instruction &I) {
+ // Set up outgoing PHI node register values before emitting the terminator.
+ if (isa<TerminatorInst>(&I))
+ HandlePHINodesInSuccessorBlocks(I.getParent());
+
+ CurDebugLoc = I.getDebugLoc();
+
visit(I.getOpcode(), I);
+
+ if (!isa<TerminatorInst>(&I) && !HasTailCall)
+ CopyToExportRegsIfNeeded(&I);
+
+ CurDebugLoc = DebugLoc();
}
-void SelectionDAGBuilder::visit(unsigned Opcode, User &I) {
+void SelectionDAGBuilder::visitPHI(const PHINode &) {
+ llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!");
+}
+
+void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
// Note: this doesn't use InstVisitor, because it has to work with
// ConstantExpr's in addition to instructions.
switch (Opcode) {
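
visit() now brackets the opcode dispatch with per-instruction bookkeeping that callers used to do themselves: outgoing PHI values are wired up before terminators, the current debug location is stamped and then cleared, and non-terminator results are exported to their virtual registers. The shape of that driver, with stub hooks standing in for the real member functions:

    #include <cstdio>

    struct Inst { bool IsTerminator; unsigned Opcode; };

    // Stubs; in SelectionDAGBuilder these are real members.
    static void handlePHINodesInSuccessors(const Inst &) {}
    static void setDebugLoc(const Inst &) {}
    static void clearDebugLoc() {}
    static bool hadTailCall() { return false; }
    static void exportIfNeeded(const Inst &) {}
    static void dispatch(unsigned Op, const Inst &) { std::printf("op %u\n", Op); }

    static void visitOne(const Inst &I) {
      if (I.IsTerminator)
        handlePHINodesInSuccessors(I);  // wire PHI inputs before the branch
      setDebugLoc(I);
      dispatch(I.Opcode, I);            // the existing opcode switch
      if (!I.IsTerminator && !hadTailCall())
        exportIfNeeded(I);              // copy the result to its vreg
      clearDebugLoc();
    }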
@@ -638,28 +653,28 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
SDValue &N = NodeMap[V];
if (N.getNode()) return N;
- if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V))) {
+ if (const Constant *C = dyn_cast<Constant>(V)) {
EVT VT = TLI.getValueType(V->getType(), true);
- if (ConstantInt *CI = dyn_cast<ConstantInt>(C))
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
return N = DAG.getConstant(*CI, VT);
- if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
return N = DAG.getGlobalAddress(GV, VT);
if (isa<ConstantPointerNull>(C))
return N = DAG.getConstant(0, TLI.getPointerTy());
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
return N = DAG.getConstantFP(*CFP, VT);
if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
return N = DAG.getUNDEF(VT);
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
visit(CE->getOpcode(), *CE);
SDValue N1 = NodeMap[V];
- assert(N1.getNode() && "visit didn't populate the ValueMap!");
+ assert(N1.getNode() && "visit didn't populate the NodeMap!");
return N1;
}
@@ -704,7 +719,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
getCurDebugLoc());
}
- if (BlockAddress *BA = dyn_cast<BlockAddress>(C))
+ if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
return DAG.getBlockAddress(BA, VT);
const VectorType *VecTy = cast<VectorType>(V->getType());
@@ -713,7 +728,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
// Now that we know the number and type of the elements, get that number of
// elements into the Ops array based on what kind of constant it is.
SmallVector<SDValue, 16> Ops;
- if (ConstantVector *CP = dyn_cast<ConstantVector>(C)) {
+ if (const ConstantVector *CP = dyn_cast<ConstantVector>(C)) {
for (unsigned i = 0; i != NumElements; ++i)
Ops.push_back(getValue(CP->getOperand(i)));
} else {
@@ -756,7 +771,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
static void getReturnInfo(const Type* ReturnType,
Attributes attr, SmallVectorImpl<EVT> &OutVTs,
SmallVectorImpl<ISD::ArgFlagsTy> &OutFlags,
- TargetLowering &TLI,
+ const TargetLowering &TLI,
SmallVectorImpl<uint64_t> *Offsets = 0) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, ReturnType, ValueVTs);
@@ -811,7 +826,7 @@ static void getReturnInfo(const Type* ReturnType,
}
}
-void SelectionDAGBuilder::visitRet(ReturnInst &I) {
+void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
SDValue Chain = getControlRoot();
SmallVector<ISD::OutputArg, 8> Outs;
FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo();
@@ -916,18 +931,18 @@ void SelectionDAGBuilder::visitRet(ReturnInst &I) {
/// CopyToExportRegsIfNeeded - If the given value has virtual registers
/// created for it, emit nodes to copy the value into the virtual
/// registers.
-void SelectionDAGBuilder::CopyToExportRegsIfNeeded(Value *V) {
- if (!V->use_empty()) {
- DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
- if (VMI != FuncInfo.ValueMap.end())
- CopyValueToVirtualRegister(V, VMI->second);
+void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) {
+ DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
+ if (VMI != FuncInfo.ValueMap.end()) {
+ assert(!V->use_empty() && "Unused value assigned virtual registers!");
+ CopyValueToVirtualRegister(V, VMI->second);
}
}
/// ExportFromCurrentBlock - If this condition isn't known to be exported from
/// the current basic block, add it to ValueMap now so that we'll get a
/// CopyTo/FromReg.
-void SelectionDAGBuilder::ExportFromCurrentBlock(Value *V) {
+void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) {
// No need to export constants.
if (!isa<Instruction>(V) && !isa<Argument>(V)) return;
@@ -938,11 +953,11 @@ void SelectionDAGBuilder::ExportFromCurrentBlock(Value *V) {
CopyValueToVirtualRegister(V, Reg);
}
-bool SelectionDAGBuilder::isExportableFromCurrentBlock(Value *V,
+bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
const BasicBlock *FromBB) {
// The operands of the setcc have to be in this block. We don't know
// how to export them from some other block.
- if (Instruction *VI = dyn_cast<Instruction>(V)) {
+ if (const Instruction *VI = dyn_cast<Instruction>(V)) {
// Can export from current BB.
if (VI->getParent() == FromBB)
return true;
@@ -971,85 +986,31 @@ static bool InBlock(const Value *V, const BasicBlock *BB) {
return true;
}
-/// getFCmpCondCode - Return the ISD condition code corresponding to
-/// the given LLVM IR floating-point condition code. This includes
-/// consideration of global floating-point math flags.
-///
-static ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred) {
- ISD::CondCode FPC, FOC;
- switch (Pred) {
- case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break;
- case FCmpInst::FCMP_OEQ: FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break;
- case FCmpInst::FCMP_OGT: FOC = ISD::SETGT; FPC = ISD::SETOGT; break;
- case FCmpInst::FCMP_OGE: FOC = ISD::SETGE; FPC = ISD::SETOGE; break;
- case FCmpInst::FCMP_OLT: FOC = ISD::SETLT; FPC = ISD::SETOLT; break;
- case FCmpInst::FCMP_OLE: FOC = ISD::SETLE; FPC = ISD::SETOLE; break;
- case FCmpInst::FCMP_ONE: FOC = ISD::SETNE; FPC = ISD::SETONE; break;
- case FCmpInst::FCMP_ORD: FOC = FPC = ISD::SETO; break;
- case FCmpInst::FCMP_UNO: FOC = FPC = ISD::SETUO; break;
- case FCmpInst::FCMP_UEQ: FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break;
- case FCmpInst::FCMP_UGT: FOC = ISD::SETGT; FPC = ISD::SETUGT; break;
- case FCmpInst::FCMP_UGE: FOC = ISD::SETGE; FPC = ISD::SETUGE; break;
- case FCmpInst::FCMP_ULT: FOC = ISD::SETLT; FPC = ISD::SETULT; break;
- case FCmpInst::FCMP_ULE: FOC = ISD::SETLE; FPC = ISD::SETULE; break;
- case FCmpInst::FCMP_UNE: FOC = ISD::SETNE; FPC = ISD::SETUNE; break;
- case FCmpInst::FCMP_TRUE: FOC = FPC = ISD::SETTRUE; break;
- default:
- llvm_unreachable("Invalid FCmp predicate opcode!");
- FOC = FPC = ISD::SETFALSE;
- break;
- }
- if (FiniteOnlyFPMath())
- return FOC;
- else
- return FPC;
-}
-
-/// getICmpCondCode - Return the ISD condition code corresponding to
-/// the given LLVM IR integer condition code.
-///
-static ISD::CondCode getICmpCondCode(ICmpInst::Predicate Pred) {
- switch (Pred) {
- case ICmpInst::ICMP_EQ: return ISD::SETEQ;
- case ICmpInst::ICMP_NE: return ISD::SETNE;
- case ICmpInst::ICMP_SLE: return ISD::SETLE;
- case ICmpInst::ICMP_ULE: return ISD::SETULE;
- case ICmpInst::ICMP_SGE: return ISD::SETGE;
- case ICmpInst::ICMP_UGE: return ISD::SETUGE;
- case ICmpInst::ICMP_SLT: return ISD::SETLT;
- case ICmpInst::ICMP_ULT: return ISD::SETULT;
- case ICmpInst::ICMP_SGT: return ISD::SETGT;
- case ICmpInst::ICMP_UGT: return ISD::SETUGT;
- default:
- llvm_unreachable("Invalid ICmp predicate opcode!");
- return ISD::SETNE;
- }
-}
-
/// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
/// This function emits a branch and is used at the leaves of an OR or an
/// AND operator tree.
///
void
-SelectionDAGBuilder::EmitBranchForMergedCondition(Value *Cond,
+SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- MachineBasicBlock *CurBB) {
+ MachineBasicBlock *CurBB,
+ MachineBasicBlock *SwitchBB) {
const BasicBlock *BB = CurBB->getBasicBlock();
// If the leaf of the tree is a comparison, merge the condition into
// the caseblock.
- if (CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
+ if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
// The operands of the cmp have to be in this block. We don't know
// how to export them from some other block. If this is the first block
// of the sequence, no exporting is needed.
- if (CurBB == CurMBB ||
+ if (CurBB == SwitchBB ||
(isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
ISD::CondCode Condition;
- if (ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
+ if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
Condition = getICmpCondCode(IC->getPredicate());
- } else if (FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) {
+ } else if (const FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) {
Condition = getFCmpCondCode(FC->getPredicate());
} else {
Condition = ISD::SETEQ; // silence warning.
@@ -1070,19 +1031,20 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(Value *Cond,
}
/// FindMergedConditions - If Cond is an expression like
-void SelectionDAGBuilder::FindMergedConditions(Value *Cond,
+void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
MachineBasicBlock *CurBB,
+ MachineBasicBlock *SwitchBB,
unsigned Opc) {
// If this node is not part of the or/and tree, emit it as a branch.
- Instruction *BOp = dyn_cast<Instruction>(Cond);
+ const Instruction *BOp = dyn_cast<Instruction>(Cond);
if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
(unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() ||
BOp->getParent() != CurBB->getBasicBlock() ||
!InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
!InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
- EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB);
+ EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB);
return;
}
@@ -1102,10 +1064,10 @@ void SelectionDAGBuilder::FindMergedConditions(Value *Cond,
//
// Emit the LHS condition.
- FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, Opc);
+ FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc);
// Emit the RHS condition into TmpBB.
- FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc);
+ FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc);
} else {
assert(Opc == Instruction::And && "Unknown merge op!");
// Codegen X & Y as:
@@ -1118,10 +1080,10 @@ void SelectionDAGBuilder::FindMergedConditions(Value *Cond,
// This requires creation of TmpBB after CurBB.
// Emit the LHS condition.
- FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, Opc);
+ FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc);
// Emit the RHS condition into TmpBB.
- FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc);
+ FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc);
}
}
@@ -1156,19 +1118,21 @@ SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){
return true;
}
-void SelectionDAGBuilder::visitBr(BranchInst &I) {
+void SelectionDAGBuilder::visitBr(const BranchInst &I) {
+ MachineBasicBlock *BrMBB = FuncInfo.MBBMap[I.getParent()];
+
// Update machine-CFG edges.
MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
// Figure out which block is immediately after the current one.
MachineBasicBlock *NextBlock = 0;
- MachineFunction::iterator BBI = CurMBB;
+ MachineFunction::iterator BBI = BrMBB;
if (++BBI != FuncInfo.MF->end())
NextBlock = BBI;
if (I.isUnconditional()) {
// Update machine-CFG edges.
- CurMBB->addSuccessor(Succ0MBB);
+ BrMBB->addSuccessor(Succ0MBB);
// If this is not a fall-through branch, emit the branch.
if (Succ0MBB != NextBlock)
@@ -1181,7 +1145,7 @@ void SelectionDAGBuilder::visitBr(BranchInst &I) {
// If this condition is one of the special cases we handle, do special stuff
// now.
- Value *CondVal = I.getCondition();
+ const Value *CondVal = I.getCondition();
MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
// If this is a series of conditions that are or'd or and'd together, emit
@@ -1199,15 +1163,16 @@ void SelectionDAGBuilder::visitBr(BranchInst &I) {
// cmp D, E
// jle foo
//
- if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
+ if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
if (BOp->hasOneUse() &&
(BOp->getOpcode() == Instruction::And ||
BOp->getOpcode() == Instruction::Or)) {
- FindMergedConditions(BOp, Succ0MBB, Succ1MBB, CurMBB, BOp->getOpcode());
+ FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
+ BOp->getOpcode());
// If the compares in later blocks need to use values not currently
// exported from this block, export them now. This block should always
// be the first entry.
- assert(SwitchCases[0].ThisBB == CurMBB && "Unexpected lowering!");
+ assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");
// Allow some cases to be rejected.
if (ShouldEmitAsBranches(SwitchCases)) {
@@ -1217,7 +1182,7 @@ void SelectionDAGBuilder::visitBr(BranchInst &I) {
}
// Emit the branch for this block.
- visitSwitchCase(SwitchCases[0]);
+ visitSwitchCase(SwitchCases[0], BrMBB);
SwitchCases.erase(SwitchCases.begin());
return;
}
@@ -1233,16 +1198,17 @@ void SelectionDAGBuilder::visitBr(BranchInst &I) {
// Create a CaseBlock record representing this branch.
CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
- NULL, Succ0MBB, Succ1MBB, CurMBB);
+ NULL, Succ0MBB, Succ1MBB, BrMBB);
// Use visitSwitchCase to actually insert the fast branch sequence for this
// cond branch.
- visitSwitchCase(CB);
+ visitSwitchCase(CB, BrMBB);
}
/// visitSwitchCase - Emits the necessary code to represent a single node in
/// the binary search tree resulting from lowering a switch instruction.
-void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB) {
+void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
+ MachineBasicBlock *SwitchBB) {
SDValue Cond;
SDValue CondLHS = getValue(CB.CmpLHS);
DebugLoc dl = getCurDebugLoc();
@@ -1281,13 +1247,13 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB) {
}
// Update successor info
- CurMBB->addSuccessor(CB.TrueBB);
- CurMBB->addSuccessor(CB.FalseBB);
+ SwitchBB->addSuccessor(CB.TrueBB);
+ SwitchBB->addSuccessor(CB.FalseBB);
// Set NextBlock to be the MBB immediately after the current one, if any.
// This is used to avoid emitting unnecessary branches to the next block.
MachineBasicBlock *NextBlock = 0;
- MachineFunction::iterator BBI = CurMBB;
+ MachineFunction::iterator BBI = SwitchBB;
if (++BBI != FuncInfo.MF->end())
NextBlock = BBI;
@@ -1305,11 +1271,11 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB) {
// If the branch was constant folded, fix up the CFG.
if (BrCond.getOpcode() == ISD::BR) {
- CurMBB->removeSuccessor(CB.FalseBB);
+ SwitchBB->removeSuccessor(CB.FalseBB);
} else {
// Otherwise, go ahead and insert the false branch.
if (BrCond == getControlRoot())
- CurMBB->removeSuccessor(CB.TrueBB);
+ SwitchBB->removeSuccessor(CB.TrueBB);
if (CB.FalseBB != NextBlock)
BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
@@ -1336,7 +1302,8 @@ void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) {
/// visitJumpTableHeader - This function emits necessary code to produce index
/// in the JumpTable from switch case.
void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
- JumpTableHeader &JTH) {
+ JumpTableHeader &JTH,
+ MachineBasicBlock *SwitchBB) {
// Subtract the lowest switch case value from the value being switched on and
// conditional branch to default mbb if the result is greater than the
// difference between smallest and largest cases.
@@ -1368,7 +1335,7 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
// Set NextBlock to be the MBB immediately after the current one, if any.
// This is used to avoid emitting unnecessary branches to the next block.
MachineBasicBlock *NextBlock = 0;
- MachineFunction::iterator BBI = CurMBB;
+ MachineFunction::iterator BBI = SwitchBB;
if (++BBI != FuncInfo.MF->end())
NextBlock = BBI;
@@ -1386,7 +1353,8 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
/// visitBitTestHeader - This function emits necessary code to produce value
/// suitable for "bit tests"
-void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B) {
+void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
+ MachineBasicBlock *SwitchBB) {
// Subtract the minimum value
SDValue SwitchOp = getValue(B.SValue);
EVT VT = SwitchOp.getValueType();
@@ -1409,14 +1377,14 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B) {
// Set NextBlock to be the MBB immediately after the current one, if any.
// This is used to avoid emitting unnecessary branches to the next block.
MachineBasicBlock *NextBlock = 0;
- MachineFunction::iterator BBI = CurMBB;
+ MachineFunction::iterator BBI = SwitchBB;
if (++BBI != FuncInfo.MF->end())
NextBlock = BBI;
MachineBasicBlock* MBB = B.Cases[0].ThisBB;
- CurMBB->addSuccessor(B.Default);
- CurMBB->addSuccessor(MBB);
+ SwitchBB->addSuccessor(B.Default);
+ SwitchBB->addSuccessor(MBB);
SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
MVT::Other, CopyTo, RangeCmp,
@@ -1432,7 +1400,8 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B) {
/// visitBitTestCase - this function produces one "bit test"
void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB,
unsigned Reg,
- BitTestCase &B) {
+ BitTestCase &B,
+ MachineBasicBlock *SwitchBB) {
// Make desired shift
SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg,
TLI.getPointerTy());
@@ -1450,8 +1419,8 @@ void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB,
AndOp, DAG.getConstant(0, TLI.getPointerTy()),
ISD::SETNE);
- CurMBB->addSuccessor(B.TargetBB);
- CurMBB->addSuccessor(NextMBB);
+ SwitchBB->addSuccessor(B.TargetBB);
+ SwitchBB->addSuccessor(NextMBB);
SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
MVT::Other, getControlRoot(),
@@ -1460,7 +1429,7 @@ void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB,
// Set NextBlock to be the MBB immediately after the current one, if any.
// This is used to avoid emitting unnecessary branches to the next block.
MachineBasicBlock *NextBlock = 0;
- MachineFunction::iterator BBI = CurMBB;
+ MachineFunction::iterator BBI = SwitchBB;
if (++BBI != FuncInfo.MF->end())
NextBlock = BBI;
@@ -1471,7 +1440,9 @@ void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB,
DAG.setRoot(BrAnd);
}
-void SelectionDAGBuilder::visitInvoke(InvokeInst &I) {
+void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
+ MachineBasicBlock *InvokeMBB = FuncInfo.MBBMap[I.getParent()];
+
// Retrieve successors.
MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)];
@@ -1487,8 +1458,8 @@ void SelectionDAGBuilder::visitInvoke(InvokeInst &I) {
CopyToExportRegsIfNeeded(&I);
// Update successor info
- CurMBB->addSuccessor(Return);
- CurMBB->addSuccessor(LandingPad);
+ InvokeMBB->addSuccessor(Return);
+ InvokeMBB->addSuccessor(LandingPad);
// Drop into normal successor.
DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
@@ -1496,15 +1467,16 @@ void SelectionDAGBuilder::visitInvoke(InvokeInst &I) {
DAG.getBasicBlock(Return)));
}
-void SelectionDAGBuilder::visitUnwind(UnwindInst &I) {
+void SelectionDAGBuilder::visitUnwind(const UnwindInst &I) {
}
/// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for
/// small case ranges).
bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
CaseRecVector& WorkList,
- Value* SV,
- MachineBasicBlock* Default) {
+ const Value* SV,
+ MachineBasicBlock *Default,
+ MachineBasicBlock *SwitchBB) {
Case& BackCase = *(CR.Range.second-1);
// Size is the number of Cases represented by this range.
@@ -1557,7 +1529,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
FallThrough = Default;
}
- Value *RHS, *LHS, *MHS;
+ const Value *RHS, *LHS, *MHS;
ISD::CondCode CC;
if (I->High == I->Low) {
// This is just a small case range :) containing exactly 1 case
@@ -1573,8 +1545,8 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
// code into the current block. Otherwise, push the CaseBlock onto the
// vector to be later processed by SDISel, and insert the node's MBB
// before the next MBB.
- if (CurBlock == CurMBB)
- visitSwitchCase(CB);
+ if (CurBlock == SwitchBB)
+ visitSwitchCase(CB, SwitchBB);
else
SwitchCases.push_back(CB);
@@ -1600,8 +1572,9 @@ static APInt ComputeRange(const APInt &First, const APInt &Last) {
/// handleJTSwitchCase - Emit jumptable for current switch case range
bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
CaseRecVector& WorkList,
- Value* SV,
- MachineBasicBlock* Default) {
+ const Value* SV,
+ MachineBasicBlock* Default,
+ MachineBasicBlock *SwitchBB) {
Case& FrontCase = *CR.Range.first;
Case& BackCase = *(CR.Range.second-1);
@@ -1613,7 +1586,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
I!=E; ++I)
TSize += I->size();
- if (!areJTsAllowed(TLI) || TSize.ult(APInt(First.getBitWidth(), 4)))
+ if (!areJTsAllowed(TLI) || TSize.ult(4))
return false;
APInt Range = ComputeRange(First, Last);
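
The dropped temporaries here and in the later hunks rely on APInt's comparison overloads that take a plain uint64_t; for the bit widths that occur in switch lowering the two spellings agree, the new one just skips constructing a throwaway APInt:

    #include "llvm/ADT/APInt.h"

    bool fewCases(const llvm::APInt &TSize) {
      bool New = TSize.ult(4);                                    // new spelling
      bool Old = TSize.ult(llvm::APInt(TSize.getBitWidth(), 4));  // old spelling
      return New && Old;  // identical results for these widths
    }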
@@ -1682,9 +1655,9 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
// Set the jump table information so that we can codegen it as a second
// MachineBasicBlock
JumpTable JT(-1U, JTI, JumpTableBB, Default);
- JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == CurMBB));
- if (CR.CaseBB == CurMBB)
- visitJumpTableHeader(JT, JTH);
+ JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == SwitchBB));
+ if (CR.CaseBB == SwitchBB)
+ visitJumpTableHeader(JT, JTH, SwitchBB);
JTCases.push_back(JumpTableBlock(JTH, JT));
@@ -1695,8 +1668,9 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
/// 2 subtrees.
bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
CaseRecVector& WorkList,
- Value* SV,
- MachineBasicBlock* Default) {
+ const Value* SV,
+ MachineBasicBlock *Default,
+ MachineBasicBlock *SwitchBB) {
// Get the MachineFunction which holds the current MBB. This is used when
// inserting any additional MBBs necessary to represent the switch.
MachineFunction *CurMF = FuncInfo.MF;
@@ -1810,8 +1784,8 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
// Otherwise, branch to LHS.
CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB);
- if (CR.CaseBB == CurMBB)
- visitSwitchCase(CB);
+ if (CR.CaseBB == SwitchBB)
+ visitSwitchCase(CB, SwitchBB);
else
SwitchCases.push_back(CB);
@@ -1823,8 +1797,9 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
/// of masks and emit bit tests with these masks.
bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
CaseRecVector& WorkList,
- Value* SV,
- MachineBasicBlock* Default){
+ const Value* SV,
+ MachineBasicBlock* Default,
+ MachineBasicBlock *SwitchBB){
EVT PTy = TLI.getPointerTy();
unsigned IntPtrBits = PTy.getSizeInBits();
@@ -1867,7 +1842,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
<< "Low bound: " << minValue << '\n'
<< "High bound: " << maxValue << '\n');
- if (cmpRange.uge(APInt(cmpRange.getBitWidth(), IntPtrBits)) ||
+ if (cmpRange.uge(IntPtrBits) ||
(!(Dests.size() == 1 && numCmps >= 3) &&
!(Dests.size() == 2 && numCmps >= 5) &&
!(Dests.size() >= 3 && numCmps >= 6)))
@@ -1879,8 +1854,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
// Optimize the case where all the case values fit in a
// word without having to subtract minValue. In this case,
// we can optimize away the subtraction.
- if (minValue.isNonNegative() &&
- maxValue.slt(APInt(maxValue.getBitWidth(), IntPtrBits))) {
+ if (minValue.isNonNegative() && maxValue.slt(IntPtrBits)) {
cmpRange = maxValue;
} else {
lowBound = minValue;
@@ -1940,11 +1914,11 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
}
BitTestBlock BTB(lowBound, cmpRange, SV,
- -1U, (CR.CaseBB == CurMBB),
+ -1U, (CR.CaseBB == SwitchBB),
CR.CaseBB, Default, BTC);
- if (CR.CaseBB == CurMBB)
- visitBitTestHeader(BTB);
+ if (CR.CaseBB == SwitchBB)
+ visitBitTestHeader(BTB, SwitchBB);
BitTestCases.push_back(BTB);
@@ -1994,7 +1968,9 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
return numCmps;
}
-void SelectionDAGBuilder::visitSwitch(SwitchInst &SI) {
+void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
+ MachineBasicBlock *SwitchMBB = FuncInfo.MBBMap[SI.getParent()];
+
// Figure out which block is immediately after the current one.
MachineBasicBlock *NextBlock = 0;
MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()];
@@ -2005,7 +1981,7 @@ void SelectionDAGBuilder::visitSwitch(SwitchInst &SI) {
// Update machine-CFG edges.
// If this is not a fall-through branch, emit the branch.
- CurMBB->addSuccessor(Default);
+ SwitchMBB->addSuccessor(Default);
if (Default != NextBlock)
DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
MVT::Other, getControlRoot(),
@@ -2026,38 +2002,41 @@ void SelectionDAGBuilder::visitSwitch(SwitchInst &SI) {
// Get the Value to be switched on and default basic blocks, which will be
// inserted into CaseBlock records, representing basic blocks in the binary
// search tree.
- Value *SV = SI.getOperand(0);
+ const Value *SV = SI.getOperand(0);
// Push the initial CaseRec onto the worklist
CaseRecVector WorkList;
- WorkList.push_back(CaseRec(CurMBB,0,0,CaseRange(Cases.begin(),Cases.end())));
+ WorkList.push_back(CaseRec(SwitchMBB,0,0,
+ CaseRange(Cases.begin(),Cases.end())));
while (!WorkList.empty()) {
// Grab a record representing a case range to process off the worklist
CaseRec CR = WorkList.back();
WorkList.pop_back();
- if (handleBitTestsSwitchCase(CR, WorkList, SV, Default))
+ if (handleBitTestsSwitchCase(CR, WorkList, SV, Default, SwitchMBB))
continue;
// If the range has few cases (two or less) emit a series of specific
// tests.
- if (handleSmallSwitchRange(CR, WorkList, SV, Default))
+ if (handleSmallSwitchRange(CR, WorkList, SV, Default, SwitchMBB))
continue;
// If the switch has more than 5 blocks, and at least 40% dense, and the
// target supports indirect branches, then emit a jump table rather than
// lowering the switch to a binary tree of conditional branches.
- if (handleJTSwitchCase(CR, WorkList, SV, Default))
+ if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB))
continue;
    // Emit binary tree. We need to pick a pivot, and push left and right ranges
    // onto the worklist. Leaves are handled via the handleSmallSwitchRange() call.
- handleBTSplitSwitchCase(CR, WorkList, SV, Default);
+ handleBTSplitSwitchCase(CR, WorkList, SV, Default, SwitchMBB);
}
}
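The "more than 5 blocks, at least 40% dense" rule mentioned in the comment
above can be restated as standalone integer arithmetic; this is a sketch of
the stated heuristic, not the exact LLVM predicate.

#include <cstdio>

// Density = handled case values / size of the covered value range.
static bool worthJumpTable(unsigned long long NumCases,
                           unsigned long long Range) {
  if (NumCases <= 5)
    return false;                     // too few blocks for a table
  return NumCases * 10 >= Range * 4;  // density >= 40%, kept in integers
}

int main() {
  std::printf("%d\n", worthJumpTable(6, 10));   // 1: 60% dense
  std::printf("%d\n", worthJumpTable(6, 100));  // 0: only 6% dense
}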
-void SelectionDAGBuilder::visitIndirectBr(IndirectBrInst &I) {
+void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
+ MachineBasicBlock *IndirectBrMBB = FuncInfo.MBBMap[I.getParent()];
+
// Update machine-CFG edges with unique successors.
SmallVector<BasicBlock*, 32> succs;
succs.reserve(I.getNumSuccessors());
@@ -2066,14 +2045,14 @@ void SelectionDAGBuilder::visitIndirectBr(IndirectBrInst &I) {
array_pod_sort(succs.begin(), succs.end());
succs.erase(std::unique(succs.begin(), succs.end()), succs.end());
for (unsigned i = 0, e = succs.size(); i != e; ++i)
- CurMBB->addSuccessor(FuncInfo.MBBMap[succs[i]]);
+ IndirectBrMBB->addSuccessor(FuncInfo.MBBMap[succs[i]]);
DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(),
MVT::Other, getControlRoot(),
getValue(I.getAddress())));
}
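A quick standalone illustration of the sort-then-unique idiom above, with a
plain std::vector standing in for the LLVM containers (the names here are
hypothetical): an indirectbr may list the same destination several times,
but each machine-CFG successor must be added exactly once.

#include <algorithm>
#include <cstdio>
#include <vector>

static void addUniqueSuccessors(std::vector<int> &Succs) {
  std::sort(Succs.begin(), Succs.end());                             // group duplicates
  Succs.erase(std::unique(Succs.begin(), Succs.end()), Succs.end()); // drop them
}

int main() {
  std::vector<int> Succs = {3, 1, 3, 2, 1};
  addUniqueSuccessors(Succs);
  for (int S : Succs)
    std::printf("%d ", S);   // prints: 1 2 3
}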
-void SelectionDAGBuilder::visitFSub(User &I) {
+void SelectionDAGBuilder::visitFSub(const User &I) {
// -0.0 - X --> fneg
const Type *Ty = I.getType();
if (Ty->isVectorTy()) {
@@ -2103,14 +2082,14 @@ void SelectionDAGBuilder::visitFSub(User &I) {
visitBinary(I, ISD::FSUB);
}
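The -0.0 (rather than 0.0) check above is forced by signed zeros; this
compilable note, independent of the patch, shows why "0.0 - X" is not a
sign flip while "-0.0 - X" always is.

#include <cmath>
#include <cstdio>

int main() {
  double X = 0.0;
  // 0.0 - 0.0 yields +0.0, but negating +0.0 yields -0.0, so the
  // "0.0 - X" form cannot be folded to fneg.
  std::printf("%d\n", std::signbit(0.0 - X) == std::signbit(-X));   // 0
  // -0.0 - X flips the sign for every X, matching fneg exactly.
  std::printf("%d\n", std::signbit(-0.0 - X) == std::signbit(-X));  // 1
}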
-void SelectionDAGBuilder::visitBinary(User &I, unsigned OpCode) {
+void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
setValue(&I, DAG.getNode(OpCode, getCurDebugLoc(),
Op1.getValueType(), Op1, Op2));
}
-void SelectionDAGBuilder::visitShift(User &I, unsigned Opcode) {
+void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
if (!I.getType()->isVectorTy() &&
@@ -2144,11 +2123,11 @@ void SelectionDAGBuilder::visitShift(User &I, unsigned Opcode) {
Op1.getValueType(), Op1, Op2));
}
-void SelectionDAGBuilder::visitICmp(User &I) {
+void SelectionDAGBuilder::visitICmp(const User &I) {
ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
- if (ICmpInst *IC = dyn_cast<ICmpInst>(&I))
+ if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I))
predicate = IC->getPredicate();
- else if (ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
+ else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
predicate = ICmpInst::Predicate(IC->getPredicate());
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
@@ -2158,11 +2137,11 @@ void SelectionDAGBuilder::visitICmp(User &I) {
setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Opcode));
}
-void SelectionDAGBuilder::visitFCmp(User &I) {
+void SelectionDAGBuilder::visitFCmp(const User &I) {
FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
- if (FCmpInst *FC = dyn_cast<FCmpInst>(&I))
+ if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I))
predicate = FC->getPredicate();
- else if (ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
+ else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
predicate = FCmpInst::Predicate(FC->getPredicate());
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
@@ -2171,7 +2150,7 @@ void SelectionDAGBuilder::visitFCmp(User &I) {
setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition));
}
-void SelectionDAGBuilder::visitSelect(User &I) {
+void SelectionDAGBuilder::visitSelect(const User &I) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, I.getType(), ValueVTs);
unsigned NumValues = ValueVTs.size();
@@ -2196,14 +2175,14 @@ void SelectionDAGBuilder::visitSelect(User &I) {
&Values[0], NumValues));
}
-void SelectionDAGBuilder::visitTrunc(User &I) {
+void SelectionDAGBuilder::visitTrunc(const User &I) {
// TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
SDValue N = getValue(I.getOperand(0));
EVT DestVT = TLI.getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N));
}
-void SelectionDAGBuilder::visitZExt(User &I) {
+void SelectionDAGBuilder::visitZExt(const User &I) {
// ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
  // ZExt also can't be a cast to bool for the same reason, so there's nothing much to do.
SDValue N = getValue(I.getOperand(0));
@@ -2211,7 +2190,7 @@ void SelectionDAGBuilder::visitZExt(User &I) {
setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N));
}
-void SelectionDAGBuilder::visitSExt(User &I) {
+void SelectionDAGBuilder::visitSExt(const User &I) {
// SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
  // SExt also can't be a cast to bool for the same reason, so there's nothing much to do.
SDValue N = getValue(I.getOperand(0));
@@ -2219,7 +2198,7 @@ void SelectionDAGBuilder::visitSExt(User &I) {
setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N));
}
-void SelectionDAGBuilder::visitFPTrunc(User &I) {
+void SelectionDAGBuilder::visitFPTrunc(const User &I) {
// FPTrunc is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
EVT DestVT = TLI.getValueType(I.getType());
@@ -2227,42 +2206,42 @@ void SelectionDAGBuilder::visitFPTrunc(User &I) {
DestVT, N, DAG.getIntPtrConstant(0)));
}
-void SelectionDAGBuilder::visitFPExt(User &I){
+void SelectionDAGBuilder::visitFPExt(const User &I){
  // FPExt is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
EVT DestVT = TLI.getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N));
}
-void SelectionDAGBuilder::visitFPToUI(User &I) {
+void SelectionDAGBuilder::visitFPToUI(const User &I) {
// FPToUI is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
EVT DestVT = TLI.getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N));
}
-void SelectionDAGBuilder::visitFPToSI(User &I) {
+void SelectionDAGBuilder::visitFPToSI(const User &I) {
// FPToSI is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
EVT DestVT = TLI.getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N));
}
-void SelectionDAGBuilder::visitUIToFP(User &I) {
+void SelectionDAGBuilder::visitUIToFP(const User &I) {
// UIToFP is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
EVT DestVT = TLI.getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N));
}
-void SelectionDAGBuilder::visitSIToFP(User &I){
+void SelectionDAGBuilder::visitSIToFP(const User &I){
// SIToFP is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
EVT DestVT = TLI.getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N));
}
-void SelectionDAGBuilder::visitPtrToInt(User &I) {
+void SelectionDAGBuilder::visitPtrToInt(const User &I) {
// What to do depends on the size of the integer and the size of the pointer.
// We can either truncate, zero extend, or no-op, accordingly.
SDValue N = getValue(I.getOperand(0));
@@ -2271,7 +2250,7 @@ void SelectionDAGBuilder::visitPtrToInt(User &I) {
setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT));
}
-void SelectionDAGBuilder::visitIntToPtr(User &I) {
+void SelectionDAGBuilder::visitIntToPtr(const User &I) {
// What to do depends on the size of the integer and the size of the pointer.
// We can either truncate, zero extend, or no-op, accordingly.
SDValue N = getValue(I.getOperand(0));
@@ -2280,7 +2259,7 @@ void SelectionDAGBuilder::visitIntToPtr(User &I) {
setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT));
}
-void SelectionDAGBuilder::visitBitCast(User &I) {
+void SelectionDAGBuilder::visitBitCast(const User &I) {
SDValue N = getValue(I.getOperand(0));
EVT DestVT = TLI.getValueType(I.getType());
@@ -2293,7 +2272,7 @@ void SelectionDAGBuilder::visitBitCast(User &I) {
setValue(&I, N); // noop cast.
}
-void SelectionDAGBuilder::visitInsertElement(User &I) {
+void SelectionDAGBuilder::visitInsertElement(const User &I) {
SDValue InVec = getValue(I.getOperand(0));
SDValue InVal = getValue(I.getOperand(1));
SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
@@ -2304,7 +2283,7 @@ void SelectionDAGBuilder::visitInsertElement(User &I) {
InVec, InVal, InIdx));
}
-void SelectionDAGBuilder::visitExtractElement(User &I) {
+void SelectionDAGBuilder::visitExtractElement(const User &I) {
SDValue InVec = getValue(I.getOperand(0));
SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
TLI.getPointerTy(),
@@ -2323,7 +2302,7 @@ static bool SequentialMask(SmallVectorImpl<int> &Mask, unsigned SIndx) {
return true;
}
-void SelectionDAGBuilder::visitShuffleVector(User &I) {
+void SelectionDAGBuilder::visitShuffleVector(const User &I) {
SmallVector<int, 8> Mask;
SDValue Src1 = getValue(I.getOperand(0));
SDValue Src2 = getValue(I.getOperand(1));
@@ -2504,7 +2483,7 @@ void SelectionDAGBuilder::visitShuffleVector(User &I) {
VT, &Ops[0], Ops.size()));
}
-void SelectionDAGBuilder::visitInsertValue(InsertValueInst &I) {
+void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
const Value *Op0 = I.getOperand(0);
const Value *Op1 = I.getOperand(1);
const Type *AggTy = I.getType();
@@ -2545,7 +2524,7 @@ void SelectionDAGBuilder::visitInsertValue(InsertValueInst &I) {
&Values[0], NumAggValues));
}
-void SelectionDAGBuilder::visitExtractValue(ExtractValueInst &I) {
+void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
const Value *Op0 = I.getOperand(0);
const Type *AggTy = Op0->getType();
const Type *ValTy = I.getType();
@@ -2573,13 +2552,13 @@ void SelectionDAGBuilder::visitExtractValue(ExtractValueInst &I) {
&Values[0], NumValValues));
}
-void SelectionDAGBuilder::visitGetElementPtr(User &I) {
+void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
SDValue N = getValue(I.getOperand(0));
const Type *Ty = I.getOperand(0)->getType();
- for (GetElementPtrInst::op_iterator OI = I.op_begin()+1, E = I.op_end();
+ for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end();
OI != E; ++OI) {
- Value *Idx = *OI;
+ const Value *Idx = *OI;
if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
if (Field) {
@@ -2599,7 +2578,7 @@ void SelectionDAGBuilder::visitGetElementPtr(User &I) {
Ty = cast<SequentialType>(Ty)->getElementType();
// If this is a constant subscript, handle it quickly.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
if (CI->getZExtValue() == 0) continue;
uint64_t Offs =
TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
@@ -2650,7 +2629,7 @@ void SelectionDAGBuilder::visitGetElementPtr(User &I) {
setValue(&I, N);
}
-void SelectionDAGBuilder::visitAlloca(AllocaInst &I) {
+void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
// If this is a fixed sized alloca in the entry block of the function,
// allocate it statically on the stack.
if (FuncInfo.StaticAllocaMap.count(&I))
@@ -2674,8 +2653,7 @@ void SelectionDAGBuilder::visitAlloca(AllocaInst &I) {
// Handle alignment. If the requested alignment is less than or equal to
// the stack alignment, ignore it. If the size is greater than or equal to
// the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
- unsigned StackAlign =
- TLI.getTargetMachine().getFrameInfo()->getStackAlignment();
+ unsigned StackAlign = TM.getFrameInfo()->getStackAlignment();
if (Align <= StackAlign)
Align = 0;
@@ -2702,7 +2680,7 @@ void SelectionDAGBuilder::visitAlloca(AllocaInst &I) {
FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject();
}
-void SelectionDAGBuilder::visitLoad(LoadInst &I) {
+void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
const Value *SV = I.getOperand(0);
SDValue Ptr = getValue(SV);
@@ -2762,9 +2740,9 @@ void SelectionDAGBuilder::visitLoad(LoadInst &I) {
&Values[0], NumValues));
}
-void SelectionDAGBuilder::visitStore(StoreInst &I) {
- Value *SrcV = I.getOperand(0);
- Value *PtrV = I.getOperand(1);
+void SelectionDAGBuilder::visitStore(const StoreInst &I) {
+ const Value *SrcV = I.getOperand(0);
+ const Value *PtrV = I.getOperand(1);
SmallVector<EVT, 4> ValueVTs;
SmallVector<uint64_t, 4> Offsets;
@@ -2801,7 +2779,7 @@ void SelectionDAGBuilder::visitStore(StoreInst &I) {
/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
/// node.
-void SelectionDAGBuilder::visitTargetIntrinsic(CallInst &I,
+void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
unsigned Intrinsic) {
bool HasChain = !I.doesNotAccessMemory();
bool OnlyLoad = HasChain && I.onlyReadsMemory();
@@ -2927,7 +2905,8 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt) {
/// implVisitBinaryAtomic - Lower a binary atomic intrinsic: I is the call
/// instruction and Op is the associated ISD node type.
const char *
-SelectionDAGBuilder::implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op) {
+SelectionDAGBuilder::implVisitBinaryAtomic(const CallInst& I,
+ ISD::NodeType Op) {
SDValue Root = getRoot();
SDValue L =
DAG.getAtomic(Op, getCurDebugLoc(),
@@ -2943,7 +2922,7 @@ SelectionDAGBuilder::implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op) {
// implVisitAluOverflow - Lower arithmetic overflow intrinsics.
const char *
-SelectionDAGBuilder::implVisitAluOverflow(CallInst &I, ISD::NodeType Op) {
+SelectionDAGBuilder::implVisitAluOverflow(const CallInst &I, ISD::NodeType Op) {
SDValue Op1 = getValue(I.getOperand(1));
SDValue Op2 = getValue(I.getOperand(2));
@@ -2955,7 +2934,7 @@ SelectionDAGBuilder::implVisitAluOverflow(CallInst &I, ISD::NodeType Op) {
/// visitExp - Lower an exp intrinsic. Handles the special sequences for
/// limited-precision mode.
void
-SelectionDAGBuilder::visitExp(CallInst &I) {
+SelectionDAGBuilder::visitExp(const CallInst &I) {
SDValue result;
DebugLoc dl = getCurDebugLoc();
@@ -3081,7 +3060,7 @@ SelectionDAGBuilder::visitExp(CallInst &I) {
/// visitLog - Lower a log intrinsic. Handles the special sequences for
/// limited-precision mode.
void
-SelectionDAGBuilder::visitLog(CallInst &I) {
+SelectionDAGBuilder::visitLog(const CallInst &I) {
SDValue result;
DebugLoc dl = getCurDebugLoc();
@@ -3191,7 +3170,7 @@ SelectionDAGBuilder::visitLog(CallInst &I) {
/// visitLog2 - Lower a log2 intrinsic. Handles the special sequences for
/// limited-precision mode.
void
-SelectionDAGBuilder::visitLog2(CallInst &I) {
+SelectionDAGBuilder::visitLog2(const CallInst &I) {
SDValue result;
DebugLoc dl = getCurDebugLoc();
@@ -3300,7 +3279,7 @@ SelectionDAGBuilder::visitLog2(CallInst &I) {
/// visitLog10 - Lower a log10 intrinsic. Handles the special sequences for
/// limited-precision mode.
void
-SelectionDAGBuilder::visitLog10(CallInst &I) {
+SelectionDAGBuilder::visitLog10(const CallInst &I) {
SDValue result;
DebugLoc dl = getCurDebugLoc();
@@ -3402,7 +3381,7 @@ SelectionDAGBuilder::visitLog10(CallInst &I) {
/// visitExp2 - Lower an exp2 intrinsic. Handles the special sequences for
/// limited-precision mode.
void
-SelectionDAGBuilder::visitExp2(CallInst &I) {
+SelectionDAGBuilder::visitExp2(const CallInst &I) {
SDValue result;
DebugLoc dl = getCurDebugLoc();
@@ -3516,9 +3495,9 @@ SelectionDAGBuilder::visitExp2(CallInst &I) {
/// visitPow - Lower a pow intrinsic. Handles the special sequences for
/// limited-precision mode with x == 10.0f.
void
-SelectionDAGBuilder::visitPow(CallInst &I) {
+SelectionDAGBuilder::visitPow(const CallInst &I) {
SDValue result;
- Value *Val = I.getOperand(1);
+ const Value *Val = I.getOperand(1);
DebugLoc dl = getCurDebugLoc();
bool IsExp10 = false;
@@ -3664,7 +3643,7 @@ static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS,
if (Val == 0)
return DAG.getConstantFP(1.0, LHS.getValueType());
- Function *F = DAG.getMachineFunction().getFunction();
+ const Function *F = DAG.getMachineFunction().getFunction();
if (!F->hasFnAttr(Attribute::OptimizeForSize) ||
// If optimizing for size, don't insert too many multiplies. This
// inserts up to 5 multiplies.
@@ -3700,12 +3679,58 @@ static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS,
return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
}
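For context on the multiply budget above: expanding powi uses the binary
square-and-multiply method, so the cost grows with the bit length of the
exponent rather than its value. A standalone sketch of the idea (assumed
semantics, not the DAG-building routine itself):

#include <cstdio>

static double expandPowI(double X, int N) {
  bool Invert = N < 0;
  unsigned E = Invert ? 0u - (unsigned)N : (unsigned)N;  // also safe for INT_MIN
  double Result = 1.0, Cur = X;
  while (E) {
    if (E & 1)
      Result *= Cur;   // one multiply per set bit of the exponent
    Cur *= Cur;        // one squaring per exponent bit scanned
    E >>= 1;
  }
  return Invert ? 1.0 / Result : Result;
}

int main() {
  std::printf("%g\n", expandPowI(2.0, 15));  // 32768, far fewer than 14 multiplies
}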
+/// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function
+/// argument, create the corresponding DBG_VALUE machine instruction for it now.
+/// At the end of instruction selection, they will be inserted to the entry BB.
+bool
+SelectionDAGBuilder::EmitFuncArgumentDbgValue(const DbgValueInst &DI,
+ const Value *V, MDNode *Variable,
+ uint64_t Offset,
+ const SDValue &N) {
+ if (!isa<Argument>(V))
+ return false;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ // Ignore inlined function arguments here.
+ DIVariable DV(Variable);
+ if (DV.isInlinedFnArgument(MF.getFunction()))
+ return false;
+
+ MachineBasicBlock *MBB = FuncInfo.MBBMap[DI.getParent()];
+ if (MBB != &MF.front())
+ return false;
+
+ unsigned Reg = 0;
+ if (N.getOpcode() == ISD::CopyFromReg) {
+ Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg();
+ if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) {
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ unsigned PR = RegInfo.getLiveInPhysReg(Reg);
+ if (PR)
+ Reg = PR;
+ }
+ }
+
+ if (!Reg) {
+ DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
+ if (VMI == FuncInfo.ValueMap.end())
+ return false;
+ Reg = VMI->second;
+ }
+
+ const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo();
+ MachineInstrBuilder MIB = BuildMI(MF, getCurDebugLoc(),
+ TII->get(TargetOpcode::DBG_VALUE))
+ .addReg(Reg, RegState::Debug).addImm(Offset).addMetadata(Variable);
+ FuncInfo.ArgDbgValues.push_back(&*MIB);
+ return true;
+}
/// visitIntrinsicCall - Lower the call to the specified intrinsic function. If
/// we want to emit this as a call to a named external function, return the name
/// otherwise lower it and return null.
const char *
-SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
+SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DebugLoc dl = getCurDebugLoc();
SDValue Res;
@@ -3792,44 +3817,76 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
return 0;
}
case Intrinsic::dbg_declare: {
- // FIXME: currently, we get here only if OptLevel != CodeGenOpt::None.
- // The real handling of this intrinsic is in FastISel.
- if (OptLevel != CodeGenOpt::None)
- // FIXME: Variable debug info is not supported here.
- return 0;
- DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
+ const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
if (!DIDescriptor::ValidDebugInfo(DI.getVariable(), CodeGenOpt::None))
return 0;
MDNode *Variable = DI.getVariable();
- Value *Address = DI.getAddress();
+ // Parameters are handled specially.
+ bool isParameter =
+ DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable;
+ const Value *Address = DI.getAddress();
if (!Address)
return 0;
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
Address = BCI->getOperand(0);
- AllocaInst *AI = dyn_cast<AllocaInst>(Address);
- // Don't handle byval struct arguments or VLAs, for example.
- if (!AI)
- return 0;
- DenseMap<const AllocaInst*, int>::iterator SI =
- FuncInfo.StaticAllocaMap.find(AI);
- if (SI == FuncInfo.StaticAllocaMap.end())
- return 0; // VLAs.
- int FI = SI->second;
+ const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
+ if (AI) {
+ // Don't handle byval arguments or VLAs, for example.
+ // Non-byval arguments are handled here (they refer to the stack temporary
+ // alloca at this point).
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI == FuncInfo.StaticAllocaMap.end())
+ return 0; // VLAs.
+ int FI = SI->second;
+
+ MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+ if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo())
+ MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc());
+ }
- MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
- if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo())
- MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc());
+    // Build an entry in DbgOrdering. Debug info input nodes get an SDNodeOrder
+    // but do not always have a corresponding SDNode built. The absolute, but
+    // not relative, SDNodeOrder values differ depending on whether debug info
+    // exists.
+ ++SDNodeOrder;
+ SDValue &N = NodeMap[Address];
+ SDDbgValue *SDV;
+ if (N.getNode()) {
+ if (isParameter && !AI) {
+ FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
+ if (FINode)
+ // Byval parameter. We have a frame index at this point.
+ SDV = DAG.getDbgValue(Variable, FINode->getIndex(),
+ 0, dl, SDNodeOrder);
+ else
+ // Can't do anything with other non-AI cases yet. This might be a
+ // parameter of a callee function that got inlined, for example.
+ return 0;
+ } else if (AI)
+ SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(),
+ 0, dl, SDNodeOrder);
+ else
+ // Can't do anything with other non-AI cases yet.
+ return 0;
+ DAG.AddDbgValue(SDV, N.getNode(), isParameter);
+ } else {
+ // This isn't useful, but it shows what we're missing.
+ SDV = DAG.getDbgValue(Variable, UndefValue::get(Address->getType()),
+ 0, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, 0, isParameter);
+ }
return 0;
}
case Intrinsic::dbg_value: {
- DbgValueInst &DI = cast<DbgValueInst>(I);
+ const DbgValueInst &DI = cast<DbgValueInst>(I);
if (!DIDescriptor::ValidDebugInfo(DI.getVariable(), CodeGenOpt::None))
return 0;
MDNode *Variable = DI.getVariable();
uint64_t Offset = DI.getOffset();
- Value *V = DI.getValue();
+ const Value *V = DI.getValue();
if (!V)
return 0;
@@ -3838,26 +3895,31 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
// absolute, but not relative, values are different depending on whether
// debug info exists.
++SDNodeOrder;
+ SDDbgValue *SDV;
if (isa<ConstantInt>(V) || isa<ConstantFP>(V)) {
- DAG.AddDbgValue(DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder));
+ SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, 0, false);
} else {
SDValue &N = NodeMap[V];
- if (N.getNode())
- DAG.AddDbgValue(DAG.getDbgValue(Variable, N.getNode(),
- N.getResNo(), Offset, dl, SDNodeOrder),
- N.getNode());
- else
+ if (N.getNode()) {
+ if (!EmitFuncArgumentDbgValue(DI, V, Variable, Offset, N)) {
+ SDV = DAG.getDbgValue(Variable, N.getNode(),
+ N.getResNo(), Offset, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, N.getNode(), false);
+ }
+ } else {
// We may expand this to cover more cases. One case where we have no
// data available is an unreferenced parameter; we need this fallback.
- DAG.AddDbgValue(DAG.getDbgValue(Variable,
- UndefValue::get(V->getType()),
- Offset, dl, SDNodeOrder));
+ SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()),
+ Offset, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, 0, false);
+ }
}
// Build a debug info table entry.
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(V))
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V))
V = BCI->getOperand(0);
- AllocaInst *AI = dyn_cast<AllocaInst>(V);
+ const AllocaInst *AI = dyn_cast<AllocaInst>(V);
// Don't handle byval struct arguments or VLAs, for example.
if (!AI)
return 0;
@@ -3874,7 +3936,8 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::eh_exception: {
// Insert the EXCEPTIONADDR instruction.
- assert(CurMBB->isLandingPad() &&"Call to eh.exception not in landing pad!");
+ assert(FuncInfo.MBBMap[I.getParent()]->isLandingPad() &&
+ "Call to eh.exception not in landing pad!");
SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
SDValue Ops[1];
Ops[0] = DAG.getRoot();
@@ -3885,16 +3948,17 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::eh_selector: {
+ MachineBasicBlock *CallMBB = FuncInfo.MBBMap[I.getParent()];
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
- if (CurMBB->isLandingPad())
- AddCatchInfo(I, &MMI, CurMBB);
+ if (CallMBB->isLandingPad())
+ AddCatchInfo(I, &MMI, CallMBB);
else {
#ifndef NDEBUG
FuncInfo.CatchInfoLost.insert(&I);
#endif
// FIXME: Mark exception selector register as live in. Hack for PR1508.
unsigned Reg = TLI.getExceptionSelectorRegister();
- if (Reg) CurMBB->addLiveIn(Reg);
+ if (Reg) FuncInfo.MBBMap[I.getParent()]->addLiveIn(Reg);
}
// Insert the EHSELECTION instruction.
@@ -3977,7 +4041,7 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
case Intrinsic::convertuu: Code = ISD::CVT_UU; break;
}
EVT DestVT = TLI.getValueType(I.getType());
- Value *Op1 = I.getOperand(1);
+ const Value *Op1 = I.getOperand(1);
Res = DAG.getConvertRndSat(DestVT, getCurDebugLoc(), getValue(Op1),
DAG.getValueType(DestVT),
DAG.getValueType(getValue(Op1).getValueType()),
@@ -4146,8 +4210,8 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::gcroot:
if (GFI) {
- Value *Alloca = I.getOperand(1);
- Constant *TypeMap = cast<Constant>(I.getOperand(2));
+ const Value *Alloca = I.getOperand(1);
+ const Constant *TypeMap = cast<Constant>(I.getOperand(2));
FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
GFI->addStackRoot(FI->getIndex(), TypeMap);
@@ -4244,93 +4308,7 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
}
}
-/// Test if the given instruction is in a position to be optimized
-/// with a tail-call. This roughly means that it's in a block with
-/// a return and there's nothing that needs to be scheduled
-/// between it and the return.
-///
-/// This function only tests target-independent requirements.
-static bool
-isInTailCallPosition(CallSite CS, Attributes CalleeRetAttr,
- const TargetLowering &TLI) {
- const Instruction *I = CS.getInstruction();
- const BasicBlock *ExitBB = I->getParent();
- const TerminatorInst *Term = ExitBB->getTerminator();
- const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);
- const Function *F = ExitBB->getParent();
-
- // The block must end in a return statement or unreachable.
- //
- // FIXME: Decline tailcall if it's not guaranteed and if the block ends in
- // an unreachable, for now. The way tailcall optimization is currently
- // implemented means it will add an epilogue followed by a jump. That is
- // not profitable. Also, if the callee is a special function (e.g.
- // longjmp on x86), it can end up causing miscompilation that has not
- // been fully understood.
- if (!Ret &&
- (!GuaranteedTailCallOpt || !isa<UnreachableInst>(Term))) return false;
-
- // If I will have a chain, make sure no other instruction that will have a
- // chain interposes between I and the return.
- if (I->mayHaveSideEffects() || I->mayReadFromMemory() ||
- !I->isSafeToSpeculativelyExecute())
- for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ;
- --BBI) {
- if (&*BBI == I)
- break;
- // Debug info intrinsics do not get in the way of tail call optimization.
- if (isa<DbgInfoIntrinsic>(BBI))
- continue;
- if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
- !BBI->isSafeToSpeculativelyExecute())
- return false;
- }
-
- // If the block ends with a void return or unreachable, it doesn't matter
- // what the call's return type is.
- if (!Ret || Ret->getNumOperands() == 0) return true;
-
- // If the return value is undef, it doesn't matter what the call's
- // return type is.
- if (isa<UndefValue>(Ret->getOperand(0))) return true;
-
- // Conservatively require the attributes of the call to match those of
- // the return. Ignore noalias because it doesn't affect the call sequence.
- unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
- if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
- return false;
-
- // It's not safe to eliminate the sign / zero extension of the return value.
- if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
- return false;
-
- // Otherwise, make sure the unmodified return value of I is the return value.
- for (const Instruction *U = dyn_cast<Instruction>(Ret->getOperand(0)); ;
- U = dyn_cast<Instruction>(U->getOperand(0))) {
- if (!U)
- return false;
- if (!U->hasOneUse())
- return false;
- if (U == I)
- break;
- // Check for a truly no-op truncate.
- if (isa<TruncInst>(U) &&
- TLI.isTruncateFree(U->getOperand(0)->getType(), U->getType()))
- continue;
- // Check for a truly no-op bitcast.
- if (isa<BitCastInst>(U) &&
- (U->getOperand(0)->getType() == U->getType() ||
- (U->getOperand(0)->getType()->isPointerTy() &&
- U->getType()->isPointerTy())))
- continue;
- // Otherwise it's not a true no-op.
- return false;
- }
-
- return true;
-}
-
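A source-level picture of the property the removed helper tests may be
useful (hypothetical code, not from the patch): the call must sit at the
end of its block with its unmodified result feeding the return directly.

int callee(int);

int inTailPosition(int X) {
  return callee(X);        // eligible: the result flows straight into the return
}

int notInTailPosition(int X) {
  return callee(X) + 1;    // ineligible: the result is modified after the call
}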
-void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee,
+void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
bool isTailCall,
MachineBasicBlock *LandingPad) {
const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
@@ -4378,7 +4356,7 @@ void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee,
RetTy = Type::getVoidTy(FTy->getContext());
}
- for (CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+ for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
i != e; ++i) {
SDValue ArgNode = getValue(*i);
Entry.Node = ArgNode; Entry.Ty = (*i)->getType();
@@ -4509,12 +4487,12 @@ void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee,
/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
/// value is equal or not-equal to zero.
-static bool IsOnlyUsedInZeroEqualityComparison(Value *V) {
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
+static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) {
+ for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end();
UI != E; ++UI) {
- if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
+ if (const ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
if (IC->isEquality())
- if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
+ if (const Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
if (C->isNullValue())
continue;
// Unknown instruction.
@@ -4523,17 +4501,20 @@ static bool IsOnlyUsedInZeroEqualityComparison(Value *V) {
return true;
}
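In source terms, the pattern this helper accepts looks like the first
function below (hypothetical user code, not part of the patch): every use
of the memcmp result is an equality test against zero, so only whole-word
equality of the operands matters, never their ordering.

#include <cstring>

bool sameFourBytes(const void *A, const void *B) {
  return std::memcmp(A, B, 4) == 0;  // zero-equality use: the helper accepts this
}

bool lessFourBytes(const void *A, const void *B) {
  return std::memcmp(A, B, 4) < 0;   // ordering use: the helper rejects this
}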
-static SDValue getMemCmpLoad(Value *PtrVal, MVT LoadVT, const Type *LoadTy,
+static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
+ const Type *LoadTy,
SelectionDAGBuilder &Builder) {
// Check to see if this load can be trivially constant folded, e.g. if the
// input is from a string literal.
- if (Constant *LoadInput = dyn_cast<Constant>(PtrVal)) {
+ if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) {
// Cast pointer to the type we really want to load.
- LoadInput = ConstantExpr::getBitCast(LoadInput,
+ LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput),
PointerType::getUnqual(LoadTy));
- if (Constant *LoadCst = ConstantFoldLoadFromConstPtr(LoadInput, Builder.TD))
+ if (const Constant *LoadCst =
+ ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput),
+ Builder.TD))
return Builder.getValue(LoadCst);
}
@@ -4566,18 +4547,18 @@ static SDValue getMemCmpLoad(Value *PtrVal, MVT LoadVT, const Type *LoadTy,
/// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form.
/// If so, return true and lower it, otherwise return false and it will be
/// lowered like a normal call.
-bool SelectionDAGBuilder::visitMemCmpCall(CallInst &I) {
+bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
// Verify that the prototype makes sense. int memcmp(void*,void*,size_t)
if (I.getNumOperands() != 4)
return false;
- Value *LHS = I.getOperand(1), *RHS = I.getOperand(2);
+ const Value *LHS = I.getOperand(1), *RHS = I.getOperand(2);
if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() ||
!I.getOperand(3)->getType()->isIntegerTy() ||
!I.getType()->isIntegerTy())
return false;
- ConstantInt *Size = dyn_cast<ConstantInt>(I.getOperand(3));
+ const ConstantInt *Size = dyn_cast<ConstantInt>(I.getOperand(3));
// memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0
// memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0
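The rewrite those two comment lines describe, expressed as plain C++ (a
semantic sketch rather than the emitted nodes; memcpy stands in for the
possibly-unaligned wide loads):

#include <cstdint>
#include <cstring>

static bool memcmp4IsNonZero(const void *LHS, const void *RHS) {
  std::uint32_t A, B;
  std::memcpy(&A, LHS, 4);  // one 4-byte load per side
  std::memcpy(&B, RHS, 4);
  return A != B;            // equivalent to memcmp(LHS, RHS, 4) != 0
}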
@@ -4643,11 +4624,11 @@ bool SelectionDAGBuilder::visitMemCmpCall(CallInst &I) {
}
-void SelectionDAGBuilder::visitCall(CallInst &I) {
+void SelectionDAGBuilder::visitCall(const CallInst &I) {
const char *RenameFn = 0;
if (Function *F = I.getCalledFunction()) {
if (F->isDeclaration()) {
- const TargetIntrinsicInfo *II = TLI.getTargetMachine().getIntrinsicInfo();
+ const TargetIntrinsicInfo *II = TM.getIntrinsicInfo();
if (II) {
if (unsigned IID = II->getIntrinsicID(F)) {
RenameFn = visitIntrinsicCall(I, IID);
@@ -4871,14 +4852,13 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
/// AddInlineAsmOperands - Add this value to the specified inlineasm node
/// operand list. This adds the code marker and includes the number of
/// values added into it.
-void RegsForValue::AddInlineAsmOperands(unsigned Code,
- bool HasMatching,unsigned MatchingIdx,
+void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
+ unsigned MatchingIdx,
SelectionDAG &DAG,
std::vector<SDValue> &Ops) const {
- assert(Regs.size() < (1 << 13) && "Too many inline asm outputs!");
- unsigned Flag = Code | (Regs.size() << 3);
+ unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
if (HasMatching)
- Flag |= 0x80000000 | (MatchingIdx << 16);
+ Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
SDValue Res = DAG.getTargetConstant(Flag, MVT::i32);
Ops.push_back(Res);
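The InlineAsm helpers above replace hand-rolled bit twiddling. Reading the
bit positions off the removed constants (so treat the exact layout as an
assumption), the flag word packs the operand kind into the low three bits,
the register count above it, and the matched-operand index plus a marker
bit for matching operands:

#include <cassert>

enum OpKind { RegUse = 1, RegDef = 2, Imm = 3, Mem = 4, RegDefEarlyClobber = 6 };

static unsigned getFlagWord(unsigned Kind, unsigned NumRegs) {
  assert(NumRegs < (1u << 13) && "Too many inline asm operand registers!");
  return Kind | (NumRegs << 3);                    // was: Code | (Regs.size() << 3)
}

static unsigned getFlagWordForMatchingOp(unsigned Flag, unsigned MatchedIdx) {
  return Flag | 0x80000000u | (MatchedIdx << 16);  // was spelled out with literals
}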
@@ -4994,7 +4974,7 @@ public:
if (isIndirect) {
const llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
if (!PtrTy)
- llvm_report_error("Indirect operand for inline asm not a pointer!");
+ report_fatal_error("Indirect operand for inline asm not a pointer!");
OpTy = PtrTy->getElementType();
}
@@ -5214,31 +5194,10 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
// Otherwise, we couldn't allocate enough registers for this.
}
-/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being
-/// processed uses a memory 'm' constraint.
-static bool
-hasInlineAsmMemConstraint(std::vector<InlineAsm::ConstraintInfo> &CInfos,
- const TargetLowering &TLI) {
- for (unsigned i = 0, e = CInfos.size(); i != e; ++i) {
- InlineAsm::ConstraintInfo &CI = CInfos[i];
- for (unsigned j = 0, ee = CI.Codes.size(); j != ee; ++j) {
- TargetLowering::ConstraintType CType = TLI.getConstraintType(CI.Codes[j]);
- if (CType == TargetLowering::C_Memory)
- return true;
- }
-
- // Indirect operand accesses access memory.
- if (CI.isIndirect)
- return true;
- }
-
- return false;
-}
-
/// visitInlineAsm - Handle a call to an InlineAsm object.
///
-void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
- InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
+ const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
/// ConstraintOperands - Information about all of the constraints.
std::vector<SDISelAsmOperandInfo> ConstraintOperands;
@@ -5274,7 +5233,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
case InlineAsm::isOutput:
// Indirect outputs just consume an argument.
if (OpInfo.isIndirect) {
- OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
+ OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
break;
}
@@ -5291,7 +5250,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
++ResNo;
break;
case InlineAsm::isInput:
- OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
+ OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
break;
case InlineAsm::isClobber:
// Nothing to do.
@@ -5304,7 +5263,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
// Strip bitcasts, if any. This mostly comes up for functions.
OpInfo.CallOperandVal = OpInfo.CallOperandVal->stripPointerCasts();
- if (BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
+ if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
} else {
OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
@@ -5327,14 +5286,15 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
// error.
if (OpInfo.hasMatchingInput()) {
SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+
if (OpInfo.ConstraintVT != Input.ConstraintVT) {
if ((OpInfo.ConstraintVT.isInteger() !=
Input.ConstraintVT.isInteger()) ||
(OpInfo.ConstraintVT.getSizeInBits() !=
Input.ConstraintVT.getSizeInBits())) {
- llvm_report_error("Unsupported asm: input constraint"
- " with a matching output constraint of incompatible"
- " type!");
+ report_fatal_error("Unsupported asm: input constraint"
+ " with a matching output constraint of"
+ " incompatible type!");
}
Input.ConstraintVT = OpInfo.ConstraintVT;
}
@@ -5356,7 +5316,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
// If the operand is a float, integer, or vector constant, spill to a
// constant pool entry to get its address.
- Value *OpVal = OpInfo.CallOperandVal;
+ const Value *OpVal = OpInfo.CallOperandVal;
if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
isa<ConstantVector>(OpVal)) {
OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal),
@@ -5409,6 +5369,11 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
DAG.getTargetExternalSymbol(IA->getAsmString().c_str(),
TLI.getPointerTy()));
+ // If we have a !srcloc metadata node associated with it, we want to attach
+ // this to the ultimately generated inline asm machineinstr. To do this, we
+ // pass in the third operand as this (potentially null) inline asm MDNode.
+ const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc");
+ AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));
// Loop over all of the inputs, copying the operand values into the
// appropriate registers and processing the output regs.
@@ -5428,8 +5393,8 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
assert(OpInfo.isIndirect && "Memory output must be indirect operand");
// Add information to the INLINEASM node to know about this output.
- unsigned ResOpType = 4/*MEM*/ | (1<<3);
- AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+ unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
+ AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags,
TLI.getPointerTy()));
AsmNodeOperands.push_back(OpInfo.CallOperand);
break;
@@ -5439,10 +5404,9 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
// Copy the output from the appropriate register. Find a register that
// we can use.
- if (OpInfo.AssignedRegs.Regs.empty()) {
- llvm_report_error("Couldn't allocate output reg for"
- " constraint '" + OpInfo.ConstraintCode + "'!");
- }
+ if (OpInfo.AssignedRegs.Regs.empty())
+ report_fatal_error("Couldn't allocate output reg for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'!");
// If this is an indirect operand, store through the pointer after the
// asm.
@@ -5459,8 +5423,8 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
// Add information to the INLINEASM node to know that this register is
// set.
OpInfo.AssignedRegs.AddInlineAsmOperands(OpInfo.isEarlyClobber ?
- 6 /* EARLYCLOBBER REGDEF */ :
- 2 /* REGDEF */ ,
+ InlineAsm::Kind_RegDefEarlyClobber :
+ InlineAsm::Kind_RegDef,
false,
0,
DAG,
@@ -5477,27 +5441,30 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
// Scan until we find the definition we already emitted of this operand.
// When we find it, create a RegsForValue operand.
- unsigned CurOp = 2; // The first operand.
+ unsigned CurOp = InlineAsm::Op_FirstOperand;
for (; OperandNo; --OperandNo) {
// Advance to the next operand.
unsigned OpFlag =
cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
- assert(((OpFlag & 7) == 2 /*REGDEF*/ ||
- (OpFlag & 7) == 6 /*EARLYCLOBBER REGDEF*/ ||
- (OpFlag & 7) == 4 /*MEM*/) &&
- "Skipped past definitions?");
+ assert((InlineAsm::isRegDefKind(OpFlag) ||
+ InlineAsm::isRegDefEarlyClobberKind(OpFlag) ||
+ InlineAsm::isMemKind(OpFlag)) && "Skipped past definitions?");
CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1;
}
unsigned OpFlag =
cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
- if ((OpFlag & 7) == 2 /*REGDEF*/
- || (OpFlag & 7) == 6 /* EARLYCLOBBER REGDEF */) {
+ if (InlineAsm::isRegDefKind(OpFlag) ||
+ InlineAsm::isRegDefEarlyClobberKind(OpFlag)) {
// Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
if (OpInfo.isIndirect) {
- llvm_report_error("Don't know how to handle tied indirect "
- "register inputs yet!");
+ // This happens on gcc/testsuite/gcc.dg/pr8788-1.c
+ LLVMContext &Ctx = *DAG.getContext();
+ Ctx.emitError(CS.getInstruction(), "inline asm not supported yet:"
+ " don't know how to handle tied "
+ "indirect register inputs");
}
+
RegsForValue MatchedRegs;
MatchedRegs.TLI = &TLI;
MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());
@@ -5512,22 +5479,23 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
        // Use the produced MatchedRegs object to copy the input value into the registers.
MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
Chain, &Flag);
- MatchedRegs.AddInlineAsmOperands(1 /*REGUSE*/,
+ MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
true, OpInfo.getMatchedOperand(),
DAG, AsmNodeOperands);
break;
- } else {
- assert(((OpFlag & 7) == 4) && "Unknown matching constraint!");
- assert((InlineAsm::getNumOperandRegisters(OpFlag)) == 1 &&
- "Unexpected number of operands");
- // Add information to the INLINEASM node to know about this input.
- // See InlineAsm.h isUseOperandTiedToDef.
- OpFlag |= 0x80000000 | (OpInfo.getMatchedOperand() << 16);
- AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag,
- TLI.getPointerTy()));
- AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
- break;
}
+
+ assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!");
+ assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 &&
+ "Unexpected number of operands");
+ // Add information to the INLINEASM node to know about this input.
+ // See InlineAsm.h isUseOperandTiedToDef.
+ OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag,
+ OpInfo.getMatchedOperand());
+ AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag,
+ TLI.getPointerTy()));
+ AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
+ break;
}
if (OpInfo.ConstraintType == TargetLowering::C_Other) {
@@ -5537,24 +5505,26 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
std::vector<SDValue> Ops;
TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0],
hasMemory, Ops, DAG);
- if (Ops.empty()) {
- llvm_report_error("Invalid operand for inline asm"
- " constraint '" + OpInfo.ConstraintCode + "'!");
- }
+ if (Ops.empty())
+ report_fatal_error("Invalid operand for inline asm constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'!");
// Add information to the INLINEASM node to know about this input.
- unsigned ResOpType = 3 /*IMM*/ | (Ops.size() << 3);
+ unsigned ResOpType =
+ InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size());
AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
TLI.getPointerTy()));
AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());
break;
- } else if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
+ }
+
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
assert(InOperandVal.getValueType() == TLI.getPointerTy() &&
"Memory operands expect pointer values");
// Add information to the INLINEASM node to know about this input.
- unsigned ResOpType = 4/*MEM*/ | (1<<3);
+ unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
TLI.getPointerTy()));
AsmNodeOperands.push_back(InOperandVal);
@@ -5569,15 +5539,14 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
// Copy the input into the appropriate registers.
if (OpInfo.AssignedRegs.Regs.empty() ||
- !OpInfo.AssignedRegs.areValueTypesLegal()) {
- llvm_report_error("Couldn't allocate input reg for"
- " constraint '"+ OpInfo.ConstraintCode +"'!");
- }
+ !OpInfo.AssignedRegs.areValueTypesLegal())
+ report_fatal_error("Couldn't allocate input reg for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'!");
OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
Chain, &Flag);
- OpInfo.AssignedRegs.AddInlineAsmOperands(1/*REGUSE*/, false, 0,
+ OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0,
DAG, AsmNodeOperands);
break;
}
@@ -5585,7 +5554,8 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
// Add the clobbered value to the operand list, so that the register
// allocator is aware that the physreg got clobbered.
if (!OpInfo.AssignedRegs.Regs.empty())
- OpInfo.AssignedRegs.AddInlineAsmOperands(6 /* EARLYCLOBBER REGDEF */,
+ OpInfo.AssignedRegs.AddInlineAsmOperands(
+ InlineAsm::Kind_RegDefEarlyClobber,
false, 0, DAG,
AsmNodeOperands);
break;
@@ -5593,7 +5563,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
}
}
- // Finish up input operands.
+ // Finish up input operands. Set the input chain and add the flag last.
AsmNodeOperands[0] = Chain;
if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
@@ -5638,17 +5608,16 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
return;
}
- std::vector<std::pair<SDValue, Value*> > StoresToEmit;
+ std::vector<std::pair<SDValue, const Value *> > StoresToEmit;
// Process indirect outputs, first output all of the flagged copies out of
// physregs.
for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) {
RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
- Value *Ptr = IndirectStoresToEmit[i].second;
+ const Value *Ptr = IndirectStoresToEmit[i].second;
SDValue OutVal = OutRegs.getCopyFromRegs(DAG, getCurDebugLoc(),
Chain, &Flag);
StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
-
}
// Emit the non-flagged stores from the physregs.
@@ -5669,14 +5638,14 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
DAG.setRoot(Chain);
}
-void SelectionDAGBuilder::visitVAStart(CallInst &I) {
+void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(),
MVT::Other, getRoot(),
getValue(I.getOperand(1)),
DAG.getSrcValue(I.getOperand(1))));
}
-void SelectionDAGBuilder::visitVAArg(VAArgInst &I) {
+void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(),
getRoot(), getValue(I.getOperand(0)),
DAG.getSrcValue(I.getOperand(0)));
@@ -5684,14 +5653,14 @@ void SelectionDAGBuilder::visitVAArg(VAArgInst &I) {
DAG.setRoot(V.getValue(1));
}
-void SelectionDAGBuilder::visitVAEnd(CallInst &I) {
+void SelectionDAGBuilder::visitVAEnd(const CallInst &I) {
DAG.setRoot(DAG.getNode(ISD::VAEND, getCurDebugLoc(),
MVT::Other, getRoot(),
getValue(I.getOperand(1)),
DAG.getSrcValue(I.getOperand(1))));
}
-void SelectionDAGBuilder::visitVACopy(CallInst &I) {
+void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurDebugLoc(),
MVT::Other, getRoot(),
getValue(I.getOperand(1)),
@@ -5711,7 +5680,8 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
CallingConv::ID CallConv, bool isTailCall,
bool isReturnValueUsed,
SDValue Callee,
- ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl) {
+ ArgListTy &Args, SelectionDAG &DAG,
+ DebugLoc dl) const {
// Handle all of the outgoing arguments.
SmallVector<ISD::OutputArg, 32> Outs;
for (unsigned i = 0, e = Args.size(); i != e; ++i) {
@@ -5862,18 +5832,19 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
void TargetLowering::LowerOperationWrapper(SDNode *N,
SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
SDValue Res = LowerOperation(SDValue(N, 0), DAG);
if (Res.getNode())
Results.push_back(Res);
}
-SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("LowerOperation not implemented for this target!");
return SDValue();
}
-void SelectionDAGBuilder::CopyValueToVirtualRegister(Value *V, unsigned Reg) {
+void
+SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
SDValue Op = getValue(V);
assert((Op.getOpcode() != ISD::CopyFromReg ||
cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
@@ -5888,9 +5859,9 @@ void SelectionDAGBuilder::CopyValueToVirtualRegister(Value *V, unsigned Reg) {
#include "llvm/CodeGen/SelectionDAGISel.h"
-void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) {
+void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
// If this is the entry block, emit arguments.
- Function &F = *LLVMBB->getParent();
+ const Function &F = *LLVMBB->getParent();
SelectionDAG &DAG = SDB->DAG;
SDValue OldRoot = DAG.getRoot();
DebugLoc dl = SDB->getCurDebugLoc();
@@ -5915,14 +5886,14 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) {
// or one register.
ISD::ArgFlagsTy Flags;
Flags.setSRet();
- EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), ValueVTs[0]);
+ EVT RegisterVT = TLI.getRegisterType(*DAG.getContext(), ValueVTs[0]);
ISD::InputArg RetArg(Flags, RegisterVT, true);
Ins.push_back(RetArg);
}
// Set up the incoming argument description vector.
unsigned Idx = 1;
- for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
+ for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
I != E; ++I, ++Idx) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, I->getType(), ValueVTs);
@@ -6024,7 +5995,7 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) {
++i;
}
- for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
+ for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
++I, ++Idx) {
SmallVector<SDValue, 4> ArgValues;
SmallVector<EVT, 4> ValueVTs;
@@ -6067,7 +6038,7 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) {
// Finally, if the target has anything special to do, allow it to do so.
// FIXME: this should insert code into the DAG!
- EmitFunctionEntryCode(F, SDB->DAG.getMachineFunction());
+ EmitFunctionEntryCode();
}
/// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to
@@ -6078,51 +6049,50 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) {
/// the end.
///
void
-SelectionDAGISel::HandlePHINodesInSuccessorBlocks(BasicBlock *LLVMBB) {
- TerminatorInst *TI = LLVMBB->getTerminator();
+SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
+ const TerminatorInst *TI = LLVMBB->getTerminator();
SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
// Check successor nodes' PHI nodes that expect a constant to be available
// from this block.
for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
- BasicBlock *SuccBB = TI->getSuccessor(succ);
+ const BasicBlock *SuccBB = TI->getSuccessor(succ);
if (!isa<PHINode>(SuccBB->begin())) continue;
- MachineBasicBlock *SuccMBB = FuncInfo->MBBMap[SuccBB];
+ MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
// If this terminator has multiple identical successors (common for
// switches), only handle each succ once.
if (!SuccsHandled.insert(SuccMBB)) continue;
MachineBasicBlock::iterator MBBI = SuccMBB->begin();
- PHINode *PN;
// At this point we know that there is a 1-1 correspondence between LLVM PHI
// nodes and Machine PHI nodes, but the incoming operands have not been
// emitted yet.
- for (BasicBlock::iterator I = SuccBB->begin();
- (PN = dyn_cast<PHINode>(I)); ++I) {
+ for (BasicBlock::const_iterator I = SuccBB->begin();
+ const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
      // Ignore dead PHIs.
if (PN->use_empty()) continue;
unsigned Reg;
- Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+ const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
- if (Constant *C = dyn_cast<Constant>(PHIOp)) {
- unsigned &RegOut = SDB->ConstantsOut[C];
+ if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
+ unsigned &RegOut = ConstantsOut[C];
if (RegOut == 0) {
- RegOut = FuncInfo->CreateRegForValue(C);
- SDB->CopyValueToVirtualRegister(C, RegOut);
+ RegOut = FuncInfo.CreateRegForValue(C);
+ CopyValueToVirtualRegister(C, RegOut);
}
Reg = RegOut;
} else {
- Reg = FuncInfo->ValueMap[PHIOp];
+ Reg = FuncInfo.ValueMap[PHIOp];
if (Reg == 0) {
assert(isa<AllocaInst>(PHIOp) &&
- FuncInfo->StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
+ FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
"Didn't codegen value into a register!??");
- Reg = FuncInfo->CreateRegForValue(PHIOp);
- SDB->CopyValueToVirtualRegister(PHIOp, Reg);
+ Reg = FuncInfo.CreateRegForValue(PHIOp);
+ CopyValueToVirtualRegister(PHIOp, Reg);
}
}
@@ -6132,77 +6102,12 @@ SelectionDAGISel::HandlePHINodesInSuccessorBlocks(BasicBlock *LLVMBB) {
ComputeValueVTs(TLI, PN->getType(), ValueVTs);
for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
EVT VT = ValueVTs[vti];
- unsigned NumRegisters = TLI.getNumRegisters(*CurDAG->getContext(), VT);
+ unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
for (unsigned i = 0, e = NumRegisters; i != e; ++i)
- SDB->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i));
+ FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i));
Reg += NumRegisters;
}
}
}
- SDB->ConstantsOut.clear();
-}
-
-/// This is the Fast-ISel version of HandlePHINodesInSuccessorBlocks. It only
-/// supports legal types, and it emits MachineInstrs directly instead of
-/// creating SelectionDAG nodes.
-///
-bool
-SelectionDAGISel::HandlePHINodesInSuccessorBlocksFast(BasicBlock *LLVMBB,
- FastISel *F) {
- TerminatorInst *TI = LLVMBB->getTerminator();
-
- SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
- unsigned OrigNumPHINodesToUpdate = SDB->PHINodesToUpdate.size();
-
- // Check successor nodes' PHI nodes that expect a constant to be available
- // from this block.
- for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
- BasicBlock *SuccBB = TI->getSuccessor(succ);
- if (!isa<PHINode>(SuccBB->begin())) continue;
- MachineBasicBlock *SuccMBB = FuncInfo->MBBMap[SuccBB];
-
- // If this terminator has multiple identical successors (common for
- // switches), only handle each succ once.
- if (!SuccsHandled.insert(SuccMBB)) continue;
-
- MachineBasicBlock::iterator MBBI = SuccMBB->begin();
- PHINode *PN;
-
- // At this point we know that there is a 1-1 correspondence between LLVM PHI
- // nodes and Machine PHI nodes, but the incoming operands have not been
- // emitted yet.
- for (BasicBlock::iterator I = SuccBB->begin();
- (PN = dyn_cast<PHINode>(I)); ++I) {
-      // Ignore dead PHIs.
- if (PN->use_empty()) continue;
-
- // Only handle legal types. Two interesting things to note here. First,
- // by bailing out early, we may leave behind some dead instructions,
- // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its
-      // own moves. Second, this check is necessary because FastISel doesn't
- // use CreateRegForValue to create registers, so it always creates
- // exactly one register for each non-void instruction.
- EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true);
- if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
- // Promote MVT::i1.
- if (VT == MVT::i1)
- VT = TLI.getTypeToTransformTo(*CurDAG->getContext(), VT);
- else {
- SDB->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
- return false;
- }
- }
-
- Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
-
- unsigned Reg = F->getRegForValue(PHIOp);
- if (Reg == 0) {
- SDB->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
- return false;
- }
- SDB->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg));
- }
- }
-
- return true;
+ ConstantsOut.clear();
}
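A minimal sketch of the ConstantsOut bookkeeping in the function above,
with int keying a plain map where the real code uses Constant* (all names
hypothetical): each constant incoming value gets one virtual register per
predecessor block, created on first use and reused by every successor PHI.

#include <cstdio>
#include <map>

static unsigned regForConstant(int C, std::map<int, unsigned> &ConstantsOut,
                               unsigned &NextReg) {
  unsigned &RegOut = ConstantsOut[C];
  if (RegOut == 0)
    RegOut = NextReg++;  // emit the copy-to-vreg only once per block
  return RegOut;
}

int main() {
  std::map<int, unsigned> ConstantsOut;
  unsigned NextReg = 1;
  unsigned A = regForConstant(7, ConstantsOut, NextReg);  // 1: fresh register
  unsigned B = regForConstant(7, ConstantsOut, NextReg);  // 1: reused
  unsigned C = regForConstant(9, ConstantsOut, NextReg);  // 2: fresh register
  std::printf("%u %u %u\n", A, B, C);
}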
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index fdcba0f0bc72..3fcd4b9dc437 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -36,6 +36,7 @@ class BasicBlock;
class BitCastInst;
class BranchInst;
class CallInst;
+class DbgValueInst;
class ExtractElementInst;
class ExtractValueInst;
class FCmpInst;
@@ -58,6 +59,7 @@ class LoadInst;
class MachineBasicBlock;
class MachineInstr;
class MachineRegisterInfo;
+class MDNode;
class PHINode;
class PtrToIntInst;
class ReturnInst;
@@ -80,11 +82,8 @@ class ZExtInst;
//===----------------------------------------------------------------------===//
/// SelectionDAGBuilder - This is the common target-independent lowering
/// implementation that is parameterized by a TargetLowering object.
-/// Also, targets can overload any lowering method.
///
class SelectionDAGBuilder {
- MachineBasicBlock *CurMBB;
-
/// CurDebugLoc - current file + line number. Changes as we build the DAG.
DebugLoc CurDebugLoc;
@@ -143,15 +142,16 @@ private:
/// CaseRec - A struct with ctor used in lowering switches to a binary tree
/// of conditional branches.
struct CaseRec {
- CaseRec(MachineBasicBlock *bb, Constant *lt, Constant *ge, CaseRange r) :
+ CaseRec(MachineBasicBlock *bb, const Constant *lt, const Constant *ge,
+ CaseRange r) :
CaseBB(bb), LT(lt), GE(ge), Range(r) {}
/// CaseBB - The MBB in which to emit the compare and branch
MachineBasicBlock *CaseBB;
/// LT, GE - If nonzero, we know the current case value must be less-than or
/// greater-than-or-equal-to these Constants.
- Constant *LT;
- Constant *GE;
+ const Constant *LT;
+ const Constant *GE;
/// Range - A pair of iterators representing the range of case values to be
/// processed at this point in the binary search tree.
CaseRange Range;
@@ -182,7 +182,8 @@ private:
/// SelectionDAGBuilder and SDISel for the code generation of additional basic
/// blocks needed by multi-case switch statements.
struct CaseBlock {
- CaseBlock(ISD::CondCode cc, Value *cmplhs, Value *cmprhs, Value *cmpmiddle,
+ CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs,
+ const Value *cmpmiddle,
MachineBasicBlock *truebb, MachineBasicBlock *falsebb,
MachineBasicBlock *me)
: CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs),
@@ -192,7 +193,7 @@ private:
// CmpLHS/CmpRHS/CmpMHS - The LHS/MHS/RHS of the comparison to emit.
// Emit by default LHS op RHS. MHS is used for range comparisons:
// If MHS is not null: (LHS <= MHS) and (MHS <= RHS).
- Value *CmpLHS, *CmpMHS, *CmpRHS;
+ const Value *CmpLHS, *CmpMHS, *CmpRHS;
// TrueBB/FalseBB - the block to branch to if the setcc is true/false.
MachineBasicBlock *TrueBB, *FalseBB;
// ThisBB - the block into which to emit the code for the setcc and branches
@@ -214,12 +215,12 @@ private:
MachineBasicBlock *Default;
};
struct JumpTableHeader {
- JumpTableHeader(APInt F, APInt L, Value *SV, MachineBasicBlock *H,
+ JumpTableHeader(APInt F, APInt L, const Value *SV, MachineBasicBlock *H,
bool E = false):
First(F), Last(L), SValue(SV), HeaderBB(H), Emitted(E) {}
APInt First;
APInt Last;
- Value *SValue;
+ const Value *SValue;
MachineBasicBlock *HeaderBB;
bool Emitted;
};
@@ -236,7 +237,7 @@ private:
typedef SmallVector<BitTestCase, 3> BitTestInfo;
struct BitTestBlock {
- BitTestBlock(APInt F, APInt R, Value* SV,
+ BitTestBlock(APInt F, APInt R, const Value* SV,
unsigned Rg, bool E,
MachineBasicBlock* P, MachineBasicBlock* D,
const BitTestInfo& C):
@@ -244,7 +245,7 @@ private:
Parent(P), Default(D), Cases(C) { }
APInt First;
APInt Range;
- Value *SValue;
+ const Value *SValue;
unsigned Reg;
bool Emitted;
MachineBasicBlock *Parent;
@@ -256,7 +257,8 @@ public:
// TLI - This is information that describes the available target features we
// need for lowering. This indicates when operations are unavailable,
// implemented with a libcall, etc.
- TargetLowering &TLI;
+ const TargetMachine &TM;
+ const TargetLowering &TLI;
SelectionDAG &DAG;
const TargetData *TD;
AliasAnalysis *AA;
@@ -271,17 +273,9 @@ public:
/// SwitchInst code generation information.
std::vector<BitTestBlock> BitTestCases;
- /// PHINodesToUpdate - A list of phi instructions whose operand list will
- /// be updated after processing the current basic block.
- std::vector<std::pair<MachineInstr*, unsigned> > PHINodesToUpdate;
-
- /// EdgeMapping - If an edge from CurMBB to any MBB is changed (e.g. due to
- /// scheduler custom lowering), track the change here.
- DenseMap<MachineBasicBlock*, MachineBasicBlock*> EdgeMapping;
-
// Emit PHI-node-operand constants only once even if used by multiple
// PHI nodes.
- DenseMap<Constant*, unsigned> ConstantsOut;
+ DenseMap<const Constant *, unsigned> ConstantsOut;
/// FuncInfo - Information about the function as a whole.
///
@@ -302,16 +296,16 @@ public:
LLVMContext *Context;
- SelectionDAGBuilder(SelectionDAG &dag, TargetLowering &tli,
- FunctionLoweringInfo &funcinfo,
+ SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo,
CodeGenOpt::Level ol)
- : SDNodeOrder(0), TLI(tli), DAG(dag), FuncInfo(funcinfo), OptLevel(ol),
+ : SDNodeOrder(0), TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()),
+ DAG(dag), FuncInfo(funcinfo), OptLevel(ol),
HasTailCall(false), Context(dag.getContext()) {
}
void init(GCFunctionInfo *gfi, AliasAnalysis &aa);
- /// clear - Clear out the curret SelectionDAG and the associated
+ /// clear - Clear out the current SelectionDAG and the associated
/// state and prepare this SelectionDAGBuilder object to be used
/// for a new block. This doesn't clear out information about
/// additional blocks that are needed to complete switch lowering
@@ -333,22 +327,19 @@ public:
SDValue getControlRoot();
DebugLoc getCurDebugLoc() const { return CurDebugLoc; }
- void setCurDebugLoc(DebugLoc dl) { CurDebugLoc = dl; }
unsigned getSDNodeOrder() const { return SDNodeOrder; }
- void CopyValueToVirtualRegister(Value *V, unsigned Reg);
+ void CopyValueToVirtualRegister(const Value *V, unsigned Reg);
/// AssignOrderingToNode - Assign an ordering to the node. The order is gotten
/// from how the code appeared in the source. The ordering is used by the
/// scheduler to effectively turn off scheduling.
void AssignOrderingToNode(const SDNode *Node);
- void visit(Instruction &I);
-
- void visit(unsigned Opcode, User &I);
+ void visit(const Instruction &I);
- void setCurrentBasicBlock(MachineBasicBlock *MBB) { CurMBB = MBB; }
+ void visit(unsigned Opcode, const User &I);
SDValue getValue(const Value *V);
@@ -362,136 +353,154 @@ public:
std::set<unsigned> &OutputRegs,
std::set<unsigned> &InputRegs);
- void FindMergedConditions(Value *Cond, MachineBasicBlock *TBB,
+ void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB,
MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
- unsigned Opc);
- void EmitBranchForMergedCondition(Value *Cond, MachineBasicBlock *TBB,
+ MachineBasicBlock *SwitchBB, unsigned Opc);
+ void EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- MachineBasicBlock *CurBB);
+ MachineBasicBlock *CurBB,
+ MachineBasicBlock *SwitchBB);
bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases);
- bool isExportableFromCurrentBlock(Value *V, const BasicBlock *FromBB);
- void CopyToExportRegsIfNeeded(Value *V);
- void ExportFromCurrentBlock(Value *V);
- void LowerCallTo(CallSite CS, SDValue Callee, bool IsTailCall,
+ bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB);
+ void CopyToExportRegsIfNeeded(const Value *V);
+ void ExportFromCurrentBlock(const Value *V);
+ void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall,
MachineBasicBlock *LandingPad = NULL);
private:
// Terminator instructions.
- void visitRet(ReturnInst &I);
- void visitBr(BranchInst &I);
- void visitSwitch(SwitchInst &I);
- void visitIndirectBr(IndirectBrInst &I);
- void visitUnreachable(UnreachableInst &I) { /* noop */ }
+ void visitRet(const ReturnInst &I);
+ void visitBr(const BranchInst &I);
+ void visitSwitch(const SwitchInst &I);
+ void visitIndirectBr(const IndirectBrInst &I);
+ void visitUnreachable(const UnreachableInst &I) { /* noop */ }
// Helpers for visitSwitch
bool handleSmallSwitchRange(CaseRec& CR,
CaseRecVector& WorkList,
- Value* SV,
- MachineBasicBlock* Default);
+ const Value* SV,
+ MachineBasicBlock* Default,
+ MachineBasicBlock *SwitchBB);
bool handleJTSwitchCase(CaseRec& CR,
CaseRecVector& WorkList,
- Value* SV,
- MachineBasicBlock* Default);
+ const Value* SV,
+ MachineBasicBlock* Default,
+ MachineBasicBlock *SwitchBB);
bool handleBTSplitSwitchCase(CaseRec& CR,
CaseRecVector& WorkList,
- Value* SV,
- MachineBasicBlock* Default);
+ const Value* SV,
+ MachineBasicBlock* Default,
+ MachineBasicBlock *SwitchBB);
bool handleBitTestsSwitchCase(CaseRec& CR,
CaseRecVector& WorkList,
- Value* SV,
- MachineBasicBlock* Default);
+ const Value* SV,
+ MachineBasicBlock* Default,
+ MachineBasicBlock *SwitchBB);
public:
- void visitSwitchCase(CaseBlock &CB);
- void visitBitTestHeader(BitTestBlock &B);
+ void visitSwitchCase(CaseBlock &CB,
+ MachineBasicBlock *SwitchBB);
+ void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB);
void visitBitTestCase(MachineBasicBlock* NextMBB,
unsigned Reg,
- BitTestCase &B);
+ BitTestCase &B,
+ MachineBasicBlock *SwitchBB);
void visitJumpTable(JumpTable &JT);
- void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH);
+ void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH,
+ MachineBasicBlock *SwitchBB);
private:
// These all get lowered before this pass.
- void visitInvoke(InvokeInst &I);
- void visitUnwind(UnwindInst &I);
-
- void visitBinary(User &I, unsigned OpCode);
- void visitShift(User &I, unsigned Opcode);
- void visitAdd(User &I) { visitBinary(I, ISD::ADD); }
- void visitFAdd(User &I) { visitBinary(I, ISD::FADD); }
- void visitSub(User &I) { visitBinary(I, ISD::SUB); }
- void visitFSub(User &I);
- void visitMul(User &I) { visitBinary(I, ISD::MUL); }
- void visitFMul(User &I) { visitBinary(I, ISD::FMUL); }
- void visitURem(User &I) { visitBinary(I, ISD::UREM); }
- void visitSRem(User &I) { visitBinary(I, ISD::SREM); }
- void visitFRem(User &I) { visitBinary(I, ISD::FREM); }
- void visitUDiv(User &I) { visitBinary(I, ISD::UDIV); }
- void visitSDiv(User &I) { visitBinary(I, ISD::SDIV); }
- void visitFDiv(User &I) { visitBinary(I, ISD::FDIV); }
- void visitAnd (User &I) { visitBinary(I, ISD::AND); }
- void visitOr (User &I) { visitBinary(I, ISD::OR); }
- void visitXor (User &I) { visitBinary(I, ISD::XOR); }
- void visitShl (User &I) { visitShift(I, ISD::SHL); }
- void visitLShr(User &I) { visitShift(I, ISD::SRL); }
- void visitAShr(User &I) { visitShift(I, ISD::SRA); }
- void visitICmp(User &I);
- void visitFCmp(User &I);
+ void visitInvoke(const InvokeInst &I);
+ void visitUnwind(const UnwindInst &I);
+
+ void visitBinary(const User &I, unsigned OpCode);
+ void visitShift(const User &I, unsigned Opcode);
+ void visitAdd(const User &I) { visitBinary(I, ISD::ADD); }
+ void visitFAdd(const User &I) { visitBinary(I, ISD::FADD); }
+ void visitSub(const User &I) { visitBinary(I, ISD::SUB); }
+ void visitFSub(const User &I);
+ void visitMul(const User &I) { visitBinary(I, ISD::MUL); }
+ void visitFMul(const User &I) { visitBinary(I, ISD::FMUL); }
+ void visitURem(const User &I) { visitBinary(I, ISD::UREM); }
+ void visitSRem(const User &I) { visitBinary(I, ISD::SREM); }
+ void visitFRem(const User &I) { visitBinary(I, ISD::FREM); }
+ void visitUDiv(const User &I) { visitBinary(I, ISD::UDIV); }
+ void visitSDiv(const User &I) { visitBinary(I, ISD::SDIV); }
+ void visitFDiv(const User &I) { visitBinary(I, ISD::FDIV); }
+ void visitAnd (const User &I) { visitBinary(I, ISD::AND); }
+ void visitOr (const User &I) { visitBinary(I, ISD::OR); }
+ void visitXor (const User &I) { visitBinary(I, ISD::XOR); }
+ void visitShl (const User &I) { visitShift(I, ISD::SHL); }
+ void visitLShr(const User &I) { visitShift(I, ISD::SRL); }
+ void visitAShr(const User &I) { visitShift(I, ISD::SRA); }
+ void visitICmp(const User &I);
+ void visitFCmp(const User &I);
// Visit the conversion instructions
- void visitTrunc(User &I);
- void visitZExt(User &I);
- void visitSExt(User &I);
- void visitFPTrunc(User &I);
- void visitFPExt(User &I);
- void visitFPToUI(User &I);
- void visitFPToSI(User &I);
- void visitUIToFP(User &I);
- void visitSIToFP(User &I);
- void visitPtrToInt(User &I);
- void visitIntToPtr(User &I);
- void visitBitCast(User &I);
-
- void visitExtractElement(User &I);
- void visitInsertElement(User &I);
- void visitShuffleVector(User &I);
-
- void visitExtractValue(ExtractValueInst &I);
- void visitInsertValue(InsertValueInst &I);
-
- void visitGetElementPtr(User &I);
- void visitSelect(User &I);
-
- void visitAlloca(AllocaInst &I);
- void visitLoad(LoadInst &I);
- void visitStore(StoreInst &I);
- void visitPHI(PHINode &I) { } // PHI nodes are handled specially.
- void visitCall(CallInst &I);
- bool visitMemCmpCall(CallInst &I);
+ void visitTrunc(const User &I);
+ void visitZExt(const User &I);
+ void visitSExt(const User &I);
+ void visitFPTrunc(const User &I);
+ void visitFPExt(const User &I);
+ void visitFPToUI(const User &I);
+ void visitFPToSI(const User &I);
+ void visitUIToFP(const User &I);
+ void visitSIToFP(const User &I);
+ void visitPtrToInt(const User &I);
+ void visitIntToPtr(const User &I);
+ void visitBitCast(const User &I);
+
+ void visitExtractElement(const User &I);
+ void visitInsertElement(const User &I);
+ void visitShuffleVector(const User &I);
+
+ void visitExtractValue(const ExtractValueInst &I);
+ void visitInsertValue(const InsertValueInst &I);
+
+ void visitGetElementPtr(const User &I);
+ void visitSelect(const User &I);
+
+ void visitAlloca(const AllocaInst &I);
+ void visitLoad(const LoadInst &I);
+ void visitStore(const StoreInst &I);
+ void visitPHI(const PHINode &I);
+ void visitCall(const CallInst &I);
+ bool visitMemCmpCall(const CallInst &I);
- void visitInlineAsm(CallSite CS);
- const char *visitIntrinsicCall(CallInst &I, unsigned Intrinsic);
- void visitTargetIntrinsic(CallInst &I, unsigned Intrinsic);
-
- void visitPow(CallInst &I);
- void visitExp2(CallInst &I);
- void visitExp(CallInst &I);
- void visitLog(CallInst &I);
- void visitLog2(CallInst &I);
- void visitLog10(CallInst &I);
-
- void visitVAStart(CallInst &I);
- void visitVAArg(VAArgInst &I);
- void visitVAEnd(CallInst &I);
- void visitVACopy(CallInst &I);
-
- void visitUserOp1(Instruction &I) {
+ void visitInlineAsm(ImmutableCallSite CS);
+ const char *visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
+ void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
+
+ void visitPow(const CallInst &I);
+ void visitExp2(const CallInst &I);
+ void visitExp(const CallInst &I);
+ void visitLog(const CallInst &I);
+ void visitLog2(const CallInst &I);
+ void visitLog10(const CallInst &I);
+
+ void visitVAStart(const CallInst &I);
+ void visitVAArg(const VAArgInst &I);
+ void visitVAEnd(const CallInst &I);
+ void visitVACopy(const CallInst &I);
+
+ void visitUserOp1(const Instruction &I) {
llvm_unreachable("UserOp1 should not exist at instruction selection time!");
}
- void visitUserOp2(Instruction &I) {
+ void visitUserOp2(const Instruction &I) {
llvm_unreachable("UserOp2 should not exist at instruction selection time!");
}
- const char *implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op);
- const char *implVisitAluOverflow(CallInst &I, ISD::NodeType Op);
+ const char *implVisitBinaryAtomic(const CallInst& I, ISD::NodeType Op);
+ const char *implVisitAluOverflow(const CallInst &I, ISD::NodeType Op);
+
+ void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB);
+
+ /// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a
+ /// function argument, create the corresponding DBG_VALUE machine instruction
+ /// for it now. At the end of instruction selection, they will be inserted to
+ /// the entry BB.
+ bool EmitFuncArgumentDbgValue(const DbgValueInst &DI,
+ const Value *V, MDNode *Variable,
+ uint64_t Offset, const SDValue &N);
};
} // end namespace llvm
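
Nearly every header change above is const-propagation: the visit* overloads now take const references, so the builder can lower an immutable Function. A compilable toy with hypothetical names and opcodes, showing the same opcode-dispatch shape:

#include <cstdio>

struct Instruction { unsigned Opcode; };

struct Builder {
  // As in the patched header: the visitors read the IR and never mutate
  // it, so every overload takes a const reference.
  void visit(const Instruction &I) { visit(I.Opcode, I); }
  void visit(unsigned Opcode, const Instruction &I) {
    switch (Opcode) {
    case 13: visitAdd(I); break;
    case 15: visitSub(I); break;
    default: std::printf("unhandled opcode %u\n", Opcode); break;
    }
  }
  void visitAdd(const Instruction &) { std::printf("lower add\n"); }
  void visitSub(const Instruction &) { std::printf("lower sub\n"); }
};

int main() {
  Builder B;
  Instruction Add = { 13 }, Sub = { 15 };
  B.visit(Add);
  B.visit(Sub);
  return 0;
}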
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 9b137a52f7c3..422cb7aaafc7 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -19,10 +19,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Constants.h"
-#include "llvm/CallingConv.h"
-#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
#include "llvm/InlineAsm.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
@@ -32,18 +29,13 @@
#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
@@ -52,7 +44,6 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/Statistic.h"
@@ -69,10 +60,6 @@ EnableFastISelVerbose("fast-isel-verbose", cl::Hidden,
static cl::opt<bool>
EnableFastISelAbort("fast-isel-abort", cl::Hidden,
cl::desc("Enable abort calls when \"fast\" instruction fails"));
-static cl::opt<bool>
-SchedLiveInCopies("schedule-livein-copies", cl::Hidden,
- cl::desc("Schedule copies of livein registers"),
- cl::init(false));
#ifndef NDEBUG
static cl::opt<bool>
@@ -161,9 +148,9 @@ namespace llvm {
// When new basic blocks are inserted and the edges from MBB to its successors
// are modified, the method should insert pairs of <OldSucc, NewSucc> into the
// DenseMap.
-MachineBasicBlock *TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *MBB,
- DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
+MachineBasicBlock *
+TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
#ifndef NDEBUG
dbgs() << "If a target marks an instruction with "
"'usesCustomInserter', it must implement "
@@ -173,115 +160,15 @@ MachineBasicBlock *TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
return 0;
}
-/// EmitLiveInCopy - Emit a copy for a live in physical register. If the
-/// physical register has only a single copy use, then coalesced the copy
-/// if possible.
-static void EmitLiveInCopy(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator &InsertPos,
- unsigned VirtReg, unsigned PhysReg,
- const TargetRegisterClass *RC,
- DenseMap<MachineInstr*, unsigned> &CopyRegMap,
- const MachineRegisterInfo &MRI,
- const TargetRegisterInfo &TRI,
- const TargetInstrInfo &TII) {
- unsigned NumUses = 0;
- MachineInstr *UseMI = NULL;
- for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(VirtReg),
- UE = MRI.use_end(); UI != UE; ++UI) {
- UseMI = &*UI;
- if (++NumUses > 1)
- break;
- }
-
- // If the number of uses is not one, or the use is not a move instruction,
- // don't coalesce. Also, only coalesce away a virtual register to virtual
- // register copy.
- bool Coalesced = false;
- unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
- if (NumUses == 1 &&
- TII.isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
- TargetRegisterInfo::isVirtualRegister(DstReg)) {
- VirtReg = DstReg;
- Coalesced = true;
- }
-
- // Now find an ideal location to insert the copy.
- MachineBasicBlock::iterator Pos = InsertPos;
- while (Pos != MBB->begin()) {
- MachineInstr *PrevMI = prior(Pos);
- DenseMap<MachineInstr*, unsigned>::iterator RI = CopyRegMap.find(PrevMI);
- // copyRegToReg might emit multiple instructions to do a copy.
- unsigned CopyDstReg = (RI == CopyRegMap.end()) ? 0 : RI->second;
- if (CopyDstReg && !TRI.regsOverlap(CopyDstReg, PhysReg))
- // This is what the BB looks like right now:
- // r1024 = mov r0
- // ...
- // r1 = mov r1024
- //
- // We want to insert "r1025 = mov r1". Inserting this copy below the
- // move to r1024 makes it impossible for that move to be coalesced.
- //
- // r1025 = mov r1
- // r1024 = mov r0
- // ...
- // r1 = mov r1024
- // r2 = mov r1025
- break; // Woot! Found a good location.
- --Pos;
- }
-
- bool Emitted = TII.copyRegToReg(*MBB, Pos, VirtReg, PhysReg, RC, RC);
- assert(Emitted && "Unable to issue a live-in copy instruction!\n");
- (void) Emitted;
-
- CopyRegMap.insert(std::make_pair(prior(Pos), VirtReg));
- if (Coalesced) {
- if (&*InsertPos == UseMI) ++InsertPos;
- MBB->erase(UseMI);
- }
-}
-
-/// EmitLiveInCopies - If this is the first basic block in the function,
-/// and if it has live ins that need to be copied into vregs, emit the
-/// copies into the block.
-static void EmitLiveInCopies(MachineBasicBlock *EntryMBB,
- const MachineRegisterInfo &MRI,
- const TargetRegisterInfo &TRI,
- const TargetInstrInfo &TII) {
- if (SchedLiveInCopies) {
- // Emit the copies at a heuristically-determined location in the block.
- DenseMap<MachineInstr*, unsigned> CopyRegMap;
- MachineBasicBlock::iterator InsertPos = EntryMBB->begin();
- for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
- E = MRI.livein_end(); LI != E; ++LI)
- if (LI->second) {
- const TargetRegisterClass *RC = MRI.getRegClass(LI->second);
- EmitLiveInCopy(EntryMBB, InsertPos, LI->second, LI->first,
- RC, CopyRegMap, MRI, TRI, TII);
- }
- } else {
- // Emit the copies into the top of the block.
- for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
- E = MRI.livein_end(); LI != E; ++LI)
- if (LI->second) {
- const TargetRegisterClass *RC = MRI.getRegClass(LI->second);
- bool Emitted = TII.copyRegToReg(*EntryMBB, EntryMBB->begin(),
- LI->second, LI->first, RC, RC);
- assert(Emitted && "Unable to issue a live-in copy instruction!\n");
- (void) Emitted;
- }
- }
-}
-
//===----------------------------------------------------------------------===//
// SelectionDAGISel code
//===----------------------------------------------------------------------===//
-SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL) :
+SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, CodeGenOpt::Level OL) :
MachineFunctionPass(&ID), TM(tm), TLI(*tm.getTargetLowering()),
FuncInfo(new FunctionLoweringInfo(TLI)),
- CurDAG(new SelectionDAG(TLI, *FuncInfo)),
- SDB(new SelectionDAGBuilder(*CurDAG, TLI, *FuncInfo, OL)),
+ CurDAG(new SelectionDAG(tm, *FuncInfo)),
+ SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)),
GFI(),
OptLevel(OL),
DAGSize(0)
@@ -293,10 +180,6 @@ SelectionDAGISel::~SelectionDAGISel() {
delete FuncInfo;
}
-unsigned SelectionDAGISel::MakeReg(EVT VT) {
- return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT));
-}
-
void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AliasAnalysis>();
AU.addPreserved<AliasAnalysis>();
@@ -306,129 +189,75 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
}
bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
- Function &Fn = *mf.getFunction();
-
// Do some sanity-checking on the command-line options.
assert((!EnableFastISelVerbose || EnableFastISel) &&
"-fast-isel-verbose requires -fast-isel");
assert((!EnableFastISelAbort || EnableFastISel) &&
"-fast-isel-abort requires -fast-isel");
- // Get alias analysis for load/store combining.
- AA = &getAnalysis<AliasAnalysis>();
-
- MF = &mf;
+ const Function &Fn = *mf.getFunction();
const TargetInstrInfo &TII = *TM.getInstrInfo();
const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
- if (Fn.hasGC())
- GFI = &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn);
- else
- GFI = 0;
+ MF = &mf;
RegInfo = &MF->getRegInfo();
+ AA = &getAnalysis<AliasAnalysis>();
+ GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : 0;
+
DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
CurDAG->init(*MF);
FuncInfo->set(Fn, *MF, EnableFastISel);
SDB->init(GFI, *AA);
- for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
- if (InvokeInst *Invoke = dyn_cast<InvokeInst>(I->getTerminator()))
- // Mark landing pad.
- FuncInfo->MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad();
-
- SelectAllBasicBlocks(Fn, *MF, TII);
+ SelectAllBasicBlocks(Fn);
// If the first basic block in the function has live ins that need to be
// copied into vregs, emit the copies into the top of the block before
// emitting the code for the block.
- EmitLiveInCopies(MF->begin(), *RegInfo, TRI, TII);
-
- // Add function live-ins to entry block live-in set.
- for (MachineRegisterInfo::livein_iterator I = RegInfo->livein_begin(),
- E = RegInfo->livein_end(); I != E; ++I)
- MF->begin()->addLiveIn(I->first);
-
-#ifndef NDEBUG
- assert(FuncInfo->CatchInfoFound.size() == FuncInfo->CatchInfoLost.size() &&
- "Not all catch info was assigned to a landing pad!");
-#endif
+ MachineBasicBlock *EntryMBB = MF->begin();
+ RegInfo->EmitLiveInCopies(EntryMBB, TRI, TII);
+
+ // Insert DBG_VALUE instructions for function arguments to the entry block.
+ for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) {
+ MachineInstr *MI = FuncInfo->ArgDbgValues[e-i-1];
+ unsigned Reg = MI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ EntryMBB->insert(EntryMBB->begin(), MI);
+ else {
+ MachineInstr *Def = RegInfo->getVRegDef(Reg);
+ MachineBasicBlock::iterator InsertPos = Def;
+ // FIXME: VR def may not be in entry block.
+ Def->getParent()->insert(llvm::next(InsertPos), MI);
+ }
+ }
+ // Release function-specific state. SDB and CurDAG are already cleared
+ // at this point.
FuncInfo->clear();
return true;
}
-/// SetDebugLoc - Update MF's and SDB's DebugLocs if debug information is
-/// attached with this instruction.
-static void SetDebugLoc(Instruction *I, SelectionDAGBuilder *SDB,
- FastISel *FastIS, MachineFunction *MF) {
- DebugLoc DL = I->getDebugLoc();
- if (DL.isUnknown()) return;
-
- SDB->setCurDebugLoc(DL);
-
- if (FastIS)
- FastIS->setCurDebugLoc(DL);
-
- // If the function doesn't have a default debug location yet, set
- // it. This is kind of a hack.
- if (MF->getDefaultDebugLoc().isUnknown())
- MF->setDefaultDebugLoc(DL);
-}
-
-/// ResetDebugLoc - Set MF's and SDB's DebugLocs to Unknown.
-static void ResetDebugLoc(SelectionDAGBuilder *SDB, FastISel *FastIS) {
- SDB->setCurDebugLoc(DebugLoc());
- if (FastIS)
- FastIS->setCurDebugLoc(DebugLoc());
-}
-
-void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB,
- BasicBlock::iterator Begin,
- BasicBlock::iterator End,
- bool &HadTailCall) {
- SDB->setCurrentBasicBlock(BB);
-
+MachineBasicBlock *
+SelectionDAGISel::SelectBasicBlock(MachineBasicBlock *BB,
+ const BasicBlock *LLVMBB,
+ BasicBlock::const_iterator Begin,
+ BasicBlock::const_iterator End,
+ bool &HadTailCall) {
// Lower all of the non-terminator instructions. If a call is emitted
- // as a tail call, cease emitting nodes for this block.
- for (BasicBlock::iterator I = Begin; I != End && !SDB->HasTailCall; ++I) {
- SetDebugLoc(I, SDB, 0, MF);
-
- if (!isa<TerminatorInst>(I)) {
- SDB->visit(*I);
-
- // Set the current debug location back to "unknown" so that it doesn't
- // spuriously apply to subsequent instructions.
- ResetDebugLoc(SDB, 0);
- }
- }
-
- if (!SDB->HasTailCall) {
- // Ensure that all instructions which are used outside of their defining
- // blocks are available as virtual registers. Invoke is handled elsewhere.
- for (BasicBlock::iterator I = Begin; I != End; ++I)
- if (!isa<PHINode>(I) && !isa<InvokeInst>(I))
- SDB->CopyToExportRegsIfNeeded(I);
-
- // Handle PHI nodes in successor blocks.
- if (End == LLVMBB->end()) {
- HandlePHINodesInSuccessorBlocks(LLVMBB);
-
- // Lower the terminator after the copies are emitted.
- SetDebugLoc(LLVMBB->getTerminator(), SDB, 0, MF);
- SDB->visit(*LLVMBB->getTerminator());
- ResetDebugLoc(SDB, 0);
- }
- }
+ // as a tail call, cease emitting nodes for this block. Terminators
+ // are handled below.
+ for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I)
+ SDB->visit(*I);
// Make sure the root of the DAG is up-to-date.
CurDAG->setRoot(SDB->getControlRoot());
-
- // Final step, emit the lowered DAG as machine code.
- CodeGenAndEmitDAG();
HadTailCall = SDB->HasTailCall;
SDB->clear();
+
+ // Final step, emit the lowered DAG as machine code.
+ return CodeGenAndEmitDAG(BB);
}
namespace {
@@ -493,7 +322,7 @@ void SelectionDAGISel::ShrinkDemandedOps() {
InWorklist.insert(I);
}
- TargetLowering::TargetLoweringOpt TLO(*CurDAG, true);
+ TargetLowering::TargetLoweringOpt TLO(*CurDAG, true, true, true);
while (!Worklist.empty()) {
SDNode *N = Worklist.pop_back_val();
InWorklist.erase(N);
@@ -613,7 +442,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() {
} while (!Worklist.empty());
}
-void SelectionDAGISel::CodeGenAndEmitDAG() {
+MachineBasicBlock *SelectionDAGISel::CodeGenAndEmitDAG(MachineBasicBlock *BB) {
std::string GroupName;
if (TimePassesIsEnabled)
GroupName = "Instruction Selection and Scheduling";
@@ -763,9 +592,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
// inserted into.
if (TimePassesIsEnabled) {
NamedRegionTimer T("Instruction Creation", GroupName);
- BB = Scheduler->EmitSchedule(&SDB->EdgeMapping);
+ BB = Scheduler->EmitSchedule();
} else {
- BB = Scheduler->EmitSchedule(&SDB->EdgeMapping);
+ BB = Scheduler->EmitSchedule();
}
// Free the scheduler state.
@@ -776,8 +605,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
delete Scheduler;
}
- DEBUG(dbgs() << "Selected machine code:\n");
- DEBUG(BB->dump());
+ // Free the SelectionDAG state, now that we're finished with it.
+ CurDAG->clear();
+
+ return BB;
}
void SelectionDAGISel::DoInstructionSelection() {
@@ -836,128 +667,94 @@ void SelectionDAGISel::DoInstructionSelection() {
PostprocessISelDAG();
}
+/// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and
+/// do other setup for EH landing-pad blocks.
+void SelectionDAGISel::PrepareEHLandingPad(MachineBasicBlock *BB) {
+ // Add a label to mark the beginning of the landing pad. Deletion of the
+ // landing pad can thus be detected via the MachineModuleInfo.
+ MCSymbol *Label = MF->getMMI().addLandingPad(BB);
+
+ const TargetInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL);
+ BuildMI(BB, SDB->getCurDebugLoc(), II).addSym(Label);
+
+ // Mark exception register as live in.
+ unsigned Reg = TLI.getExceptionAddressRegister();
+ if (Reg) BB->addLiveIn(Reg);
+
+ // Mark exception selector register as live in.
+ Reg = TLI.getExceptionSelectorRegister();
+ if (Reg) BB->addLiveIn(Reg);
+
+ // FIXME: Hack around an exception handling flaw (PR1508): the personality
+ // function and list of typeids logically belong to the invoke (or, if you
+ // like, the basic block containing the invoke), and need to be associated
+ // with it in the dwarf exception handling tables. Currently however the
+ // information is provided by an intrinsic (eh.selector) that can be moved
+ // to unexpected places by the optimizers: if the unwind edge is critical,
+ // then breaking it can result in the intrinsics being in the successor of
+ // the landing pad, not the landing pad itself. This results
+ // in exceptions not being caught because no typeids are associated with
+ // the invoke. This may not be the only way things can go wrong, but it
+ // is the only way we try to work around for the moment.
+ const BasicBlock *LLVMBB = BB->getBasicBlock();
+ const BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator());
+
+ if (Br && Br->isUnconditional()) { // Critical edge?
+ BasicBlock::const_iterator I, E;
+ for (I = LLVMBB->begin(), E = --LLVMBB->end(); I != E; ++I)
+ if (isa<EHSelectorInst>(I))
+ break;
-void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
- MachineFunction &MF,
- const TargetInstrInfo &TII) {
+ if (I == E)
+ // No catch info found - try to extract some from the successor.
+ CopyCatchInfo(Br->getSuccessor(0), LLVMBB, &MF->getMMI(), *FuncInfo);
+ }
+}
+
+void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Initialize the Fast-ISel state, if needed.
FastISel *FastIS = 0;
if (EnableFastISel)
- FastIS = TLI.createFastISel(MF, FuncInfo->ValueMap, FuncInfo->MBBMap,
- FuncInfo->StaticAllocaMap
+ FastIS = TLI.createFastISel(*MF, FuncInfo->ValueMap, FuncInfo->MBBMap,
+ FuncInfo->StaticAllocaMap,
+ FuncInfo->PHINodesToUpdate
#ifndef NDEBUG
, FuncInfo->CatchInfoLost
#endif
);
// Iterate over all basic blocks in the function.
- for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
- BasicBlock *LLVMBB = &*I;
- BB = FuncInfo->MBBMap[LLVMBB];
+ for (Function::const_iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
+ const BasicBlock *LLVMBB = &*I;
+ MachineBasicBlock *BB = FuncInfo->MBBMap[LLVMBB];
- BasicBlock::iterator const Begin = LLVMBB->begin();
- BasicBlock::iterator const End = LLVMBB->end();
- BasicBlock::iterator BI = Begin;
+ BasicBlock::const_iterator const Begin = LLVMBB->getFirstNonPHI();
+ BasicBlock::const_iterator const End = LLVMBB->end();
+ BasicBlock::const_iterator BI = Begin;
// Lower any arguments needed in this block if this is the entry block.
- bool SuppressFastISel = false;
- if (LLVMBB == &Fn.getEntryBlock()) {
+ if (LLVMBB == &Fn.getEntryBlock())
LowerArguments(LLVMBB);
- // If any of the arguments has the byval attribute, forgo
- // fast-isel in the entry block.
- if (FastIS) {
- unsigned j = 1;
- for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end();
- I != E; ++I, ++j)
- if (Fn.paramHasAttr(j, Attribute::ByVal)) {
- if (EnableFastISelVerbose || EnableFastISelAbort)
- dbgs() << "FastISel skips entry block due to byval argument\n";
- SuppressFastISel = true;
- break;
- }
- }
- }
-
- if (BB->isLandingPad()) {
- // Add a label to mark the beginning of the landing pad. Deletion of the
- // landing pad can thus be detected via the MachineModuleInfo.
- MCSymbol *Label = MF.getMMI().addLandingPad(BB);
-
- const TargetInstrDesc &II = TII.get(TargetOpcode::EH_LABEL);
- BuildMI(BB, SDB->getCurDebugLoc(), II).addSym(Label);
-
- // Mark exception register as live in.
- unsigned Reg = TLI.getExceptionAddressRegister();
- if (Reg) BB->addLiveIn(Reg);
-
- // Mark exception selector register as live in.
- Reg = TLI.getExceptionSelectorRegister();
- if (Reg) BB->addLiveIn(Reg);
-
- // FIXME: Hack around an exception handling flaw (PR1508): the personality
- // function and list of typeids logically belong to the invoke (or, if you
- // like, the basic block containing the invoke), and need to be associated
- // with it in the dwarf exception handling tables. Currently however the
- // information is provided by an intrinsic (eh.selector) that can be moved
- // to unexpected places by the optimizers: if the unwind edge is critical,
- // then breaking it can result in the intrinsics being in the successor of
- // the landing pad, not the landing pad itself. This results
- // in exceptions not being caught because no typeids are associated with
- // the invoke. This may not be the only way things can go wrong, but it
- // is the only way we try to work around for the moment.
- BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator());
-
- if (Br && Br->isUnconditional()) { // Critical edge?
- BasicBlock::iterator I, E;
- for (I = LLVMBB->begin(), E = --LLVMBB->end(); I != E; ++I)
- if (isa<EHSelectorInst>(I))
- break;
-
- if (I == E)
- // No catch info found - try to extract some from the successor.
- CopyCatchInfo(Br->getSuccessor(0), LLVMBB, &MF.getMMI(), *FuncInfo);
- }
- }
-
+ // Setup an EH landing-pad block.
+ if (BB->isLandingPad())
+ PrepareEHLandingPad(BB);
+
// Before doing SelectionDAG ISel, see if FastISel has been requested.
- if (FastIS && !SuppressFastISel) {
+ if (FastIS) {
// Emit code for any incoming arguments. This must happen before
// beginning FastISel on the entry block.
if (LLVMBB == &Fn.getEntryBlock()) {
CurDAG->setRoot(SDB->getControlRoot());
- CodeGenAndEmitDAG();
SDB->clear();
+ BB = CodeGenAndEmitDAG(BB);
}
FastIS->startNewBlock(BB);
// Do FastISel on as many instructions as possible.
for (; BI != End; ++BI) {
- // Just before the terminator instruction, insert instructions to
- // feed PHI nodes in successor blocks.
- if (isa<TerminatorInst>(BI))
- if (!HandlePHINodesInSuccessorBlocksFast(LLVMBB, FastIS)) {
- ++NumFastIselFailures;
- ResetDebugLoc(SDB, FastIS);
- if (EnableFastISelVerbose || EnableFastISelAbort) {
- dbgs() << "FastISel miss: ";
- BI->dump();
- }
- assert(!EnableFastISelAbort &&
- "FastISel didn't handle a PHI in a successor");
- break;
- }
-
- SetDebugLoc(BI, SDB, FastIS, &MF);
-
// Try to select the instruction with FastISel.
- if (FastIS->SelectInstruction(BI)) {
- ResetDebugLoc(SDB, FastIS);
+ if (FastIS->SelectInstruction(BI))
continue;
- }
-
- // Clear out the debug location so that it doesn't carry over to
- // unrelated instructions.
- ResetDebugLoc(SDB, FastIS);
// Then handle certain instructions as single-LLVM-Instruction blocks.
if (isa<CallInst>(BI)) {
@@ -967,14 +764,14 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
BI->dump();
}
- if (!BI->getType()->isVoidTy()) {
+ if (!BI->getType()->isVoidTy() && !BI->use_empty()) {
unsigned &R = FuncInfo->ValueMap[BI];
if (!R)
R = FuncInfo->CreateRegForValue(BI);
}
bool HadTailCall = false;
- SelectBasicBlock(LLVMBB, BI, llvm::next(BI), HadTailCall);
+ BB = SelectBasicBlock(BB, LLVMBB, BI, llvm::next(BI), HadTailCall);
// If the call was emitted as a tail call, we're done with the block.
if (HadTailCall) {
@@ -1010,44 +807,41 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
// block.
if (BI != End) {
bool HadTailCall;
- SelectBasicBlock(LLVMBB, BI, End, HadTailCall);
+ BB = SelectBasicBlock(BB, LLVMBB, BI, End, HadTailCall);
}
- FinishBasicBlock();
+ FinishBasicBlock(BB);
+ FuncInfo->PHINodesToUpdate.clear();
}
delete FastIS;
}
void
-SelectionDAGISel::FinishBasicBlock() {
-
- DEBUG(dbgs() << "Target-post-processed machine code:\n");
- DEBUG(BB->dump());
+SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) {
DEBUG(dbgs() << "Total amount of phi nodes to update: "
- << SDB->PHINodesToUpdate.size() << "\n");
- DEBUG(for (unsigned i = 0, e = SDB->PHINodesToUpdate.size(); i != e; ++i)
+ << FuncInfo->PHINodesToUpdate.size() << "\n");
+ DEBUG(for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i)
dbgs() << "Node " << i << " : ("
- << SDB->PHINodesToUpdate[i].first
- << ", " << SDB->PHINodesToUpdate[i].second << ")\n");
+ << FuncInfo->PHINodesToUpdate[i].first
+ << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n");
// Next, now that we know what the last MBB the LLVM BB expanded is, update
// PHI nodes in successors.
if (SDB->SwitchCases.empty() &&
SDB->JTCases.empty() &&
SDB->BitTestCases.empty()) {
- for (unsigned i = 0, e = SDB->PHINodesToUpdate.size(); i != e; ++i) {
- MachineInstr *PHI = SDB->PHINodesToUpdate[i].first;
+ for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) {
+ MachineInstr *PHI = FuncInfo->PHINodesToUpdate[i].first;
assert(PHI->isPHI() &&
"This is not a machine PHI node that we are updating!");
if (!BB->isSuccessor(PHI->getParent()))
continue;
- PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[i].second,
- false));
+ PHI->addOperand(
+ MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[i].second, false));
PHI->addOperand(MachineOperand::CreateMBB(BB));
}
- SDB->PHINodesToUpdate.clear();
return;
}
@@ -1056,37 +850,38 @@ SelectionDAGISel::FinishBasicBlock() {
if (!SDB->BitTestCases[i].Emitted) {
// Set the current basic block to the mbb we wish to insert the code into
BB = SDB->BitTestCases[i].Parent;
- SDB->setCurrentBasicBlock(BB);
// Emit the code
- SDB->visitBitTestHeader(SDB->BitTestCases[i]);
+ SDB->visitBitTestHeader(SDB->BitTestCases[i], BB);
CurDAG->setRoot(SDB->getRoot());
- CodeGenAndEmitDAG();
SDB->clear();
+ BB = CodeGenAndEmitDAG(BB);
}
for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) {
// Set the current basic block to the mbb we wish to insert the code into
BB = SDB->BitTestCases[i].Cases[j].ThisBB;
- SDB->setCurrentBasicBlock(BB);
// Emit the code
if (j+1 != ej)
SDB->visitBitTestCase(SDB->BitTestCases[i].Cases[j+1].ThisBB,
SDB->BitTestCases[i].Reg,
- SDB->BitTestCases[i].Cases[j]);
+ SDB->BitTestCases[i].Cases[j],
+ BB);
else
SDB->visitBitTestCase(SDB->BitTestCases[i].Default,
SDB->BitTestCases[i].Reg,
- SDB->BitTestCases[i].Cases[j]);
+ SDB->BitTestCases[i].Cases[j],
+ BB);
CurDAG->setRoot(SDB->getRoot());
- CodeGenAndEmitDAG();
SDB->clear();
+ BB = CodeGenAndEmitDAG(BB);
}
// Update PHI Nodes
- for (unsigned pi = 0, pe = SDB->PHINodesToUpdate.size(); pi != pe; ++pi) {
- MachineInstr *PHI = SDB->PHINodesToUpdate[pi].first;
+ for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size();
+ pi != pe; ++pi) {
+ MachineInstr *PHI = FuncInfo->PHINodesToUpdate[pi].first;
MachineBasicBlock *PHIBB = PHI->getParent();
assert(PHI->isPHI() &&
"This is not a machine PHI node that we are updating!");
@@ -1094,10 +889,12 @@ SelectionDAGISel::FinishBasicBlock() {
// from last "case" BB.
if (PHIBB == SDB->BitTestCases[i].Default) {
PHI->addOperand(MachineOperand::
- CreateReg(SDB->PHINodesToUpdate[pi].second, false));
+ CreateReg(FuncInfo->PHINodesToUpdate[pi].second,
+ false));
PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Parent));
PHI->addOperand(MachineOperand::
- CreateReg(SDB->PHINodesToUpdate[pi].second, false));
+ CreateReg(FuncInfo->PHINodesToUpdate[pi].second,
+ false));
PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Cases.
back().ThisBB));
}
@@ -1107,7 +904,8 @@ SelectionDAGISel::FinishBasicBlock() {
MachineBasicBlock* cBB = SDB->BitTestCases[i].Cases[j].ThisBB;
if (cBB->isSuccessor(PHIBB)) {
PHI->addOperand(MachineOperand::
- CreateReg(SDB->PHINodesToUpdate[pi].second, false));
+ CreateReg(FuncInfo->PHINodesToUpdate[pi].second,
+ false));
PHI->addOperand(MachineOperand::CreateMBB(cBB));
}
}
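
For context on what BitTestCases drives here: switch lowering can cover several case values with one range check plus a mask probe, biasing the switch value by the lowest case. A standalone sketch with made-up case values (the exact Range semantics are assumed for illustration):

#include <cstdint>
#include <cstdio>

// Bit-test switch idiom: cases {2, 5, 7} with First = 2 become one range
// check and a single mask probe instead of a chain of compares.
int main() {
  const uint64_t First = 2, Range = 6;  // covers values 2..7
  const uint64_t CaseMask = (1ull << (2 - First)) |
                            (1ull << (5 - First)) |
                            (1ull << (7 - First));
  for (uint64_t X = 0; X < 10; ++X) {
    uint64_t Biased = X - First;        // wraps for X < First, so the
    bool Hit = Biased < Range &&        // range check still rejects it
               ((1ull << Biased) & CaseMask);
    std::printf("%llu -> %s\n", (unsigned long long)X,
                Hit ? "case" : "default");
  }
  return 0;
}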
@@ -1123,40 +921,42 @@ SelectionDAGISel::FinishBasicBlock() {
if (!SDB->JTCases[i].first.Emitted) {
// Set the current basic block to the mbb we wish to insert the code into
BB = SDB->JTCases[i].first.HeaderBB;
- SDB->setCurrentBasicBlock(BB);
// Emit the code
- SDB->visitJumpTableHeader(SDB->JTCases[i].second, SDB->JTCases[i].first);
+ SDB->visitJumpTableHeader(SDB->JTCases[i].second, SDB->JTCases[i].first,
+ BB);
CurDAG->setRoot(SDB->getRoot());
- CodeGenAndEmitDAG();
SDB->clear();
+ BB = CodeGenAndEmitDAG(BB);
}
// Set the current basic block to the mbb we wish to insert the code into
BB = SDB->JTCases[i].second.MBB;
- SDB->setCurrentBasicBlock(BB);
// Emit the code
SDB->visitJumpTable(SDB->JTCases[i].second);
CurDAG->setRoot(SDB->getRoot());
- CodeGenAndEmitDAG();
SDB->clear();
+ BB = CodeGenAndEmitDAG(BB);
// Update PHI Nodes
- for (unsigned pi = 0, pe = SDB->PHINodesToUpdate.size(); pi != pe; ++pi) {
- MachineInstr *PHI = SDB->PHINodesToUpdate[pi].first;
+ for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size();
+ pi != pe; ++pi) {
+ MachineInstr *PHI = FuncInfo->PHINodesToUpdate[pi].first;
MachineBasicBlock *PHIBB = PHI->getParent();
assert(PHI->isPHI() &&
"This is not a machine PHI node that we are updating!");
// "default" BB. We can go there only from header BB.
if (PHIBB == SDB->JTCases[i].second.Default) {
PHI->addOperand
- (MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second, false));
+ (MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[pi].second,
+ false));
PHI->addOperand
(MachineOperand::CreateMBB(SDB->JTCases[i].first.HeaderBB));
}
// JT BB. Just iterate over successors here
if (BB->isSuccessor(PHIBB)) {
PHI->addOperand
- (MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second, false));
+ (MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[pi].second,
+ false));
PHI->addOperand(MachineOperand::CreateMBB(BB));
}
}
@@ -1165,13 +965,13 @@ SelectionDAGISel::FinishBasicBlock() {
// If the switch block involved a branch to one of the actual successors, we
// need to update PHI nodes in that block.
- for (unsigned i = 0, e = SDB->PHINodesToUpdate.size(); i != e; ++i) {
- MachineInstr *PHI = SDB->PHINodesToUpdate[i].first;
+ for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) {
+ MachineInstr *PHI = FuncInfo->PHINodesToUpdate[i].first;
assert(PHI->isPHI() &&
"This is not a machine PHI node that we are updating!");
if (BB->isSuccessor(PHI->getParent())) {
- PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[i].second,
- false));
+ PHI->addOperand(
+ MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[i].second, false));
PHI->addOperand(MachineOperand::CreateMBB(BB));
}
}
@@ -1181,26 +981,26 @@ SelectionDAGISel::FinishBasicBlock() {
for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) {
// Set the current basic block to the mbb we wish to insert the code into
MachineBasicBlock *ThisBB = BB = SDB->SwitchCases[i].ThisBB;
- SDB->setCurrentBasicBlock(BB);
- // Emit the code
- SDB->visitSwitchCase(SDB->SwitchCases[i]);
+ // Determine the unique successors.
+ SmallVector<MachineBasicBlock *, 2> Succs;
+ Succs.push_back(SDB->SwitchCases[i].TrueBB);
+ if (SDB->SwitchCases[i].TrueBB != SDB->SwitchCases[i].FalseBB)
+ Succs.push_back(SDB->SwitchCases[i].FalseBB);
+
+ // Emit the code. Note that this could result in ThisBB being split, so
+ // we need to check for updates.
+ SDB->visitSwitchCase(SDB->SwitchCases[i], BB);
CurDAG->setRoot(SDB->getRoot());
- CodeGenAndEmitDAG();
+ SDB->clear();
+ ThisBB = CodeGenAndEmitDAG(BB);
// Handle any PHI nodes in successors of this chunk, as if we were coming
// from the original BB before switch expansion. Note that PHI nodes can
// occur multiple times in PHINodesToUpdate. We have to be very careful to
// handle them the right number of times.
- while ((BB = SDB->SwitchCases[i].TrueBB)) { // Handle LHS and RHS.
- // If new BB's are created during scheduling, the edges may have been
- // updated. That is, the edge from ThisBB to BB may have been split and
- // BB's predecessor is now another block.
- DenseMap<MachineBasicBlock*, MachineBasicBlock*>::iterator EI =
- SDB->EdgeMapping.find(BB);
- if (EI != SDB->EdgeMapping.end())
- ThisBB = EI->second;
-
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
+ BB = Succs[i];
// BB may have been removed from the CFG if a branch was constant folded.
if (ThisBB->isSuccessor(BB)) {
for (MachineBasicBlock::iterator Phi = BB->begin();
@@ -1208,11 +1008,11 @@ SelectionDAGISel::FinishBasicBlock() {
++Phi) {
// This value for this PHI node is recorded in PHINodesToUpdate.
for (unsigned pn = 0; ; ++pn) {
- assert(pn != SDB->PHINodesToUpdate.size() &&
+ assert(pn != FuncInfo->PHINodesToUpdate.size() &&
"Didn't find PHI entry!");
- if (SDB->PHINodesToUpdate[pn].first == Phi) {
+ if (FuncInfo->PHINodesToUpdate[pn].first == Phi) {
Phi->addOperand(MachineOperand::
- CreateReg(SDB->PHINodesToUpdate[pn].second,
+ CreateReg(FuncInfo->PHINodesToUpdate[pn].second,
false));
Phi->addOperand(MachineOperand::CreateMBB(ThisBB));
break;
@@ -1220,21 +1020,9 @@ SelectionDAGISel::FinishBasicBlock() {
}
}
}
-
- // Don't process RHS if same block as LHS.
- if (BB == SDB->SwitchCases[i].FalseBB)
- SDB->SwitchCases[i].FalseBB = 0;
-
- // If we haven't handled the RHS, do so now. Otherwise, we're done.
- SDB->SwitchCases[i].TrueBB = SDB->SwitchCases[i].FalseBB;
- SDB->SwitchCases[i].FalseBB = 0;
}
- assert(SDB->SwitchCases[i].TrueBB == 0 && SDB->SwitchCases[i].FalseBB == 0);
- SDB->clear();
}
SDB->SwitchCases.clear();
-
- SDB->PHINodesToUpdate.clear();
}
@@ -1333,16 +1121,17 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) {
std::vector<SDValue> InOps;
std::swap(InOps, Ops);
- Ops.push_back(InOps[0]); // input chain.
- Ops.push_back(InOps[1]); // input asm string.
+ Ops.push_back(InOps[InlineAsm::Op_InputChain]); // 0
+ Ops.push_back(InOps[InlineAsm::Op_AsmString]); // 1
+ Ops.push_back(InOps[InlineAsm::Op_MDNode]); // 2, !srcloc
- unsigned i = 2, e = InOps.size();
+ unsigned i = InlineAsm::Op_FirstOperand, e = InOps.size();
if (InOps[e-1].getValueType() == MVT::Flag)
--e; // Don't process a flag operand if it is here.
while (i != e) {
unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue();
- if ((Flags & 7) != 4 /*MEM*/) {
+ if (!InlineAsm::isMemKind(Flags)) {
// Just skip over this operand, copying the operands verbatim.
Ops.insert(Ops.end(), InOps.begin()+i,
InOps.begin()+i+InlineAsm::getNumOperandRegisters(Flags) + 1);
@@ -1352,14 +1141,14 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) {
"Memory operand with multiple values?");
// Otherwise, this is a memory operand. Ask the target to select it.
std::vector<SDValue> SelOps;
- if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps)) {
- llvm_report_error("Could not match memory address. Inline asm"
- " failure!");
- }
+ if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps))
+ report_fatal_error("Could not match memory address. Inline asm"
+ " failure!");
// Add this to the output node.
- Ops.push_back(CurDAG->getTargetConstant(4/*MEM*/ | (SelOps.size()<< 3),
- MVT::i32));
+ unsigned NewFlags =
+ InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size());
+ Ops.push_back(CurDAG->getTargetConstant(NewFlags, MVT::i32));
Ops.insert(Ops.end(), SelOps.begin(), SelOps.end());
i += 2;
}
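
The hunk above retires the bare magic numbers in favor of the named InlineAsm helpers. A self-contained restatement of the flag-word layout those helpers encapsulate, mirroring the constants visible in the old lines (a sketch, not the real header):

#include <cassert>
#include <cstdio>

// Flag word layout implied by the old "4 /*MEM*/ | (N << 3)" and
// "(Flags & 7) != 4" code: kind in the low 3 bits, operand-register
// count in the bits above.
enum { Kind_Mem = 4 };

unsigned getFlagWord(unsigned Kind, unsigned NumOps) {
  assert(Kind < 8 && "kind must fit in the low 3 bits");
  return Kind | (NumOps << 3);
}
bool isMemKind(unsigned Flags) { return (Flags & 7) == Kind_Mem; }
unsigned getNumOperandRegisters(unsigned Flags) { return Flags >> 3; }

int main() {
  unsigned Flags = getFlagWord(Kind_Mem, 1);
  std::printf("flags=0x%x mem=%d nops=%u\n",
              Flags, isMemKind(Flags), getNumOperandRegisters(Flags));
  return 0;
}

Packing kind and operand count into one immediate keeps the inline-asm node's operand list flat while remaining self-describing.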
@@ -1436,7 +1225,8 @@ bool SelectionDAGISel::IsProfitableToFold(SDValue N, SDNode *U,
/// IsLegalToFold - Returns true if the specific operand node N of
/// U can be folded during instruction selection that starts at Root.
bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
- bool IgnoreChains) const {
+ CodeGenOpt::Level OptLevel,
+ bool IgnoreChains) {
if (OptLevel == CodeGenOpt::None) return false;
// If Root use can somehow reach N through a path that doesn't contain
@@ -2011,6 +1801,7 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table,
}
}
+namespace {
struct MatchScope {
/// FailIndex - If this match fails, this is the index to continue with.
@@ -2032,6 +1823,8 @@ struct MatchScope {
bool HasChainNodesMatched, HasFlagResultNodesMatched;
};
+}
+
SDNode *SelectionDAGISel::
SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
unsigned TableSize) {
@@ -2045,6 +1838,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
//case ISD::VALUETYPE:
//case ISD::CONDCODE:
case ISD::HANDLENODE:
+ case ISD::MDNODE_SDNODE:
case ISD::TargetConstant:
case ISD::TargetConstantFP:
case ISD::TargetConstantPool:
@@ -2383,7 +2177,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
if (!IsProfitableToFold(N, NodeStack[NodeStack.size()-2].getNode(),
NodeToMatch) ||
!IsLegalToFold(N, NodeStack[NodeStack.size()-2].getNode(),
- NodeToMatch, true/*We validate our own chains*/))
+ NodeToMatch, OptLevel,
+ true/*We validate our own chains*/))
break;
continue;
@@ -2776,7 +2571,7 @@ void SelectionDAGISel::CannotYetSelect(SDNode *N) {
else
Msg << "unknown intrinsic #" << iid;
}
- llvm_report_error(Msg.str());
+ report_fatal_error(Msg.str());
}
char SelectionDAGISel::ID = 0;
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index ea2ff2f36891..8a4a1b172608 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -480,7 +480,8 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
}
/// NOTE: The constructor takes ownership of TLOF.
-TargetLowering::TargetLowering(TargetMachine &tm,TargetLoweringObjectFile *tlof)
+TargetLowering::TargetLowering(const TargetMachine &tm,
+ const TargetLoweringObjectFile *tlof)
: TM(tm), TD(TM.getTargetData()), TLOF(*tlof) {
// All operations default to being supported.
memset(OpActions, 0, sizeof(OpActions));
@@ -720,7 +721,7 @@ void TargetLowering::computeRegisterProperties() {
unsigned NElts = VT.getVectorNumElements();
for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
EVT SVT = (MVT::SimpleValueType)nVT;
- if (isTypeLegal(SVT) && SVT.getVectorElementType() == EltVT &&
+ if (isTypeSynthesizable(SVT) && SVT.getVectorElementType() == EltVT &&
SVT.getVectorNumElements() > NElts && NElts != 1) {
TransformToType[i] = SVT;
ValueTypeActions.setTypeAction(VT, Promote);
@@ -1279,8 +1280,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// variable. The low bit of the shift cannot be an input sign bit unless
// the shift amount is >= the size of the datatype, which is undefined.
if (DemandedMask == 1)
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(),
- Op.getOperand(0), Op.getOperand(1)));
+ return TLO.CombineTo(Op,
+ TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(),
+ Op.getOperand(0), Op.getOperand(1)));
if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
EVT VT = Op.getValueType();
@@ -1465,23 +1467,29 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
case ISD::SRL:
// Shrink SRL by a constant if none of the high bits shifted in are
// demanded.
- if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1))){
- APInt HighBits = APInt::getHighBitsSet(OperandBitWidth,
- OperandBitWidth - BitWidth);
- HighBits = HighBits.lshr(ShAmt->getZExtValue());
- HighBits.trunc(BitWidth);
-
- if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) {
- // None of the shifted in bits are needed. Add a truncate of the
- // shift input, then shift it.
- SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl,
- Op.getValueType(),
- In.getOperand(0));
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl,
- Op.getValueType(),
- NewTrunc,
- In.getOperand(1)));
- }
+ if (TLO.LegalTypes() &&
+ !isTypeDesirableForOp(ISD::SRL, Op.getValueType()))
+ // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
+ // undesirable.
+ break;
+ ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1));
+ if (!ShAmt)
+ break;
+ APInt HighBits = APInt::getHighBitsSet(OperandBitWidth,
+ OperandBitWidth - BitWidth);
+ HighBits = HighBits.lshr(ShAmt->getZExtValue());
+ HighBits.trunc(BitWidth);
+
+ if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) {
+ // None of the shifted in bits are needed. Add a truncate of the
+ // shift input, then shift it.
+ SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl,
+ Op.getValueType(),
+ In.getOperand(0));
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl,
+ Op.getValueType(),
+ NewTrunc,
+ In.getOperand(1)));
}
break;
}
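
The restructured SRL case performs the same shrink as before: when none of the high bits a wide shift pulls down are demanded, the truncate may be hoisted through the shift. A concrete spot-check of that equivalence on plain integers, with an arbitrary value and mask:

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t X = 0x123456789ABCDEF0ull;
  unsigned ShAmt = 4;                  // shift amount < 32
  uint32_t Demanded = 0x00FFFFFF;      // excludes bits 28..31, where the
                                       // shifted-in high bits would land
  uint32_t TruncAfter  = (uint32_t)(X >> ShAmt) & Demanded;
  uint32_t TruncBefore = ((uint32_t)X >> ShAmt) & Demanded;
  std::printf("%08x %08x %s\n", TruncAfter, TruncBefore,
              TruncAfter == TruncBefore ? "equal" : "differ");
  return 0;
}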
@@ -1874,10 +1882,15 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
isa<ConstantSDNode>(Op0.getOperand(1)) &&
cast<ConstantSDNode>(Op0.getOperand(1))->getAPIntValue() == 1) {
// If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
- if (Op0.getValueType() != VT)
+ if (Op0.getValueType().bitsGT(VT))
Op0 = DAG.getNode(ISD::AND, dl, VT,
DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
DAG.getConstant(1, VT));
+ else if (Op0.getValueType().bitsLT(VT))
+ Op0 = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
+ DAG.getConstant(1, VT));
+
return DAG.getSetCC(dl, VT, Op0,
DAG.getConstant(0, Op0.getValueType()),
Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
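
The change above generalizes the (X & 1) == 1 to (X & 1) != 0 normalization to mismatched types: truncate when Op0 is wider than VT, any-extend when it is narrower. The underlying identity is easy to verify on scalars:

#include <cstdint>
#include <cstdio>

int main() {
  for (uint32_t X = 0; X < 4; ++X) {
    bool EqOne  = (X & 1) == 1;   // original form
    bool NeZero = (X & 1) != 0;   // normalized form
    std::printf("X=%u: ==1 is %d, !=0 is %d\n", X, EqOne, NeZero);
  }
  return 0;
}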
@@ -2245,7 +2258,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
/// node is a GlobalAddress + offset.
-bool TargetLowering::isGAPlusOffset(SDNode *N, GlobalValue* &GA,
+bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue* &GA,
int64_t &Offset) const {
if (isa<GlobalAddressSDNode>(N)) {
GlobalAddressSDNode *GASD = cast<GlobalAddressSDNode>(N);
diff --git a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..d20477fd890e
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
@@ -0,0 +1,21 @@
+//===-- TargetSelectionDAGInfo.cpp - SelectionDAG Info --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the TargetSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+using namespace llvm;
+
+TargetSelectionDAGInfo::TargetSelectionDAGInfo() {
+}
+
+TargetSelectionDAGInfo::~TargetSelectionDAGInfo() {
+}
diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp
index 0e6d4796e22c..5240bef5a5ff 100644
--- a/lib/CodeGen/ShadowStackGC.cpp
+++ b/lib/CodeGen/ShadowStackGC.cpp
@@ -160,7 +160,7 @@ namespace {
Args.clear();
Args.append(CI->op_begin() + 1, CI->op_end());
- InvokeInst *II = InvokeInst::Create(CI->getOperand(0),
+ InvokeInst *II = InvokeInst::Create(CI->getCalledValue(),
NewBB, CleanupBB,
Args.begin(), Args.end(),
CI->getName(), CallBB);
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp
index 15ca3740b8f2..1f68a6f276f7 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.cpp
+++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp
@@ -179,7 +179,7 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA,
for (const unsigned* SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR)
if (li_->hasInterval(*SR) && IntA.overlaps(li_->getInterval(*SR))) {
DEBUG({
- dbgs() << "Interfere with sub-register ";
+ dbgs() << "\t\tInterfere with sub-register ";
li_->getInterval(*SR).print(dbgs(), tri_);
});
return false;
@@ -187,7 +187,7 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA,
}
DEBUG({
- dbgs() << "\nExtending: ";
+ dbgs() << "Extending: ";
IntB.print(dbgs(), tri_);
});
@@ -236,7 +236,7 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA,
// If the copy instruction was killing the destination register before the
// merge, find the last use and trim the live range. That will also add the
// isKill marker.
- if (CopyMI->killsRegister(IntA.reg))
+ if (ALR->valno->isKill(CopyIdx))
TrimLiveIntervalToLastUse(CopyUseIdx, CopyMI->getParent(), IntA, ALR);
++numExtends;
@@ -259,6 +259,9 @@ bool SimpleRegisterCoalescing::HasOtherReachingDefs(LiveInterval &IntA,
for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) {
if (BI->valno == BValNo)
continue;
+ // When BValNo is null, we're looking for a dummy clobber-value for a subreg.
+ if (!BValNo && !BI->valno->isDefAccurate() && !BI->valno->getCopy())
+ continue;
if (BI->start <= AI->start && BI->end > AI->start)
return true;
if (BI->start > AI->start && BI->start < AI->end)
@@ -369,6 +372,17 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
if (HasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
return false;
+ bool BHasSubRegs = false;
+ if (TargetRegisterInfo::isPhysicalRegister(IntB.reg))
+ BHasSubRegs = *tri_->getSubRegisters(IntB.reg);
+
+ // Abort if the subregisters of IntB.reg have values that are not simply the
+ // clobbers from the superreg.
+ if (BHasSubRegs)
+ for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR)
+ if (HasOtherReachingDefs(IntA, li_->getInterval(*SR), AValNo, 0))
+ return false;
+
// If some of the uses of IntA.reg are already coalesced away, return false.
// It's not possible to determine whether it's safe to perform the coalescing.

for (MachineRegisterInfo::use_nodbg_iterator UI =
@@ -417,9 +431,6 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
BExtend[ALR->end] = BLR->end;
// Update uses of IntA of the specific Val# with IntB.
- bool BHasSubRegs = false;
- if (TargetRegisterInfo::isPhysicalRegister(IntB.reg))
- BHasSubRegs = *tri_->getSubRegisters(IntB.reg);
for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg),
UE = mri_->use_end(); UI != UE;) {
MachineOperand &UseMO = UI.getOperand();
@@ -470,7 +481,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
// We need to insert a new liverange: [ALR.start, LastUse). It may be we can
// simply extend BLR if CopyMI doesn't end the range.
DEBUG({
- dbgs() << "\nExtending: ";
+ dbgs() << "Extending: ";
IntB.print(dbgs(), tri_);
});
@@ -523,7 +534,6 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
DEBUG({
dbgs() << " result = ";
IntB.print(dbgs(), tri_);
- dbgs() << '\n';
dbgs() << "\nShortening: ";
IntA.print(dbgs(), tri_);
});
@@ -709,7 +719,7 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
// kill.
bool checkForDeadDef = false;
MachineBasicBlock *MBB = CopyMI->getParent();
- if (CopyMI->killsRegister(SrcInt.reg))
+ if (SrcLR->valno->isKill(DefIdx))
if (!TrimLiveIntervalToLastUse(CopyIdx, MBB, SrcInt, SrcLR)) {
checkForDeadDef = true;
}
@@ -804,7 +814,8 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg,
CopySrcReg == SrcReg && CopyDstReg != UseDstReg) {
// If the use is a copy and it won't be coalesced away, and its source
// is defined by a trivial computation, try to rematerialize it instead.
- if (ReMaterializeTrivialDef(li_->getInterval(SrcReg), CopyDstReg,
+ if (!JoinedCopies.count(UseMI) &&
+ ReMaterializeTrivialDef(li_->getInterval(SrcReg), CopyDstReg,
CopyDstSubIdx, UseMI))
continue;
}
@@ -824,6 +835,8 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg,
UseMI->isRegTiedToDefOperand(&O-&UseMI->getOperand(0))))
UseMI->addRegisterKilled(DstReg, tri_, true);
}
+ DEBUG(dbgs() << "\t\tupdated: " << li_->getInstructionIndex(UseMI)
+ << "\t" << *UseMI);
continue;
}
@@ -836,11 +849,11 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg,
assert(OldSubIdx < SubIdx && "Conflicting sub-register index!");
else if (SubIdx)
O.setSubReg(SubIdx);
- // Remove would-be duplicated kill marker.
- if (O.isKill() && UseMI->killsRegister(DstReg))
- O.setIsKill(false);
O.setReg(DstReg);
+ DEBUG(dbgs() << "\t\tupdated: " << li_->getInstructionIndex(UseMI)
+ << "\t" << *UseMI);
+
// After updating the operand, check if the machine instruction has
// become a copy. If so, update its val# information.
if (JoinedCopies.count(UseMI))
@@ -865,38 +878,6 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg,
}
}
-/// RemoveUnnecessaryKills - Remove kill markers that are no longer accurate
-/// due to live range lengthening as the result of coalescing.
-void SimpleRegisterCoalescing::RemoveUnnecessaryKills(unsigned Reg,
- LiveInterval &LI) {
- for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(Reg),
- UE = mri_->use_end(); UI != UE; ++UI) {
- MachineOperand &UseMO = UI.getOperand();
- if (!UseMO.isKill())
- continue;
- MachineInstr *UseMI = UseMO.getParent();
- SlotIndex UseIdx =
- li_->getInstructionIndex(UseMI).getUseIndex();
- const LiveRange *LR = LI.getLiveRangeContaining(UseIdx);
- if (!LR ||
- (!LR->valno->isKill(UseIdx.getDefIndex()) &&
- LR->valno->def != UseIdx.getDefIndex())) {
- // Interesting problem. After coalescing reg1027's def and kill are both
- // at the same point: %reg1027,0.000000e+00 = [56,814:0) 0@70-(814)
- //
- // bb5:
- // 60 %reg1027<def> = t2MOVr %reg1027, 14, %reg0, %reg0
- // 68 %reg1027<def> = t2LDRi12 %reg1027<kill>, 8, 14, %reg0
- // 76 t2CMPzri %reg1038<kill,undef>, 0, 14, %reg0, %CPSR<imp-def>
- // 84 %reg1027<def> = t2MOVr %reg1027, 14, %reg0, %reg0
- // 96 t2Bcc mbb<bb5,0x2030910>, 1, %CPSR<kill>
- //
- // Do not remove the kill marker on t2LDRi12.
- UseMO.setIsKill(false);
- }
- }
-}
-
/// removeIntervalIfEmpty - Check if the live interval of a physical register
/// is empty, if so remove it and also remove the empty intervals of its
/// sub-registers. Return true if live interval is removed.
@@ -1064,9 +1045,8 @@ SimpleRegisterCoalescing::isWinToJoinVRWithSrcPhysReg(MachineInstr *CopyMI,
unsigned Threshold = allocatableRCRegs_[RC].count() * 2;
unsigned Length = li_->getApproximateInstructionCount(DstInt);
if (Length > Threshold &&
- (((float)std::distance(mri_->use_nodbg_begin(DstInt.reg),
- mri_->use_nodbg_end()) / Length) <
- (1.0 / Threshold)))
+ std::distance(mri_->use_nodbg_begin(DstInt.reg),
+ mri_->use_nodbg_end()) * Threshold < Length)
return false;
// If the virtual register live interval extends into a loop, turn down
@@ -1122,9 +1102,8 @@ SimpleRegisterCoalescing::isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI,
unsigned Threshold = allocatableRCRegs_[RC].count() * 2;
unsigned Length = li_->getApproximateInstructionCount(SrcInt);
if (Length > Threshold &&
- (((float)std::distance(mri_->use_nodbg_begin(SrcInt.reg),
- mri_->use_nodbg_end()) / Length) <
- (1.0 / Threshold)))
+ std::distance(mri_->use_nodbg_begin(SrcInt.reg),
+ mri_->use_nodbg_end()) * Threshold < Length)
return false;
if (SrcInt.empty())
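
The float-to-integer rewrite in the two hunks above (and repeated once more in JoinCopy further down) relies on a simple identity: for positive integers, uses/Length < 1/Threshold holds exactly when uses*Threshold < Length. The integer form drops host floating point entirely, so the heuristic cannot drift with FP rounding. A brute-force check of the equivalence at small magnitudes, as a standalone sketch:

#include <cassert>

int main() {
  for (unsigned Uses = 1; Uses < 50; ++Uses)
    for (unsigned Length = 1; Length < 50; ++Length)
      for (unsigned Threshold = 1; Threshold < 20; ++Threshold) {
        bool FloatForm = ((float)Uses / Length) < (1.0f / Threshold);
        bool IntForm = Uses * Threshold < Length;
        // At these magnitudes correctly rounded division cannot flip
        // the comparison, so the two forms agree everywhere.
        assert(FloatForm == IntForm);
      }
  return 0;
}
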
@@ -1168,20 +1147,42 @@ SimpleRegisterCoalescing::isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI,
/// isWinToJoinCrossClass - Return true if it's profitable to coalesce
/// two virtual registers from different register classes.
bool
-SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned LargeReg,
- unsigned SmallReg,
- unsigned Threshold) {
- // Then make sure the intervals are *short*.
- LiveInterval &LargeInt = li_->getInterval(LargeReg);
- LiveInterval &SmallInt = li_->getInterval(SmallReg);
- unsigned LargeSize = li_->getApproximateInstructionCount(LargeInt);
- unsigned SmallSize = li_->getApproximateInstructionCount(SmallInt);
- if (LargeSize > Threshold) {
- unsigned SmallUses = std::distance(mri_->use_nodbg_begin(SmallReg),
- mri_->use_nodbg_end());
- unsigned LargeUses = std::distance(mri_->use_nodbg_begin(LargeReg),
- mri_->use_nodbg_end());
- if (SmallUses*LargeSize < LargeUses*SmallSize)
+SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned SrcReg,
+ unsigned DstReg,
+ const TargetRegisterClass *SrcRC,
+ const TargetRegisterClass *DstRC,
+ const TargetRegisterClass *NewRC) {
+ unsigned NewRCCount = allocatableRCRegs_[NewRC].count();
+ // This heuristic is good enough in practice, but it's obviously not *right*.
+ // 4 is a magic number that works well enough for x86, ARM, etc. It filters
+ // out all but the most restrictive register classes.
+ if (NewRCCount > 4 ||
+ // Early exit if the function is fairly small; coalesce aggressively in
+ // that case. For really special register classes with 3 or fewer
+ // registers, be a bit more careful.
+ (li_->getFuncInstructionCount() / NewRCCount) < 8)
+ return true;
+ LiveInterval &SrcInt = li_->getInterval(SrcReg);
+ LiveInterval &DstInt = li_->getInterval(DstReg);
+ unsigned SrcSize = li_->getApproximateInstructionCount(SrcInt);
+ unsigned DstSize = li_->getApproximateInstructionCount(DstInt);
+ if (SrcSize <= NewRCCount && DstSize <= NewRCCount)
+ return true;
+ // Estimate *register use density*. If it doubles or more, abort.
+ unsigned SrcUses = std::distance(mri_->use_nodbg_begin(SrcReg),
+ mri_->use_nodbg_end());
+ unsigned DstUses = std::distance(mri_->use_nodbg_begin(DstReg),
+ mri_->use_nodbg_end());
+ unsigned NewUses = SrcUses + DstUses;
+ unsigned NewSize = SrcSize + DstSize;
+ if (SrcRC != NewRC && SrcSize > NewRCCount) {
+ unsigned SrcRCCount = allocatableRCRegs_[SrcRC].count();
+ if (NewUses*SrcSize*SrcRCCount > 2*SrcUses*NewSize*NewRCCount)
+ return false;
+ }
+ if (DstRC != NewRC && DstSize > NewRCCount) {
+ unsigned DstRCCount = allocatableRCRegs_[DstRC].count();
+ if (NewUses*DstSize*DstRCCount > 2*DstUses*NewSize*NewRCCount)
return false;
}
return true;
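
A standalone restatement of the new profitability test, with the allocatable-register counts passed in directly. The counts and sizes below are invented for illustration; the real code reads them from allocatableRCRegs_ and LiveIntervals, and the symmetric Dst-side check is elided here.

#include <cstdio>

// Mirrors the structure of isWinToJoinCrossClass above; simplified inputs.
static bool winToJoinCrossClass(unsigned FuncSize, unsigned NewRCCount,
                                unsigned SrcSize, unsigned DstSize,
                                unsigned SrcUses, unsigned DstUses,
                                unsigned SrcRCCount, bool SrcRCChanges) {
  // Only very restrictive classes (4 or fewer registers) are screened,
  // and small functions always coalesce aggressively.
  if (NewRCCount > 4 || (FuncSize / NewRCCount) < 8)
    return true;
  if (SrcSize <= NewRCCount && DstSize <= NewRCCount)
    return true;
  unsigned NewUses = SrcUses + DstUses;
  unsigned NewSize = SrcSize + DstSize;
  // Reject if the merged interval's use density would at least double,
  // scaled by how much smaller the new class is than the old one.
  if (SrcRCChanges && SrcSize > NewRCCount &&
      NewUses * SrcSize * SrcRCCount > 2 * SrcUses * NewSize * NewRCCount)
    return false;
  return true;
}

int main() {
  // A 40-instruction interval in an 8-register class merging into a
  // 4-register class: use density roughly doubles, so the join is rejected.
  printf("%d\n", winToJoinCrossClass(400, 4, 40, 6, 10, 3, 8, true));
  return 0;
}
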
@@ -1263,7 +1264,7 @@ SimpleRegisterCoalescing::CanJoinExtractSubRegToPhysReg(unsigned DstReg,
if (li_->hasInterval(RealDstReg) &&
RHS.overlaps(li_->getInterval(RealDstReg))) {
DEBUG({
- dbgs() << "Interfere with register ";
+ dbgs() << "\t\tInterfere with register ";
li_->getInterval(RealDstReg).print(dbgs(), tri_);
});
return false; // Not coalescable
@@ -1275,7 +1276,7 @@ SimpleRegisterCoalescing::CanJoinExtractSubRegToPhysReg(unsigned DstReg,
!tri_->isSubRegister(DstReg, *SR) &&
li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
DEBUG({
- dbgs() << "Interfere with sub-register ";
+ dbgs() << "\t\tInterfere with sub-register ";
li_->getInterval(*SR).print(dbgs(), tri_);
});
return false; // Not coalescable
@@ -1298,7 +1299,7 @@ SimpleRegisterCoalescing::CanJoinInsertSubRegToPhysReg(unsigned DstReg,
if (li_->hasInterval(RealSrcReg) &&
LHS.overlaps(li_->getInterval(RealSrcReg))) {
DEBUG({
- dbgs() << "Interfere with register ";
+ dbgs() << "\t\tInterfere with register ";
li_->getInterval(RealSrcReg).print(dbgs(), tri_);
});
return false; // Not coalescable
@@ -1310,7 +1311,7 @@ SimpleRegisterCoalescing::CanJoinInsertSubRegToPhysReg(unsigned DstReg,
!tri_->isSubRegister(SrcReg, *SR) &&
li_->hasInterval(*SR) && LHS.overlaps(li_->getInterval(*SR))) {
DEBUG({
- dbgs() << "Interfere with sub-register ";
+ dbgs() << "\t\tInterfere with sub-register ";
li_->getInterval(*SR).print(dbgs(), tri_);
});
return false; // Not coalescable
@@ -1400,6 +1401,13 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
return false; // Not coalescable.
}
+ // We cannot handle dual subreg indices and mismatched classes at the same
+ // time.
+ if (SrcSubIdx && DstSubIdx && differingRegisterClasses(SrcReg, DstReg)) {
+ DEBUG(dbgs() << "\tCannot handle subreg indices and mismatched classes.\n");
+ return false;
+ }
+
// Check that a physical source register is compatible with dst regclass
if (SrcIsPhys) {
unsigned SrcSubReg = SrcSubIdx ?
@@ -1517,10 +1525,11 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
return false; // Not coalescable
}
- unsigned LargeReg = isExtSubReg ? SrcReg : DstReg;
- unsigned SmallReg = isExtSubReg ? DstReg : SrcReg;
- unsigned Limit= allocatableRCRegs_[mri_->getRegClass(SmallReg)].count();
- if (!isWinToJoinCrossClass(LargeReg, SmallReg, Limit)) {
+ if (!isWinToJoinCrossClass(SrcReg, DstReg, SrcRC, DstRC, NewRC)) {
+ DEBUG(dbgs() << "\tAvoid coalescing to constrained register class: "
+ << SrcRC->getName() << "/"
+ << DstRC->getName() << " -> "
+ << NewRC->getName() << ".\n");
Again = true; // May be possible to coalesce later.
return false;
}
@@ -1568,49 +1577,40 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
}
}
- unsigned LargeReg = SrcReg;
- unsigned SmallReg = DstReg;
-
// Now determine the register class of the joined register.
- if (isExtSubReg) {
- if (SubIdx && DstRC && DstRC->isASubClass()) {
- // This is a move to a sub-register class. However, the source is a
- // sub-register of a larger register class. We don't know what should
- // the register class be. FIXME.
- Again = true;
- return false;
+ if (!SrcIsPhys && !DstIsPhys) {
+ if (isExtSubReg) {
+ NewRC =
+ SubIdx ? tri_->getMatchingSuperRegClass(SrcRC, DstRC, SubIdx) : SrcRC;
+ } else if (isInsSubReg) {
+ NewRC =
+ SubIdx ? tri_->getMatchingSuperRegClass(DstRC, SrcRC, SubIdx) : DstRC;
+ } else {
+ NewRC = getCommonSubClass(SrcRC, DstRC);
}
- if (!DstIsPhys && !SrcIsPhys)
- NewRC = SrcRC;
- } else if (!SrcIsPhys && !DstIsPhys) {
- NewRC = getCommonSubClass(SrcRC, DstRC);
+
if (!NewRC) {
DEBUG(dbgs() << "\tDisjoint regclasses: "
<< SrcRC->getName() << ", "
<< DstRC->getName() << ".\n");
return false; // Not coalescable.
}
- if (DstRC->getSize() > SrcRC->getSize())
- std::swap(LargeReg, SmallReg);
- }
- // If we are joining two virtual registers and the resulting register
- // class is more restrictive (fewer register, smaller size). Check if it's
- // worth doing the merge.
- if (!SrcIsPhys && !DstIsPhys &&
- (isExtSubReg || DstRC->isASubClass()) &&
- !isWinToJoinCrossClass(LargeReg, SmallReg,
- allocatableRCRegs_[NewRC].count())) {
- DEBUG(dbgs() << "\tSrc/Dest are different register classes: "
- << SrcRC->getName() << "/"
- << DstRC->getName() << " -> "
- << NewRC->getName() << ".\n");
- // Allow the coalescer to try again in case either side gets coalesced to
- // a physical register that's compatible with the other side. e.g.
- // r1024 = MOV32to32_ r1025
- // But later r1024 is assigned EAX then r1025 may be coalesced with EAX.
- Again = true; // May be possible to coalesce later.
- return false;
+ // If we are joining two virtual registers and the resulting register
+ // class is more restrictive (fewer registers, smaller size), check if
+ // it's worth doing the merge.
+ if (!isWinToJoinCrossClass(SrcReg, DstReg, SrcRC, DstRC, NewRC)) {
+ DEBUG(dbgs() << "\tAvoid coalescing to constrained register class: "
+ << SrcRC->getName() << "/"
+ << DstRC->getName() << " -> "
+ << NewRC->getName() << ".\n");
+ // Allow the coalescer to try again in case either side gets coalesced to
+ // a physical register that's compatible with the other side. e.g.
+ // r1024 = MOV32to32_ r1025
+ // But later r1024 is assigned EAX then r1025 may be coalesced with EAX.
+ Again = true; // May be possible to coalesce later.
+ return false;
+ }
}
}
@@ -1626,9 +1626,13 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
"Register mapping is horribly broken!");
DEBUG({
- dbgs() << "\t\tInspecting "; SrcInt.print(dbgs(), tri_);
- dbgs() << " and "; DstInt.print(dbgs(), tri_);
- dbgs() << ": ";
+ dbgs() << "\t\tInspecting ";
+ if (SrcRC) dbgs() << SrcRC->getName() << ": ";
+ SrcInt.print(dbgs(), tri_);
+ dbgs() << "\n\t\t and ";
+ if (DstRC) dbgs() << DstRC->getName() << ": ";
+ DstInt.print(dbgs(), tri_);
+ dbgs() << "\n";
});
// Save a copy of the virtual register live interval. We'll manually
@@ -1672,10 +1676,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
const TargetRegisterClass *RC = mri_->getRegClass(JoinVReg);
unsigned Threshold = allocatableRCRegs_[RC].count() * 2;
unsigned Length = li_->getApproximateInstructionCount(JoinVInt);
- float Ratio = 1.0 / Threshold;
if (Length > Threshold &&
- (((float)std::distance(mri_->use_nodbg_begin(JoinVReg),
- mri_->use_nodbg_end()) / Length) < Ratio)) {
+ std::distance(mri_->use_nodbg_begin(JoinVReg),
+ mri_->use_nodbg_end()) * Threshold < Length) {
// Before giving up coalescing, if definition of source is defined by
// trivial computation, try rematerializing it.
if (ReMaterializeTrivialDef(SrcInt, DstReg, DstSubIdx, CopyMI))
@@ -1701,7 +1704,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
// Only coalesce an empty interval (defined by implicit_def) with
// another interval which has a valno defined by the CopyMI and the CopyMI
// is a kill of the implicit def.
- DEBUG(dbgs() << "Not profitable!\n");
+ DEBUG(dbgs() << "\tNot profitable!\n");
return false;
}
} else if (!JoinIntervals(DstInt, SrcInt, Swapped)) {
@@ -1718,12 +1721,12 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
(AdjustCopiesBackFrom(SrcInt, DstInt, CopyMI) ||
RemoveCopyByCommutingDef(SrcInt, DstInt, CopyMI))) {
JoinedCopies.insert(CopyMI);
- DEBUG(dbgs() << "Trivial!\n");
+ DEBUG(dbgs() << "\tTrivial!\n");
return true;
}
// Otherwise, we are unable to join the intervals.
- DEBUG(dbgs() << "Interference!\n");
+ DEBUG(dbgs() << "\tInterference!\n");
Again = true; // May be possible to coalesce later.
return false;
}
@@ -1794,12 +1797,6 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
// Remember to delete the copy instruction.
JoinedCopies.insert(CopyMI);
- // Some live range has been lengthened due to coalescing, eliminate the
- // unnecessary kills.
- RemoveUnnecessaryKills(SrcReg, *ResDstInt);
- if (TargetRegisterInfo::isVirtualRegister(DstReg))
- RemoveUnnecessaryKills(DstReg, *ResDstInt);
-
UpdateRegDefsUses(SrcReg, DstReg, SubIdx);
// If we have extended the live range of a physical register, make sure we
@@ -1843,7 +1840,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
}
DEBUG({
- dbgs() << "\n\t\tJoined. Result = ";
+ dbgs() << "\t\tJoined. Result = ";
ResDstInt->print(dbgs(), tri_);
dbgs() << "\n";
});
@@ -2198,7 +2195,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
for (const unsigned* SR = tri_->getSubRegisters(LHS.reg); *SR; ++SR)
if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
DEBUG({
- dbgs() << "Interfere with sub-register ";
+ dbgs() << "\tInterfere with sub-register ";
li_->getInterval(*SR).print(dbgs(), tri_);
});
return false;
@@ -2215,7 +2212,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
for (const unsigned* SR = tri_->getSubRegisters(RHS.reg); *SR; ++SR)
if (li_->hasInterval(*SR) && LHS.overlaps(li_->getInterval(*SR))) {
DEBUG({
- dbgs() << "Interfere with sub-register ";
+ dbgs() << "\tInterfere with sub-register ";
li_->getInterval(*SR).print(dbgs(), tri_);
});
return false;
@@ -2673,13 +2670,6 @@ SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start,
return NULL;
}
-void SimpleRegisterCoalescing::printRegName(unsigned reg) const {
- if (TargetRegisterInfo::isPhysicalRegister(reg))
- dbgs() << tri_->getName(reg);
- else
- dbgs() << "%reg" << reg;
-}
-
void SimpleRegisterCoalescing::releaseMemory() {
JoinedCopies.clear();
ReMatCopies.clear();
@@ -2744,7 +2734,7 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
// delete them later.
DoDelete = false;
}
- if (MI->registerDefIsDead(DstReg)) {
+ if (MI->allDefsAreDead()) {
LiveInterval &li = li_->getInterval(DstReg);
if (!ShortenDeadCopySrcLiveRange(li, MI))
ShortenDeadCopyLiveRange(li, MI);
@@ -2808,8 +2798,25 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
li_->RemoveMachineInstrFromMaps(MI);
mii = mbbi->erase(mii);
++numPeep;
- } else {
- ++mii;
+ continue;
+ }
+
+ ++mii;
+
+ // Check for now-unnecessary kill flags.
+ if (li_->isNotInMIMap(MI)) continue;
+ SlotIndex UseIdx = li_->getInstructionIndex(MI).getUseIndex();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isKill()) continue;
+ unsigned reg = MO.getReg();
+ if (!reg || !li_->hasInterval(reg)) continue;
+ LiveInterval &LI = li_->getInterval(reg);
+ const LiveRange *LR = LI.getLiveRangeContaining(UseIdx);
+ if (!LR ||
+ (!LR->valno->isKill(UseIdx.getDefIndex()) &&
+ LR->valno->def != UseIdx.getDefIndex()))
+ MO.setIsKill(false);
}
}
}
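
The kill-flag sweep that replaces RemoveUnnecessaryKills keeps a flag only when the value live at the use really ends (or is defined) there. A minimal sketch of that predicate with hand-rolled stand-ins for LiveRange and VNInfo, not the LLVM types:

#include <cassert>
#include <vector>

struct ValNo {
  unsigned Def;                 // index of the defining instruction
  std::vector<unsigned> Kills;  // indices where this value is killed
  bool isKill(unsigned Idx) const {
    for (unsigned K : Kills)
      if (K == Idx) return true;
    return false;
  }
};

struct LiveRange { unsigned Start, End; const ValNo *VN; };

// A kill flag at UseDefIdx survives only if the containing range's value
// is genuinely killed or defined there; otherwise coalescing stretched
// the range past the use and the flag is stale.
static bool keepKillFlag(const LiveRange *LR, unsigned UseDefIdx) {
  return LR && (LR->VN->isKill(UseDefIdx) || LR->VN->Def == UseDefIdx);
}

int main() {
  ValNo VN; VN.Def = 56; VN.Kills = {814};
  LiveRange LR = {56, 814, &VN};
  assert(keepKillFlag(&LR, 814));   // genuine kill: flag stays
  assert(!keepKillFlag(&LR, 100));  // range extended past it: clear flag
  return 0;
}
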
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h
index f668064ab08e..1be04f32aa69 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.h
+++ b/lib/CodeGen/SimpleRegisterCoalescing.h
@@ -179,8 +179,11 @@ namespace llvm {
/// isWinToJoinCrossClass - Return true if it's profitable to coalesce
/// two virtual registers from different register classes.
- bool isWinToJoinCrossClass(unsigned LargeReg, unsigned SmallReg,
- unsigned Threshold);
+ bool isWinToJoinCrossClass(unsigned SrcReg,
+ unsigned DstReg,
+ const TargetRegisterClass *SrcRC,
+ const TargetRegisterClass *DstRC,
+ const TargetRegisterClass *NewRC);
/// HasIncompatibleSubRegDefUse - If we are trying to coalesce a virtual
/// register with a physical register, check if any of the virtual register
@@ -220,10 +223,6 @@ namespace llvm {
/// subregister.
void UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx);
- /// RemoveUnnecessaryKills - Remove kill markers that are no longer accurate
- /// due to live range lengthening as the result of coalescing.
- void RemoveUnnecessaryKills(unsigned Reg, LiveInterval &LI);
-
/// ShortenDeadCopyLiveRange - Shorten a live range defined by a dead copy.
/// Return true if live interval is removed.
bool ShortenDeadCopyLiveRange(LiveInterval &li, MachineInstr *CopyMI);
@@ -243,8 +242,6 @@ namespace llvm {
/// cycles Start and End or NULL if there are no uses.
MachineOperand *lastRegisterUse(SlotIndex Start, SlotIndex End,
unsigned Reg, SlotIndex &LastUseIdx) const;
-
- void printRegName(unsigned reg) const;
};
} // End llvm namespace
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
index 7ba44031714b..63c55549a3ea 100644
--- a/lib/CodeGen/Spiller.cpp
+++ b/lib/CodeGen/Spiller.cpp
@@ -46,7 +46,6 @@ namespace {
/// Utility class for spillers.
class SpillerBase : public Spiller {
protected:
-
MachineFunction *mf;
LiveIntervals *lis;
MachineFrameInfo *mfi;
@@ -160,9 +159,11 @@ protected:
return added;
}
-
};
+} // end anonymous namespace
+
+namespace {
/// Spills any live range using the spill-everywhere method with no attempt at
/// folding.
@@ -178,9 +179,12 @@ public:
// Ignore spillIs - we don't use it.
return trivialSpillEverywhere(li);
}
-
};
+} // end anonymous namespace
+
+namespace {
+
/// Falls back on LiveIntervals::addIntervalsForSpills.
class StandardSpiller : public Spiller {
protected:
@@ -198,9 +202,12 @@ public:
SlotIndex*) {
return lis->addIntervalsForSpills(*li, spillIs, loopInfo, *vrm);
}
-
};
+} // end anonymous namespace
+
+namespace {
+
/// When a call to spill is placed this spiller will first try to break the
/// interval up into its component values (one new interval per value).
/// If this fails, or if a call is placed to spill a previously split interval
@@ -513,15 +520,16 @@ private:
};
-}
+} // end anonymous namespace
+
llvm::Spiller* llvm::createSpiller(MachineFunction *mf, LiveIntervals *lis,
const MachineLoopInfo *loopInfo,
VirtRegMap *vrm) {
switch (spillerOpt) {
- case trivial: return new TrivialSpiller(mf, lis, vrm); break;
- case standard: return new StandardSpiller(lis, loopInfo, vrm); break;
- case splitting: return new SplittingSpiller(mf, lis, loopInfo, vrm); break;
- default: llvm_unreachable("Unreachable!"); break;
+ default: assert(0 && "unknown spiller");
+ case trivial: return new TrivialSpiller(mf, lis, vrm);
+ case standard: return new StandardSpiller(lis, loopInfo, vrm);
+ case splitting: return new SplittingSpiller(mf, lis, loopInfo, vrm);
}
}
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index 12d38f0a76d0..42dfd7fbcb08 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -182,7 +182,8 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) {
if (!LS->hasInterval(FI))
continue;
LiveInterval &li = LS->getInterval(FI);
- li.weight += LiveIntervals::getSpillWeight(false, true, loopDepth);
+ if (!MI->isDebugValue())
+ li.weight += LiveIntervals::getSpillWeight(false, true, loopDepth);
SSRefs[FI].push_back(MI);
}
}
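
The guard added above preserves a basic invariant: compiling with and without -g must produce identical code, so DBG_VALUE instructions may never feed a spill weight. A toy illustration of that invariant, using simplified stand-in types rather than the LiveIntervals API:

#include <cassert>

struct MI { bool IsDebugValue; };

static float spillWeight(const MI *Instrs, unsigned N, float PerUse) {
  float W = 0;
  for (unsigned i = 0; i != N; ++i)
    if (!Instrs[i].IsDebugValue)   // the check this hunk adds
      W += PerUse;
  return W;
}

int main() {
  MI WithDebug[] = {{false}, {true}, {false}};
  MI WithoutDebug[] = {{false}, {false}};
  // Same weight whether or not debug instructions are present.
  assert(spillWeight(WithDebug, 3, 1.0f) ==
         spillWeight(WithoutDebug, 2, 1.0f));
  return 0;
}
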
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index e9e998f81d55..0ad6619ac4fd 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -40,7 +40,7 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
std::string msg;
raw_string_ostream Msg(msg);
Msg << "Don't know how to commute: " << *MI;
- llvm_report_error(Msg.str());
+ report_fatal_error(Msg.str());
}
assert(MI->getOperand(Idx1).isReg() && MI->getOperand(Idx2).isReg() &&
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index d6bdb1040f3e..9f959932e6d5 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -38,110 +38,88 @@ using namespace dwarf;
//===----------------------------------------------------------------------===//
// ELF
//===----------------------------------------------------------------------===//
-typedef StringMap<const MCSectionELF*> ELFUniqueMapTy;
-
-TargetLoweringObjectFileELF::~TargetLoweringObjectFileELF() {
- // If we have the section uniquing map, free it.
- delete (ELFUniqueMapTy*)UniquingMap;
-}
-
-const MCSection *TargetLoweringObjectFileELF::
-getELFSection(StringRef Section, unsigned Type, unsigned Flags,
- SectionKind Kind, bool IsExplicit) const {
- if (UniquingMap == 0)
- UniquingMap = new ELFUniqueMapTy();
- ELFUniqueMapTy &Map = *(ELFUniqueMapTy*)UniquingMap;
-
- // Do the lookup, if we have a hit, return it.
- StringMapEntry<const MCSectionELF*> &Entry = Map.GetOrCreateValue(Section);
- if (Entry.getValue()) return Entry.getValue();
-
- MCSectionELF *Result = MCSectionELF::Create(Entry.getKey(), Type, Flags, Kind,
- IsExplicit, getContext());
- Entry.setValue(Result);
- return Result;
-}
void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
- if (UniquingMap != 0)
- ((ELFUniqueMapTy*)UniquingMap)->clear();
TargetLoweringObjectFile::Initialize(Ctx, TM);
BSSSection =
- getELFSection(".bss", MCSectionELF::SHT_NOBITS,
- MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC,
- SectionKind::getBSS());
+ getContext().getELFSection(".bss", MCSectionELF::SHT_NOBITS,
+ MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC,
+ SectionKind::getBSS());
TextSection =
- getELFSection(".text", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_EXECINSTR | MCSectionELF::SHF_ALLOC,
- SectionKind::getText());
+ getContext().getELFSection(".text", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_EXECINSTR |
+ MCSectionELF::SHF_ALLOC,
+ SectionKind::getText());
DataSection =
- getELFSection(".data", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC,
- SectionKind::getDataRel());
+ getContext().getELFSection(".data", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC,
+ SectionKind::getDataRel());
ReadOnlySection =
- getELFSection(".rodata", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC,
- SectionKind::getReadOnly());
+ getContext().getELFSection(".rodata", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC,
+ SectionKind::getReadOnly());
TLSDataSection =
- getELFSection(".tdata", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS |
- MCSectionELF::SHF_WRITE, SectionKind::getThreadData());
+ getContext().getELFSection(".tdata", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS |
+ MCSectionELF::SHF_WRITE,
+ SectionKind::getThreadData());
TLSBSSSection =
- getELFSection(".tbss", MCSectionELF::SHT_NOBITS,
- MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS |
- MCSectionELF::SHF_WRITE, SectionKind::getThreadBSS());
+ getContext().getELFSection(".tbss", MCSectionELF::SHT_NOBITS,
+ MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS |
+ MCSectionELF::SHF_WRITE,
+ SectionKind::getThreadBSS());
DataRelSection =
- getELFSection(".data.rel", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE,
- SectionKind::getDataRel());
+ getContext().getELFSection(".data.rel", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE,
+ SectionKind::getDataRel());
DataRelLocalSection =
- getELFSection(".data.rel.local", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE,
- SectionKind::getDataRelLocal());
+ getContext().getELFSection(".data.rel.local", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE,
+ SectionKind::getDataRelLocal());
DataRelROSection =
- getELFSection(".data.rel.ro", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE,
- SectionKind::getReadOnlyWithRel());
+ getContext().getELFSection(".data.rel.ro", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE,
+ SectionKind::getReadOnlyWithRel());
DataRelROLocalSection =
- getELFSection(".data.rel.ro.local", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE,
- SectionKind::getReadOnlyWithRelLocal());
+ getContext().getELFSection(".data.rel.ro.local", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE,
+ SectionKind::getReadOnlyWithRelLocal());
MergeableConst4Section =
- getELFSection(".rodata.cst4", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE,
- SectionKind::getMergeableConst4());
+ getContext().getELFSection(".rodata.cst4", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_MERGE,
+ SectionKind::getMergeableConst4());
MergeableConst8Section =
- getELFSection(".rodata.cst8", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE,
- SectionKind::getMergeableConst8());
+ getContext().getELFSection(".rodata.cst8", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_MERGE,
+ SectionKind::getMergeableConst8());
MergeableConst16Section =
- getELFSection(".rodata.cst16", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE,
- SectionKind::getMergeableConst16());
+ getContext().getELFSection(".rodata.cst16", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_MERGE,
+ SectionKind::getMergeableConst16());
StaticCtorSection =
- getELFSection(".ctors", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE,
- SectionKind::getDataRel());
+ getContext().getELFSection(".ctors", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE,
+ SectionKind::getDataRel());
StaticDtorSection =
- getELFSection(".dtors", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE,
- SectionKind::getDataRel());
+ getContext().getELFSection(".dtors", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE,
+ SectionKind::getDataRel());
// Exception Handling Sections.
@@ -150,47 +128,48 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
// runtime hit for C++ apps. Either the contents of the LSDA need to be
// adjusted or this should be a data section.
LSDASection =
- getELFSection(".gcc_except_table", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC, SectionKind::getReadOnly());
+ getContext().getELFSection(".gcc_except_table", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC,
+ SectionKind::getReadOnly());
EHFrameSection =
- getELFSection(".eh_frame", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE,
- SectionKind::getDataRel());
+ getContext().getELFSection(".eh_frame", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE,
+ SectionKind::getDataRel());
// Debug Info Sections.
DwarfAbbrevSection =
- getELFSection(".debug_abbrev", MCSectionELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
+ getContext().getELFSection(".debug_abbrev", MCSectionELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
DwarfInfoSection =
- getELFSection(".debug_info", MCSectionELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
+ getContext().getELFSection(".debug_info", MCSectionELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
DwarfLineSection =
- getELFSection(".debug_line", MCSectionELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
+ getContext().getELFSection(".debug_line", MCSectionELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
DwarfFrameSection =
- getELFSection(".debug_frame", MCSectionELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
+ getContext().getELFSection(".debug_frame", MCSectionELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
DwarfPubNamesSection =
- getELFSection(".debug_pubnames", MCSectionELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
+ getContext().getELFSection(".debug_pubnames", MCSectionELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
DwarfPubTypesSection =
- getELFSection(".debug_pubtypes", MCSectionELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
+ getContext().getELFSection(".debug_pubtypes", MCSectionELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
DwarfStrSection =
- getELFSection(".debug_str", MCSectionELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
+ getContext().getELFSection(".debug_str", MCSectionELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
DwarfLocSection =
- getELFSection(".debug_loc", MCSectionELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
+ getContext().getELFSection(".debug_loc", MCSectionELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
DwarfARangesSection =
- getELFSection(".debug_aranges", MCSectionELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
+ getContext().getELFSection(".debug_aranges", MCSectionELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
DwarfRangesSection =
- getELFSection(".debug_ranges", MCSectionELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
+ getContext().getELFSection(".debug_ranges", MCSectionELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
DwarfMacroInfoSection =
- getELFSection(".debug_macinfo", MCSectionELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
+ getContext().getELFSection(".debug_macinfo", MCSectionELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
}
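
The mechanical change running through this hunk retires the per-TargetLoweringObjectFile UniquingMap in favor of uniquing sections inside MCContext. The pattern being centralized is name-keyed memoization; a sketch with std::map standing in for StringMap, and deliberate arena-style leakage since MCContext never frees sections individually:

#include <cassert>
#include <map>
#include <string>

struct Section { std::string Name; unsigned Type, Flags; };

struct Context {
  std::map<std::string, Section *> Uniqued;
  Section *getELFSection(const std::string &Name, unsigned Type,
                         unsigned Flags) {
    Section *&S = Uniqued[Name];
    if (!S)                       // first request creates the section
      S = new Section{Name, Type, Flags};
    return S;                     // later requests get the same pointer
  }
};

int main() {
  Context Ctx;
  assert(Ctx.getELFSection(".text", 1, 6) ==
         Ctx.getELFSection(".text", 1, 6));
  return 0;
}
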
@@ -279,9 +258,9 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
// Infer section flags from the section name if we can.
Kind = getELFKindForNamedSection(SectionName, Kind);
- return getELFSection(SectionName,
- getELFSectionType(SectionName, Kind),
- getELFSectionFlags(Kind), Kind, true);
+ return getContext().getELFSection(SectionName,
+ getELFSectionType(SectionName, Kind),
+ getELFSectionFlags(Kind), Kind, true);
}
static const char *getSectionPrefixForUniqueGlobal(SectionKind Kind) {
@@ -300,19 +279,54 @@ static const char *getSectionPrefixForUniqueGlobal(SectionKind Kind) {
return ".gnu.linkonce.d.rel.ro.";
}
+/// getSectionPrefixForGlobal - Return the section prefix name used by the
+/// FunctionSections and DataSections options.
+static const char *getSectionPrefixForGlobal(SectionKind Kind) {
+ if (Kind.isText()) return ".text.";
+ if (Kind.isReadOnly()) return ".rodata.";
+
+ if (Kind.isThreadData()) return ".tdata.";
+ if (Kind.isThreadBSS()) return ".tbss.";
+
+ if (Kind.isDataNoRel()) return ".data.";
+ if (Kind.isDataRelLocal()) return ".data.rel.local.";
+ if (Kind.isDataRel()) return ".data.rel.";
+ if (Kind.isReadOnlyWithRelLocal()) return ".data.rel.ro.local.";
+
+ assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
+ return ".data.rel.ro.";
+}
+
+
const MCSection *TargetLoweringObjectFileELF::
SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
Mangler *Mang, const TargetMachine &TM) const {
+ // If we have -ffunction-sections or -fdata-sections then we should emit
+ // the global value to a uniqued section specifically for it.
+ bool EmitUniquedSection;
+ if (Kind.isText())
+ EmitUniquedSection = TM.getFunctionSections();
+ else
+ EmitUniquedSection = TM.getDataSections();
// If this global is linkonce/weak and the target handles this by emitting it
// into a 'uniqued' section name, create and return the section now.
- if (GV->isWeakForLinker() && !Kind.isCommon() && !Kind.isBSS()) {
- const char *Prefix = getSectionPrefixForUniqueGlobal(Kind);
+ if ((GV->isWeakForLinker() || EmitUniquedSection) &&
+ !Kind.isCommon() && !Kind.isBSS()) {
+ const char *Prefix;
+ if (GV->isWeakForLinker())
+ Prefix = getSectionPrefixForUniqueGlobal(Kind);
+ else {
+ assert(EmitUniquedSection);
+ Prefix = getSectionPrefixForGlobal(Kind);
+ }
+
SmallString<128> Name(Prefix, Prefix+strlen(Prefix));
MCSymbol *Sym = Mang->getSymbol(GV);
Name.append(Sym->getName().begin(), Sym->getName().end());
- return getELFSection(Name.str(), getELFSectionType(Name.str(), Kind),
- getELFSectionFlags(Kind), Kind);
+ return getContext().getELFSection(Name.str(),
+ getELFSectionType(Name.str(), Kind),
+ getELFSectionFlags(Kind), Kind);
}
if (Kind.isText()) return TextSection;
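
The effect of the uniqued-section logic above, reduced to its naming behavior: weak globals keep the .gnu.linkonce.* prefixes, while -ffunction-sections/-fdata-sections globals get a plain kind prefix plus the mangled name. A compressed sketch with illustrative kinds, not the SectionKind API:

#include <cstdio>
#include <string>

enum Kind { Text, ReadOnly, Data };

static const char *sectionPrefix(Kind K, bool WeakForLinker) {
  if (WeakForLinker)              // uniqued to fold duplicate definitions
    return K == Text ? ".gnu.linkonce.t."
         : K == ReadOnly ? ".gnu.linkonce.r." : ".gnu.linkonce.d.";
  // -ffunction-sections / -fdata-sections: one section per global
  return K == Text ? ".text." : K == ReadOnly ? ".rodata." : ".data.";
}

int main() {
  std::string S = std::string(sectionPrefix(Text, false)) + "foo";
  printf("%s\n", S.c_str());      // prints: .text.foo
  return 0;
}
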
@@ -337,11 +351,11 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
std::string Name = SizeSpec + utostr(Align);
- return getELFSection(Name, MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC |
- MCSectionELF::SHF_MERGE |
- MCSectionELF::SHF_STRINGS,
- Kind);
+ return getContext().getELFSection(Name, MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC |
+ MCSectionELF::SHF_MERGE |
+ MCSectionELF::SHF_STRINGS,
+ Kind);
}
if (Kind.isMergeableConst()) {
@@ -426,43 +440,6 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
// MachO
//===----------------------------------------------------------------------===//
-typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy;
-
-TargetLoweringObjectFileMachO::~TargetLoweringObjectFileMachO() {
- // If we have the MachO uniquing map, free it.
- delete (MachOUniqueMapTy*)UniquingMap;
-}
-
-
-const MCSectionMachO *TargetLoweringObjectFileMachO::
-getMachOSection(StringRef Segment, StringRef Section,
- unsigned TypeAndAttributes,
- unsigned Reserved2, SectionKind Kind) const {
- // We unique sections by their segment/section pair. The returned section
- // may not have the same flags as the requested section, if so this should be
- // diagnosed by the client as an error.
-
- // Create the map if it doesn't already exist.
- if (UniquingMap == 0)
- UniquingMap = new MachOUniqueMapTy();
- MachOUniqueMapTy &Map = *(MachOUniqueMapTy*)UniquingMap;
-
- // Form the name to look up.
- SmallString<64> Name;
- Name += Segment;
- Name.push_back(',');
- Name += Section;
-
- // Do the lookup, if we have a hit, return it.
- const MCSectionMachO *&Entry = Map[Name.str()];
- if (Entry) return Entry;
-
- // Otherwise, return a new section.
- return Entry = MCSectionMachO::Create(Segment, Section, TypeAndAttributes,
- Reserved2, Kind, getContext());
-}
-
-
void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
// _foo.eh symbols are currently always exported so that the linker knows
@@ -473,29 +450,31 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
IsFunctionEHFrameSymbolPrivate = false;
SupportsWeakOmittedEHFrame = false;
- if (UniquingMap != 0)
- ((MachOUniqueMapTy*)UniquingMap)->clear();
TargetLoweringObjectFile::Initialize(Ctx, TM);
TextSection // .text
- = getMachOSection("__TEXT", "__text",
- MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
- SectionKind::getText());
+ = getContext().getMachOSection("__TEXT", "__text",
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ SectionKind::getText());
DataSection // .data
- = getMachOSection("__DATA", "__data", 0, SectionKind::getDataRel());
+ = getContext().getMachOSection("__DATA", "__data", 0,
+ SectionKind::getDataRel());
CStringSection // .cstring
- = getMachOSection("__TEXT", "__cstring", MCSectionMachO::S_CSTRING_LITERALS,
- SectionKind::getMergeable1ByteCString());
+ = getContext().getMachOSection("__TEXT", "__cstring",
+ MCSectionMachO::S_CSTRING_LITERALS,
+ SectionKind::getMergeable1ByteCString());
UStringSection
- = getMachOSection("__TEXT","__ustring", 0,
- SectionKind::getMergeable2ByteCString());
+ = getContext().getMachOSection("__TEXT","__ustring", 0,
+ SectionKind::getMergeable2ByteCString());
FourByteConstantSection // .literal4
- = getMachOSection("__TEXT", "__literal4", MCSectionMachO::S_4BYTE_LITERALS,
- SectionKind::getMergeableConst4());
+ = getContext().getMachOSection("__TEXT", "__literal4",
+ MCSectionMachO::S_4BYTE_LITERALS,
+ SectionKind::getMergeableConst4());
EightByteConstantSection // .literal8
- = getMachOSection("__TEXT", "__literal8", MCSectionMachO::S_8BYTE_LITERALS,
- SectionKind::getMergeableConst8());
+ = getContext().getMachOSection("__TEXT", "__literal8",
+ MCSectionMachO::S_8BYTE_LITERALS,
+ SectionKind::getMergeableConst8());
// ld_classic doesn't support .literal16 in 32-bit mode, and ld64 falls back
// to using it in -static mode.
@@ -503,110 +482,130 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
if (TM.getRelocationModel() != Reloc::Static &&
TM.getTargetData()->getPointerSize() == 32)
SixteenByteConstantSection = // .literal16
- getMachOSection("__TEXT", "__literal16",MCSectionMachO::S_16BYTE_LITERALS,
- SectionKind::getMergeableConst16());
+ getContext().getMachOSection("__TEXT", "__literal16",
+ MCSectionMachO::S_16BYTE_LITERALS,
+ SectionKind::getMergeableConst16());
ReadOnlySection // .const
- = getMachOSection("__TEXT", "__const", 0, SectionKind::getReadOnly());
+ = getContext().getMachOSection("__TEXT", "__const", 0,
+ SectionKind::getReadOnly());
TextCoalSection
- = getMachOSection("__TEXT", "__textcoal_nt",
- MCSectionMachO::S_COALESCED |
- MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
- SectionKind::getText());
+ = getContext().getMachOSection("__TEXT", "__textcoal_nt",
+ MCSectionMachO::S_COALESCED |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ SectionKind::getText());
ConstTextCoalSection
- = getMachOSection("__TEXT", "__const_coal", MCSectionMachO::S_COALESCED,
- SectionKind::getText());
+ = getContext().getMachOSection("__TEXT", "__const_coal",
+ MCSectionMachO::S_COALESCED,
+ SectionKind::getText());
ConstDataCoalSection
- = getMachOSection("__DATA","__const_coal", MCSectionMachO::S_COALESCED,
- SectionKind::getText());
+ = getContext().getMachOSection("__DATA","__const_coal",
+ MCSectionMachO::S_COALESCED,
+ SectionKind::getText());
ConstDataSection // .const_data
- = getMachOSection("__DATA", "__const", 0,
- SectionKind::getReadOnlyWithRel());
+ = getContext().getMachOSection("__DATA", "__const", 0,
+ SectionKind::getReadOnlyWithRel());
DataCoalSection
- = getMachOSection("__DATA","__datacoal_nt", MCSectionMachO::S_COALESCED,
- SectionKind::getDataRel());
+ = getContext().getMachOSection("__DATA","__datacoal_nt",
+ MCSectionMachO::S_COALESCED,
+ SectionKind::getDataRel());
DataCommonSection
- = getMachOSection("__DATA","__common", MCSectionMachO::S_ZEROFILL,
- SectionKind::getBSS());
+ = getContext().getMachOSection("__DATA","__common",
+ MCSectionMachO::S_ZEROFILL,
+ SectionKind::getBSS());
DataBSSSection
- = getMachOSection("__DATA","__bss", MCSectionMachO::S_ZEROFILL,
- SectionKind::getBSS());
+ = getContext().getMachOSection("__DATA","__bss", MCSectionMachO::S_ZEROFILL,
+ SectionKind::getBSS());
LazySymbolPointerSection
- = getMachOSection("__DATA", "__la_symbol_ptr",
- MCSectionMachO::S_LAZY_SYMBOL_POINTERS,
- SectionKind::getMetadata());
+ = getContext().getMachOSection("__DATA", "__la_symbol_ptr",
+ MCSectionMachO::S_LAZY_SYMBOL_POINTERS,
+ SectionKind::getMetadata());
NonLazySymbolPointerSection
- = getMachOSection("__DATA", "__nl_symbol_ptr",
- MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS,
- SectionKind::getMetadata());
+ = getContext().getMachOSection("__DATA", "__nl_symbol_ptr",
+ MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS,
+ SectionKind::getMetadata());
if (TM.getRelocationModel() == Reloc::Static) {
StaticCtorSection
- = getMachOSection("__TEXT", "__constructor", 0,SectionKind::getDataRel());
+ = getContext().getMachOSection("__TEXT", "__constructor", 0,
+ SectionKind::getDataRel());
StaticDtorSection
- = getMachOSection("__TEXT", "__destructor", 0, SectionKind::getDataRel());
+ = getContext().getMachOSection("__TEXT", "__destructor", 0,
+ SectionKind::getDataRel());
} else {
StaticCtorSection
- = getMachOSection("__DATA", "__mod_init_func",
- MCSectionMachO::S_MOD_INIT_FUNC_POINTERS,
- SectionKind::getDataRel());
+ = getContext().getMachOSection("__DATA", "__mod_init_func",
+ MCSectionMachO::S_MOD_INIT_FUNC_POINTERS,
+ SectionKind::getDataRel());
StaticDtorSection
- = getMachOSection("__DATA", "__mod_term_func",
- MCSectionMachO::S_MOD_TERM_FUNC_POINTERS,
- SectionKind::getDataRel());
+ = getContext().getMachOSection("__DATA", "__mod_term_func",
+ MCSectionMachO::S_MOD_TERM_FUNC_POINTERS,
+ SectionKind::getDataRel());
}
// Exception Handling.
- LSDASection = getMachOSection("__TEXT", "__gcc_except_tab", 0,
- SectionKind::getReadOnlyWithRel());
+ LSDASection = getContext().getMachOSection("__TEXT", "__gcc_except_tab", 0,
+ SectionKind::getReadOnlyWithRel());
EHFrameSection =
- getMachOSection("__TEXT", "__eh_frame",
- MCSectionMachO::S_COALESCED |
- MCSectionMachO::S_ATTR_NO_TOC |
- MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS |
- MCSectionMachO::S_ATTR_LIVE_SUPPORT,
- SectionKind::getReadOnly());
+ getContext().getMachOSection("__TEXT", "__eh_frame",
+ MCSectionMachO::S_COALESCED |
+ MCSectionMachO::S_ATTR_NO_TOC |
+ MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS |
+ MCSectionMachO::S_ATTR_LIVE_SUPPORT,
+ SectionKind::getReadOnly());
// Debug Information.
DwarfAbbrevSection =
- getMachOSection("__DWARF", "__debug_abbrev", MCSectionMachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ getContext().getMachOSection("__DWARF", "__debug_abbrev",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
DwarfInfoSection =
- getMachOSection("__DWARF", "__debug_info", MCSectionMachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ getContext().getMachOSection("__DWARF", "__debug_info",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
DwarfLineSection =
- getMachOSection("__DWARF", "__debug_line", MCSectionMachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ getContext().getMachOSection("__DWARF", "__debug_line",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
DwarfFrameSection =
- getMachOSection("__DWARF", "__debug_frame", MCSectionMachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ getContext().getMachOSection("__DWARF", "__debug_frame",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
DwarfPubNamesSection =
- getMachOSection("__DWARF", "__debug_pubnames", MCSectionMachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ getContext().getMachOSection("__DWARF", "__debug_pubnames",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
DwarfPubTypesSection =
- getMachOSection("__DWARF", "__debug_pubtypes", MCSectionMachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ getContext().getMachOSection("__DWARF", "__debug_pubtypes",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
DwarfStrSection =
- getMachOSection("__DWARF", "__debug_str", MCSectionMachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ getContext().getMachOSection("__DWARF", "__debug_str",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
DwarfLocSection =
- getMachOSection("__DWARF", "__debug_loc", MCSectionMachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ getContext().getMachOSection("__DWARF", "__debug_loc",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
DwarfARangesSection =
- getMachOSection("__DWARF", "__debug_aranges", MCSectionMachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ getContext().getMachOSection("__DWARF", "__debug_aranges",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
DwarfRangesSection =
- getMachOSection("__DWARF", "__debug_ranges", MCSectionMachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ getContext().getMachOSection("__DWARF", "__debug_ranges",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
DwarfMacroInfoSection =
- getMachOSection("__DWARF", "__debug_macinfo", MCSectionMachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ getContext().getMachOSection("__DWARF", "__debug_macinfo",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
DwarfDebugInlineSection =
- getMachOSection("__DWARF", "__debug_inlined", MCSectionMachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ getContext().getMachOSection("__DWARF", "__debug_inlined",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
}
const MCSection *TargetLoweringObjectFileMachO::
@@ -619,8 +618,8 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section,
TAA, StubSize);
if (!ErrorCode.empty()) {
- // If invalid, report the error with llvm_report_error.
- llvm_report_error("Global variable '" + GV->getNameStr() +
+ // If invalid, report the error with report_fatal_error.
+ report_fatal_error("Global variable '" + GV->getNameStr() +
"' has an invalid section specifier '" + GV->getSection()+
"': " + ErrorCode + ".");
// Fall back to dropping it into the data section.
@@ -629,14 +628,14 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
// Get the section.
const MCSectionMachO *S =
- getMachOSection(Segment, Section, TAA, StubSize, Kind);
+ getContext().getMachOSection(Segment, Section, TAA, StubSize, Kind);
// Okay, now that we got the section, verify that the TAA & StubSize agree.
// If the user declared multiple globals with different section flags, we need
// to reject it here.
if (S->getTypeAndAttributes() != TAA || S->getStubSize() != StubSize) {
- // If invalid, report the error with llvm_report_error.
- llvm_report_error("Global variable '" + GV->getNameStr() +
+ // If invalid, report the error with report_fatal_error.
+ report_fatal_error("Global variable '" + GV->getNameStr() +
"' section type or attributes does not match previous"
" section specifier");
}
@@ -806,7 +805,7 @@ const MCSection *TargetLoweringObjectFileCOFF::
getCOFFSection(StringRef Name, bool isDirective, SectionKind Kind) const {
// Create the map if it doesn't already exist.
if (UniquingMap == 0)
- UniquingMap = new MachOUniqueMapTy();
+ UniquingMap = new COFFUniqueMapTy();
COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)UniquingMap;
// Do the lookup, if we have a hit, return it.
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
index 0b7fde7ec8f6..7f0412cf2d6d 100644
--- a/lib/CodeGen/VirtRegRewriter.cpp
+++ b/lib/CodeGen/VirtRegRewriter.cpp
@@ -9,7 +9,9 @@
#define DEBUG_TYPE "virtregrewriter"
#include "VirtRegRewriter.h"
+#include "VirtRegMap.h"
#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -893,7 +895,7 @@ unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC,
bool DoReMat = NewOp.StackSlotOrReMat > VirtRegMap::MAX_STACK_SLOT;
int SSorRMId = DoReMat
- ? VRM.getReMatId(NewOp.VirtReg) : NewOp.StackSlotOrReMat;
+ ? VRM.getReMatId(NewOp.VirtReg) : (int) NewOp.StackSlotOrReMat;
// Back-schedule reloads and remats.
MachineBasicBlock::iterator InsertLoc =
@@ -1064,6 +1066,7 @@ class LocalRewriter : public VirtRegRewriter {
VirtRegMap *VRM;
BitVector AllocatableRegs;
DenseMap<MachineInstr*, unsigned> DistanceMap;
+ DenseMap<int, SmallVector<MachineInstr*,4> > Slot2DbgValues;
MachineBasicBlock *MBB; // Basic block currently being processed.
@@ -1188,12 +1191,24 @@ bool LocalRewriter::runOnMachineFunction(MachineFunction &MF, VirtRegMap &vrm,
// Mark unused spill slots.
MachineFrameInfo *MFI = MF.getFrameInfo();
int SS = VRM->getLowSpillSlot();
- if (SS != VirtRegMap::NO_STACK_SLOT)
- for (int e = VRM->getHighSpillSlot(); SS <= e; ++SS)
+ if (SS != VirtRegMap::NO_STACK_SLOT) {
+ for (int e = VRM->getHighSpillSlot(); SS <= e; ++SS) {
+ SmallVector<MachineInstr*, 4> &DbgValues = Slot2DbgValues[SS];
if (!VRM->isSpillSlotUsed(SS)) {
MFI->RemoveStackObject(SS);
+ for (unsigned j = 0, ee = DbgValues.size(); j != ee; ++j) {
+ MachineInstr *DVMI = DbgValues[j];
+ MachineBasicBlock *DVMBB = DVMI->getParent();
+ DEBUG(dbgs() << "Removing debug info referencing FI#" << SS << '\n');
+ VRM->RemoveMachineInstrFromMaps(DVMI);
+ DVMBB->erase(DVMI);
+ }
++NumDSS;
}
+ DbgValues.clear();
+ }
+ }
+ Slot2DbgValues.clear();
return true;
}
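
This bookkeeping pairs with the hunk that follows: DBG_VALUE instructions referencing a frame index are recorded per slot while rewriting, and when an unused spill slot is deleted its stale debug instructions go with it. The shape of that map, with standard containers standing in for DenseMap and SmallVector:

#include <cstdio>
#include <map>
#include <string>
#include <vector>

int main() {
  std::map<int, std::vector<std::string>> Slot2DbgValues;

  // While rewriting: remember every DBG_VALUE that points at a slot.
  Slot2DbgValues[3].push_back("DBG_VALUE <fi#3>, 0, !\"x\"");

  // After rewriting: slot 3 turned out to be unused, so the stack object
  // and every debug instruction that referenced it are removed together.
  bool SlotUsed = false;  // what VRM->isSpillSlotUsed(3) would report
  if (!SlotUsed)
    for (const std::string &DV : Slot2DbgValues[3])
      printf("Removing debug info referencing FI#3: %s\n", DV.c_str());
  return 0;
}
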
@@ -1903,6 +1918,10 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
bool BackTracked = false;
MachineInstr &MI = *MII;
+ // Remember DBG_VALUE instructions that reference stack slots.
+ if (MI.isDebugValue() && MI.getOperand(0).isFI())
+ Slot2DbgValues[MI.getOperand(0).getIndex()].push_back(&MI);
+
/// ReusedOperands - Keep track of operand reuse in case we need to undo
/// reuse.
ReuseInfo ReusedOperands(MI, TRI);
diff --git a/lib/CodeGen/VirtRegRewriter.h b/lib/CodeGen/VirtRegRewriter.h
index 44f9df659c81..93474e0d7ff7 100644
--- a/lib/CodeGen/VirtRegRewriter.h
+++ b/lib/CodeGen/VirtRegRewriter.h
@@ -10,11 +10,10 @@
#ifndef LLVM_CODEGEN_VIRTREGREWRITER_H
#define LLVM_CODEGEN_VIRTREGREWRITER_H
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "VirtRegMap.h"
-
namespace llvm {
+ class LiveIntervals;
+ class MachineFunction;
+ class VirtRegMap;
/// VirtRegRewriter interface: Implementations of this interface assign
/// spilled virtual registers to stack slots, rewriting the code.
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index da21c2de9aa4..b17827ef255f 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -379,27 +379,27 @@ int ExecutionEngine::runFunctionAsMain(Function *Fn,
switch (NumArgs) {
case 3:
if (FTy->getParamType(2) != PPInt8Ty) {
- llvm_report_error("Invalid type for third argument of main() supplied");
+ report_fatal_error("Invalid type for third argument of main() supplied");
}
// FALLS THROUGH
case 2:
if (FTy->getParamType(1) != PPInt8Ty) {
- llvm_report_error("Invalid type for second argument of main() supplied");
+ report_fatal_error("Invalid type for second argument of main() supplied");
}
// FALLS THROUGH
case 1:
if (!FTy->getParamType(0)->isIntegerTy(32)) {
- llvm_report_error("Invalid type for first argument of main() supplied");
+ report_fatal_error("Invalid type for first argument of main() supplied");
}
// FALLS THROUGH
case 0:
if (!FTy->getReturnType()->isIntegerTy() &&
!FTy->getReturnType()->isVoidTy()) {
- llvm_report_error("Invalid return type of main() supplied");
+ report_fatal_error("Invalid return type of main() supplied");
}
break;
default:
- llvm_report_error("Invalid number of arguments of main() supplied");
+ report_fatal_error("Invalid number of arguments of main() supplied");
}
ArgvArray CArgv;
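
Spelled out, the switch above accepts exactly these four C prototypes, with each case falling through so the shorter prototypes' checks still apply (a plain-C++ restatement, not LLVM code):

#include <cstdio>

// The four main() arities runFunctionAsMain will execute; every arity
// also re-validates the earlier arguments via the fall-through cases.
int main0(void);                                // return must be i32/void
int main1(int argc);                            // first arg must be i32
int main2(int argc, char **argv);               // second arg must be i8**
int main3(int argc, char **argv, char **envp);  // third arg must be i8**

int main() {
  printf("accepted main() arities: 0 through 3\n");
  return 0;
}
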
@@ -771,7 +771,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
std::string msg;
raw_string_ostream Msg(msg);
Msg << "ConstantExpr not handled: " << *CE;
- llvm_report_error(Msg.str());
+ report_fatal_error(Msg.str());
}
GenericValue Result;
@@ -807,7 +807,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
std::string msg;
raw_string_ostream Msg(msg);
Msg << "ERROR: Constant unimplemented for type: " << *C->getType();
- llvm_report_error(Msg.str());
+ report_fatal_error(Msg.str());
}
return Result;
}
@@ -935,7 +935,7 @@ void ExecutionEngine::LoadValueFromMemory(GenericValue &Result,
std::string msg;
raw_string_ostream Msg(msg);
Msg << "Cannot load value of type " << *Ty << "!";
- llvm_report_error(Msg.str());
+ report_fatal_error(Msg.str());
}
}
@@ -1051,7 +1051,7 @@ void ExecutionEngine::emitGlobals() {
sys::DynamicLibrary::SearchForAddressOfSymbol(I->getName()))
addGlobalMapping(I, SymAddr);
else {
- llvm_report_error("Could not resolve external global address: "
+ report_fatal_error("Could not resolve external global address: "
+I->getName());
}
}
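This file is part of the mechanical llvm_report_error -> report_fatal_error rename that runs through the rest of the patch; the raw_string_ostream idiom for composing the message is untouched. A hedged sketch of that idiom against the post-rename API (the failing value is invented for illustration):

    #include "llvm/Support/ErrorHandling.h"
    #include "llvm/Support/raw_ostream.h"
    #include <string>
    using namespace llvm;

    void failOn(int UnhandledCase) {
      // Stream anything printable into a std::string, then hand it to
      // report_fatal_error, which never returns.
      std::string msg;
      raw_string_ostream Msg(msg);
      Msg << "case not handled: " << UnhandledCase;
      report_fatal_error(Msg.str());
    }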
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
index a2aad5ac556a..0748b546081b 100644
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -631,7 +631,7 @@ void Interpreter::visitUnwindInst(UnwindInst &I) {
do {
ECStack.pop_back();
if (ECStack.empty())
- llvm_report_error("Empty stack during unwind!");
+ report_fatal_error("Empty stack during unwind!");
Inst = ECStack.back().Caller.getInstruction();
} while (!(Inst && isa<InvokeInst>(Inst)));
@@ -644,7 +644,7 @@ void Interpreter::visitUnwindInst(UnwindInst &I) {
}
void Interpreter::visitUnreachableInst(UnreachableInst &I) {
- llvm_report_error("Program executed an 'unreachable' instruction!");
+ report_fatal_error("Program executed an 'unreachable' instruction!");
}
void Interpreter::visitBranchInst(BranchInst &I) {
diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
index 3ba783bb27a7..26a53b58f491 100644
--- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
+++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
@@ -126,7 +126,7 @@ static ffi_type *ffiTypeFor(const Type *Ty) {
default: break;
}
// TODO: Support other types such as StructTyID, ArrayTyID, OpaqueTyID, etc.
- llvm_report_error("Type could not be mapped for use with libffi.");
+ report_fatal_error("Type could not be mapped for use with libffi.");
return NULL;
}
@@ -174,7 +174,7 @@ static void *ffiValueFor(const Type *Ty, const GenericValue &AV,
default: break;
}
// TODO: Support other types such as StructTyID, ArrayTyID, OpaqueTyID, etc.
- llvm_report_error("Type value could not be mapped for use with libffi.");
+ report_fatal_error("Type value could not be mapped for use with libffi.");
return NULL;
}
@@ -188,7 +188,7 @@ static bool ffiInvoke(RawFunc Fn, Function *F,
// TODO: We don't have type information about the remaining arguments, because
// this information is never passed into ExecutionEngine::runFunction().
if (ArgVals.size() > NumArgs && F->isVarArg()) {
- llvm_report_error("Calling external var arg function '" + F->getName()
+ report_fatal_error("Calling external var arg function '" + F->getName()
+ "' is not supported by the Interpreter.");
}
@@ -284,7 +284,7 @@ GenericValue Interpreter::callExternalFunction(Function *F,
errs() << "Tried to execute an unknown external function: "
<< F->getType()->getDescription() << " __main\n";
else
- llvm_report_error("Tried to execute an unknown external function: " +
+ report_fatal_error("Tried to execute an unknown external function: " +
F->getType()->getDescription() + " " +F->getName());
#ifndef USE_LIBFFI
errs() << "Recompiling LLVM with --enable-libffi might help.\n";
@@ -325,7 +325,7 @@ GenericValue lle_X_exit(const FunctionType *FT,
GenericValue lle_X_abort(const FunctionType *FT,
const std::vector<GenericValue> &Args) {
//FIXME: should we report or raise here?
- //llvm_report_error("Interpreted program raised SIGABRT");
+ //report_fatal_error("Interpreted program raised SIGABRT");
raise (SIGABRT);
return GenericValue();
}
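ffiTypeFor and ffiValueFor translate LLVM types and GenericValues into libffi descriptors, and anything outside the supported scalar set is now a fatal error rather than a silent miss. A rough sketch of the integer half of such a mapping, written directly against libffi (the function is illustrative, not the interpreter's):

    #include <ffi.h>

    // Map an integer bit width onto libffi's matching type descriptor;
    // 0 signals "unsupported", which the caller turns into a fatal error.
    static ffi_type *ffiIntTypeForWidth(unsigned Bits) {
      switch (Bits) {
      case 8:  return &ffi_type_sint8;
      case 16: return &ffi_type_sint16;
      case 32: return &ffi_type_sint32;
      case 64: return &ffi_type_sint64;
      default: return 0;  // structs/arrays are still TODO upstream
      }
    }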
diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h
index bc4200b19151..564e9abad9e7 100644
--- a/lib/ExecutionEngine/Interpreter/Interpreter.h
+++ b/lib/ExecutionEngine/Interpreter/Interpreter.h
@@ -190,12 +190,10 @@ public:
return &(ECStack.back ().VarArgs[0]);
}
- //FIXME: private:
-public:
+private: // Helper functions
GenericValue executeGEPOperation(Value *Ptr, gep_type_iterator I,
gep_type_iterator E, ExecutionContext &SF);
-private: // Helper functions
// SwitchToNewBasicBlock - Start execution in a new basic block and run any
// PHI nodes in the top of the block. This is used for intraprocedural
// control flow.
diff --git a/lib/ExecutionEngine/JIT/Intercept.cpp b/lib/ExecutionEngine/JIT/Intercept.cpp
index c00b60a276c2..b367033d32b5 100644
--- a/lib/ExecutionEngine/JIT/Intercept.cpp
+++ b/lib/ExecutionEngine/JIT/Intercept.cpp
@@ -142,7 +142,7 @@ void *JIT::getPointerToNamedFunction(const std::string &Name,
return RP;
if (AbortOnFailure) {
- llvm_report_error("Program used external function '"+Name+
+ report_fatal_error("Program used external function '"+Name+
"' which could not be resolved!");
}
return 0;
diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp
index dd74d73208c0..546d2b21be95 100644
--- a/lib/ExecutionEngine/JIT/JIT.cpp
+++ b/lib/ExecutionEngine/JIT/JIT.cpp
@@ -304,7 +304,7 @@ JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
// Turn the machine code intermediate representation into bytes in memory that
// may be executed.
if (TM.addPassesToEmitMachineCode(PM, *JCE, OptLevel)) {
- llvm_report_error("Target does not support machine code emission!");
+ report_fatal_error("Target does not support machine code emission!");
}
// Register routine for informing unwinding runtime about new EH frames
@@ -352,7 +352,7 @@ void JIT::addModule(Module *M) {
// Turn the machine code intermediate representation into bytes in memory
// that may be executed.
if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) {
- llvm_report_error("Target does not support machine code emission!");
+ report_fatal_error("Target does not support machine code emission!");
}
// Initialize passes.
@@ -383,7 +383,7 @@ bool JIT::removeModule(Module *M) {
// Turn the machine code intermediate representation into bytes in memory
// that may be executed.
if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) {
- llvm_report_error("Target does not support machine code emission!");
+ report_fatal_error("Target does not support machine code emission!");
}
// Initialize passes.
@@ -665,7 +665,7 @@ void *JIT::getPointerToFunction(Function *F) {
// exists in this Module.
std::string ErrorMsg;
if (F->Materialize(&ErrorMsg)) {
- llvm_report_error("Error reading function '" + F->getName()+
+ report_fatal_error("Error reading function '" + F->getName()+
"' from bitcode file: " + ErrorMsg);
}
@@ -704,7 +704,7 @@ void *JIT::getOrEmitGlobalVariable(const GlobalVariable *GV) {
#endif
Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(GV->getName());
if (Ptr == 0) {
- llvm_report_error("Could not resolve external global address: "
+ report_fatal_error("Could not resolve external global address: "
+GV->getName());
}
addGlobalMapping(GV, Ptr);
@@ -754,7 +754,7 @@ char* JIT::getMemoryForGV(const GlobalVariable* GV) {
// situation. It's returned in the same block of memory as code which may
// not be writable.
if (isGVCompilationDisabled() && !GV->isConstant()) {
- llvm_report_error("Compilation of non-internal GlobalValue is disabled!");
+ report_fatal_error("Compilation of non-internal GlobalValue is disabled!");
}
// Some applications require globals and code to live together, so they may
diff --git a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
index 68471bd4d5f4..749a57d92c94 100644
--- a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
+++ b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
@@ -80,7 +80,7 @@ std::string JITDebugRegisterer::MakeELF(const Function *F, DebugInfo &I) {
// Copy the binary into the .text section. This isn't necessary, but it's
// useful to be able to disassemble the ELF by hand.
- ELFSection &Text = EW.getTextSection((Function *)F);
+ ELFSection &Text = EW.getTextSection(const_cast<Function *>(F));
Text.Addr = (uint64_t)I.FnStart;
// TODO: We could eliminate this copy if we somehow used a pointer/size pair
// instead of a vector.
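Swapping the C-style (Function *) cast for const_cast is a strictness fix: const_cast can only strip qualifiers, so the compiler will reject the conversion if the types ever drift apart, whereas the C-style cast would silently degrade to a reinterpret-style cast. For instance (generic example, not from the patch):

    void touch(int *p);

    void demo(const int *cp) {
      touch(const_cast<int *>(cp)); // removes const, nothing else
      // touch((int *)cp);          // would also compile if cp changed
      //                            // to an unrelated pointer type
    }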
diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
index 2f42e6bfe6d9..4b3ca8759b8a 100644
--- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
@@ -52,7 +52,7 @@ unsigned char* JITDwarfEmitter::EmitDwarfTable(MachineFunction& F,
unsigned char* Result = 0;
- const std::vector<Function *> Personalities = MMI->getPersonalities();
+ const std::vector<const Function *> Personalities = MMI->getPersonalities();
EHFramePtr = EmitCommonEHFrame(Personalities[MMI->getPersonalityIndex()]);
Result = EmitEHFrame(Personalities[MMI->getPersonalityIndex()], EHFramePtr,
@@ -75,7 +75,7 @@ JITDwarfEmitter::EmitFrameMoves(intptr_t BaseLabelPtr,
MCSymbol *Label = Move.getLabel();
// Throw out move if the label is invalid.
- if (Label && !Label->isDefined())
+ if (Label && (*JCE->getLabelLocations())[Label] == 0)
continue;
intptr_t LabelPtr = 0;
@@ -199,9 +199,9 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF,
assert(MMI && "MachineModuleInfo not registered!");
// Map all labels and get rid of any dead landing pads.
- MMI->TidyLandingPads();
+ MMI->TidyLandingPads(JCE->getLabelLocations());
- const std::vector<GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+ const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
if (PadInfos.empty()) return 0;
@@ -450,7 +450,7 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF,
// Emit the type ids.
for (unsigned M = TypeInfos.size(); M; --M) {
- GlobalVariable *GV = TypeInfos[M - 1];
+ const GlobalVariable *GV = TypeInfos[M - 1];
if (GV) {
if (TD->getPointerSize() == sizeof(int32_t))
@@ -609,7 +609,7 @@ unsigned JITDwarfEmitter::GetDwarfTableSizeInBytes(MachineFunction& F,
FinalSize += GetExceptionTableSizeInBytes(&F);
- const std::vector<Function *> Personalities = MMI->getPersonalities();
+ const std::vector<const Function *> Personalities = MMI->getPersonalities();
FinalSize +=
GetCommonEHFrameSizeInBytes(Personalities[MMI->getPersonalityIndex()]);
@@ -711,7 +711,7 @@ JITDwarfEmitter::GetFrameMovesSizeInBytes(intptr_t BaseLabelPtr,
MCSymbol *Label = Move.getLabel();
// Throw out move if the label is invalid.
- if (Label && !Label->isDefined())
+ if (Label && (*JCE->getLabelLocations())[Label] == 0)
continue;
intptr_t LabelPtr = 0;
@@ -780,9 +780,9 @@ JITDwarfEmitter::GetExceptionTableSizeInBytes(MachineFunction* MF) const {
unsigned FinalSize = 0;
// Map all labels and get rid of any dead landing pads.
- MMI->TidyLandingPads();
+ MMI->TidyLandingPads(JCE->getLabelLocations());
- const std::vector<GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+ const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
if (PadInfos.empty()) return 0;
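Both the emission and the size-estimation paths now decide label validity by consulting the code emitter's label-location table instead of MCSymbol::isDefined, since at JIT time a label only matters once it has an emitted address; TidyLandingPads takes the same table for the same reason. The lookup, reduced to a sketch (the helper name is invented; the DenseMap type matches the getLabelLocations accessor added below):

    #include "llvm/ADT/DenseMap.h"
    #include <cstdint>

    namespace llvm { class MCSymbol; }

    // A label is usable only once a nonzero address was recorded for it.
    static bool labelIsLive(llvm::DenseMap<llvm::MCSymbol *, uintptr_t> &Locs,
                            llvm::MCSymbol *Label) {
      return Label && Locs[Label] != 0;
    }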
diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp
index 7b8ab9ed3f58..e3855b277928 100644
--- a/lib/ExecutionEngine/JIT/JITEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp
@@ -23,6 +23,7 @@
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineCodeInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
@@ -30,8 +31,8 @@
#include "llvm/ExecutionEngine/GenericValue.h"
#include "llvm/ExecutionEngine/JITEventListener.h"
#include "llvm/ExecutionEngine/JITMemoryManager.h"
-#include "llvm/CodeGen/MachineCodeInfo.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetJITInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -43,7 +44,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Disassembler.h"
#include "llvm/System/Memory.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -369,7 +369,7 @@ namespace {
ValueMap<const Function *, EmittedCode,
EmittedFunctionConfig> EmittedFunctions;
- DILocation PrevDLT;
+ DebugLoc PrevDL;
/// Instance of the JIT
JIT *TheJIT;
@@ -377,14 +377,14 @@ namespace {
public:
JITEmitter(JIT &jit, JITMemoryManager *JMM, TargetMachine &TM)
: SizeEstimate(0), Resolver(jit, *this), MMI(0), CurFn(0),
- EmittedFunctions(this), PrevDLT(NULL), TheJIT(&jit) {
+ EmittedFunctions(this), TheJIT(&jit) {
MemMgr = JMM ? JMM : JITMemoryManager::CreateDefaultMemManager();
if (jit.getJITInfo().needsGOT()) {
MemMgr->AllocateGOT();
DEBUG(dbgs() << "JIT is managing a GOT\n");
}
- if (DwarfExceptionHandling || JITEmitDebugInfo) {
+ if (JITExceptionHandling || JITEmitDebugInfo) {
DE.reset(new JITDwarfEmitter(jit));
}
if (JITEmitDebugInfo) {
@@ -463,6 +463,10 @@ namespace {
LabelLocations[Label] = getCurrentPCValue();
}
+ virtual DenseMap<MCSymbol*, uintptr_t> *getLabelLocations() {
+ return &LabelLocations;
+ }
+
virtual uintptr_t getLabelAddress(MCSymbol *Label) const {
assert(LabelLocations.count(Label) && "Label not emitted!");
return LabelLocations.find(Label)->second;
@@ -737,7 +741,7 @@ void *JITResolver::JITCompilerFn(void *Stub) {
// If lazy compilation is disabled, emit a useful error message and abort.
if (!JR->TheJIT->isCompilingLazily()) {
- llvm_report_error("LLVM JIT requested to do lazy compilation of function '"
+ report_fatal_error("LLVM JIT requested to do lazy compilation of function '"
+ F->getName() + "' when lazy compiles are disabled!");
}
@@ -823,19 +827,17 @@ void *JITEmitter::getPointerToGVIndirectSym(GlobalValue *V, void *Reference) {
void JITEmitter::processDebugLoc(DebugLoc DL, bool BeforePrintingInsn) {
if (DL.isUnknown()) return;
if (!BeforePrintingInsn) return;
-
- // FIXME: This is horribly inefficient.
- DILocation CurDLT(DL.getAsMDNode(
- EmissionDetails.MF->getFunction()->getContext()));
- if (CurDLT.getScope().getNode() != 0 && PrevDLT.getNode() !=CurDLT.getNode()){
+ const LLVMContext& Context = EmissionDetails.MF->getFunction()->getContext();
+
+ if (DL.getScope(Context) != 0 && PrevDL != DL) {
JITEvent_EmittedFunctionDetails::LineStart NextLine;
NextLine.Address = getCurrentPCValue();
NextLine.Loc = DL;
EmissionDetails.LineStarts.push_back(NextLine);
}
- PrevDLT = CurDLT;
+ PrevDL = DL;
}
static unsigned GetConstantPoolSizeInBytes(MachineConstantPool *MCP,
@@ -945,7 +947,7 @@ unsigned JITEmitter::addSizeOfGlobalsInConstantVal(
std::string msg;
raw_string_ostream Msg(msg);
Msg << "ConstantExpr not handled: " << *CE;
- llvm_report_error(Msg.str());
+ report_fatal_error(Msg.str());
}
}
}
@@ -997,12 +999,13 @@ unsigned JITEmitter::GetSizeOfGlobalsInBytes(MachineFunction &MF) {
for (unsigned CurOp = 0; CurOp < NumOps; CurOp++) {
const MachineOperand &MO = MI.getOperand(CurOp);
if (MO.isGlobal()) {
- GlobalValue* V = MO.getGlobal();
+ const GlobalValue* V = MO.getGlobal();
const GlobalVariable *GV = dyn_cast<const GlobalVariable>(V);
if (!GV)
continue;
// If seen in previous function, it will have an entry here.
- if (TheJIT->getPointerToGlobalIfAvailable(GV))
+ if (TheJIT->getPointerToGlobalIfAvailable(
+ const_cast<GlobalVariable *>(GV)))
continue;
// If seen earlier in this function, it will have an entry here.
// FIXME: it should be possible to combine these tables, by
@@ -1212,6 +1215,9 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
TheJIT->NotifyFunctionEmitted(*F.getFunction(), FnStart, FnEnd-FnStart,
EmissionDetails);
+ // Reset the previous debug location.
+ PrevDL = DebugLoc();
+
DEBUG(dbgs() << "JIT: Finished CodeGen of [" << (void*)FnStart
<< "] Function: " << F.getFunction()->getName()
<< ": " << (FnEnd-FnStart) << " bytes of text, "
@@ -1223,45 +1229,44 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
// Mark code region readable and executable if it's not so already.
MemMgr->setMemoryExecutable();
- DEBUG(
- if (sys::hasDisassembler()) {
- dbgs() << "JIT: Disassembled code:\n";
- dbgs() << sys::disassembleBuffer(FnStart, FnEnd-FnStart,
- (uintptr_t)FnStart);
- } else {
- dbgs() << "JIT: Binary code:\n";
- uint8_t* q = FnStart;
- for (int i = 0; q < FnEnd; q += 4, ++i) {
- if (i == 4)
- i = 0;
- if (i == 0)
- dbgs() << "JIT: " << (long)(q - FnStart) << ": ";
- bool Done = false;
- for (int j = 3; j >= 0; --j) {
- if (q + j >= FnEnd)
- Done = true;
- else
- dbgs() << (unsigned short)q[j];
+ DEBUG({
+ if (sys::hasDisassembler()) {
+ dbgs() << "JIT: Disassembled code:\n";
+ dbgs() << sys::disassembleBuffer(FnStart, FnEnd-FnStart,
+ (uintptr_t)FnStart);
+ } else {
+ dbgs() << "JIT: Binary code:\n";
+ uint8_t* q = FnStart;
+ for (int i = 0; q < FnEnd; q += 4, ++i) {
+ if (i == 4)
+ i = 0;
+ if (i == 0)
+ dbgs() << "JIT: " << (long)(q - FnStart) << ": ";
+ bool Done = false;
+ for (int j = 3; j >= 0; --j) {
+ if (q + j >= FnEnd)
+ Done = true;
+ else
+ dbgs() << (unsigned short)q[j];
+ }
+ if (Done)
+ break;
+ dbgs() << ' ';
+ if (i == 3)
+ dbgs() << '\n';
}
- if (Done)
- break;
- dbgs() << ' ';
- if (i == 3)
- dbgs() << '\n';
+ dbgs() << '\n';
}
- dbgs()<< '\n';
- }
- );
+ });
- if (DwarfExceptionHandling || JITEmitDebugInfo) {
+ if (JITExceptionHandling || JITEmitDebugInfo) {
uintptr_t ActualSize = 0;
SavedBufferBegin = BufferBegin;
SavedBufferEnd = BufferEnd;
SavedCurBufferPtr = CurBufferPtr;
- if (MemMgr->NeedsExactSize()) {
+ if (MemMgr->NeedsExactSize())
ActualSize = DE->GetDwarfTableSizeInBytes(F, *this, FnStart, FnEnd);
- }
BufferBegin = CurBufferPtr = MemMgr->startExceptionTable(F.getFunction(),
ActualSize);
@@ -1277,7 +1282,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
BufferEnd = SavedBufferEnd;
CurBufferPtr = SavedCurBufferPtr;
- if (DwarfExceptionHandling) {
+ if (JITExceptionHandling) {
TheJIT->RegisterTable(FrameRegister);
}
@@ -1375,7 +1380,7 @@ void JITEmitter::emitConstantPool(MachineConstantPool *MCP) {
ConstPoolAddresses.push_back(CAddr);
if (CPE.isMachineConstantPoolEntry()) {
// FIXME: add support to lower machine constant pool values into bytes!
- llvm_report_error("Initialize memory with machine specific constant pool"
+ report_fatal_error("Initialize memory with machine specific constant pool"
"entry has not been implemented!");
}
TheJIT->InitializeMemory(CPE.Val.ConstVal, (void*)CAddr);
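The PrevDLT -> PrevDL change replaces per-instruction reconstruction of a DILocation from metadata with a direct DebugLoc comparison, and finishFunction now resets PrevDL so stale state cannot leak into the next function. The shape of that "act only on change" pattern, reduced to a self-contained sketch (line numbers stand in for DebugLocs):

    // Minimal model of the processDebugLoc/finishFunction state handling.
    struct LocTracker {
      int PrevLine;                  // stands in for PrevDL
      int Emitted;
      LocTracker() : PrevLine(-1), Emitted(0) {}
      void process(int Line) {       // stands in for processDebugLoc(DL)
        if (Line < 0) return;        // unknown locations are skipped
        if (Line != PrevLine) ++Emitted; // record a LineStart equivalent
        PrevLine = Line;
      }
      void finishFunction() { PrevLine = -1; } // reset, as the patch does
    };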
diff --git a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
index a17caa17f4dd..653e6f1fc07c 100644
--- a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
+++ b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
@@ -15,6 +15,7 @@
#include "llvm/ExecutionEngine/JITMemoryManager.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/GlobalValue.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Compiler.h"
@@ -22,12 +23,9 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Memory.h"
-#include <map>
#include <vector>
#include <cassert>
#include <climits>
-#include <cstdio>
-#include <cstdlib>
#include <cstring>
using namespace llvm;
@@ -614,8 +612,8 @@ sys::MemoryBlock DefaultJITMemoryManager::allocateNewSlab(size_t size) {
sys::MemoryBlock *LastSlabPtr = LastSlab.base() ? &LastSlab : 0;
sys::MemoryBlock B = sys::Memory::AllocateRWX(size, LastSlabPtr, &ErrMsg);
if (B.base() == 0) {
- llvm_report_error("Allocation failed when allocating new memory in the"
- " JIT\n" + ErrMsg);
+ report_fatal_error("Allocation failed when allocating new memory in the"
+ " JIT\n" + Twine(ErrMsg));
}
LastSlab = B;
++NumSlabs;
diff --git a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp
index 9c01b7329a5c..1ca084b5808b 100644
--- a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp
+++ b/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp
@@ -114,26 +114,43 @@ void OProfileJITEventListener::NotifyFunctionEmitted(
return;
}
- // Now we convert the line number information from the address/DebugLoc format
- // in Details to the address/filename/lineno format that OProfile expects.
- // OProfile 0.9.4 (and maybe later versions) has a bug that causes it to
- // ignore line numbers for addresses above 4G.
- FilenameCache Filenames;
- std::vector<debug_line_info> LineInfo;
- LineInfo.reserve(1 + Details.LineStarts.size());
- if (!Details.MF->getDefaultDebugLoc().isUnknown()) {
- LineInfo.push_back(LineStartToOProfileFormat(
- *Details.MF, Filenames,
- reinterpret_cast<uintptr_t>(FnStart),
- Details.MF->getDefaultDebugLoc()));
- }
- for (std::vector<EmittedFunctionDetails::LineStart>::const_iterator
+ if (!Details.LineStarts.empty()) {
+ // Now we convert the line number information from the address/DebugLoc
+ // format in Details to the address/filename/lineno format that OProfile
+ // expects. Note that OProfile 0.9.4 has a bug that causes it to ignore
+ // line numbers for addresses above 4G.
+ FilenameCache Filenames;
+ std::vector<debug_line_info> LineInfo;
+ LineInfo.reserve(1 + Details.LineStarts.size());
+
+ DebugLoc FirstLoc = Details.LineStarts[0].Loc;
+ assert(!FirstLoc.isUnknown()
+ && "LineStarts should not contain unknown DebugLocs");
+ MDNode *FirstLocScope = FirstLoc.getScope(F.getContext());
+ DISubprogram FunctionDI = getDISubprogram(FirstLocScope);
+ if (FunctionDI.Verify()) {
+ // If we have debug info for the function itself, use that as the line
+ // number of the first several instructions. Otherwise, after filling
+ // LineInfo, we'll adjust the address of the first line number to point at
+ // the start of the function.
+ debug_line_info line_info;
+ line_info.vma = reinterpret_cast<uintptr_t>(FnStart);
+ line_info.lineno = FunctionDI.getLineNumber();
+ line_info.filename = Filenames.getFilename(FirstLocScope);
+ LineInfo.push_back(line_info);
+ }
+
+ for (std::vector<EmittedFunctionDetails::LineStart>::const_iterator
I = Details.LineStarts.begin(), E = Details.LineStarts.end();
- I != E; ++I) {
- LineInfo.push_back(LineStartToOProfileFormat(
- *Details.MF, Filenames, I->Address, I->Loc));
- }
- if (!LineInfo.empty()) {
+ I != E; ++I) {
+ LineInfo.push_back(LineStartToOProfileFormat(
+ *Details.MF, Filenames, I->Address, I->Loc));
+ }
+
+ // In case the function didn't have line info of its own, adjust the first
+ // line info's address to include the start of the function.
+ LineInfo[0].vma = reinterpret_cast<uintptr_t>(FnStart);
+
if (op_write_debug_line_info(Agent, FnStart,
LineInfo.size(), &*LineInfo.begin()) == -1) {
DEBUG(dbgs()
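The OProfile rewrite above does three things: it emits line info only when LineStarts is non-empty, it seeds the table from the function's own DISubprogram when one can be recovered from the first location's scope, and it forces the first record's vma back to FnStart so the function entry is always covered even without function-level debug info. That last fix-up in isolation (debug_line_info here is a stand-in for opagent's struct):

    #include <cstdint>
    #include <vector>

    struct debug_line_info {          // stand-in for the opagent type
      uintptr_t vma;
      unsigned lineno;
      const char *filename;
    };

    // Ensure record 0 starts exactly at the function's first byte.
    static void coverFunctionEntry(std::vector<debug_line_info> &LineInfo,
                                   uintptr_t FnStart) {
      if (!LineInfo.empty())
        LineInfo[0].vma = FnStart;
    }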
diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp
index f0da694a19e5..2b2399463703 100644
--- a/lib/MC/MCAsmInfo.cpp
+++ b/lib/MC/MCAsmInfo.cpp
@@ -35,6 +35,7 @@ MCAsmInfo::MCAsmInfo() {
AssemblerDialect = 0;
AllowQuotesInName = false;
AllowNameToStartWithDigit = false;
+ AllowPeriodsInName = true;
ZeroDirective = "\t.zero\t";
AsciiDirective = "\t.ascii\t";
AscizDirective = "\t.asciz\t";
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index 03b8bd340b4e..69afcc8fb7e2 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -212,7 +212,7 @@ static bool isScatteredFixupFullyResolvedSimple(const MCAssembler &Asm,
// resolved. This also works in conjunction with absolutized .set, which
// requires the compiler to use .set to absolutize the differences between
// symbols which the compiler knows to be assembly time constants, so we don't
- // need to worry about consider symbol differences fully resolved.
+ // need to worry about considering symbol differences fully resolved.
// Non-relative fixups are only resolved if constant.
if (!BaseSection)
@@ -342,7 +342,7 @@ bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout,
++stats::EvaluateFixup;
if (!Fixup.Value->EvaluateAsRelocatable(Target, &Layout))
- llvm_report_error("expected relocatable expression");
+ report_fatal_error("expected relocatable expression");
// FIXME: How do non-scattered symbols work in ELF? I presume the linker
// doesn't support small relocations, but then under what criteria does the
@@ -466,12 +466,12 @@ uint64_t MCAssembler::LayoutSection(MCSectionData &SD,
int64_t TargetLocation;
if (!OF.getOffset().EvaluateAsAbsolute(TargetLocation, &Layout))
- llvm_report_error("expected assembly-time absolute expression");
+ report_fatal_error("expected assembly-time absolute expression");
// FIXME: We need a way to communicate this error.
int64_t Offset = TargetLocation - FragmentOffset;
if (Offset < 0)
- llvm_report_error("invalid .org offset '" + Twine(TargetLocation) +
+ report_fatal_error("invalid .org offset '" + Twine(TargetLocation) +
"' (at offset '" + Twine(FragmentOffset) + "'");
EffectiveSize = Offset;
@@ -526,7 +526,7 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
// multiple .align directives to enforce the semantics it wants), but is
// severe enough that we want to report it. How to handle this?
if (Count * AF.getValueSize() != FragmentSize)
- llvm_report_error("undefined .align directive, value size '" +
+ report_fatal_error("undefined .align directive, value size '" +
Twine(AF.getValueSize()) +
"' is not a divisor of padding size '" +
Twine(FragmentSize) + "'");
@@ -537,7 +537,7 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
// If we are aligning with nops, ask that target to emit the right data.
if (AF.getEmitNops()) {
if (!Asm.getBackend().WriteNopData(Count, OW))
- llvm_report_error("unable to write nop sequence of " +
+ report_fatal_error("unable to write nop sequence of " +
Twine(Count) + " bytes");
break;
}
@@ -662,7 +662,7 @@ void MCAssembler::Finish() {
uint64_t StartOffset = OS.tell();
llvm::OwningPtr<MCObjectWriter> Writer(getBackend().createObjectWriter(OS));
if (!Writer)
- llvm_report_error("unable to create object writer!");
+ report_fatal_error("unable to create object writer!");
// Allow the object writer a chance to perform post-layout binding (for
// example, to set the index fields in the symbol data).
@@ -715,6 +715,8 @@ bool MCAssembler::FixupNeedsRelaxation(const MCAsmFixup &Fixup,
return true;
// Otherwise, relax if the value is too big for a (signed) i8.
+ //
+ // FIXME: This is target dependent!
return int64_t(Value) != int64_t(int8_t(Value));
}
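The relaxation predicate asks whether the fixup value survives a round-trip through signed i8; the new FIXME is right that the 8-bit width is an x86 assumption. The check itself is easy to verify in isolation:

    #include <cassert>
    #include <cstdint>

    // True iff Value does not fit in a signed 8-bit immediate.
    static bool needsRelaxation(uint64_t Value) {
      return int64_t(Value) != int64_t(int8_t(Value));
    }

    int main() {
      assert(!needsRelaxation(127));             // max i8: fits
      assert(needsRelaxation(128));              // one past: relax
      assert(!needsRelaxation(uint64_t(-128)));  // min i8: fits
      return 0;
    }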
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp
index e02cbc74622d..dc757bb0bffa 100644
--- a/lib/MC/MCContext.cpp
+++ b/lib/MC/MCContext.cpp
@@ -9,20 +9,35 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
using namespace llvm;
+typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy;
+typedef StringMap<const MCSectionELF*> ELFUniqueMapTy;
+
+
MCContext::MCContext(const MCAsmInfo &mai) : MAI(mai), NextUniqueID(0) {
+ MachOUniquingMap = 0;
+ ELFUniquingMap = 0;
}
MCContext::~MCContext() {
- // NOTE: The sections are all allocated out of a bump pointer allocator,
+ // NOTE: The symbols are all allocated out of a bump pointer allocator,
// we don't need to free them here.
+
+ // If we have the MachO uniquing map, free it.
+ delete (MachOUniqueMapTy*)MachOUniquingMap;
+ delete (ELFUniqueMapTy*)ELFUniquingMap;
}
+//===----------------------------------------------------------------------===//
+// Symbol Manipulation
+//===----------------------------------------------------------------------===//
+
MCSymbol *MCContext::GetOrCreateSymbol(StringRef Name) {
assert(!Name.empty() && "Normal symbols cannot be unnamed!");
@@ -55,3 +70,56 @@ MCSymbol *MCContext::CreateTempSymbol() {
MCSymbol *MCContext::LookupSymbol(StringRef Name) const {
return Symbols.lookup(Name);
}
+
+//===----------------------------------------------------------------------===//
+// Section Management
+//===----------------------------------------------------------------------===//
+
+const MCSectionMachO *MCContext::
+getMachOSection(StringRef Segment, StringRef Section,
+ unsigned TypeAndAttributes,
+ unsigned Reserved2, SectionKind Kind) {
+
+ // We unique sections by their segment/section pair. The returned section
+ // may not have the same flags as the requested section, if so this should be
+ // diagnosed by the client as an error.
+
+ // Create the map if it doesn't already exist.
+ if (MachOUniquingMap == 0)
+ MachOUniquingMap = new MachOUniqueMapTy();
+ MachOUniqueMapTy &Map = *(MachOUniqueMapTy*)MachOUniquingMap;
+
+ // Form the name to look up.
+ SmallString<64> Name;
+ Name += Segment;
+ Name.push_back(',');
+ Name += Section;
+
+ // Do the lookup, if we have a hit, return it.
+ const MCSectionMachO *&Entry = Map[Name.str()];
+ if (Entry) return Entry;
+
+ // Otherwise, return a new section.
+ return Entry = new (*this) MCSectionMachO(Segment, Section, TypeAndAttributes,
+ Reserved2, Kind);
+}
+
+
+const MCSection *MCContext::
+getELFSection(StringRef Section, unsigned Type, unsigned Flags,
+ SectionKind Kind, bool IsExplicit) {
+ if (ELFUniquingMap == 0)
+ ELFUniquingMap = new ELFUniqueMapTy();
+ ELFUniqueMapTy &Map = *(ELFUniqueMapTy*)ELFUniquingMap;
+
+ // Do the lookup, if we have a hit, return it.
+ StringMapEntry<const MCSectionELF*> &Entry = Map.GetOrCreateValue(Section);
+ if (Entry.getValue()) return Entry.getValue();
+
+ MCSectionELF *Result = new (*this) MCSectionELF(Entry.getKey(), Type, Flags,
+ Kind, IsExplicit);
+ Entry.setValue(Result);
+ return Result;
+}
+
+
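MCContext now owns section uniquing, and it hides the two StringMaps behind untyped pointers so that MCContext.h does not have to include StringMap or the section headers; the .cpp casts the pointer back at each use and deletes it in the destructor. A compact sketch of that header-hygiene trick (generic names, not the MC API):

    #include "llvm/ADT/StringMap.h"

    struct Section { /* ... */ };
    typedef llvm::StringMap<Section *> UniqueMapTy;

    struct Context {
      void *UniquingMap;            // opaque in the header
      Context() : UniquingMap(0) {}
      Section *getSection(llvm::StringRef Name) {
        if (!UniquingMap)           // create the map on first use
          UniquingMap = new UniqueMapTy();
        UniqueMapTy &Map = *(UniqueMapTy *)UniquingMap;
        Section *&Entry = Map[Name];
        if (!Entry) Entry = new Section(); // one object per name
        return Entry;               // (real sections are bump-allocated,
      }                             //  so only the map needs deleting)
      ~Context() { delete (UniqueMapTy *)UniquingMap; }
    };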
diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp
index 22c8d762df3a..118331278484 100644
--- a/lib/MC/MCParser/AsmLexer.cpp
+++ b/lib/MC/MCParser/AsmLexer.cpp
@@ -74,6 +74,11 @@ AsmToken AsmLexer::LexIdentifier() {
while (isalnum(*CurPtr) || *CurPtr == '_' || *CurPtr == '$' ||
*CurPtr == '.' || *CurPtr == '@')
++CurPtr;
+
+ // Handle . as a special case.
+ if (CurPtr == TokStart+1 && TokStart[0] == '.')
+ return AsmToken(AsmToken::Dot, StringRef(TokStart, 1));
+
return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart));
}
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 4e62689c51ee..a63d2e43df10 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -30,16 +30,10 @@ using namespace llvm;
enum { DEFAULT_ADDRSPACE = 0 };
-// Mach-O section uniquing.
-//
-// FIXME: Figure out where this should live, it should be shared by
-// TargetLoweringObjectFile.
-typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy;
-
AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx, MCStreamer &_Out,
const MCAsmInfo &_MAI)
: Lexer(_MAI), Ctx(_Ctx), Out(_Out), SrcMgr(_SM), TargetParser(0),
- CurBuffer(0), SectionUniquingMap(0) {
+ CurBuffer(0) {
Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
// Debugging directives.
@@ -51,39 +45,6 @@ AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx, MCStreamer &_Out,
AsmParser::~AsmParser() {
- // If we have the MachO uniquing map, free it.
- delete (MachOUniqueMapTy*)SectionUniquingMap;
-}
-
-const MCSection *AsmParser::getMachOSection(const StringRef &Segment,
- const StringRef &Section,
- unsigned TypeAndAttributes,
- unsigned Reserved2,
- SectionKind Kind) const {
- // We unique sections by their segment/section pair. The returned section
- // may not have the same flags as the requested section, if so this should be
- // diagnosed by the client as an error.
-
- // Create the map if it doesn't already exist.
- if (SectionUniquingMap == 0)
- SectionUniquingMap = new MachOUniqueMapTy();
- MachOUniqueMapTy &Map = *(MachOUniqueMapTy*)SectionUniquingMap;
-
- // Form the name to look up.
- SmallString<64> Name;
- Name += Segment;
- Name.push_back(',');
- Name += Section;
-
- // Do the lookup, if we have a hit, return it.
- const MCSectionMachO *&Entry = Map[Name.str()];
-
- // FIXME: This should validate the type and attributes.
- if (Entry) return Entry;
-
- // Otherwise, return a new section.
- return Entry = MCSectionMachO::Create(Segment, Section, TypeAndAttributes,
- Reserved2, Kind, Ctx);
}
void AsmParser::Warning(SMLoc L, const Twine &Msg) {
@@ -143,7 +104,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
//
// FIXME: Target hook & command line option for initial section.
if (!NoInitialTextSection)
- Out.SwitchSection(getMachOSection("__TEXT", "__text",
+ Out.SwitchSection(Ctx.getMachOSection("__TEXT", "__text",
MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
0, SectionKind::getText()));
@@ -156,29 +117,6 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
// While we have input, parse each statement.
while (Lexer.isNot(AsmToken::Eof)) {
- // Handle conditional assembly here before calling ParseStatement()
- if (Lexer.getKind() == AsmToken::Identifier) {
- // If we have an identifier, handle it as the key symbol.
- AsmToken ID = getTok();
- SMLoc IDLoc = ID.getLoc();
- StringRef IDVal = ID.getString();
-
- if (IDVal == ".if" ||
- IDVal == ".elseif" ||
- IDVal == ".else" ||
- IDVal == ".endif") {
- if (!ParseConditionalAssemblyDirectives(IDVal, IDLoc))
- continue;
- HadError = true;
- EatToEndOfStatement();
- continue;
- }
- }
- if (TheCondState.Ignore) {
- EatToEndOfStatement();
- continue;
- }
-
if (!ParseStatement()) continue;
// We had an error, remember it and recover by skipping to the next line.
@@ -198,21 +136,6 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
return HadError;
}
-/// ParseConditionalAssemblyDirectives - parse the conditional assembly
-/// directives
-bool AsmParser::ParseConditionalAssemblyDirectives(StringRef Directive,
- SMLoc DirectiveLoc) {
- if (Directive == ".if")
- return ParseDirectiveIf(DirectiveLoc);
- if (Directive == ".elseif")
- return ParseDirectiveElseIf(DirectiveLoc);
- if (Directive == ".else")
- return ParseDirectiveElse(DirectiveLoc);
- if (Directive == ".endif")
- return ParseDirectiveEndIf(DirectiveLoc);
- return true;
-}
-
/// EatToEndOfStatement - Throw away the rest of the line for testing purposes.
void AsmParser::EatToEndOfStatement() {
while (Lexer.isNot(AsmToken::EndOfStatement) &&
@@ -248,6 +171,7 @@ MCSymbol *AsmParser::CreateSymbol(StringRef Name) {
/// primaryexpr ::= (parenexpr
/// primaryexpr ::= symbol
/// primaryexpr ::= number
+/// primaryexpr ::= '.'
/// primaryexpr ::= ~,+,- primaryexpr
bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
switch (Lexer.getKind()) {
@@ -292,6 +216,17 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
EndLoc = Lexer.getLoc();
Lex(); // Eat token.
return false;
+ case AsmToken::Dot: {
+ // This is a '.' reference, which references the current PC. Emit a
+ // temporary label to the streamer and refer to it.
+ MCSymbol *Sym = Ctx.CreateTempSymbol();
+ Out.EmitLabel(Sym);
+ Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, getContext());
+ EndLoc = Lexer.getLoc();
+ Lex(); // Eat the dot token.
+ return false;
+ }
+
case AsmToken::LParen:
Lex(); // Eat the '('.
return ParseParenExpr(Res, EndLoc);
@@ -484,9 +419,30 @@ bool AsmParser::ParseStatement() {
AsmToken ID = getTok();
SMLoc IDLoc = ID.getLoc();
StringRef IDVal;
- if (ParseIdentifier(IDVal))
- return TokError("unexpected token at start of statement");
+ if (ParseIdentifier(IDVal)) {
+ if (!TheCondState.Ignore)
+ return TokError("unexpected token at start of statement");
+ IDVal = "";
+ }
+ // Handle conditional assembly here before checking for skipping. We
+ // have to do this so that .endif isn't skipped in a ".if 0" block for
+ // example.
+ if (IDVal == ".if")
+ return ParseDirectiveIf(IDLoc);
+ if (IDVal == ".elseif")
+ return ParseDirectiveElseIf(IDLoc);
+ if (IDVal == ".else")
+ return ParseDirectiveElse(IDLoc);
+ if (IDVal == ".endif")
+ return ParseDirectiveEndIf(IDLoc);
+
+ // If we are in a ".if 0" block, ignore this statement.
+ if (TheCondState.Ignore) {
+ EatToEndOfStatement();
+ return false;
+ }
+
// FIXME: Recurse on local labels?
// See what kind of statement we have.
@@ -773,39 +729,38 @@ bool AsmParser::ParseStatement() {
return false;
}
-
SmallVector<MCParsedAsmOperand*, 8> ParsedOperands;
- if (getTargetParser().ParseInstruction(IDVal, IDLoc, ParsedOperands))
- // FIXME: Leaking ParsedOperands on failure.
- return true;
-
- if (Lexer.isNot(AsmToken::EndOfStatement))
- // FIXME: Leaking ParsedOperands on failure.
- return TokError("unexpected token in argument list");
-
- // Eat the end of statement marker.
- Lex();
-
-
- MCInst Inst;
+ bool HadError = getTargetParser().ParseInstruction(IDVal, IDLoc,
+ ParsedOperands);
+ if (!HadError && Lexer.isNot(AsmToken::EndOfStatement))
+ HadError = TokError("unexpected token in argument list");
+
+ // If parsing succeeded, match the instruction.
+ if (!HadError) {
+ MCInst Inst;
+ if (!getTargetParser().MatchInstruction(ParsedOperands, Inst)) {
+ // Emit the instruction on success.
+ Out.EmitInstruction(Inst);
+ } else {
+ // Otherwise emit a diagnostic about the match failure and set the error
+ // flag.
+ //
+ // FIXME: We should give nicer diagnostics about the exact failure.
+ Error(IDLoc, "unrecognized instruction");
+ HadError = true;
+ }
+ }
- bool MatchFail = getTargetParser().MatchInstruction(ParsedOperands, Inst);
+ // If there was no error, consume the end-of-statement token. Otherwise this
+ // will be done by our caller.
+ if (!HadError)
+ Lex();
// Free any parsed operands.
for (unsigned i = 0, e = ParsedOperands.size(); i != e; ++i)
delete ParsedOperands[i];
- if (MatchFail) {
- // FIXME: We should give nicer diagnostics about the exact failure.
- Error(IDLoc, "unrecognized instruction");
- return true;
- }
-
- // Instruction is good, process it.
- Out.EmitInstruction(Inst);
-
- // Skip to end of line for now.
- return false;
+ return HadError;
}
bool AsmParser::ParseAssignment(const StringRef &Name) {
@@ -919,9 +874,9 @@ bool AsmParser::ParseDirectiveDarwinSection() {
// FIXME: Arch specific.
bool isText = Segment == "__TEXT"; // FIXME: Hack.
- Out.SwitchSection(getMachOSection(Segment, Section, TAA, StubSize,
- isText ? SectionKind::getText()
- : SectionKind::getDataRel()));
+ Out.SwitchSection(Ctx.getMachOSection(Segment, Section, TAA, StubSize,
+ isText ? SectionKind::getText()
+ : SectionKind::getDataRel()));
return false;
}
@@ -936,9 +891,9 @@ bool AsmParser::ParseDirectiveSectionSwitch(const char *Segment,
// FIXME: Arch specific.
bool isText = StringRef(Segment) == "__TEXT"; // FIXME: Hack.
- Out.SwitchSection(getMachOSection(Segment, Section, TAA, StubSize,
- isText ? SectionKind::getText()
- : SectionKind::getDataRel()));
+ Out.SwitchSection(Ctx.getMachOSection(Segment, Section, TAA, StubSize,
+ isText ? SectionKind::getText()
+ : SectionKind::getDataRel()));
// Set the implicit alignment, if any.
//
@@ -1374,9 +1329,9 @@ bool AsmParser::ParseDirectiveComm(bool IsLocal) {
// '.lcomm' is equivalent to '.zerofill'.
// Create the Symbol as a common or local common with Size and Pow2Alignment
if (IsLocal) {
- Out.EmitZerofill(getMachOSection("__DATA", "__bss",
- MCSectionMachO::S_ZEROFILL, 0,
- SectionKind::getBSS()),
+ Out.EmitZerofill(Ctx.getMachOSection("__DATA", "__bss",
+ MCSectionMachO::S_ZEROFILL, 0,
+ SectionKind::getBSS()),
Sym, Size, 1 << Pow2Alignment);
return false;
}
@@ -1410,9 +1365,9 @@ bool AsmParser::ParseDirectiveDarwinZerofill() {
// the section but with no symbol.
if (Lexer.is(AsmToken::EndOfStatement)) {
// Create the zerofill section but no symbol
- Out.EmitZerofill(getMachOSection(Segment, Section,
- MCSectionMachO::S_ZEROFILL, 0,
- SectionKind::getBSS()));
+ Out.EmitZerofill(Ctx.getMachOSection(Segment, Section,
+ MCSectionMachO::S_ZEROFILL, 0,
+ SectionKind::getBSS()));
return false;
}
@@ -1468,9 +1423,9 @@ bool AsmParser::ParseDirectiveDarwinZerofill() {
// Create the zerofill Symbol with Size and Pow2Alignment
//
// FIXME: Arch specific.
- Out.EmitZerofill(getMachOSection(Segment, Section,
- MCSectionMachO::S_ZEROFILL, 0,
- SectionKind::getBSS()),
+ Out.EmitZerofill(Ctx.getMachOSection(Segment, Section,
+ MCSectionMachO::S_ZEROFILL, 0,
+ SectionKind::getBSS()),
Sym, Size, 1 << Pow2Alignment);
return false;
@@ -1604,9 +1559,6 @@ bool AsmParser::ParseDirectiveDarwinDumpOrLoad(SMLoc IDLoc, bool IsDump) {
/// ParseDirectiveIf
/// ::= .if expression
bool AsmParser::ParseDirectiveIf(SMLoc DirectiveLoc) {
- // Consume the identifier that was the .if directive
- Lex();
-
TheCondStack.push_back(TheCondState);
TheCondState.TheCond = AsmCond::IfCond;
if(TheCondState.Ignore) {
@@ -1638,9 +1590,6 @@ bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) {
" an .elseif");
TheCondState.TheCond = AsmCond::ElseIfCond;
- // Consume the identifier that was the .elseif directive
- Lex();
-
bool LastIgnoreState = false;
if (!TheCondStack.empty())
LastIgnoreState = TheCondStack.back().Ignore;
@@ -1667,9 +1616,6 @@ bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) {
/// ParseDirectiveElse
/// ::= .else
bool AsmParser::ParseDirectiveElse(SMLoc DirectiveLoc) {
- // Consume the identifier that was the .else directive
- Lex();
-
if (Lexer.isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.else' directive");
@@ -1694,9 +1640,6 @@ bool AsmParser::ParseDirectiveElse(SMLoc DirectiveLoc) {
/// ParseDirectiveEndIf
/// ::= .endif
bool AsmParser::ParseDirectiveEndIf(SMLoc DirectiveLoc) {
- // Consume the identifier that was the .endif directive
- Lex();
-
if (Lexer.isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.endif' directive");
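Two parser changes land in this file: the conditional-assembly directives are now dispatched from ParseStatement itself, with the skip check placed after them so that .endif is still recognized inside an ignored '.if 0' region, and a bare '.' in an expression now means "the current PC", implemented by pinning a fresh temporary label. The latter uses exactly three MC calls, shown here as a free function (assuming a live context and streamer, as in the patch):

    #include "llvm/MC/MCContext.h"
    #include "llvm/MC/MCExpr.h"
    #include "llvm/MC/MCStreamer.h"
    using namespace llvm;

    // '.' == the address being emitted right now.
    static const MCExpr *currentPC(MCContext &Ctx, MCStreamer &Out) {
      MCSymbol *Sym = Ctx.CreateTempSymbol(); // assembler-local label
      Out.EmitLabel(Sym);                     // bind it to the current PC
      return MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, Ctx);
    }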
diff --git a/lib/MC/MCSectionELF.cpp b/lib/MC/MCSectionELF.cpp
index ebfe2691719c..a7599de1b7b4 100644
--- a/lib/MC/MCSectionELF.cpp
+++ b/lib/MC/MCSectionELF.cpp
@@ -14,11 +14,7 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-MCSectionELF *MCSectionELF::
-Create(StringRef Section, unsigned Type, unsigned Flags,
- SectionKind K, bool isExplicit, MCContext &Ctx) {
- return new (Ctx) MCSectionELF(Section, Type, Flags, K, isExplicit);
-}
+MCSectionELF::~MCSectionELF() {} // anchor.
// ShouldOmitSectionDirective - Decides whether a '.section' directive
// should be printed before the section name
@@ -62,59 +58,63 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI,
OS << ",#write";
if (Flags & MCSectionELF::SHF_TLS)
OS << ",#tls";
- } else {
- OS << ",\"";
- if (Flags & MCSectionELF::SHF_ALLOC)
- OS << 'a';
- if (Flags & MCSectionELF::SHF_EXECINSTR)
- OS << 'x';
- if (Flags & MCSectionELF::SHF_WRITE)
- OS << 'w';
- if (Flags & MCSectionELF::SHF_MERGE)
- OS << 'M';
- if (Flags & MCSectionELF::SHF_STRINGS)
- OS << 'S';
- if (Flags & MCSectionELF::SHF_TLS)
- OS << 'T';
-
- // If there are target-specific flags, print them.
- if (Flags & ~MCSectionELF::TARGET_INDEP_SHF)
- PrintTargetSpecificSectionFlags(MAI, OS);
-
- OS << '"';
+ OS << '\n';
+ return;
+ }
+
+ OS << ",\"";
+ if (Flags & MCSectionELF::SHF_ALLOC)
+ OS << 'a';
+ if (Flags & MCSectionELF::SHF_EXECINSTR)
+ OS << 'x';
+ if (Flags & MCSectionELF::SHF_WRITE)
+ OS << 'w';
+ if (Flags & MCSectionELF::SHF_MERGE)
+ OS << 'M';
+ if (Flags & MCSectionELF::SHF_STRINGS)
+ OS << 'S';
+ if (Flags & MCSectionELF::SHF_TLS)
+ OS << 'T';
+
+ // If there are target-specific flags, print them.
+ if (Flags & MCSectionELF::XCORE_SHF_CP_SECTION)
+ OS << 'c';
+ if (Flags & MCSectionELF::XCORE_SHF_DP_SECTION)
+ OS << 'd';
+
+ OS << '"';
- if (ShouldPrintSectionType(Type)) {
- OS << ',';
-
- // If comment string is '@', e.g. as on ARM - use '%' instead
- if (MAI.getCommentString()[0] == '@')
- OS << '%';
- else
- OS << '@';
-
- if (Type == MCSectionELF::SHT_INIT_ARRAY)
- OS << "init_array";
- else if (Type == MCSectionELF::SHT_FINI_ARRAY)
- OS << "fini_array";
- else if (Type == MCSectionELF::SHT_PREINIT_ARRAY)
- OS << "preinit_array";
- else if (Type == MCSectionELF::SHT_NOBITS)
- OS << "nobits";
- else if (Type == MCSectionELF::SHT_PROGBITS)
- OS << "progbits";
-
- if (getKind().isMergeable1ByteCString()) {
- OS << ",1";
- } else if (getKind().isMergeable2ByteCString()) {
- OS << ",2";
- } else if (getKind().isMergeable4ByteCString() ||
- getKind().isMergeableConst4()) {
- OS << ",4";
- } else if (getKind().isMergeableConst8()) {
- OS << ",8";
- } else if (getKind().isMergeableConst16()) {
- OS << ",16";
- }
+ if (ShouldPrintSectionType(Type)) {
+ OS << ',';
+
+ // If comment string is '@', e.g. as on ARM - use '%' instead
+ if (MAI.getCommentString()[0] == '@')
+ OS << '%';
+ else
+ OS << '@';
+
+ if (Type == MCSectionELF::SHT_INIT_ARRAY)
+ OS << "init_array";
+ else if (Type == MCSectionELF::SHT_FINI_ARRAY)
+ OS << "fini_array";
+ else if (Type == MCSectionELF::SHT_PREINIT_ARRAY)
+ OS << "preinit_array";
+ else if (Type == MCSectionELF::SHT_NOBITS)
+ OS << "nobits";
+ else if (Type == MCSectionELF::SHT_PROGBITS)
+ OS << "progbits";
+
+ if (getKind().isMergeable1ByteCString()) {
+ OS << ",1";
+ } else if (getKind().isMergeable2ByteCString()) {
+ OS << ",2";
+ } else if (getKind().isMergeable4ByteCString() ||
+ getKind().isMergeableConst4()) {
+ OS << ",4";
+ } else if (getKind().isMergeableConst8()) {
+ OS << ",8";
+ } else if (getKind().isMergeableConst16()) {
+ OS << ",16";
}
}
diff --git a/lib/MC/MCSectionMachO.cpp b/lib/MC/MCSectionMachO.cpp
index 6cc67a222521..3a18cee83952 100644
--- a/lib/MC/MCSectionMachO.cpp
+++ b/lib/MC/MCSectionMachO.cpp
@@ -64,14 +64,22 @@ ENTRY(0 /*FIXME*/, S_ATTR_LOC_RELOC)
{ AttrFlagEnd, 0, 0 }
};
-
-MCSectionMachO *MCSectionMachO::
-Create(StringRef Segment, StringRef Section,
- unsigned TypeAndAttributes, unsigned Reserved2,
- SectionKind K, MCContext &Ctx) {
- // S_SYMBOL_STUBS must be set for Reserved2 to be non-zero.
- return new (Ctx) MCSectionMachO(Segment, Section, TypeAndAttributes,
- Reserved2, K);
+MCSectionMachO::MCSectionMachO(StringRef Segment, StringRef Section,
+ unsigned TAA, unsigned reserved2, SectionKind K)
+ : MCSection(K), TypeAndAttributes(TAA), Reserved2(reserved2) {
+ assert(Segment.size() <= 16 && Section.size() <= 16 &&
+ "Segment or section string too long");
+ for (unsigned i = 0; i != 16; ++i) {
+ if (i < Segment.size())
+ SegmentName[i] = Segment[i];
+ else
+ SegmentName[i] = 0;
+
+ if (i < Section.size())
+ SectionName[i] = Section[i];
+ else
+ SectionName[i] = 0;
+ }
}
void MCSectionMachO::PrintSwitchToSection(const MCAsmInfo &MAI,
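The new MCSectionMachO constructor copies the names into fixed arrays because Mach-O section headers store segment and section names as 16-byte, zero-padded fields with no guaranteed NUL terminator; holding StringRefs would also have tied the section to the lifetime of the parsed buffer. Equivalent standalone packing logic:

    #include <cassert>
    #include <cstring>
    #include <string>

    // Copy Name into a fixed Mach-O style 16-byte field, padding the
    // remainder with NULs (a 16-char name has no terminator at all).
    static void packName(char Out[16], const std::string &Name) {
      assert(Name.size() <= 16 && "Segment or section string too long");
      std::memset(Out, 0, 16);
      std::memcpy(Out, Name.data(), Name.size());
    }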
diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp
index e073eb5a0d46..a533ccfdc642 100644
--- a/lib/MC/MachObjectWriter.cpp
+++ b/lib/MC/MachObjectWriter.cpp
@@ -418,7 +418,7 @@ public:
unsigned Log2Size = Log2_32(Align);
assert((1U << Log2Size) == Align && "Invalid 'common' alignment!");
if (Log2Size > 15)
- llvm_report_error("invalid 'common' alignment '" +
+ report_fatal_error("invalid 'common' alignment '" +
Twine(Align) + "'");
// FIXME: Keep this mask with the SymbolFlags enumeration.
Flags = (Flags & 0xF0FF) | (Log2Size << 8);
@@ -477,7 +477,7 @@ public:
// actual expression addend without the PCrel bias. However, instructions
// with data following the relocation are not accommodated for (see comment
// below regarding SIGNED{1,2,4}), so it isn't exactly that either.
- Value += 1 << Log2Size;
+ Value += 1LL << Log2Size;
}
if (Target.isAbsolute()) { // constant
@@ -506,23 +506,23 @@ public:
// Neither symbol can be modified.
if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None ||
Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None)
- llvm_report_error("unsupported relocation of modified symbol");
+ report_fatal_error("unsupported relocation of modified symbol");
// We don't support PCrel relocations of differences. Darwin 'as' doesn't
// implement most of these correctly.
if (IsPCRel)
- llvm_report_error("unsupported pc-relative relocation of difference");
+ report_fatal_error("unsupported pc-relative relocation of difference");
// We don't currently support any situation where one or both of the
// symbols would require a local relocation. This is almost certainly
// unused and may not be possible to encode correctly.
if (!A_Base || !B_Base)
- llvm_report_error("unsupported local relocations in difference");
+ report_fatal_error("unsupported local relocations in difference");
// Darwin 'as' doesn't emit correct relocations for this (it ends up with
// a single SIGNED relocation); reject it for now.
if (A_Base == B_Base)
- llvm_report_error("unsupported relocation with identical base");
+ report_fatal_error("unsupported relocation with identical base");
Value += Layout.getSymbolAddress(&A_SD) - Layout.getSymbolAddress(A_Base);
Value -= Layout.getSymbolAddress(&B_SD) - Layout.getSymbolAddress(B_Base);
@@ -580,12 +580,12 @@ public:
else
Type = RIT_X86_64_GOT;
} else if (Modifier != MCSymbolRefExpr::VK_None)
- llvm_report_error("unsupported symbol modifier in relocation");
+ report_fatal_error("unsupported symbol modifier in relocation");
else
Type = RIT_X86_64_Signed;
} else {
if (Modifier != MCSymbolRefExpr::VK_None)
- llvm_report_error("unsupported symbol modifier in branch "
+ report_fatal_error("unsupported symbol modifier in branch "
"relocation");
Type = RIT_X86_64_Branch;
@@ -605,7 +605,7 @@ public:
// well based on the actual encoded instruction (the additional bias),
// but instead appear to just look at the final offset.
if (IsRIPRel) {
- switch (-(Target.getConstant() + (1 << Log2Size))) {
+ switch (-(Target.getConstant() + (1LL << Log2Size))) {
case 1: Type = RIT_X86_64_Signed1; break;
case 2: Type = RIT_X86_64_Signed2; break;
case 4: Type = RIT_X86_64_Signed4; break;
@@ -622,7 +622,7 @@ public:
Type = RIT_X86_64_GOT;
IsPCRel = 1;
} else if (Modifier != MCSymbolRefExpr::VK_None)
- llvm_report_error("unsupported symbol modifier in relocation");
+ report_fatal_error("unsupported symbol modifier in relocation");
else
Type = RIT_X86_64_Unsigned;
}
@@ -657,7 +657,7 @@ public:
MCSymbolData *A_SD = &Asm.getSymbolData(*A);
if (!A_SD->getFragment())
- llvm_report_error("symbol '" + A->getName() +
+ report_fatal_error("symbol '" + A->getName() +
"' can not be undefined in a subtraction expression");
uint32_t Value = Layout.getSymbolAddress(A_SD);
@@ -667,7 +667,7 @@ public:
MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
if (!B_SD->getFragment())
- llvm_report_error("symbol '" + B->getSymbol().getName() +
+ report_fatal_error("symbol '" + B->getSymbol().getName() +
"' can not be undefined in a subtraction expression");
// Select the appropriate difference relocation type.
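The two 1LL changes are integer-width fixes: with a plain int literal, 1 << Log2Size is evaluated in 32 bits (undefined behavior once Log2Size reaches 31) before being mixed into the surrounding 64-bit arithmetic, so widening the literal keeps the whole computation in 64 bits. In two lines:

    #include <cstdint>

    int64_t pcrelBias(unsigned Log2Size) {
      // return 1 << Log2Size;  // 32-bit shift: UB for Log2Size >= 31
      return 1LL << Log2Size;   // 64-bit shift: safe up to 62
    }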
diff --git a/lib/Support/Allocator.cpp b/lib/Support/Allocator.cpp
index 31b45c8d4aae..90df262336c5 100644
--- a/lib/Support/Allocator.cpp
+++ b/lib/Support/Allocator.cpp
@@ -23,9 +23,7 @@ namespace llvm {
BumpPtrAllocator::BumpPtrAllocator(size_t size, size_t threshold,
SlabAllocator &allocator)
: SlabSize(size), SizeThreshold(threshold), Allocator(allocator),
- CurSlab(0), BytesAllocated(0) {
- StartNewSlab();
-}
+ CurSlab(0), BytesAllocated(0) { }
BumpPtrAllocator::~BumpPtrAllocator() {
DeallocateSlabs(CurSlab);
@@ -72,6 +70,8 @@ void BumpPtrAllocator::DeallocateSlabs(MemSlab *Slab) {
/// Reset - Deallocate all but the current slab and reset the current pointer
/// to the beginning of it, freeing all memory allocated so far.
void BumpPtrAllocator::Reset() {
+ if (!CurSlab)
+ return;
DeallocateSlabs(CurSlab->NextPtr);
CurSlab->NextPtr = 0;
CurPtr = (char*)(CurSlab + 1);
@@ -81,6 +81,9 @@ void BumpPtrAllocator::Reset() {
/// Allocate - Allocate space at the specified alignment.
///
void *BumpPtrAllocator::Allocate(size_t Size, size_t Alignment) {
+ if (!CurSlab) // Start a new slab if we haven't allocated one already.
+ StartNewSlab();
+
// Keep track of how many bytes we've allocated.
BytesAllocated += Size;
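BumpPtrAllocator no longer grabs its first slab in the constructor; Allocate and Reset both tolerate a null CurSlab, so an allocator that is constructed but never used costs no memory at all. The lazy-initialization shape in miniature (malloc stands in for the slab allocator):

    #include <cstddef>
    #include <cstdlib>

    struct Bump {
      char *Slab, *Cur, *End;
      Bump() : Slab(0), Cur(0), End(0) {}   // no allocation up front
      void *allocate(std::size_t N) {
        if (!Slab) {                        // first use: start a slab
          Slab = Cur = (char *)std::malloc(4096);
          End = Slab + 4096;
        }
        if (Cur + N > End) return 0;        // (real code chains new slabs)
        void *P = Cur;
        Cur += N;
        return P;
      }
      ~Bump() { std::free(Slab); }
    };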
diff --git a/lib/Support/Debug.cpp b/lib/Support/Debug.cpp
index eccfa0bd0725..7f48f8aae717 100644
--- a/lib/Support/Debug.cpp
+++ b/lib/Support/Debug.cpp
@@ -51,12 +51,19 @@ DebugBufferSize("debug-buffer-size",
cl::init(0));
static std::string CurrentDebugType;
-static struct DebugOnlyOpt {
+
+namespace {
+
+struct DebugOnlyOpt {
void operator=(const std::string &Val) const {
DebugFlag |= !Val.empty();
CurrentDebugType = Val;
}
-} DebugOnlyOptLoc;
+};
+
+}
+
+static DebugOnlyOpt DebugOnlyOptLoc;
static cl::opt<DebugOnlyOpt, true, cl::parser<std::string> >
DebugOnly("debug-only", cl::desc("Enable a specific type of debug output"),
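Wrapping DebugOnlyOpt in an anonymous namespace (the Timer helpers get the same treatment later in this patch) gives the type internal linkage, so it cannot collide at link time with an identically named class in another translation unit. The idiom:

    namespace {            // contents are private to this .cpp file
    struct DebugOnlyOpt {  // internal linkage: no cross-TU ODR clashes
      int Value;
    };
    } // end anonymous namespace

    static DebugOnlyOpt TheOpt; // the object itself stays file-static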
diff --git a/lib/Support/Dwarf.cpp b/lib/Support/Dwarf.cpp
index d1230b9de8ad..c19c2d6b0897 100644
--- a/lib/Support/Dwarf.cpp
+++ b/lib/Support/Dwarf.cpp
@@ -196,8 +196,9 @@ const char *llvm::dwarf::AttributeString(unsigned Attribute) {
case DW_AT_APPLE_flags: return "DW_AT_APPLE_flags";
case DW_AT_APPLE_isa: return "DW_AT_APPLE_isa";
case DW_AT_APPLE_block: return "DW_AT_APPLE_block";
- case DW_AT_APPLE_major_runtime_vers: return "DW_AT_APPLE_major_runtime_vers";
+ case DW_AT_APPLE_major_runtime_vers: return "DW_AT_APPLE_major_runtime_vers";
case DW_AT_APPLE_runtime_class: return "DW_AT_APPLE_runtime_class";
+ case DW_AT_APPLE_omit_frame_ptr: return "DW_AT_APPLE_omit_frame_ptr";
}
return 0;
}
diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp
index 4412cb2dd0d4..56a171cb7d37 100644
--- a/lib/Support/ErrorHandling.cpp
+++ b/lib/Support/ErrorHandling.cpp
@@ -1,4 +1,4 @@
-//===- lib/Support/ErrorHandling.cpp - Callbacks for errors -----*- C++ -*-===//
+//===- lib/Support/ErrorHandling.cpp - Callbacks for errors ---------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,9 +7,9 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines an API for error handling, it supersedes cerr+abort(), and
-// cerr+exit() style error handling.
-// Callbacks can be registered for these errors through this API.
+// This file defines an API used to indicate fatal error conditions. Non-fatal
+// errors (most of them) should be handled through LLVMContext.
+//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/Twine.h"
@@ -19,16 +19,14 @@
#include "llvm/System/Threading.h"
#include <cassert>
#include <cstdlib>
-
using namespace llvm;
using namespace std;
-static llvm_error_handler_t ErrorHandler = 0;
+static fatal_error_handler_t ErrorHandler = 0;
static void *ErrorHandlerUserData = 0;
-namespace llvm {
-void llvm_install_error_handler(llvm_error_handler_t handler,
- void *user_data) {
+void llvm::install_fatal_error_handler(fatal_error_handler_t handler,
+ void *user_data) {
assert(!llvm_is_multithreaded() &&
"Cannot register error handlers after starting multithreaded mode!\n");
assert(!ErrorHandler && "Error handler already registered!\n");
@@ -36,19 +34,19 @@ void llvm_install_error_handler(llvm_error_handler_t handler,
ErrorHandlerUserData = user_data;
}
-void llvm_remove_error_handler() {
+void llvm::remove_fatal_error_handler() {
ErrorHandler = 0;
}
-void llvm_report_error(const char *reason) {
- llvm_report_error(Twine(reason));
+void llvm::report_fatal_error(const char *reason) {
+ report_fatal_error(Twine(reason));
}
-void llvm_report_error(const std::string &reason) {
- llvm_report_error(Twine(reason));
+void llvm::report_fatal_error(const std::string &reason) {
+ report_fatal_error(Twine(reason));
}
-void llvm_report_error(const Twine &reason) {
+void llvm::report_fatal_error(const Twine &reason) {
if (!ErrorHandler) {
errs() << "LLVM ERROR: " << reason << "\n";
} else {
@@ -57,8 +55,8 @@ void llvm_report_error(const Twine &reason) {
exit(1);
}
-void llvm_unreachable_internal(const char *msg, const char *file,
- unsigned line) {
+void llvm::llvm_unreachable_internal(const char *msg, const char *file,
+ unsigned line) {
// This code intentionally doesn't call the ErrorHandler callback, because
// llvm_unreachable is intended to be used to indicate "impossible"
// situations, and not legitimate runtime errors.
@@ -70,4 +68,3 @@ void llvm_unreachable_internal(const char *msg, const char *file,
dbgs() << "!\n";
abort();
}
-}
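The renamed entry points keep the old contract: one process-wide handler, installed before going multithreaded, and report_fatal_error either calls it or prints to errs(), then exits. Typical embedder usage against the post-rename API (handler signature as declared in this era's ErrorHandling.h; the handler body is invented):

    #include "llvm/Support/ErrorHandling.h"
    #include <cstdio>
    #include <string>

    static void hostHandler(void *UserData, const std::string &Reason) {
      std::fprintf(stderr, "host app: %s\n", Reason.c_str());
      // report_fatal_error still exit(1)s after the handler returns.
    }

    int main() {
      llvm::install_fatal_error_handler(hostHandler, 0);
      // ... use LLVM; any report_fatal_error now reaches hostHandler ...
      llvm::remove_fatal_error_handler();
      return 0;
    }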
diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp
index ec84f9beca06..fdd6285a8c5e 100644
--- a/lib/Support/GraphWriter.cpp
+++ b/lib/Support/GraphWriter.cpp
@@ -130,28 +130,28 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait,
if (sys::Program::ExecuteAndWait(prog, &args[0], 0, 0, 0, 0, &ErrMsg)) {
errs() << "Error viewing graph " << Filename.str() << ": '"
<< ErrMsg << "'\n";
- } else {
- errs() << " done. \n";
+ return;
+ }
+ errs() << " done. \n";
- sys::Path gv(LLVM_PATH_GV);
- args.clear();
- args.push_back(gv.c_str());
- args.push_back(PSFilename.c_str());
- args.push_back("--spartan");
- args.push_back(0);
-
- ErrMsg.clear();
- if (wait) {
- if (sys::Program::ExecuteAndWait(gv, &args[0],0,0,0,0,&ErrMsg))
- errs() << "Error viewing graph: " << ErrMsg << "\n";
- Filename.eraseFromDisk();
- PSFilename.eraseFromDisk();
- }
- else {
- sys::Program::ExecuteNoWait(gv, &args[0],0,0,0,&ErrMsg);
- errs() << "Remember to erase graph files: " << Filename.str() << " "
- << PSFilename.str() << "\n";
- }
+ sys::Path gv(LLVM_PATH_GV);
+ args.clear();
+ args.push_back(gv.c_str());
+ args.push_back(PSFilename.c_str());
+ args.push_back("--spartan");
+ args.push_back(0);
+
+ ErrMsg.clear();
+ if (wait) {
+ if (sys::Program::ExecuteAndWait(gv, &args[0],0,0,0,0,&ErrMsg))
+ errs() << "Error viewing graph: " << ErrMsg << "\n";
+ Filename.eraseFromDisk();
+ PSFilename.eraseFromDisk();
+ }
+ else {
+ sys::Program::ExecuteNoWait(gv, &args[0],0,0,0,&ErrMsg);
+ errs() << "Remember to erase graph files: " << Filename.str() << " "
+ << PSFilename.str() << "\n";
}
#elif HAVE_DOTTY
sys::Path dotty(LLVM_PATH_DOTTY);
@@ -166,7 +166,8 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait,
errs() << "Error viewing graph " << Filename.str() << ": "
<< ErrMsg << "\n";
} else {
-#ifdef __MINGW32__ // Dotty spawns another app and doesn't wait until it returns
+// Dotty spawns another app and doesn't wait until it returns
+#if defined (__MINGW32__) || defined (_WINDOWS)
return;
#endif
Filename.eraseFromDisk();
diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp
index 4e7520c9d339..da5681c5bc09 100644
--- a/lib/Support/SourceMgr.cpp
+++ b/lib/Support/SourceMgr.cpp
@@ -168,7 +168,7 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, const std::string &Msg,
}
PrintedMsg += Msg;
- return SMDiagnostic(Loc,
+ return SMDiagnostic(*this, Loc,
CurMB->getBufferIdentifier(), FindLineNumber(Loc, CurBuf),
Loc.getPointer()-LineStart, PrintedMsg,
LineStr, ShowLine);
diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp
index 4fac0737f0c0..481f6ba5086d 100644
--- a/lib/Support/Timer.cpp
+++ b/lib/Support/Timer.cpp
@@ -190,6 +190,8 @@ void TimeRecord::print(const TimeRecord &Total, raw_ostream &OS) const {
// NamedRegionTimer Implementation
//===----------------------------------------------------------------------===//
+namespace {
+
typedef StringMap<Timer> Name2TimerMap;
class Name2PairMap {
@@ -216,6 +218,8 @@ public:
}
};
+}
+
static ManagedStatic<Name2TimerMap> NamedTimers;
static ManagedStatic<Name2PairMap> NamedGroupedTimers;
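
The Timer.cpp hunk wraps the file-local Name2TimerMap/Name2PairMap helpers in an anonymous namespace, the usual C++ way to give them internal linkage (DynamicLibrary.cpp below gets the same treatment). A self-contained sketch of the idiom, with made-up names:

    namespace {
    // Internal linkage: nothing here is visible to other translation units,
    // so identically-named helpers elsewhere cannot collide at link time.
    struct FileLocalHelper {
      int value() const { return 42; }
    };
    } // end anonymous namespace

    static FileLocalHelper Helper;  // file-local instance

    int queryHelper() { return Helper.value(); }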
diff --git a/lib/Support/circular_raw_ostream.cpp b/lib/Support/circular_raw_ostream.cpp
index e52996dc5051..ca0d30db388c 100644
--- a/lib/Support/circular_raw_ostream.cpp
+++ b/lib/Support/circular_raw_ostream.cpp
@@ -1,4 +1,4 @@
-//===- circulat_raw_ostream.cpp - Implement the circular_raw_ostream class -===//
+//===- circular_raw_ostream.cpp - Implement circular_raw_ostream ----------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,9 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/circular_raw_ostream.h"
-
#include <algorithm>
-
using namespace llvm;
void circular_raw_ostream::write_impl(const char *Ptr, size_t Size) {
@@ -25,7 +23,8 @@ void circular_raw_ostream::write_impl(const char *Ptr, size_t Size) {
// Write into the buffer, wrapping if necessary.
while (Size != 0) {
- unsigned Bytes = std::min(Size, BufferSize - (Cur - BufferArray));
+ unsigned Bytes =
+ std::min(unsigned(Size), unsigned(BufferSize - (Cur - BufferArray)));
memcpy(Cur, Ptr, Bytes);
Size -= Bytes;
Cur += Bytes;
@@ -37,11 +36,10 @@ void circular_raw_ostream::write_impl(const char *Ptr, size_t Size) {
}
}
-void circular_raw_ostream::flushBufferWithBanner(void) {
+void circular_raw_ostream::flushBufferWithBanner() {
if (BufferSize != 0) {
// Write out the buffer
- int num = std::strlen(Banner);
- TheStream->write(Banner, num);
+ TheStream->write(Banner, std::strlen(Banner));
flushBuffer();
}
}
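
The write_impl fix above silences a std::min type mismatch (size_t vs. the unsigned/ptrdiff_t buffer arithmetic) by casting both operands to unsigned. The wrap-around copy it performs, extracted as a standalone sketch with invented parameter names:

    #include <algorithm>
    #include <cstring>

    // Mirror of the loop in write_impl: copy up to the end of the circular
    // buffer, wrap Cur back to the start, and repeat until Size is consumed.
    static void circularWrite(char *BufferArray, unsigned BufferSize, char *&Cur,
                              const char *Ptr, size_t Size) {
      while (Size != 0) {
        unsigned Bytes =
            std::min(unsigned(Size), unsigned(BufferSize - (Cur - BufferArray)));
        std::memcpy(Cur, Ptr, Bytes);
        Size -= Bytes;
        Cur += Bytes;
        Ptr += Bytes;
        if (Cur == BufferArray + BufferSize)
          Cur = BufferArray;  // wrapped
      }
    }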
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index f59bd0d90301..0b05c5449ae6 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -57,11 +57,11 @@ raw_ostream::~raw_ostream() {
delete [] OutBufStart;
// If there are any pending errors, report them now. Clients wishing
- // to avoid llvm_report_error calls should check for errors with
+ // to avoid report_fatal_error calls should check for errors with
// has_error() and clear the error flag with clear_error() before
// destructing raw_ostream objects which may have errors.
if (Error)
- llvm_report_error("IO failure on output stream.");
+ report_fatal_error("IO failure on output stream.");
}
// An out of line virtual method to provide a home for the class vtable.
@@ -442,7 +442,8 @@ uint64_t raw_fd_ostream::seek(uint64_t off) {
}
size_t raw_fd_ostream::preferred_buffer_size() const {
-#if !defined(_MSC_VER) && !defined(__MINGW32__) // Windows has no st_blksize.
+#if !defined(_MSC_VER) && !defined(__MINGW32__) && !defined(_MINIX)
+ // Windows and Minix have no st_blksize.
assert(FD >= 0 && "File not yet open!");
struct stat statbuf;
if (fstat(FD, &statbuf) != 0)
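
The raw_ostream.cpp hunk adds _MINIX to the platforms that lack st_blksize. For reference, a sketch of the fstat query the remaining platforms keep performing (descriptor 0 chosen arbitrarily):

    #include <sys/stat.h>
    #include <cstdio>

    int main() {
      struct stat statbuf;
      if (fstat(0, &statbuf) != 0)
        return 1;
    #if !defined(_MSC_VER) && !defined(__MINGW32__) && !defined(_MINIX)
      // st_blksize is the filesystem's preferred I/O granularity.
      std::printf("preferred buffer size: %ld\n", (long)statbuf.st_blksize);
    #endif
      return 0;
    }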
diff --git a/lib/Support/regengine.inc b/lib/Support/regengine.inc
index bf55543dab8a..7e41f96f359d 100644
--- a/lib/Support/regengine.inc
+++ b/lib/Support/regengine.inc
@@ -185,7 +185,7 @@ matcher(struct re_guts *g, const char *string, size_t nmatch,
endp = fast(m, start, stop, gf, gl);
if (endp == NULL) { /* a miss */
free(m->pmatch);
- free(m->lastpos);
+ free((void*)m->lastpos);
STATETEARDOWN(m);
return(REG_NOMATCH);
}
diff --git a/lib/System/DynamicLibrary.cpp b/lib/System/DynamicLibrary.cpp
index d6f3140a88e8..6f6890c06c49 100644
--- a/lib/System/DynamicLibrary.cpp
+++ b/lib/System/DynamicLibrary.cpp
@@ -24,12 +24,18 @@
// Collection of symbol name/value pairs to be searched prior to any libraries.
static std::map<std::string, void*> *ExplicitSymbols = 0;
-static struct ExplicitSymbolsDeleter {
+namespace {
+
+struct ExplicitSymbolsDeleter {
~ExplicitSymbolsDeleter() {
if (ExplicitSymbols)
delete ExplicitSymbols;
}
-} Dummy;
+};
+
+}
+
+static ExplicitSymbolsDeleter Dummy;
void llvm::sys::DynamicLibrary::AddSymbol(const char* symbolName,
void *symbolValue) {
@@ -44,6 +50,7 @@ void llvm::sys::DynamicLibrary::AddSymbol(const char* symbolName,
#else
+#if HAVE_DLFCN_H
#include <dlfcn.h>
using namespace llvm;
using namespace llvm::sys;
@@ -68,6 +75,17 @@ bool DynamicLibrary::LoadLibraryPermanently(const char *Filename,
OpenedHandles->push_back(H);
return false;
}
+#else
+
+using namespace llvm;
+using namespace llvm::sys;
+
+bool DynamicLibrary::LoadLibraryPermanently(const char *Filename,
+ std::string *ErrMsg) {
+ if (ErrMsg) *ErrMsg = "dlopen() not supported on this platform";
+ return true;
+}
+#endif
namespace llvm {
void *SearchForAddressOfSpecialSymbol(const char* symbolName);
@@ -84,6 +102,7 @@ void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) {
return I->second;
}
+#if HAVE_DLFCN_H
// Now search the libraries.
if (OpenedHandles) {
for (std::vector<void *>::iterator I = OpenedHandles->begin(),
@@ -95,6 +114,7 @@ void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) {
}
}
}
+#endif
if (void *Result = llvm::SearchForAddressOfSpecialSymbol(symbolName))
return Result;
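
The DynamicLibrary.cpp hunks gate all dlopen()-based code behind HAVE_DLFCN_H and stub out LoadLibraryPermanently elsewhere. A minimal sketch of the underlying dlfcn calls on platforms that have them (symbol name chosen for illustration):

    #include <dlfcn.h>
    #include <cstdio>

    int main() {
      // A null filename opens the running executable itself, which is what
      // LoadLibraryPermanently(0, ...) depends on.
      void *H = dlopen(0, RTLD_LAZY | RTLD_GLOBAL);
      if (!H) {
        std::fprintf(stderr, "dlopen: %s\n", dlerror());
        return 1;
      }
      if (void *Addr = dlsym(H, "malloc"))
        std::printf("malloc is at %p\n", Addr);
      return 0;
    }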
diff --git a/lib/System/Program.cpp b/lib/System/Program.cpp
index a3049d46fd65..cd58c2cc578c 100644
--- a/lib/System/Program.cpp
+++ b/lib/System/Program.cpp
@@ -13,8 +13,7 @@
#include "llvm/System/Program.h"
#include "llvm/Config/config.h"
-
-namespace llvm {
+using namespace llvm;
using namespace sys;
//===----------------------------------------------------------------------===//
@@ -48,9 +47,6 @@ Program::ExecuteNoWait(const Path& path,
prg.Execute(path, args, envp, redirects, memoryLimit, ErrMsg);
}
-
-}
-
// Include the platform-specific parts of this class.
#ifdef LLVM_ON_UNIX
#include "Unix/Program.inc"
diff --git a/lib/System/Unix/Path.inc b/lib/System/Unix/Path.inc
index 52253b30a570..74596dc6ab06 100644
--- a/lib/System/Unix/Path.inc
+++ b/lib/System/Unix/Path.inc
@@ -858,15 +858,20 @@ Path::makeUnique(bool reuse_current, std::string* ErrMsg) {
// Append an XXXXXX pattern to the end of the file for use with mkstemp,
// mktemp or our own implementation.
- std::string Buf(path);
+ // This uses std::vector instead of SmallVector to avoid a dependence on
+ // libSupport. And performance isn't critical here.
+ std::vector<char> Buf;
+ Buf.resize(path.size()+8);
+ char *FNBuffer = &Buf[0];
+ path.copy(FNBuffer,path.size());
if (isDirectory())
- Buf += "/XXXXXX";
+ strcpy(FNBuffer+path.size(), "/XXXXXX");
else
- Buf += "-XXXXXX";
+ strcpy(FNBuffer+path.size(), "-XXXXXX");
#if defined(HAVE_MKSTEMP)
int TempFD;
- if ((TempFD = mkstemp((char*)Buf.c_str())) == -1)
+ if ((TempFD = mkstemp(FNBuffer)) == -1)
return MakeErrMsg(ErrMsg, path + ": can't make unique filename");
// We don't need to hold the temp file descriptor... we will trust that no one
@@ -874,21 +879,21 @@ Path::makeUnique(bool reuse_current, std::string* ErrMsg) {
close(TempFD);
// Save the name
- path = Buf;
+ path = FNBuffer;
#elif defined(HAVE_MKTEMP)
// If we don't have mkstemp, use the old and obsolete mktemp function.
- if (mktemp(Buf.c_str()) == 0)
+ if (mktemp(FNBuffer) == 0)
return MakeErrMsg(ErrMsg, path + ": can't make unique filename");
// Save the name
- path = Buf;
+ path = FNBuffer;
#else
// Okay, looks like we have to do it all by our lonesome.
static unsigned FCounter = 0;
unsigned offset = path.size() + 1;
- while (FCounter < 999999 && exists()) {
- sprintf(Buf.data()+offset, "%06u", ++FCounter);
- path = Buf;
+  while (FCounter < 999999 && exists()) {
+    sprintf(FNBuffer+offset, "%06u", ++FCounter);
+ path = FNBuffer;
}
if (FCounter > 999999)
return MakeErrMsg(ErrMsg,
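
The makeUnique rewrite above stops scribbling on a std::string's internal buffer through c_str() (undefined behavior) and uses a writable std::vector<char> instead. The mkstemp contract it relies on, as a standalone sketch with an invented path:

    #include <cstdio>
    #include <cstdlib>
    #include <cstring>
    #include <unistd.h>
    #include <vector>

    int main() {
      const char *base = "/tmp/demo";               // illustrative prefix
      std::vector<char> Buf(std::strlen(base) + 8); // room for "-XXXXXX" + NUL
      std::strcpy(&Buf[0], base);
      std::strcat(&Buf[0], "-XXXXXX");
      int FD = mkstemp(&Buf[0]);  // rewrites the XXXXXX in place, so the
      if (FD == -1)               // buffer must be writable
        return 1;
      std::printf("unique file: %s\n", &Buf[0]);
      close(FD);
      return 0;
    }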
diff --git a/lib/System/Unix/Program.inc b/lib/System/Unix/Program.inc
index b4cc875df8c7..358415f52663 100644
--- a/lib/System/Unix/Program.inc
+++ b/lib/System/Unix/Program.inc
@@ -30,6 +30,14 @@
#if HAVE_FCNTL_H
#include <fcntl.h>
#endif
+#ifdef HAVE_POSIX_SPAWN
+#include <spawn.h>
+#if !defined(__APPLE__)
+ extern char **environ;
+#else
+#include <crt_externs.h> // _NSGetEnviron
+#endif
+#endif
namespace llvm {
using namespace sys;
@@ -94,20 +102,19 @@ Program::FindProgramByName(const std::string& progName) {
}
static bool RedirectIO(const Path *Path, int FD, std::string* ErrMsg) {
- if (Path == 0)
- // Noop
+ if (Path == 0) // Noop
return false;
- std::string File;
+ const char *File;
if (Path->isEmpty())
// Redirect empty paths to /dev/null
File = "/dev/null";
else
- File = Path->str();
+ File = Path->c_str();
// Open the file
- int InFD = open(File.c_str(), FD == 0 ? O_RDONLY : O_WRONLY|O_CREAT, 0666);
+ int InFD = open(File, FD == 0 ? O_RDONLY : O_WRONLY|O_CREAT, 0666);
if (InFD == -1) {
- MakeErrMsg(ErrMsg, "Cannot open file '" + File + "' for "
+ MakeErrMsg(ErrMsg, "Cannot open file '" + std::string(File) + "' for "
+ (FD == 0 ? "input" : "output"));
return true;
}
@@ -122,6 +129,25 @@ static bool RedirectIO(const Path *Path, int FD, std::string* ErrMsg) {
return false;
}
+#ifdef HAVE_POSIX_SPAWN
+static bool RedirectIO_PS(const Path *Path, int FD, std::string *ErrMsg,
+ posix_spawn_file_actions_t &FileActions) {
+ if (Path == 0) // Noop
+ return false;
+ const char *File;
+ if (Path->isEmpty())
+ // Redirect empty paths to /dev/null
+ File = "/dev/null";
+ else
+ File = Path->c_str();
+
+ if (int Err = posix_spawn_file_actions_addopen(&FileActions, FD,
+ File, FD == 0 ? O_RDONLY : O_WRONLY|O_CREAT, 0666))
+ return MakeErrMsg(ErrMsg, "Cannot dup2", Err);
+ return false;
+}
+#endif
+
static void TimeOutHandler(int Sig) {
}
@@ -151,13 +177,55 @@ static void SetMemoryLimits (unsigned size)
}
bool
-Program::Execute(const Path& path,
- const char** args,
- const char** envp,
- const Path** redirects,
- unsigned memoryLimit,
- std::string* ErrMsg)
-{
+Program::Execute(const Path &path, const char **args, const char **envp,
+ const Path **redirects, unsigned memoryLimit,
+ std::string *ErrMsg) {
+ // If this OS has posix_spawn and there is no memory limit being implied, use
+ // posix_spawn. It is more efficient than fork/exec.
+#ifdef HAVE_POSIX_SPAWN
+ if (memoryLimit == 0) {
+ posix_spawn_file_actions_t FileActions;
+ posix_spawn_file_actions_init(&FileActions);
+
+ if (redirects) {
+ // Redirect stdin/stdout.
+ if (RedirectIO_PS(redirects[0], 0, ErrMsg, FileActions) ||
+ RedirectIO_PS(redirects[1], 1, ErrMsg, FileActions))
+ return false;
+ if (redirects[1] == 0 || redirects[2] == 0 ||
+ *redirects[1] != *redirects[2]) {
+ // Just redirect stderr
+ if (RedirectIO_PS(redirects[2], 2, ErrMsg, FileActions)) return false;
+ } else {
+ // If stdout and stderr should go to the same place, redirect stderr
+ // to the FD already open for stdout.
+ if (int Err = posix_spawn_file_actions_adddup2(&FileActions, 1, 2))
+ return !MakeErrMsg(ErrMsg, "Can't redirect stderr to stdout", Err);
+ }
+ }
+
+ if (!envp)
+#if !defined(__APPLE__)
+ envp = const_cast<const char **>(environ);
+#else
+ // environ is missing in dylibs.
+ envp = const_cast<const char **>(*_NSGetEnviron());
+#endif
+
+ pid_t PID;
+ int Err = posix_spawn(&PID, path.c_str(), &FileActions, /*attrp*/0,
+ const_cast<char **>(args), const_cast<char **>(envp));
+
+ posix_spawn_file_actions_destroy(&FileActions);
+
+ if (Err)
+ return !MakeErrMsg(ErrMsg, "posix_spawn failed", Err);
+
+ Data_ = reinterpret_cast<void*>(PID);
+ return true;
+ }
+#endif
+
if (!path.canExecute()) {
if (ErrMsg)
*ErrMsg = path.str() + " is not executable";
@@ -201,9 +269,12 @@ Program::Execute(const Path& path,
// Execute!
if (envp != 0)
- execve(path.c_str(), (char**)args, (char**)envp);
+ execve(path.c_str(),
+ const_cast<char **>(args),
+ const_cast<char **>(envp));
else
- execv(path.c_str(), (char**)args);
+ execv(path.c_str(),
+ const_cast<char **>(args));
// If the execve() failed, we should exit. Follow Unix protocol and
// return 127 if the executable was not found, and 126 otherwise.
// Use _exit rather than exit so that atexit functions and static
@@ -239,7 +310,9 @@ Program::Wait(unsigned secondsToWait,
// fact of having a handler at all causes the wait below to return with EINTR,
// unlike if we used SIG_IGN.
if (secondsToWait) {
+#ifndef __HAIKU__
Act.sa_sigaction = 0;
+#endif
Act.sa_handler = TimeOutHandler;
sigemptyset(&Act.sa_mask);
Act.sa_flags = 0;
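
The Execute rewrite above takes a posix_spawn fast path whenever no memory limit is requested, wiring redirects through posix_spawn_file_actions instead of dup2-after-fork. A stripped-down sketch of that sequence (command and output path are illustrative):

    #include <spawn.h>
    #include <sys/wait.h>
    #include <fcntl.h>
    #include <cstdio>

    extern char **environ;  // fine in an executable; dylibs need _NSGetEnviron

    int main() {
      posix_spawn_file_actions_t FA;
      posix_spawn_file_actions_init(&FA);
      // Send stdout to a file, then mirror stderr onto stdout, as the patch
      // does with posix_spawn_file_actions_adddup2(&FileActions, 1, 2).
      posix_spawn_file_actions_addopen(&FA, 1, "/tmp/out.txt",
                                       O_WRONLY | O_CREAT, 0666);
      posix_spawn_file_actions_adddup2(&FA, 1, 2);

      char *const Argv[] = { (char *)"/bin/echo", (char *)"hello", 0 };
      pid_t PID;
      int Err = posix_spawn(&PID, Argv[0], &FA, /*attrp=*/0, Argv, environ);
      posix_spawn_file_actions_destroy(&FA);
      if (Err) {
        std::fprintf(stderr, "posix_spawn failed: %d\n", Err);
        return 1;
      }

      int Status;
      waitpid(PID, &Status, 0);
      return 0;
    }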
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 8d9c62253cdd..b4dec0cfd126 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -124,7 +124,8 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries,
def : Processor<"cortex-a8", CortexA8Itineraries,
[ArchV7A, FeatureThumb2, FeatureNEON, FeatureHasSlowVMLx,
FeatureNEONForFP]>;
-def : ProcNoItin<"cortex-a9", [ArchV7A, FeatureThumb2, FeatureNEON]>;
+def : Processor<"cortex-a9", CortexA9Itineraries,
+ [ArchV7A, FeatureThumb2, FeatureNEON]>;
//===----------------------------------------------------------------------===//
// Register File Description
diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h
index ea62c3330e64..e68354a42f8d 100644
--- a/lib/Target/ARM/ARMAddressingModes.h
+++ b/lib/Target/ARM/ARMAddressingModes.h
@@ -151,22 +151,13 @@ namespace ARM_AM {
if ((rotr32(Imm, RotAmt) & ~255U) == 0)
return (32-RotAmt)&31; // HW rotates right, not left.
- // For values like 0xF000000F, we should skip the first run of ones, then
+ // For values like 0xF000000F, we should ignore the low 6 bits, then
// retry the hunt.
- if (Imm & 1) {
- unsigned TrailingOnes = CountTrailingZeros_32(~Imm);
- if (TrailingOnes != 32) { // Avoid overflow on 0xFFFFFFFF
- // Restart the search for a high-order bit after the initial seconds of
- // ones.
- unsigned TZ2 = CountTrailingZeros_32(Imm & ~((1 << TrailingOnes)-1));
-
- // Rotate amount must be even.
- unsigned RotAmt2 = TZ2 & ~1;
-
- // If this fits, use it.
- if (RotAmt2 != 32 && (rotr32(Imm, RotAmt2) & ~255U) == 0)
- return (32-RotAmt2)&31; // HW rotates right, not left.
- }
+ if (Imm & 63U) {
+ unsigned TZ2 = CountTrailingZeros_32(Imm & ~63U);
+ unsigned RotAmt2 = TZ2 & ~1;
+ if ((rotr32(Imm, RotAmt2) & ~255U) == 0)
+ return (32-RotAmt2)&31; // HW rotates right, not left.
}
// Otherwise, we have no way to cover this span of bits with a single
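
The getSOImmVal simplification above leans on the ARM rule that a data-processing immediate is an 8-bit value rotated right by an even amount; a pattern like 0xF000000F only matches after masking off the low bits and recomputing the rotation. A standalone check of that encoding rule (function names invented):

    #include <cstdio>
    #include <stdint.h>

    static uint32_t rotr32(uint32_t V, unsigned Amt) {
      return Amt == 0 ? V : (V >> Amt) | (V << (32 - Amt));
    }

    // True iff Imm equals some 8-bit value rotated right by an even amount,
    // the "shifter operand" immediate form the hunk above is matching.
    static bool isSOImm(uint32_t Imm) {
      for (unsigned R = 0; R < 32; R += 2)
        if ((rotr32(Imm, R) & ~255U) == 0)
          return true;
      return false;
    }

    int main() {
      std::printf("%d\n", isSOImm(0xF000000F)); // 1: 0xFF rotated right by 4
      std::printf("%d\n", isSOImm(0x00F000F0)); // 0: runs too far apart
      return 0;
    }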
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 1995f79fa373..a1938582ae05 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -467,6 +467,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
case TargetOpcode::KILL:
case TargetOpcode::DBG_LABEL:
case TargetOpcode::EH_LABEL:
+ case TargetOpcode::DBG_VALUE:
return 0;
}
break;
@@ -481,10 +482,11 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
// operand #2.
return MI->getOperand(2).getImm();
case ARM::Int_eh_sjlj_setjmp:
+ case ARM::Int_eh_sjlj_setjmp_nofp:
return 24;
case ARM::tInt_eh_sjlj_setjmp:
- return 14;
case ARM::t2Int_eh_sjlj_setjmp:
+ case ARM::t2Int_eh_sjlj_setjmp_nofp:
return 14;
case ARM::BR_JTr:
case ARM::BR_JTm:
@@ -815,6 +817,16 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
}
}
+MachineInstr*
+ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx, uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const {
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM::DBG_VALUE))
+ .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr);
+ return &*MIB;
+}
+
MachineInstr *ARMBaseInstrInfo::
foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops, int FI) const {
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 292c4984dd63..7a5630ea37c5 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -269,6 +269,12 @@ public:
unsigned DestReg, int FrameIndex,
const TargetRegisterClass *RC) const;
+ virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx,
+ uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const;
+
virtual bool canFoldMemoryOperand(const MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops) const;
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index f1625469085e..bc1218743684 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -38,11 +38,14 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/CommandLine.h"
-using namespace llvm;
+namespace llvm {
cl::opt<bool>
ReuseFrameIndexVals("arm-reuse-frame-index-vals", cl::Hidden, cl::init(true),
cl::desc("Reuse repeated frame index values"));
+}
+
+using namespace llvm;
unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum,
bool *isSPVFP) {
@@ -478,7 +481,7 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
///
bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- return ((NoFramePointerElim && MFI->hasCalls())||
+ return ((DisableFramePointerElim(MF) && MFI->hasCalls())||
needsStackRealignment(MF) ||
MFI->hasVarSizedObjects() ||
MFI->isFrameAddressTaken());
@@ -506,7 +509,7 @@ needsStackRealignment(const MachineFunction &MF) const {
bool ARMBaseRegisterInfo::
cannotEliminateFrame(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- if (NoFramePointerElim && MFI->hasCalls())
+ if (DisableFramePointerElim(MF) && MFI->hasCalls())
return true;
return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()
|| needsStackRealignment(MF);
@@ -1050,7 +1053,7 @@ emitLoadConstPool(MachineBasicBlock &MBB,
unsigned PredReg) const {
MachineFunction &MF = *MBB.getParent();
MachineConstantPool *ConstantPool = MF.getConstantPool();
- Constant *C =
+ const Constant *C =
ConstantInt::get(Type::getInt32Ty(MF.getFunction()->getContext()), Val);
unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
@@ -1180,6 +1183,13 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
SPAdj = 0;
Offset += SPAdj;
+ // Special handling of dbg_value instructions.
+ if (MI.isDebugValue()) {
+ MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+ return 0;
+ }
+
// Modify MI as necessary to handle as much of 'Offset' as possible
bool Done = false;
if (!AFI->isThumbFunction())
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index e7aa0c86d40e..f84f85a86141 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -64,7 +64,8 @@ namespace {
static char ID;
public:
ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
- : MachineFunctionPass(&ID), JTI(0), II((ARMInstrInfo*)tm.getInstrInfo()),
+ : MachineFunctionPass(&ID), JTI(0),
+ II((const ARMInstrInfo *)tm.getInstrInfo()),
TD(tm.getTargetData()), TM(tm),
MCE(mce), MCPEs(0), MJTEs(0),
IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
@@ -150,7 +151,7 @@ namespace {
/// Routines that handle operands which add machine relocations which are
/// fixed up by the relocation stage.
- void emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
+ void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
bool MayNeedFarStub, bool Indirect,
intptr_t ACPV = 0);
void emitExternalSymbolAddress(const char *ES, unsigned Reloc);
@@ -174,9 +175,9 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
assert((MF.getTarget().getRelocationModel() != Reloc::Default ||
MF.getTarget().getRelocationModel() != Reloc::Static) &&
"JIT relocation model must be set to static or default!");
- JTI = ((ARMTargetMachine&)MF.getTarget()).getJITInfo();
- II = ((ARMTargetMachine&)MF.getTarget()).getInstrInfo();
- TD = ((ARMTargetMachine&)MF.getTarget()).getTargetData();
+ JTI = ((ARMTargetMachine &)MF.getTarget()).getJITInfo();
+ II = ((const ARMTargetMachine &)MF.getTarget()).getInstrInfo();
+ TD = ((const ARMTargetMachine &)MF.getTarget()).getTargetData();
Subtarget = &TM.getSubtarget<ARMSubtarget>();
MCPEs = &MF.getConstantPool()->getConstants();
MJTEs = 0;
@@ -249,14 +250,16 @@ unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI,
/// emitGlobalAddress - Emit the specified address to the code stream.
///
-void ARMCodeEmitter::emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
+void ARMCodeEmitter::emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
bool MayNeedFarStub, bool Indirect,
intptr_t ACPV) {
MachineRelocation MR = Indirect
? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc,
- GV, ACPV, MayNeedFarStub)
+ const_cast<GlobalValue *>(GV),
+ ACPV, MayNeedFarStub)
: MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
- GV, ACPV, MayNeedFarStub);
+ const_cast<GlobalValue *>(GV), ACPV,
+ MayNeedFarStub);
MCE.addRelocation(MR);
}
@@ -391,7 +394,7 @@ void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) {
<< (void*)MCE.getCurrentPCValue() << " " << *ACPV << '\n');
assert(ACPV->isGlobalValue() && "unsupported constant pool value");
- GlobalValue *GV = ACPV->getGV();
+ const GlobalValue *GV = ACPV->getGV();
if (GV) {
Reloc::Model RelocM = TM.getRelocationModel();
emitGlobalAddress(GV, ARM::reloc_arm_machine_cp_entry,
@@ -403,7 +406,7 @@ void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) {
}
emitWordLE(0);
} else {
- Constant *CV = MCPE.Val.ConstVal;
+ const Constant *CV = MCPE.Val.ConstVal;
DEBUG({
errs() << " ** Constant pool #" << CPI << " @ "
@@ -415,7 +418,7 @@ void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) {
errs() << '\n';
});
- if (GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
emitGlobalAddress(GV, ARM::reloc_arm_absolute, isa<Function>(GV), false);
emitWordLE(0);
} else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
@@ -559,7 +562,7 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) {
// We allow inline assembler nodes with empty bodies - they can
// implicitly define registers, which is ok for JIT.
if (MI.getOperand(0).getSymbolName()[0]) {
- llvm_report_error("JIT does not support inline asm!");
+ report_fatal_error("JIT does not support inline asm!");
}
break;
}
@@ -704,7 +707,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
const TargetInstrDesc &TID = MI.getDesc();
if (TID.Opcode == ARM::BFC) {
- llvm_report_error("ARMv6t2 JIT is not yet supported.");
+ report_fatal_error("ARMv6t2 JIT is not yet supported.");
}
// Part of binary is determined by TableGn.
diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp
index 90dd0c7fd962..f13ccc638448 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -21,7 +21,7 @@
#include <cstdlib>
using namespace llvm;
-ARMConstantPoolValue::ARMConstantPoolValue(Constant *cval, unsigned id,
+ARMConstantPoolValue::ARMConstantPoolValue(const Constant *cval, unsigned id,
ARMCP::ARMCPKind K,
unsigned char PCAdj,
const char *Modif,
@@ -39,16 +39,17 @@ ARMConstantPoolValue::ARMConstantPoolValue(LLVMContext &C,
CVal(NULL), S(strdup(s)), LabelId(id), Kind(ARMCP::CPExtSymbol),
PCAdjust(PCAdj), Modifier(Modif), AddCurrentAddress(AddCA) {}
-ARMConstantPoolValue::ARMConstantPoolValue(GlobalValue *gv, const char *Modif)
+ARMConstantPoolValue::ARMConstantPoolValue(const GlobalValue *gv,
+ const char *Modif)
: MachineConstantPoolValue((const Type*)Type::getInt32Ty(gv->getContext())),
CVal(gv), S(NULL), LabelId(0), Kind(ARMCP::CPValue), PCAdjust(0),
Modifier(Modif) {}
-GlobalValue *ARMConstantPoolValue::getGV() const {
+const GlobalValue *ARMConstantPoolValue::getGV() const {
return dyn_cast_or_null<GlobalValue>(CVal);
}
-BlockAddress *ARMConstantPoolValue::getBlockAddress() const {
+const BlockAddress *ARMConstantPoolValue::getBlockAddress() const {
return dyn_cast_or_null<BlockAddress>(CVal);
}
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
index 741acde27b1d..6f4eddf7361d 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -36,7 +36,7 @@ namespace ARMCP {
/// represent PC-relative displacement between the address of the load
/// instruction and the constant being loaded, i.e. (&GV-(LPIC+8)).
class ARMConstantPoolValue : public MachineConstantPoolValue {
- Constant *CVal; // Constant being loaded.
+ const Constant *CVal; // Constant being loaded.
const char *S; // ExtSymbol being loaded.
unsigned LabelId; // Label id of the load.
ARMCP::ARMCPKind Kind; // Kind of constant.
@@ -46,20 +46,20 @@ class ARMConstantPoolValue : public MachineConstantPoolValue {
bool AddCurrentAddress;
public:
- ARMConstantPoolValue(Constant *cval, unsigned id,
+ ARMConstantPoolValue(const Constant *cval, unsigned id,
ARMCP::ARMCPKind Kind = ARMCP::CPValue,
unsigned char PCAdj = 0, const char *Modifier = NULL,
bool AddCurrentAddress = false);
ARMConstantPoolValue(LLVMContext &C, const char *s, unsigned id,
unsigned char PCAdj = 0, const char *Modifier = NULL,
bool AddCurrentAddress = false);
- ARMConstantPoolValue(GlobalValue *GV, const char *Modifier);
+ ARMConstantPoolValue(const GlobalValue *GV, const char *Modifier);
ARMConstantPoolValue();
~ARMConstantPoolValue();
- GlobalValue *getGV() const;
+ const GlobalValue *getGV() const;
const char *getSymbol() const { return S; }
- BlockAddress *getBlockAddress() const;
+ const BlockAddress *getBlockAddress() const;
const char *getModifier() const { return Modifier; }
bool hasModifier() const { return Modifier != NULL; }
bool mustAddCurrentAddress() const { return AddCurrentAddress; }
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 1b8727d9848d..845d088f56c3 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -91,7 +91,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
LO16 = LO16.addImm(Lo16);
HI16 = HI16.addImm(Hi16);
} else {
- GlobalValue *GV = MO.getGlobal();
+ const GlobalValue *GV = MO.getGlobal();
unsigned TF = MO.getTargetFlags();
LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16);
HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16);
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 7d486631897a..36a1827ce6e8 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -13,7 +13,6 @@
#include "ARM.h"
#include "ARMAddressingModes.h"
-#include "ARMISelLowering.h"
#include "ARMTargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
@@ -121,9 +120,6 @@ private:
SDNode *SelectARMIndexedLoad(SDNode *N);
SDNode *SelectT2IndexedLoad(SDNode *N);
- /// SelectDYN_ALLOC - Select dynamic alloc for Thumb.
- SDNode *SelectDYN_ALLOC(SDNode *N);
-
/// SelectVLD - Select NEON load intrinsics. NumVecs should be
/// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
/// loads of D registers and even subregs and odd subregs of Q registers.
@@ -146,7 +142,7 @@ private:
unsigned *QOpcodes1);
/// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM.
- SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, unsigned Opc);
+ SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
/// SelectCMOVOp - Select CMOV instructions for ARM.
SDNode *SelectCMOVOp(SDNode *N);
@@ -939,59 +935,6 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
return NULL;
}
-SDNode *ARMDAGToDAGISel::SelectDYN_ALLOC(SDNode *N) {
- DebugLoc dl = N->getDebugLoc();
- EVT VT = N->getValueType(0);
- SDValue Chain = N->getOperand(0);
- SDValue Size = N->getOperand(1);
- SDValue Align = N->getOperand(2);
- SDValue SP = CurDAG->getRegister(ARM::SP, MVT::i32);
- int32_t AlignVal = cast<ConstantSDNode>(Align)->getSExtValue();
- if (AlignVal < 0)
- // We need to align the stack. Use Thumb1 tAND which is the only thumb
- // instruction that can read and write SP. This matches to a pseudo
- // instruction that has a chain to ensure the result is written back to
- // the stack pointer.
- SP = SDValue(CurDAG->getMachineNode(ARM::tANDsp, dl, VT, SP, Align), 0);
-
- bool isC = isa<ConstantSDNode>(Size);
- uint32_t C = isC ? cast<ConstantSDNode>(Size)->getZExtValue() : ~0UL;
- // Handle the most common case for both Thumb1 and Thumb2:
- // tSUBspi - immediate is between 0 ... 508 inclusive.
- if (C <= 508 && ((C & 3) == 0))
- // FIXME: tSUBspi encode scale 4 implicitly.
- return CurDAG->SelectNodeTo(N, ARM::tSUBspi_, VT, MVT::Other, SP,
- CurDAG->getTargetConstant(C/4, MVT::i32),
- Chain);
-
- if (Subtarget->isThumb1Only()) {
- // Use tADDspr since Thumb1 does not have a sub r, sp, r. ARMISelLowering
- // should have negated the size operand already. FIXME: We can't insert
- // new target independent node at this stage so we are forced to negate
- // it earlier. Is there a better solution?
- return CurDAG->SelectNodeTo(N, ARM::tADDspr_, VT, MVT::Other, SP, Size,
- Chain);
- } else if (Subtarget->isThumb2()) {
- if (isC && Predicate_t2_so_imm(Size.getNode())) {
- // t2SUBrSPi
- SDValue Ops[] = { SP, CurDAG->getTargetConstant(C, MVT::i32), Chain };
- return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPi_, VT, MVT::Other, Ops, 3);
- } else if (isC && Predicate_imm0_4095(Size.getNode())) {
- // t2SUBrSPi12
- SDValue Ops[] = { SP, CurDAG->getTargetConstant(C, MVT::i32), Chain };
- return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPi12_, VT, MVT::Other, Ops, 3);
- } else {
- // t2SUBrSPs
- SDValue Ops[] = { SP, Size,
- getI32Imm(ARM_AM::getSORegOpc(ARM_AM::lsl,0)), Chain };
- return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPs_, VT, MVT::Other, Ops, 4);
- }
- }
-
- // FIXME: Add ADD / SUB sp instructions for ARM.
- return 0;
-}
-
/// PairDRegs - Insert a pair of double registers into an implicit def to
/// form a quad register.
SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) {
@@ -1052,7 +995,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
break;
}
- SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
+ SDValue Pred = getAL(CurDAG);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
if (is64BitVector) {
unsigned Opc = DOpcodes[OpcodeIndex];
@@ -1142,7 +1085,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
break;
}
- SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
+ SDValue Pred = getAL(CurDAG);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
SmallVector<SDValue, 10> Ops;
@@ -1249,7 +1192,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
case MVT::v4i32: OpcodeIndex = 1; break;
}
- SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
+ SDValue Pred = getAL(CurDAG);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
SmallVector<SDValue, 10> Ops;
@@ -1305,10 +1248,42 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
}
SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
- unsigned Opc) {
+ bool isSigned) {
if (!Subtarget->hasV6T2Ops())
return NULL;
+ unsigned Opc = isSigned ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
+ : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
+
+
+ // For unsigned extracts, check for a shift right and mask
+ unsigned And_imm = 0;
+ if (N->getOpcode() == ISD::AND) {
+ if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
+
+ // The immediate is a mask of the low bits iff imm & (imm+1) == 0
+ if (And_imm & (And_imm + 1))
+ return NULL;
+
+ unsigned Srl_imm = 0;
+ if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
+ Srl_imm)) {
+ assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
+
+ unsigned Width = CountTrailingOnes_32(And_imm);
+ unsigned LSB = Srl_imm;
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ CurDAG->getTargetConstant(LSB, MVT::i32),
+ CurDAG->getTargetConstant(Width, MVT::i32),
+ getAL(CurDAG), Reg0 };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
+ }
+ }
+ return NULL;
+ }
+
+ // Otherwise, we're looking for a shift of a shift
unsigned Shl_imm = 0;
if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
@@ -1531,7 +1506,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDNode *ResNode;
if (Subtarget->isThumb1Only()) {
- SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
+ SDValue Pred = getAL(CurDAG);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
ResNode = CurDAG->getMachineNode(ARM::tLDRcp, dl, MVT::i32, MVT::Other,
@@ -1571,16 +1546,12 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
}
}
- case ARMISD::DYN_ALLOC:
- return SelectDYN_ALLOC(N);
case ISD::SRL:
- if (SDNode *I = SelectV6T2BitfieldExtractOp(N,
- Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX))
+ if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
return I;
break;
case ISD::SRA:
- if (SDNode *I = SelectV6T2BitfieldExtractOp(N,
- Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX))
+ if (SDNode *I = SelectV6T2BitfieldExtractOp(N, true))
return I;
break;
case ISD::MUL:
@@ -1624,6 +1595,10 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
break;
case ISD::AND: {
+ // Check for unsigned bitfield extract
+ if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
+ return I;
+
// (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
// of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
// are entirely contributed by c2 and lower 16-bits are entirely contributed
@@ -1708,7 +1683,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Chain = N->getOperand(0);
SDValue AM5Opc =
CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::ia, 4), MVT::i32);
- SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
+ SDValue Pred = getAL(CurDAG);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(1), AM5Opc, Pred, PredReg, Chain };
return CurDAG->getMachineNode(ARM::VLDMQ, dl, MVT::v2f64, MVT::Other,
@@ -1724,7 +1699,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Chain = N->getOperand(0);
SDValue AM5Opc =
CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::ia, 4), MVT::i32);
- SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
+ SDValue Pred = getAL(CurDAG);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(1), N->getOperand(2),
AM5Opc, Pred, PredReg, Chain };
@@ -1816,7 +1791,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case MVT::v4f32:
case MVT::v4i32: Opc = ARM::VZIPq32; break;
}
- SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
+ SDValue Pred = getAL(CurDAG);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
@@ -1835,7 +1810,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case MVT::v4f32:
case MVT::v4i32: Opc = ARM::VUZPq32; break;
}
- SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
+ SDValue Pred = getAL(CurDAG);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
@@ -1854,7 +1829,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case MVT::v4f32:
case MVT::v4i32: Opc = ARM::VTRNq32; break;
}
- SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
+ SDValue Pred = getAL(CurDAG);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
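
SelectV6T2BitfieldExtractOp above now also matches (and (srl x, lsb), mask) as an unsigned bitfield extract; the gate is that the AND immediate must be a mask of the low bits, which is exactly what imm & (imm + 1) == 0 tests, and the field width then falls out of counting the trailing ones. A quick standalone check of both helpers (reimplemented here for illustration):

    #include <cassert>
    #include <stdint.h>

    // Stand-in for CountTrailingOnes_32: the bitfield width once the
    // low-bit-mask test below passes.
    static unsigned countTrailingOnes(uint32_t V) {
      unsigned N = 0;
      while (V & 1) { V >>= 1; ++N; }
      return N;
    }

    static bool isLowBitMask(uint32_t Imm) {
      // 0b0111 + 1 = 0b1000 (disjoint bits); 0b0101 + 1 = 0b0110 (overlap).
      return (Imm & (Imm + 1)) == 0;
    }

    int main() {
      assert(isLowBitMask(0x0000FFFF) && countTrailingOnes(0x0000FFFF) == 16);
      assert(!isLowBitMask(0x0000FF00)); // ones not anchored at bit 0
      return 0;
    }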
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 77fb0c3cdbd0..d3842a69d833 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -40,12 +40,18 @@
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/VectorExtras.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <sstream>
using namespace llvm;
+static cl::opt<bool>
+EnableARMLongCalls("arm-long-calls", cl::Hidden,
+ cl::desc("Generate calls via indirect call instructions."),
+ cl::init(false));
+
static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
@@ -90,6 +96,8 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
if (VT.isInteger()) {
setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
@@ -376,10 +384,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// FIXME: Shouldn't need this, since no register is used, but the legalizer
// doesn't yet know how to not do that for SjLj.
setExceptionSelectorRegister(ARM::R0);
- if (Subtarget->isThumb())
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
- else
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
if (!Subtarget->hasV6Ops() && !Subtarget->isThumb2()) {
@@ -783,7 +788,7 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals) const {
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
@@ -871,7 +876,7 @@ ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
SDValue StackPtr, SDValue Arg,
DebugLoc dl, SelectionDAG &DAG,
const CCValAssign &VA,
- ISD::ArgFlagsTy Flags) {
+ ISD::ArgFlagsTy Flags) const {
unsigned LocMemOffset = VA.getLocMemOffset();
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
@@ -889,7 +894,7 @@ void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
CCValAssign &VA, CCValAssign &NextVA,
SDValue &StackPtr,
SmallVector<SDValue, 8> &MemOpChains,
- ISD::ArgFlagsTy Flags) {
+ ISD::ArgFlagsTy Flags) const {
SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), Arg);
@@ -918,7 +923,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals) const {
// ARM target does not yet support tail call optimization.
isTailCall = false;
@@ -1025,8 +1030,44 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
bool isLocalARMFunc = false;
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- GlobalValue *GV = G->getGlobal();
+
+ if (EnableARMLongCalls) {
+ assert (getTargetMachine().getRelocationModel() == Reloc::Static
+ && "long-calls with non-static relocation model!");
+ // Handle a global address or an external symbol. If it's not one of
+ // those, the target's already in a register, so we don't need to do
+ // anything extra.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ const GlobalValue *GV = G->getGlobal();
+ // Create a constant pool entry for the callee address
+ unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
+ ARMPCLabelIndex,
+ ARMCP::CPValue, 0);
+ // Get the address of the callee into a register
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ Callee = DAG.getLoad(getPointerTy(), dl,
+ DAG.getEntryNode(), CPAddr,
+ PseudoSourceValue::getConstantPool(), 0,
+ false, false, 0);
+ } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ const char *Sym = S->getSymbol();
+
+ // Create a constant pool entry for the callee address
+ unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
+ Sym, ARMPCLabelIndex, 0);
+ // Get the address of the callee into a register
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ Callee = DAG.getLoad(getPointerTy(), dl,
+ DAG.getEntryNode(), CPAddr,
+ PseudoSourceValue::getConstantPool(), 0,
+ false, false, 0);
+ }
+ } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ const GlobalValue *GV = G->getGlobal();
isDirect = true;
bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
bool isStub = (isExt && Subtarget->isTargetDarwin()) &&
@@ -1049,7 +1090,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
getPointerTy(), Callee, PICLabel);
- } else
+ } else
Callee = DAG.getTargetGlobalAddress(GV, getPointerTy());
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
isDirect = true;
@@ -1125,7 +1166,7 @@ SDValue
ARMTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
- DebugLoc dl, SelectionDAG &DAG) {
+ DebugLoc dl, SelectionDAG &DAG) const {
// CCValAssign - represent the assignment of the return value to a location.
SmallVector<CCValAssign, 16> RVLocs;
@@ -1232,13 +1273,14 @@ static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
}
-SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) {
+SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
+ SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned ARMPCLabelIndex = 0;
DebugLoc DL = Op.getDebugLoc();
EVT PtrVT = getPointerTy();
- BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
SDValue CPAddr;
if (RelocM == Reloc::Static) {
@@ -1264,7 +1306,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) {
// Lower ISD::GlobalTLSAddress using the "general dynamic" model
SDValue
ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
DebugLoc dl = GA->getDebugLoc();
EVT PtrVT = getPointerTy();
unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
@@ -1303,8 +1345,8 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
// "local exec" model.
SDValue
ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
- SelectionDAG &DAG) {
- GlobalValue *GV = GA->getGlobal();
+ SelectionDAG &DAG) const {
+ const GlobalValue *GV = GA->getGlobal();
DebugLoc dl = GA->getDebugLoc();
SDValue Offset;
SDValue Chain = DAG.getEntryNode();
@@ -1350,7 +1392,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
}
SDValue
-ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) {
+ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
// TODO: implement the "local dynamic" model
assert(Subtarget->isTargetELF() &&
"TLS not implemented for non-ELF targets");
@@ -1364,10 +1406,10 @@ ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) {
}
SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy();
DebugLoc dl = Op.getDebugLoc();
- GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
if (RelocM == Reloc::PIC_) {
bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
@@ -1404,13 +1446,13 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
}
SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned ARMPCLabelIndex = 0;
EVT PtrVT = getPointerTy();
DebugLoc dl = Op.getDebugLoc();
- GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
SDValue CPAddr;
if (RelocM == Reloc::Static)
@@ -1443,7 +1485,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
}
SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
- SelectionDAG &DAG){
+ SelectionDAG &DAG) const {
assert(Subtarget->isTargetELF() &&
"GLOBAL OFFSET TABLE not implemented for non-ELF targets");
MachineFunction &MF = DAG.getMachineFunction();
@@ -1466,7 +1508,8 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
SDValue
ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
- const ARMSubtarget *Subtarget) {
+ const ARMSubtarget *Subtarget)
+ const {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
DebugLoc dl = Op.getDebugLoc();
switch (IntNo) {
@@ -1533,20 +1576,23 @@ static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
return Res;
}
-static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
- unsigned VarArgsFrameIndex) {
+static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
+
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
DebugLoc dl = Op.getDebugLoc();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
+ SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0,
false, false, 0);
}
SDValue
-ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) {
+ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG) const {
SDNode *Node = Op.getNode();
DebugLoc dl = Node->getDebugLoc();
EVT VT = Node->getValueType(0);
@@ -1595,7 +1641,7 @@ ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) {
SDValue
ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
SDValue &Root, SelectionDAG &DAG,
- DebugLoc dl) {
+ DebugLoc dl) const {
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -1611,10 +1657,8 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
SDValue ArgValue2;
if (NextVA.isMemLoc()) {
- unsigned ArgSize = NextVA.getLocVT().getSizeInBits()/8;
MachineFrameInfo *MFI = MF.getFrameInfo();
- int FI = MFI->CreateFixedObject(ArgSize, NextVA.getLocMemOffset(),
- true, false);
+ int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true, false);
// Create load node to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
@@ -1635,7 +1679,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
const SmallVectorImpl<ISD::InputArg>
&Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals)
+ const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -1663,14 +1708,22 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
if (VA.needsCustom()) {
// f64 and vector types are split up into multiple registers or
// combinations of registers and stack slots.
- RegVT = MVT::i32;
-
if (VA.getLocVT() == MVT::v2f64) {
SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
Chain, DAG, dl);
VA = ArgLocs[++i]; // skip ahead to next loc
- SDValue ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
- Chain, DAG, dl);
+ SDValue ArgValue2;
+ if (VA.isMemLoc()) {
+ int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(),
+ true, false);
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
+ PseudoSourceValue::getFixedStack(FI), 0,
+ false, false, 0);
+ } else {
+ ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
+ Chain, DAG, dl);
+ }
ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
ArgValue, ArgValue1, DAG.getIntPtrConstant(0));
@@ -1758,10 +1811,12 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
// to their spots on the stack so that they may be loaded by dereferencing
// the result of va_next.
AFI->setVarArgsRegSaveSize(VARegSaveSize);
- VarArgsFrameIndex = MFI->CreateFixedObject(VARegSaveSize, ArgOffset +
- VARegSaveSize - VARegSize,
- true, false);
- SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
+ AFI->setVarArgsFrameIndex(
+ MFI->CreateFixedObject(VARegSaveSize,
+ ArgOffset + VARegSaveSize - VARegSize,
+ true, false));
+ SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
+ getPointerTy());
SmallVector<SDValue, 4> MemOps;
for (; NumGPRs < 4; ++NumGPRs) {
@@ -1773,9 +1828,10 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
- SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
- PseudoSourceValue::getFixedStack(VarArgsFrameIndex), 0,
- false, false, 0);
+ SDValue Store =
+ DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ PseudoSourceValue::getFixedStack(AFI->getVarArgsFrameIndex()), 0,
+ false, false, 0);
MemOps.push_back(Store);
FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
DAG.getConstant(4, getPointerTy()));
@@ -1785,7 +1841,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
&MemOps[0], MemOps.size());
} else
// This will point to the next argument passed via stack.
- VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset, true, false);
+ AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset,
+ true, false));
}
return Chain;
@@ -1800,7 +1857,7 @@ static bool isFloatingPointZero(SDValue Op) {
if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
SDValue WrapperOp = Op.getOperand(1).getOperand(0);
if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
return CFP->getValueAPF().isPosZero();
}
}
@@ -1811,7 +1868,8 @@ static bool isFloatingPointZero(SDValue Op) {
/// the given operands.
SDValue
ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
- SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) {
+ SDValue &ARMCC, SelectionDAG &DAG,
+ DebugLoc dl) const {
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
unsigned C = RHSC->getZExtValue();
if (!isLegalICmpImmediate(C)) {
@@ -1877,7 +1935,7 @@ static SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp);
}
-SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
+SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
@@ -1911,7 +1969,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
return Result;
}
-SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
+SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
SDValue LHS = Op.getOperand(2);
@@ -1945,7 +2003,7 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
return Res;
}
-SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) {
+SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue Table = Op.getOperand(1);
SDValue Index = Op.getOperand(2);
@@ -2034,7 +2092,7 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp);
}
-SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
+SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
MFI->setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
@@ -2055,8 +2113,10 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
SDValue Dst, SDValue Src,
SDValue Size, unsigned Align,
bool isVolatile, bool AlwaysInline,
- const Value *DstSV, uint64_t DstSVOff,
- const Value *SrcSV, uint64_t SrcSVOff){
+ const Value *DstSV,
+ uint64_t DstSVOff,
+ const Value *SrcSV,
+ uint64_t SrcSVOff) const {
// Do repeated 4-byte loads and stores. To be improved.
// This requires 4-byte alignment.
if ((Align & 3) != 0)
@@ -2157,11 +2217,25 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
}
+/// ExpandBIT_CONVERT - If the target supports VFP, this function is called to
+/// expand a bit convert where either the source or destination type is i64 to
+/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
+/// operand type is illegal (e.g., v2f32 for a target that doesn't support
+/// vectors), since the legalizer won't know what to do with that.
static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) {
- SDValue Op = N->getOperand(0);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
DebugLoc dl = N->getDebugLoc();
- if (N->getValueType(0) == MVT::f64) {
- // Turn i64->f64 into VMOVDRR.
+ SDValue Op = N->getOperand(0);
+
+ // This function is only supposed to be called for i64 types, either as the
+ // source or destination of the bit convert.
+ EVT SrcVT = Op.getValueType();
+ EVT DstVT = N->getValueType(0);
+ assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
+ "ExpandBIT_CONVERT called for non-i64 type");
+
+ // Turn i64->f64 into VMOVDRR.
+ if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
DAG.getConstant(0, MVT::i32));
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
@@ -2170,11 +2244,14 @@ static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) {
}
// Turn f64->i64 into VMOVRRD.
- SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
- DAG.getVTList(MVT::i32, MVT::i32), &Op, 1);
+ if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
+ SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), &Op, 1);
+ // Merge the pieces into a single i64 value.
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
+ }
- // Merge the pieces into a single i64 value.
- return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
+ return SDValue();
}
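
A null SDValue here signals "no expansion performed". A hedged sketch of the consuming convention (the helper name is illustrative; the ReplaceNodeResults hunk below follows the same pattern):

static void tryExpandBitConvert(SDNode *N, SelectionDAG &DAG,
                                SmallVectorImpl<SDValue> &Results) {
  SDValue Res = ExpandBIT_CONVERT(N, DAG);
  if (Res.getNode())          // expansion applied: replace the result
    Results.push_back(Res);
  // otherwise leave the node for the generic legalizer
}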
/// getZeroVector - Returns a vector of specified type with all zero elements.
@@ -2227,7 +2304,8 @@ static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
/// LowerShiftRightParts - Lower SRA_PARTS, which returns two
/// i32 values and takes a 2 x i32 value to shift plus a shift amount.
-SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) {
+SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
+ SelectionDAG &DAG) const {
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
EVT VT = Op.getValueType();
unsigned VTBits = VT.getSizeInBits();
@@ -2262,7 +2340,8 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) {
/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
/// i32 values and takes a 2 x i32 value to shift plus a shift amount.
-SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) {
+SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
+ SelectionDAG &DAG) const {
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
EVT VT = Op.getValueType();
unsigned VTBits = VT.getSizeInBits();
@@ -3059,7 +3138,7 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Val);
}
-SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Don't know how to custom lower this!");
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
@@ -3072,7 +3151,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
case ISD::BR_CC: return LowerBR_CC(Op, DAG);
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
- case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget);
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
@@ -3105,22 +3184,22 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
/// type with new values built out of custom code.
void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue>&Results,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
+ SDValue Res;
switch (N->getOpcode()) {
default:
llvm_unreachable("Don't know how to custom expand this!");
- return;
+ break;
case ISD::BIT_CONVERT:
- Results.push_back(ExpandBIT_CONVERT(N, DAG));
- return;
+ Res = ExpandBIT_CONVERT(N, DAG);
+ break;
case ISD::SRL:
- case ISD::SRA: {
- SDValue Res = LowerShift(N, DAG, Subtarget);
- if (Res.getNode())
- Results.push_back(Res);
- return;
- }
+ case ISD::SRA:
+ Res = LowerShift(N, DAG, Subtarget);
+ break;
}
+ if (Res.getNode())
+ Results.push_back(Res);
}
//===----------------------------------------------------------------------===//
@@ -3302,8 +3381,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
MachineBasicBlock *
ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB,
- DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
+ MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
@@ -3387,12 +3465,9 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
F->insert(It, sinkMBB);
// Update machine-CFG edges by first adding all successors of the current
// block to the new block which will contain the Phi node for the select.
- // Also inform sdisel of the edge changes.
for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
- E = BB->succ_end(); I != E; ++I) {
- EM->insert(std::make_pair(*I, sinkMBB));
+ E = BB->succ_end(); I != E; ++I)
sinkMBB->addSuccessor(*I);
- }
// Next, remove all successors of the current block, and add the true
// and fallthrough blocks as its successors.
while (!BB->succ_empty())
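
For orientation, this custom inserter is expanding a select pseudo into the usual triangle CFG; with the sdisel edge map gone, only the MachineBasicBlock successor lists are maintained. A hedged reconstruction of the successor surgery, including the lines the hunk cuts off (copy0MBB, the fall-through arm, is assumed from the surrounding function):

  // Give sinkMBB all of BB's successors, then rewire BB to the two arms.
  for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
       E = BB->succ_end(); I != E; ++I)
    sinkMBB->addSuccessor(*I);
  while (!BB->succ_empty())
    BB->removeSuccessor(BB->succ_begin());
  BB->addSuccessor(copy0MBB);   // fall-through arm
  BB->addSuccessor(sinkMBB);    // branch target / join block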
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index fa33ad307506..d8a230ff697c 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -159,25 +159,24 @@ namespace llvm {
// ARMTargetLowering - ARM Implementation of the TargetLowering interface
class ARMTargetLowering : public TargetLowering {
- int VarArgsFrameIndex; // FrameIndex for start of varargs area.
public:
explicit ARMTargetLowering(TargetMachine &TM);
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
/// ReplaceNodeResults - Replace the results of a node with an illegal result
/// type with new values built out of custom code.
///
virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
- SelectionDAG &DAG);
+ SelectionDAG &DAG) const;
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
virtual const char *getTargetNodeName(unsigned Opcode) const;
- virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *MBB,
- DenseMap<MachineBasicBlock*, MachineBasicBlock*>*) const;
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
/// allowsUnalignedMemoryAccesses - Returns true if the target allows
/// unaligned memory accesses of the specified type.
@@ -237,7 +236,7 @@ namespace llvm {
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const;
- virtual const ARMSubtarget* getSubtarget() {
+ virtual const ARMSubtarget* getSubtarget() const {
return Subtarget;
}
@@ -272,54 +271,57 @@ namespace llvm {
CCValAssign &VA, CCValAssign &NextVA,
SDValue &StackPtr,
SmallVector<SDValue, 8> &MemOpChains,
- ISD::ArgFlagsTy Flags);
+ ISD::ArgFlagsTy Flags) const;
SDValue GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
- SDValue &Root, SelectionDAG &DAG, DebugLoc dl);
+ SDValue &Root, SelectionDAG &DAG,
+ DebugLoc dl) const;
CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return, bool isVarArg) const;
SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
DebugLoc dl, SelectionDAG &DAG,
const CCValAssign &VA,
- ISD::ArgFlagsTy Flags);
- SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG);
+ ISD::ArgFlagsTy Flags) const;
+ SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
- const ARMSubtarget *Subtarget);
- SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG);
- SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG);
- SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG);
- SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG);
+ const ARMSubtarget *Subtarget) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
- SelectionDAG &DAG);
+ SelectionDAG &DAG) const;
SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA,
- SelectionDAG &DAG);
- SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG);
- SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG);
- SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG);
- SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG);
- SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG);
- SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG);
- SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG);
- SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG);
+ SelectionDAG &DAG) const;
+ SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
SDValue Chain,
SDValue Dst, SDValue Src,
SDValue Size, unsigned Align,
bool isVolatile, bool AlwaysInline,
- const Value *DstSV, uint64_t DstSVOff,
- const Value *SrcSV, uint64_t SrcSVOff);
+ const Value *DstSV,
+ uint64_t DstSVOff,
+ const Value *SrcSV,
+ uint64_t SrcSVOff) const;
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
LowerCall(SDValue Chain, SDValue Callee,
@@ -328,16 +330,16 @@ namespace llvm {
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
- DebugLoc dl, SelectionDAG &DAG);
+ DebugLoc dl, SelectionDAG &DAG) const;
SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
- SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl);
+ SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) const;
MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI,
MachineBasicBlock *BB,
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index f2ab06f328f2..ce5f2f877efb 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -124,6 +124,7 @@ def HasV6 : Predicate<"Subtarget->hasV6Ops()">;
def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">;
def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">;
def HasV7 : Predicate<"Subtarget->hasV7Ops()">;
+def NoVFP : Predicate<"!Subtarget->hasVFP2()">;
def HasVFP2 : Predicate<"Subtarget->hasVFP2()">;
def HasVFP3 : Predicate<"Subtarget->hasVFP3()">;
def HasNEON : Predicate<"Subtarget->hasNEON()">;
@@ -1231,7 +1232,7 @@ def LDRBT : AI2ldbpo<(outs GPR:$dst, GPR:$base_wb),
}
def LDRSBT : AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb),
- (ins GPR:$base,am2offset:$offset), LdMiscFrm, IIC_iLoadru,
+ (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru,
"ldrsbt", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
let Inst{21} = 1; // overwrite
}
@@ -2533,7 +2534,23 @@ let Defs =
"mov\tr0, #0\n\t"
"add\tpc, pc, #0\n\t"
"mov\tr0, #1 @ eh_setjmp end", "",
- [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>;
+ [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
+ Requires<[IsARM, HasVFP2]>;
+}
+
+let Defs =
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ] in {
+ def Int_eh_sjlj_setjmp_nofp : XI<(outs), (ins GPR:$src, GPR:$val),
+ AddrModeNone, SizeSpecial, IndexModeNone,
+ Pseudo, NoItinerary,
+ "str\tsp, [$src, #+8] @ eh_setjmp begin\n\t"
+ "add\t$val, pc, #8\n\t"
+ "str\t$val, [$src, #+4]\n\t"
+ "mov\tr0, #0\n\t"
+ "add\tpc, pc, #0\n\t"
+ "mov\tr0, #1 @ eh_setjmp end", "",
+ [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
+ Requires<[IsARM, NoVFP]>;
}
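
The predicate pair makes the two setjmp pseudos mutually exclusive: each TableGen Predicate wraps a C++ subtarget test, so exactly one Requires list holds for a given subtarget. A minimal sketch of the query NoVFP compiles down to (the helper name is hypothetical):

static bool matchesNoVFP(const ARMSubtarget *Subtarget) {
  // NoVFP is defined above as the literal string "!Subtarget->hasVFP2()".
  return !Subtarget->hasVFP2();
}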
//===----------------------------------------------------------------------===//
@@ -2747,7 +2764,7 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> {
def L_OFFSET : ACI<(outs),
(ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr),
- opc, "l\tp$cop, cr$CRd, $addr"> {
+ !strconcat(opc, "l"), "\tp$cop, cr$CRd, $addr"> {
let Inst{31-28} = op31_28;
let Inst{24} = 1; // P = 1
let Inst{21} = 0; // W = 0
@@ -2757,7 +2774,7 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> {
def L_PRE : ACI<(outs),
(ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr),
- opc, "l\tp$cop, cr$CRd, $addr!"> {
+ !strconcat(opc, "l"), "\tp$cop, cr$CRd, $addr!"> {
let Inst{31-28} = op31_28;
let Inst{24} = 1; // P = 1
let Inst{21} = 1; // W = 1
@@ -2767,7 +2784,7 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> {
def L_POST : ACI<(outs),
(ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, am2offset:$offset),
- opc, "l\tp$cop, cr$CRd, [$base], $offset"> {
+ !strconcat(opc, "l"), "\tp$cop, cr$CRd, [$base], $offset"> {
let Inst{31-28} = op31_28;
let Inst{24} = 0; // P = 0
let Inst{21} = 1; // W = 1
@@ -2777,7 +2794,7 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> {
def L_OPTION : ACI<(outs),
(ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, nohash_imm:$option),
- opc, "l\tp$cop, cr$CRd, [$base], $option"> {
+ !strconcat(opc, "l"), "\tp$cop, cr$CRd, [$base], $option"> {
let Inst{31-28} = op31_28;
let Inst{24} = 0; // P = 0
let Inst{23} = 1; // U = 1
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index ed9d31d80f36..d5ce2b8468df 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -1621,12 +1621,13 @@ multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
- InstrItinClass itin, string OpcodeStr, string Dt,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt,
Intrinsic IntOp, bit Commutable = 0> {
- def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin,
+ def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
OpcodeStr, !strconcat(Dt, "16"),
v4i32, v4i16, IntOp, Commutable>;
- def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin,
+ def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
OpcodeStr, !strconcat(Dt, "32"),
v2i64, v2i32, IntOp, Commutable>;
}
@@ -1642,11 +1643,12 @@ multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
// ....then also with element size of 8 bits:
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
- InstrItinClass itin, string OpcodeStr, string Dt,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt,
Intrinsic IntOp, bit Commutable = 0>
- : N3VLInt_HS<op24, op23, op11_8, op4, itin, OpcodeStr, Dt,
+ : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt,
IntOp, Commutable> {
- def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin,
+ def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
OpcodeStr, !strconcat(Dt, "8"),
v8i16, v8i8, IntOp, Commutable>;
}
@@ -1711,21 +1713,22 @@ multiclass N3VMulOpSL_HS<bits<4> op11_8,
// Neon 3-argument intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD, InstrItinClass itinQ,
string OpcodeStr, string Dt, Intrinsic IntOp> {
// 64-bit vector types.
- def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16D,
+ def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD,
OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
- def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16D,
+ def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD,
OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
- def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi32D,
+ def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD,
OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;
// 128-bit vector types.
- def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16Q,
+ def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ,
OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
- def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16Q,
+ def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ,
OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
- def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi32Q,
+ def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ,
OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}
@@ -1734,10 +1737,11 @@ multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
string OpcodeStr, string Dt, Intrinsic IntOp> {
- def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16D,
+ def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
- def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi16D,
+ def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}
@@ -1751,9 +1755,10 @@ multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
// ....then also with element size of 8 bits:
multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
string OpcodeStr, string Dt, Intrinsic IntOp>
- : N3VLInt3_HS<op24, op23, op11_8, op4, OpcodeStr, Dt, IntOp> {
- def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16D,
+ : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> {
+ def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
}
@@ -2001,10 +2006,10 @@ def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
v4f32, v4f32, fadd, 1>;
// VADDL : Vector Add Long (Q = D + D)
-defm VADDLs : N3VLInt_QHS<0,1,0b0000,0, IIC_VSHLiD, "vaddl", "s",
- int_arm_neon_vaddls, 1>;
-defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, "vaddl", "u",
- int_arm_neon_vaddlu, 1>;
+defm VADDLs : N3VLInt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vaddl", "s", int_arm_neon_vaddls, 1>;
+defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vaddl", "u", int_arm_neon_vaddlu, 1>;
// VADDW : Vector Add Wide (Q = Q + D)
defm VADDWs : N3VWInt_QHS<0,1,0b0001,0, "vaddw", "s", int_arm_neon_vaddws, 0>;
defm VADDWu : N3VWInt_QHS<1,1,0b0001,0, "vaddw", "u", int_arm_neon_vaddwu, 0>;
@@ -2118,10 +2123,10 @@ def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
(SubReg_i32_lane imm:$lane)))>;
// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
-defm VMULLs : N3VLInt_QHS<0,1,0b1100,0, IIC_VMULi16D, "vmull", "s",
- int_arm_neon_vmulls, 1>;
-defm VMULLu : N3VLInt_QHS<1,1,0b1100,0, IIC_VMULi16D, "vmull", "u",
- int_arm_neon_vmullu, 1>;
+defm VMULLs : N3VLInt_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
+ "vmull", "s", int_arm_neon_vmulls, 1>;
+defm VMULLu : N3VLInt_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
+ "vmull", "u", int_arm_neon_vmullu, 1>;
def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
v8i16, v8i8, int_arm_neon_vmullp, 1>;
defm VMULLsls : N3VLIntSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s",
@@ -2130,10 +2135,10 @@ defm VMULLslu : N3VLIntSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u",
int_arm_neon_vmullu>;
// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
-defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, "vqdmull", "s",
- int_arm_neon_vqdmull, 1>;
-defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D, "vqdmull", "s",
- int_arm_neon_vqdmull>;
+defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
+ "vqdmull", "s", int_arm_neon_vqdmull, 1>;
+defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
+ "vqdmull", "s", int_arm_neon_vqdmull>;
// Vector Multiply-Accumulate and Multiply-Subtract Operations.
@@ -2177,15 +2182,17 @@ def : Pat<(v4f32 (fadd (v4f32 QPR:$src1),
(SubReg_i32_lane imm:$lane)))>;
// VMLAL : Vector Multiply Accumulate Long (Q += D * D)
-defm VMLALs : N3VLInt3_QHS<0,1,0b1000,0, "vmlal", "s", int_arm_neon_vmlals>;
-defm VMLALu : N3VLInt3_QHS<1,1,0b1000,0, "vmlal", "u", int_arm_neon_vmlalu>;
+defm VMLALs : N3VLInt3_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlal", "s", int_arm_neon_vmlals>;
+defm VMLALu : N3VLInt3_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlal", "u", int_arm_neon_vmlalu>;
defm VMLALsls : N3VLInt3SL_HS<0, 0b0010, "vmlal", "s", int_arm_neon_vmlals>;
defm VMLALslu : N3VLInt3SL_HS<1, 0b0010, "vmlal", "u", int_arm_neon_vmlalu>;
// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
-defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, "vqdmlal", "s",
- int_arm_neon_vqdmlal>;
+defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
+ "vqdmlal", "s", int_arm_neon_vqdmlal>;
defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>;
// VMLS : Vector Multiply Subtract (integer and floating-point)
@@ -2227,15 +2234,17 @@ def : Pat<(v4f32 (fsub (v4f32 QPR:$src1),
(SubReg_i32_lane imm:$lane)))>;
// VMLSL : Vector Multiply Subtract Long (Q -= D * D)
-defm VMLSLs : N3VLInt3_QHS<0,1,0b1010,0, "vmlsl", "s", int_arm_neon_vmlsls>;
-defm VMLSLu : N3VLInt3_QHS<1,1,0b1010,0, "vmlsl", "u", int_arm_neon_vmlslu>;
+defm VMLSLs : N3VLInt3_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlsl", "s", int_arm_neon_vmlsls>;
+defm VMLSLu : N3VLInt3_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlsl", "u", int_arm_neon_vmlslu>;
defm VMLSLsls : N3VLInt3SL_HS<0, 0b0110, "vmlsl", "s", int_arm_neon_vmlsls>;
defm VMLSLslu : N3VLInt3SL_HS<1, 0b0110, "vmlsl", "u", int_arm_neon_vmlslu>;
// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
-defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, "vqdmlsl", "s",
- int_arm_neon_vqdmlsl>;
+defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
+ "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
// Vector Subtract Operations.
@@ -2248,26 +2257,26 @@ def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
v4f32, v4f32, fsub, 0>;
// VSUBL : Vector Subtract Long (Q = D - D)
-defm VSUBLs : N3VLInt_QHS<0,1,0b0010,0, IIC_VSHLiD, "vsubl", "s",
- int_arm_neon_vsubls, 1>;
-defm VSUBLu : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, "vsubl", "u",
- int_arm_neon_vsublu, 1>;
+defm VSUBLs : N3VLInt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vsubl", "s", int_arm_neon_vsubls, 1>;
+defm VSUBLu : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vsubl", "u", int_arm_neon_vsublu, 1>;
// VSUBW : Vector Subtract Wide (Q = Q - D)
defm VSUBWs : N3VWInt_QHS<0,1,0b0011,0, "vsubw", "s", int_arm_neon_vsubws, 0>;
defm VSUBWu : N3VWInt_QHS<1,1,0b0011,0, "vsubw", "u", int_arm_neon_vsubwu, 0>;
// VHSUB : Vector Halving Subtract
defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
- IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
"vhsub", "s", int_arm_neon_vhsubs, 0>;
defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
- IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
"vhsub", "u", int_arm_neon_vhsubu, 0>;
// VQSUB : Vector Saturating Subtract
defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
- IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
"vqsub", "s", int_arm_neon_vqsubs, 0>;
defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
- IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
"vqsub", "u", int_arm_neon_vqsubu, 0>;
// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i",
@@ -2279,8 +2288,8 @@ defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
// Vector Comparisons.
// VCEQ : Vector Compare Equal
-defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vceq", "i", NEONvceq, 1>;
+defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+ IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>;
def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
NEONvceq, 1>;
def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
@@ -2290,10 +2299,10 @@ defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
"$dst, $src, #0">;
// VCGE : Vector Compare Greater Than or Equal
-defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vcge", "s", NEONvcge, 0>;
-defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vcge", "u", NEONvcgeu, 0>;
+defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+ IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>;
+defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+ IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>;
def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
NEONvcge, 0>;
def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
@@ -2306,10 +2315,10 @@ defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
"$dst, $src, #0">;
// VCGT : Vector Compare Greater Than
-defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vcgt", "s", NEONvcgt, 0>;
-defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vcgt", "u", NEONvcgtu, 0>;
+defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+ IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>;
+defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+ IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>;
def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
NEONvcgt, 0>;
def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
@@ -2387,11 +2396,11 @@ def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$dst),
// VMVN : Vector Bitwise NOT
def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
- (outs DPR:$dst), (ins DPR:$src), IIC_VSHLiD,
+ (outs DPR:$dst), (ins DPR:$src), IIC_VSUBiD,
"vmvn", "$dst, $src", "",
[(set DPR:$dst, (v2i32 (vnot8 DPR:$src)))]>;
def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
- (outs QPR:$dst), (ins QPR:$src), IIC_VSHLiD,
+ (outs QPR:$dst), (ins QPR:$src), IIC_VSUBiD,
"vmvn", "$dst, $src", "",
[(set QPR:$dst, (v4i32 (vnot16 QPR:$src)))]>;
def : Pat<(v2i32 (vnot8 DPR:$src)), (VMVNd DPR:$src)>;
@@ -2447,10 +2456,10 @@ def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
// VABD : Vector Absolute Difference
defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
- IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
"vabd", "s", int_arm_neon_vabds, 0>;
defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
- IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
"vabd", "u", int_arm_neon_vabdu, 0>;
def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
"vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 0>;
@@ -2458,56 +2467,68 @@ def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
"vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 0>;
// VABDL : Vector Absolute Difference Long (Q = | D - D |)
-defm VABDLs : N3VLInt_QHS<0,1,0b0111,0, IIC_VBINi4Q,
+defm VABDLs : N3VLInt_QHS<0,1,0b0111,0, IIC_VSUBi4Q, IIC_VSUBi4Q,
"vabdl", "s", int_arm_neon_vabdls, 0>;
-defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, IIC_VBINi4Q,
+defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, IIC_VSUBi4Q, IIC_VSUBi4Q,
"vabdl", "u", int_arm_neon_vabdlu, 0>;
// VABA : Vector Absolute Difference and Accumulate
-defm VABAs : N3VInt3_QHS<0,0,0b0111,1, "vaba", "s", int_arm_neon_vabas>;
-defm VABAu : N3VInt3_QHS<1,0,0b0111,1, "vaba", "u", int_arm_neon_vabau>;
+defm VABAs : N3VInt3_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
+ "vaba", "s", int_arm_neon_vabas>;
+defm VABAu : N3VInt3_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
+ "vaba", "u", int_arm_neon_vabau>;
// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
-defm VABALs : N3VLInt3_QHS<0,1,0b0101,0, "vabal", "s", int_arm_neon_vabals>;
-defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, "vabal", "u", int_arm_neon_vabalu>;
+defm VABALs : N3VLInt3_QHS<0,1,0b0101,0, IIC_VABAD, IIC_VABAD,
+ "vabal", "s", int_arm_neon_vabals>;
+defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, IIC_VABAD, IIC_VABAD,
+ "vabal", "u", int_arm_neon_vabalu>;
// Vector Maximum and Minimum.
// VMAX : Vector Maximum
defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
- IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
"vmax", "s", int_arm_neon_vmaxs, 1>;
defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
- IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
"vmax", "u", int_arm_neon_vmaxu, 1>;
-def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND, "vmax",
- "f32", v2f32, v2f32, int_arm_neon_vmaxs, 1>;
-def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ, "vmax",
- "f32", v4f32, v4f32, int_arm_neon_vmaxs, 1>;
+def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
+ "vmax", "f32",
+ v2f32, v2f32, int_arm_neon_vmaxs, 1>;
+def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
+ "vmax", "f32",
+ v4f32, v4f32, int_arm_neon_vmaxs, 1>;
// VMIN : Vector Minimum
defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
- IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
"vmin", "s", int_arm_neon_vmins, 1>;
defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
- IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
"vmin", "u", int_arm_neon_vminu, 1>;
-def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND, "vmin",
- "f32", v2f32, v2f32, int_arm_neon_vmins, 1>;
-def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ, "vmin",
- "f32", v4f32, v4f32, int_arm_neon_vmins, 1>;
+def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
+ "vmin", "f32",
+ v2f32, v2f32, int_arm_neon_vmins, 1>;
+def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
+ "vmin", "f32",
+ v4f32, v4f32, int_arm_neon_vmins, 1>;
// Vector Pairwise Operations.
// VPADD : Vector Pairwise Add
-def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VBINiD, "vpadd",
- "i8", v8i8, v8i8, int_arm_neon_vpadd, 0>;
-def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VBINiD, "vpadd",
- "i16", v4i16, v4i16, int_arm_neon_vpadd, 0>;
-def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VBINiD, "vpadd",
- "i32", v2i32, v2i32, int_arm_neon_vpadd, 0>;
-def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm, IIC_VBIND, "vpadd",
- "f32", v2f32, v2f32, int_arm_neon_vpadd, 0>;
+def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
+ "vpadd", "i8",
+ v8i8, v8i8, int_arm_neon_vpadd, 0>;
+def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
+ "vpadd", "i16",
+ v4i16, v4i16, int_arm_neon_vpadd, 0>;
+def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
+ "vpadd", "i32",
+ v2i32, v2i32, int_arm_neon_vpadd, 0>;
+def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
+ IIC_VBIND, "vpadd", "f32",
+ v2f32, v2f32, int_arm_neon_vpadd, 0>;
// VPADDL : Vector Pairwise Add Long
defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
@@ -2522,35 +2543,35 @@ defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
int_arm_neon_vpadalu>;
// VPMAX : Vector Pairwise Maximum
-def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax",
+def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
"s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
-def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax",
+def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
"s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
-def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax",
+def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
"s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
-def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax",
+def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
"u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
-def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax",
+def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
"u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
-def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax",
+def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
"u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
-def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINi4D, "vpmax",
+def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
"f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
// VPMIN : Vector Pairwise Minimum
-def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin",
+def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
"s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
-def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin",
+def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
"s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
-def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin",
+def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
"s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
-def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin",
+def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
"u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
-def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin",
+def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
"u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
-def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin",
+def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
"u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
-def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINi4D, "vpmin",
+def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VSUBi4D, "vpmin",
"f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 262aae48913a..742bd403cdde 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -2386,9 +2386,25 @@ let Defs =
"\tb\t1f\n"
"\tmovs\tr0, #1\t@ end eh.setjmp\n"
"1:", "",
- [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>;
+ [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>,
+ Requires<[IsThumb2, HasVFP2]>;
}
+let Defs =
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ] in {
+ def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val),
+ AddrModeNone, SizeSpecial, NoItinerary,
+ "str\t$val, [$src, #8]\t@ begin eh.setjmp\n"
+ "\tmov\t$val, pc\n"
+ "\tadds\t$val, #9\n"
+ "\tstr\t$val, [$src, #4]\n"
+ "\tmovs\tr0, #0\n"
+ "\tb\t1f\n"
+ "\tmovs\tr0, #1\t@ end eh.setjmp\n"
+ "1:", "",
+ [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>,
+ Requires<[IsThumb2, NoVFP]>;
+}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 045838928660..36fcaa13049d 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -256,25 +256,25 @@ def VCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm,
// Between half-precision and single-precision. For disassembly only.
def VCVTBSH : ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$dst), (ins SPR:$a),
- /* FIXME */ IIC_fpCVTDS, "vcvtb", ".f32.f16\t$dst, $a",
+ /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$dst, $a",
[/* For disassembly only; pattern left blank */]>;
def : ARMPat<(f32_to_f16 SPR:$a),
(i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
def VCVTBHS : ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$dst), (ins SPR:$a),
- /* FIXME */ IIC_fpCVTDS, "vcvtb", ".f16.f32\t$dst, $a",
+ /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$dst, $a",
[/* For disassembly only; pattern left blank */]>;
def : ARMPat<(f16_to_f32 GPR:$a),
(VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
def VCVTTSH : ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$dst), (ins SPR:$a),
- /* FIXME */ IIC_fpCVTDS, "vcvtt", ".f32.f16\t$dst, $a",
+ /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$dst, $a",
[/* For disassembly only; pattern left blank */]>;
def VCVTTHS : ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$dst), (ins SPR:$a),
- /* FIXME */ IIC_fpCVTDS, "vcvtt", ".f16.f32\t$dst, $a",
+ /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$dst, $a",
[/* For disassembly only; pattern left blank */]>;
let neverHasSideEffects = 1 in {
@@ -306,23 +306,23 @@ def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs SPR:$dst), (ins SPR:$a),
//
def VMOVRS : AVConv2I<0b11100001, 0b1010, (outs GPR:$dst), (ins SPR:$src),
- IIC_VMOVSI, "vmov", "\t$dst, $src",
+ IIC_fpMOVSI, "vmov", "\t$dst, $src",
[(set GPR:$dst, (bitconvert SPR:$src))]>;
def VMOVSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src),
- IIC_VMOVIS, "vmov", "\t$dst, $src",
+ IIC_fpMOVIS, "vmov", "\t$dst, $src",
[(set SPR:$dst, (bitconvert GPR:$src))]>;
def VMOVRRD : AVConv3I<0b11000101, 0b1011,
(outs GPR:$wb, GPR:$dst2), (ins DPR:$src),
- IIC_VMOVDI, "vmov", "\t$wb, $dst2, $src",
+ IIC_fpMOVDI, "vmov", "\t$wb, $dst2, $src",
[/* FIXME: Can't write pattern for multiple result instr*/]> {
let Inst{7-6} = 0b00;
}
def VMOVRRS : AVConv3I<0b11000101, 0b1010,
(outs GPR:$wb, GPR:$dst2), (ins SPR:$src1, SPR:$src2),
- IIC_VMOVDI, "vmov", "\t$wb, $dst2, $src1, $src2",
+ IIC_fpMOVDI, "vmov", "\t$wb, $dst2, $src1, $src2",
[/* For disassembly only; pattern left blank */]> {
let Inst{7-6} = 0b00;
}
@@ -332,14 +332,14 @@ def VMOVRRS : AVConv3I<0b11000101, 0b1010,
def VMOVDRR : AVConv5I<0b11000100, 0b1011,
(outs DPR:$dst), (ins GPR:$src1, GPR:$src2),
- IIC_VMOVID, "vmov", "\t$dst, $src1, $src2",
+ IIC_fpMOVID, "vmov", "\t$dst, $src1, $src2",
[(set DPR:$dst, (arm_fmdrr GPR:$src1, GPR:$src2))]> {
let Inst{7-6} = 0b00;
}
def VMOVSRR : AVConv5I<0b11000100, 0b1010,
(outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2),
- IIC_VMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2",
+ IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2",
[/* For disassembly only; pattern left blank */]> {
let Inst{7-6} = 0b00;
}
@@ -678,7 +678,7 @@ def VMSR : VFPAI<(outs), (ins GPR:$src), VFPMiscFrm, IIC_fpSTAT, "vmsr",
// Materialize FP immediates. VFP3 only.
let isReMaterializable = 1 in {
def FCONSTD : VFPAI<(outs DPR:$dst), (ins vfp_f64imm:$imm),
- VFPMiscFrm, IIC_VMOVImm,
+ VFPMiscFrm, IIC_fpUNA64,
"vmov", ".f64\t$dst, $imm",
[(set DPR:$dst, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> {
let Inst{27-23} = 0b11101;
@@ -689,7 +689,7 @@ def FCONSTD : VFPAI<(outs DPR:$dst), (ins vfp_f64imm:$imm),
}
def FCONSTS : VFPAI<(outs SPR:$dst), (ins vfp_f32imm:$imm),
- VFPMiscFrm, IIC_VMOVImm,
+ VFPMiscFrm, IIC_fpUNA32,
"vmov", ".f32\t$dst, $imm",
[(set SPR:$dst, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> {
let Inst{27-23} = 0b11101;
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp
index 8c0b7206d22e..b31a4fa343ba 100644
--- a/lib/Target/ARM/ARMJITInfo.cpp
+++ b/lib/Target/ARM/ARMJITInfo.cpp
@@ -27,7 +27,7 @@
using namespace llvm;
void ARMJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
- llvm_report_error("ARMJITInfo::replaceMachineCodeForFunction");
+ report_fatal_error("ARMJITInfo::replaceMachineCodeForFunction");
}
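
report_fatal_error is the new name for llvm_report_error, with the same contract: route the message through any installed error handler and never return. A minimal hedged usage sketch:

#include "llvm/Support/ErrorHandling.h"

void rejectUnsupported() {
  // Prints via the installed handler (stderr by default) and exits the
  // process; the function is declared noreturn.
  llvm::report_fatal_error("ARM JIT: code replacement not implemented");
}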
/// JITCompilerFunction - This contains the address of the JIT function used to
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index cb762a4669fd..8585c1e50105 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -1358,7 +1358,7 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
return false;
unsigned Align = (*Op0->memoperands_begin())->getAlignment();
- Function *Func = MF->getFunction();
+ const Function *Func = MF->getFunction();
unsigned ReqAlign = STI->hasV6Ops()
? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext()))
: 8; // Pre-v6 need 8-byte align
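
Isolating the alignment gate from this hunk as a hedged sketch (free function and parameter names are illustrative): an LDRD/STRD pair may only be formed when the access meets the i64 preferred alignment on v6+ subtargets, or a hard 8-byte floor before v6.

static bool alignedForLdStDWord(unsigned Align, bool HasV6Ops,
                                unsigned PrefI64Align) {
  // Mirrors ReqAlign above: preferred i64 alignment with v6 ops,
  // 8 bytes otherwise.
  unsigned ReqAlign = HasV6Ops ? PrefI64Align : 8;
  return Align >= ReqAlign;
}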
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index c998edeb1fe4..013427635592 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -85,6 +85,9 @@ class ARMFunctionInfo : public MachineFunctionInfo {
unsigned ConstPoolEntryUId;
+ /// VarArgsFrameIndex - FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+
public:
ARMFunctionInfo() :
isThumb(false),
@@ -94,7 +97,7 @@ public:
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0),
- JumpTableUId(0), ConstPoolEntryUId(0) {}
+ JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0) {}
explicit ARMFunctionInfo(MachineFunction &MF) :
isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
@@ -105,7 +108,7 @@ public:
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32),
SpilledCSRegs(MF.getTarget().getRegisterInfo()->getNumRegs()),
- JumpTableUId(0), ConstPoolEntryUId(0) {}
+ JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0) {}
bool isThumbFunction() const { return isThumb; }
bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; }
@@ -223,6 +226,9 @@ public:
unsigned createConstPoolEntryUId() {
return ConstPoolEntryUId++;
}
+
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+ void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
};
} // End llvm namespace
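
Moving VarArgsFrameIndex into ARMFunctionInfo is what allows the lowering hooks to become const: per-function mutable state now lives on the MachineFunction rather than on the shared TargetLowering object. A hedged sketch of the access pattern this enables (illustrative; not necessarily the patch's exact LowerVASTART body):

SDValue ARMTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  // Read the frame index through the per-function info object, so the
  // const hook never mutates ARMTargetLowering itself.
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  SDValue FR = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(), getPointerTy());
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), Op.getDebugLoc(), FR,
                      Op.getOperand(1), SV, 0, false, false, 0);
}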
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td
index fc4c5f5830b0..b60ccca46867 100644
--- a/lib/Target/ARM/ARMSchedule.td
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -8,17 +8,6 @@
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// Functional units across ARM processors
-//
-def FU_Issue : FuncUnit; // issue
-def FU_Pipe0 : FuncUnit; // pipeline 0
-def FU_Pipe1 : FuncUnit; // pipeline 1
-def FU_LdSt0 : FuncUnit; // pipeline 0 load/store
-def FU_LdSt1 : FuncUnit; // pipeline 1 load/store
-def FU_NPipe : FuncUnit; // NEON ALU/MUL pipe
-def FU_NLSPipe : FuncUnit; // NEON LS pipe
-
-//===----------------------------------------------------------------------===//
// Instruction Itinerary classes used for ARM
//
def IIC_iALUx : InstrItinClass;
@@ -69,10 +58,16 @@ def IIC_fpCMP32 : InstrItinClass;
def IIC_fpCMP64 : InstrItinClass;
def IIC_fpCVTSD : InstrItinClass;
def IIC_fpCVTDS : InstrItinClass;
+def IIC_fpCVTSH : InstrItinClass;
+def IIC_fpCVTHS : InstrItinClass;
def IIC_fpCVTIS : InstrItinClass;
def IIC_fpCVTID : InstrItinClass;
def IIC_fpCVTSI : InstrItinClass;
def IIC_fpCVTDI : InstrItinClass;
+def IIC_fpMOVIS : InstrItinClass;
+def IIC_fpMOVID : InstrItinClass;
+def IIC_fpMOVSI : InstrItinClass;
+def IIC_fpMOVDI : InstrItinClass;
def IIC_fpALU32 : InstrItinClass;
def IIC_fpALU64 : InstrItinClass;
def IIC_fpMUL32 : InstrItinClass;
@@ -125,6 +120,10 @@ def IIC_VSUBiD : InstrItinClass;
def IIC_VSUBiQ : InstrItinClass;
def IIC_VBINi4D : InstrItinClass;
def IIC_VBINi4Q : InstrItinClass;
+def IIC_VSUBi4D : InstrItinClass;
+def IIC_VSUBi4Q : InstrItinClass;
+def IIC_VABAD : InstrItinClass;
+def IIC_VABAQ : InstrItinClass;
def IIC_VSHLiD : InstrItinClass;
def IIC_VSHLiQ : InstrItinClass;
def IIC_VSHLi4D : InstrItinClass;
@@ -153,8 +152,8 @@ def IIC_VTBX4 : InstrItinClass;
//===----------------------------------------------------------------------===//
// Processor instruction itineraries.
-def GenericItineraries : ProcessorItineraries<[]>;
-
+def GenericItineraries : ProcessorItineraries<[], []>;
include "ARMScheduleV6.td"
-include "ARMScheduleV7.td"
+include "ARMScheduleA8.td"
+include "ARMScheduleA9.td"
diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td
new file mode 100644
index 000000000000..bbfc0b2b4744
--- /dev/null
+++ b/lib/Target/ARM/ARMScheduleA8.td
@@ -0,0 +1,618 @@
+//=- ARMScheduleA8.td - ARM Cortex-A8 Scheduling Definitions -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the ARM Cortex-A8 processor.
+//
+//===----------------------------------------------------------------------===//
+
+//
+// Scheduling information derived from "Cortex-A8 Technical Reference Manual".
+// Functional Units.
+def A8_Issue : FuncUnit; // issue
+def A8_Pipe0 : FuncUnit; // pipeline 0
+def A8_Pipe1 : FuncUnit; // pipeline 1
+def A8_LdSt0 : FuncUnit; // pipeline 0 load/store
+def A8_LdSt1 : FuncUnit; // pipeline 1 load/store
+def A8_NPipe : FuncUnit; // NEON ALU/MUL pipe
+def A8_NLSPipe : FuncUnit; // NEON LS pipe
+//
+// Dual issue pipeline represented by A8_Pipe0 | A8_Pipe1
+//
+def CortexA8Itineraries : ProcessorItineraries<
+ [A8_Issue, A8_Pipe0, A8_Pipe1, A8_LdSt0, A8_LdSt1, A8_NPipe, A8_NLSPipe], [
+ // Two fully-pipelined integer ALU pipelines
+ //
+ // No operand cycles
+ InstrItinData<IIC_iALUx , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
+ //
+ // Binary Instructions that produce a result
+ InstrItinData<IIC_iALUi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iALUr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
+ InstrItinData<IIC_iALUsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
+ InstrItinData<IIC_iALUsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
+ //
+ // Unary Instructions that produce a result
+ InstrItinData<IIC_iUNAr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iUNAsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iUNAsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
+ //
+ // Compare instructions
+ InstrItinData<IIC_iCMPi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
+ InstrItinData<IIC_iCMPr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iCMPsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMPsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
+ //
+ // Move instructions, unconditional
+ InstrItinData<IIC_iMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>,
+ InstrItinData<IIC_iMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
+ //
+ // Move instructions, conditional
+ InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
+ InstrItinData<IIC_iCMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
+
+ // Integer multiply pipeline
+ // Result written in E5, but that is relative to the last cycle of a
+ // multi-cycle instruction, so we use 6 for those cases
+ //
+ InstrItinData<IIC_iMUL16 , [InstrStage<1, [A8_Pipe0]>], [5, 1, 1]>,
+ InstrItinData<IIC_iMAC16 , [InstrStage<1, [A8_Pipe1], 0>,
+ InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
+ InstrItinData<IIC_iMUL32 , [InstrStage<1, [A8_Pipe1], 0>,
+ InstrStage<2, [A8_Pipe0]>], [6, 1, 1]>,
+ InstrItinData<IIC_iMAC32 , [InstrStage<1, [A8_Pipe1], 0>,
+ InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
+ InstrItinData<IIC_iMUL64 , [InstrStage<2, [A8_Pipe1], 0>,
+ InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
+ InstrItinData<IIC_iMAC64 , [InstrStage<2, [A8_Pipe1], 0>,
+ InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
+
+ // Integer load pipeline
+ //
+ // loads have an extra cycle of latency, but are fully pipelined
+ // use A8_Issue to enforce the 1 load/store per cycle limit
+ //
+ // Immediate offset
+ InstrItinData<IIC_iLoadi , [InstrStage<1, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [3, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iLoadr , [InstrStage<1, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
+ //
+ // Scaled register offset, issues over 2 cycles
+ InstrItinData<IIC_iLoadsi , [InstrStage<2, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0], 0>,
+ InstrStage<1, [A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [4, 1, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iLoadiu , [InstrStage<1, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [3, 2, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iLoadru , [InstrStage<1, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [3, 2, 1, 1]>,
+ //
+ // Scaled register offset with update, issues over 2 cycles
+ InstrItinData<IIC_iLoadsiu , [InstrStage<2, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0], 0>,
+ InstrStage<1, [A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [4, 3, 1, 1]>,
+ //
+ // Load multiple
+ InstrItinData<IIC_iLoadm , [InstrStage<2, [A8_Issue], 0>,
+ InstrStage<2, [A8_Pipe0], 0>,
+ InstrStage<2, [A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>]>,
+
+ // Integer store pipeline
+ //
+ // use A8_Issue to enforce the 1 load/store per cycle limit
+ //
+ // Immediate offset
+ InstrItinData<IIC_iStorei , [InstrStage<1, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [3, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iStorer , [InstrStage<1, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
+ //
+ // Scaled register offset, issues over 2 cycles
+ InstrItinData<IIC_iStoresi , [InstrStage<2, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0], 0>,
+ InstrStage<1, [A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iStoreiu , [InstrStage<1, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [2, 3, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iStoreru , [InstrStage<1, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [2, 3, 1, 1]>,
+ //
+ // Scaled register offset with update, issues over 2 cycles
+ InstrItinData<IIC_iStoresiu, [InstrStage<2, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0], 0>,
+ InstrStage<1, [A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [3, 3, 1, 1]>,
+ //
+ // Store multiple
+ InstrItinData<IIC_iStorem , [InstrStage<2, [A8_Issue], 0>,
+ InstrStage<2, [A8_Pipe0], 0>,
+ InstrStage<2, [A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>]>,
+
+ // Branch
+ //
+ // no delay slots, so the latency of a branch is unimportant
+ InstrItinData<IIC_Br , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
+
+ // VFP
+ // Issue through integer pipeline, and execute in NEON unit. We assume
+ // RunFast mode so that NFP pipeline is used for single-precision when
+ // possible.
+ //
+ // FP Special Register to Integer Register File Move
+ InstrItinData<IIC_fpSTAT , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NLSPipe]>]>,
+ //
+ // Single-precision FP Unary
+ InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NPipe]>], [7, 1]>,
+ //
+ // Double-precision FP Unary
+ InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<4, [A8_NPipe], 0>,
+ InstrStage<4, [A8_NLSPipe]>], [4, 1]>,
+ //
+ // Single-precision FP Compare
+ InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NPipe]>], [1, 1]>,
+ //
+ // Double-precision FP Compare
+ InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<4, [A8_NPipe], 0>,
+ InstrStage<4, [A8_NLSPipe]>], [4, 1]>,
+ //
+ // Single to Double FP Convert
+ InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<7, [A8_NPipe], 0>,
+ InstrStage<7, [A8_NLSPipe]>], [7, 1]>,
+ //
+ // Double to Single FP Convert
+ InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<5, [A8_NPipe], 0>,
+ InstrStage<5, [A8_NLSPipe]>], [5, 1]>,
+ //
+ // Single-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NPipe]>], [7, 1]>,
+ //
+ // Double-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<8, [A8_NPipe], 0>,
+ InstrStage<8, [A8_NLSPipe]>], [8, 1]>,
+ //
+ // Integer to Single-Precision FP Convert
+ InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NPipe]>], [7, 1]>,
+ //
+ // Integer to Double-Precision FP Convert
+ InstrItinData<IIC_fpCVTID , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<8, [A8_NPipe], 0>,
+ InstrStage<8, [A8_NLSPipe]>], [8, 1]>,
+ //
+ // Single-precision FP ALU
+ InstrItinData<IIC_fpALU32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NPipe]>], [7, 1, 1]>,
+ //
+ // Double-precision FP ALU
+ InstrItinData<IIC_fpALU64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<9, [A8_NPipe], 0>,
+ InstrStage<9, [A8_NLSPipe]>], [9, 1, 1]>,
+ //
+ // Single-precision FP Multiply
+ InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NPipe]>], [7, 1, 1]>,
+ //
+ // Double-precision FP Multiply
+ InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<11, [A8_NPipe], 0>,
+ InstrStage<11, [A8_NLSPipe]>], [11, 1, 1]>,
+ //
+ // Single-precision FP MAC
+ InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>,
+ //
+ // Double-precision FP MAC
+ InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<19, [A8_NPipe], 0>,
+ InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>,
+ //
+ // Single-precision FP DIV
+ InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<20, [A8_NPipe], 0>,
+ InstrStage<20, [A8_NLSPipe]>], [20, 1, 1]>,
+ //
+ // Double-precision FP DIV
+ InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<29, [A8_NPipe], 0>,
+ InstrStage<29, [A8_NLSPipe]>], [29, 1, 1]>,
+ //
+ // Single-precision FP SQRT
+ InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<19, [A8_NPipe], 0>,
+ InstrStage<19, [A8_NLSPipe]>], [19, 1]>,
+ //
+ // Double-precision FP SQRT
+ InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<29, [A8_NPipe], 0>,
+ InstrStage<29, [A8_NLSPipe]>], [29, 1]>,
+ //
+ // Single-precision FP Load
+ // use A8_Issue to enforce the 1 load/store per cycle limit
+ InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0], 0>,
+ InstrStage<1, [A8_NLSPipe]>]>,
+ //
+ // Double-precision FP Load
+ // use A8_Issue to enforce the 1 load/store per cycle limit
+ InstrItinData<IIC_fpLoad64, [InstrStage<2, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0], 0>,
+ InstrStage<1, [A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0], 0>,
+ InstrStage<1, [A8_NLSPipe]>]>,
+ //
+ // FP Load Multiple
+ // use A8_Issue to enforce the 1 load/store per cycle limit
+ InstrItinData<IIC_fpLoadm, [InstrStage<3, [A8_Issue], 0>,
+ InstrStage<2, [A8_Pipe0], 0>,
+ InstrStage<2, [A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0], 0>,
+ InstrStage<1, [A8_NLSPipe]>]>,
+ //
+ // Single-precision FP Store
+ // use A8_Issue to enforce the 1 load/store per cycle limit
+ InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0], 0>,
+ InstrStage<1, [A8_NLSPipe]>]>,
+ //
+ // Double-precision FP Store
+ // use A8_Issue to enforce the 1 load/store per cycle limit
+ InstrItinData<IIC_fpStore64,[InstrStage<2, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0], 0>,
+ InstrStage<1, [A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0], 0>,
+ InstrStage<1, [A8_NLSPipe]>]>,
+ //
+ // FP Store Multiple
+ // use A8_Issue to enforce the 1 load/store per cycle limit
+ InstrItinData<IIC_fpStorem, [InstrStage<3, [A8_Issue], 0>,
+ InstrStage<2, [A8_Pipe0], 0>,
+ InstrStage<2, [A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0], 0>,
+ InstrStage<1, [A8_NLSPipe]>]>,
+
+ // NEON
+ // Issue through integer pipeline, and execute in NEON unit.
+ //
+ // VLD1
+ // FIXME: We don't model this instruction properly
+ InstrItinData<IIC_VLD1, [InstrStage<1, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0], 0>,
+ InstrStage<1, [A8_NLSPipe]>]>,
+ //
+ // VLD2
+ // FIXME: We don't model this instruction properly
+ InstrItinData<IIC_VLD2, [InstrStage<1, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [2, 2, 1]>,
+ //
+ // VLD3
+ // FIXME: We don't model this instruction properly
+ InstrItinData<IIC_VLD3, [InstrStage<1, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 1]>,
+ //
+ // VLD4
+ // FIXME: We don't model this instruction properly
+ InstrItinData<IIC_VLD4, [InstrStage<1, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 2, 1]>,
+ //
+ // VST
+ // FIXME: We don't model this instruction properly
+ InstrItinData<IIC_VST, [InstrStage<1, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0], 0>,
+ InstrStage<1, [A8_NLSPipe]>]>,
+ //
+ // Double-register FP Unary
+ InstrItinData<IIC_VUNAD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NPipe]>], [5, 2]>,
+ //
+ // Quad-register FP Unary
+ // Result written in N5, but that is relative to the last cycle of the
+ // multi-cycle itinerary, so we use 6 for those cases
+ InstrItinData<IIC_VUNAQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NPipe]>], [6, 2]>,
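+ // (Worked example: the 2-cycle A8_NPipe stage retires one cycle after issue,
+ // so an N5 result is ready 5 + 1 = 6 cycles after issue.)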
+ //
+ // Double-register FP Binary
+ InstrItinData<IIC_VBIND, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NPipe]>], [5, 2, 2]>,
+ //
+ // Quad-register FP Binary
+ // Result written in N5, but that is relative to the last cycle of the
+ // multi-cycle itinerary, so we use 6 for those cases
+ InstrItinData<IIC_VBINQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NPipe]>], [6, 2, 2]>,
+ //
+ // Move Immediate
+ InstrItinData<IIC_VMOVImm, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NPipe]>], [3]>,
+ //
+ // Double-register Permute Move
+ InstrItinData<IIC_VMOVD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NLSPipe]>], [2, 1]>,
+ //
+ // Quad-register Permute Move
+ // Result written in N2, but that is relative to the last cycle of the
+ // multi-cycle itinerary, so we use 3 for those cases
+ InstrItinData<IIC_VMOVQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 1]>,
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_VMOVIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NLSPipe]>], [2, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_VMOVID , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>,
+ //
+ // Single-precision to Integer Move
+ InstrItinData<IIC_VMOVSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NLSPipe]>], [20, 1]>,
+ //
+ // Double-precision to Integer Move
+ InstrItinData<IIC_VMOVDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NLSPipe]>], [20, 20, 1]>,
+ //
+ // Integer to Lane Move
+ InstrItinData<IIC_VMOVISL , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>,
+ //
+ // Double-register Permute
+ InstrItinData<IIC_VPERMD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NLSPipe]>], [2, 2, 1, 1]>,
+ //
+ // Quad-register Permute
+ // Result written in N2, but that is relative to the last cycle of the
+ // multi-cycle itinerary, so we use 3 for those cases
+ InstrItinData<IIC_VPERMQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 3, 1, 1]>,
+ //
+ // Quad-register Permute (3 cycle issue)
+ // Result written in N2, but that is relative to the last cycle of the
+ // multi-cycle itinerary, so we use 4 for those cases
+ InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NLSPipe]>,
+ InstrStage<1, [A8_NPipe], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [4, 4, 1, 1]>,
+ //
+ // Double-register FP Multiply-Accumulate
+ InstrItinData<IIC_VMACD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>,
+ //
+ // Quad-register FP Multiply-Accumulate
+ // Result written in N9, but that is relative to the last cycle of the
+ // multi-cycle itinerary, so we use 10 for those cases
+ InstrItinData<IIC_VMACQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>,
+ //
+ // Double-register Reciprocal Step
+ InstrItinData<IIC_VRECSD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NPipe]>], [9, 2, 2]>,
+ //
+ // Quad-register Reciprocal Step
+ InstrItinData<IIC_VRECSQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NPipe]>], [10, 2, 2]>,
+ //
+ // Double-register Integer Count
+ InstrItinData<IIC_VCNTiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
+ //
+ // Quad-register Integer Count
+ // Result written in N3, but that is relative to the last cycle of the
+ // multi-cycle itinerary, so we use 4 for those cases
+ InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NPipe]>], [4, 2, 2]>,
+ //
+ // Double-register Integer Unary
+ InstrItinData<IIC_VUNAiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NPipe]>], [4, 2]>,
+ //
+ // Quad-register Integer Unary
+ InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NPipe]>], [4, 2]>,
+ //
+ // Double-register Integer Q-Unary
+ InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NPipe]>], [4, 1]>,
+ //
+ // Quad-register Integer Q-Unary
+ InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NPipe]>], [4, 1]>,
+ //
+ // Double-register Integer Binary
+ InstrItinData<IIC_VBINiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
+ //
+ // Quad-register Integer Binary
+ InstrItinData<IIC_VBINiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
+ //
+ // Double-register Integer Binary (4 cycle)
+ InstrItinData<IIC_VBINi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
+ //
+ // Quad-register Integer Binary (4 cycle)
+ InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
+
+ //
+ // Double-register Integer Subtract
+ InstrItinData<IIC_VSUBiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NPipe]>], [3, 2, 1]>,
+ //
+ // Quad-register Integer Subtract
+ InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NPipe]>], [3, 2, 1]>,
+ //
+ // Double-register Integer Subtract (4 cycle)
+ InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
+ //
+ // Quad-register Integer Subtract (4 cycle)
+ InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
+ //
+ // Double-register Integer Shift
+ InstrItinData<IIC_VSHLiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NPipe]>], [3, 1, 1]>,
+ //
+ // Quad-register Integer Shift
+ InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NPipe]>], [4, 1, 1]>,
+ //
+ // Double-register Integer Shift (4 cycle)
+ InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NPipe]>], [4, 1, 1]>,
+ //
+ // Quad-register Integer Shift (4 cycle)
+ InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NPipe]>], [5, 1, 1]>,
+ //
+ // Double-register Integer Pair Add Long
+ InstrItinData<IIC_VPALiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NPipe]>], [6, 3, 1]>,
+ //
+ // Quad-register Integer Pair Add Long
+ InstrItinData<IIC_VPALiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NPipe]>], [7, 3, 1]>,
+ //
+ // Double-register Absolute Difference and Accumulate
+ InstrItinData<IIC_VABAD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NPipe]>], [6, 3, 2, 1]>,
+ //
+ // Quad-register Absolute Difference and Accumulate
+ InstrItinData<IIC_VABAQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NPipe]>], [6, 3, 2, 1]>,
+
+ //
+ // Double-register Integer Multiply (.8, .16)
+ InstrItinData<IIC_VMULi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NPipe]>], [6, 2, 2]>,
+ //
+ // Double-register Integer Multiply (.32)
+ InstrItinData<IIC_VMULi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NPipe]>], [7, 2, 1]>,
+ //
+ // Quad-register Integer Multiply (.8, .16)
+ InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NPipe]>], [7, 2, 2]>,
+ //
+ // Quad-register Integer Multiply (.32)
+ InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NPipe]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_NPipe]>], [9, 2, 1]>,
+ //
+ // Double-register Integer Multiply-Accumulate (.8, .16)
+ InstrItinData<IIC_VMACi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NPipe]>], [6, 3, 2, 2]>,
+ //
+ // Double-register Integer Multiply-Accumulate (.32)
+ InstrItinData<IIC_VMACi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NPipe]>], [7, 3, 2, 1]>,
+ //
+ // Quad-register Integer Multiply-Accumulate (.8, .16)
+ InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NPipe]>], [7, 3, 2, 2]>,
+ //
+ // Quad-register Integer Multiply-Accumulate (.32)
+ InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NPipe]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_NPipe]>], [9, 3, 2, 1]>,
+ //
+ // Double-register VEXT
+ InstrItinData<IIC_VEXTD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>,
+ //
+ // Quad-register VEXT
+ InstrItinData<IIC_VEXTQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>,
+ //
+ // VTB
+ InstrItinData<IIC_VTB1, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 2, 1]>,
+ InstrItinData<IIC_VTB2, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 2, 2, 1]>,
+ InstrItinData<IIC_VTB3, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NLSPipe]>,
+ InstrStage<1, [A8_NPipe], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [4, 2, 2, 3, 1]>,
+ InstrItinData<IIC_VTB4, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NLSPipe]>,
+ InstrStage<1, [A8_NPipe], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [4, 2, 2, 3, 3, 1]>,
+ //
+ // VTBX
+ InstrItinData<IIC_VTBX1, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 1]>,
+ InstrItinData<IIC_VTBX2, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 2, 1]>,
+ InstrItinData<IIC_VTBX3, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NLSPipe]>,
+ InstrStage<1, [A8_NPipe], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 1]>,
+ InstrItinData<IIC_VTBX4, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_NLSPipe]>,
+ InstrStage<1, [A8_NPipe], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]>
+]>;
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
new file mode 100644
index 000000000000..75320d929952
--- /dev/null
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -0,0 +1,749 @@
+//=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the ARM Cortex-A9 processors.
+//
+//===----------------------------------------------------------------------===//
+
+//
+// Ad-hoc scheduling information derived from the pretty vague "Cortex-A9
+// Technical Reference Manual".
+//
+// Functional units
+def A9_Issue : FuncUnit; // issue
+def A9_Pipe0 : FuncUnit; // pipeline 0
+def A9_Pipe1 : FuncUnit; // pipeline 1
+def A9_LSPipe : FuncUnit; // LS pipe
+def A9_NPipe : FuncUnit; // NEON ALU/MUL pipe
+def A9_DRegsVFP: FuncUnit; // FP register set, VFP side
+def A9_DRegsN : FuncUnit; // FP register set, NEON side
+
+// Dual issue pipeline represented by A9_Pipe0 | A9_Pipe1
+//
+def CortexA9Itineraries : ProcessorItineraries<
+ [A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_LSPipe, A9_Pipe0, A9_Pipe1, A9_Issue], [
+ // VFP and NEON share the same register file. This means that every VFP
+ // instruction must wait for full completion of any in-flight NEON
+ // instruction, and vice versa. We model this behavior with two artificial FUs:
+ // DRegsVFP and DRegsN.
+ //
+ // Every VFP instruction:
+ // - Acquires DRegsVFP resource for 1 cycle
+ // - Reserves DRegsN resource for the whole duration (including time to
+ // register file writeback!).
+ // Every NEON instruction does the same but with FUs swapped.
+ //
+ // Since a reserved FU cannot be acquired, this precisely models
+ // "cross-domain" stalls.
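+ //
+ // For example, a hypothetical single-cycle VFP op with a 2-cycle writeback
+ // would follow this scheme as (IIC_Example is illustrative, not a real class):
+ //   InstrItinData<IIC_Example, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ //                               InstrStage<3, [A9_DRegsN],   0, Reserved>,
+ //                               InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ //                               InstrStage<1, [A9_NPipe]>]>,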
+
+ // VFP
+ // Issue through integer pipeline, and execute in NEON unit.
+
+ // FP Special Register to Integer Register File Move
+ InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>]>,
+ //
+ // Single-precision FP Unary
+ InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra latency cycles since wbck is 2 cycles
+ InstrStage<3, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [1, 1]>,
+ //
+ // Double-precision FP Unary
+ InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra latency cycles since wbck is 2 cycles
+ InstrStage<3, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [1, 1]>,
+
+ //
+ // Single-precision FP Compare
+ InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra latency cycles since wbck is 4 cycles
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [1, 1]>,
+ //
+ // Double-precision FP Compare
+ InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra latency cycles since wbck is 4 cycles
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [1, 1]>,
+ //
+ // Single to Double FP Convert
+ InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [4, 1]>,
+ //
+ // Double to Single FP Convert
+ InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [4, 1]>,
+
+ //
+ // Single to Half FP Convert
+ InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [4, 1]>,
+ //
+ // Half to Single FP Convert
+ InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<3, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [2, 1]>,
+
+ //
+ // Single-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [4, 1]>,
+ //
+ // Double-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [4, 1]>,
+ //
+ // Integer to Single-Precision FP Convert
+ InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [4, 1]>,
+ //
+ // Integer to Double-Precision FP Convert
+ InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [4, 1]>,
+ //
+ // Single-precision FP ALU
+ InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
+ //
+ // Double-precision FP ALU
+ InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
+ //
+ // Single-precision FP Multiply
+ InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<6, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [5, 1, 1]>,
+ //
+ // Double-precision FP Multiply
+ InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<7, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<2, [A9_NPipe]>], [6, 1, 1]>,
+ //
+ // Single-precision FP MAC
+ InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<9, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [8, 0, 1, 1]>,
+ //
+ // Double-precision FP MAC
+ InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<10, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<2, [A9_NPipe]>], [9, 0, 1, 1]>,
+ //
+ // Single-precision FP DIV
+ InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<16, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<10, [A9_NPipe]>], [15, 1, 1]>,
+ //
+ // Double-precision FP DIV
+ InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<26, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<20, [A9_NPipe]>], [25, 1, 1]>,
+ //
+ // Single-precision FP SQRT
+ InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<18, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<13, [A9_NPipe]>], [17, 1]>,
+ //
+ // Double-precision FP SQRT
+ InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<33, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<28, [A9_NPipe]>], [32, 1]>,
+
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra 1 latency cycle since wbck is 2 cycles
+ InstrStage<3, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [1, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_fpMOVID, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra 1 latency cycle since wbck is 2 cycles
+ InstrStage<3, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [1, 1, 1]>,
+ //
+ // Single-precision to Integer Move
+ InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [1, 1]>,
+ //
+ // Double-precision to Integer Move
+ InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [1, 1, 1]>,
+ //
+ // Single-precision FP Load
+ // use A9_Issue to enforce the 1 load/store per cycle limit
+ InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_Issue], 0>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_NPipe]>]>,
+ //
+ // Double-precision FP Load
+ // use A9_Issue to enforce the 1 load/store per cycle limit
+ InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_Issue], 0>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_NPipe]>]>,
+ //
+ // FP Load Multiple
+ // use A9_Issue to enforce the 1 load/store per cycle limit
+ InstrItinData<IIC_fpLoadm, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_Issue], 0>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_NPipe]>]>,
+ //
+ // Single-precision FP Store
+ // use A9_Issue to enforce the 1 load/store per cycle limit
+ InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_Issue], 0>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_NPipe]>]>,
+ //
+ // Double-precision FP Store
+ // use A9_Issue to enforce the 1 load/store per cycle limit
+ InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_Issue], 0>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_NPipe]>]>,
+ //
+ // FP Store Multiple
+ // use A9_Issue to enforce the 1 load/store per cycle limit
+ InstrItinData<IIC_fpStorem, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_Issue], 0>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_NPipe]>]>,
+ // NEON
+ // Issue through integer pipeline, and execute in NEON unit.
+ // FIXME: Neon pipeline and LdSt unit are multiplexed.
+ // Add some syntactic sugar to model this!
+ // VLD1
+ // FIXME: We don't model this instruction properly
+ InstrItinData<IIC_VLD1, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Issue], 0>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_NPipe]>]>,
+ //
+ // VLD2
+ // FIXME: We don't model this instruction properly
+ InstrItinData<IIC_VLD2, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Issue], 0>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_NPipe]>], [2, 2, 1]>,
+ //
+ // VLD3
+ // FIXME: We don't model this instruction properly
+ InstrItinData<IIC_VLD3, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Issue], 0>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_NPipe]>], [2, 2, 2, 1]>,
+ //
+ // VLD4
+ // FIXME: We don't model this instruction properly
+ InstrItinData<IIC_VLD4, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Issue], 0>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_NPipe]>], [2, 2, 2, 2, 1]>,
+ //
+ // VST
+ // FIXME: We don't model this instruction properly
+ InstrItinData<IIC_VST, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Issue], 0>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_NPipe]>]>,
+ //
+ // Double-register Integer Unary
+ InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [4, 2]>,
+ //
+ // Quad-register Integer Unary
+ InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [4, 2]>,
+ //
+ // Double-register Integer Q-Unary
+ InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [4, 1]>,
+ //
+ // Quad-register Integer Q-Unary
+ InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [4, 1]>,
+ //
+ // Double-register Integer Binary
+ InstrItinData<IIC_VBINiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
+ //
+ // Quad-register Integer Binary
+ InstrItinData<IIC_VBINiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
+ //
+ // Double-register Integer Subtract
+ InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
+ //
+ // Quad-register Integer Subtract
+ InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
+ //
+ // Double-register Integer Shift
+ InstrItinData<IIC_VSHLiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
+ //
+ // Quad-register Integer Shift
+ InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
+ //
+ // Double-register Integer Shift (4 cycle)
+ InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
+ //
+ // Quad-register Integer Shift (4 cycle)
+ InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
+ //
+ // Double-register Integer Binary (4 cycle)
+ InstrItinData<IIC_VBINi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
+ //
+ // Quad-register Integer Binary (4 cycle)
+ InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
+ //
+ // Double-register Integer Subtract (4 cycle)
+ InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
+ //
+ // Quad-register Integer Subtract (4 cycle)
+ InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
+
+ //
+ // Double-register Integer Count
+ InstrItinData<IIC_VCNTiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
+ //
+ // Quad-register Integer Count
+ // Result written in N3, but that is relative to the last cycle of the
+ // multi-cycle itinerary, so we use 4 for those cases
+ InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<2, [A9_NPipe]>], [4, 2, 2]>,
+ //
+ // Double-register Absolute Difference and Accumulate
+ InstrItinData<IIC_VABAD, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [6, 3, 2, 1]>,
+ //
+ // Quad-register Absolute Difference and Accumulate
+ InstrItinData<IIC_VABAQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>,
+ //
+ // Double-register Integer Pair Add Long
+ InstrItinData<IIC_VPALiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [6, 3, 1]>,
+ //
+ // Quad-register Integer Pair Add Long
+ InstrItinData<IIC_VPALiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<2, [A9_NPipe]>], [6, 3, 1]>,
+
+ //
+ // Double-register Integer Multiply (.8, .16)
+ InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [6, 2, 2]>,
+ //
+ // Quad-register Integer Multiply (.8, .16)
+ InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<2, [A9_NPipe]>], [7, 2, 2]>,
+
+ //
+ // Double-register Integer Multiply (.32)
+ InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<2, [A9_NPipe]>], [7, 2, 1]>,
+ //
+ // Quad-register Integer Multiply (.32)
+ InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 9 cycles
+ InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<4, [A9_NPipe]>], [9, 2, 1]>,
+ //
+ // Double-register Integer Multiply-Accumulate (.8, .16)
+ InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [6, 3, 2, 2]>,
+ //
+ // Double-register Integer Multiply-Accumulate (.32)
+ InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<2, [A9_NPipe]>], [7, 3, 2, 1]>,
+ //
+ // Quad-register Integer Multiply-Accumulate (.8, .16)
+ InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<2, [A9_NPipe]>], [7, 3, 2, 2]>,
+ //
+ // Quad-register Integer Multiply-Accumulate (.32)
+ InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 9 cycles
+ InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<4, [A9_NPipe]>], [9, 3, 2, 1]>,
+ //
+ // Move Immediate
+ InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [3]>,
+ //
+ // Double-register Permute Move
+ InstrItinData<IIC_VMOVD, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // FIXME: all latencies are arbitrary, no information is available
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_LSPipe]>], [2, 1]>,
+ //
+ // Quad-register Permute Move
+ // Result written in N2, but that is relative to the last cycle of the
+ // multi-cycle itinerary, so we use 3 for those cases
+ InstrItinData<IIC_VMOVQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // FIXME: all latencies are arbitrary, no information is available
+ InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<2, [A9_NPipe]>], [3, 1]>,
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // FIXME: all latencies are arbitrary, no information is available
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [2, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_VMOVID , [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // FIXME: all latencies are arbitrary, no information is available
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [2, 1, 1]>,
+ //
+ // Single-precision to Integer Move
+ InstrItinData<IIC_VMOVSI , [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // FIXME: all latencies are arbitrary, no information is available
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [2, 1]>,
+ //
+ // Double-precision to Integer Move
+ InstrItinData<IIC_VMOVDI , [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // FIXME: all latencies are arbitrary, no information is available
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [2, 2, 1]>,
+ //
+ // Integer to Lane Move
+ InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // FIXME: all latencies are arbitrary, no information is available
+ InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<2, [A9_NPipe]>], [3, 1, 1]>,
+
+ //
+ // Double-register FP Unary
+ InstrItinData<IIC_VUNAD, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [5, 2]>,
+ //
+ // Quad-register FP Unary
+ // Result written in N5, but that is relative to the last cycle of the
+ // multi-cycle itinerary, so we use 6 for those cases
+ InstrItinData<IIC_VUNAQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<2, [A9_NPipe]>], [6, 2]>,
+ //
+ // Double-register FP Binary
+ // FIXME: We're using this itin for many instructions and [2, 2] here is too
+ // optimistic.
+ InstrItinData<IIC_VBIND, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [5, 2, 2]>,
+ //
+ // Quad-register FP Binary
+ // Result written in N5, but that is relative to the last cycle of the
+ // multi-cycle itinerary, so we use 6 for those cases
+ // FIXME: We're using this itin for many instructions and [2, 2] here is too
+ // optimistic.
+ InstrItinData<IIC_VBINQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<2, [A9_NPipe]>], [6, 2, 2]>,
+ //
+ // Double-register FP Multiply-Accumulate
+ InstrItinData<IIC_VMACD, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>,
+ //
+ // Quad-register FP Multiply-Accumulate
+ // Result written in N9, but that is relative to the last cycle of the
+ // multi-cycle itinerary, so we use 10 for those cases
+ InstrItinData<IIC_VMACQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 9 cycles
+ InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<4, [A9_NPipe]>], [8, 4, 2, 1]>,
+ //
+ // Double-register Reciprocal Step
+ InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<2, [A9_NPipe]>], [6, 2, 2]>,
+ //
+ // Quad-register Reciprocal Step
+ InstrItinData<IIC_VRECSQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 9 cycles
+ InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<4, [A9_NPipe]>], [8, 2, 2]>,
+ //
+ // Double-register Permute
+ InstrItinData<IIC_VPERMD, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [2, 2, 1, 1]>,
+ //
+ // Quad-register Permute
+ // Result written in N2, but that is relative to the last cycle of the
+ // multi-cycle itinerary, so we use 3 for those cases
+ InstrItinData<IIC_VPERMQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<2, [A9_NPipe]>], [3, 3, 1, 1]>,
+ //
+ // Quad-register Permute (3 cycle issue)
+ // Result written in N2, but that is relative to the last cycle of the
+ // multi-cycle itinerary, so we use 4 for those cases
+ InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 8 cycles
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<3, [A9_LSPipe]>], [4, 4, 1, 1]>,
+
+ //
+ // Double-register VEXT
+ InstrItinData<IIC_VEXTD, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_NPipe]>], [2, 1, 1]>,
+ //
+ // Quad-register VEXT
+ InstrItinData<IIC_VEXTQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<2, [A9_NPipe]>], [3, 1, 1]>,
+ //
+ // VTB
+ InstrItinData<IIC_VTB1, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<2, [A9_NPipe]>], [3, 2, 1]>,
+ InstrItinData<IIC_VTB2, [InstrStage<2, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<2, [A9_NPipe]>], [3, 2, 2, 1]>,
+ InstrItinData<IIC_VTB3, [InstrStage<2, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 8 cycles
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 1]>,
+ InstrItinData<IIC_VTB4, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 8 cycles
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 3, 1]>,
+ //
+ // VTBX
+ InstrItinData<IIC_VTBX1, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<2, [A9_NPipe]>], [3, 1, 2, 1]>,
+ InstrItinData<IIC_VTBX2, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<2, [A9_NPipe]>], [3, 1, 2, 2, 1]>,
+ InstrItinData<IIC_VTBX3, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 8 cycles
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<3, [A9_NPipe]>], [4, 1, 2, 2, 3, 1]>,
+ InstrItinData<IIC_VTBX4, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 8 cycles
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<2, [A9_NPipe]>], [4, 1, 2, 2, 3, 3, 1]>
+]>;
diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td
index 0fef466ad18c..f813022d8741 100644
--- a/lib/Target/ARM/ARMScheduleV6.td
+++ b/lib/Target/ARM/ARMScheduleV6.td
@@ -13,103 +13,107 @@
// Model based on ARM1176
//
+// Functional Units
+def V6_Pipe : FuncUnit; // pipeline
+
// Scheduling information derived from "ARM1176JZF-S Technical Reference Manual".
//
-def ARMV6Itineraries : ProcessorItineraries<[
+def ARMV6Itineraries : ProcessorItineraries<
+ [V6_Pipe], [
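+ // ProcessorItineraries now takes its functional units as an explicit first
+ // list, so each target declares its own FuncUnits (V6_Pipe here) rather than
+ // sharing a global FU_* set.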
//
// No operand cycles
- InstrItinData<IIC_iALUx , [InstrStage<1, [FU_Pipe0]>]>,
+ InstrItinData<IIC_iALUx , [InstrStage<1, [V6_Pipe]>]>,
//
// Binary Instructions that produce a result
- InstrItinData<IIC_iALUi , [InstrStage<1, [FU_Pipe0]>], [2, 2]>,
- InstrItinData<IIC_iALUr , [InstrStage<1, [FU_Pipe0]>], [2, 2, 2]>,
- InstrItinData<IIC_iALUsi , [InstrStage<1, [FU_Pipe0]>], [2, 2, 1]>,
- InstrItinData<IIC_iALUsr , [InstrStage<2, [FU_Pipe0]>], [3, 3, 2, 1]>,
+ InstrItinData<IIC_iALUi , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iALUr , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>,
+ InstrItinData<IIC_iALUsi , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+ InstrItinData<IIC_iALUsr , [InstrStage<2, [V6_Pipe]>], [3, 3, 2, 1]>,
//
// Unary Instructions that produce a result
- InstrItinData<IIC_iUNAr , [InstrStage<1, [FU_Pipe0]>], [2, 2]>,
- InstrItinData<IIC_iUNAsi , [InstrStage<1, [FU_Pipe0]>], [2, 1]>,
- InstrItinData<IIC_iUNAsr , [InstrStage<2, [FU_Pipe0]>], [3, 2, 1]>,
+ InstrItinData<IIC_iUNAr , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iUNAsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iUNAsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
//
// Compare instructions
- InstrItinData<IIC_iCMPi , [InstrStage<1, [FU_Pipe0]>], [2]>,
- InstrItinData<IIC_iCMPr , [InstrStage<1, [FU_Pipe0]>], [2, 2]>,
- InstrItinData<IIC_iCMPsi , [InstrStage<1, [FU_Pipe0]>], [2, 1]>,
- InstrItinData<IIC_iCMPsr , [InstrStage<2, [FU_Pipe0]>], [3, 2, 1]>,
+ InstrItinData<IIC_iCMPi , [InstrStage<1, [V6_Pipe]>], [2]>,
+ InstrItinData<IIC_iCMPr , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iCMPsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iCMPsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
//
// Move instructions, unconditional
- InstrItinData<IIC_iMOVi , [InstrStage<1, [FU_Pipe0]>], [2]>,
- InstrItinData<IIC_iMOVr , [InstrStage<1, [FU_Pipe0]>], [2, 2]>,
- InstrItinData<IIC_iMOVsi , [InstrStage<1, [FU_Pipe0]>], [2, 1]>,
- InstrItinData<IIC_iMOVsr , [InstrStage<2, [FU_Pipe0]>], [3, 2, 1]>,
+ InstrItinData<IIC_iMOVi , [InstrStage<1, [V6_Pipe]>], [2]>,
+ InstrItinData<IIC_iMOVr , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iMOVsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iMOVsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
//
// Move instructions, conditional
- InstrItinData<IIC_iCMOVi , [InstrStage<1, [FU_Pipe0]>], [3]>,
- InstrItinData<IIC_iCMOVr , [InstrStage<1, [FU_Pipe0]>], [3, 2]>,
- InstrItinData<IIC_iCMOVsi , [InstrStage<1, [FU_Pipe0]>], [3, 1]>,
- InstrItinData<IIC_iCMOVsr , [InstrStage<1, [FU_Pipe0]>], [4, 2, 1]>,
+ InstrItinData<IIC_iCMOVi , [InstrStage<1, [V6_Pipe]>], [3]>,
+ InstrItinData<IIC_iCMOVr , [InstrStage<1, [V6_Pipe]>], [3, 2]>,
+ InstrItinData<IIC_iCMOVsi , [InstrStage<1, [V6_Pipe]>], [3, 1]>,
+ InstrItinData<IIC_iCMOVsr , [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>,
// Integer multiply pipeline
//
- InstrItinData<IIC_iMUL16 , [InstrStage<1, [FU_Pipe0]>], [4, 1, 1]>,
- InstrItinData<IIC_iMAC16 , [InstrStage<1, [FU_Pipe0]>], [4, 1, 1, 2]>,
- InstrItinData<IIC_iMUL32 , [InstrStage<2, [FU_Pipe0]>], [5, 1, 1]>,
- InstrItinData<IIC_iMAC32 , [InstrStage<2, [FU_Pipe0]>], [5, 1, 1, 2]>,
- InstrItinData<IIC_iMUL64 , [InstrStage<3, [FU_Pipe0]>], [6, 1, 1]>,
- InstrItinData<IIC_iMAC64 , [InstrStage<3, [FU_Pipe0]>], [6, 1, 1, 2]>,
+ InstrItinData<IIC_iMUL16 , [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>,
+ InstrItinData<IIC_iMAC16 , [InstrStage<1, [V6_Pipe]>], [4, 1, 1, 2]>,
+ InstrItinData<IIC_iMUL32 , [InstrStage<2, [V6_Pipe]>], [5, 1, 1]>,
+ InstrItinData<IIC_iMAC32 , [InstrStage<2, [V6_Pipe]>], [5, 1, 1, 2]>,
+ InstrItinData<IIC_iMUL64 , [InstrStage<3, [V6_Pipe]>], [6, 1, 1]>,
+ InstrItinData<IIC_iMAC64 , [InstrStage<3, [V6_Pipe]>], [6, 1, 1, 2]>,
// Integer load pipeline
//
// Immediate offset
- InstrItinData<IIC_iLoadi , [InstrStage<1, [FU_Pipe0]>], [4, 1]>,
+ InstrItinData<IIC_iLoadi , [InstrStage<1, [V6_Pipe]>], [4, 1]>,
//
// Register offset
- InstrItinData<IIC_iLoadr , [InstrStage<1, [FU_Pipe0]>], [4, 1, 1]>,
+ InstrItinData<IIC_iLoadr , [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>,
//
// Scaled register offset, issues over 2 cycles
- InstrItinData<IIC_iLoadsi , [InstrStage<2, [FU_Pipe0]>], [5, 2, 1]>,
+ InstrItinData<IIC_iLoadsi , [InstrStage<2, [V6_Pipe]>], [5, 2, 1]>,
//
// Immediate offset with update
- InstrItinData<IIC_iLoadiu , [InstrStage<1, [FU_Pipe0]>], [4, 2, 1]>,
+ InstrItinData<IIC_iLoadiu , [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>,
//
// Register offset with update
- InstrItinData<IIC_iLoadru , [InstrStage<1, [FU_Pipe0]>], [4, 2, 1, 1]>,
+ InstrItinData<IIC_iLoadru , [InstrStage<1, [V6_Pipe]>], [4, 2, 1, 1]>,
//
// Scaled register offset with update, issues over 2 cycles
- InstrItinData<IIC_iLoadsiu , [InstrStage<2, [FU_Pipe0]>], [5, 2, 2, 1]>,
+ InstrItinData<IIC_iLoadsiu , [InstrStage<2, [V6_Pipe]>], [5, 2, 2, 1]>,
//
// Load multiple
- InstrItinData<IIC_iLoadm , [InstrStage<3, [FU_Pipe0]>]>,
+ InstrItinData<IIC_iLoadm , [InstrStage<3, [V6_Pipe]>]>,
// Integer store pipeline
//
// Immediate offset
- InstrItinData<IIC_iStorei , [InstrStage<1, [FU_Pipe0]>], [2, 1]>,
+ InstrItinData<IIC_iStorei , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
//
// Register offset
- InstrItinData<IIC_iStorer , [InstrStage<1, [FU_Pipe0]>], [2, 1, 1]>,
+ InstrItinData<IIC_iStorer , [InstrStage<1, [V6_Pipe]>], [2, 1, 1]>,
//
// Scaled register offset, issues over 2 cycles
- InstrItinData<IIC_iStoresi , [InstrStage<2, [FU_Pipe0]>], [2, 2, 1]>,
+ InstrItinData<IIC_iStoresi , [InstrStage<2, [V6_Pipe]>], [2, 2, 1]>,
//
// Immediate offset with update
- InstrItinData<IIC_iStoreiu , [InstrStage<1, [FU_Pipe0]>], [2, 2, 1]>,
+ InstrItinData<IIC_iStoreiu , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
//
// Register offset with update
- InstrItinData<IIC_iStoreru , [InstrStage<1, [FU_Pipe0]>], [2, 2, 1, 1]>,
+ InstrItinData<IIC_iStoreru , [InstrStage<1, [V6_Pipe]>], [2, 2, 1, 1]>,
//
// Scaled register offset with update, issues over 2 cycles
- InstrItinData<IIC_iStoresiu, [InstrStage<2, [FU_Pipe0]>], [2, 2, 2, 1]>,
+ InstrItinData<IIC_iStoresiu, [InstrStage<2, [V6_Pipe]>], [2, 2, 2, 1]>,
//
// Store multiple
- InstrItinData<IIC_iStorem , [InstrStage<3, [FU_Pipe0]>]>,
+ InstrItinData<IIC_iStorem , [InstrStage<3, [V6_Pipe]>]>,
// Branch
//
// no delay slots, so the latency of a branch is unimportant
- InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0]>]>,
+ InstrItinData<IIC_Br , [InstrStage<1, [V6_Pipe]>]>,
// VFP
// Issue through integer pipeline, and execute in NEON unit. We assume
@@ -117,84 +121,84 @@ def ARMV6Itineraries : ProcessorItineraries<[
// possible.
//
// FP Special Register to Integer Register File Move
- InstrItinData<IIC_fpSTAT , [InstrStage<1, [FU_Pipe0]>], [3]>,
+ InstrItinData<IIC_fpSTAT , [InstrStage<1, [V6_Pipe]>], [3]>,
//
// Single-precision FP Unary
- InstrItinData<IIC_fpUNA32 , [InstrStage<1, [FU_Pipe0]>], [5, 2]>,
+ InstrItinData<IIC_fpUNA32 , [InstrStage<1, [V6_Pipe]>], [5, 2]>,
//
// Double-precision FP Unary
- InstrItinData<IIC_fpUNA64 , [InstrStage<1, [FU_Pipe0]>], [5, 2]>,
+ InstrItinData<IIC_fpUNA64 , [InstrStage<1, [V6_Pipe]>], [5, 2]>,
//
// Single-precision FP Compare
- InstrItinData<IIC_fpCMP32 , [InstrStage<1, [FU_Pipe0]>], [2, 2]>,
+ InstrItinData<IIC_fpCMP32 , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
//
// Double-precision FP Compare
- InstrItinData<IIC_fpCMP64 , [InstrStage<1, [FU_Pipe0]>], [2, 2]>,
+ InstrItinData<IIC_fpCMP64 , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
//
// Single to Double FP Convert
- InstrItinData<IIC_fpCVTSD , [InstrStage<1, [FU_Pipe0]>], [5, 2]>,
+ InstrItinData<IIC_fpCVTSD , [InstrStage<1, [V6_Pipe]>], [5, 2]>,
//
// Double to Single FP Convert
- InstrItinData<IIC_fpCVTDS , [InstrStage<1, [FU_Pipe0]>], [5, 2]>,
+ InstrItinData<IIC_fpCVTDS , [InstrStage<1, [V6_Pipe]>], [5, 2]>,
//
// Single-Precision FP to Integer Convert
- InstrItinData<IIC_fpCVTSI , [InstrStage<1, [FU_Pipe0]>], [9, 2]>,
+ InstrItinData<IIC_fpCVTSI , [InstrStage<1, [V6_Pipe]>], [9, 2]>,
//
// Double-Precision FP to Integer Convert
- InstrItinData<IIC_fpCVTDI , [InstrStage<1, [FU_Pipe0]>], [9, 2]>,
+ InstrItinData<IIC_fpCVTDI , [InstrStage<1, [V6_Pipe]>], [9, 2]>,
//
// Integer to Single-Precision FP Convert
- InstrItinData<IIC_fpCVTIS , [InstrStage<1, [FU_Pipe0]>], [9, 2]>,
+ InstrItinData<IIC_fpCVTIS , [InstrStage<1, [V6_Pipe]>], [9, 2]>,
//
// Integer to Double-Precision FP Convert
- InstrItinData<IIC_fpCVTID , [InstrStage<1, [FU_Pipe0]>], [9, 2]>,
+ InstrItinData<IIC_fpCVTID , [InstrStage<1, [V6_Pipe]>], [9, 2]>,
//
// Single-precision FP ALU
- InstrItinData<IIC_fpALU32 , [InstrStage<1, [FU_Pipe0]>], [9, 2, 2]>,
+ InstrItinData<IIC_fpALU32 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2]>,
//
// Double-precision FP ALU
- InstrItinData<IIC_fpALU64 , [InstrStage<1, [FU_Pipe0]>], [9, 2, 2]>,
+ InstrItinData<IIC_fpALU64 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2]>,
//
// Single-precision FP Multiply
- InstrItinData<IIC_fpMUL32 , [InstrStage<1, [FU_Pipe0]>], [9, 2, 2]>,
+ InstrItinData<IIC_fpMUL32 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2]>,
//
// Double-precision FP Multiply
- InstrItinData<IIC_fpMUL64 , [InstrStage<2, [FU_Pipe0]>], [9, 2, 2]>,
+ InstrItinData<IIC_fpMUL64 , [InstrStage<2, [V6_Pipe]>], [9, 2, 2]>,
//
// Single-precision FP MAC
- InstrItinData<IIC_fpMAC32 , [InstrStage<1, [FU_Pipe0]>], [9, 2, 2, 2]>,
+ InstrItinData<IIC_fpMAC32 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2, 2]>,
//
// Double-precision FP MAC
- InstrItinData<IIC_fpMAC64 , [InstrStage<2, [FU_Pipe0]>], [9, 2, 2, 2]>,
+ InstrItinData<IIC_fpMAC64 , [InstrStage<2, [V6_Pipe]>], [9, 2, 2, 2]>,
//
// Single-precision FP DIV
- InstrItinData<IIC_fpDIV32 , [InstrStage<15, [FU_Pipe0]>], [20, 2, 2]>,
+ InstrItinData<IIC_fpDIV32 , [InstrStage<15, [V6_Pipe]>], [20, 2, 2]>,
//
// Double-precision FP DIV
- InstrItinData<IIC_fpDIV64 , [InstrStage<29, [FU_Pipe0]>], [34, 2, 2]>,
+ InstrItinData<IIC_fpDIV64 , [InstrStage<29, [V6_Pipe]>], [34, 2, 2]>,
//
// Single-precision FP SQRT
- InstrItinData<IIC_fpSQRT32 , [InstrStage<15, [FU_Pipe0]>], [20, 2, 2]>,
+ InstrItinData<IIC_fpSQRT32 , [InstrStage<15, [V6_Pipe]>], [20, 2, 2]>,
//
// Double-precision FP SQRT
- InstrItinData<IIC_fpSQRT64 , [InstrStage<29, [FU_Pipe0]>], [34, 2, 2]>,
+ InstrItinData<IIC_fpSQRT64 , [InstrStage<29, [V6_Pipe]>], [34, 2, 2]>,
//
// Single-precision FP Load
- InstrItinData<IIC_fpLoad32 , [InstrStage<1, [FU_Pipe0]>], [5, 2, 2]>,
+ InstrItinData<IIC_fpLoad32 , [InstrStage<1, [V6_Pipe]>], [5, 2, 2]>,
//
// Double-precision FP Load
- InstrItinData<IIC_fpLoad64 , [InstrStage<1, [FU_Pipe0]>], [5, 2, 2]>,
+ InstrItinData<IIC_fpLoad64 , [InstrStage<1, [V6_Pipe]>], [5, 2, 2]>,
//
// FP Load Multiple
- InstrItinData<IIC_fpLoadm , [InstrStage<3, [FU_Pipe0]>]>,
+ InstrItinData<IIC_fpLoadm , [InstrStage<3, [V6_Pipe]>]>,
//
// Single-precision FP Store
- InstrItinData<IIC_fpStore32 , [InstrStage<1, [FU_Pipe0]>], [2, 2, 2]>,
+ InstrItinData<IIC_fpStore32 , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>,
//
// Double-precision FP Store
// use FU_Issue to enforce the 1 load/store per cycle limit
- InstrItinData<IIC_fpStore64 , [InstrStage<1, [FU_Pipe0]>], [2, 2, 2]>,
+ InstrItinData<IIC_fpStore64 , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>,
//
// FP Store Multiple
- InstrItinData<IIC_fpStorem , [InstrStage<3, [FU_Pipe0]>]>
+ InstrItinData<IIC_fpStorem , [InstrStage<3, [V6_Pipe]>]>
]>;
diff --git a/lib/Target/ARM/ARMScheduleV7.td b/lib/Target/ARM/ARMScheduleV7.td
deleted file mode 100644
index bbbf41397566..000000000000
--- a/lib/Target/ARM/ARMScheduleV7.td
+++ /dev/null
@@ -1,587 +0,0 @@
-//===- ARMScheduleV7.td - ARM v7 Scheduling Definitions ----*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the itinerary class data for the ARM v7 processors.
-//
-//===----------------------------------------------------------------------===//
-
-//
-// Scheduling information derived from "Cortex-A8 Technical Reference Manual".
-//
-// Dual issue pipeline represented by FU_Pipe0 | FU_Pipe1
-//
-def CortexA8Itineraries : ProcessorItineraries<[
-
- // Two fully-pipelined integer ALU pipelines
- //
- // No operand cycles
- InstrItinData<IIC_iALUx , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>,
- //
- // Binary Instructions that produce a result
- InstrItinData<IIC_iALUi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2]>,
- InstrItinData<IIC_iALUr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2, 2]>,
- InstrItinData<IIC_iALUsi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2, 1]>,
- InstrItinData<IIC_iALUsr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2, 1, 1]>,
- //
- // Unary Instructions that produce a result
- InstrItinData<IIC_iUNAr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2]>,
- InstrItinData<IIC_iUNAsi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1]>,
- InstrItinData<IIC_iUNAsr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1, 1]>,
- //
- // Compare instructions
- InstrItinData<IIC_iCMPi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2]>,
- InstrItinData<IIC_iCMPr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2]>,
- InstrItinData<IIC_iCMPsi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1]>,
- InstrItinData<IIC_iCMPsr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1, 1]>,
- //
- // Move instructions, unconditional
- InstrItinData<IIC_iMOVi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [1]>,
- InstrItinData<IIC_iMOVr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [1, 1]>,
- InstrItinData<IIC_iMOVsi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [1, 1]>,
- InstrItinData<IIC_iMOVsr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [1, 1, 1]>,
- //
- // Move instructions, conditional
- InstrItinData<IIC_iCMOVi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2]>,
- InstrItinData<IIC_iCMOVr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1]>,
- InstrItinData<IIC_iCMOVsi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1]>,
- InstrItinData<IIC_iCMOVsr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1, 1]>,
-
- // Integer multiply pipeline
- // Result written in E5, but that is relative to the last cycle of the
- // multi-cycle itinerary, so we use 6 for those cases
- //
- InstrItinData<IIC_iMUL16 , [InstrStage<1, [FU_Pipe0]>], [5, 1, 1]>,
- InstrItinData<IIC_iMAC16 , [InstrStage<1, [FU_Pipe1], 0>,
- InstrStage<2, [FU_Pipe0]>], [6, 1, 1, 4]>,
- InstrItinData<IIC_iMUL32 , [InstrStage<1, [FU_Pipe1], 0>,
- InstrStage<2, [FU_Pipe0]>], [6, 1, 1]>,
- InstrItinData<IIC_iMAC32 , [InstrStage<1, [FU_Pipe1], 0>,
- InstrStage<2, [FU_Pipe0]>], [6, 1, 1, 4]>,
- InstrItinData<IIC_iMUL64 , [InstrStage<2, [FU_Pipe1], 0>,
- InstrStage<3, [FU_Pipe0]>], [6, 6, 1, 1]>,
- InstrItinData<IIC_iMAC64 , [InstrStage<2, [FU_Pipe1], 0>,
- InstrStage<3, [FU_Pipe0]>], [6, 6, 1, 1]>,
-
- // Integer load pipeline
- //
- // loads have an extra cycle of latency, but are fully pipelined
- // use FU_Issue to enforce the 1 load/store per cycle limit
- //
- // Immediate offset
- InstrItinData<IIC_iLoadi , [InstrStage<1, [FU_Issue], 0>,
- InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_LdSt0]>], [3, 1]>,
- //
- // Register offset
- InstrItinData<IIC_iLoadr , [InstrStage<1, [FU_Issue], 0>,
- InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_LdSt0]>], [3, 1, 1]>,
- //
- // Scaled register offset, issues over 2 cycles
- InstrItinData<IIC_iLoadsi , [InstrStage<2, [FU_Issue], 0>,
- InstrStage<1, [FU_Pipe0], 0>,
- InstrStage<1, [FU_Pipe1]>,
- InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_LdSt0]>], [4, 1, 1]>,
- //
- // Immediate offset with update
- InstrItinData<IIC_iLoadiu , [InstrStage<1, [FU_Issue], 0>,
- InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_LdSt0]>], [3, 2, 1]>,
- //
- // Register offset with update
- InstrItinData<IIC_iLoadru , [InstrStage<1, [FU_Issue], 0>,
- InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_LdSt0]>], [3, 2, 1, 1]>,
- //
- // Scaled register offset with update, issues over 2 cycles
- InstrItinData<IIC_iLoadsiu , [InstrStage<2, [FU_Issue], 0>,
- InstrStage<1, [FU_Pipe0], 0>,
- InstrStage<1, [FU_Pipe1]>,
- InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_LdSt0]>], [4, 3, 1, 1]>,
- //
- // Load multiple
- InstrItinData<IIC_iLoadm , [InstrStage<2, [FU_Issue], 0>,
- InstrStage<2, [FU_Pipe0], 0>,
- InstrStage<2, [FU_Pipe1]>,
- InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_LdSt0]>]>,
-
- // Integer store pipeline
- //
- // use FU_Issue to enforce the 1 load/store per cycle limit
- //
- // Immediate offset
- InstrItinData<IIC_iStorei , [InstrStage<1, [FU_Issue], 0>,
- InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_LdSt0]>], [3, 1]>,
- //
- // Register offset
- InstrItinData<IIC_iStorer , [InstrStage<1, [FU_Issue], 0>,
- InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_LdSt0]>], [3, 1, 1]>,
- //
- // Scaled register offset, issues over 2 cycles
- InstrItinData<IIC_iStoresi , [InstrStage<2, [FU_Issue], 0>,
- InstrStage<1, [FU_Pipe0], 0>,
- InstrStage<1, [FU_Pipe1]>,
- InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_LdSt0]>], [3, 1, 1]>,
- //
- // Immediate offset with update
- InstrItinData<IIC_iStoreiu , [InstrStage<1, [FU_Issue], 0>,
- InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_LdSt0]>], [2, 3, 1]>,
- //
- // Register offset with update
- InstrItinData<IIC_iStoreru , [InstrStage<1, [FU_Issue], 0>,
- InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_LdSt0]>], [2, 3, 1, 1]>,
- //
- // Scaled register offset with update, issues over 2 cycles
- InstrItinData<IIC_iStoresiu, [InstrStage<2, [FU_Issue], 0>,
- InstrStage<1, [FU_Pipe0], 0>,
- InstrStage<1, [FU_Pipe1]>,
- InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_LdSt0]>], [3, 3, 1, 1]>,
- //
- // Store multiple
- InstrItinData<IIC_iStorem , [InstrStage<2, [FU_Issue], 0>,
- InstrStage<2, [FU_Pipe0], 0>,
- InstrStage<2, [FU_Pipe1]>,
- InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_LdSt0]>]>,
-
- // Branch
- //
- // no delay slots, so the latency of a branch is unimportant
- InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>,
-
- // VFP
- // Issue through integer pipeline, and execute in NEON unit. We assume
- // RunFast mode so that NFP pipeline is used for single-precision when
- // possible.
- //
- // FP Special Register to Integer Register File Move
- InstrItinData<IIC_fpSTAT , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NLSPipe]>]>,
- //
- // Single-precision FP Unary
- InstrItinData<IIC_fpUNA32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NPipe]>], [7, 1]>,
- //
- // Double-precision FP Unary
- InstrItinData<IIC_fpUNA64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<4, [FU_NPipe], 0>,
- InstrStage<4, [FU_NLSPipe]>], [4, 1]>,
- //
- // Single-precision FP Compare
- InstrItinData<IIC_fpCMP32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NPipe]>], [1, 1]>,
- //
- // Double-precision FP Compare
- InstrItinData<IIC_fpCMP64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<4, [FU_NPipe], 0>,
- InstrStage<4, [FU_NLSPipe]>], [4, 1]>,
- //
- // Single to Double FP Convert
- InstrItinData<IIC_fpCVTSD , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<7, [FU_NPipe], 0>,
- InstrStage<7, [FU_NLSPipe]>], [7, 1]>,
- //
- // Double to Single FP Convert
- InstrItinData<IIC_fpCVTDS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<5, [FU_NPipe], 0>,
- InstrStage<5, [FU_NLSPipe]>], [5, 1]>,
- //
- // Single-Precision FP to Integer Convert
- InstrItinData<IIC_fpCVTSI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NPipe]>], [7, 1]>,
- //
- // Double-Precision FP to Integer Convert
- InstrItinData<IIC_fpCVTDI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<8, [FU_NPipe], 0>,
- InstrStage<8, [FU_NLSPipe]>], [8, 1]>,
- //
- // Integer to Single-Precision FP Convert
- InstrItinData<IIC_fpCVTIS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NPipe]>], [7, 1]>,
- //
- // Integer to Double-Precision FP Convert
- InstrItinData<IIC_fpCVTID , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<8, [FU_NPipe], 0>,
- InstrStage<8, [FU_NLSPipe]>], [8, 1]>,
- //
- // Single-precision FP ALU
- InstrItinData<IIC_fpALU32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NPipe]>], [7, 1, 1]>,
- //
- // Double-precision FP ALU
- InstrItinData<IIC_fpALU64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<9, [FU_NPipe], 0>,
- InstrStage<9, [FU_NLSPipe]>], [9, 1, 1]>,
- //
- // Single-precision FP Multiply
- InstrItinData<IIC_fpMUL32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NPipe]>], [7, 1, 1]>,
- //
- // Double-precision FP Multiply
- InstrItinData<IIC_fpMUL64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<11, [FU_NPipe], 0>,
- InstrStage<11, [FU_NLSPipe]>], [11, 1, 1]>,
- //
- // Single-precision FP MAC
- InstrItinData<IIC_fpMAC32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NPipe]>], [7, 2, 1, 1]>,
- //
- // Double-precision FP MAC
- InstrItinData<IIC_fpMAC64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<19, [FU_NPipe], 0>,
- InstrStage<19, [FU_NLSPipe]>], [19, 2, 1, 1]>,
- //
- // Single-precision FP DIV
- InstrItinData<IIC_fpDIV32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<20, [FU_NPipe], 0>,
- InstrStage<20, [FU_NLSPipe]>], [20, 1, 1]>,
- //
- // Double-precision FP DIV
- InstrItinData<IIC_fpDIV64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<29, [FU_NPipe], 0>,
- InstrStage<29, [FU_NLSPipe]>], [29, 1, 1]>,
- //
- // Single-precision FP SQRT
- InstrItinData<IIC_fpSQRT32, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<19, [FU_NPipe], 0>,
- InstrStage<19, [FU_NLSPipe]>], [19, 1]>,
- //
- // Double-precision FP SQRT
- InstrItinData<IIC_fpSQRT64, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<29, [FU_NPipe], 0>,
- InstrStage<29, [FU_NLSPipe]>], [29, 1]>,
- //
- // Single-precision FP Load
- // use FU_Issue to enforce the 1 load/store per cycle limit
- InstrItinData<IIC_fpLoad32, [InstrStage<1, [FU_Issue], 0>,
- InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_LdSt0], 0>,
- InstrStage<1, [FU_NLSPipe]>]>,
- //
- // Double-precision FP Load
- // use FU_Issue to enforce the 1 load/store per cycle limit
- InstrItinData<IIC_fpLoad64, [InstrStage<2, [FU_Issue], 0>,
- InstrStage<1, [FU_Pipe0], 0>,
- InstrStage<1, [FU_Pipe1]>,
- InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_LdSt0], 0>,
- InstrStage<1, [FU_NLSPipe]>]>,
- //
- // FP Load Multiple
- // use FU_Issue to enforce the 1 load/store per cycle limit
- InstrItinData<IIC_fpLoadm, [InstrStage<3, [FU_Issue], 0>,
- InstrStage<2, [FU_Pipe0], 0>,
- InstrStage<2, [FU_Pipe1]>,
- InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_LdSt0], 0>,
- InstrStage<1, [FU_NLSPipe]>]>,
- //
- // Single-precision FP Store
- // use FU_Issue to enforce the 1 load/store per cycle limit
- InstrItinData<IIC_fpStore32,[InstrStage<1, [FU_Issue], 0>,
- InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_LdSt0], 0>,
- InstrStage<1, [FU_NLSPipe]>]>,
- //
- // Double-precision FP Store
- // use FU_Issue to enforce the 1 load/store per cycle limit
- InstrItinData<IIC_fpStore64,[InstrStage<2, [FU_Issue], 0>,
- InstrStage<1, [FU_Pipe0], 0>,
- InstrStage<1, [FU_Pipe1]>,
- InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_LdSt0], 0>,
- InstrStage<1, [FU_NLSPipe]>]>,
- //
- // FP Store Multiple
- // use FU_Issue to enforce the 1 load/store per cycle limit
- InstrItinData<IIC_fpStorem, [InstrStage<3, [FU_Issue], 0>,
- InstrStage<2, [FU_Pipe0], 0>,
- InstrStage<2, [FU_Pipe1]>,
- InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_LdSt0], 0>,
- InstrStage<1, [FU_NLSPipe]>]>,
-
- // NEON
- // Issue through integer pipeline, and execute in NEON unit.
- //
- // VLD1
- InstrItinData<IIC_VLD1, [InstrStage<1, [FU_Issue], 0>,
- InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_LdSt0], 0>,
- InstrStage<1, [FU_NLSPipe]>]>,
- //
- // VLD2
- InstrItinData<IIC_VLD2, [InstrStage<1, [FU_Issue], 0>,
- InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_LdSt0], 0>,
- InstrStage<1, [FU_NLSPipe]>], [2, 2, 1]>,
- //
- // VLD3
- InstrItinData<IIC_VLD3, [InstrStage<1, [FU_Issue], 0>,
- InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_LdSt0], 0>,
- InstrStage<1, [FU_NLSPipe]>], [2, 2, 2, 1]>,
- //
- // VLD4
- InstrItinData<IIC_VLD4, [InstrStage<1, [FU_Issue], 0>,
- InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_LdSt0], 0>,
- InstrStage<1, [FU_NLSPipe]>], [2, 2, 2, 2, 1]>,
- //
- // VST
- InstrItinData<IIC_VST, [InstrStage<1, [FU_Issue], 0>,
- InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_LdSt0], 0>,
- InstrStage<1, [FU_NLSPipe]>]>,
- //
- // Double-register FP Unary
- InstrItinData<IIC_VUNAD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NPipe]>], [5, 2]>,
- //
- // Quad-register FP Unary
- // Result written in N5, but that is relative to the last cycle of the
- // multi-cycle itinerary, so we use 6 for those cases
- InstrItinData<IIC_VUNAQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<2, [FU_NPipe]>], [6, 2]>,
- //
- // Double-register FP Binary
- InstrItinData<IIC_VBIND, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NPipe]>], [5, 2, 2]>,
- //
- // Quad-register FP Binary
- // Result written in N5, but that is relative to the last cycle of the
- // multi-cycle itinerary, so we use 6 for those cases
- InstrItinData<IIC_VBINQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<2, [FU_NPipe]>], [6, 2, 2]>,
- //
- // Move Immediate
- InstrItinData<IIC_VMOVImm, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NPipe]>], [3]>,
- //
- // Double-register Permute Move
- InstrItinData<IIC_VMOVD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NLSPipe]>], [2, 1]>,
- //
- // Quad-register Permute Move
- // Result written in N2, but that is relative to the last cycle of the
- // multi-cycle itinerary, so we use 3 for those cases
- InstrItinData<IIC_VMOVQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<2, [FU_NLSPipe]>], [3, 1]>,
- //
- // Integer to Single-precision Move
- InstrItinData<IIC_VMOVIS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NLSPipe]>], [2, 1]>,
- //
- // Integer to Double-precision Move
- InstrItinData<IIC_VMOVID , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NLSPipe]>], [2, 1, 1]>,
- //
- // Single-precision to Integer Move
- InstrItinData<IIC_VMOVSI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NLSPipe]>], [20, 1]>,
- //
- // Double-precision to Integer Move
- InstrItinData<IIC_VMOVDI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NLSPipe]>], [20, 20, 1]>,
- //
- // Integer to Lane Move
- InstrItinData<IIC_VMOVISL , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<2, [FU_NLSPipe]>], [3, 1, 1]>,
- //
- // Double-register Permute
- InstrItinData<IIC_VPERMD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NLSPipe]>], [2, 2, 1, 1]>,
- //
- // Quad-register Permute
- // Result written in N2, but that is relative to the last cycle of the
- // multi-cycle itinerary, so we use 3 for those cases
- InstrItinData<IIC_VPERMQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<2, [FU_NLSPipe]>], [3, 3, 1, 1]>,
- //
- // Quad-register Permute (3 cycle issue)
- // Result written in N2, but that is relative to the last cycle of the
- // multi-cycle itinerary, so we use 4 for those cases
- InstrItinData<IIC_VPERMQ3, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NLSPipe]>,
- InstrStage<1, [FU_NPipe], 0>,
- InstrStage<2, [FU_NLSPipe]>], [4, 4, 1, 1]>,
- //
- // Double-register FP Multiple-Accumulate
- InstrItinData<IIC_VMACD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NPipe]>], [9, 2, 2, 3]>,
- //
- // Quad-register FP Multiple-Accumulate
- // Result written in N9, but that is relative to the last cycle of the
- // multi-cycle itinerary, so we use 10 for those cases
- InstrItinData<IIC_VMACQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<2, [FU_NPipe]>], [10, 2, 2, 3]>,
- //
- // Double-register Reciprocal Step
- InstrItinData<IIC_VRECSD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NPipe]>], [9, 2, 2]>,
- //
- // Quad-register Reciprocal Step
- InstrItinData<IIC_VRECSQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<2, [FU_NPipe]>], [10, 2, 2]>,
- //
- // Double-register Integer Count
- InstrItinData<IIC_VCNTiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NPipe]>], [3, 2, 2]>,
- //
- // Quad-register Integer Count
- // Result written in N3, but that is relative to the last cycle of the
- // multi-cycle itinerary, so we use 4 for those cases
- InstrItinData<IIC_VCNTiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<2, [FU_NPipe]>], [4, 2, 2]>,
- //
- // Double-register Integer Unary
- InstrItinData<IIC_VUNAiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NPipe]>], [4, 2]>,
- //
- // Quad-register Integer Unary
- InstrItinData<IIC_VUNAiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NPipe]>], [4, 2]>,
- //
- // Double-register Integer Q-Unary
- InstrItinData<IIC_VQUNAiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NPipe]>], [4, 1]>,
- //
- // Quad-register Integer Q-Unary
- InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NPipe]>], [4, 1]>,
- //
- // Double-register Integer Binary
- InstrItinData<IIC_VBINiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NPipe]>], [3, 2, 2]>,
- //
- // Quad-register Integer Binary
- InstrItinData<IIC_VBINiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NPipe]>], [3, 2, 2]>,
- //
- // Double-register Integer Binary (4 cycle)
- InstrItinData<IIC_VBINi4D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NPipe]>], [4, 2, 1]>,
- //
- // Quad-register Integer Binary (4 cycle)
- InstrItinData<IIC_VBINi4Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NPipe]>], [4, 2, 1]>,
- //
- // Double-register Integer Subtract
- InstrItinData<IIC_VSUBiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NPipe]>], [3, 2, 1]>,
- //
- // Quad-register Integer Subtract
- InstrItinData<IIC_VSUBiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NPipe]>], [3, 2, 1]>,
- //
- // Double-register Integer Shift
- InstrItinData<IIC_VSHLiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NPipe]>], [3, 1, 1]>,
- //
- // Quad-register Integer Shift
- InstrItinData<IIC_VSHLiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<2, [FU_NPipe]>], [4, 1, 1]>,
- //
- // Double-register Integer Shift (4 cycle)
- InstrItinData<IIC_VSHLi4D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NPipe]>], [4, 1, 1]>,
- //
- // Quad-register Integer Shift (4 cycle)
- InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<2, [FU_NPipe]>], [5, 1, 1]>,
- //
- // Double-register Integer Pair Add Long
- InstrItinData<IIC_VPALiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NPipe]>], [6, 3, 2, 1]>,
- //
- // Quad-register Integer Pair Add Long
- InstrItinData<IIC_VPALiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<2, [FU_NPipe]>], [7, 3, 2, 1]>,
- //
- // Double-register Integer Multiply (.8, .16)
- InstrItinData<IIC_VMULi16D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NPipe]>], [6, 2, 2]>,
- //
- // Double-register Integer Multiply (.32)
- InstrItinData<IIC_VMULi32D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<2, [FU_NPipe]>], [7, 2, 1]>,
- //
- // Quad-register Integer Multiply (.8, .16)
- InstrItinData<IIC_VMULi16Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<2, [FU_NPipe]>], [7, 2, 2]>,
- //
- // Quad-register Integer Multiply (.32)
- InstrItinData<IIC_VMULi32Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NPipe]>,
- InstrStage<2, [FU_NLSPipe], 0>,
- InstrStage<3, [FU_NPipe]>], [9, 2, 1]>,
- //
- // Double-register Integer Multiply-Accumulate (.8, .16)
- InstrItinData<IIC_VMACi16D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NPipe]>], [6, 2, 2, 3]>,
- //
- // Double-register Integer Multiply-Accumulate (.32)
- InstrItinData<IIC_VMACi32D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<2, [FU_NPipe]>], [7, 2, 1, 3]>,
- //
- // Quad-register Integer Multiply-Accumulate (.8, .16)
- InstrItinData<IIC_VMACi16Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<2, [FU_NPipe]>], [7, 2, 2, 3]>,
- //
- // Quad-register Integer Multiply-Accumulate (.32)
- InstrItinData<IIC_VMACi32Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NPipe]>,
- InstrStage<2, [FU_NLSPipe], 0>,
- InstrStage<3, [FU_NPipe]>], [9, 2, 1, 3]>,
- //
- // Double-register VEXT
- InstrItinData<IIC_VEXTD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NLSPipe]>], [2, 1, 1]>,
- //
- // Quad-register VEXT
- InstrItinData<IIC_VEXTQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<2, [FU_NLSPipe]>], [3, 1, 1]>,
- //
- // VTB
- InstrItinData<IIC_VTB1, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<2, [FU_NLSPipe]>], [3, 2, 1]>,
- InstrItinData<IIC_VTB2, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<2, [FU_NLSPipe]>], [3, 2, 2, 1]>,
- InstrItinData<IIC_VTB3, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NLSPipe]>,
- InstrStage<1, [FU_NPipe], 0>,
- InstrStage<2, [FU_NLSPipe]>], [4, 2, 2, 3, 1]>,
- InstrItinData<IIC_VTB4, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NLSPipe]>,
- InstrStage<1, [FU_NPipe], 0>,
- InstrStage<2, [FU_NLSPipe]>], [4, 2, 2, 3, 3, 1]>,
- //
- // VTBX
- InstrItinData<IIC_VTBX1, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<2, [FU_NLSPipe]>], [3, 1, 2, 1]>,
- InstrItinData<IIC_VTBX2, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<2, [FU_NLSPipe]>], [3, 1, 2, 2, 1]>,
- InstrItinData<IIC_VTBX3, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NLSPipe]>,
- InstrStage<1, [FU_NPipe], 0>,
- InstrStage<2, [FU_NLSPipe]>], [4, 1, 2, 2, 3, 1]>,
- InstrItinData<IIC_VTBX4, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NLSPipe]>,
- InstrStage<1, [FU_NPipe], 0>,
- InstrStage<2, [FU_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]>
-]>;
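
The recurring "Result written in Nk, but that is relative to the last cycle of the multi-cycle itinerary" comments in the deleted Cortex-A8 table all encode one adjustment: when an instruction occupies the NEON pipe for more than one cycle, the documented Nk result stage counts from the last occupied cycle, so the def cycle in the operand list becomes (pipe cycles - 1) + k. A standalone sketch of that arithmetic, checked against entries above; this is a reading of the comments, not LLVM's scheduling code.

#include <iostream>

unsigned adjustedDefCycle(unsigned PipeCycles, unsigned Nk) {
  return (PipeCycles - 1) + Nk;
}

int main() {
  std::cout << adjustedDefCycle(1, 5) << "\n";  // IIC_VUNAD:   N5 -> 5
  std::cout << adjustedDefCycle(2, 5) << "\n";  // IIC_VUNAQ:   N5 -> 6
  std::cout << adjustedDefCycle(3, 2) << "\n";  // IIC_VPERMQ3: N2 -> 4
  std::cout << adjustedDefCycle(2, 9) << "\n";  // IIC_VMACQ:   N9 -> 10
}
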
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..c04ee3829c1d
--- /dev/null
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -0,0 +1,22 @@
+//===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARMSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-selectiondag-info"
+#include "ARMSelectionDAGInfo.h"
+using namespace llvm;
+
+ARMSelectionDAGInfo::ARMSelectionDAGInfo() {
+}
+
+ARMSelectionDAGInfo::~ARMSelectionDAGInfo() {
+}
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.h b/lib/Target/ARM/ARMSelectionDAGInfo.h
new file mode 100644
index 000000000000..afe9a47201bb
--- /dev/null
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.h
@@ -0,0 +1,29 @@
+//===-- ARMSelectionDAGInfo.h - ARM SelectionDAG Info -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ARM subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMSELECTIONDAGINFO_H
+#define ARMSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class ARMSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ ARMSelectionDAGInfo();
+ ~ARMSelectionDAGInfo();
+};
+
+}
+
+#endif
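
ARMSelectionDAGInfo starts life as an empty hook class. The usual shape of this pattern is that the target machine owns one instance and hands it out through a const accessor, mirroring the getInstrInfo()/getTargetLowering() accessors touched later in this patch. A standalone sketch with stub names (TargetSelectionDAGInfoStub and MyTargetMachine are illustrative, not the real LLVM classes):

class TargetSelectionDAGInfoStub {
public:
  virtual ~TargetSelectionDAGInfoStub() {}
};

class MySelectionDAGInfo : public TargetSelectionDAGInfoStub {
  // Target-specific SelectionDAG hooks (custom memcpy/memset lowering and
  // the like) would be overridden here as the subclass grows.
};

class MyTargetMachine {
  MySelectionDAGInfo TSInfo;  // one instance owned by the target machine
public:
  const MySelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
};

int main() {
  MyTargetMachine TM;
  return TM.getSelectionDAGInfo() ? 0 : 1;
}
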
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 9e55cd870031..b11580a65c00 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -116,7 +116,8 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol.
bool
-ARMSubtarget::GVIsIndirectSymbol(GlobalValue *GV, Reloc::Model RelocM) const {
+ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,
+ Reloc::Model RelocM) const {
if (RelocM == Reloc::Static)
return false;
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index fa56a9195bc2..288a19a45712 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -160,7 +160,7 @@ protected:
/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect
/// symbol.
- bool GVIsIndirectSymbol(GlobalValue *GV, Reloc::Model RelocM) const;
+ bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const;
};
} // End llvm namespace
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 95f57b7b34fa..662e61e36c4b 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -102,8 +102,12 @@ bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM,
bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
// FIXME: temporarily disabling load / store optimization pass for Thumb1.
- if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only())
- PM.add(createARMLoadStoreOptimizationPass());
+ if (OptLevel != CodeGenOpt::None) {
+ if (!Subtarget.isThumb1Only())
+ PM.add(createARMLoadStoreOptimizationPass());
+ if (Subtarget.hasNEON())
+ PM.add(createNEONMoveFixPass());
+ }
// Expand some pseudo instructions into multiple instructions to allow
// proper scheduling.
@@ -118,8 +122,6 @@ bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM,
if (OptLevel != CodeGenOpt::None) {
if (!Subtarget.isThumb1Only())
PM.add(createIfConverterPass());
- if (Subtarget.hasNEON())
- PM.add(createNEONMoveFixPass());
}
if (Subtarget.isThumb2()) {
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index c32f16c77a24..4e205df9a313 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -71,8 +71,8 @@ public:
return &InstrInfo.getRegisterInfo();
}
- virtual ARMTargetLowering *getTargetLowering() const {
- return const_cast<ARMTargetLowering*>(&TLInfo);
+ virtual const ARMTargetLowering *getTargetLowering() const {
+ return &TLInfo;
}
virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; }
@@ -97,8 +97,8 @@ public:
return &InstrInfo->getRegisterInfo();
}
- virtual ARMTargetLowering *getTargetLowering() const {
- return const_cast<ARMTargetLowering*>(&TLInfo);
+ virtual const ARMTargetLowering *getTargetLowering() const {
+ return &TLInfo;
}
/// returns either Thumb1InstrInfo or Thumb2InstrInfo
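
The getTargetLowering() change above trades a const_cast for an honest const return type, so callers that mutate lowering state stop compiling instead of slipping through. A tiny standalone before/after sketch; Lowering and get() are illustrative names:

struct Lowering { int Opt = 0; };

struct Before {
  Lowering TLInfo;
  // Claims const but hands out a mutable pointer into *this.
  Lowering *get() const { return const_cast<Lowering *>(&TLInfo); }
};

struct After {
  Lowering TLInfo;
  // Honest contract: nothing can be mutated through the accessor.
  const Lowering *get() const { return &TLInfo; }
};

int main() {
  Before B;
  After A;
  B.get()->Opt = 1;     // compiles; mutates state behind a const interface
  // A.get()->Opt = 1;  // would now be a compile error, as intended
  return A.get()->Opt;
}
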
diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp
index 680d03254f10..091a3b3d8497 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -9,6 +9,7 @@
#include "ARMTargetObjectFile.h"
#include "ARMSubtarget.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Target/TargetMachine.h"
@@ -25,12 +26,14 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
if (TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI()) {
StaticCtorSection =
- getELFSection(".init_array", MCSectionELF::SHT_INIT_ARRAY,
- MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC,
- SectionKind::getDataRel());
+ getContext().getELFSection(".init_array", MCSectionELF::SHT_INIT_ARRAY,
+ MCSectionELF::SHF_WRITE |
+ MCSectionELF::SHF_ALLOC,
+ SectionKind::getDataRel());
StaticDtorSection =
- getELFSection(".fini_array", MCSectionELF::SHT_FINI_ARRAY,
- MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC,
- SectionKind::getDataRel());
+ getContext().getELFSection(".fini_array", MCSectionELF::SHT_FINI_ARRAY,
+ MCSectionELF::SHF_WRITE |
+ MCSectionELF::SHF_ALLOC,
+ SectionKind::getDataRel());
}
}
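
The hunk above reroutes section creation through getContext(), i.e. through MCContext acting as a uniquing factory, instead of a helper on the object-file lowering class. A standalone sketch of that factory pattern with illustrative types; the numeric arguments in main mirror ELF's SHT_INIT_ARRAY (14) and SHF_WRITE | SHF_ALLOC (0x3), the constants named symbolically in the patch.

#include <map>
#include <memory>
#include <string>

struct Section { std::string Name; unsigned Type; unsigned Flags; };

class Context {
  std::map<std::string, std::unique_ptr<Section>> Sections;
public:
  // Returns the unique Section for Name, creating it on first request, so
  // every client asking for ".init_array" shares one object.
  const Section *getELFSection(const std::string &Name, unsigned Type,
                               unsigned Flags) {
    std::unique_ptr<Section> &S = Sections[Name];
    if (!S)
      S.reset(new Section{Name, Type, Flags});
    return S.get();
  }
};

int main() {
  Context Ctx;
  const Section *A = Ctx.getELFSection(".init_array", 14, 0x3);
  const Section *B = Ctx.getELFSection(".init_array", 14, 0x3);
  return A == B ? 0 : 1;  // same pointer: the section is uniqued
}
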
diff --git a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
new file mode 100644
index 000000000000..f859d1b1c95f
--- /dev/null
+++ b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
@@ -0,0 +1,140 @@
+//===-- ARMAsmLexer.cpp - Tokenize ARM assembly to AsmTokens --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMTargetMachine.h"
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+
+#include "llvm/Target/TargetAsmLexer.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#include <string>
+#include <map>
+
+using namespace llvm;
+
+namespace {
+
+ class ARMBaseAsmLexer : public TargetAsmLexer {
+ const MCAsmInfo &AsmInfo;
+
+ const AsmToken &lexDefinite() {
+ return getLexer()->Lex();
+ }
+
+ AsmToken LexTokenUAL();
+ protected:
+ typedef std::map<std::string, unsigned> rmap_ty;
+
+ rmap_ty RegisterMap;
+
+ void InitRegisterMap(const TargetRegisterInfo *info) {
+ unsigned numRegs = info->getNumRegs();
+
+ for (unsigned i = 0; i < numRegs; ++i) {
+ const char *regName = info->getName(i);
+ if (regName)
+ RegisterMap[regName] = i;
+ }
+ }
+
+ unsigned MatchRegisterName(StringRef Name) {
+ rmap_ty::iterator iter = RegisterMap.find(Name.str());
+ if (iter != RegisterMap.end())
+ return iter->second;
+ else
+ return 0;
+ }
+
+ AsmToken LexToken() {
+ if (!Lexer) {
+ SetError(SMLoc(), "No MCAsmLexer installed");
+ return AsmToken(AsmToken::Error, "", 0);
+ }
+
+ switch (AsmInfo.getAssemblerDialect()) {
+ default:
+ SetError(SMLoc(), "Unhandled dialect");
+ return AsmToken(AsmToken::Error, "", 0);
+ case 0:
+ return LexTokenUAL();
+ }
+ }
+ public:
+ ARMBaseAsmLexer(const Target &T, const MCAsmInfo &MAI)
+ : TargetAsmLexer(T), AsmInfo(MAI) {
+ }
+ };
+
+ class ARMAsmLexer : public ARMBaseAsmLexer {
+ public:
+ ARMAsmLexer(const Target &T, const MCAsmInfo &MAI)
+ : ARMBaseAsmLexer(T, MAI) {
+ std::string tripleString("arm-unknown-unknown");
+ std::string featureString;
+ OwningPtr<const TargetMachine>
+ targetMachine(T.createTargetMachine(tripleString, featureString));
+ InitRegisterMap(targetMachine->getRegisterInfo());
+ }
+ };
+
+ class ThumbAsmLexer : public ARMBaseAsmLexer {
+ public:
+ ThumbAsmLexer(const Target &T, const MCAsmInfo &MAI)
+ : ARMBaseAsmLexer(T, MAI) {
+ std::string tripleString("thumb-unknown-unknown");
+ std::string featureString;
+ OwningPtr<const TargetMachine>
+ targetMachine(T.createTargetMachine(tripleString, featureString));
+ InitRegisterMap(targetMachine->getRegisterInfo());
+ }
+ };
+}
+
+AsmToken ARMBaseAsmLexer::LexTokenUAL() {
+ const AsmToken &lexedToken = lexDefinite();
+
+ switch (lexedToken.getKind()) {
+ default:
+ return AsmToken(lexedToken);
+ case AsmToken::Error:
+ SetError(Lexer->getErrLoc(), Lexer->getErr());
+ return AsmToken(lexedToken);
+ case AsmToken::Identifier:
+ {
+ std::string upperCase = lexedToken.getString().str();
+ std::string lowerCase = LowercaseString(upperCase);
+ StringRef lowerRef(lowerCase);
+
+ unsigned regID = MatchRegisterName(lowerRef);
+
+ if (regID) {
+ return AsmToken(AsmToken::Register,
+ lexedToken.getString(),
+ static_cast<int64_t>(regID));
+ } else {
+ return AsmToken(lexedToken);
+ }
+ }
+ }
+}
+
+extern "C" void LLVMInitializeARMAsmLexer() {
+ RegisterAsmLexer<ARMAsmLexer> X(TheARMTarget);
+ RegisterAsmLexer<ThumbAsmLexer> Y(TheThumbTarget);
+}
+
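
The interesting step in LexTokenUAL above is the identifier-to-register promotion: lower-case the spelling, look it up in the name-to-id map that InitRegisterMap built from TargetRegisterInfo, and re-tag the token as a register when the lookup hits (0 is reserved for "not a register"). A standalone sketch of just that step, with a toy numbering rather than ARM's register enum:

#include <cctype>
#include <iostream>
#include <map>
#include <string>

static std::map<std::string, unsigned> RegisterMap = {
    {"r0", 1}, {"r1", 2}, {"sp", 14}};  // toy ids, not ARM's enum values

unsigned matchRegisterName(std::string Name) {
  for (char &C : Name)  // normalize spelling, as LowercaseString does above
    C = char(std::tolower(static_cast<unsigned char>(C)));
  auto It = RegisterMap.find(Name);
  return It == RegisterMap.end() ? 0 : It->second;
}

int main() {
  std::cout << matchRegisterName("R0") << " "    // 1: promoted to Register
            << matchRegisterName("foo") << "\n"; // 0: stays an Identifier
}
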
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index cf55377bc677..bfa89c4a4696 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -812,8 +812,11 @@ bool ARMAsmParser::ParseDirectiveCode(SMLoc L) {
return false;
}
+extern "C" void LLVMInitializeARMAsmLexer();
+
/// Force static initialization.
extern "C" void LLVMInitializeARMAsmParser() {
RegisterAsmParser<ARMAsmParser> X(TheARMTarget);
RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget);
+ LLVMInitializeARMAsmLexer();
}
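
Chaining LLVMInitializeARMAsmLexer() off the parser's initializer means any client that forces the parser in gets the lexer registered too. A standalone sketch of the pattern; the function bodies here just print, where the real entry points register factories with the target registry.

#include <iostream>

extern "C" void InitializeMyAsmLexer() { std::cout << "lexer registered\n"; }

extern "C" void InitializeMyAsmParser() {
  std::cout << "parser registered\n";
  InitializeMyAsmLexer();  // parser init pulls the lexer in, as in the hunk
}

int main() { InitializeMyAsmParser(); }
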
diff --git a/lib/Target/ARM/AsmParser/CMakeLists.txt b/lib/Target/ARM/AsmParser/CMakeLists.txt
index 308c6cff8da9..9ba7c0125d7f 100644
--- a/lib/Target/ARM/AsmParser/CMakeLists.txt
+++ b/lib/Target/ARM/AsmParser/CMakeLists.txt
@@ -1,6 +1,7 @@
include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
add_llvm_library(LLVMARMAsmParser
+ ARMAsmLexer.cpp
ARMAsmParser.cpp
)
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
index 15c52946cf58..80a9d2d714e6 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -21,6 +21,7 @@
#include "ARMMachineFunctionInfo.h"
#include "ARMMCInstLower.h"
#include "ARMTargetMachine.h"
+#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Constants.h"
#include "llvm/Module.h"
#include "llvm/Type.h"
@@ -239,7 +240,7 @@ namespace {
} else if (ACPV->isBlockAddress()) {
O << *GetBlockAddressSymbol(ACPV->getBlockAddress());
} else if (ACPV->isGlobalValue()) {
- GlobalValue *GV = ACPV->getGV();
+ const GlobalValue *GV = ACPV->getGV();
bool isIndirect = Subtarget->isTargetDarwin() &&
Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel());
if (!isIndirect)
@@ -352,7 +353,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
return;
case MachineOperand::MO_GlobalAddress: {
bool isCallOp = Modifier && !strcmp(Modifier, "call");
- GlobalValue *GV = MO.getGlobal();
+ const GlobalValue *GV = MO.getGlobal();
if ((Modifier && strcmp(Modifier, "lo16") == 0) ||
(TF & ARMII::MO_LO16))
@@ -504,7 +505,6 @@ void ARMAsmPrinter::printAddrMode2OffsetOperand(const MachineInstr *MI, int Op,
if (!MO1.getReg()) {
unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm());
- assert(ImmOffs && "Malformed indexed load / store!");
O << "#"
<< ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()))
<< ImmOffs;
@@ -556,7 +556,6 @@ void ARMAsmPrinter::printAddrMode3OffsetOperand(const MachineInstr *MI, int Op,
}
unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm());
- assert(ImmOffs && "Malformed indexed load / store!");
O << "#"
<< ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm()))
<< ImmOffs;
@@ -1110,6 +1109,24 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
SmallString<128> Str;
raw_svector_ostream OS(Str);
+ if (MI->getOpcode() == ARM::DBG_VALUE) {
+ unsigned NOps = MI->getNumOperands();
+ assert(NOps == 4);
+ OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
+ // Cast away const; the DI* wrappers do not take const operands for some reason.
+ DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
+ OS << V.getName();
+ OS << " <- ";
+ // Frame address. Currently handles register +/- offset only.
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
+ OS << '['; printOperand(MI, 0, OS); OS << '+'; printOperand(MI, 1, OS);
+ OS << ']';
+ OS << "+";
+ printOperand(MI, NOps-2, OS);
+ OutStreamer.EmitRawText(OS.str());
+ return;
+ }
+
printInstruction(MI, OS);
OutStreamer.EmitRawText(OS.str());
@@ -1129,22 +1146,23 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
// avoid out-of-range branches that are due to a fundamental limitation of
// the way symbol offsets are encoded with the current Darwin ARM
// relocations.
- TargetLoweringObjectFileMachO &TLOFMacho =
- static_cast<TargetLoweringObjectFileMachO &>(getObjFileLowering());
+ const TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<const TargetLoweringObjectFileMachO &>(
+ getObjFileLowering());
OutStreamer.SwitchSection(TLOFMacho.getTextSection());
OutStreamer.SwitchSection(TLOFMacho.getTextCoalSection());
OutStreamer.SwitchSection(TLOFMacho.getConstTextCoalSection());
if (RelocM == Reloc::DynamicNoPIC) {
const MCSection *sect =
- TLOFMacho.getMachOSection("__TEXT", "__symbol_stub4",
- MCSectionMachO::S_SYMBOL_STUBS,
- 12, SectionKind::getText());
+ OutContext.getMachOSection("__TEXT", "__symbol_stub4",
+ MCSectionMachO::S_SYMBOL_STUBS,
+ 12, SectionKind::getText());
OutStreamer.SwitchSection(sect);
} else {
const MCSection *sect =
- TLOFMacho.getMachOSection("__TEXT", "__picsymbolstub4",
- MCSectionMachO::S_SYMBOL_STUBS,
- 16, SectionKind::getText());
+ OutContext.getMachOSection("__TEXT", "__picsymbolstub4",
+ MCSectionMachO::S_SYMBOL_STUBS,
+ 16, SectionKind::getText());
OutStreamer.SwitchSection(sect);
}
}
@@ -1201,8 +1219,8 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
if (Subtarget->isTargetDarwin()) {
// All darwin targets use mach-o.
- TargetLoweringObjectFileMachO &TLOFMacho =
- static_cast<TargetLoweringObjectFileMachO &>(getObjFileLowering());
+ const TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
MachineModuleInfoMachO &MMIMacho =
MMI->getObjFileInfo<MachineModuleInfoMachO>();
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
index ef5ead6e473b..ac6331f931fc 100644
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
@@ -18,6 +18,7 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -330,7 +331,6 @@ void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI,
if (!MO1.getReg()) {
unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm());
- assert(ImmOffs && "Malformed indexed load / store!");
O << '#'
<< ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()))
<< ImmOffs;
@@ -380,7 +380,6 @@ void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI,
}
unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm());
- assert(ImmOffs && "Malformed indexed load / store!");
O << '#'
<< ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm()))
<< ImmOffs;
@@ -779,3 +778,22 @@ void ARMInstPrinter::printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum,
O << '#' << MI->getOperand(OpNum).getImm();
}
+void ARMInstPrinter::printHex8ImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xff);
+}
+
+void ARMInstPrinter::printHex16ImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffff);
+}
+
+void ARMInstPrinter::printHex32ImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffffffff);
+}
+
+void ARMInstPrinter::printHex64ImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm());
+}
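
The new printHex*ImmOperand bodies mask before formatting because the operand is stored as an int64_t, so a narrow negative immediate would otherwise print with all 64 bits set. A standalone check of the 16-bit case:

#include <cstdint>
#include <iostream>

// Same masking as printHex16ImmOperand above.
void printHex16Imm(int64_t Imm) {
  std::cout << "#0x" << std::hex << (Imm & 0xffff) << std::dec << "\n";
}

int main() {
  printHex16Imm(0x1234);  // prints #0x1234
  printHex16Imm(-1);      // prints #0xffff, not #0xffffffffffffffff
}
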
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
index dd006fc52e74..be0b7c1eb027 100644
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
@@ -104,10 +104,10 @@ public:
void printNoHashImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printHex8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {}
- void printHex16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {}
- void printHex32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {}
- void printHex64ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {}
+ void printHex8ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printHex16ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printHex32ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printHex64ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
// FIXME: Implement.
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index bbc0095f6ae7..29e66e13b168 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -10,6 +10,7 @@ tablegen(ARMGenAsmWriter.inc -gen-asm-writer)
tablegen(ARMGenDAGISel.inc -gen-dag-isel)
tablegen(ARMGenCallingConv.inc -gen-callingconv)
tablegen(ARMGenSubtarget.inc -gen-subtarget)
+tablegen(ARMGenEDInfo.inc -gen-enhanced-disassembly-info)
add_llvm_target(ARMCodeGen
ARMBaseInstrInfo.cpp
@@ -36,6 +37,7 @@ add_llvm_target(ARMCodeGen
Thumb2InstrInfo.cpp
Thumb2RegisterInfo.cpp
Thumb2SizeReduction.cpp
+ ARMSelectionDAGInfo.cpp
)
target_link_libraries (LLVMARMCodeGen LLVMSelectionDAG)
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 47c31043d9c0..4de697e8bf67 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -18,6 +18,7 @@
#include "ARMDisassembler.h"
#include "ARMDisassemblerCore.h"
+#include "llvm/MC/EDInstInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/Debug.h"
@@ -38,7 +39,9 @@
///
#include "../ARMGenDecoderTables.inc"
-namespace llvm {
+#include "../ARMGenEDInfo.inc"
+
+using namespace llvm;
/// showBitVector - Use the raw_ostream to log a diagnostic message describing
/// the individual bits of the instruction.
@@ -247,27 +250,27 @@ static unsigned T2Morph2LoadLiteral(unsigned Opcode) {
case ARM::t2LDR_POST: case ARM::t2LDR_PRE:
case ARM::t2LDRi12: case ARM::t2LDRi8:
- case ARM::t2LDRs:
+ case ARM::t2LDRs: case ARM::t2LDRT:
return ARM::t2LDRpci;
case ARM::t2LDRB_POST: case ARM::t2LDRB_PRE:
case ARM::t2LDRBi12: case ARM::t2LDRBi8:
- case ARM::t2LDRBs:
+ case ARM::t2LDRBs: case ARM::t2LDRBT:
return ARM::t2LDRBpci;
case ARM::t2LDRH_POST: case ARM::t2LDRH_PRE:
case ARM::t2LDRHi12: case ARM::t2LDRHi8:
- case ARM::t2LDRHs:
+ case ARM::t2LDRHs: case ARM::t2LDRHT:
return ARM::t2LDRHpci;
case ARM::t2LDRSB_POST: case ARM::t2LDRSB_PRE:
case ARM::t2LDRSBi12: case ARM::t2LDRSBi8:
- case ARM::t2LDRSBs:
+ case ARM::t2LDRSBs: case ARM::t2LDRSBT:
return ARM::t2LDRSBpci;
case ARM::t2LDRSH_POST: case ARM::t2LDRSH_PRE:
case ARM::t2LDRSHi12: case ARM::t2LDRSHi8:
- case ARM::t2LDRSHs:
+ case ARM::t2LDRSHs: case ARM::t2LDRSHT:
return ARM::t2LDRSHpci;
}
}
@@ -404,7 +407,6 @@ bool ARMDisassembler::getInstruction(MCInst &MI,
});
ARMBasicMCBuilder *Builder = CreateMCBuilder(Opcode, Format);
-
if (!Builder)
return false;
@@ -492,11 +494,11 @@ bool ThumbDisassembler::getInstruction(MCInst &MI,
});
ARMBasicMCBuilder *Builder = CreateMCBuilder(Opcode, Format);
- Builder->setSession(const_cast<Session *>(&SO));
-
if (!Builder)
return false;
+ Builder->SetSession(const_cast<Session *>(&SO));
+
if (!Builder->Build(MI, insn))
return false;
@@ -506,17 +508,37 @@ bool ThumbDisassembler::getInstruction(MCInst &MI,
}
// A8.6.50
+// Valid return values are {1, 2, 3, 4}, with 0 signifying an error condition.
static unsigned short CountITSize(unsigned ITMask) {
// First count the trailing zeros of the IT mask.
unsigned TZ = CountTrailingZeros_32(ITMask);
- assert(TZ <= 3 && "Encoding error");
+ if (TZ > 3) {
+ DEBUG(errs() << "Encoding error: IT Mask '0000'");
+ return 0;
+ }
return (4 - TZ);
}
-/// Init ITState.
-void Session::InitIT(unsigned short bits7_0) {
+/// Init ITState. Note that at least one bit is always 1 in the mask.
+bool Session::InitIT(unsigned short bits7_0) {
ITCounter = CountITSize(slice(bits7_0, 3, 0));
+ if (ITCounter == 0)
+ return false;
+
+ // A8.6.50 IT
+ unsigned short FirstCond = slice(bits7_0, 7, 4);
+ if (FirstCond == 0xF) {
+ DEBUG(errs() << "Encoding error: IT FirstCond '1111'");
+ return false;
+ }
+ if (FirstCond == 0xE && ITCounter != 1) {
+ DEBUG(errs() << "Encoding error: IT FirstCond '1110' && Mask != '1000'");
+ return false;
+ }
+
ITState = bits7_0;
+
+ return true;
}
/// Update ITState if necessary.
@@ -547,4 +569,10 @@ extern "C" void LLVMInitializeARMDisassembler() {
createThumbDisassembler);
}
-} // namespace llvm
+EDInstInfo *ARMDisassembler::getEDInfo() const {
+ return instInfoARM;
+}
+
+EDInstInfo *ThumbDisassembler::getEDInfo() const {
+ return instInfoARM;
+}
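
The IT-block changes above turn asserts into recoverable decode errors: an all-zero IT mask, a firstcond of '1111', and firstcond '1110' with more than one instruction in the block are all rejected (ARM ARM section A8.6.50). A standalone sketch of those checks:

#include <iostream>

// Block length is 4 minus the trailing-zero count of the 4-bit IT mask; an
// all-zero mask has no length and is reported as 0 (invalid).
unsigned countITSize(unsigned Mask4) {
  unsigned TZ = 0;
  while (TZ < 4 && !((Mask4 >> TZ) & 1))
    ++TZ;
  return TZ > 3 ? 0 : 4 - TZ;
}

// Mirrors the validity rules InitIT now enforces.
bool initIT(unsigned Bits7_0) {
  unsigned Count = countITSize(Bits7_0 & 0xF);
  if (Count == 0)
    return false;                        // mask '0000'
  unsigned FirstCond = (Bits7_0 >> 4) & 0xF;
  if (FirstCond == 0xF)
    return false;                        // firstcond '1111'
  if (FirstCond == 0xE && Count != 1)
    return false;                        // AL allows only a one-insn block
  return true;
}

int main() {
  // IT EQ (valid), firstcond '1111' (invalid), IT AL with a two-instruction
  // mask (invalid): prints 100.
  std::cout << initIT(0x18) << initIT(0xF8) << initIT(0xE4) << "\n";
}
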
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.h b/lib/Target/ARM/Disassembler/ARMDisassembler.h
index 44592e0f1567..0a74a3866eed 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.h
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.h
@@ -24,6 +24,8 @@ class MCInst;
class MemoryObject;
class raw_ostream;
+struct EDInstInfo;
+
/// ARMDisassembler - ARM disassembler for all ARM platforms.
class ARMDisassembler : public MCDisassembler {
public:
@@ -42,6 +44,9 @@ public:
const MemoryObject &region,
uint64_t address,
raw_ostream &vStream) const;
+
+ /// getEDInfo - See MCDisassembler.
+ EDInstInfo *getEDInfo() const;
private:
};
@@ -55,7 +60,7 @@ public:
Session() : ITCounter(0), ITState(0) {}
~Session() {}
/// InitIT - Initializes ITCounter/ITState.
- void InitIT(unsigned short bits7_0);
+ bool InitIT(unsigned short bits7_0);
/// UpdateIT - Updates ITCounter/ITState as IT Block progresses.
void UpdateIT();
@@ -82,6 +87,9 @@ public:
const MemoryObject &region,
uint64_t address,
raw_ostream &vStream) const;
+
+ /// getEDInfo - See MCDisassembler.
+ EDInstInfo *getEDInfo() const;
private:
Session SO;
};
diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
index db921ef0b628..adb7795a746c 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
@@ -13,8 +13,12 @@
//
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "arm-disassembler"
+
#include "ARMDisassemblerCore.h"
#include "ARMAddressingModes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
/// ARMGenInstrInfo.inc - ARMGenInstrInfo.inc contains the static const
/// TargetInstrDesc ARMInsts[] definition and the TargetOperandInfo[]'s
@@ -75,7 +79,7 @@ const char *ARMUtils::OpcodeName(unsigned Opcode) {
// Return the register enum Based on RegClass and the raw register number.
// For DRegPair, see comments below.
// FIXME: Auto-gened?
-static unsigned getRegisterEnum(unsigned RegClassID, unsigned RawRegister,
+static unsigned getRegisterEnum(BO B, unsigned RegClassID, unsigned RawRegister,
bool DRegPair = false) {
if (DRegPair && RegClassID == ARM::QPRRegClassID) {
@@ -345,7 +349,9 @@ static unsigned getRegisterEnum(unsigned RegClassID, unsigned RawRegister,
}
break;
}
- assert(0 && "Invalid (RegClassID, RawRegister) combination");
+ DEBUG(errs() << "Invalid (RegClassID, RawRegister) combination\n");
+ // Encoding error. Mark the builder with error code != 0.
+ B->SetErr(-1);
return 0;
}
@@ -509,7 +515,7 @@ static bool DisassemblePseudo(MCInst &MI, unsigned Opcode, uint32_t insn,
// Inst{3-0} => Rm
// Inst{11-8} => Rs
static bool DisassembleMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
const TargetInstrDesc &TID = ARMInsts[Opcode];
unsigned short NumDefs = TID.getNumDefs();
@@ -529,26 +535,26 @@ static bool DisassembleMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
if (NumDefs == 2) {
assert(NumOps >= 4 && OpInfo[3].RegClass == ARM::GPRRegClassID &&
"Expect 4th register operand");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRd(insn))));
++OpIdx;
}
// The destination register: RdHi{19-16} or Rd{19-16}.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
// The two src registers: Rn{3-0}, then Rm{11-8}.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRs(insn))));
OpIdx += 3;
// Many multiply instructions (e.g., MLA) have three src registers.
// The third register operand is Ra{15-12}.
if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRd(insn))));
++OpIdx;
}
@@ -610,7 +616,7 @@ static inline unsigned GetCopOpc(uint32_t insn) {
// and friends
//
static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
assert(NumOps >= 5 && "Num of operands >= 5 for coprocessor instr");
@@ -631,7 +637,7 @@ static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn,
MI.addOperand(MCOperand::CreateImm(decodeRd(insn)));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
if (PW) {
@@ -651,11 +657,11 @@ static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn,
MI.addOperand(NoGPR ? MCOperand::CreateImm(decodeRd(insn))
: MCOperand::CreateReg(
- getRegisterEnum(ARM::GPRRegClassID,
+ getRegisterEnum(B, ARM::GPRRegClassID,
decodeRd(insn))));
MI.addOperand(OneCopOpc ? MCOperand::CreateReg(
- getRegisterEnum(ARM::GPRRegClassID,
+ getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn)))
: MCOperand::CreateImm(decodeRn(insn)));
@@ -688,18 +694,19 @@ static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn,
// SRSW/SRS: addrmode4:$addr mode_imm
// RFEW/RFE: addrmode4:$addr Rn
static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
if (CoprocessorOpcode(Opcode))
- return DisassembleCoprocessor(MI, Opcode, insn, NumOps, NumOpsAdded);
+ return DisassembleCoprocessor(MI, Opcode, insn, NumOps, NumOpsAdded, B);
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
// MRS and MRSsys take one GPR reg Rd.
if (Opcode == ARM::MRS || Opcode == ARM::MRSsys) {
assert(NumOps >= 1 && OpInfo[0].RegClass == ARM::GPRRegClassID &&
"Reg operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRd(insn))));
NumOpsAdded = 1;
return true;
@@ -708,7 +715,7 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
if (Opcode == ARM::BXJ) {
assert(NumOps >= 1 && OpInfo[0].RegClass == ARM::GPRRegClassID &&
"Reg operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
NumOpsAdded = 1;
return true;
@@ -717,7 +724,7 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
if (Opcode == ARM::MSR || Opcode == ARM::MSRsys) {
assert(NumOps >= 1 && OpInfo[0].RegClass == ARM::GPRRegClassID &&
"Reg operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 16)));
NumOpsAdded = 2;
@@ -748,7 +755,7 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
if (Opcode == ARM::SRSW || Opcode == ARM::SRS)
MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0)));
else
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
NumOpsAdded = 3;
return true;
@@ -791,9 +798,11 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// BLXr9, BXr9
// BRIND, BX_RET
static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
unsigned &OpIdx = NumOpsAdded;
OpIdx = 0;
@@ -806,7 +815,7 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
if (Opcode == ARM::BLXr9 || Opcode == ARM::BRIND) {
assert(NumOps >= 1 && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
"Reg operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
OpIdx = 1;
return true;
@@ -817,9 +826,9 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// InOperandList with GPR:$target and GPR:$idx regs.
assert(NumOps == 4 && "Expect 4 operands");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
// Fill in the two remaining imm operands to signify build completion.
@@ -835,7 +844,7 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// InOperandList with GPR::$target reg.
assert(NumOps == 3 && "Expect 3 operands");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
// Fill in the two remaining imm operands to signify build completion.
@@ -852,13 +861,13 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// See also ARMAddressingModes.h (Addressing Mode #2).
assert(NumOps == 5 && getIBit(insn) == 1 && "Expect 5 operands && I-bit=1");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
// Disassemble the offset reg (Rm), shift type, and immediate shift length.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
// Inst{6-5} encodes the shift opcode.
ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5));
@@ -882,14 +891,19 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
return false;
}
-static inline uint32_t getBFCInvMask(uint32_t insn) {
+static inline bool getBFCInvMask(uint32_t insn, uint32_t &mask) {
uint32_t lsb = slice(insn, 11, 7);
uint32_t msb = slice(insn, 20, 16);
uint32_t Val = 0;
- assert(lsb <= msb && "Encoding error: lsb > msb");
+ if (msb < lsb) {
+ DEBUG(errs() << "Encoding error: msb < lsb\n");
+ return false;
+ }
+
for (uint32_t i = lsb; i <= msb; ++i)
Val |= (1 << i);
- return ~Val;
+ mask = ~Val;
+ return true;
}
static inline bool SaturateOpcode(unsigned Opcode) {
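
getBFCInvMask above builds the BFC/BFI inverse-mask operand by setting bits lsb..msb and inverting, and now reports msb < lsb as a decode error instead of asserting. A standalone check:

#include <cstdint>
#include <iostream>

bool bfcInvMask(uint32_t Lsb, uint32_t Msb, uint32_t &Mask) {
  if (Msb < Lsb)
    return false;              // encoding error, as in the hunk above
  uint32_t Val = 0;
  for (uint32_t I = Lsb; I <= Msb; ++I)
    Val |= 1u << I;            // set bits Lsb..Msb
  Mask = ~Val;
  return true;
}

int main() {
  uint32_t M = 0;
  if (bfcInvMask(4, 7, M))
    std::cout << std::hex << M << "\n";  // ffffff0f
}
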
@@ -924,7 +938,7 @@ static inline unsigned decodeSaturatePos(unsigned Opcode, uint32_t insn) {
// operations have Rd Rm Rn, instead of the "normal" Rd Rn Rm.
// They are QADD, QDADD, QDSUB, and QSUB.
static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
const TargetInstrDesc &TID = ARMInsts[Opcode];
unsigned short NumDefs = TID.getNumDefs();
@@ -936,7 +950,7 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// Disassemble register def if there is one.
if (NumDefs && (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID)) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRd(insn))));
++OpIdx;
}
@@ -949,7 +963,7 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
if (SaturateOpcode(Opcode)) {
MI.addOperand(MCOperand::CreateImm(decodeSaturatePos(Opcode, insn)));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
if (Opcode == ARM::SSAT16 || Opcode == ARM::USAT16) {
@@ -977,14 +991,18 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
if (Opcode == ARM::BFC || Opcode == ARM::BFI) {
// TIED_TO operand skipped for BFC and Inst{3-0} (Reg) for BFI.
MI.addOperand(MCOperand::CreateReg(Opcode == ARM::BFC ? 0
- : getRegisterEnum(ARM::GPRRegClassID,
+ : getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
- MI.addOperand(MCOperand::CreateImm(getBFCInvMask(insn)));
+ uint32_t mask = 0;
+ if (!getBFCInvMask(insn, mask))
+ return false;
+
+ MI.addOperand(MCOperand::CreateImm(mask));
OpIdx += 2;
return true;
}
if (Opcode == ARM::SBFX || Opcode == ARM::UBFX) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
MI.addOperand(MCOperand::CreateImm(slice(insn, 11, 7)));
MI.addOperand(MCOperand::CreateImm(slice(insn, 20, 16) + 1));
@@ -1000,7 +1018,7 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
"Reg operand expected");
MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(ARM::GPRRegClassID,
+ getRegisterEnum(B, ARM::GPRRegClassID,
RmRn ? decodeRm(insn) : decodeRn(insn))));
++OpIdx;
}
@@ -1021,7 +1039,7 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// routed here as well.
// assert(getIBit(insn) == 0 && "I_Bit != '0' reg/reg form");
MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(ARM::GPRRegClassID,
+ getRegisterEnum(B, ARM::GPRRegClassID,
RmRn? decodeRn(insn) : decodeRm(insn))));
++OpIdx;
} else if (Opcode == ARM::MOVi16 || Opcode == ARM::MOVTi16) {
@@ -1046,7 +1064,7 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
}
static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
const TargetInstrDesc &TID = ARMInsts[Opcode];
unsigned short NumDefs = TID.getNumDefs();
@@ -1058,7 +1076,7 @@ static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// Disassemble register def if there is one.
if (NumDefs && (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID)) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRd(insn))));
++OpIdx;
}
@@ -1071,7 +1089,7 @@ static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
if (!isUnary) {
assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
"Reg operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
++OpIdx;
}
@@ -1094,11 +1112,11 @@ static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// Register-controlled shifts have Inst{7} = 0 and Inst{4} = 1.
unsigned Rs = slice(insn, 4, 4);
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
if (Rs) {
// Register-controlled shifts: [Rm, Rs, shift].
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRs(insn))));
// Inst{6-5} encodes the shift opcode.
ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5));
@@ -1121,24 +1139,26 @@ static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
}
static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, bool isStore) {
+ unsigned short NumOps, unsigned &NumOpsAdded, bool isStore, BO B) {
const TargetInstrDesc &TID = ARMInsts[Opcode];
- unsigned short NumDefs = TID.getNumDefs();
bool isPrePost = isPrePostLdSt(TID.TSFlags);
const TargetOperandInfo *OpInfo = TID.OpInfo;
+ if (!OpInfo) return false;
+
unsigned &OpIdx = NumOpsAdded;
OpIdx = 0;
- assert(((!isStore && NumDefs > 0) || (isStore && (NumDefs == 0 || isPrePost)))
+ assert(((!isStore && TID.getNumDefs() > 0) ||
+ (isStore && (TID.getNumDefs() == 0 || isPrePost)))
&& "Invalid arguments");
// Operand 0 of a pre- and post-indexed store is the address base writeback.
if (isPrePost && isStore) {
assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
"Reg operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
++OpIdx;
}
@@ -1149,7 +1169,7 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
"Reg operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRd(insn))));
++OpIdx;
@@ -1157,7 +1177,7 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
if (isPrePost && !isStore) {
assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
"Reg operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
++OpIdx;
}
@@ -1170,7 +1190,7 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
"Reg operand expected");
assert((!isPrePost || (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1))
&& "Index mode or tied_to operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
++OpIdx;
@@ -1194,7 +1214,7 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
MI.addOperand(MCOperand::CreateImm(Offset));
} else {
// Disassemble the offset reg (Rm), shift type, and immediate shift length.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
// Inst{6-5} encodes the shift opcode.
ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5));
@@ -1212,13 +1232,13 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
}
static bool DisassembleLdFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
- return DisassembleLdStFrm(MI, Opcode, insn, NumOps, NumOpsAdded, false);
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+ return DisassembleLdStFrm(MI, Opcode, insn, NumOps, NumOpsAdded, false, B);
}
static bool DisassembleStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
- return DisassembleLdStFrm(MI, Opcode, insn, NumOps, NumOpsAdded, true);
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+ return DisassembleLdStFrm(MI, Opcode, insn, NumOps, NumOpsAdded, true, B);
}
static bool HasDualReg(unsigned Opcode) {
@@ -1232,24 +1252,26 @@ static bool HasDualReg(unsigned Opcode) {
}
static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, bool isStore) {
+ unsigned short NumOps, unsigned &NumOpsAdded, bool isStore, BO B) {
const TargetInstrDesc &TID = ARMInsts[Opcode];
- unsigned short NumDefs = TID.getNumDefs();
bool isPrePost = isPrePostLdSt(TID.TSFlags);
const TargetOperandInfo *OpInfo = TID.OpInfo;
+ if (!OpInfo) return false;
+
unsigned &OpIdx = NumOpsAdded;
OpIdx = 0;
- assert(((!isStore && NumDefs > 0) || (isStore && (NumDefs == 0 || isPrePost)))
+ assert(((!isStore && TID.getNumDefs() > 0) ||
+ (isStore && (TID.getNumDefs() == 0 || isPrePost)))
&& "Invalid arguments");
// Operand 0 of a pre- and post-indexed store is the address base writeback.
if (isPrePost && isStore) {
assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
"Reg operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
++OpIdx;
}
@@ -1262,13 +1284,13 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
"Reg operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRd(insn))));
++OpIdx;
// Fill in LDRD and STRD's second operand.
if (DualReg) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRd(insn) + 1)));
++OpIdx;
}
@@ -1277,7 +1299,7 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
if (isPrePost && !isStore) {
assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
"Reg operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
++OpIdx;
}
@@ -1290,7 +1312,7 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
"Reg operand expected");
assert((!isPrePost || (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1))
&& "Index mode or tied_to operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
++OpIdx;
@@ -1315,7 +1337,7 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
MI.addOperand(MCOperand::CreateImm(Offset));
} else {
// Disassemble the offset reg (Rm).
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
unsigned Offset = ARM_AM::getAM3Opc(AddrOpcode, 0);
MI.addOperand(MCOperand::CreateImm(Offset));
@@ -1326,13 +1348,14 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
}
static bool DisassembleLdMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
- return DisassembleLdStMiscFrm(MI, Opcode, insn, NumOps, NumOpsAdded, false);
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+ return DisassembleLdStMiscFrm(MI, Opcode, insn, NumOps, NumOpsAdded, false,
+ B);
}
static bool DisassembleStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
- return DisassembleLdStMiscFrm(MI, Opcode, insn, NumOps, NumOpsAdded, true);
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+ return DisassembleLdStMiscFrm(MI, Opcode, insn, NumOps, NumOpsAdded, true, B);
}
// The algorithm for disassembly of LdStMulFrm is different from others because
@@ -1340,7 +1363,7 @@ static bool DisassembleStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// and operand 1 (the AM4 mode imm). After operand 3, we need to populate the
// reglist with each affected register encoded as an MCOperand.
static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
assert(NumOps >= 5 && "LdStMulFrm expects NumOps >= 5");
@@ -1348,7 +1371,7 @@ static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
OpIdx = 0;
- unsigned Base = getRegisterEnum(ARM::GPRRegClassID, decodeRn(insn));
+ unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn));
// Writeback to base, if necessary.
if (Opcode == ARM::LDM_UPD || Opcode == ARM::STM_UPD) {
@@ -1372,7 +1395,7 @@ static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned RegListBits = insn & ((1 << 16) - 1);
for (unsigned i = 0; i < 16; ++i) {
if ((RegListBits >> i) & 1) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
i)));
++OpIdx;
}
@@ -1388,9 +1411,11 @@ static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
//
// SWP, SWPB: Rd Rm Rn
static bool DisassembleLdStExFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
unsigned &OpIdx = NumOpsAdded;
OpIdx = 0;
@@ -1404,29 +1429,29 @@ static bool DisassembleLdStExFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
bool isDW = (Opcode == ARM::LDREXD || Opcode == ARM::STREXD);
// Add the destination operand.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRd(insn))));
++OpIdx;
// Store register Exclusive needs a source operand.
if (isStore) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
++OpIdx;
if (isDW) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn)+1)));
++OpIdx;
}
} else if (isDW) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRd(insn)+1)));
++OpIdx;
}
// Finally add the pointer operand.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
++OpIdx;
@@ -1438,7 +1463,7 @@ static bool DisassembleLdStExFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// PKHBT, PKHTB: Rd Rn Rm , LSL/ASR #imm5
// RBIT, REV, REV16, REVSH: Rd Rm
static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
unsigned &OpIdx = NumOpsAdded;
@@ -1452,18 +1477,18 @@ static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID;
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRd(insn))));
++OpIdx;
if (ThreeReg) {
assert(NumOps >= 4 && "Expect >= 4 operands");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
++OpIdx;
}
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
++OpIdx;
@@ -1485,7 +1510,7 @@ static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// The 2nd operand register is Rn and the 3rd operand register is Rm for the
// three register operand form. Otherwise, Rn=0b1111 and only Rm is used.
static bool DisassembleExtFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
unsigned &OpIdx = NumOpsAdded;
@@ -1499,17 +1524,17 @@ static bool DisassembleExtFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID;
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRd(insn))));
++OpIdx;
if (ThreeReg) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
++OpIdx;
}
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
++OpIdx;
@@ -1591,7 +1616,7 @@ static uint64_t VFPExpandImm(unsigned char byte, unsigned N) {
// VCVTDS, VCVTSD: converts between double-precision and single-precision
// The rest of the instructions have homogeneous [VFP]Rd and [VFP]Rm registers.
static bool DisassembleVFPUnaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
assert(NumOps >= 1 && "VFPUnaryFrm expects NumOps >= 1");
@@ -1606,7 +1631,7 @@ static bool DisassembleVFPUnaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
bool isSP = (RegClass == ARM::SPRRegClassID);
MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(RegClass, decodeVFPRd(insn, isSP))));
+ getRegisterEnum(B, RegClass, decodeVFPRd(insn, isSP))));
++OpIdx;
// Early return for compare with zero instructions.
@@ -1620,7 +1645,7 @@ static bool DisassembleVFPUnaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
isSP = (RegClass == ARM::SPRRegClassID);
MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(RegClass, decodeVFPRm(insn, isSP))));
+ getRegisterEnum(B, RegClass, decodeVFPRm(insn, isSP))));
++OpIdx;
return true;
@@ -1631,7 +1656,7 @@ static bool DisassembleVFPUnaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// InOperandList to that of the dst. As far as asm printing is concerned, this
// tied_to operand is simply skipped.
static bool DisassembleVFPBinaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
assert(NumOps >= 3 && "VFPBinaryFrm expects NumOps >= 3");
@@ -1647,7 +1672,7 @@ static bool DisassembleVFPBinaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
bool isSP = (RegClass == ARM::SPRRegClassID);
MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(RegClass, decodeVFPRd(insn, isSP))));
+ getRegisterEnum(B, RegClass, decodeVFPRd(insn, isSP))));
++OpIdx;
// Skip tied_to operand constraint.
@@ -1658,11 +1683,11 @@ static bool DisassembleVFPBinaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
}
MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(RegClass, decodeVFPRn(insn, isSP))));
+ getRegisterEnum(B, RegClass, decodeVFPRn(insn, isSP))));
++OpIdx;
MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(RegClass, decodeVFPRm(insn, isSP))));
+ getRegisterEnum(B, RegClass, decodeVFPRm(insn, isSP))));
++OpIdx;
return true;
@@ -1675,12 +1700,13 @@ static bool DisassembleVFPBinaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// A8.6.297 vcvt (floating-point and fixed-point)
// Dd|Sd Dd|Sd(TIED_TO) #fbits(= 16|32 - UInt(imm4:i))
static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
assert(NumOps >= 2 && "VFPConv1Frm expects NumOps >= 2");
const TargetInstrDesc &TID = ARMInsts[Opcode];
const TargetOperandInfo *OpInfo = TID.OpInfo;
+ if (!OpInfo) return false;
bool SP = slice(insn, 8, 8) == 0; // A8.6.295 & A8.6.297
bool fixed_point = slice(insn, 17, 17) == 1; // A8.6.297
@@ -1692,7 +1718,7 @@ static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
int size = slice(insn, 7, 7) == 0 ? 16 : 32;
int fbits = size - (slice(insn,3,0) << 1 | slice(insn,5,5));
MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(RegClassID,
+ getRegisterEnum(B, RegClassID,
decodeVFPRd(insn, SP))));
assert(TID.getOperandConstraint(1, TOI::TIED_TO) != -1 &&
@@ -1712,15 +1738,15 @@ static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
if (slice(insn, 18, 18) == 1) { // to_integer operation
d = decodeVFPRd(insn, true /* Is Single Precision */);
MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(ARM::SPRRegClassID, d)));
+ getRegisterEnum(B, ARM::SPRRegClassID, d)));
m = decodeVFPRm(insn, SP);
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClassID, m)));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, m)));
} else {
d = decodeVFPRd(insn, SP);
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClassID, d)));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, d)));
m = decodeVFPRm(insn, true /* Is Single Precision */);
MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(ARM::SPRRegClassID, m)));
+ getRegisterEnum(B, ARM::SPRRegClassID, m)));
}
NumOpsAdded = 2;
}
@@ -1731,13 +1757,13 @@ static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
// VMOVRS - A8.6.330
// Rt => Rd; Sn => UInt(Vn:N)
static bool DisassembleVFPConv2Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
assert(NumOps >= 2 && "VFPConv2Frm expects NumOps >= 2");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRd(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::SPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID,
decodeVFPRn(insn, true))));
NumOpsAdded = 2;
return true;
@@ -1749,29 +1775,29 @@ static bool DisassembleVFPConv2Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
// VMOVRRS - A8.6.331
// Rt => Rd; Rt2 => Rn; Sm => UInt(Vm:M); Sm1 = Sm+1
static bool DisassembleVFPConv3Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
assert(NumOps >= 3 && "VFPConv3Frm expects NumOps >= 3");
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
unsigned &OpIdx = NumOpsAdded;
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRd(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
OpIdx = 2;
if (OpInfo[OpIdx].RegClass == ARM::SPRRegClassID) {
unsigned Sm = decodeVFPRm(insn, true);
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::SPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID,
Sm)));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::SPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID,
Sm+1)));
OpIdx += 2;
} else {
MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(ARM::DPRRegClassID,
+ getRegisterEnum(B, ARM::DPRRegClassID,
decodeVFPRm(insn, false))));
++OpIdx;
}
@@ -1781,13 +1807,13 @@ static bool DisassembleVFPConv3Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
// VMOVSR - A8.6.330
// Rt => Rd; Sn => UInt(Vn:N)
static bool DisassembleVFPConv4Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
assert(NumOps >= 2 && "VFPConv4Frm expects NumOps >= 2");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::SPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID,
decodeVFPRn(insn, true))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRd(insn))));
NumOpsAdded = 2;
return true;
@@ -1799,7 +1825,7 @@ static bool DisassembleVFPConv4Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
// VMOVRRS - A8.6.331
// Rt => Rd; Rt2 => Rn; Sm => UInt(Vm:M); Sm1 = Sm+1
static bool DisassembleVFPConv5Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
assert(NumOps >= 3 && "VFPConv5Frm expects NumOps >= 3");
@@ -1810,21 +1836,21 @@ static bool DisassembleVFPConv5Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
if (OpInfo[OpIdx].RegClass == ARM::SPRRegClassID) {
unsigned Sm = decodeVFPRm(insn, true);
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::SPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID,
Sm)));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::SPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID,
Sm+1)));
OpIdx += 2;
} else {
MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(ARM::DPRRegClassID,
+ getRegisterEnum(B, ARM::DPRRegClassID,
decodeVFPRm(insn, false))));
++OpIdx;
}
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRd(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
OpIdx += 2;
return true;
@@ -1833,7 +1859,7 @@ static bool DisassembleVFPConv5Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
// VFP Load/Store Instructions.
// VLDRD, VLDRS, VSTRD, VSTRS
static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
assert(NumOps >= 3 && "VFPLdStFrm expects NumOps >= 3");
@@ -1843,9 +1869,9 @@ static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// Extract Dd/Sd for operand 0.
unsigned RegD = decodeVFPRd(insn, isSPVFP);
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClassID, RegD)));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, RegD)));
- unsigned Base = getRegisterEnum(ARM::GPRRegClassID, decodeRn(insn));
+ unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn));
MI.addOperand(MCOperand::CreateReg(Base));
// Next comes the AM5 Opcode.
@@ -1865,7 +1891,7 @@ static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
//
// VLDMD[_UPD], VLDMS[_UPD], VSTMD[_UPD], VSTMS[_UPD]
static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
assert(NumOps >= 5 && "VFPLdStMulFrm expects NumOps >= 5");
@@ -1873,7 +1899,7 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
OpIdx = 0;
- unsigned Base = getRegisterEnum(ARM::GPRRegClassID, decodeRn(insn));
+ unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn));
// Writeback to base, if necessary.
if (Opcode == ARM::VLDMD_UPD || Opcode == ARM::VLDMS_UPD ||
@@ -1886,6 +1912,12 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// Next comes the AM5 Opcode.
ARM_AM::AMSubMode SubMode = getAMSubModeForBits(getPUBits(insn));
+ // Must be either "ia" or "db" submode.
+ if (SubMode != ARM_AM::ia && SubMode != ARM_AM::db) {
+ DEBUG(errs() << "Illegal addressing mode 5 sub-mode!\n");
+ return false;
+ }
+
unsigned char Imm8 = insn & 0xFF;
MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(SubMode, Imm8)));
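The "ia"/"db" guard added above matches the assertion inside
ARM_AM::getAM5Opc, which would otherwise fire on the remaining sub-modes.
Assuming the packing used by the ARM_AM helpers (an assumption about
ARMAddressingModes.h, not something this patch changes), the immediate
built here amounts to:

// Sketch of the assumed AM5 immediate layout:
//   bits 7-0 : Imm8, the raw count field from the instruction
//   bits 8+  : the AMSubMode (only ia or db reaches this point)
static unsigned getAM5OpcSketch(unsigned SubMode, unsigned char Imm8) {
  return (SubMode << 8) | Imm8;
}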
@@ -1906,7 +1938,7 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// Fill the variadic part of reglist.
unsigned Regs = isSPVFP ? Imm8 : Imm8/2;
for (unsigned i = 0; i < Regs; ++i) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID,
RegD + i)));
++OpIdx;
}
@@ -1920,7 +1952,7 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// FCONSTS (SPR and a VFPf32Imm operand)
// VMRS/VMSR (GPR operand)
static bool DisassembleVFPMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
unsigned &OpIdx = NumOpsAdded;
@@ -1935,13 +1967,13 @@ static bool DisassembleVFPMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned RegEnum = 0;
switch (OpInfo[0].RegClass) {
case ARM::DPRRegClassID:
- RegEnum = getRegisterEnum(ARM::DPRRegClassID, decodeVFPRd(insn, false));
+ RegEnum = getRegisterEnum(B, ARM::DPRRegClassID, decodeVFPRd(insn, false));
break;
case ARM::SPRRegClassID:
- RegEnum = getRegisterEnum(ARM::SPRRegClassID, decodeVFPRd(insn, true));
+ RegEnum = getRegisterEnum(B, ARM::SPRRegClassID, decodeVFPRd(insn, true));
break;
case ARM::GPRRegClassID:
- RegEnum = getRegisterEnum(ARM::GPRRegClassID, decodeRd(insn));
+ RegEnum = getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn));
break;
default:
assert(0 && "Invalid reg class id");
@@ -1986,7 +2018,7 @@ static bool DisassembleVFPMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// D = Inst{22}, Vd = Inst{15-12}
static unsigned decodeNEONRd(uint32_t insn) {
return ((insn >> ARMII::NEON_D_BitShift) & 1) << 4
- | (insn >> ARMII::NEON_RegRdShift) & ARMII::NEONRegMask;
+ | ((insn >> ARMII::NEON_RegRdShift) & ARMII::NEONRegMask);
}
// Extract/Decode NEON N/Vn:
@@ -1997,7 +2029,7 @@ static unsigned decodeNEONRd(uint32_t insn) {
// N = Inst{7}, Vn = Inst{19-16}
static unsigned decodeNEONRn(uint32_t insn) {
return ((insn >> ARMII::NEON_N_BitShift) & 1) << 4
- | (insn >> ARMII::NEON_RegRnShift) & ARMII::NEONRegMask;
+ | ((insn >> ARMII::NEON_RegRnShift) & ARMII::NEONRegMask);
}
// Extract/Decode NEON M/Vm:
@@ -2008,7 +2040,7 @@ static unsigned decodeNEONRn(uint32_t insn) {
// M = Inst{5}, Vm = Inst{3-0}
static unsigned decodeNEONRm(uint32_t insn) {
return ((insn >> ARMII::NEON_M_BitShift) & 1) << 4
- | (insn >> ARMII::NEON_RegRmShift) & ARMII::NEONRegMask;
+ | ((insn >> ARMII::NEON_RegRmShift) & ARMII::NEONRegMask);
}
namespace {
@@ -2072,7 +2104,7 @@ static uint64_t decodeN1VImm(uint32_t insn, ElemSize esize) {
case ESize64: {
for (unsigned i = 0; i < 8; ++i)
if ((Imm8 >> i) & 1)
- Imm64 |= 0xFF << 8*i;
+ Imm64 |= (uint64_t)0xFF << 8*i;
break;
}
default:
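The (uint64_t) cast in the hunk above is the entire fix; a sketch of why,
assuming a 32-bit int (true on the hosts this code targets):

// Without widening, 0xFF << 8*i is computed in int and overruns its width
// for byte indexes i >= 4, so the top half of the immediate was undefined.
#include <cassert>
#include <cstdint>

static uint64_t expandByteMask(uint8_t Imm8) {
  uint64_t Imm64 = 0;
  for (unsigned i = 0; i < 8; ++i)
    if ((Imm8 >> i) & 1)
      Imm64 |= (uint64_t)0xFF << 8 * i; // widen before shifting
  return Imm64;
}

int main() {
  assert(expandByteMask(0x81) == 0xFF000000000000FFull);
  return 0;
}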
@@ -2200,6 +2232,22 @@ static unsigned decodeN3VImm(uint32_t insn) {
return (insn >> 8) & 0xF;
}
+static bool UseDRegPair(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return false;
+ case ARM::VLD1q8_UPD:
+ case ARM::VLD1q16_UPD:
+ case ARM::VLD1q32_UPD:
+ case ARM::VLD1q64_UPD:
+ case ARM::VST1q8_UPD:
+ case ARM::VST1q16_UPD:
+ case ARM::VST1q32_UPD:
+ case ARM::VST1q64_UPD:
+ return true;
+ }
+}
+
// VLD*
// D[d] D[d2] ... Rn [TIED_TO Rn] align [Rm]
// VLD*LN*
@@ -2211,7 +2259,8 @@ static unsigned decodeN3VImm(uint32_t insn) {
//
// Correctly set VLD*/VST*'s TIED_TO GPR, as the asm printer needs it.
static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, bool Store, bool DblSpaced) {
+ unsigned short NumOps, unsigned &NumOpsAdded, bool Store, bool DblSpaced,
+ BO B) {
const TargetInstrDesc &TID = ARMInsts[Opcode];
const TargetOperandInfo *OpInfo = TID.OpInfo;
@@ -2239,7 +2288,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
// LLVM Addressing Mode #6.
unsigned RmEnum = 0;
if (WB && Rm != 13)
- RmEnum = getRegisterEnum(ARM::GPRRegClassID, Rm);
+ RmEnum = getRegisterEnum(B, ARM::GPRRegClassID, Rm);
if (Store) {
// Consume possible WB, AddrMode6, possible increment reg, the DPR/QPR's,
@@ -2248,14 +2297,14 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
"Reg operand expected");
if (WB) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
Rn)));
++OpIdx;
}
assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
OpInfo[OpIdx + 1].RegClass == 0 && "Addrmode #6 Operands expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
Rn)));
MI.addOperand(MCOperand::CreateImm(0)); // Alignment ignored?
OpIdx += 2;
@@ -2272,10 +2321,9 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
RegClass = OpInfo[OpIdx].RegClass;
while (OpIdx < NumOps && OpInfo[OpIdx].RegClass == RegClass) {
- if (Opcode >= ARM::VST1q16 && Opcode <= ARM::VST1q8)
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClass,Rd,true)));
- else
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClass,Rd)));
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, RegClass, Rd,
+ UseDRegPair(Opcode))));
Rd += Inc;
++OpIdx;
}
@@ -2293,23 +2341,22 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
RegClass = OpInfo[0].RegClass;
while (OpIdx < NumOps && OpInfo[OpIdx].RegClass == RegClass) {
- if (Opcode >= ARM::VLD1q16 && Opcode <= ARM::VLD1q8)
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClass,Rd,true)));
- else
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClass,Rd)));
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, RegClass, Rd,
+ UseDRegPair(Opcode))));
Rd += Inc;
++OpIdx;
}
if (WB) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
Rn)));
++OpIdx;
}
assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
OpInfo[OpIdx + 1].RegClass == 0 && "Addrmode #6 Operands expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
Rn)));
MI.addOperand(MCOperand::CreateImm(0)); // Alignment ignored?
OpIdx += 2;
@@ -2342,7 +2389,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
// Find out about double-spaced-ness of the Opcode and pass it on to
// DisassembleNLdSt0().
static bool DisassembleNLdSt(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
const StringRef Name = ARMInsts[Opcode].Name;
bool DblSpaced = false;
@@ -2377,13 +2424,13 @@ static bool DisassembleNLdSt(MCInst &MI, unsigned Opcode, uint32_t insn,
}
return DisassembleNLdSt0(MI, Opcode, insn, NumOps, NumOpsAdded,
- slice(insn, 21, 21) == 0, DblSpaced);
+ slice(insn, 21, 21) == 0, DblSpaced, B);
}
// VMOV (immediate)
// Qd/Dd imm
static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
const TargetInstrDesc &TID = ARMInsts[Opcode];
const TargetOperandInfo *OpInfo = TID.OpInfo;
@@ -2395,7 +2442,7 @@ static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode,
"Expect 1 reg operand followed by 1 imm operand");
// Qd/Dd = Inst{22:15-12} => NEON Rd
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(OpInfo[0].RegClass,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[0].RegClass,
decodeNEONRd(insn))));
ElemSize esize = ESizeNA;
@@ -2415,6 +2462,7 @@ static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode,
case ARM::VMOVv1i64:
case ARM::VMOVv2i64:
esize = ESize64;
+ break;
default:
assert(0 && "Unreachable code!");
return false;
@@ -2451,7 +2499,7 @@ enum N2VFlag {
//
// Others
static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, N2VFlag Flag = N2V_None) {
+ unsigned short NumOps, unsigned &NumOpsAdded, N2VFlag Flag, BO B) {
const TargetInstrDesc &TID = ARMInsts[Opc];
const TargetOperandInfo *OpInfo = TID.OpInfo;
@@ -2478,7 +2526,7 @@ static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn,
}
// Qd/Dd = Inst{22:15-12} => NEON Rd
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(OpInfo[OpIdx].RegClass,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
decodeNEONRd(insn))));
++OpIdx;
@@ -2490,7 +2538,7 @@ static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn,
}
// Dm = Inst{5:3-0} => NEON Rm
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(OpInfo[OpIdx].RegClass,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
decodeNEONRm(insn))));
++OpIdx;
@@ -2523,21 +2571,22 @@ static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn,
}
static bool DisassembleN2RegFrm(MCInst &MI, unsigned Opc, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded);
+ return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded,
+ N2V_None, B);
}
static bool DisassembleNVCVTFrm(MCInst &MI, unsigned Opc, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded,
- N2V_VectorConvert_Between_Float_Fixed);
+ N2V_VectorConvert_Between_Float_Fixed, B);
}
static bool DisassembleNVecDupLnFrm(MCInst &MI, unsigned Opc, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded,
- N2V_VectorDupLane);
+ N2V_VectorDupLane, B);
}
// Vector Shift [Accumulate] Instructions.
@@ -2547,7 +2596,7 @@ static bool DisassembleNVecDupLnFrm(MCInst &MI, unsigned Opc, uint32_t insn,
// VSHLLi16, VSHLLi32, VSHLLi8: Qd Dm imm (== size)
//
static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, bool LeftShift) {
+ unsigned short NumOps, unsigned &NumOpsAdded, bool LeftShift, BO B) {
const TargetInstrDesc &TID = ARMInsts[Opcode];
const TargetOperandInfo *OpInfo = TID.OpInfo;
@@ -2564,7 +2613,7 @@ static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn,
OpIdx = 0;
// Qd/Dd = Inst{22:15-12} => NEON Rd
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(OpInfo[OpIdx].RegClass,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
decodeNEONRd(insn))));
++OpIdx;
@@ -2579,7 +2628,7 @@ static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn,
"Reg operand expected");
// Qm/Dm = Inst{5:3-0} => NEON Rm
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(OpInfo[OpIdx].RegClass,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
decodeNEONRm(insn))));
++OpIdx;
@@ -2611,15 +2660,17 @@ static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn,
// Left shift instructions.
static bool DisassembleN2RegVecShLFrm(MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- return DisassembleNVectorShift(MI, Opcode, insn, NumOps, NumOpsAdded, true);
+ return DisassembleNVectorShift(MI, Opcode, insn, NumOps, NumOpsAdded, true,
+ B);
}
// Right shift instructions have different shift amount interpretation.
static bool DisassembleN2RegVecShRFrm(MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- return DisassembleNVectorShift(MI, Opcode, insn, NumOps, NumOpsAdded, false);
+ return DisassembleNVectorShift(MI, Opcode, insn, NumOps, NumOpsAdded, false,
+ B);
}
namespace {
@@ -2644,7 +2695,7 @@ enum N3VFlag {
//
// Others
static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, N3VFlag Flag = N3V_None) {
+ unsigned short NumOps, unsigned &NumOpsAdded, N3VFlag Flag, BO B) {
const TargetInstrDesc &TID = ARMInsts[Opcode];
const TargetOperandInfo *OpInfo = TID.OpInfo;
@@ -2673,7 +2724,7 @@ static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn,
}
// Qd/Dd = Inst{22:15-12} => NEON Rd
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(OpInfo[OpIdx].RegClass,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
decodeNEONRd(insn))));
++OpIdx;
@@ -2688,7 +2739,7 @@ static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn,
// or
// Dm = Inst{5:3-0} => NEON Rm
MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(OpInfo[OpIdx].RegClass,
+ getRegisterEnum(B, OpInfo[OpIdx].RegClass,
VdVnVm ? decodeNEONRn(insn)
: decodeNEONRm(insn))));
++OpIdx;
@@ -2708,7 +2759,7 @@ static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn,
: decodeNEONRn(insn);
MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(OpInfo[OpIdx].RegClass, m)));
+ getRegisterEnum(B, OpInfo[OpIdx].RegClass, m)));
++OpIdx;
if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0
@@ -2732,27 +2783,28 @@ static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn,
}
static bool DisassembleN3RegFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded);
+ return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded,
+ N3V_None, B);
}
static bool DisassembleN3RegVecShFrm(MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded,
- N3V_VectorShift);
+ N3V_VectorShift, B);
}
static bool DisassembleNVecExtractFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded,
- N3V_VectorExtract);
+ N3V_VectorExtract, B);
}
static bool DisassembleNVecMulScalarFrm(MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded,
- N3V_Multiply_By_Scalar);
+ N3V_Multiply_By_Scalar, B);
}
// Vector Table Lookup
@@ -2762,10 +2814,11 @@ static bool DisassembleNVecMulScalarFrm(MCInst &MI, unsigned Opcode,
// VTBL3, VTBX3: Dd [Dd(TIED_TO)] Dn Dn+1 Dn+2 Dm
// VTBL4, VTBX4: Dd [Dd(TIED_TO)] Dn Dn+1 Dn+2 Dn+3 Dm
static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
const TargetInstrDesc &TID = ARMInsts[Opcode];
const TargetOperandInfo *OpInfo = TID.OpInfo;
+ if (!OpInfo) return false;
assert(NumOps >= 3 &&
OpInfo[0].RegClass == ARM::DPRRegClassID &&
@@ -2786,7 +2839,7 @@ static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned Len = slice(insn, 9, 8) + 1;
// Dd (the destination vector)
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::DPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID,
decodeNEONRd(insn))));
++OpIdx;
@@ -2801,7 +2854,7 @@ static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
for (unsigned i = 0; i < Len; ++i) {
assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::DPRRegClassID &&
"Reg operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::DPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID,
Rn + i)));
++OpIdx;
}
@@ -2809,7 +2862,7 @@ static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// Dm (the index vector)
assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::DPRRegClassID &&
"Reg operand (index vector) expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::DPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID,
decodeNEONRm(insn))));
++OpIdx;
@@ -2825,13 +2878,13 @@ static bool DisassembleNEONFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// Vector Get Lane (move scalar to ARM core register) Instructions.
// VGETLNi32, VGETLNs16, VGETLNs8, VGETLNu16, VGETLNu8: Rt Dn index
static bool DisassembleNEONGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
const TargetInstrDesc &TID = ARMInsts[Opcode];
- unsigned short NumDefs = TID.getNumDefs();
const TargetOperandInfo *OpInfo = TID.OpInfo;
+ if (!OpInfo) return false;
- assert(NumDefs == 1 && NumOps >= 3 &&
+ assert(TID.getNumDefs() == 1 && NumOps >= 3 &&
OpInfo[0].RegClass == ARM::GPRRegClassID &&
OpInfo[1].RegClass == ARM::DPRRegClassID &&
OpInfo[2].RegClass == 0 &&
@@ -2843,11 +2896,11 @@ static bool DisassembleNEONGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
: ESize32);
// Rt = Inst{15-12} => ARM Rd
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRd(insn))));
// Dn = Inst{7:19-16} => NEON Rn
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::DPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID,
decodeNEONRn(insn))));
MI.addOperand(MCOperand::CreateImm(decodeNVLaneOpIndex(insn, esize)));
@@ -2859,13 +2912,13 @@ static bool DisassembleNEONGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// Vector Set Lane (move ARM core register to scalar) Instructions.
// VSETLNi16, VSETLNi32, VSETLNi8: Dd Dd (TIED_TO) Rt index
static bool DisassembleNEONSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
const TargetInstrDesc &TID = ARMInsts[Opcode];
- unsigned short NumDefs = TID.getNumDefs();
const TargetOperandInfo *OpInfo = TID.OpInfo;
+ if (!OpInfo) return false;
- assert(NumDefs == 1 && NumOps >= 3 &&
+ assert(TID.getNumDefs() == 1 && NumOps >= 3 &&
OpInfo[0].RegClass == ARM::DPRRegClassID &&
OpInfo[1].RegClass == ARM::DPRRegClassID &&
TID.getOperandConstraint(1, TOI::TIED_TO) != -1 &&
@@ -2879,14 +2932,14 @@ static bool DisassembleNEONSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
: ESize32);
// Dd = Inst{7:19-16} => NEON Rn
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::DPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID,
decodeNEONRn(insn))));
// TIED_TO operand.
MI.addOperand(MCOperand::CreateReg(0));
// Rt = Inst{15-12} => ARM Rd
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRd(insn))));
MI.addOperand(MCOperand::CreateImm(decodeNVLaneOpIndex(insn, esize)));
@@ -2898,7 +2951,7 @@ static bool DisassembleNEONSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// Vector Duplicate Instructions (from ARM core register to all elements).
// VDUP8d, VDUP16d, VDUP32d, VDUP8q, VDUP16q, VDUP32q: Qd/Dd Rt
static bool DisassembleNEONDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
@@ -2911,11 +2964,11 @@ static bool DisassembleNEONDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned RegClass = OpInfo[0].RegClass;
// Qd/Dd = Inst{7:19-16} => NEON Rn
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClass,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClass,
decodeNEONRn(insn))));
// Rt = Inst{15-12} => ARM Rd
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRd(insn))));
NumOpsAdded = 2;
@@ -2945,13 +2998,13 @@ static inline bool PreLoadOpcode(unsigned Opcode) {
}
static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
// Preload Data/Instruction requires either 2 or 4 operands.
// PLDi, PLDWi, PLIi: Rn [+/-]imm12 add = (U == '1')
// PLDr[a|m], PLDWr[a|m], PLIr[a|m]: Rn Rm addrmode2_opc
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
if (Opcode == ARM::PLDi || Opcode == ARM::PLDWi || Opcode == ARM::PLIi) {
@@ -2961,7 +3014,7 @@ static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
MI.addOperand(MCOperand::CreateImm(Offset));
NumOpsAdded = 2;
} else {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
@@ -2982,7 +3035,7 @@ static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
}
static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
if (MemBarrierInstr(insn))
return true;
@@ -3031,7 +3084,7 @@ static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
}
if (PreLoadOpcode(Opcode))
- return DisassemblePreLoadFrm(MI, Opcode, insn, NumOps, NumOpsAdded);
+ return DisassemblePreLoadFrm(MI, Opcode, insn, NumOps, NumOpsAdded, B);
assert(0 && "Unexpected misc instruction!");
return false;
@@ -3147,7 +3200,7 @@ bool ARMBasicMCBuilder::BuildIt(MCInst &MI, uint32_t insn) {
unsigned NumOpsAdded = 0;
bool OK = (*Disasm)(MI, Opcode, insn, NumOps, NumOpsAdded, this);
- if (!OK) return false;
+ if (!OK || this->Err != 0) return false;
if (NumOpsAdded >= NumOps)
return true;
@@ -3156,6 +3209,49 @@ bool ARMBasicMCBuilder::BuildIt(MCInst &MI, uint32_t insn) {
return TryPredicateAndSBitModifier(MI, Opcode, insn, NumOps - NumOpsAdded);
}
+// A8.3 Conditional execution
+// A8.3.1 Pseudocode details of conditional execution
+// Condition bits '111x' indicate the instruction is always executed.
+static uint32_t CondCode(uint32_t CondField) {
+ if (CondField == 0xF)
+ return ARMCC::AL;
+ return CondField;
+}
+
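A standalone check of the mapping (ARMCC::AL is 14 in this tree; the enum
value below is a stand-in, not taken from the patch):

#include <cassert>
#include <cstdint>

enum { AL = 14 };                   // stand-in for ARMCC::AL
static uint32_t condCode(uint32_t F) { return F == 0xF ? AL : F; }

int main() {
  assert(condCode(0x0) == 0x0);     // EQ passes through unchanged
  assert(condCode(0xE) == AL);      // AL encoded directly
  assert(condCode(0xF) == AL);      // '1111' also always executes (A8.3.1)
  return 0;
}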
+/// DoPredicateOperands - DoPredicateOperands processes the predicate operands
+/// of some Thumb instructions which come before the reglist operands. It
+/// returns true if the two predicate operands have been processed.
+bool ARMBasicMCBuilder::DoPredicateOperands(MCInst& MI, unsigned Opcode,
+ uint32_t /* insn */, unsigned short NumOpsRemaining) {
+
+ assert(NumOpsRemaining > 0 && "Invalid argument");
+
+ const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ unsigned Idx = MI.getNumOperands();
+
+ // First, we check whether this instr specifies the PredicateOperand through
+ // a pair of TargetOperandInfos with isPredicate() property.
+ if (NumOpsRemaining >= 2 &&
+ OpInfo[Idx].isPredicate() && OpInfo[Idx+1].isPredicate() &&
+ OpInfo[Idx].RegClass == 0 && OpInfo[Idx+1].RegClass == ARM::CCRRegClassID)
+ {
+ // If we are inside an IT block, get the IT condition bits maintained via
+ // ARMBasicMCBuilder::ITState[7:0], through ARMBasicMCBuilder::GetITCond().
+ // See also A2.5.2.
+ if (InITBlock())
+ MI.addOperand(MCOperand::CreateImm(GetITCond()));
+ else
+ MI.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ MI.addOperand(MCOperand::CreateReg(ARM::CPSR));
+ return true;
+ }
+
+ return false;
+}
+
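A hypothetical call site, sketched only to show the intended shape (the
reglist loop and its variable names are illustrative, not from this patch):

// Inside a Thumb reglist disassemble function, before the list operands:
if (!B->DoPredicateOperands(MI, Opcode, insn, NumOps - NumOpsAdded))
  return false;             // no pred-imm/CCR pair where one was expected
NumOpsAdded += 2;           // the predicate imm and ARM::CPSR were appended
for (unsigned i = 0; i < 16; ++i)
  if ((RegListBits >> i) & 1) {
    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, i)));
    ++NumOpsAdded;
  }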
+/// TryPredicateAndSBitModifier - TryPredicateAndSBitModifier tries to process
+/// the possible Predicate and SBitModifier, to build the remaining MCOperand
+/// constituents.
bool ARMBasicMCBuilder::TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode,
uint32_t insn, unsigned short NumOpsRemaining) {
@@ -3183,27 +3279,24 @@ bool ARMBasicMCBuilder::TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode,
//
// A8.6.16 B
if (Name == "t2Bcc")
- MI.addOperand(MCOperand::CreateImm(slice(insn, 25, 22)));
+ MI.addOperand(MCOperand::CreateImm(CondCode(slice(insn, 25, 22))));
else if (Name == "tBcc")
- MI.addOperand(MCOperand::CreateImm(slice(insn, 11, 8)));
+ MI.addOperand(MCOperand::CreateImm(CondCode(slice(insn, 11, 8))));
else
MI.addOperand(MCOperand::CreateImm(ARMCC::AL));
} else {
- // ARM Instructions. Check condition field.
- int64_t CondVal = getCondField(insn);
- if (CondVal == 0xF)
- MI.addOperand(MCOperand::CreateImm(ARMCC::AL));
- else
- MI.addOperand(MCOperand::CreateImm(CondVal));
+ // ARM instructions get their condition field from Inst{31-28}.
+ MI.addOperand(MCOperand::CreateImm(CondCode(getCondField(insn))));
}
}
MI.addOperand(MCOperand::CreateReg(ARM::CPSR));
Idx += 2;
NumOpsRemaining -= 2;
- if (NumOpsRemaining == 0)
- return true;
}
+ if (NumOpsRemaining == 0)
+ return true;
+
// Next, if OptionalDefOperand exists, we check whether the 'S' bit is set.
if (OpInfo[Idx].isOptionalDef() && OpInfo[Idx].RegClass==ARM::CCRRegClassID) {
MI.addOperand(MCOperand::CreateReg(getSBit(insn) == 1 ? ARM::CPSR : 0));
@@ -3224,7 +3317,7 @@ bool ARMBasicMCBuilder::RunBuildAfterHook(bool Status, MCInst &MI,
if (!SP) return Status;
if (Opcode == ARM::t2IT)
- SP->InitIT(slice(insn, 7, 0));
+ Status = SP->InitIT(slice(insn, 7, 0)) ? Status : false;
else if (InITBlock())
SP->UpdateIT();
@@ -3234,7 +3327,7 @@ bool ARMBasicMCBuilder::RunBuildAfterHook(bool Status, MCInst &MI,
/// Opcode, Format, and NumOperands make up an ARM Basic MCBuilder.
ARMBasicMCBuilder::ARMBasicMCBuilder(unsigned opc, ARMFormat format,
unsigned short num)
- : Opcode(opc), Format(format), NumOps(num), SP(0) {
+ : Opcode(opc), Format(format), NumOps(num), SP(0), Err(0) {
unsigned Idx = (unsigned)format;
assert(Idx < (array_lengthof(FuncPtrs) - 1) && "Unknown format");
Disasm = FuncPtrs[Idx];
@@ -3246,6 +3339,11 @@ ARMBasicMCBuilder::ARMBasicMCBuilder(unsigned opc, ARMFormat format,
/// are responsible for freeing the allocated memory. Caching can be
/// performed by the API clients to improve performance.
ARMBasicMCBuilder *llvm::CreateMCBuilder(unsigned Opcode, ARMFormat Format) {
+ // For "Unknown format", fail by returning a NULL pointer.
+ if ((unsigned)Format >= (array_lengthof(FuncPtrs) - 1)) {
+ DEBUG(errs() << "Unknown format\n");
+ return 0;
+ }
return new ARMBasicMCBuilder(Opcode, Format,
ARMInsts[Opcode].getNumOperands());
diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
index 307523037089..b1d90df34177 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
+++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
@@ -171,30 +171,51 @@ typedef ARMBasicMCBuilder *BO;
typedef bool (*DisassembleFP)(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO Builder);
+/// CreateMCBuilder - Return an ARMBasicMCBuilder that can build up the MC
+/// infrastructure of an MCInst given the Opcode and Format of the instr.
+/// Return NULL if it fails to create/return a proper builder. API clients
+/// are responsible for freeing the allocated memory. Caching can be
+/// performed by the API clients to improve performance.
+extern ARMBasicMCBuilder *CreateMCBuilder(unsigned Opcode, ARMFormat Format);
+
/// ARMBasicMCBuilder - ARMBasicMCBuilder represents an ARM MCInst builder that
/// knows how to build up the MCOperand list.
class ARMBasicMCBuilder {
+ friend ARMBasicMCBuilder *CreateMCBuilder(unsigned Opcode, ARMFormat Format);
unsigned Opcode;
ARMFormat Format;
unsigned short NumOps;
DisassembleFP Disasm;
Session *SP;
+ int Err; // !=0 if the builder encounters some error condition during build.
+
+private:
+ /// Opcode, Format, and NumOperands make up an ARM Basic MCBuilder.
+ ARMBasicMCBuilder(unsigned opc, ARMFormat format, unsigned short num);
public:
ARMBasicMCBuilder(ARMBasicMCBuilder &B)
: Opcode(B.Opcode), Format(B.Format), NumOps(B.NumOps), Disasm(B.Disasm),
- SP(B.SP)
- {}
-
- /// Opcode, Format, and NumOperands make up an ARM Basic MCBuilder.
- ARMBasicMCBuilder(unsigned opc, ARMFormat format, unsigned short num);
+ SP(B.SP) {
+ Err = 0;
+ }
virtual ~ARMBasicMCBuilder() {}
- void setSession(Session *sp) {
+ void SetSession(Session *sp) {
SP = sp;
}
+ void SetErr(int ErrCode) {
+ Err = ErrCode;
+ }
+
+  /// DoPredicateOperands - DoPredicateOperands processes the predicate operands
+ /// of some Thumb instructions which come before the reglist operands. It
+ /// returns true if the two predicate operands have been processed.
+ bool DoPredicateOperands(MCInst& MI, unsigned Opcode,
+  uint32_t insn, unsigned short NumOpsRemaining);
+
/// TryPredicateAndSBitModifier - TryPredicateAndSBitModifier tries to process
/// the possible Predicate and SBitModifier, to build the remaining MCOperand
/// constituents.
@@ -236,13 +257,6 @@ private:
}
};
-/// CreateMCBuilder - Return an ARMBasicMCBuilder that can build up the MC
-/// infrastructure of an MCInst given the Opcode and Format of the instr.
-/// Return NULL if it fails to create/return a proper builder. API clients
-/// are responsible for freeing up of the allocated memory. Cacheing can be
-/// performed by the API clients to improve performance.
-extern ARMBasicMCBuilder *CreateMCBuilder(unsigned Opcode, ARMFormat Format);
-
} // namespace llvm
#endif
diff --git a/lib/Target/ARM/Disassembler/Makefile b/lib/Target/ARM/Disassembler/Makefile
new file mode 100644
index 000000000000..031b6aca5a48
--- /dev/null
+++ b/lib/Target/ARM/Disassembler/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/ARM/Disassembler/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMARMDisassembler
+
+# Hack: we need to include 'main' arm target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
index 481f25d6f486..4b2e308248c5 100644
--- a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
+++ b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
@@ -193,14 +193,18 @@ static inline unsigned getShiftAmtBits(uint32_t insn) {
// A8.6.17 BFC
// Encoding T1 ARMv6T2, ARMv7
// LLVM-specific encoding for #<lsb> and #<width>
-static inline uint32_t getBitfieldInvMask(uint32_t insn) {
+static inline bool getBitfieldInvMask(uint32_t insn, uint32_t &mask) {
uint32_t lsb = getImm3(insn) << 2 | getImm2(insn);
uint32_t msb = getMsb(insn);
uint32_t Val = 0;
- assert(lsb <= msb && "Encoding error: lsb > msb");
+ if (msb < lsb) {
+ DEBUG(errs() << "Encoding error: msb < lsb\n");
+ return false;
+ }
for (uint32_t i = lsb; i <= msb; ++i)
Val |= (1 << i);
- return ~Val;
+ mask = ~Val;
+ return true;
}
// A8.4 Shifts applied to a register
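A worked example of the inverse-mask computation in getBitfieldInvMask above,
with lsb and msb values chosen purely for illustration:

uint32_t lsb = 2, msb = 5, Val = 0;
for (uint32_t i = lsb; i <= msb; ++i)
  Val |= (1 << i);          // Val == 0x3C: bits 2..5 set
uint32_t mask = ~Val;       // mask == 0xFFFFFFC3, the BFC immediate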
@@ -342,7 +346,7 @@ static inline unsigned decodeRotate(uint32_t insn) {
// Special case:
// tMOVSr: tRd tRn
static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO Builder) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
unsigned &OpIdx = NumOpsAdded;
@@ -360,14 +364,14 @@ static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn,
// Add the destination operand.
MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(ARM::tGPRRegClassID,
+ getRegisterEnum(B, ARM::tGPRRegClassID,
UseRt ? getT1tRt(insn) : getT1tRd(insn))));
++OpIdx;
// Check whether the next operand to be added is a CCR Register.
if (OpInfo[OpIdx].RegClass == ARM::CCRRegClassID) {
assert(OpInfo[OpIdx].isOptionalDef() && "Optional def operand expected");
- MI.addOperand(MCOperand::CreateReg(Builder->InITBlock() ? 0 : ARM::CPSR));
+ MI.addOperand(MCOperand::CreateReg(B->InITBlock() ? 0 : ARM::CPSR));
++OpIdx;
}
@@ -376,7 +380,7 @@ static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn,
if (OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID) {
// For UseRt, the reg operand is tied to the first reg operand.
MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(ARM::tGPRRegClassID,
+ getRegisterEnum(B, ARM::tGPRRegClassID,
UseRt ? getT1tRt(insn) : getT1tRn(insn))));
++OpIdx;
}
@@ -388,7 +392,7 @@ static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn,
// The next available operand is either a reg operand or an imm operand.
if (OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID) {
// Three register operand instructions.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
getT1tRm(insn))));
} else {
assert(OpInfo[OpIdx].RegClass == 0 &&
@@ -409,7 +413,7 @@ static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn,
// tMVN, tRSB: tRd CPSR tRn
// Others: tRd CPSR tRd(TIED_TO) tRn
static bool DisassembleThumb1DP(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO Builder) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
const TargetInstrDesc &TID = ARMInsts[Opcode];
const TargetOperandInfo *OpInfo = TID.OpInfo;
@@ -423,14 +427,14 @@ static bool DisassembleThumb1DP(MCInst &MI, unsigned Opcode, uint32_t insn,
&& "Invalid arguments");
// Add the destination operand.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
getT1tRd(insn))));
++OpIdx;
// Check whether the next operand to be added is a CCR Register.
if (OpInfo[OpIdx].RegClass == ARM::CCRRegClassID) {
assert(OpInfo[OpIdx].isOptionalDef() && "Optional def operand expected");
- MI.addOperand(MCOperand::CreateReg(Builder->InITBlock() ? 0 : ARM::CPSR));
+ MI.addOperand(MCOperand::CreateReg(B->InITBlock() ? 0 : ARM::CPSR));
++OpIdx;
}
@@ -449,7 +453,7 @@ static bool DisassembleThumb1DP(MCInst &MI, unsigned Opcode, uint32_t insn,
// Process possible next reg operand.
if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID) {
// Add tRn operand.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
getT1tRn(insn))));
++OpIdx;
}
@@ -466,7 +470,7 @@ static bool DisassembleThumb1DP(MCInst &MI, unsigned Opcode, uint32_t insn,
// tBX_RET_vararg: Rm
// tBLXr_r9: Rm
static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
// tBX_RET has 0 operand.
if (NumOps == 0)
@@ -474,7 +478,7 @@ static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn,
// BX/BLX has 1 reg operand: Rm.
if (NumOps == 1) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
getT1Rm(insn))));
NumOpsAdded = 1;
return true;
@@ -489,7 +493,7 @@ static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn,
// Add the destination operand.
unsigned RegClass = OpInfo[OpIdx].RegClass;
MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(RegClass,
+ getRegisterEnum(B, RegClass,
IsGPR(RegClass) ? getT1Rd(insn)
: getT1tRd(insn))));
++OpIdx;
@@ -509,7 +513,7 @@ static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn,
assert(OpIdx < NumOps && "More operands expected");
RegClass = OpInfo[OpIdx].RegClass;
MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(RegClass,
+ getRegisterEnum(B, RegClass,
IsGPR(RegClass) ? getT1Rm(insn)
: getT1tRn(insn))));
++OpIdx;
@@ -521,9 +525,10 @@ static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn,
//
// tLDRpci: tRt imm8*4
static bool DisassembleThumb1LdPC(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID &&
(OpInfo[1].RegClass == 0 &&
@@ -532,7 +537,7 @@ static bool DisassembleThumb1LdPC(MCInst &MI, unsigned Opcode, uint32_t insn,
&& "Invalid arguments");
// Add the destination operand.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
getT1tRt(insn))));
// And the (imm8 << 2) operand.
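An illustrative encoding for the scaled (imm8 << 2) offset, with the value
chosen for the example: the literal pool is word-aligned, so the 8-bit field
is shifted left by two.

unsigned Imm8 = 0x12;             // example imm8 field from the encoding
unsigned ByteOffset = Imm8 << 2;  // == 0x48, the PC-relative byte offset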
@@ -564,7 +569,7 @@ static bool DisassembleThumb1LdPC(MCInst &MI, unsigned Opcode, uint32_t insn,
// Load/Store Register (reg|imm): tRd tRn imm5 tRm
// Load Register Signed Byte|Halfword: tRd tRn tRm
static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) {
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
const TargetInstrDesc &TID = ARMInsts[Opcode];
const TargetOperandInfo *OpInfo = TID.OpInfo;
@@ -581,9 +586,9 @@ static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode,
&& "Expect >= 2 operands and first two as thumb reg operands");
// Add the destination reg and the base reg.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
getT1tRd(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
getT1tRn(insn))));
OpIdx = 2;
@@ -603,9 +608,10 @@ static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode,
// The next reg operand is tRm, the offset.
assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID
&& "Thumb reg operand expected");
- MI.addOperand(MCOperand::CreateReg(Imm5 ? 0
- : getRegisterEnum(ARM::tGPRRegClassID,
- getT1tRm(insn))));
+ MI.addOperand(MCOperand::CreateReg(
+ Imm5 ? 0
+ : getRegisterEnum(B, ARM::tGPRRegClassID,
+ getT1tRm(insn))));
++OpIdx;
return true;
@@ -615,12 +621,13 @@ static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode,
//
// Load/Store Register SP relative: tRt ARM::SP imm8
static bool DisassembleThumb1LdStSP(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
assert((Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
- && "Invalid opcode");
+ && "Unexpected opcode");
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
assert(NumOps >= 3 &&
OpInfo[0].RegClass == ARM::tGPRRegClassID &&
@@ -630,7 +637,7 @@ static bool DisassembleThumb1LdStSP(MCInst &MI, unsigned Opcode, uint32_t insn,
!OpInfo[2].isOptionalDef())
&& "Invalid arguments");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
getT1tRt(insn))));
MI.addOperand(MCOperand::CreateReg(ARM::SP));
MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn)));
@@ -643,11 +650,12 @@ static bool DisassembleThumb1LdStSP(MCInst &MI, unsigned Opcode, uint32_t insn,
//
// tADDrPCi: tRt imm8
static bool DisassembleThumb1AddPCi(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- assert(Opcode == ARM::tADDrPCi && "Invalid opcode");
+ assert(Opcode == ARM::tADDrPCi && "Unexpected opcode");
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID &&
(OpInfo[1].RegClass == 0 &&
@@ -655,7 +663,7 @@ static bool DisassembleThumb1AddPCi(MCInst &MI, unsigned Opcode, uint32_t insn,
!OpInfo[1].isOptionalDef())
&& "Invalid arguments");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
getT1tRt(insn))));
MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn)));
NumOpsAdded = 2;
@@ -667,11 +675,12 @@ static bool DisassembleThumb1AddPCi(MCInst &MI, unsigned Opcode, uint32_t insn,
//
// tADDrSPi: tRt ARM::SP imm8
static bool DisassembleThumb1AddSPi(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- assert(Opcode == ARM::tADDrSPi && "Invalid opcode");
+ assert(Opcode == ARM::tADDrSPi && "Unexpected opcode");
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
assert(NumOps >= 3 &&
OpInfo[0].RegClass == ARM::tGPRRegClassID &&
@@ -681,7 +690,7 @@ static bool DisassembleThumb1AddSPi(MCInst &MI, unsigned Opcode, uint32_t insn,
!OpInfo[2].isOptionalDef())
&& "Invalid arguments");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
getT1tRt(insn))));
MI.addOperand(MCOperand::CreateReg(ARM::SP));
MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn)));
@@ -697,23 +706,27 @@ static bool DisassembleThumb1AddSPi(MCInst &MI, unsigned Opcode, uint32_t insn,
// "low registers" is specified by Inst{7-0}
// lr|pc is specified by Inst{8}
static bool DisassembleThumb1PushPop(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- assert((Opcode == ARM::tPUSH || Opcode == ARM::tPOP) && "Invalid opcode");
+ assert((Opcode == ARM::tPUSH || Opcode == ARM::tPOP) && "Unexpected opcode");
unsigned &OpIdx = NumOpsAdded;
// Handling the two predicate operands before the reglist.
- MI.addOperand(MCOperand::CreateImm(ARMCC::AL));
- MI.addOperand(MCOperand::CreateReg(ARM::CPSR));
- OpIdx = 2;
+ if (B->DoPredicateOperands(MI, Opcode, insn, NumOps))
+ OpIdx += 2;
+ else {
+ DEBUG(errs() << "Expected predicate operands not found.\n");
+ return false;
+ }
- // Fill the variadic part of reglist.
unsigned RegListBits = slice(insn, 8, 8) << (Opcode == ARM::tPUSH ? 14 : 15)
| slice(insn, 7, 0);
+
+ // Fill the variadic part of reglist.
for (unsigned i = 0; i < 16; ++i) {
if ((RegListBits >> i) & 1) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
i)));
++OpIdx;
}
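A worked example for the reglist expansion above, with encoding values chosen
for illustration: a tPUSH with Inst{8} = 1 and Inst{7-0} = 0b10010000.

unsigned RegListBits = (1u << 14) | 0x90;  // Inst{8} maps to LR for tPUSH
// Bits 4, 7, and 14 are set, so the loop adds R4, R7, and LR as operands.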
@@ -735,13 +748,13 @@ static bool DisassembleThumb1PushPop(MCInst &MI, unsigned Opcode, uint32_t insn,
// no operand
// Others: tRd tRn
static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
if (NumOps == 0)
return true;
if (Opcode == ARM::tPUSH || Opcode == ARM::tPOP)
- return DisassembleThumb1PushPop(MI, Opcode, insn, NumOps, NumOpsAdded);
+ return DisassembleThumb1PushPop(MI, Opcode, insn, NumOps, NumOpsAdded, B);
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
@@ -799,16 +812,16 @@ static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn,
&& "Expect >=2 operands");
// Add the destination operand.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
getT1tRd(insn))));
if (OpInfo[1].RegClass == ARM::tGPRRegClassID) {
// Two register instructions.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
getT1tRn(insn))));
} else {
// CBNZ, CBZ
- assert((Opcode == ARM::tCBNZ || Opcode == ARM::tCBZ) && "Invalid opcode");
+ assert((Opcode == ARM::tCBNZ || Opcode == ARM::tCBZ) &&"Unexpected opcode");
MI.addOperand(MCOperand::CreateImm(getT1Imm6(insn) * 2));
}
@@ -823,42 +836,47 @@ static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn,
// tLDM_UPD/tSTM_UPD: tRt tRt AM4ModeImm Pred-Imm Pred-CCR register_list
// tLDM: tRt AM4ModeImm Pred-Imm Pred-CCR register_list
static bool DisassembleThumb1LdStMul(bool Ld, MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) {
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
assert((Opcode == ARM::tLDM || Opcode == ARM::tLDM_UPD ||
- Opcode == ARM::tSTM_UPD) && "Invalid opcode");
+ Opcode == ARM::tSTM_UPD) && "Unexpected opcode");
unsigned &OpIdx = NumOpsAdded;
unsigned tRt = getT1tRt(insn);
- unsigned RegListBits = slice(insn, 7, 0);
OpIdx = 0;
// WB register, if necessary.
if (Opcode == ARM::tLDM_UPD || Opcode == ARM::tSTM_UPD) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
tRt)));
++OpIdx;
}
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
tRt)));
++OpIdx;
// A8.6.53 LDM / LDMIA / LDMFD - Encoding T1
+ // A8.6.53 STM / STMIA / STMEA - Encoding T1
MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)));
++OpIdx;
// Handling the two predicate operands before the reglist.
- MI.addOperand(MCOperand::CreateImm(ARMCC::AL));
- MI.addOperand(MCOperand::CreateReg(ARM::CPSR));
- OpIdx += 2;
+ if (B->DoPredicateOperands(MI, Opcode, insn, NumOps))
+ OpIdx += 2;
+ else {
+ DEBUG(errs() << "Expected predicate operands not found.\n");
+ return false;
+ }
+
+ unsigned RegListBits = slice(insn, 7, 0);
// Fill the variadic part of reglist.
for (unsigned i = 0; i < 8; ++i) {
if ((RegListBits >> i) & 1) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
i)));
++OpIdx;
}
@@ -868,13 +886,15 @@ static bool DisassembleThumb1LdStMul(bool Ld, MCInst &MI, unsigned Opcode,
}
static bool DisassembleThumb1LdMul(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded) {
- return DisassembleThumb1LdStMul(true, MI, Opcode, insn, NumOps, NumOpsAdded);
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+ return DisassembleThumb1LdStMul(true, MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
}
static bool DisassembleThumb1StMul(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded) {
- return DisassembleThumb1LdStMul(false, MI, Opcode, insn, NumOps, NumOpsAdded);
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+ return DisassembleThumb1LdStMul(false, MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
}
// A8.6.16 B Encoding T1
@@ -885,12 +905,14 @@ static bool DisassembleThumb1StMul(MCInst &MI, unsigned Opcode, uint32_t insn,
// tSVC: imm8 Pred-Imm Pred-CCR
// tTRAP: 0 operand (early return)
static bool DisassembleThumb1CondBr(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO) {
if (Opcode == ARM::tTRAP)
return true;
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
assert(NumOps == 3 && OpInfo[0].RegClass == 0 &&
OpInfo[1].isPredicate() && OpInfo[2].RegClass == ARM::CCRRegClassID
&& "Exactly 3 operands expected");
@@ -912,9 +934,11 @@ static bool DisassembleThumb1CondBr(MCInst &MI, unsigned Opcode, uint32_t insn,
//
// tB: offset
static bool DisassembleThumb1Br(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO) {
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
assert(NumOps == 1 && OpInfo[0].RegClass == 0 && "1 imm operand expected");
unsigned Imm11 = getT1Imm11(insn);
@@ -952,9 +976,8 @@ static bool DisassembleThumb1Br(MCInst &MI, unsigned Opcode, uint32_t insn,
// 1101xx Conditional branch, and Supervisor Call on page A6-13
// 11100x Unconditional Branch, see B on page A8-44
//
-static bool DisassembleThumb1(uint16_t op,
- MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO Builder) {
+static bool DisassembleThumb1(uint16_t op, MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
unsigned op1 = slice(op, 5, 4);
unsigned op2 = slice(op, 3, 2);
@@ -963,27 +986,27 @@ static bool DisassembleThumb1(uint16_t op,
switch (op1) {
case 0:
// A6.2.1 Shift (immediate), add, subtract, move, and compare
- return DisassembleThumb1General(MI, Opcode, insn, NumOps, NumOpsAdded,
- Builder);
+ return DisassembleThumb1General(MI, Opcode, insn, NumOps, NumOpsAdded, B);
case 1:
switch (op2) {
case 0:
switch (op3) {
case 0:
// A6.2.2 Data-processing
- return DisassembleThumb1DP(MI, Opcode, insn, NumOps, NumOpsAdded,
- Builder);
+ return DisassembleThumb1DP(MI, Opcode, insn, NumOps, NumOpsAdded, B);
case 1:
// A6.2.3 Special data instructions and branch and exchange
- return DisassembleThumb1Special(MI, Opcode, insn, NumOps, NumOpsAdded);
+ return DisassembleThumb1Special(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
default:
// A8.6.59 LDR (literal)
- return DisassembleThumb1LdPC(MI, Opcode, insn, NumOps, NumOpsAdded);
+ return DisassembleThumb1LdPC(MI, Opcode, insn, NumOps, NumOpsAdded, B);
}
break;
default:
// A6.2.4 Load/store single data item
- return DisassembleThumb1LdSt(opA, MI, Opcode, insn, NumOps, NumOpsAdded);
+ return DisassembleThumb1LdSt(opA, MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
break;
}
break;
@@ -991,21 +1014,24 @@ static bool DisassembleThumb1(uint16_t op,
switch (op2) {
case 0:
// A6.2.4 Load/store single data item
- return DisassembleThumb1LdSt(opA, MI, Opcode, insn, NumOps, NumOpsAdded);
+ return DisassembleThumb1LdSt(opA, MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
case 1:
// A6.2.4 Load/store single data item
- return DisassembleThumb1LdStSP(MI, Opcode, insn, NumOps, NumOpsAdded);
+ return DisassembleThumb1LdStSP(MI, Opcode, insn, NumOps, NumOpsAdded, B);
case 2:
if (op3 <= 1) {
// A8.6.10 ADR
- return DisassembleThumb1AddPCi(MI, Opcode, insn, NumOps, NumOpsAdded);
+ return DisassembleThumb1AddPCi(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
} else {
// A8.6.8 ADD (SP plus immediate)
- return DisassembleThumb1AddSPi(MI, Opcode, insn, NumOps, NumOpsAdded);
+ return DisassembleThumb1AddSPi(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
}
default:
// A6.2.5 Miscellaneous 16-bit instructions
- return DisassembleThumb1Misc(MI, Opcode, insn, NumOps, NumOpsAdded);
+ return DisassembleThumb1Misc(MI, Opcode, insn, NumOps, NumOpsAdded, B);
}
break;
case 3:
@@ -1013,17 +1039,17 @@ static bool DisassembleThumb1(uint16_t op,
case 0:
if (op3 <= 1) {
// A8.6.189 STM / STMIA / STMEA
- return DisassembleThumb1StMul(MI, Opcode, insn, NumOps, NumOpsAdded);
+ return DisassembleThumb1StMul(MI, Opcode, insn, NumOps, NumOpsAdded, B);
} else {
// A8.6.53 LDM / LDMIA / LDMFD
- return DisassembleThumb1LdMul(MI, Opcode, insn, NumOps, NumOpsAdded);
+ return DisassembleThumb1LdMul(MI, Opcode, insn, NumOps, NumOpsAdded, B);
}
case 1:
// A6.2.6 Conditional branch, and Supervisor Call
- return DisassembleThumb1CondBr(MI, Opcode, insn, NumOps, NumOpsAdded);
+ return DisassembleThumb1CondBr(MI, Opcode, insn, NumOps, NumOpsAdded, B);
case 2:
// Unconditional Branch, see B on page A8-44
- return DisassembleThumb1Br(MI, Opcode, insn, NumOps, NumOpsAdded);
+ return DisassembleThumb1Br(MI, Opcode, insn, NumOps, NumOpsAdded, B);
default:
assert(0 && "Unreachable code");
break;
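The slice helper driving this dispatch is not shown in the patch; a minimal
sketch of its assumed semantics (extract the inclusive bit range [From:To],
with From >= To):

static inline unsigned slice(uint32_t Bits, unsigned From, unsigned To) {
  return (Bits >> To) & ((1u << (From - To + 1)) - 1);
}
// e.g. slice(op, 5, 4) yields the two-bit op1 field, slice(op, 3, 2) op2.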
@@ -1079,32 +1105,32 @@ static bool DisassembleThumb2SRS(MCInst &MI, unsigned Opcode, uint32_t insn,
// t2RFE[IA|DB]W/t2RFE[IA|DB]: Rn
static bool DisassembleThumb2RFE(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
NumOpsAdded = 1;
return true;
}
static bool DisassembleThumb2LdStMul(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
if (Thumb2SRSOpcode(Opcode))
return DisassembleThumb2SRS(MI, Opcode, insn, NumOps, NumOpsAdded);
if (Thumb2RFEOpcode(Opcode))
- return DisassembleThumb2RFE(MI, Opcode, insn, NumOps, NumOpsAdded);
+ return DisassembleThumb2RFE(MI, Opcode, insn, NumOps, NumOpsAdded, B);
assert((Opcode == ARM::t2LDM || Opcode == ARM::t2LDM_UPD ||
Opcode == ARM::t2STM || Opcode == ARM::t2STM_UPD)
- && "Invalid opcode");
+ && "Unexpected opcode");
assert(NumOps >= 5 && "Thumb2 LdStMul expects NumOps >= 5");
unsigned &OpIdx = NumOpsAdded;
OpIdx = 0;
- unsigned Base = getRegisterEnum(ARM::GPRRegClassID, decodeRn(insn));
+ unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn));
// Writeback to base.
if (Opcode == ARM::t2LDM_UPD || Opcode == ARM::t2STM_UPD) {
@@ -1120,15 +1146,19 @@ static bool DisassembleThumb2LdStMul(MCInst &MI, unsigned Opcode, uint32_t insn,
++OpIdx;
// Handling the two predicate operands before the reglist.
- MI.addOperand(MCOperand::CreateImm(ARMCC::AL));
- MI.addOperand(MCOperand::CreateReg(ARM::CPSR));
- OpIdx += 2;
+ if (B->DoPredicateOperands(MI, Opcode, insn, NumOps))
+ OpIdx += 2;
+ else {
+ DEBUG(errs() << "Expected predicate operands not found.\n");
+ return false;
+ }
- // Fill the variadic part of reglist.
unsigned RegListBits = insn & ((1 << 16) - 1);
+
+ // Fill the variadic part of reglist.
for (unsigned i = 0; i < 16; ++i) {
if ((RegListBits >> i) & 1) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
i)));
++OpIdx;
}
@@ -1144,9 +1174,11 @@ static bool DisassembleThumb2LdStMul(MCInst &MI, unsigned Opcode, uint32_t insn,
// t2STREXD: Rm Rd Rs Rn
// t2STREXB, t2STREXH: Rm Rd Rn
static bool DisassembleThumb2LdStEx(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
unsigned &OpIdx = NumOpsAdded;
OpIdx = 0;
@@ -1163,25 +1195,25 @@ static bool DisassembleThumb2LdStEx(MCInst &MI, unsigned Opcode, uint32_t insn,
// Add the destination operand for store.
if (isStore) {
MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(ARM::GPRRegClassID,
+ getRegisterEnum(B, ARM::GPRRegClassID,
isSW ? decodeRs(insn) : decodeRm(insn))));
++OpIdx;
}
// Source operand for store and destination operand for load.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRd(insn))));
++OpIdx;
// Thumb2 doubleword complication: with an extra source/destination operand.
if (isDW) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRs(insn))));
++OpIdx;
}
// Finally add the pointer operand.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
++OpIdx;
@@ -1198,9 +1230,10 @@ static bool DisassembleThumb2LdStEx(MCInst &MI, unsigned Opcode, uint32_t insn,
// Ditto for t2LDRD_PRE, t2LDRD_POST, t2STRD_PRE, t2STRD_POST, which are for
// disassembly only and do not have a tied_to writeback base register operand.
static bool DisassembleThumb2LdStDual(MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) {
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
assert(NumOps >= 4
&& OpInfo[0].RegClass == ARM::GPRRegClassID
@@ -1210,11 +1243,11 @@ static bool DisassembleThumb2LdStDual(MCInst &MI, unsigned Opcode,
&& "Expect >= 4 operands and first 3 as reg operands");
// Add the <Rt> <Rt2> operands.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRd(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRs(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
// Finally add (+/-)imm8*4, depending on the U bit.
@@ -1235,15 +1268,15 @@ static bool DisassembleThumb2LdStDual(MCInst &MI, unsigned Opcode,
//
// t2TBBgen, t2TBHgen: Rn Rm Pred-Imm Pred-CCR
static bool DisassembleThumb2TB(MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) {
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
assert(NumOps >= 2 && "Expect >= 2 operands");
// The generic version of TBB/TBH needs a base register.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
// Add the index register.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
NumOpsAdded = 2;
@@ -1278,7 +1311,7 @@ static inline bool Thumb2ShiftOpcode(unsigned Opcode) {
// nothing else, because the shift amount is already specified.
// Similar case holds for t2MOVrx, t2ADDrr, ..., etc.
static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
const TargetInstrDesc &TID = ARMInsts[Opcode];
const TargetOperandInfo *OpInfo = TID.OpInfo;
@@ -1293,7 +1326,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
&& OpInfo[3].RegClass == 0
&& "Exactlt 4 operands expect and first two as reg operands");
// Only need to populate the src reg operand.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
MI.addOperand(MCOperand::CreateReg(0));
MI.addOperand(MCOperand::CreateImm(0));
@@ -1315,7 +1348,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
// Build the register operands, followed by the constant shift specifier.
MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(ARM::GPRRegClassID,
+ getRegisterEnum(B, ARM::GPRRegClassID,
NoDstReg ? decodeRn(insn) : decodeRs(insn))));
++OpIdx;
@@ -1324,15 +1357,18 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) {
// Process tied_to operand constraint.
MI.addOperand(MI.getOperand(Idx));
- } else {
- assert(!NoDstReg && "Internal error");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ ++OpIdx;
+ } else if (!NoDstReg) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
+ ++OpIdx;
+ } else {
+ DEBUG(errs() << "Thumb2 encoding error: d==15 for three-reg operands.\n");
+ return false;
}
- ++OpIdx;
}
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
++OpIdx;
@@ -1373,7 +1409,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
//
// ModImm = ThumbExpandImm(i:imm3:imm8)
static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) {
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
unsigned &OpIdx = NumOpsAdded;
@@ -1389,13 +1425,16 @@ static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode,
// Build the register operands, followed by the modified immediate.
MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(ARM::GPRRegClassID,
+ getRegisterEnum(B, ARM::GPRRegClassID,
NoDstReg ? decodeRn(insn) : decodeRs(insn))));
++OpIdx;
if (TwoReg) {
- assert(!NoDstReg && "Internal error");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ if (NoDstReg) {
+ DEBUG(errs()<<"Thumb2 encoding error: d==15 for DPModImm 2-reg instr.\n");
+ return false;
+ }
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
++OpIdx;
}
@@ -1437,7 +1476,7 @@ static inline unsigned decodeThumb2SaturatePos(unsigned Opcode, uint32_t insn) {
case ARM::t2USAT16:
return slice(insn, 3, 0);
default:
- assert(0 && "Invalid opcode passed in");
+ assert(0 && "Unexpected opcode");
return 0;
}
}
@@ -1459,7 +1498,7 @@ static inline unsigned decodeThumb2SaturatePos(unsigned Opcode, uint32_t insn) {
// o t2SSAT[lsl|asr], t2USAT[lsl|asr]: Rs sat_pos Rn shamt
// o t2SSAT16, t2USAT16: Rs sat_pos Rn
static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) {
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
const TargetInstrDesc &TID = ARMInsts[Opcode];
const TargetOperandInfo *OpInfo = TID.OpInfo;
@@ -1474,7 +1513,7 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
// Build the register operand(s), followed by the immediate(s).
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRs(insn))));
++OpIdx;
@@ -1482,7 +1521,7 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
if (Thumb2SaturateOpcode(Opcode)) {
MI.addOperand(MCOperand::CreateImm(decodeThumb2SaturatePos(Opcode, insn)));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
if (Opcode == ARM::t2SSAT16 || Opcode == ARM::t2USAT16) {
@@ -1510,7 +1549,7 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
MI.addOperand(MI.getOperand(Idx));
} else {
// Add src reg operand.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
}
++OpIdx;
@@ -1528,15 +1567,22 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
MI.addOperand(MCOperand::CreateImm(getIImm3Imm8(insn)));
else if (Opcode == ARM::t2MOVi16 || Opcode == ARM::t2MOVTi16)
MI.addOperand(MCOperand::CreateImm(getImm16(insn)));
- else if (Opcode == ARM::t2BFC)
- MI.addOperand(MCOperand::CreateImm(getBitfieldInvMask(insn)));
- else {
+ else if (Opcode == ARM::t2BFC) {
+ uint32_t mask = 0;
+ if (getBitfieldInvMask(insn, mask))
+ MI.addOperand(MCOperand::CreateImm(mask));
+ else
+ return false;
+ } else {
// Handle the case of: lsb width
assert((Opcode == ARM::t2SBFX || Opcode == ARM::t2UBFX ||
- Opcode == ARM::t2BFI) && "Invalid opcode");
+ Opcode == ARM::t2BFI) && "Unexpected opcode");
MI.addOperand(MCOperand::CreateImm(getLsb(insn)));
if (Opcode == ARM::t2BFI) {
- assert(getMsb(insn) >= getLsb(insn) && "Encoding error");
+ if (getMsb(insn) < getLsb(insn)) {
+ DEBUG(errs() << "Encoding error: msb < lsb\n");
+ return false;
+ }
MI.addOperand(MCOperand::CreateImm(getMsb(insn) - getLsb(insn) + 1));
} else
MI.addOperand(MCOperand::CreateImm(getWidthMinus1(insn) + 1));
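Illustrative numbers for the lsb/width operands above (chosen for the example,
not taken from the patch):

unsigned lsb = 4, msb = 11;      // msb < lsb was rejected just above
unsigned width = msb - lsb + 1;  // == 8, the width operand added for t2BFI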
@@ -1585,7 +1631,7 @@ static inline bool t2MiscCtrlInstr(uint32_t insn) {
// t2MSR/t2MSRsys -> Rn mask=Inst{11-8}
// t2SMC -> imm4 = Inst{19-16}
static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) {
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
if (NumOps == 0)
return true;
@@ -1627,21 +1673,21 @@ static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode,
// MRS and MRSsys take one GPR reg Rs.
if (Opcode == ARM::t2MRS || Opcode == ARM::t2MRSsys) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRs(insn))));
NumOpsAdded = 1;
return true;
}
// BXJ takes one GPR reg Rn.
if (Opcode == ARM::t2BXJ) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
NumOpsAdded = 1;
return true;
}
// MSR and MSRsys take one GPR reg Rn, followed by the mask.
if (Opcode == ARM::t2MSR || Opcode == ARM::t2MSRsys || Opcode == ARM::t2BXJ) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
MI.addOperand(MCOperand::CreateImm(slice(insn, 11, 8)));
NumOpsAdded = 2;
@@ -1659,7 +1705,7 @@ static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode,
switch (Opcode) {
default:
- assert(0 && "Unreachable code");
+ assert(0 && "Unexpected opcode");
return false;
case ARM::t2B:
Offset = decodeImm32_B_EncodingT4(insn);
@@ -1700,7 +1746,7 @@ static inline bool Thumb2PreloadOpcode(unsigned Opcode) {
}
static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
// Preload Data/Instruction requires either 2 or 3 operands.
// t2PLDi12, t2PLDi8, t2PLDpci: Rn [+/-]imm12/imm8
@@ -1718,12 +1764,12 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn,
OpInfo[0].RegClass == ARM::GPRRegClassID &&
"Expect >= 2 operands and first one as reg operand");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
++OpIdx;
if (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
} else {
assert(OpInfo[OpIdx].RegClass == 0 && !OpInfo[OpIdx].isPredicate()
@@ -1765,9 +1811,10 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn,
// These instrs calculate an address from the PC value and an immediate offset.
// Rd Rn=PC (+/-)imm12 (+ if Inst{23} == 0b1)
static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) {
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
assert(NumOps >= 2 &&
OpInfo[0].RegClass == ARM::GPRRegClassID &&
@@ -1776,7 +1823,7 @@ static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode,
// Build the register operand, followed by the (+/-)imm12 immediate.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRd(insn))));
MI.addOperand(MCOperand::CreateImm(decodeImm12(insn)));
@@ -1812,16 +1859,16 @@ static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode,
// Delegates to DisassembleThumb2PreLoad() for preload data/instruction.
// Delegates to DisassembleThumb2Ldpci() for load * literal operations.
static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) {
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
unsigned Rn = decodeRn(insn);
if (Thumb2PreloadOpcode(Opcode))
- return DisassembleThumb2PreLoad(MI, Opcode, insn, NumOps, NumOpsAdded);
+ return DisassembleThumb2PreLoad(MI, Opcode, insn, NumOps, NumOpsAdded, B);
// See, for example, A6.3.7 Load word: Table A6-18 Load word.
if (Load && Rn == 15)
- return DisassembleThumb2Ldpci(MI, Opcode, insn, NumOps, NumOpsAdded);
+ return DisassembleThumb2Ldpci(MI, Opcode, insn, NumOps, NumOpsAdded, B);
const TargetInstrDesc &TID = ARMInsts[Opcode];
const TargetOperandInfo *OpInfo = TID.OpInfo;
@@ -1870,13 +1917,16 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode,
Imm = decodeImm8(insn);
}
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, R0)));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ R0)));
++OpIdx;
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, R1)));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ R1)));
++OpIdx;
if (ThreeReg) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,R2)));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ R2)));
++OpIdx;
}
@@ -1900,7 +1950,7 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode,
//
// Miscellaneous operations: Rs [Rn] Rm
static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
const TargetInstrDesc &TID = ARMInsts[Opcode];
const TargetOperandInfo *OpInfo = TID.OpInfo;
@@ -1917,17 +1967,17 @@ static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn,
bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID;
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRs(insn))));
++OpIdx;
if (ThreeReg) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
++OpIdx;
}
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
++OpIdx;
@@ -1954,7 +2004,7 @@ static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn,
// Unsigned Sum of Absolute Differences [and Accumulate]
// Rs Rn Rm [Ra=Inst{15-12}]
static bool DisassembleThumb2Mul(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
@@ -1968,17 +2018,17 @@ static bool DisassembleThumb2Mul(MCInst &MI, unsigned Opcode, uint32_t insn,
bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::GPRRegClassID;
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRs(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
if (FourReg)
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRd(insn))));
NumOpsAdded = FourReg ? 4 : 3;
@@ -1999,7 +2049,7 @@ static bool DisassembleThumb2Mul(MCInst &MI, unsigned Opcode, uint32_t insn,
//
// Signed/Unsigned divide: t2SDIV, t2UDIV: Rs Rn Rm
static bool DisassembleThumb2LongMul(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
@@ -2014,16 +2064,16 @@ static bool DisassembleThumb2LongMul(MCInst &MI, unsigned Opcode, uint32_t insn,
// Build the register operands.
if (FourReg)
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRd(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRs(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
if (FourReg)
@@ -2059,38 +2109,41 @@ static bool DisassembleThumb2LongMul(MCInst &MI, unsigned Opcode, uint32_t insn,
// 1xxxxxx - Coprocessor instructions on page A6-40
//
static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op,
- MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded) {
+ MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps,
+ unsigned &NumOpsAdded, BO B) {
switch (op1) {
case 1:
if (slice(op2, 6, 5) == 0) {
if (slice(op2, 2, 2) == 0) {
// Load/store multiple.
- return DisassembleThumb2LdStMul(MI, Opcode, insn, NumOps, NumOpsAdded);
+ return DisassembleThumb2LdStMul(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
}
// Load/store dual, load/store exclusive, table branch, otherwise.
- assert(slice(op2, 2, 2) == 1 && "Encoding error");
+ assert(slice(op2, 2, 2) == 1 && "Thumb2 encoding error!");
if ((ARM::t2LDREX <= Opcode && Opcode <= ARM::t2LDREXH) ||
(ARM::t2STREX <= Opcode && Opcode <= ARM::t2STREXH)) {
// Load/store exclusive.
- return DisassembleThumb2LdStEx(MI, Opcode, insn, NumOps, NumOpsAdded);
+ return DisassembleThumb2LdStEx(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
}
if (Opcode == ARM::t2LDRDi8 ||
Opcode == ARM::t2LDRD_PRE || Opcode == ARM::t2LDRD_POST ||
Opcode == ARM::t2STRDi8 ||
Opcode == ARM::t2STRD_PRE || Opcode == ARM::t2STRD_POST) {
// Load/store dual.
- return DisassembleThumb2LdStDual(MI, Opcode, insn, NumOps, NumOpsAdded);
+ return DisassembleThumb2LdStDual(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
}
if (Opcode == ARM::t2TBBgen || Opcode == ARM::t2TBHgen) {
// Table branch.
- return DisassembleThumb2TB(MI, Opcode, insn, NumOps, NumOpsAdded);
+ return DisassembleThumb2TB(MI, Opcode, insn, NumOps, NumOpsAdded, B);
}
} else if (slice(op2, 6, 5) == 1) {
// Data-processing (shifted register).
- return DisassembleThumb2DPSoReg(MI, Opcode, insn, NumOps, NumOpsAdded);
+ return DisassembleThumb2DPSoReg(MI, Opcode, insn, NumOps, NumOpsAdded, B);
}
// FIXME: A6.3.18 Coprocessor instructions
@@ -2101,14 +2154,17 @@ static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op,
if (op == 0) {
if (slice(op2, 5, 5) == 0) {
// Data-processing (modified immediate)
- return DisassembleThumb2DPModImm(MI, Opcode, insn, NumOps, NumOpsAdded);
+ return DisassembleThumb2DPModImm(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
} else {
// Data-processing (plain binary immediate)
- return DisassembleThumb2DPBinImm(MI, Opcode, insn, NumOps, NumOpsAdded);
+ return DisassembleThumb2DPBinImm(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
}
} else {
// Branches and miscellaneous control on page A6-20.
- return DisassembleThumb2BrMiscCtrl(MI, Opcode, insn, NumOps, NumOpsAdded);
+ return DisassembleThumb2BrMiscCtrl(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
}
break;
@@ -2119,7 +2175,8 @@ static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op,
if (slice(op2, 0, 0) == 0) {
if (slice(op2, 4, 4) == 0) {
// Store single data item on page A6-30
- return DisassembleThumb2LdSt(false, MI,Opcode,insn,NumOps,NumOpsAdded);
+ return DisassembleThumb2LdSt(false, MI,Opcode,insn,NumOps,NumOpsAdded,
+ B);
} else {
// FIXME: Advanced SIMD element or structure load/store instructions.
// But see ThumbDisassembler::getInstruction().
@@ -2127,19 +2184,20 @@ static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op,
}
} else {
// Table A6-9 32-bit Thumb instruction encoding: Load byte|halfword|word
- return DisassembleThumb2LdSt(true, MI,Opcode,insn,NumOps,NumOpsAdded);
+ return DisassembleThumb2LdSt(true, MI,Opcode,insn,NumOps,NumOpsAdded, B);
}
break;
case 1:
if (slice(op2, 4, 4) == 0) {
// A6.3.12 Data-processing (register)
- return DisassembleThumb2DPReg(MI, Opcode, insn, NumOps, NumOpsAdded);
+ return DisassembleThumb2DPReg(MI, Opcode, insn, NumOps, NumOpsAdded, B);
} else if (slice(op2, 3, 3) == 0) {
// A6.3.16 Multiply, multiply accumulate, and absolute difference
- return DisassembleThumb2Mul(MI, Opcode, insn, NumOps, NumOpsAdded);
+ return DisassembleThumb2Mul(MI, Opcode, insn, NumOps, NumOpsAdded, B);
} else {
// A6.3.17 Long multiply, long multiply accumulate, and divide
- return DisassembleThumb2LongMul(MI, Opcode, insn, NumOps, NumOpsAdded);
+ return DisassembleThumb2LongMul(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
}
break;
default:
@@ -2151,7 +2209,7 @@ static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op,
break;
default:
- assert(0 && "Encoding error for Thumb2 instruction!");
+ assert(0 && "Thumb2 encoding error!");
break;
}
@@ -2174,8 +2232,10 @@ static bool DisassembleThumbFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned bits15_11 = slice(HalfWord, 15, 11);
// A6.1 Thumb instruction set encoding
- assert((bits15_11 == 0x1D || bits15_11 == 0x1E || bits15_11 == 0x1F) &&
- "Bits [15:11] of first halfword of a Thumb2 instruction out of range");
+ if (!(bits15_11 == 0x1D || bits15_11 == 0x1E || bits15_11 == 0x1F)) {
+ assert("Bits[15:11] first halfword of Thumb2 instruction is out of range");
+ return false;
+ }
// A6.3 32-bit Thumb instruction encoding
@@ -2183,5 +2243,6 @@ static bool DisassembleThumbFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
uint16_t op2 = slice(HalfWord, 10, 4);
uint16_t op = slice(insn, 15, 15);
- return DisassembleThumb2(op1, op2, op, MI, Opcode, insn, NumOps, NumOpsAdded);
+ return DisassembleThumb2(op1, op2, op, MI, Opcode, insn, NumOps, NumOpsAdded,
+ Builder);
}
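Putting DisassembleThumbFrm's field extraction together: the HalfWord and op1
derivations below are assumptions, since this part of the patch shows only
bits15_11, op2, and op.

uint16_t HalfWord = insn >> 16;                // assumed: first halfword
unsigned bits15_11 = slice(HalfWord, 15, 11);  // 0x1D/0x1E/0x1F => 32-bit
uint16_t op1 = slice(HalfWord, 12, 11);        // assumed extraction for op1
uint16_t op2 = slice(HalfWord, 10, 4);
uint16_t op  = slice(insn, 15, 15);            // bit 15 of second halfword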
diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile
index a8dd38cb362e..9e3ff29e07c4 100644
--- a/lib/Target/ARM/Makefile
+++ b/lib/Target/ARM/Makefile
@@ -16,8 +16,9 @@ BUILT_SOURCES = ARMGenRegisterInfo.h.inc ARMGenRegisterNames.inc \
ARMGenRegisterInfo.inc ARMGenInstrNames.inc \
ARMGenInstrInfo.inc ARMGenAsmWriter.inc \
ARMGenDAGISel.inc ARMGenSubtarget.inc \
- ARMGenCodeEmitter.inc ARMGenCallingConv.inc
+ ARMGenCodeEmitter.inc ARMGenCallingConv.inc \
+ ARMGenDecoderTables.inc ARMGenEDInfo.inc
-DIRS = AsmPrinter AsmParser TargetInfo
+DIRS = AsmPrinter AsmParser Disassembler TargetInfo
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index a5dfcb34f7bc..2f635fe50ad5 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -36,9 +36,12 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
+namespace llvm {
extern cl::opt<bool> ReuseFrameIndexVals;
+}
+
+using namespace llvm;
Thumb1RegisterInfo::Thumb1RegisterInfo(const ARMBaseInstrInfo &tii,
const ARMSubtarget &sti)
@@ -56,7 +59,7 @@ void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
unsigned PredReg) const {
MachineFunction &MF = *MBB.getParent();
MachineConstantPool *ConstantPool = MF.getConstantPool();
- Constant *C = ConstantInt::get(
+ const Constant *C = ConstantInt::get(
Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
@@ -461,6 +464,13 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Offset -= AFI->getFramePtrSpillOffset();
}
+ // Special handling of dbg_value instructions.
+ if (MI.isDebugValue()) {
+ MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+ return 0;
+ }
+
unsigned Opcode = MI.getOpcode();
const TargetInstrDesc &Desc = MI.getDesc();
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index de4605669315..b143bd978ba5 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -44,18 +44,22 @@ Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
- if (DestRC == ARM::GPRRegisterClass &&
- SrcRC == ARM::GPRRegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg);
- return true;
- } else if (DestRC == ARM::GPRRegisterClass &&
- SrcRC == ARM::tGPRRegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::tMOVtgpr2gpr), DestReg).addReg(SrcReg);
- return true;
- } else if (DestRC == ARM::tGPRRegisterClass &&
- SrcRC == ARM::GPRRegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::tMOVgpr2tgpr), DestReg).addReg(SrcReg);
- return true;
+ if (DestRC == ARM::GPRRegisterClass) {
+ if (SrcRC == ARM::GPRRegisterClass) {
+ BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg);
+ return true;
+ } else if (SrcRC == ARM::tGPRRegisterClass) {
+ BuildMI(MBB, I, DL, get(ARM::tMOVtgpr2gpr), DestReg).addReg(SrcReg);
+ return true;
+ }
+ } else if (DestRC == ARM::tGPRRegisterClass) {
+ if (SrcRC == ARM::GPRRegisterClass) {
+ BuildMI(MBB, I, DL, get(ARM::tMOVgpr2tgpr), DestReg).addReg(SrcReg);
+ return true;
+ } else if (SrcRC == ARM::tGPRRegisterClass) {
+ BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg).addReg(SrcReg);
+ return true;
+ }
}
// Handle SPR, DPR, and QPR copies.
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp
index f24d3e256ff3..07dd0be078d7 100644
--- a/lib/Target/ARM/Thumb2RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp
@@ -52,7 +52,7 @@ void Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
unsigned PredReg) const {
MachineFunction &MF = *MBB.getParent();
MachineConstantPool *ConstantPool = MF.getConstantPool();
- Constant *C = ConstantInt::get(
+ const Constant *C = ConstantInt::get(
Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 2bc75f28d503..8fe2e42a7cdd 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -656,15 +656,8 @@ static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
bool Modified = false;
- bool LiveCPSR = false;
// Yes, CPSR could be livein.
- for (MachineBasicBlock::const_livein_iterator I = MBB.livein_begin(),
- E = MBB.livein_end(); I != E; ++I) {
- if (*I == ARM::CPSR) {
- LiveCPSR = true;
- break;
- }
- }
+ bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
MachineBasicBlock::iterator NextMII;
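The one-liner relies on MachineBasicBlock::isLiveIn; a sketch of the expected
helper, assuming the livein_begin()/livein_end() iterators seen in the removed
loop:

#include <algorithm>

bool MachineBasicBlock::isLiveIn(unsigned Reg) const {
  return std::find(livein_begin(), livein_end(), Reg) != livein_end();
}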
diff --git a/lib/Target/Alpha/AlphaCodeEmitter.cpp b/lib/Target/Alpha/AlphaCodeEmitter.cpp
index eb5e4290ee4f..a6c6f52704f6 100644
--- a/lib/Target/Alpha/AlphaCodeEmitter.cpp
+++ b/lib/Target/Alpha/AlphaCodeEmitter.cpp
@@ -192,10 +192,13 @@ unsigned AlphaCodeEmitter::getMachineOpValue(const MachineInstr &MI,
llvm_unreachable("unknown relocatable instruction");
}
if (MO.isGlobal())
- MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(),
- Reloc, MO.getGlobal(), Offset,
- isa<Function>(MO.getGlobal()),
- useGOT));
+ MCE.addRelocation(MachineRelocation::getGV(
+ MCE.getCurrentPCOffset(),
+ Reloc,
+ const_cast<GlobalValue *>(MO.getGlobal()),
+ Offset,
+ isa<Function>(MO.getGlobal()),
+ useGOT));
else if (MO.isSymbol())
MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
Reloc, MO.getSymbolName(),
diff --git a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
index 5303d853cca2..d526dc0827b2 100644
--- a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
+++ b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
@@ -14,7 +14,6 @@
#include "Alpha.h"
#include "AlphaTargetMachine.h"
-#include "AlphaISelLowering.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -309,7 +308,7 @@ SDNode *AlphaDAGToDAGISel::Select(SDNode *N) {
T, CurDAG->getRegister(Alpha::F31, T),
CurDAG->getRegister(Alpha::F31, T));
} else {
- llvm_report_error("Unhandled FP constant type");
+ report_fatal_error("Unhandled FP constant type");
}
break;
}
diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp
index 94c6f80c0361..1d85f12c3ef9 100644
--- a/lib/Target/Alpha/AlphaISelLowering.cpp
+++ b/lib/Target/Alpha/AlphaISelLowering.cpp
@@ -13,6 +13,7 @@
#include "AlphaISelLowering.h"
#include "AlphaTargetMachine.h"
+#include "AlphaMachineFunctionInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -225,7 +226,7 @@ AlphaTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals) const {
// Alpha target does not yet support tail call optimization.
isTailCall = false;
@@ -342,7 +343,7 @@ AlphaTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals) const {
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
@@ -385,10 +386,12 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain,
const SmallVectorImpl<ISD::InputArg>
&Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals)
+ const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
+ AlphaMachineFunctionInfo *FuncInfo = MF.getInfo<AlphaMachineFunctionInfo>();
unsigned args_int[] = {
Alpha::R16, Alpha::R17, Alpha::R18, Alpha::R19, Alpha::R20, Alpha::R21};
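The setVarArgsOffset/setVarArgsBase calls below suggest per-function state of
roughly this shape; member types and the getVarArgsOffset accessor are
assumptions, as AlphaMachineFunctionInfo itself is not shown in this patch.

class AlphaMachineFunctionInfo : public MachineFunctionInfo {
  int VarArgsBase;         // frame index of the first vararg register slot
  unsigned VarArgsOffset;  // byte offset where stack-passed varargs begin
public:
  AlphaMachineFunctionInfo() : VarArgsBase(0), VarArgsOffset(0) {}
  int getVarArgsBase() const { return VarArgsBase; }
  void setVarArgsBase(int FI) { VarArgsBase = FI; }
  unsigned getVarArgsOffset() const { return VarArgsOffset; }
  void setVarArgsOffset(unsigned Off) { VarArgsOffset = Off; }
};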
@@ -435,14 +438,14 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain,
// If the function takes a variable number of arguments, copy all regs to stack
if (isVarArg) {
- VarArgsOffset = Ins.size() * 8;
+ FuncInfo->setVarArgsOffset(Ins.size() * 8);
std::vector<SDValue> LS;
for (int i = 0; i < 6; ++i) {
if (TargetRegisterInfo::isPhysicalRegister(args_int[i]))
args_int[i] = AddLiveIn(MF, args_int[i], &Alpha::GPRCRegClass);
SDValue argt = DAG.getCopyFromReg(Chain, dl, args_int[i], MVT::i64);
int FI = MFI->CreateFixedObject(8, -8 * (6 - i), true, false);
- if (i == 0) VarArgsBase = FI;
+ if (i == 0) FuncInfo->setVarArgsBase(FI);
SDValue SDFI = DAG.getFrameIndex(FI, MVT::i64);
LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0,
false, false, 0));
@@ -467,7 +470,7 @@ SDValue
AlphaTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
- DebugLoc dl, SelectionDAG &DAG) {
+ DebugLoc dl, SelectionDAG &DAG) const {
SDValue Copy = DAG.getCopyToReg(Chain, dl, Alpha::R26,
DAG.getNode(AlphaISD::GlobalRetAddr,
@@ -525,7 +528,8 @@ AlphaTargetLowering::LowerReturn(SDValue Chain,
}
void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain,
- SDValue &DataPtr, SelectionDAG &DAG) {
+ SDValue &DataPtr,
+ SelectionDAG &DAG) const {
Chain = N->getOperand(0);
SDValue VAListP = N->getOperand(1);
const Value *VAListS = cast<SrcValueSDNode>(N->getOperand(2))->getValue();
@@ -556,7 +560,8 @@ void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain,
/// LowerOperation - Provide custom lowering hooks for some operations.
///
-SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+SDValue AlphaTargetLowering::LowerOperation(SDValue Op,
+ SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
switch (Op.getOpcode()) {
default: llvm_unreachable("Wasn't expecting to be able to lower this!");
@@ -624,7 +629,7 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
}
case ISD::ConstantPool: {
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
- Constant *C = CP->getConstVal();
+ const Constant *C = CP->getConstVal();
SDValue CPI = DAG.getTargetConstantPool(C, MVT::i64, CP->getAlignment());
// FIXME there isn't really any debug info here
@@ -637,7 +642,7 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
llvm_unreachable("TLS not implemented for Alpha.");
case ISD::GlobalAddress: {
GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
- GlobalValue *GV = GSDN->getGlobal();
+ const GlobalValue *GV = GSDN->getGlobal();
SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i64, GSDN->getOffset());
// FIXME there isn't really any debug info here
@@ -725,17 +730,22 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
false, false, 0);
}
case ISD::VASTART: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ AlphaMachineFunctionInfo *FuncInfo = MF.getInfo<AlphaMachineFunctionInfo>();
+
SDValue Chain = Op.getOperand(0);
SDValue VAListP = Op.getOperand(1);
const Value *VAListS = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
// vastart stores the address of the VarArgsBase and VarArgsOffset
- SDValue FR = DAG.getFrameIndex(VarArgsBase, MVT::i64);
+ SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsBase(), MVT::i64);
SDValue S1 = DAG.getStore(Chain, dl, FR, VAListP, VAListS, 0,
false, false, 0);
SDValue SA2 = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP,
DAG.getConstant(8, MVT::i64));
- return DAG.getTruncStore(S1, dl, DAG.getConstant(VarArgsOffset, MVT::i64),
+ return DAG.getTruncStore(S1, dl,
+ DAG.getConstant(FuncInfo->getVarArgsOffset(),
+ MVT::i64),
SA2, NULL, 0, MVT::i32, false, false, 0);
}
case ISD::RETURNADDR:
@@ -749,7 +759,7 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
void AlphaTargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue>&Results,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
DebugLoc dl = N->getDebugLoc();
assert(N->getValueType(0) == MVT::i32 &&
N->getOpcode() == ISD::VAARG &&
@@ -822,8 +832,7 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint,
MachineBasicBlock *
AlphaTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB,
- DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
+ MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
assert((MI->getOpcode() == Alpha::CAS32 ||
MI->getOpcode() == Alpha::CAS64 ||
@@ -854,11 +863,6 @@ AlphaTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *llscMBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
- // Inform sdisel of the edge changes.
- for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
- E = BB->succ_end(); I != E; ++I)
- EM->insert(std::make_pair(*I, sinkMBB));
-
sinkMBB->transferSuccessors(thisMBB);
F->insert(It, llscMBB);
diff --git a/lib/Target/Alpha/AlphaISelLowering.h b/lib/Target/Alpha/AlphaISelLowering.h
index 0f17025b7747..7ee823a86a4d 100644
--- a/lib/Target/Alpha/AlphaISelLowering.h
+++ b/lib/Target/Alpha/AlphaISelLowering.h
@@ -60,8 +60,6 @@ namespace llvm {
}
class AlphaTargetLowering : public TargetLowering {
- int VarArgsOffset; // What is the offset to the first vaarg
- int VarArgsBase; // What is the base FrameIndex
public:
explicit AlphaTargetLowering(TargetMachine &TM);
@@ -70,13 +68,13 @@ namespace llvm {
/// LowerOperation - Provide custom lowering hooks for some operations.
///
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
/// ReplaceNodeResults - Replace the results of node with an illegal result
/// type with new values built out of custom code.
///
virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
- SelectionDAG &DAG);
+ SelectionDAG &DAG) const;
// Friendly names for dumps
const char *getTargetNodeName(unsigned Opcode) const;
@@ -85,7 +83,7 @@ namespace llvm {
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
ConstraintType getConstraintType(const std::string &Constraint) const;
@@ -93,9 +91,9 @@ namespace llvm {
getRegClassForInlineAsmConstraint(const std::string &Constraint,
EVT VT) const;
- MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB,
- DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const;
+ MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
@@ -110,14 +108,14 @@ namespace llvm {
private:
// Helpers for custom lowering.
void LowerVAARG(SDNode *N, SDValue &Chain, SDValue &DataPtr,
- SelectionDAG &DAG);
+ SelectionDAG &DAG) const;
virtual SDValue
LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
LowerCall(SDValue Chain, SDValue Callee,
@@ -125,13 +123,13 @@ namespace llvm {
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
- DebugLoc dl, SelectionDAG &DAG);
+ DebugLoc dl, SelectionDAG &DAG) const;
};
}
diff --git a/lib/Target/Alpha/AlphaJITInfo.cpp b/lib/Target/Alpha/AlphaJITInfo.cpp
index cb8eb514656f..12685ed17e3c 100644
--- a/lib/Target/Alpha/AlphaJITInfo.cpp
+++ b/lib/Target/Alpha/AlphaJITInfo.cpp
@@ -103,7 +103,7 @@ extern "C" {
asm(
".text\n"
- ".globl AlphaComilationCallbackC\n"
+ ".globl AlphaCompilationCallbackC\n"
".align 4\n"
".globl AlphaCompilationCallback\n"
".ent AlphaCompilationCallback\n"
diff --git a/lib/Target/Alpha/AlphaMachineFunctionInfo.h b/lib/Target/Alpha/AlphaMachineFunctionInfo.h
index 8221fc7a7c97..186738c20c70 100644
--- a/lib/Target/Alpha/AlphaMachineFunctionInfo.h
+++ b/lib/Target/Alpha/AlphaMachineFunctionInfo.h
@@ -30,17 +30,31 @@ class AlphaMachineFunctionInfo : public MachineFunctionInfo {
/// the return address value.
unsigned GlobalRetAddr;
+ /// VarArgsOffset - What is the offset to the first vaarg
+ int VarArgsOffset;
+ /// VarArgsBase - What is the base FrameIndex
+ int VarArgsBase;
+
public:
- AlphaMachineFunctionInfo() : GlobalBaseReg(0), GlobalRetAddr(0) {}
+ AlphaMachineFunctionInfo() : GlobalBaseReg(0), GlobalRetAddr(0),
+ VarArgsOffset(0), VarArgsBase(0) {}
explicit AlphaMachineFunctionInfo(MachineFunction &MF) : GlobalBaseReg(0),
- GlobalRetAddr(0) {}
+ GlobalRetAddr(0),
+ VarArgsOffset(0),
+ VarArgsBase(0) {}
unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
unsigned getGlobalRetAddr() const { return GlobalRetAddr; }
void setGlobalRetAddr(unsigned Reg) { GlobalRetAddr = Reg; }
+
+ int getVarArgsOffset() const { return VarArgsOffset; }
+ void setVarArgsOffset(int Offset) { VarArgsOffset = Offset; }
+
+ int getVarArgsBase() const { return VarArgsBase; }
+ void setVarArgsBase(int Base) { VarArgsBase = Base; }
};
} // End llvm namespace
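With the varargs slots moved off the (now const) AlphaTargetLowering into this per-function info object, lowering code reaches them through the MachineFunction. A condensed sketch of the access pattern used by the hunks above and by the ISD::VASTART case earlier in this diff:

  // Fetch the per-function state inside a lowering hook.
  MachineFunction &MF = DAG.getMachineFunction();
  AlphaMachineFunctionInfo *FuncInfo = MF.getInfo<AlphaMachineFunctionInfo>();

  // LowerFormalArguments records the varargs layout once per function...
  FuncInfo->setVarArgsOffset(Ins.size() * 8);
  FuncInfo->setVarArgsBase(FI);

  // ...and VASTART lowering reads it back later.
  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsBase(), MVT::i64);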
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp
index 16a23cc120fb..c083d8c30b16 100644
--- a/lib/Target/Alpha/AlphaRegisterInfo.cpp
+++ b/lib/Target/Alpha/AlphaRegisterInfo.cpp
@@ -212,15 +212,14 @@ void AlphaRegisterInfo::emitPrologue(MachineFunction &MF) const {
//handle GOP offset
BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAHg), Alpha::R29)
- .addGlobalAddress(const_cast<Function*>(MF.getFunction()))
+ .addGlobalAddress(MF.getFunction())
.addReg(Alpha::R27).addImm(++curgpdist);
BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAg), Alpha::R29)
- .addGlobalAddress(const_cast<Function*>(MF.getFunction()))
+ .addGlobalAddress(MF.getFunction())
.addReg(Alpha::R29).addImm(curgpdist);
- //evil const_cast until MO stuff setup to handle const
BuildMI(MBB, MBBI, dl, TII.get(Alpha::ALTENT))
- .addGlobalAddress(const_cast<Function*>(MF.getFunction()));
+ .addGlobalAddress(MF.getFunction());
// Get the number of bytes to allocate from the FrameInfo
long NumBytes = MFI->getStackSize();
@@ -248,10 +247,7 @@ void AlphaRegisterInfo::emitPrologue(MachineFunction &MF) const {
BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30)
.addImm(getLower16(NumBytes)).addReg(Alpha::R30);
} else {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "Too big a stack frame at " << NumBytes;
- llvm_report_error(Msg.str());
+ report_fatal_error("Too big a stack frame at " + Twine(NumBytes));
}
//now if we need to, save the old FP and set the new
@@ -300,10 +296,7 @@ void AlphaRegisterInfo::emitEpilogue(MachineFunction &MF,
BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30)
.addImm(getLower16(NumBytes)).addReg(Alpha::R30);
} else {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "Too big a stack frame at " << NumBytes;
- llvm_report_error(Msg.str());
+ report_fatal_error("Too big a stack frame at " + Twine(NumBytes));
}
}
}
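The error-reporting rewrite above is mechanical: report_fatal_error takes a Twine, so the temporary buffer and stream disappear. Side by side, with the message from this hunk:

  // Old API: build the message eagerly in a string buffer.
  std::string msg;
  raw_string_ostream Msg(msg);
  Msg << "Too big a stack frame at " << NumBytes;
  llvm_report_error(Msg.str());

  // New API: Twine concatenates the literal and the number on demand.
  report_fatal_error("Too big a stack frame at " + Twine(NumBytes));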
diff --git a/lib/Target/Alpha/AlphaSchedule.td b/lib/Target/Alpha/AlphaSchedule.td
index b7b456084709..4dc04b88a70b 100644
--- a/lib/Target/Alpha/AlphaSchedule.td
+++ b/lib/Target/Alpha/AlphaSchedule.td
@@ -53,7 +53,8 @@ def s_pseudo : InstrItinClass;
//Table 2-4 Instruction Class Latency in Cycles
//modified some
-def Alpha21264Itineraries : ProcessorItineraries<[
+def Alpha21264Itineraries : ProcessorItineraries<
+ [L0, L1, FST0, FST1, U0, U1, FA, FM], [
InstrItinData<s_ild , [InstrStage<3, [L0, L1]>]>,
InstrItinData<s_fld , [InstrStage<4, [L0, L1]>]>,
InstrItinData<s_ist , [InstrStage<0, [L0, L1]>]>,
diff --git a/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp b/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..0eb7b8f28a68
--- /dev/null
+++ b/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp
@@ -0,0 +1,22 @@
+//===-- AlphaSelectionDAGInfo.cpp - Alpha SelectionDAG Info ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AlphaSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "alpha-selectiondag-info"
+#include "AlphaSelectionDAGInfo.h"
+using namespace llvm;
+
+AlphaSelectionDAGInfo::AlphaSelectionDAGInfo() {
+}
+
+AlphaSelectionDAGInfo::~AlphaSelectionDAGInfo() {
+}
diff --git a/lib/Target/Alpha/AlphaSelectionDAGInfo.h b/lib/Target/Alpha/AlphaSelectionDAGInfo.h
new file mode 100644
index 000000000000..70889ae422e0
--- /dev/null
+++ b/lib/Target/Alpha/AlphaSelectionDAGInfo.h
@@ -0,0 +1,29 @@
+//===-- AlphaSelectionDAGInfo.h - Alpha SelectionDAG Info -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Alpha subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHASELECTIONDAGINFO_H
+#define ALPHASELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class AlphaSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ AlphaSelectionDAGInfo();
+ ~AlphaSelectionDAGInfo();
+};
+
+}
+
+#endif
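This commit adds one TargetSelectionDAGInfo subclass per target; the same boilerplate recurs below for Blackfin and CellSPU, and MBlaze gets one via its CMakeLists. How a target machine would expose the object is not shown in these hunks; a plausible accessor, mirroring the const-correct getTargetLowering accessors elsewhere in this diff (the member and method names here are assumptions):

  class AlphaTargetMachine : public LLVMTargetMachine {
    AlphaSelectionDAGInfo TSInfo; // assumed member, not part of this diff
  public:
    virtual const AlphaSelectionDAGInfo *getSelectionDAGInfo() const {
      return &TSInfo;
    }
    // ...
  };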
diff --git a/lib/Target/Alpha/AlphaTargetMachine.h b/lib/Target/Alpha/AlphaTargetMachine.h
index 6f3a774a1eaf..0990f6d7032b 100644
--- a/lib/Target/Alpha/AlphaTargetMachine.h
+++ b/lib/Target/Alpha/AlphaTargetMachine.h
@@ -44,8 +44,8 @@ public:
virtual const AlphaRegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
}
- virtual AlphaTargetLowering* getTargetLowering() const {
- return const_cast<AlphaTargetLowering*>(&TLInfo);
+ virtual const AlphaTargetLowering* getTargetLowering() const {
+ return &TLInfo;
}
virtual const TargetData *getTargetData() const { return &DataLayout; }
virtual AlphaJITInfo* getJITInfo() {
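The accessor change falls out of the const-correctness work in this commit: once LowerOperation, LowerCall, and the other hooks are const member functions, callers need only a const pointer and the const_cast becomes dead weight. Side by side:

  // Before: cast away constness to satisfy the non-const hooks.
  //   virtual AlphaTargetLowering* getTargetLowering() const {
  //     return const_cast<AlphaTargetLowering*>(&TLInfo);
  //   }
  // After: the hooks are const, so const propagates cleanly.
  virtual const AlphaTargetLowering* getTargetLowering() const {
    return &TLInfo;
  }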
diff --git a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
index 2a1f5559053f..9f4aff6fd5f5 100644
--- a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
+++ b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
@@ -73,7 +73,7 @@ namespace {
void AlphaAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
raw_ostream &O) {
const MachineOperand &MO = MI->getOperand(opNum);
- if (MO.getType() == MachineOperand::MO_Register) {
+ if (MO.isReg()) {
assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
"Not physreg??");
O << getRegisterName(MO.getReg());
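MO.isReg() is shorthand for the enum comparison it replaces; the MBlaze printer hunks later in this diff make the same substitution for immediates with MO.isImm(). A condensed sketch (not the printer's full operand switch):

  const MachineOperand &MO = MI->getOperand(opNum);
  if (MO.isReg())       // same as MO.getType() == MachineOperand::MO_Register
    O << getRegisterName(MO.getReg());
  else if (MO.isImm())  // same as MO.getType() == MachineOperand::MO_Immediate
    O << MO.getImm();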
diff --git a/lib/Target/Alpha/CMakeLists.txt b/lib/Target/Alpha/CMakeLists.txt
index b4f41aebd8db..fbf7f3ab6b30 100644
--- a/lib/Target/Alpha/CMakeLists.txt
+++ b/lib/Target/Alpha/CMakeLists.txt
@@ -23,6 +23,7 @@ add_llvm_target(AlphaCodeGen
AlphaRegisterInfo.cpp
AlphaSubtarget.cpp
AlphaTargetMachine.cpp
+ AlphaSelectionDAGInfo.cpp
)
target_link_libraries (LLVMAlphaCodeGen LLVMSelectionDAG)
diff --git a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
index c8d71aabd1bd..b4da96cba59e 100644
--- a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
+++ b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "Blackfin.h"
-#include "BlackfinISelLowering.h"
#include "BlackfinTargetMachine.h"
#include "BlackfinRegisterInfo.h"
#include "llvm/Intrinsics.h"
diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp
index 5ce201347dc8..adf211881870 100644
--- a/lib/Target/Blackfin/BlackfinISelLowering.cpp
+++ b/lib/Target/Blackfin/BlackfinISelLowering.cpp
@@ -139,15 +139,16 @@ MVT::SimpleValueType BlackfinTargetLowering::getSetCCResultType(EVT VT) const {
}
SDValue BlackfinTargetLowering::LowerGlobalAddress(SDValue Op,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
DebugLoc DL = Op.getDebugLoc();
- GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
Op = DAG.getTargetGlobalAddress(GV, MVT::i32);
return DAG.getNode(BFISD::Wrapper, DL, MVT::i32, Op);
}
-SDValue BlackfinTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
+SDValue BlackfinTargetLowering::LowerJumpTable(SDValue Op,
+ SelectionDAG &DAG) const {
DebugLoc DL = Op.getDebugLoc();
int JTI = cast<JumpTableSDNode>(Op)->getIndex();
@@ -161,7 +162,8 @@ BlackfinTargetLowering::LowerFormalArguments(SDValue Chain,
const SmallVectorImpl<ISD::InputArg>
&Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals)
+ const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -218,7 +220,7 @@ SDValue
BlackfinTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
- DebugLoc dl, SelectionDAG &DAG) {
+ DebugLoc dl, SelectionDAG &DAG) const {
// CCValAssign - represent the assignment of the return value to locations.
SmallVector<CCValAssign, 16> RVLocs;
@@ -278,7 +280,7 @@ BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals) const {
// Blackfin target does not yet support tail call optimization.
isTailCall = false;
@@ -414,7 +416,7 @@ BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Expansion of ADDE / SUBE. This is a bit involved since blackfin doesn't have
// add-with-carry instructions.
-SDValue BlackfinTargetLowering::LowerADDE(SDValue Op, SelectionDAG &DAG) {
+SDValue BlackfinTargetLowering::LowerADDE(SDValue Op, SelectionDAG &DAG) const {
// Operands: lhs, rhs, carry-in (AC0 flag)
// Results: sum, carry-out (AC0 flag)
DebugLoc dl = Op.getDebugLoc();
@@ -448,7 +450,8 @@ SDValue BlackfinTargetLowering::LowerADDE(SDValue Op, SelectionDAG &DAG) {
return DAG.getMergeValues(ops, 2, dl);
}
-SDValue BlackfinTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+SDValue BlackfinTargetLowering::LowerOperation(SDValue Op,
+ SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default:
Op.getNode()->dump();
@@ -468,7 +471,7 @@ SDValue BlackfinTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
void
BlackfinTargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
DebugLoc dl = N->getDebugLoc();
switch (N->getOpcode()) {
default:
diff --git a/lib/Target/Blackfin/BlackfinISelLowering.h b/lib/Target/Blackfin/BlackfinISelLowering.h
index 5f399103f157..a7842482687f 100644
--- a/lib/Target/Blackfin/BlackfinISelLowering.h
+++ b/lib/Target/Blackfin/BlackfinISelLowering.h
@@ -30,16 +30,13 @@ namespace llvm {
}
class BlackfinTargetLowering : public TargetLowering {
- int VarArgsFrameOffset; // Frame offset to start of varargs area.
public:
BlackfinTargetLowering(TargetMachine &TM);
virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
virtual void ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG);
-
- int getVarArgsFrameOffset() const { return VarArgsFrameOffset; }
+ SelectionDAG &DAG) const;
ConstraintType getConstraintType(const std::string &Constraint) const;
std::pair<unsigned, const TargetRegisterClass*>
@@ -52,29 +49,29 @@ namespace llvm {
unsigned getFunctionAlignment(const Function *F) const;
private:
- SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG);
- SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG);
- SDValue LowerADDE(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerADDE(SDValue Op, SelectionDAG &DAG) const;
virtual SDValue
LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
- DebugLoc dl, SelectionDAG &DAG);
+ DebugLoc dl, SelectionDAG &DAG) const;
};
} // end namespace llvm
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
index 6fd610fa3b53..2512c9b7fb1d 100644
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
@@ -110,7 +110,8 @@ BlackfinRegisterInfo::getPhysicalRegisterRegClass(unsigned reg, EVT VT) const {
// if frame pointer elimination is disabled.
bool BlackfinRegisterInfo::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- return NoFramePointerElim || MFI->hasCalls() || MFI->hasVarSizedObjects();
+ return DisableFramePointerElim(MF) ||
+ MFI->hasCalls() || MFI->hasVarSizedObjects();
}
bool BlackfinRegisterInfo::
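DisableFramePointerElim(MF) takes the MachineFunction into account, so the frame-pointer decision can vary per function, where the old NoFramePointerElim was a bare global flag; SPURegisterInfo::needsFP below gets the same substitution. The resulting predicate:

  bool BlackfinRegisterInfo::hasFP(const MachineFunction &MF) const {
    const MachineFrameInfo *MFI = MF.getFrameInfo();
    // Per-function query replaces the global -disable-fp-elim check.
    return DisableFramePointerElim(MF) ||
           MFI->hasCalls() || MFI->hasVarSizedObjects();
  }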
diff --git a/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..f4bb25fb7529
--- /dev/null
+++ b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp
@@ -0,0 +1,22 @@
+//===-- BlackfinSelectionDAGInfo.cpp - Blackfin SelectionDAG Info ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the BlackfinSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "blackfin-selectiondag-info"
+#include "BlackfinSelectionDAGInfo.h"
+using namespace llvm;
+
+BlackfinSelectionDAGInfo::BlackfinSelectionDAGInfo() {
+}
+
+BlackfinSelectionDAGInfo::~BlackfinSelectionDAGInfo() {
+}
diff --git a/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h
new file mode 100644
index 000000000000..a620330d18d4
--- /dev/null
+++ b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h
@@ -0,0 +1,29 @@
+//===-- BlackfinSelectionDAGInfo.h - Blackfin SelectionDAG Info -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Blackfin subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BLACKFINSELECTIONDAGINFO_H
+#define BLACKFINSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class BlackfinSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ BlackfinSelectionDAGInfo();
+ ~BlackfinSelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.h b/lib/Target/Blackfin/BlackfinTargetMachine.h
index a14052bc4db5..07e73944143d 100644
--- a/lib/Target/Blackfin/BlackfinTargetMachine.h
+++ b/lib/Target/Blackfin/BlackfinTargetMachine.h
@@ -43,8 +43,8 @@ namespace llvm {
virtual const BlackfinRegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
}
- virtual BlackfinTargetLowering* getTargetLowering() const {
- return const_cast<BlackfinTargetLowering*>(&TLInfo);
+ virtual const BlackfinTargetLowering* getTargetLowering() const {
+ return &TLInfo;
}
virtual const TargetData *getTargetData() const { return &DataLayout; }
virtual bool addInstSelector(PassManagerBase &PM,
diff --git a/lib/Target/Blackfin/CMakeLists.txt b/lib/Target/Blackfin/CMakeLists.txt
index deb005d89eb5..f8847d057da6 100644
--- a/lib/Target/Blackfin/CMakeLists.txt
+++ b/lib/Target/Blackfin/CMakeLists.txt
@@ -20,4 +20,5 @@ add_llvm_target(BlackfinCodeGen
BlackfinRegisterInfo.cpp
BlackfinSubtarget.cpp
BlackfinTargetMachine.cpp
+ BlackfinSelectionDAGInfo.cpp
)
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
index 0c265adf7419..67f513b09860 100644
--- a/lib/Target/CBackend/CBackend.cpp
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -274,8 +274,8 @@ namespace {
// isInlineAsm - Check if the instruction is a call to an inline asm chunk
static bool isInlineAsm(const Instruction& I) {
- if (isa<CallInst>(&I) && isa<InlineAsm>(I.getOperand(0)))
- return true;
+ if (const CallInst *CI = dyn_cast<CallInst>(&I))
+ return isa<InlineAsm>(CI->getCalledValue());
return false;
}
@@ -473,8 +473,9 @@ void CWriter::printStructReturnPointerFunctionType(raw_ostream &Out,
PrintedType = true;
}
if (FTy->isVarArg()) {
- if (PrintedType)
- FunctionInnards << ", ...";
+ if (!PrintedType)
+ FunctionInnards << " int"; //dummy argument for empty vararg functs
+ FunctionInnards << ", ...";
} else if (!PrintedType) {
FunctionInnards << "void";
}
@@ -568,8 +569,9 @@ raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty,
++Idx;
}
if (FTy->isVarArg()) {
- if (FTy->getNumParams())
- FunctionInnards << ", ...";
+ if (!FTy->getNumParams())
+ FunctionInnards << " int"; //dummy argument for empty vaarg functs
+ FunctionInnards << ", ...";
} else if (!FTy->getNumParams()) {
FunctionInnards << "void";
}
@@ -1344,7 +1346,7 @@ void CWriter::writeInstComputationInline(Instruction &I) {
Ty!=Type::getInt16Ty(I.getContext()) &&
Ty!=Type::getInt32Ty(I.getContext()) &&
Ty!=Type::getInt64Ty(I.getContext()))) {
- llvm_report_error("The C backend does not currently support integer "
+ report_fatal_error("The C backend does not currently support integer "
"types of widths other than 1, 8, 16, 32, 64.\n"
"This is being tracked as PR 4158.");
}
@@ -2237,12 +2239,16 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
}
}
+ if (!PrintedArg && FT->isVarArg()) {
+ FunctionInnards << "int vararg_dummy_arg";
+ PrintedArg = true;
+ }
+
// Finish printing arguments... if this is a vararg function, print the ...,
// unless there are no known types, in which case, we just emit ().
//
if (FT->isVarArg() && PrintedArg) {
- if (PrintedArg) FunctionInnards << ", ";
- FunctionInnards << "..."; // Output varargs portion of signature!
+ FunctionInnards << ",..."; // Output varargs portion of signature!
} else if (!FT->isVarArg() && !PrintedArg) {
FunctionInnards << "void"; // ret() -> ret(void) in C.
}
@@ -2858,7 +2864,7 @@ void CWriter::lowerIntrinsics(Function &F) {
}
void CWriter::visitCallInst(CallInst &I) {
- if (isa<InlineAsm>(I.getOperand(0)))
+ if (isa<InlineAsm>(I.getCalledValue()))
return visitInlineAsm(I);
bool WroteCallee = false;
@@ -2928,6 +2934,12 @@ void CWriter::visitCallInst(CallInst &I) {
Out << '(';
+ bool PrintedArg = false;
+ if (FTy->isVarArg() && !FTy->getNumParams()) {
+ Out << "0 /*dummy arg*/";
+ PrintedArg = true;
+ }
+
unsigned NumDeclaredParams = FTy->getNumParams();
CallSite::arg_iterator AI = I.op_begin()+1, AE = I.op_end();
@@ -2937,7 +2949,7 @@ void CWriter::visitCallInst(CallInst &I) {
++ArgNo;
}
- bool PrintedArg = false;
+
for (; AI != AE; ++AI, ++ArgNo) {
if (PrintedArg) Out << ", ";
if (ArgNo < NumDeclaredParams &&
@@ -2987,15 +2999,10 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
writeOperand(I.getOperand(1));
Out << ", ";
// Output the last argument to the enclosing function.
- if (I.getParent()->getParent()->arg_empty()) {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "The C backend does not currently support zero "
- << "argument varargs functions, such as '"
- << I.getParent()->getParent()->getName() << "'!";
- llvm_report_error(Msg.str());
- }
- writeOperand(--I.getParent()->getParent()->arg_end());
+ if (I.getParent()->getParent()->arg_empty())
+ Out << "vararg_dummy_arg";
+ else
+ writeOperand(--I.getParent()->getParent()->arg_end());
Out << ')';
return true;
case Intrinsic::vaend:
@@ -3165,7 +3172,7 @@ static std::string gccifyAsm(std::string asmstr) {
//TODO: assumptions about what consumes arguments from the call are likely wrong
// handle commutativity
void CWriter::visitInlineAsm(CallInst &CI) {
- InlineAsm* as = cast<InlineAsm>(CI.getOperand(0));
+ InlineAsm* as = cast<InlineAsm>(CI.getCalledValue());
std::vector<InlineAsm::ConstraintInfo> Constraints = as->ParseConstraints();
std::vector<std::pair<Value*, int> > ResultVals;
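The dummy-argument changes above work around an ISO C restriction: a prototype may only end in "..." if at least one named parameter precedes it, so "void f(...)" is not legal C even though the IR allows zero-argument varargs functions. What the backend now emits, sketched for an IR declaration like "declare void @f(...)" (the name f is illustrative):

  #include <stdarg.h>

  /* Prototype: the dummy named parameter makes the C legal. */
  void f(int vararg_dummy_arg, ...);

  /* Definition: va_start anchors on the dummy parameter. */
  void f(int vararg_dummy_arg, ...) {
    va_list ap;
    va_start(ap, vararg_dummy_arg);
    va_end(ap);
  }

  /* Call sites pass a matching placeholder. */
  void g(void) { f(0 /*dummy arg*/); }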
diff --git a/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt b/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt
index 1e508fe18908..8a2b59a88a68 100644
--- a/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt
+++ b/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt
@@ -1,9 +1,9 @@
-include_directories(
- ${CMAKE_CURRENT_BINARY_DIR}/..
- ${CMAKE_CURRENT_SOURCE_DIR}/..
- )
-
-add_llvm_library(LLVMCellSPUAsmPrinter
+include_directories(
+ ${CMAKE_CURRENT_BINARY_DIR}/..
+ ${CMAKE_CURRENT_SOURCE_DIR}/..
+ )
+
+add_llvm_library(LLVMCellSPUAsmPrinter
SPUAsmPrinter.cpp
- )
-add_dependencies(LLVMCellSPUAsmPrinter CellSPUCodeGenTable_gen) \ No newline at end of file
+ )
+add_dependencies(LLVMCellSPUAsmPrinter CellSPUCodeGenTable_gen)
diff --git a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
index 0ef36e550d03..3e955310b513 100644
--- a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
+++ b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
@@ -280,7 +280,7 @@ namespace {
void SPUAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) {
switch (MO.getType()) {
case MachineOperand::MO_Immediate:
- llvm_report_error("printOp() does not handle immediate values");
+ report_fatal_error("printOp() does not handle immediate values");
return;
case MachineOperand::MO_MachineBasicBlock:
@@ -307,7 +307,7 @@ void SPUAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) {
// External or weakly linked global variables need non-lazily-resolved
// stubs
if (TM.getRelocationModel() != Reloc::Static) {
- GlobalValue *GV = MO.getGlobal();
+ const GlobalValue *GV = MO.getGlobal();
if (((GV->isDeclaration() || GV->hasWeakLinkage() ||
GV->hasLinkOnceLinkage() || GV->hasCommonLinkage()))) {
O << *GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
diff --git a/lib/Target/CellSPU/CMakeLists.txt b/lib/Target/CellSPU/CMakeLists.txt
index 0cb6676d7df7..ddfca37d23e3 100644
--- a/lib/Target/CellSPU/CMakeLists.txt
+++ b/lib/Target/CellSPU/CMakeLists.txt
@@ -21,6 +21,7 @@ add_llvm_target(CellSPUCodeGen
SPURegisterInfo.cpp
SPUSubtarget.cpp
SPUTargetMachine.cpp
+ SPUSelectionDAGInfo.cpp
)
target_link_libraries (LLVMCellSPUCodeGen LLVMSelectionDAG)
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
index 90f83100cfab..c3c2b3947e06 100644
--- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -14,7 +14,6 @@
#include "SPU.h"
#include "SPUTargetMachine.h"
-#include "SPUISelLowering.h"
#include "SPUHazardRecognizers.h"
#include "SPUFrameInfo.h"
#include "SPURegisterNames.h"
@@ -194,11 +193,8 @@ namespace {
#ifndef NDEBUG
if (retval == 0) {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "SPUISelDAGToDAG.cpp: getValueTypeMapEntry returns NULL for "
- << VT.getEVTString();
- llvm_report_error(Msg.str());
+ report_fatal_error("SPUISelDAGToDAG.cpp: getValueTypeMapEntry returns"
+ "NULL for " + Twine(VT.getEVTString()));
}
#endif
@@ -242,8 +238,8 @@ namespace {
class SPUDAGToDAGISel :
public SelectionDAGISel
{
- SPUTargetMachine &TM;
- SPUTargetLowering &SPUtli;
+ const SPUTargetMachine &TM;
+ const SPUTargetLowering &SPUtli;
unsigned GlobalBaseReg;
public:
@@ -305,16 +301,15 @@ namespace {
std::vector<Constant*> CV;
for (size_t i = 0; i < bvNode->getNumOperands(); ++i) {
- ConstantSDNode *V = dyn_cast<ConstantSDNode > (bvNode->getOperand(i));
+ ConstantSDNode *V = cast<ConstantSDNode>(bvNode->getOperand(i));
CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
}
- Constant *CP = ConstantVector::get(CV);
+ const Constant *CP = ConstantVector::get(CV);
SDValue CPIdx = CurDAG->getConstantPool(CP, SPUtli.getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
SDValue CGPoolOffset =
- SPU::LowerConstantPool(CPIdx, *CurDAG,
- SPUtli.getSPUTargetMachine());
+ SPU::LowerConstantPool(CPIdx, *CurDAG, TM);
HandleSDNode Dummy(CurDAG->getLoad(vecVT, dl,
CurDAG->getEntryNode(), CGPoolOffset,
@@ -433,13 +428,13 @@ SPUDAGToDAGISel::SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base,
case ISD::Constant:
case ISD::ConstantPool:
case ISD::GlobalAddress:
- llvm_report_error("SPU SelectAFormAddr: Constant/Pool/Global not lowered.");
+ report_fatal_error("SPU SelectAFormAddr: Constant/Pool/Global not lowered.");
/*NOTREACHED*/
case ISD::TargetConstant:
case ISD::TargetGlobalAddress:
case ISD::TargetJumpTable:
- llvm_report_error("SPUSelectAFormAddr: Target Constant/Pool/Global "
+ report_fatal_error("SPUSelectAFormAddr: Target Constant/Pool/Global "
"not wrapped as A-form address.");
/*NOTREACHED*/
@@ -457,7 +452,7 @@ SPUDAGToDAGISel::SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base,
case ISD::TargetGlobalAddress: {
GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op0);
- GlobalValue *GV = GSDN->getGlobal();
+ const GlobalValue *GV = GSDN->getGlobal();
if (GV->getAlignment() == 16) {
Base = Op0;
Index = Zero;
@@ -510,7 +505,7 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base,
if (Opc == ISD::FrameIndex) {
// Stack frame index must be less than 512 (divided by 16):
- FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N);
+ FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(N);
int FI = int(FIN->getIndex());
DEBUG(errs() << "SelectDFormAddr: ISD::FrameIndex = "
<< FI << "\n");
@@ -531,11 +526,11 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base,
return true;
} else if (Op1.getOpcode() == ISD::Constant
|| Op1.getOpcode() == ISD::TargetConstant) {
- ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1);
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
int32_t offset = int32_t(CN->getSExtValue());
if (Op0.getOpcode() == ISD::FrameIndex) {
- FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op0);
+ FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op0);
int FI = int(FIN->getIndex());
DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset
<< " frame index = " << FI << "\n");
@@ -552,11 +547,11 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base,
}
} else if (Op0.getOpcode() == ISD::Constant
|| Op0.getOpcode() == ISD::TargetConstant) {
- ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op0);
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op0);
int32_t offset = int32_t(CN->getSExtValue());
if (Op1.getOpcode() == ISD::FrameIndex) {
- FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op1);
+ FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op1);
int FI = int(FIN->getIndex());
DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset
<< " frame index = " << FI << "\n");
@@ -725,7 +720,7 @@ SPUDAGToDAGISel::Select(SDNode *N) {
switch (Op0VT.getSimpleVT().SimpleTy) {
default:
- llvm_report_error("CellSPU Select: Unhandled zero/any extend EVT");
+ report_fatal_error("CellSPU Select: Unhandled zero/any extend EVT");
/*NOTREACHED*/
case MVT::i32:
shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
@@ -915,11 +910,8 @@ SPUDAGToDAGISel::Select(SDNode *N) {
const valtype_map_s *vtm = getValueTypeMapEntry(VT);
if (vtm->ldresult_ins == 0) {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "LDRESULT for unsupported type: "
- << VT.getEVTString();
- llvm_report_error(Msg.str());
+ report_fatal_error("LDRESULT for unsupported type: " +
+ Twine(VT.getEVTString()));
}
Opc = vtm->ldresult_ins;
@@ -1252,7 +1244,7 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT,
return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT,
SDValue(emitBuildVector(i64vec.getNode()), 0));
} else {
- llvm_report_error("SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec"
+ report_fatal_error("SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec"
"condition");
}
}
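The dyn_cast-to-cast changes above are not behavioral: every result was already dereferenced unconditionally, and the surrounding code has checked the opcode first. The difference in miniature (use() is a placeholder):

  // dyn_cast<> returns null on a type mismatch and must be tested:
  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N))
    use(FIN->getIndex());

  // cast<> asserts the type instead -- the right tool once the caller
  // has already established it (Opc == ISD::FrameIndex above):
  FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(N);
  use(FIN->getIndex());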
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index 4b0d4429d28b..5e04454f6a5a 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -15,6 +15,7 @@
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameInfo.h"
+#include "SPUMachineFunction.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
@@ -71,11 +72,8 @@ namespace {
#ifndef NDEBUG
if (retval == 0) {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "getValueTypeMapEntry returns NULL for "
- << VT.getEVTString();
- llvm_report_error(Msg.str());
+ report_fatal_error("getValueTypeMapEntry returns NULL for " +
+ Twine(VT.getEVTString()));
}
#endif
@@ -91,7 +89,7 @@ namespace {
SDValue
ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
- bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
+ bool isSigned, SDValue &Hi, const SPUTargetLowering &TLI) {
// The input chain to this libcall is the entry node of the function.
// Legalizing the call will automatically add the previous call to the
// dependence.
@@ -714,12 +712,9 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
case ISD::POST_DEC:
case ISD::LAST_INDEXED_MODE:
{
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
- "UNINDEXED\n";
- Msg << (unsigned) LN->getAddressingMode();
- llvm_report_error(Msg.str());
+ report_fatal_error("LowerLOAD: Got a LoadSDNode with an addr mode other "
+ "than UNINDEXED\n" +
+ Twine((unsigned)LN->getAddressingMode()));
/*NOTREACHED*/
}
}
@@ -884,12 +879,9 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
case ISD::POST_DEC:
case ISD::LAST_INDEXED_MODE:
{
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
- "UNINDEXED\n";
- Msg << (unsigned) SN->getAddressingMode();
- llvm_report_error(Msg.str());
+ report_fatal_error("LowerLOAD: Got a LoadSDNode with an addr mode other "
+ "than UNINDEXED\n" +
+ Twine((unsigned)SN->getAddressingMode()));
/*NOTREACHED*/
}
}
@@ -902,7 +894,7 @@ static SDValue
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
EVT PtrVT = Op.getValueType();
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
- Constant *C = CP->getConstVal();
+ const Constant *C = CP->getConstVal();
SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
SDValue Zero = DAG.getConstant(0, PtrVT);
const TargetMachine &TM = DAG.getTarget();
@@ -960,7 +952,7 @@ static SDValue
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
EVT PtrVT = Op.getValueType();
GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
- GlobalValue *GV = GSDN->getGlobal();
+ const GlobalValue *GV = GSDN->getGlobal();
SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
const TargetMachine &TM = DAG.getTarget();
SDValue Zero = DAG.getConstant(0, PtrVT);
@@ -976,7 +968,7 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
}
} else {
- llvm_report_error("LowerGlobalAddress: Relocation model other than static"
+ report_fatal_error("LowerGlobalAddress: Relocation model other than static"
"not supported.");
/*NOTREACHED*/
}
@@ -1013,11 +1005,13 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
const SmallVectorImpl<ISD::InputArg>
&Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals)
+ const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>();
const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
@@ -1038,13 +1032,9 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
const TargetRegisterClass *ArgRegClass;
switch (ObjectVT.getSimpleVT().SimpleTy) {
- default: {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "LowerFormalArguments Unhandled argument type: "
- << ObjectVT.getEVTString();
- llvm_report_error(Msg.str());
- }
+ default:
+ report_fatal_error("LowerFormalArguments Unhandled argument type: " +
+ Twine(ObjectVT.getEVTString()));
case MVT::i8:
ArgRegClass = &SPU::R8CRegClass;
break;
@@ -1104,9 +1094,10 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
// Create the frame slot
for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
- VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset,
- true, false);
- SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
+ FuncInfo->setVarArgsFrameIndex(
+ MFI->CreateFixedObject(StackSlotSize, ArgOffset,
+ true, false));
+ SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass);
SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0,
@@ -1146,7 +1137,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals) const {
// CellSPU target does not yet support tail call optimization.
isTailCall = false;
@@ -1255,7 +1246,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
// node so that legalize doesn't hack it.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- GlobalValue *GV = G->getGlobal();
+ const GlobalValue *GV = G->getGlobal();
EVT CalleeVT = Callee.getValueType();
SDValue Zero = DAG.getConstant(0, PtrVT);
SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
@@ -1339,22 +1330,12 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
InVals.push_back(Chain.getValue(0));
}
break;
+ case MVT::i8:
+ case MVT::i16:
case MVT::i64:
- Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64,
- InFlag).getValue(1);
- InVals.push_back(Chain.getValue(0));
- break;
case MVT::i128:
- Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128,
- InFlag).getValue(1);
- InVals.push_back(Chain.getValue(0));
- break;
case MVT::f32:
case MVT::f64:
- Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
- InFlag).getValue(1);
- InVals.push_back(Chain.getValue(0));
- break;
case MVT::v2f64:
case MVT::v2i64:
case MVT::v4f32:
@@ -1374,7 +1355,7 @@ SDValue
SPUTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
- DebugLoc dl, SelectionDAG &DAG) {
+ DebugLoc dl, SelectionDAG &DAG) const {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
@@ -1581,14 +1562,10 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
uint64_t SplatBits = APSplatBits.getZExtValue();
switch (VT.getSimpleVT().SimpleTy) {
- default: {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
- << VT.getEVTString();
- llvm_report_error(Msg.str());
+ default:
+ report_fatal_error("CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " +
+ Twine(VT.getEVTString()));
/*NOTREACHED*/
- }
case MVT::v4f32: {
uint32_t Value32 = uint32_t(SplatBits);
assert(SplatBitSize == 32
@@ -2004,7 +1981,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
// slot 0 across the vector
EVT VecVT = N.getValueType();
if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
- llvm_report_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
+ report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
"vector type!");
}
@@ -2032,7 +2009,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
switch (VT.getSimpleVT().SimpleTy) {
default:
- llvm_report_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector"
+ report_fatal_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector"
"type");
/*NOTREACHED*/
case MVT::i8: {
@@ -2368,7 +2345,7 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
All conversions to i64 are expanded to a libcall.
*/
static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
- SPUTargetLowering &TLI) {
+ const SPUTargetLowering &TLI) {
EVT OpVT = Op.getValueType();
SDValue Op0 = Op.getOperand(0);
EVT Op0VT = Op0.getValueType();
@@ -2394,7 +2371,7 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
All conversions from i64 are expanded to a libcall.
*/
static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
- SPUTargetLowering &TLI) {
+ const SPUTargetLowering &TLI) {
EVT OpVT = Op.getValueType();
SDValue Op0 = Op.getOperand(0);
EVT Op0VT = Op0.getValueType();
@@ -2515,7 +2492,7 @@ static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
case ISD::SETONE:
compareOp = ISD::SETNE; break;
default:
- llvm_report_error("CellSPU ISel Select: unimplemented f64 condition");
+ report_fatal_error("CellSPU ISel Select: unimplemented f64 condition");
}
SDValue result =
@@ -2670,7 +2647,7 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
lowering of nodes.
*/
SDValue
-SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
+SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
unsigned Opc = (unsigned) Op.getOpcode();
EVT VT = Op.getValueType();
@@ -2766,7 +2743,7 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue>&Results,
- SelectionDAG &DAG)
+ SelectionDAG &DAG) const
{
#if 0
unsigned Opc = (unsigned) N->getOpcode();
diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h
index 3c511772680d..9ebd442b43c7 100644
--- a/lib/Target/CellSPU/SPUISelLowering.h
+++ b/lib/Target/CellSPU/SPUISelLowering.h
@@ -109,11 +109,11 @@ namespace llvm {
virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
//! Custom lowering hooks
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
//! Custom lowering hook for nodes with illegal result types.
virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
- SelectionDAG &DAG);
+ SelectionDAG &DAG) const;
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
@@ -153,7 +153,7 @@ namespace llvm {
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
LowerCall(SDValue Chain, SDValue Callee,
@@ -162,13 +162,13 @@ namespace llvm {
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
- DebugLoc dl, SelectionDAG &DAG);
+ DebugLoc dl, SelectionDAG &DAG) const;
};
}
diff --git a/lib/Target/CellSPU/SPUMCAsmInfo.cpp b/lib/Target/CellSPU/SPUMCAsmInfo.cpp
index 3e17a51b505f..68445cf6bf9d 100644
--- a/lib/Target/CellSPU/SPUMCAsmInfo.cpp
+++ b/lib/Target/CellSPU/SPUMCAsmInfo.cpp
@@ -18,7 +18,6 @@ SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, const StringRef &TT) {
ZeroDirective = "\t.space\t";
Data64bitsDirective = "\t.quad\t";
AlignmentIsInBytes = false;
- HasLCOMMDirective = true;
PCSymbol = ".";
CommentString = "#";
diff --git a/lib/Target/CellSPU/SPUMachineFunction.h b/lib/Target/CellSPU/SPUMachineFunction.h
index 6a66967bc050..3ef3ccbcaaee 100644
--- a/lib/Target/CellSPU/SPUMachineFunction.h
+++ b/lib/Target/CellSPU/SPUMachineFunction.h
@@ -26,14 +26,20 @@ private:
///
bool UsesLR;
+ // VarArgsFrameIndex - FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+
public:
SPUFunctionInfo(MachineFunction& MF)
- : UsesLR(false)
+ : UsesLR(false),
+ VarArgsFrameIndex(0)
{}
void setUsesLR(bool U) { UsesLR = U; }
bool usesLR() { return UsesLR; }
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+ void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
};
} // end of namespace llvm
diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp
index ffac58182aec..fdbe10f84a7a 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.cpp
+++ b/lib/Target/CellSPU/SPURegisterInfo.cpp
@@ -179,7 +179,7 @@ unsigned SPURegisterInfo::getRegisterNumbering(unsigned RegEnum) {
case SPU::R126: return 126;
case SPU::R127: return 127;
default:
- llvm_report_error("Unhandled reg in SPURegisterInfo::getRegisterNumbering");
+ report_fatal_error("Unhandled reg in SPURegisterInfo::getRegisterNumbering");
}
}
@@ -303,7 +303,7 @@ BitVector SPURegisterInfo::getReservedRegs(const MachineFunction &MF) const {
//
static bool needsFP(const MachineFunction &MF) {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- return NoFramePointerElim || MFI->hasVarSizedObjects();
+ return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects();
}
//--------------------------------------------------------------------------
@@ -509,10 +509,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
.addReg(SPU::R2)
.addReg(SPU::R1);
} else {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "Unhandled frame size: " << FrameSize;
- llvm_report_error(Msg.str());
+ report_fatal_error("Unhandled frame size: " + Twine(FrameSize));
}
if (hasDebugInfo) {
@@ -605,10 +602,7 @@ SPURegisterInfo::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
.addReg(SPU::R2)
.addReg(SPU::R1);
} else {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "Unhandled frame size: " << FrameSize;
- llvm_report_error(Msg.str());
+ report_fatal_error("Unhandled frame size: " + Twine(FrameSize));
}
}
}
diff --git a/lib/Target/CellSPU/SPUSchedule.td b/lib/Target/CellSPU/SPUSchedule.td
index 785dc4660110..a0b581f1632b 100644
--- a/lib/Target/CellSPU/SPUSchedule.td
+++ b/lib/Target/CellSPU/SPUSchedule.td
@@ -36,7 +36,7 @@ def RotateShift : InstrItinClass; // EVEN_UNIT
def ImmLoad : InstrItinClass; // EVEN_UNIT
/* Note: The itinerary for the Cell SPU is somewhat contrived... */
-def SPUItineraries : ProcessorItineraries<[
+def SPUItineraries : ProcessorItineraries<[ODD_UNIT, EVEN_UNIT], [
InstrItinData<LoadStore , [InstrStage<6, [ODD_UNIT]>]>,
InstrItinData<BranchHints , [InstrStage<6, [ODD_UNIT]>]>,
InstrItinData<BranchResolv, [InstrStage<4, [ODD_UNIT]>]>,
diff --git a/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp b/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..ca2a4bf2199a
--- /dev/null
+++ b/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp
@@ -0,0 +1,22 @@
+//===-- SPUSelectionDAGInfo.cpp - CellSPU SelectionDAG Info ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SPUSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "cellspu-selectiondag-info"
+#include "SPUSelectionDAGInfo.h"
+using namespace llvm;
+
+SPUSelectionDAGInfo::SPUSelectionDAGInfo() {
+}
+
+SPUSelectionDAGInfo::~SPUSelectionDAGInfo() {
+}
diff --git a/lib/Target/CellSPU/SPUSelectionDAGInfo.h b/lib/Target/CellSPU/SPUSelectionDAGInfo.h
new file mode 100644
index 000000000000..0a6b4c171f18
--- /dev/null
+++ b/lib/Target/CellSPU/SPUSelectionDAGInfo.h
@@ -0,0 +1,29 @@
+//===-- SPUSelectionDAGInfo.h - CellSPU SelectionDAG Info -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the CellSPU subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CELLSPUSELECTIONDAGINFO_H
+#define CELLSPUSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class SPUSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ SPUSelectionDAGInfo();
+ ~SPUSelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h
index 9fdcfe9ab619..37e7cd2b7b3a 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.h
+++ b/lib/Target/CellSPU/SPUTargetMachine.h
@@ -57,8 +57,8 @@ public:
return NULL;
}
- virtual SPUTargetLowering *getTargetLowering() const {
- return const_cast<SPUTargetLowering*>(&TLInfo);
+ virtual const SPUTargetLowering *getTargetLowering() const {
+ return &TLInfo;
}
virtual const SPURegisterInfo *getRegisterInfo() const {
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 9c5893cec0d0..e739b26bc5c3 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -210,7 +210,7 @@ namespace {
}
void CppWriter::error(const std::string& msg) {
- llvm_report_error(msg);
+ report_fatal_error(msg);
}
// printCFP - Print a floating point constant .. very carefully :)
@@ -1082,8 +1082,9 @@ namespace {
// Before we emit this instruction, we need to take care of generating any
// forward references. So, we get the names of all the operands in advance
- std::string* opNames = new std::string[I->getNumOperands()];
- for (unsigned i = 0; i < I->getNumOperands(); i++) {
+ const unsigned Ops(I->getNumOperands());
+ std::string* opNames = new std::string[Ops];
+ for (unsigned i = 0; i < Ops; i++) {
opNames[i] = getOpName(I->getOperand(i));
}
@@ -1144,15 +1145,15 @@ namespace {
const InvokeInst* inv = cast<InvokeInst>(I);
Out << "std::vector<Value*> " << iName << "_params;";
nl(Out);
- for (unsigned i = 3; i < inv->getNumOperands(); ++i) {
+ for (unsigned i = 0; i < inv->getNumOperands() - 3; ++i) {
Out << iName << "_params.push_back("
<< opNames[i] << ");";
nl(Out);
}
Out << "InvokeInst *" << iName << " = InvokeInst::Create("
- << opNames[0] << ", "
- << opNames[1] << ", "
- << opNames[2] << ", "
+ << opNames[Ops - 3] << ", "
+ << opNames[Ops - 2] << ", "
+ << opNames[Ops - 1] << ", "
<< iName << "_params.begin(), " << iName << "_params.end(), \"";
printEscapedString(inv->getName());
Out << "\", " << bbname << ");";
diff --git a/lib/Target/MBlaze/AsmPrinter/CMakeLists.txt b/lib/Target/MBlaze/AsmPrinter/CMakeLists.txt
index cfb2fc8bf950..fac2c1959d7a 100644
--- a/lib/Target/MBlaze/AsmPrinter/CMakeLists.txt
+++ b/lib/Target/MBlaze/AsmPrinter/CMakeLists.txt
@@ -1,9 +1,9 @@
-include_directories(
- ${CMAKE_CURRENT_BINARY_DIR}/..
- ${CMAKE_CURRENT_SOURCE_DIR}/..
- )
-
-add_llvm_library(LLVMMBlazeAsmPrinter
+include_directories(
+ ${CMAKE_CURRENT_BINARY_DIR}/..
+ ${CMAKE_CURRENT_SOURCE_DIR}/..
+ )
+
+add_llvm_library(LLVMMBlazeAsmPrinter
MBlazeAsmPrinter.cpp
- )
-add_dependencies(LLVMMBlazeAsmPrinter MBlazeCodeGenTable_gen) \ No newline at end of file
+ )
+add_dependencies(LLVMMBlazeAsmPrinter MBlazeCodeGenTable_gen)
diff --git a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
index b1df926864e6..04dfb0ae6f9b 100644
--- a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
+++ b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
@@ -268,7 +268,7 @@ void MBlazeAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
void MBlazeAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum,
raw_ostream &O) {
const MachineOperand &MO = MI->getOperand(opNum);
- if (MO.getType() == MachineOperand::MO_Immediate)
+ if (MO.isImm())
O << (unsigned int)MO.getImm();
else
printOperand(MI, opNum, O);
@@ -277,7 +277,7 @@ void MBlazeAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum,
void MBlazeAsmPrinter::printFSLImm(const MachineInstr *MI, int opNum,
raw_ostream &O) {
const MachineOperand &MO = MI->getOperand(opNum);
- if (MO.getType() == MachineOperand::MO_Immediate)
+ if (MO.isImm())
O << "rfsl" << (unsigned int)MO.getImm();
else
printOperand(MI, opNum, O);
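MachineOperand's typed predicates make these checks read better; the two forms are equivalent:

    if (MO.getType() == MachineOperand::MO_Immediate) { /* old */ }
    if (MO.isImm())                                    { /* new, same test */ }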
diff --git a/lib/Target/MBlaze/CMakeLists.txt b/lib/Target/MBlaze/CMakeLists.txt
index c93e3dfc7879..7f85bf82518d 100644
--- a/lib/Target/MBlaze/CMakeLists.txt
+++ b/lib/Target/MBlaze/CMakeLists.txt
@@ -22,6 +22,7 @@ add_llvm_target(MBlazeCodeGen
MBlazeTargetMachine.cpp
MBlazeTargetObjectFile.cpp
MBlazeIntrinsicInfo.cpp
+ MBlazeSelectionDAGInfo.cpp
)
target_link_libraries (LLVMMBlazeCodeGen LLVMSelectionDAG)
diff --git a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
index 7e59c4a164df..c7cd5f4e44a9 100644
--- a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
+++ b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
@@ -13,7 +13,6 @@
#define DEBUG_TYPE "mblaze-isel"
#include "MBlaze.h"
-#include "MBlazeISelLowering.h"
#include "MBlazeMachineFunction.h"
#include "MBlazeRegisterInfo.h"
#include "MBlazeSubtarget.h"
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp
index f0864d0f4922..23889b128ffe 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.cpp
+++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp
@@ -185,7 +185,8 @@ unsigned MBlazeTargetLowering::getFunctionAlignment(const Function *) const {
return 2;
}
-SDValue MBlazeTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+SDValue MBlazeTargetLowering::LowerOperation(SDValue Op,
+ SelectionDAG &DAG) const {
switch (Op.getOpcode())
{
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
@@ -201,10 +202,9 @@ SDValue MBlazeTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
//===----------------------------------------------------------------------===//
// Lower helper functions
//===----------------------------------------------------------------------===//
-MachineBasicBlock* MBlazeTargetLowering::
-EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB,
- DenseMap<MachineBasicBlock*,
- MachineBasicBlock*> *EM) const {
+MachineBasicBlock*
+MBlazeTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
@@ -254,12 +254,9 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB,
// Update machine-CFG edges by first adding all successors of the current
// block to the new block which will contain the Phi node for the select.
- // Also inform sdisel of the edge changes.
for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
- e = BB->succ_end(); i != e; ++i) {
- EM->insert(std::make_pair(*i, finish));
+ e = BB->succ_end(); i != e; ++i)
finish->addSuccessor(*i);
- }
// Next, remove all successors of the current block, and add the true
// and fallthrough blocks as its successors.
@@ -350,12 +347,9 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB,
// Update machine-CFG edges by first adding all successors of the current
// block to the new block which will contain the Phi node for the select.
- // Also inform sdisel of the edge changes.
for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
- e = BB->succ_end(); i != e; ++i) {
- EM->insert(std::make_pair(*i, dneBB));
+ e = BB->succ_end(); i != e; ++i)
dneBB->addSuccessor(*i);
- }
// Next, remove all successors of the current block, and add the true
// and fallthrough blocks as its successors.
@@ -387,7 +381,8 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB,
//===----------------------------------------------------------------------===//
//
-SDValue MBlazeTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
+SDValue MBlazeTargetLowering::LowerSELECT_CC(SDValue Op,
+ SelectionDAG &DAG) const {
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
SDValue TrueVal = Op.getOperand(2);
@@ -409,23 +404,23 @@ SDValue MBlazeTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
}
SDValue MBlazeTargetLowering::
-LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) {
+LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
// FIXME there isn't actually debug info here
DebugLoc dl = Op.getDebugLoc();
- GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32);
return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, GA);
}
SDValue MBlazeTargetLowering::
-LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) {
+LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("TLS not implemented for MicroBlaze.");
return SDValue(); // Not reached
}
SDValue MBlazeTargetLowering::
-LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
+LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
SDValue ResNode;
SDValue HiPart;
// FIXME there isn't actually debug info here
@@ -442,11 +437,11 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
}
SDValue MBlazeTargetLowering::
-LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
+LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
SDValue ResNode;
EVT PtrVT = Op.getValueType();
ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
- Constant *C = N->getConstVal();
+ const Constant *C = N->getConstVal();
SDValue Zero = DAG.getConstant(0, PtrVT);
DebugLoc dl = Op.getDebugLoc();
@@ -455,9 +450,14 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, CP);
}
-SDValue MBlazeTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) {
+SDValue MBlazeTargetLowering::LowerVASTART(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MBlazeFunctionInfo *FuncInfo = MF.getInfo<MBlazeFunctionInfo>();
+
DebugLoc dl = Op.getDebugLoc();
- SDValue FI = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
+ SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
+ getPointerTy());
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
@@ -533,7 +533,7 @@ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals) const {
// MBlaze does not yet support tail call optimization
isTailCall = false;
@@ -669,7 +669,7 @@ SDValue MBlazeTargetLowering::
LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv,
bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals) const {
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
@@ -699,13 +699,13 @@ SDValue MBlazeTargetLowering::
LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
unsigned StackReg = MF.getTarget().getRegisterInfo()->getFrameRegister(MF);
- VarArgsFrameIndex = 0;
+ MBlazeFI->setVarArgsFrameIndex(0);
// Used with varargs to accumulate store chains.
std::vector<SDValue> OutChains;
@@ -818,8 +818,8 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
// Record the frame index of the first variable argument
// which is a value necessary to VASTART.
- if (!VarArgsFrameIndex)
- VarArgsFrameIndex = FI;
+ if (!MBlazeFI->getVarArgsFrameIndex())
+ MBlazeFI->setVarArgsFrameIndex(FI);
}
}
@@ -841,7 +841,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
SDValue MBlazeTargetLowering::
LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
- DebugLoc dl, SelectionDAG &DAG) {
+ DebugLoc dl, SelectionDAG &DAG) const {
// CCValAssign - represent the assignment of
// the return value to a location
SmallVector<CCValAssign, 16> RVLocs;
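This file also shows the second recurring pattern of the patch: with TargetLowering now const and shared, mutable per-function state such as VarArgsFrameIndex moves into the MachineFunctionInfo object. In sketch form, assembled from the hunks above (FrameIdx stands in for whatever index LowerFormalArguments computes):

    MachineFunction &MF = DAG.getMachineFunction();
    MBlazeFunctionInfo *FI = MF.getInfo<MBlazeFunctionInfo>();
    FI->setVarArgsFrameIndex(FrameIdx);   // written in LowerFormalArguments
    int VA = FI->getVarArgsFrameIndex();  // read back in LowerVASTART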
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.h b/lib/Target/MBlaze/MBlazeISelLowering.h
index f8b147024de4..9f9ac899c6fc 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.h
+++ b/lib/Target/MBlaze/MBlazeISelLowering.h
@@ -63,14 +63,11 @@ namespace llvm {
//===--------------------------------------------------------------------===//
class MBlazeTargetLowering : public TargetLowering {
- int VarArgsFrameIndex; // FrameIndex for start of varargs area.
-
public:
-
explicit MBlazeTargetLowering(MBlazeTargetMachine &TM);
/// LowerOperation - Provide custom lowering hooks for some operations.
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
/// getTargetNodeName - This method returns the name of a target specific
// DAG node.
@@ -90,22 +87,22 @@ namespace llvm {
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
// Lower Operand specifics
- SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG);
- SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG);
- SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG);
- SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG);
- SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG);
- SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
virtual SDValue
LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
LowerCall(SDValue Chain, SDValue Callee,
@@ -114,17 +111,17 @@ namespace llvm {
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
- DebugLoc dl, SelectionDAG &DAG);
+ DebugLoc dl, SelectionDAG &DAG) const;
- virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *MBB,
- DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const;
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
// Inline asm support
ConstraintType getConstraintType(const std::string &Constraint) const;
diff --git a/lib/Target/MBlaze/MBlazeMachineFunction.h b/lib/Target/MBlaze/MBlazeMachineFunction.h
index 08d4dcad6e3f..1f956c1f90fb 100644
--- a/lib/Target/MBlaze/MBlazeMachineFunction.h
+++ b/lib/Target/MBlaze/MBlazeMachineFunction.h
@@ -79,11 +79,14 @@ private:
/// relocation models.
unsigned GlobalBaseReg;
+ // VarArgsFrameIndex - FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+
public:
MBlazeFunctionInfo(MachineFunction& MF)
: FPStackOffset(0), RAStackOffset(0), CPUTopSavedRegOff(0),
GPHolder(-1,-1), HasLoadArgs(false), HasStoreVarArgs(false),
- SRetReturnReg(0), GlobalBaseReg(0)
+ SRetReturnReg(0), GlobalBaseReg(0), VarArgsFrameIndex(0)
{}
int getFPStackOffset() const { return FPStackOffset; }
@@ -129,6 +132,9 @@ public:
unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
+
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+ void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
};
} // end of namespace llvm
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
index a12310a2bd32..e15176eb06be 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
@@ -243,7 +243,7 @@ void MBlazeRegisterInfo::adjustMBlazeStackFrame(MachineFunction &MF) const {
// if frame pointer elimination is disabled.
bool MBlazeRegisterInfo::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- return NoFramePointerElim || MFI->hasVarSizedObjects();
+ return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects();
}
// This function eliminates ADJCALLSTACKDOWN,
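DisableFramePointerElim(MF) replaces direct reads of the NoFramePointerElim flag here and in the MSP430 and Mips register-info hunks below; taking the MachineFunction lets the query fold a per-function override in with the global -disable-fp-elim option. Assumed declaration, from the era's llvm/Target/TargetOptions.h:

    bool DisableFramePointerElim(const MachineFunction &MF);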
diff --git a/lib/Target/MBlaze/MBlazeSchedule.td b/lib/Target/MBlaze/MBlazeSchedule.td
index 6a94491db403..1fec9e694005 100644
--- a/lib/Target/MBlaze/MBlazeSchedule.td
+++ b/lib/Target/MBlaze/MBlazeSchedule.td
@@ -40,7 +40,8 @@ def IIPseudo : InstrItinClass;
//===----------------------------------------------------------------------===//
// MBlaze Generic instruction itineraries.
//===----------------------------------------------------------------------===//
-def MBlazeGenericItineraries : ProcessorItineraries<[
+def MBlazeGenericItineraries : ProcessorItineraries<
+ [ALU, IMULDIV], [
InstrItinData<IIAlu , [InstrStage<1, [ALU]>]>,
InstrItinData<IILoad , [InstrStage<3, [ALU]>]>,
InstrItinData<IIStore , [InstrStage<1, [ALU]>]>,
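ProcessorItineraries now takes the list of functional units as its first argument, so the units referenced by the InstrStage entries are declared up front; that is what the added "[ALU, IMULDIV]" operand supplies. A minimal TableGen sketch of the new shape:

    def MyItins : ProcessorItineraries<[ALU, IMULDIV], [
      InstrItinData<IIAlu, [InstrStage<1, [ALU]>]>
    ]>;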
diff --git a/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp b/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..105e42a639da
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp
@@ -0,0 +1,22 @@
+//===-- MBlazeSelectionDAGInfo.cpp - MBlaze SelectionDAG Info -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MBlazeSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mblaze-selectiondag-info"
+#include "MBlazeSelectionDAGInfo.h"
+using namespace llvm;
+
+MBlazeSelectionDAGInfo::MBlazeSelectionDAGInfo() {
+}
+
+MBlazeSelectionDAGInfo::~MBlazeSelectionDAGInfo() {
+}
diff --git a/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h b/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h
new file mode 100644
index 000000000000..11e6879911e5
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h
@@ -0,0 +1,29 @@
+//===-- MBlazeSelectionDAGInfo.h - MBlaze SelectionDAG Info -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MBlaze subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZESELECTIONDAGINFO_H
+#define MBLAZESELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class MBlazeSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ MBlazeSelectionDAGInfo();
+ ~MBlazeSelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h
index 85c975cae95c..9bf989849c4d 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.h
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.h
@@ -51,8 +51,8 @@ namespace llvm {
virtual const MBlazeRegisterInfo *getRegisterInfo() const
{ return &InstrInfo.getRegisterInfo(); }
- virtual MBlazeTargetLowering *getTargetLowering() const
- { return const_cast<MBlazeTargetLowering*>(&TLInfo); }
+ virtual const MBlazeTargetLowering *getTargetLowering() const
+ { return &TLInfo; }
const TargetIntrinsicInfo *getIntrinsicInfo() const
{ return &IntrinsicInfo; }
diff --git a/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp b/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp
index 79c9494c4d4b..05c01ef7a5d9 100644
--- a/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp
+++ b/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp
@@ -11,6 +11,7 @@
#include "MBlazeSubtarget.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
@@ -22,14 +23,14 @@ Initialize(MCContext &Ctx, const TargetMachine &TM) {
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
SmallDataSection =
- getELFSection(".sdata", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC,
- SectionKind::getDataRel());
+ getContext().getELFSection(".sdata", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC,
+ SectionKind::getDataRel());
SmallBSSSection =
- getELFSection(".sbss", MCSectionELF::SHT_NOBITS,
- MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC,
- SectionKind::getBSS());
+ getContext().getELFSection(".sbss", MCSectionELF::SHT_NOBITS,
+ MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC,
+ SectionKind::getBSS());
}
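The new MCContext.h include supports the other mechanical change in this file: section creation moved from TargetLoweringObjectFile onto the MCContext, which uniques sections per context. The call keeps the same arguments and just gains the getContext() hop, as in the .sdata case above:

    SmallDataSection =
      getContext().getELFSection(".sdata", MCSectionELF::SHT_PROGBITS,
                                 MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC,
                                 SectionKind::getDataRel());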
diff --git a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp
index f4d7d8a52886..d1d9a1158635 100644
--- a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp
+++ b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp
@@ -78,6 +78,16 @@ GetConstantPoolIndexSymbol(const MachineOperand &MO) const {
return Ctx.GetOrCreateSymbol(Name.str());
}
+MCSymbol *MSP430MCInstLower::
+GetBlockAddressSymbol(const MachineOperand &MO) const {
+ switch (MO.getTargetFlags()) {
+ default: assert(0 && "Unknown target flag on block address operand");
+ case 0: break;
+ }
+
+ return Printer.GetBlockAddressSymbol(MO.getBlockAddress());
+}
+
MCOperand MSP430MCInstLower::
LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const {
// FIXME: We would like an efficient form for this, so we don't have to do a
@@ -131,6 +141,8 @@ void MSP430MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
case MachineOperand::MO_ConstantPoolIndex:
MCOp = LowerSymbolOperand(MO, GetConstantPoolIndexSymbol(MO));
break;
+ case MachineOperand::MO_BlockAddress:
+ MCOp = LowerSymbolOperand(MO, GetBlockAddressSymbol(MO));
+ break;
}
OutMI.addOperand(MCOp);
diff --git a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h
index a2b99ae83c01..f9620e8ccfd2 100644
--- a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h
+++ b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h
@@ -42,6 +42,7 @@ public:
MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
MCSymbol *GetJumpTableSymbol(const MachineOperand &MO) const;
MCSymbol *GetConstantPoolIndexSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetBlockAddressSymbol(const MachineOperand &MO) const;
};
}
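Taken together, the two MSP430MCInstLower hunks wire blockaddress operands through the existing symbol-lowering path; in sketch:

    // MachineOperand(MO_BlockAddress)
    //   -> GetBlockAddressSymbol(MO)              // new helper above
    //      -> Printer.GetBlockAddressSymbol(BA)   // AsmPrinter owns the label
    //   -> LowerSymbolOperand(MO, Sym)            // wraps the MCSymbol in an MCExpr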
diff --git a/lib/Target/MSP430/CMakeLists.txt b/lib/Target/MSP430/CMakeLists.txt
index 29abe46c880a..a3f60d2a44f1 100644
--- a/lib/Target/MSP430/CMakeLists.txt
+++ b/lib/Target/MSP430/CMakeLists.txt
@@ -19,6 +19,7 @@ add_llvm_target(MSP430CodeGen
MSP430RegisterInfo.cpp
MSP430Subtarget.cpp
MSP430TargetMachine.cpp
+ MSP430SelectionDAGInfo.cpp
)
target_link_libraries (LLVMMSP430CodeGen LLVMSelectionDAG)
diff --git a/lib/Target/MSP430/MSP430BranchSelector.cpp b/lib/Target/MSP430/MSP430BranchSelector.cpp
index 836e4250abae..68cb342b08f4 100644
--- a/lib/Target/MSP430/MSP430BranchSelector.cpp
+++ b/lib/Target/MSP430/MSP430BranchSelector.cpp
@@ -157,7 +157,7 @@ bool MSP430BSel::runOnMachineFunction(MachineFunction &Fn) {
NewSize = 6;
}
// Uncond branch to the real destination.
- I = BuildMI(MBB, I, dl, TII->get(MSP430::B)).addMBB(Dest);
+ I = BuildMI(MBB, I, dl, TII->get(MSP430::Bi)).addMBB(Dest);
// Remove the old branch from the function.
OldBranch->eraseFromParent();
diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index 911cfcbe6d96..7b328bb12c3d 100644
--- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "MSP430.h"
-#include "MSP430ISelLowering.h"
#include "MSP430TargetMachine.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
@@ -45,9 +44,9 @@ namespace {
} Base;
int16_t Disp;
- GlobalValue *GV;
- Constant *CP;
- BlockAddress *BlockAddr;
+ const GlobalValue *GV;
+ const Constant *CP;
+ const BlockAddress *BlockAddr;
const char *ES;
int JT;
unsigned Align; // CP alignment.
@@ -100,7 +99,7 @@ namespace {
///
namespace {
class MSP430DAGToDAGISel : public SelectionDAGISel {
- MSP430TargetLowering &Lowering;
+ const MSP430TargetLowering &Lowering;
const MSP430Subtarget &Subtarget;
public:
@@ -364,7 +363,7 @@ SDNode *MSP430DAGToDAGISel::SelectIndexedBinOp(SDNode *Op,
unsigned Opc8, unsigned Opc16) {
if (N1.getOpcode() == ISD::LOAD &&
N1.hasOneUse() &&
- IsLegalToFold(N1, Op, Op)) {
+ IsLegalToFold(N1, Op, Op, OptLevel)) {
LoadSDNode *LD = cast<LoadSDNode>(N1);
if (!isValidIndexedLoad(LD))
return NULL;
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index e6c7e1ecd813..c3e2bdf7b46f 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -110,8 +110,8 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
setOperationAction(ISD::ROTR, MVT::i16, Expand);
setOperationAction(ISD::GlobalAddress, MVT::i16, Custom);
setOperationAction(ISD::ExternalSymbol, MVT::i16, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i16, Custom);
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- setOperationAction(ISD::BRIND, MVT::Other, Expand);
setOperationAction(ISD::BR_CC, MVT::i8, Custom);
setOperationAction(ISD::BR_CC, MVT::i16, Custom);
setOperationAction(ISD::BRCOND, MVT::Other, Expand);
@@ -176,12 +176,14 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
}
}
-SDValue MSP430TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+SDValue MSP430TargetLowering::LowerOperation(SDValue Op,
+ SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
case ISD::SHL: // FALLTHROUGH
case ISD::SRL:
case ISD::SRA: return LowerShifts(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::BR_CC: return LowerBR_CC(Op, DAG);
@@ -252,7 +254,8 @@ MSP430TargetLowering::LowerFormalArguments(SDValue Chain,
&Ins,
DebugLoc dl,
SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals)
+ const {
switch (CallConv) {
default:
@@ -264,7 +267,7 @@ MSP430TargetLowering::LowerFormalArguments(SDValue Chain,
if (Ins.empty())
return Chain;
else {
- llvm_report_error("ISRs cannot have arguments");
+ report_fatal_error("ISRs cannot have arguments");
return SDValue();
}
}
@@ -277,7 +280,7 @@ MSP430TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals) const {
// MSP430 target does not yet support tail call optimization.
isTailCall = false;
@@ -289,7 +292,7 @@ MSP430TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall,
Outs, Ins, dl, DAG, InVals);
case CallingConv::MSP430_INTR:
- llvm_report_error("ISRs cannot be called directly");
+ report_fatal_error("ISRs cannot be called directly");
return SDValue();
}
}
@@ -306,7 +309,8 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
&Ins,
DebugLoc dl,
SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals)
+ const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
@@ -383,14 +387,14 @@ SDValue
MSP430TargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
- DebugLoc dl, SelectionDAG &DAG) {
+ DebugLoc dl, SelectionDAG &DAG) const {
// CCValAssign - represent the assignment of the return value to a location
SmallVector<CCValAssign, 16> RVLocs;
// ISRs cannot return any value.
if (CallConv == CallingConv::MSP430_INTR && !Outs.empty()) {
- llvm_report_error("ISRs cannot return any value");
+ report_fatal_error("ISRs cannot return any value");
return SDValue();
}
@@ -445,7 +449,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
&Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals) const {
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
@@ -568,7 +572,7 @@ MSP430TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals) const {
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
@@ -589,7 +593,7 @@ MSP430TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
}
SDValue MSP430TargetLowering::LowerShifts(SDValue Op,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
unsigned Opc = Op.getOpcode();
SDNode* N = Op.getNode();
EVT VT = Op.getValueType();
@@ -632,7 +636,8 @@ SDValue MSP430TargetLowering::LowerShifts(SDValue Op,
return Victim;
}
-SDValue MSP430TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) {
+SDValue MSP430TargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) const {
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
@@ -643,7 +648,7 @@ SDValue MSP430TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG)
}
SDValue MSP430TargetLowering::LowerExternalSymbol(SDValue Op,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
@@ -651,6 +656,15 @@ SDValue MSP430TargetLowering::LowerExternalSymbol(SDValue Op,
return DAG.getNode(MSP430ISD::Wrapper, dl, getPointerTy(), Result);
}
+SDValue MSP430TargetLowering::LowerBlockAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ SDValue Result = DAG.getBlockAddress(BA, getPointerTy(), /*isTarget=*/true);
+
+ return DAG.getNode(MSP430ISD::Wrapper, dl, getPointerTy(), Result);
+}
+
static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, SDValue &TargetCC,
ISD::CondCode CC,
DebugLoc dl, SelectionDAG &DAG) {
@@ -734,7 +748,7 @@ static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, SDValue &TargetCC,
}
-SDValue MSP430TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
+SDValue MSP430TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
SDValue LHS = Op.getOperand(2);
@@ -749,8 +763,7 @@ SDValue MSP430TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
Chain, Dest, TargetCC, Flag);
}
-
-SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
+SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
DebugLoc dl = Op.getDebugLoc();
@@ -830,7 +843,8 @@ SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
}
}
-SDValue MSP430TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
+SDValue MSP430TargetLowering::LowerSELECT_CC(SDValue Op,
+ SelectionDAG &DAG) const {
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
SDValue TrueV = Op.getOperand(2);
@@ -852,7 +866,7 @@ SDValue MSP430TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
}
SDValue MSP430TargetLowering::LowerSIGN_EXTEND(SDValue Op,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
SDValue Val = Op.getOperand(0);
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
@@ -864,7 +878,8 @@ SDValue MSP430TargetLowering::LowerSIGN_EXTEND(SDValue Op,
DAG.getValueType(Val.getValueType()));
}
-SDValue MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
+SDValue
+MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
MSP430MachineFunctionInfo *FuncInfo = MF.getInfo<MSP430MachineFunctionInfo>();
int ReturnAddrIndex = FuncInfo->getRAIndex();
@@ -880,7 +895,8 @@ SDValue MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
}
-SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) {
+SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op,
+ SelectionDAG &DAG) const {
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
DebugLoc dl = Op.getDebugLoc();
@@ -900,7 +916,8 @@ SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) {
RetAddrFI, NULL, 0, false, false, 0);
}
-SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
+SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op,
+ SelectionDAG &DAG) const {
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
MFI->setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
@@ -999,8 +1016,7 @@ bool MSP430TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
MachineBasicBlock*
MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI,
- MachineBasicBlock *BB,
- DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
+ MachineBasicBlock *BB) const {
MachineFunction *F = BB->getParent();
MachineRegisterInfo &RI = F->getRegInfo();
DebugLoc dl = MI->getDebugLoc();
@@ -1052,11 +1068,6 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI,
// block to the block containing instructions after shift.
RemBB->transferSuccessors(BB);
- // Inform sdisel of the edge changes.
- for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
- SE = BB->succ_end(); SI != SE; ++SI)
- EM->insert(std::make_pair(*SI, RemBB));
-
// Add edges BB => LoopBB => RemBB, BB => RemBB, LoopBB => LoopBB
BB->addSuccessor(LoopBB);
BB->addSuccessor(RemBB);
@@ -1111,14 +1122,13 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI,
MachineBasicBlock*
MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB,
- DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
+ MachineBasicBlock *BB) const {
unsigned Opc = MI->getOpcode();
if (Opc == MSP430::Shl8 || Opc == MSP430::Shl16 ||
Opc == MSP430::Sra8 || Opc == MSP430::Sra16 ||
Opc == MSP430::Srl8 || Opc == MSP430::Srl16)
- return EmitShiftInstr(MI, BB, EM);
+ return EmitShiftInstr(MI, BB);
const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
@@ -1149,10 +1159,6 @@ MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
.addImm(MI->getOperand(3).getImm());
F->insert(I, copy0MBB);
F->insert(I, copy1MBB);
- // Inform sdisel of the edge changes.
- for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
- SE = BB->succ_end(); SI != SE; ++SI)
- EM->insert(std::make_pair(*SI, copy1MBB));
// Update machine-CFG edges by transferring all successors of the current
// block to the new block which will contain the Phi node for the select.
copy1MBB->transferSuccessors(BB);
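This is the signature change repeated across CellSPU, MBlaze, MSP430, and Mips in this patch: EmitInstrWithCustomInserter loses its DenseMap parameter. The map was how targets reported CFG edge changes back to the selector ("Inform sdisel of the edge changes" in the deleted comments); whatever bookkeeping consumed it now happens on the selector side, so the hook shrinks to:

    virtual MachineBasicBlock *
    EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;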
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
index 87a790b047b7..01c5071622a7 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -74,7 +74,7 @@ namespace llvm {
explicit MSP430TargetLowering(MSP430TargetMachine &TM);
/// LowerOperation - Provide custom lowering hooks for some operations.
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
/// getTargetNodeName - This method returns the name of a target specific
/// DAG node.
@@ -83,16 +83,17 @@ namespace llvm {
/// getFunctionAlignment - Return the Log2 alignment of this function.
virtual unsigned getFunctionAlignment(const Function *F) const;
- SDValue LowerShifts(SDValue Op, SelectionDAG &DAG);
- SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG);
- SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG);
- SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG);
- SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG);
- SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG);
- SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG);
- SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG);
- SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG);
- SDValue getReturnAddressFrameIndex(SelectionDAG &DAG);
+ SDValue LowerShifts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
TargetLowering::ConstraintType
getConstraintType(const std::string &Constraint) const;
@@ -117,11 +118,9 @@ namespace llvm {
virtual bool isZExtFree(EVT VT1, EVT VT2) const;
MachineBasicBlock* EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB,
- DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const;
+ MachineBasicBlock *BB) const;
MachineBasicBlock* EmitShiftInstr(MachineInstr *MI,
- MachineBasicBlock *BB,
- DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const;
+ MachineBasicBlock *BB) const;
private:
SDValue LowerCCCCallTo(SDValue Chain, SDValue Callee,
@@ -130,7 +129,7 @@ namespace llvm {
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
SDValue LowerCCCArguments(SDValue Chain,
CallingConv::ID CallConv,
@@ -138,33 +137,33 @@ namespace llvm {
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl,
SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
- DebugLoc dl, SelectionDAG &DAG);
+ DebugLoc dl, SelectionDAG &DAG) const;
virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
SDValue &Base,
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index 03819041067c..2b09b3d5268d 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -176,7 +176,9 @@ unsigned MSP430InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
if (I->isDebugValue())
continue;
if (I->getOpcode() != MSP430::JMP &&
- I->getOpcode() != MSP430::JCC)
+ I->getOpcode() != MSP430::JCC &&
+ I->getOpcode() != MSP430::Br &&
+ I->getOpcode() != MSP430::Bm)
break;
// Remove the branch.
I->eraseFromParent();
@@ -256,6 +258,11 @@ bool MSP430InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
if (!I->getDesc().isBranch())
return true;
+ // Cannot handle indirect branches.
+ if (I->getOpcode() == MSP430::Br ||
+ I->getOpcode() == MSP430::Bm)
+ return true;
+
// Handle unconditional branches.
if (I->getOpcode() == MSP430::JMP) {
if (!AllowModify) {
@@ -365,6 +372,7 @@ unsigned MSP430InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
case TargetOpcode::EH_LABEL:
case TargetOpcode::IMPLICIT_DEF:
case TargetOpcode::KILL:
+ case TargetOpcode::DBG_VALUE:
return 0;
case TargetOpcode::INLINEASM: {
const MachineFunction *MF = MI->getParent()->getParent();
diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td
index 144ba26cfeb4..6b9a2f2f29f4 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.td
+++ b/lib/Target/MSP430/MSP430InstrInfo.td
@@ -76,8 +76,8 @@ def memdst : Operand<i16> {
let MIOperandInfo = (ops GR16, i16imm);
}
-// Branch targets have OtherVT type.
-def brtarget : Operand<OtherVT> {
+// Short jump targets have OtherVT type and are printed as pcrel imm values.
+def jmptarget : Operand<OtherVT> {
let PrintMethod = "printPCRelImmOperand";
}
@@ -169,21 +169,27 @@ let isBranch = 1, isTerminator = 1 in {
// Direct branch
let isBarrier = 1 in {
// Short branch
- def JMP : CJForm<0, 0,
- (outs), (ins brtarget:$dst),
+ def JMP : CJForm<0, 0, (outs), (ins jmptarget:$dst),
"jmp\t$dst",
[(br bb:$dst)]>;
- // Long branch
- def B : I16ri<0,
- (outs), (ins brtarget:$dst),
- "br\t$dst",
- []>;
+ let isIndirectBranch = 1 in {
+ // Long branches
+ def Bi : I16ri<0, (outs), (ins i16imm:$brdst),
+ "br\t$brdst",
+ [(brind tblockaddress:$brdst)]>;
+ def Br : I16rr<0, (outs), (ins GR16:$brdst),
+ "mov.w\t{$brdst, pc}",
+ [(brind GR16:$brdst)]>;
+ def Bm : I16rm<0, (outs), (ins memsrc:$brdst),
+ "mov.w\t{$brdst, pc}",
+ [(brind (load addr:$brdst))]>;
+ }
}
// Conditional branches
let Uses = [SRW] in
def JCC : CJForm<0, 0,
- (outs), (ins brtarget:$dst, cc:$cc),
+ (outs), (ins jmptarget:$dst, cc:$cc),
"j$cc\t$dst",
[(MSP430brcc bb:$dst, imm:$cc)]>;
} // isBranch, isTerminator
@@ -1126,16 +1132,21 @@ def : Pat<(i8 (trunc GR16:$src)),
// GlobalAddress, ExternalSymbol
def : Pat<(i16 (MSP430Wrapper tglobaladdr:$dst)), (MOV16ri tglobaladdr:$dst)>;
def : Pat<(i16 (MSP430Wrapper texternalsym:$dst)), (MOV16ri texternalsym:$dst)>;
+def : Pat<(i16 (MSP430Wrapper tblockaddress:$dst)), (MOV16ri tblockaddress:$dst)>;
def : Pat<(add GR16:$src1, (MSP430Wrapper tglobaladdr :$src2)),
(ADD16ri GR16:$src1, tglobaladdr:$src2)>;
def : Pat<(add GR16:$src1, (MSP430Wrapper texternalsym:$src2)),
(ADD16ri GR16:$src1, texternalsym:$src2)>;
+def : Pat<(add GR16:$src1, (MSP430Wrapper tblockaddress:$src2)),
+ (ADD16ri GR16:$src1, tblockaddress:$src2)>;
def : Pat<(store (i16 (MSP430Wrapper tglobaladdr:$src)), addr:$dst),
(MOV16mi addr:$dst, tglobaladdr:$src)>;
def : Pat<(store (i16 (MSP430Wrapper texternalsym:$src)), addr:$dst),
(MOV16mi addr:$dst, texternalsym:$src)>;
+def : Pat<(store (i16 (MSP430Wrapper tblockaddress:$src)), addr:$dst),
+ (MOV16mi addr:$dst, tblockaddress:$src)>;
// calls
def : Pat<(MSP430call (i16 tglobaladdr:$dst)),
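How the three new long-branch forms divide up indirect branches, as the added patterns read (summary only, in TableGen comments):

    // Bi -- brind of a blockaddress constant -> "br   #imm"
    // Br -- brind of a register              -> "mov.w rN, pc"
    // Bm -- brind of a loaded value          -> "mov.w @addr, pc"
    // The MSP430InstrInfo hunks earlier treat Br/Bm as unanalyzable
    // terminators, and RemoveBranch now knows how to erase them.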
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index d91783a80c83..0cae26714bfc 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -138,7 +138,7 @@ MSP430RegisterInfo::getPointerRegClass(unsigned Kind) const {
bool MSP430RegisterInfo::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- return (NoFramePointerElim ||
+ return (DisableFramePointerElim(MF) ||
MF.getFrameInfo()->hasVarSizedObjects() ||
MFI->isFrameAddressTaken());
}
diff --git a/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp b/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp
new file mode 100644
index 000000000000..a54c929e8356
--- /dev/null
+++ b/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp
@@ -0,0 +1,22 @@
+//===-- MSP430SelectionDAGInfo.cpp - MSP430 SelectionDAG Info -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MSP430SelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "msp430-selectiondag-info"
+#include "MSP430SelectionDAGInfo.h"
+using namespace llvm;
+
+MSP430SelectionDAGInfo::MSP430SelectionDAGInfo() {
+}
+
+MSP430SelectionDAGInfo::~MSP430SelectionDAGInfo() {
+}
diff --git a/lib/Target/MSP430/MSP430SelectionDAGInfo.h b/lib/Target/MSP430/MSP430SelectionDAGInfo.h
new file mode 100644
index 000000000000..c952ab726afe
--- /dev/null
+++ b/lib/Target/MSP430/MSP430SelectionDAGInfo.h
@@ -0,0 +1,29 @@
+//===-- MSP430SelectionDAGInfo.h - MSP430 SelectionDAG Info -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MSP430 subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430SELECTIONDAGINFO_H
+#define MSP430SELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class MSP430SelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ MSP430SelectionDAGInfo();
+ ~MSP430SelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h
index d93ac5c6efe9..68bde9a55156 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.h
+++ b/lib/Target/MSP430/MSP430TargetMachine.h
@@ -50,8 +50,8 @@ public:
return &InstrInfo.getRegisterInfo();
}
- virtual MSP430TargetLowering *getTargetLowering() const {
- return const_cast<MSP430TargetLowering*>(&TLInfo);
+ virtual const MSP430TargetLowering *getTargetLowering() const {
+ return &TLInfo;
}
virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
diff --git a/lib/Target/Mangler.cpp b/lib/Target/Mangler.cpp
index 1d5c51164e3f..4ef017ab9295 100644
--- a/lib/Target/Mangler.cpp
+++ b/lib/Target/Mangler.cpp
@@ -22,11 +22,12 @@
#include "llvm/ADT/Twine.h"
using namespace llvm;
-static bool isAcceptableChar(char C) {
+static bool isAcceptableChar(char C, bool AllowPeriod) {
if ((C < 'a' || C > 'z') &&
(C < 'A' || C > 'Z') &&
(C < '0' || C > '9') &&
- C != '_' && C != '$' && C != '.' && C != '@')
+ C != '_' && C != '$' && C != '@' &&
+ !(AllowPeriod && C == '.'))
return false;
return true;
}
@@ -54,8 +55,9 @@ static bool NameNeedsEscaping(StringRef Str, const MCAsmInfo &MAI) {
// If any of the characters in the string is an unacceptable character, force
// quotes.
+ bool AllowPeriod = MAI.doesAllowPeriodsInName();
for (unsigned i = 0, e = Str.size(); i != e; ++i)
- if (!isAcceptableChar(Str[i]))
+ if (!isAcceptableChar(Str[i], AllowPeriod))
return true;
return false;
}
@@ -70,9 +72,10 @@ static void appendMangledName(SmallVectorImpl<char> &OutName, StringRef Str,
MangleLetter(OutName, Str[0]);
Str = Str.substr(1);
}
-
+
+ bool AllowPeriod = MAI.doesAllowPeriodsInName();
for (unsigned i = 0, e = Str.size(); i != e; ++i) {
- if (!isAcceptableChar(Str[i]))
+ if (!isAcceptableChar(Str[i], AllowPeriod))
MangleLetter(OutName, Str[i]);
else
OutName.push_back(Str[i]);
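Net effect of threading AllowPeriod through the mangler: '.' is a legal symbol character only when the target's MCAsmInfo permits it, and is otherwise escaped exactly like any other unacceptable character. In sketch:

    bool AllowPeriod = MAI.doesAllowPeriodsInName();
    // isAcceptableChar('.', /*AllowPeriod=*/true)  -> true  (kept as-is)
    // isAcceptableChar('.', /*AllowPeriod=*/false) -> false (gets MangleLetter'd)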
diff --git a/lib/Target/Mips/AsmPrinter/CMakeLists.txt b/lib/Target/Mips/AsmPrinter/CMakeLists.txt
index 56c68a6b4160..d3099d2a4e3e 100644
--- a/lib/Target/Mips/AsmPrinter/CMakeLists.txt
+++ b/lib/Target/Mips/AsmPrinter/CMakeLists.txt
@@ -1,9 +1,9 @@
-include_directories(
- ${CMAKE_CURRENT_BINARY_DIR}/..
- ${CMAKE_CURRENT_SOURCE_DIR}/..
- )
-
-add_llvm_library(LLVMMipsAsmPrinter
+include_directories(
+ ${CMAKE_CURRENT_BINARY_DIR}/..
+ ${CMAKE_CURRENT_SOURCE_DIR}/..
+ )
+
+add_llvm_library(LLVMMipsAsmPrinter
MipsAsmPrinter.cpp
- )
-add_dependencies(LLVMMipsAsmPrinter MipsCodeGenTable_gen) \ No newline at end of file
+ )
+add_dependencies(LLVMMipsAsmPrinter MipsCodeGenTable_gen)
diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
index 69743715607d..d269153ef0e7 100644
--- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
@@ -306,7 +306,7 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
void MipsAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum,
raw_ostream &O) {
const MachineOperand &MO = MI->getOperand(opNum);
- if (MO.getType() == MachineOperand::MO_Immediate)
+ if (MO.isImm())
O << (unsigned short int)MO.getImm();
else
printOperand(MI, opNum, O);
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index 0e3bf5a96d40..a77802aec52a 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -20,6 +20,7 @@ add_llvm_target(MipsCodeGen
MipsSubtarget.cpp
MipsTargetMachine.cpp
MipsTargetObjectFile.cpp
+ MipsSelectionDAGInfo.cpp
)
target_link_libraries (LLVMMipsCodeGen LLVMSelectionDAG)
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index c4746dba3267..ee85a3f38007 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -13,7 +13,6 @@
#define DEBUG_TYPE "mips-isel"
#include "Mips.h"
-#include "MipsISelLowering.h"
#include "MipsMachineFunction.h"
#include "MipsRegisterInfo.h"
#include "MipsSubtarget.h"
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 584b8875eef7..e979c3fe69fc 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -166,7 +166,7 @@ unsigned MipsTargetLowering::getFunctionAlignment(const Function *) const {
}
SDValue MipsTargetLowering::
-LowerOperation(SDValue Op, SelectionDAG &DAG)
+LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
switch (Op.getOpcode())
{
@@ -252,8 +252,7 @@ static Mips::CondCode FPCondCCodeToFCC(ISD::CondCode CC) {
MachineBasicBlock *
MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB,
- DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
+ MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
bool isFPCmp = false;
DebugLoc dl = MI->getDebugLoc();
@@ -301,12 +300,9 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
F->insert(It, sinkMBB);
// Update machine-CFG edges by first adding all successors of the current
// block to the new block which will contain the Phi node for the select.
- // Also inform sdisel of the edge changes.
for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
- e = BB->succ_end(); i != e; ++i) {
- EM->insert(std::make_pair(*i, sinkMBB));
+ e = BB->succ_end(); i != e; ++i)
sinkMBB->addSuccessor(*i);
- }
// Next, remove all successors of the current block, and add the true
// and fallthrough blocks as its successors.
while(!BB->succ_empty())
@@ -341,7 +337,7 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
//===----------------------------------------------------------------------===//
SDValue MipsTargetLowering::
-LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG)
+LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const
{
if (!Subtarget->isMips1())
return Op;
@@ -374,7 +370,7 @@ LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG)
}
SDValue MipsTargetLowering::
-LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG)
+LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
{
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
@@ -398,7 +394,7 @@ LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG)
}
SDValue MipsTargetLowering::
-LowerANDOR(SDValue Op, SelectionDAG &DAG)
+LowerANDOR(SDValue Op, SelectionDAG &DAG) const
{
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
@@ -419,7 +415,7 @@ LowerANDOR(SDValue Op, SelectionDAG &DAG)
}
SDValue MipsTargetLowering::
-LowerBRCOND(SDValue Op, SelectionDAG &DAG)
+LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
{
// The first operand is the chain, the second is the condition, the third is
// the block to branch to if the condition is true.
@@ -441,7 +437,7 @@ LowerBRCOND(SDValue Op, SelectionDAG &DAG)
}
SDValue MipsTargetLowering::
-LowerSETCC(SDValue Op, SelectionDAG &DAG)
+LowerSETCC(SDValue Op, SelectionDAG &DAG) const
{
// The operands to this are the left and right operands to compare (ops #0,
// and #1) and the condition code to compare them with (op #2) as a
@@ -457,7 +453,7 @@ LowerSETCC(SDValue Op, SelectionDAG &DAG)
}
SDValue MipsTargetLowering::
-LowerSELECT(SDValue Op, SelectionDAG &DAG)
+LowerSELECT(SDValue Op, SelectionDAG &DAG) const
{
SDValue Cond = Op.getOperand(0);
SDValue True = Op.getOperand(1);
@@ -481,10 +477,11 @@ LowerSELECT(SDValue Op, SelectionDAG &DAG)
Cond, True, False, CCNode);
}
-SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) {
+SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) const {
// FIXME there isn't actually debug info here
DebugLoc dl = Op.getDebugLoc();
- GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
SDVTList VTs = DAG.getVTList(MVT::i32);
@@ -525,14 +522,14 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) {
}
SDValue MipsTargetLowering::
-LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG)
+LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
{
llvm_unreachable("TLS not implemented for MIPS.");
return SDValue(); // Not reached
}
SDValue MipsTargetLowering::
-LowerJumpTable(SDValue Op, SelectionDAG &DAG)
+LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
{
SDValue ResNode;
SDValue HiPart;
@@ -560,11 +557,11 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG)
}
SDValue MipsTargetLowering::
-LowerConstantPool(SDValue Op, SelectionDAG &DAG)
+LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
{
SDValue ResNode;
ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
- Constant *C = N->getConstVal();
+ const Constant *C = N->getConstVal();
// FIXME there isn't actually debug info here
DebugLoc dl = Op.getDebugLoc();
@@ -596,9 +593,13 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG)
return ResNode;
}
-SDValue MipsTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) {
+SDValue MipsTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MipsFunctionInfo *FuncInfo = MF.getInfo<MipsFunctionInfo>();
+
DebugLoc dl = Op.getDebugLoc();
- SDValue FI = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
+ SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
+ getPointerTy());
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
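For reference, the remainder of LowerVASTART (unchanged by this hunk) performs the store the comment describes: it writes the materialized frame-index address through vastart's pointer operand. A sketch, assuming the era's getStore(Chain, dl, Val, Ptr, SrcValue, Offset, isVolatile, isNonTemporal, Align) overload:

    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
    return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1), SV, 0,
                        false, false, 0);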
@@ -769,7 +770,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals) const {
// MIPS target does not yet support tail call optimization.
isTailCall = false;
@@ -963,7 +964,7 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals) const {
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
@@ -995,14 +996,15 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
const SmallVectorImpl<ISD::InputArg>
&Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals)
+ const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
unsigned StackReg = MF.getTarget().getRegisterInfo()->getFrameRegister(MF);
- VarArgsFrameIndex = 0;
+ MipsFI->setVarArgsFrameIndex(0);
// Used with varargs to accumulate store chains.
std::vector<SDValue> OutChains;
@@ -1143,8 +1145,8 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
// Record the frame index of the first variable argument
// which is a value necessary to VASTART.
- if (!VarArgsFrameIndex)
- VarArgsFrameIndex = FI;
+ if (!MipsFI->getVarArgsFrameIndex())
+ MipsFI->setVarArgsFrameIndex(FI);
}
}
@@ -1167,7 +1169,7 @@ SDValue
MipsTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
- DebugLoc dl, SelectionDAG &DAG) {
+ DebugLoc dl, SelectionDAG &DAG) const {
// CCValAssign - represent the assignment of
// the return value to a location
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 7256617242fa..f2de489a2643 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -68,14 +68,11 @@ namespace llvm {
//===--------------------------------------------------------------------===//
class MipsTargetLowering : public TargetLowering {
- int VarArgsFrameIndex; // FrameIndex for start of varargs area.
-
public:
-
explicit MipsTargetLowering(MipsTargetMachine &TM);
/// LowerOperation - Provide custom lowering hooks for some operations.
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
/// getTargetNodeName - This method returns the name of a target specific
// DAG node.
@@ -96,27 +93,27 @@ namespace llvm {
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
// Lower Operand specifics
- SDValue LowerANDOR(SDValue Op, SelectionDAG &DAG);
- SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG);
- SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG);
- SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG);
- SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG);
- SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG);
- SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG);
- SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG);
- SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG);
- SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG);
- SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerANDOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
virtual SDValue
LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
LowerCall(SDValue Chain, SDValue Callee,
@@ -125,17 +122,17 @@ namespace llvm {
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
- DebugLoc dl, SelectionDAG &DAG);
+ DebugLoc dl, SelectionDAG &DAG) const;
- virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *MBB,
- DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const;
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
// Inline asm support
ConstraintType getConstraintType(const std::string &Constraint) const;
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index a300f49ff5d4..5723f9ea1517 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -80,11 +80,15 @@ private:
/// relocation models.
unsigned GlobalBaseReg;
+ /// VarArgsFrameIndex - FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+
public:
MipsFunctionInfo(MachineFunction& MF)
: FPStackOffset(0), RAStackOffset(0), CPUTopSavedRegOff(0),
FPUTopSavedRegOff(0), GPHolder(-1,-1), HasLoadArgs(false),
- HasStoreVarArgs(false), SRetReturnReg(0), GlobalBaseReg(0)
+ HasStoreVarArgs(false), SRetReturnReg(0), GlobalBaseReg(0),
+ VarArgsFrameIndex(0)
{}
int getFPStackOffset() const { return FPStackOffset; }
@@ -133,6 +137,9 @@ public:
unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
+
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+ void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
};
} // end of namespace llvm
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index f43e69b35457..478da84cad0c 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -338,7 +338,7 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const
bool MipsRegisterInfo::
hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- return NoFramePointerElim || MFI->hasVarSizedObjects();
+ return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects();
}
// This function eliminates ADJCALLSTACKDOWN,
diff --git a/lib/Target/Mips/MipsSchedule.td b/lib/Target/Mips/MipsSchedule.td
index 0c3ca57361cd..616a79bf831c 100644
--- a/lib/Target/Mips/MipsSchedule.td
+++ b/lib/Target/Mips/MipsSchedule.td
@@ -40,7 +40,7 @@ def IIPseudo : InstrItinClass;
//===----------------------------------------------------------------------===//
// Mips Generic instruction itineraries.
//===----------------------------------------------------------------------===//
-def MipsGenericItineraries : ProcessorItineraries<[
+def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [
InstrItinData<IIAlu , [InstrStage<1, [ALU]>]>,
InstrItinData<IILoad , [InstrStage<3, [ALU]>]>,
InstrItinData<IIStore , [InstrStage<1, [ALU]>]>,
diff --git a/lib/Target/Mips/MipsSelectionDAGInfo.cpp b/lib/Target/Mips/MipsSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..72c149decc26
--- /dev/null
+++ b/lib/Target/Mips/MipsSelectionDAGInfo.cpp
@@ -0,0 +1,22 @@
+//===-- MipsSelectionDAGInfo.cpp - Mips SelectionDAG Info -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MipsSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-selectiondag-info"
+#include "MipsSelectionDAGInfo.h"
+using namespace llvm;
+
+MipsSelectionDAGInfo::MipsSelectionDAGInfo() {
+}
+
+MipsSelectionDAGInfo::~MipsSelectionDAGInfo() {
+}
diff --git a/lib/Target/Mips/MipsSelectionDAGInfo.h b/lib/Target/Mips/MipsSelectionDAGInfo.h
new file mode 100644
index 000000000000..6eaf0c930f2e
--- /dev/null
+++ b/lib/Target/Mips/MipsSelectionDAGInfo.h
@@ -0,0 +1,29 @@
+//===-- MipsSelectionDAGInfo.h - Mips SelectionDAG Info ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Mips subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSSELECTIONDAGINFO_H
+#define MIPSSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class MipsSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ MipsSelectionDAGInfo();
+ ~MipsSelectionDAGInfo();
+};
+
+}
+
+#endif
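MipsSelectionDAGInfo starts out as an empty TargetSelectionDAGInfo shell; the value is the hook itself, which later lets the target override SelectionDAG construction routines. A hedged sketch of how a target machine would expose it; the TSInfo member and the getSelectionDAGInfo() accessor follow the pattern of getTargetLowering() above and are assumptions, not part of this patch:

```cpp
// Illustrative wiring only; names are assumed from the TargetMachine
// interface of this period.
class MipsTargetMachine : public LLVMTargetMachine {
  MipsSelectionDAGInfo TSInfo;
public:
  virtual const MipsSelectionDAGInfo *getSelectionDAGInfo() const {
    return &TSInfo;
  }
};
```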
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index c3428be48f59..cd671cf3d064 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -47,8 +47,8 @@ namespace llvm {
return &InstrInfo.getRegisterInfo();
}
- virtual MipsTargetLowering *getTargetLowering() const {
- return const_cast<MipsTargetLowering*>(&TLInfo);
+ virtual const MipsTargetLowering *getTargetLowering() const {
+ return &TLInfo;
}
// Pass Pipeline Configuration
diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp
index 0fb423dc1b24..405f41981fa3 100644
--- a/lib/Target/Mips/MipsTargetObjectFile.cpp
+++ b/lib/Target/Mips/MipsTargetObjectFile.cpp
@@ -11,6 +11,7 @@
#include "MipsSubtarget.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
@@ -26,14 +27,14 @@ void MipsTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
SmallDataSection =
- getELFSection(".sdata", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC,
- SectionKind::getDataRel());
+ getContext().getELFSection(".sdata", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC,
+ SectionKind::getDataRel());
SmallBSSSection =
- getELFSection(".sbss", MCSectionELF::SHT_NOBITS,
- MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC,
- SectionKind::getBSS());
+ getContext().getELFSection(".sbss", MCSectionELF::SHT_NOBITS,
+ MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC,
+ SectionKind::getBSS());
}
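Section creation has moved from TargetLoweringObjectFile into MCContext, which uniques and owns the MCSection objects; the object-file lowering now reaches sections through getContext(). A short sketch, assuming the MC API of this era:

```cpp
// Sections are created and uniqued by the MCContext, not by the
// object-file lowering that uses them.
const MCSection *SData =
  getContext().getELFSection(".sdata", MCSectionELF::SHT_PROGBITS,
                             MCSectionELF::SHF_WRITE |
                             MCSectionELF::SHF_ALLOC,
                             SectionKind::getDataRel());
```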
diff --git a/lib/Target/PIC16/AsmPrinter/CMakeLists.txt b/lib/Target/PIC16/AsmPrinter/CMakeLists.txt
index 2e1b809b92d7..d36bb8eb4a5f 100644
--- a/lib/Target/PIC16/AsmPrinter/CMakeLists.txt
+++ b/lib/Target/PIC16/AsmPrinter/CMakeLists.txt
@@ -1,9 +1,9 @@
-include_directories(
- ${CMAKE_CURRENT_BINARY_DIR}/..
- ${CMAKE_CURRENT_SOURCE_DIR}/..
- )
-
-add_llvm_library(LLVMPIC16AsmPrinter
+include_directories(
+ ${CMAKE_CURRENT_BINARY_DIR}/..
+ ${CMAKE_CURRENT_SOURCE_DIR}/..
+ )
+
+add_llvm_library(LLVMPIC16AsmPrinter
PIC16AsmPrinter.cpp
- )
+ )
add_dependencies(LLVMPIC16AsmPrinter PIC16CodeGenTable_gen)
diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp
index c46db46ea5b4..b665817e614e 100644
--- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp
+++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp
@@ -16,6 +16,7 @@
#include "PIC16AsmPrinter.h"
#include "PIC16Section.h"
#include "PIC16MCAsmInfo.h"
+#include "PIC16MachineFunctionInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Module.h"
@@ -36,9 +37,8 @@ using namespace llvm;
PIC16AsmPrinter::PIC16AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
: AsmPrinter(TM, Streamer), DbgInfo(Streamer, TM.getMCAsmInfo()) {
- PTLI = static_cast<PIC16TargetLowering*>(TM.getTargetLowering());
PMAI = static_cast<const PIC16MCAsmInfo*>(TM.getMCAsmInfo());
- PTOF = (PIC16TargetObjectFile *)&PTLI->getObjFileLowering();
+ PTOF = &getObjFileLowering();
}
void PIC16AsmPrinter::EmitInstruction(const MachineInstr *MI) {
@@ -379,6 +379,8 @@ bool PIC16AsmPrinter::doFinalization(Module &M) {
void PIC16AsmPrinter::EmitFunctionFrame(MachineFunction &MF) {
const Function *F = MF.getFunction();
const TargetData *TD = TM.getTargetData();
+ PIC16MachineFunctionInfo *FuncInfo = MF.getInfo<PIC16MachineFunctionInfo>();
+
// Emit the data section name.
PIC16Section *fPDataSection =
@@ -420,7 +422,7 @@ void PIC16AsmPrinter::EmitFunctionFrame(MachineFunction &MF) {
Twine(" RES ") + Twine(ArgSize));
// Emit temporary space
- int TempSize = PTLI->GetTmpSize();
+ int TempSize = FuncInfo->getTmpSize();
if (TempSize > 0)
OutStreamer.EmitRawText(PAN::getTempdataLabel(CurrentFnSym->getName()) +
Twine(" RES ") + Twine(TempSize));
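With TmpSize gone from PIC16TargetLowering, the asm printer pulls the per-function temporary-area size from PIC16MachineFunctionInfo (hence the new include above). The access pattern, sketched under the same API assumptions as before:

```cpp
// Per-function frame data comes from the MachineFunction being
// emitted, not from the shared target-lowering object.
PIC16MachineFunctionInfo *FuncInfo =
    MF.getInfo<PIC16MachineFunctionInfo>();
int TempSize = FuncInfo->getTmpSize(); // size of the <fname>.tmp area
```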
diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h
index e27778fbb070..a424c270aaad 100644
--- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h
+++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h
@@ -37,8 +37,8 @@ namespace llvm {
return "PIC16 Assembly Printer";
}
- PIC16TargetObjectFile &getObjFileLowering() const {
- return (PIC16TargetObjectFile &)AsmPrinter::getObjFileLowering();
+ const PIC16TargetObjectFile &getObjFileLowering() const {
+ return (const PIC16TargetObjectFile &)AsmPrinter::getObjFileLowering();
}
bool runOnMachineFunction(MachineFunction &F);
@@ -76,8 +76,7 @@ namespace llvm {
}
private:
- PIC16TargetObjectFile *PTOF;
- PIC16TargetLowering *PTLI;
+ const PIC16TargetObjectFile *PTOF;
PIC16DbgInfo DbgInfo;
const PIC16MCAsmInfo *PMAI;
std::set<std::string> LibcallDecls; // Sorted & uniqued set of extern decls.
diff --git a/lib/Target/PIC16/CMakeLists.txt b/lib/Target/PIC16/CMakeLists.txt
index 208b0678fec2..cd4afe8e2342 100644
--- a/lib/Target/PIC16/CMakeLists.txt
+++ b/lib/Target/PIC16/CMakeLists.txt
@@ -22,4 +22,5 @@ add_llvm_target(PIC16
PIC16Subtarget.cpp
PIC16TargetMachine.cpp
PIC16TargetObjectFile.cpp
+ PIC16SelectionDAGInfo.cpp
)
diff --git a/lib/Target/PIC16/PIC16.h b/lib/Target/PIC16/PIC16.h
index 8d067de676b6..cee55f4f260f 100644
--- a/lib/Target/PIC16/PIC16.h
+++ b/lib/Target/PIC16/PIC16.h
@@ -21,6 +21,7 @@
#include <sstream>
#include <cstring>
#include <string>
+#include <vector>
namespace llvm {
class PIC16TargetMachine;
@@ -52,17 +53,34 @@ namespace PIC16CC {
UDATA_SHR
};
+ class ESNames {
+ std::vector<char*> stk;
+ ESNames() {}
+ public:
+ ~ESNames() {
+ std::vector<char*>::iterator it = stk.end();
+ it--;
+ while(stk.end() != stk.begin())
+ {
+ char* p = *it;
+ delete [] p;
+ it--;
+ stk.pop_back();
+ }
+ }
- // External symbol names require memory to live till the program end.
- // So we have to allocate it and keep.
- // FIXME: Don't leak the allocated strings.
- inline static const char *createESName (const std::string &name) {
- char *tmpName = new char[name.size() + 1];
- memcpy(tmpName, name.c_str(), name.size() + 1);
- return tmpName;
- }
-
+ // External symbol names require memory that lives until the program ends,
+ // so we have to allocate and keep them. Push all such allocations into a
+ // vector so that they get freed on termination.
+ inline static const char *createESName (const std::string &name) {
+ static ESNames esn;
+ char *tmpName = new char[name.size() + 1];
+ memcpy(tmpName, name.c_str(), name.size() + 1);
+ esn.stk.push_back(tmpName);
+ return tmpName;
+ }
+ };
inline static const char *PIC16CondCodeToString(PIC16CC::CondCodes CC) {
switch (CC) {
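The createESName leak fix parks every allocation in a function-local static ESNames object whose destructor frees the strings at program exit. Note that the hand-rolled destructor loop decrements an iterator past begin() on the last element, which is formally undefined behavior even if it works in practice. A hedged alternative sketch of the same ownership idea, using C++11 for brevity even though the tree of this era is C++03:

```cpp
#include <cstring>
#include <memory>
#include <string>
#include <vector>

// Let the container own the storage so no manual destructor loop is
// needed; the pool is freed once at static destruction, as in the patch.
static const char *createESName(const std::string &Name) {
  static std::vector<std::unique_ptr<char[]> > Pool;
  std::unique_ptr<char[]> Buf(new char[Name.size() + 1]);
  std::memcpy(Buf.get(), Name.c_str(), Name.size() + 1);
  Pool.push_back(std::move(Buf));
  return Pool.back().get();
}
```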
diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.h b/lib/Target/PIC16/PIC16ISelDAGToDAG.h
index 8ed5bf717296..f1fcec5bad96 100644
--- a/lib/Target/PIC16/PIC16ISelDAGToDAG.h
+++ b/lib/Target/PIC16/PIC16ISelDAGToDAG.h
@@ -14,9 +14,9 @@
#define DEBUG_TYPE "pic16-isel"
#include "PIC16.h"
-#include "PIC16ISelLowering.h"
#include "PIC16RegisterInfo.h"
#include "PIC16TargetMachine.h"
+#include "PIC16MachineFunctionInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/raw_ostream.h"
@@ -29,19 +29,16 @@ namespace {
class VISIBILITY_HIDDEN PIC16DAGToDAGISel : public SelectionDAGISel {
/// TM - Keep a reference to PIC16TargetMachine.
- PIC16TargetMachine &TM;
+ const PIC16TargetMachine &TM;
/// PIC16Lowering - This object fully describes how to lower LLVM code to an
/// PIC16-specific SelectionDAG.
- PIC16TargetLowering &PIC16Lowering;
+ const PIC16TargetLowering &PIC16Lowering;
public:
explicit PIC16DAGToDAGISel(PIC16TargetMachine &tm) :
SelectionDAGISel(tm),
- TM(tm), PIC16Lowering(*TM.getTargetLowering()) {
- // Keep PIC16 specific DAGISel to use during the lowering
- PIC16Lowering.ISel = this;
- }
+ TM(tm), PIC16Lowering(*TM.getTargetLowering()) {}
// Pass Name
virtual const char *getPassName() const {
diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp
index d17abb9ed0a8..f479f4626fd7 100644
--- a/lib/Target/PIC16/PIC16ISelLowering.cpp
+++ b/lib/Target/PIC16/PIC16ISelLowering.cpp
@@ -16,6 +16,7 @@
#include "PIC16ISelLowering.h"
#include "PIC16TargetObjectFile.h"
#include "PIC16TargetMachine.h"
+#include "PIC16MachineFunctionInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalValue.h"
#include "llvm/Function.h"
@@ -24,6 +25,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Support/ErrorHandling.h"
@@ -116,7 +118,7 @@ static const char *getIntrinsicName(unsigned opcode) {
std::string Fullname = prefix + tagname + Basename;
// The name has to live through program life.
- return createESName(Fullname);
+ return ESNames::createESName(Fullname);
}
// getStdLibCallName - Get the name for the standard library function.
@@ -139,12 +141,12 @@ static const char *getStdLibCallName(unsigned opcode) {
std::string LibCallName = prefix + BaseName;
// The name has to live through program life.
- return createESName(LibCallName);
+ return ESNames::createESName(LibCallName);
}
// PIC16TargetLowering Constructor.
PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM)
- : TargetLowering(TM, new PIC16TargetObjectFile()), TmpSize(0) {
+ : TargetLowering(TM, new PIC16TargetObjectFile()) {
Subtarget = &TM.getSubtarget<PIC16Subtarget>();
@@ -321,18 +323,29 @@ static SDValue getOutFlag(SDValue &Op) {
return Flag;
}
// Get the TmpOffset for FrameIndex
-unsigned PIC16TargetLowering::GetTmpOffsetForFI(unsigned FI, unsigned size) {
+unsigned PIC16TargetLowering::GetTmpOffsetForFI(unsigned FI, unsigned size,
+ MachineFunction &MF) const {
+ PIC16MachineFunctionInfo *FuncInfo = MF.getInfo<PIC16MachineFunctionInfo>();
+ std::map<unsigned, unsigned> &FiTmpOffsetMap = FuncInfo->getFiTmpOffsetMap();
+
std::map<unsigned, unsigned>::iterator
MapIt = FiTmpOffsetMap.find(FI);
if (MapIt != FiTmpOffsetMap.end())
return MapIt->second;
// This FI (FrameIndex) is not yet mapped, so map it
- FiTmpOffsetMap[FI] = TmpSize;
- TmpSize += size;
+ FiTmpOffsetMap[FI] = FuncInfo->getTmpSize();
+ FuncInfo->setTmpSize(FuncInfo->getTmpSize() + size);
return FiTmpOffsetMap[FI];
}
+void PIC16TargetLowering::ResetTmpOffsetMap(SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ PIC16MachineFunctionInfo *FuncInfo = MF.getInfo<PIC16MachineFunctionInfo>();
+ FuncInfo->getFiTmpOffsetMap().clear();
+ FuncInfo->setTmpSize(0);
+}
+
// To extract chain value from the SDValue Nodes
// This function will help to maintain the chain extracting
// code at one place. In case of any change in future it will
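GetTmpOffsetForFI keeps the same bump-allocator scheme as before, only with its map and watermark now stored per function: a frame index seen for the first time is placed at the current TmpSize, and the watermark advances by the slot size. The body, in outline (FuncInfo, FI, and size come from the enclosing function as in the hunk above):

```cpp
// Sketch of the placement scheme; the state now lives in the per-function
// info object rather than in the target lowering.
std::map<unsigned, unsigned> &Map = FuncInfo->getFiTmpOffsetMap();
std::map<unsigned, unsigned>::iterator It = Map.find(FI);
if (It != Map.end())
  return It->second;                                   // already placed
Map[FI] = FuncInfo->getTmpSize();                      // place at watermark
FuncInfo->setTmpSize(FuncInfo->getTmpSize() + size);   // advance watermark
return Map[FI];
```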
@@ -390,7 +403,7 @@ PIC16TargetLowering::setPIC16LibcallName(PIC16ISD::PIC16Libcall Call,
}
const char *
-PIC16TargetLowering::getPIC16LibcallName(PIC16ISD::PIC16Libcall Call) {
+PIC16TargetLowering::getPIC16LibcallName(PIC16ISD::PIC16Libcall Call) const {
return PIC16LibcallNames[Call];
}
@@ -398,7 +411,7 @@ SDValue
PIC16TargetLowering::MakePIC16Libcall(PIC16ISD::PIC16Libcall Call,
EVT RetVT, const SDValue *Ops,
unsigned NumOps, bool isSigned,
- SelectionDAG &DAG, DebugLoc dl) {
+ SelectionDAG &DAG, DebugLoc dl) const {
TargetLowering::ArgListTy Args;
Args.reserve(NumOps);
@@ -456,7 +469,7 @@ const char *PIC16TargetLowering::getTargetNodeName(unsigned Opcode) const {
void PIC16TargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue>&Results,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
switch (N->getOpcode()) {
case ISD::GlobalAddress:
@@ -483,7 +496,8 @@ void PIC16TargetLowering::ReplaceNodeResults(SDNode *N,
}
}
-SDValue PIC16TargetLowering::ExpandFrameIndex(SDNode *N, SelectionDAG &DAG) {
+SDValue PIC16TargetLowering::ExpandFrameIndex(SDNode *N,
+ SelectionDAG &DAG) const {
// Currently handling FrameIndex of size MVT::i16 only
// One example of this scenario is when return value is written on
@@ -518,7 +532,7 @@ SDValue PIC16TargetLowering::ExpandFrameIndex(SDNode *N, SelectionDAG &DAG) {
}
-SDValue PIC16TargetLowering::ExpandStore(SDNode *N, SelectionDAG &DAG) {
+SDValue PIC16TargetLowering::ExpandStore(SDNode *N, SelectionDAG &DAG) const {
StoreSDNode *St = cast<StoreSDNode>(N);
SDValue Chain = St->getChain();
SDValue Src = St->getValue();
@@ -636,8 +650,9 @@ SDValue PIC16TargetLowering::ExpandStore(SDNode *N, SelectionDAG &DAG) {
}
}
-SDValue PIC16TargetLowering::ExpandExternalSymbol(SDNode *N, SelectionDAG &DAG)
-{
+SDValue PIC16TargetLowering::ExpandExternalSymbol(SDNode *N,
+ SelectionDAG &DAG)
+ const {
ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(SDValue(N, 0));
// FIXME there isn't really debug info here
DebugLoc dl = ES->getDebugLoc();
@@ -651,7 +666,8 @@ SDValue PIC16TargetLowering::ExpandExternalSymbol(SDNode *N, SelectionDAG &DAG)
}
// ExpandGlobalAddress -
-SDValue PIC16TargetLowering::ExpandGlobalAddress(SDNode *N, SelectionDAG &DAG) {
+SDValue PIC16TargetLowering::ExpandGlobalAddress(SDNode *N,
+ SelectionDAG &DAG) const {
GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(SDValue(N, 0));
// FIXME there isn't really debug info here
DebugLoc dl = G->getDebugLoc();
@@ -666,7 +682,7 @@ SDValue PIC16TargetLowering::ExpandGlobalAddress(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i16, Lo, Hi);
}
-bool PIC16TargetLowering::isDirectAddress(const SDValue &Op) {
+bool PIC16TargetLowering::isDirectAddress(const SDValue &Op) const {
assert (Op.getNode() != NULL && "Can't operate on NULL SDNode!!");
if (Op.getOpcode() == ISD::BUILD_PAIR) {
@@ -677,7 +693,7 @@ bool PIC16TargetLowering::isDirectAddress(const SDValue &Op) {
}
// Return true if DirectAddress is in ROM_SPACE
-bool PIC16TargetLowering::isRomAddress(const SDValue &Op) {
+bool PIC16TargetLowering::isRomAddress(const SDValue &Op) const {
// RomAddress is a GlobalAddress in ROM_SPACE.
// If the Op is not a GlobalAddress return NULL without checking
@@ -703,7 +719,7 @@ bool PIC16TargetLowering::isRomAddress(const SDValue &Op) {
// parts of Op in Lo and Hi.
void PIC16TargetLowering::GetExpandedParts(SDValue Op, SelectionDAG &DAG,
- SDValue &Lo, SDValue &Hi) {
+ SDValue &Lo, SDValue &Hi) const {
SDNode *N = Op.getNode();
DebugLoc dl = N->getDebugLoc();
EVT NewVT = getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
@@ -720,11 +736,12 @@ void PIC16TargetLowering::GetExpandedParts(SDValue Op, SelectionDAG &DAG,
// Legalize FrameIndex into ExternalSymbol and offset.
void
PIC16TargetLowering::LegalizeFrameIndex(SDValue Op, SelectionDAG &DAG,
- SDValue &ES, int &Offset) {
+ SDValue &ES, int &Offset) const {
MachineFunction &MF = DAG.getMachineFunction();
const Function *Func = MF.getFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
+ PIC16MachineFunctionInfo *FuncInfo = MF.getInfo<PIC16MachineFunctionInfo>();
const std::string Name = Func->getName();
FrameIndexSDNode *FR = dyn_cast<FrameIndexSDNode>(Op);
@@ -736,8 +753,8 @@ PIC16TargetLowering::LegalizeFrameIndex(SDValue Op, SelectionDAG &DAG,
// the list and add their requested size.
unsigned FIndex = FR->getIndex();
const char *tmpName;
- if (FIndex < ReservedFrameCount) {
- tmpName = createESName(PAN::getFrameLabel(Name));
+ if (FIndex < FuncInfo->getReservedFrameCount()) {
+ tmpName = ESNames::createESName(PAN::getFrameLabel(Name));
ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8);
Offset = 0;
for (unsigned i=0; i<FIndex ; ++i) {
@@ -745,9 +762,9 @@ PIC16TargetLowering::LegalizeFrameIndex(SDValue Op, SelectionDAG &DAG,
}
} else {
// FrameIndex has been made for some temporary storage
- tmpName = createESName(PAN::getTempdataLabel(Name));
+ tmpName = ESNames::createESName(PAN::getTempdataLabel(Name));
ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8);
- Offset = GetTmpOffsetForFI(FIndex, MFI->getObjectSize(FIndex));
+ Offset = GetTmpOffsetForFI(FIndex, MFI->getObjectSize(FIndex), MF);
}
return;
@@ -767,7 +784,7 @@ PIC16TargetLowering::LegalizeFrameIndex(SDValue Op, SelectionDAG &DAG,
void PIC16TargetLowering::LegalizeAddress(SDValue Ptr, SelectionDAG &DAG,
SDValue &Lo, SDValue &Hi,
- unsigned &Offset, DebugLoc dl) {
+ unsigned &Offset, DebugLoc dl) const {
// Offset, by default, should be 0
Offset = 0;
@@ -846,7 +863,7 @@ void PIC16TargetLowering::LegalizeAddress(SDValue Ptr, SelectionDAG &DAG,
return;
}
-SDValue PIC16TargetLowering::ExpandLoad(SDNode *N, SelectionDAG &DAG) {
+SDValue PIC16TargetLowering::ExpandLoad(SDNode *N, SelectionDAG &DAG) const {
LoadSDNode *LD = dyn_cast<LoadSDNode>(SDValue(N, 0));
SDValue Chain = LD->getChain();
SDValue Ptr = LD->getBasePtr();
@@ -961,7 +978,7 @@ SDValue PIC16TargetLowering::ExpandLoad(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, BP, Chain);
}
-SDValue PIC16TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) {
+SDValue PIC16TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
// We should have handled larger operands in type legalizer itself.
assert (Op.getValueType() == MVT::i8 && "illegal shift to lower");
@@ -991,7 +1008,7 @@ SDValue PIC16TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) {
return Call;
}
-SDValue PIC16TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) {
+SDValue PIC16TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
// We should have handled larger operands in type legalizer itself.
assert (Op.getValueType() == MVT::i8 && "illegal multiply to lower");
@@ -1007,7 +1024,7 @@ SDValue PIC16TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) {
void
PIC16TargetLowering::LowerOperationWrapper(SDNode *N,
SmallVectorImpl<SDValue>&Results,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
SDValue Op = SDValue(N, 0);
SDValue Res;
unsigned i;
@@ -1031,7 +1048,8 @@ PIC16TargetLowering::LowerOperationWrapper(SDNode *N,
}
}
-SDValue PIC16TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+SDValue PIC16TargetLowering::LowerOperation(SDValue Op,
+ SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
case ISD::ADD:
case ISD::ADDC:
@@ -1065,7 +1083,7 @@ SDValue PIC16TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
SDValue PIC16TargetLowering::ConvertToMemOperand(SDValue Op,
SelectionDAG &DAG,
- DebugLoc dl) {
+ DebugLoc dl) const {
assert (Op.getValueType() == MVT::i8
&& "illegal value type to store on stack.");
@@ -1077,7 +1095,7 @@ SDValue PIC16TargetLowering::ConvertToMemOperand(SDValue Op,
// Put the value on stack.
// Get a stack slot index and convert to es.
int FI = MF.getFrameInfo()->CreateStackObject(1, 1, false);
- const char *tmpName = createESName(PAN::getTempdataLabel(FuncName));
+ const char *tmpName = ESNames::createESName(PAN::getTempdataLabel(FuncName));
SDValue ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8);
// Store the value to ES.
@@ -1085,14 +1103,14 @@ SDValue PIC16TargetLowering::ConvertToMemOperand(SDValue Op,
DAG.getEntryNode(),
Op, ES,
DAG.getConstant (1, MVT::i8), // Banksel.
- DAG.getConstant (GetTmpOffsetForFI(FI, 1),
+ DAG.getConstant (GetTmpOffsetForFI(FI, 1, MF),
MVT::i8));
// Load the value from ES.
SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Other);
SDValue Load = DAG.getNode(PIC16ISD::PIC16Load, dl, Tys, Store,
ES, DAG.getConstant (1, MVT::i8),
- DAG.getConstant (GetTmpOffsetForFI(FI, 1),
+ DAG.getConstant (GetTmpOffsetForFI(FI, 1, MF),
MVT::i8));
return Load.getValue(0);
@@ -1103,7 +1121,7 @@ LowerIndirectCallArguments(SDValue Chain, SDValue InFlag,
SDValue DataAddr_Lo, SDValue DataAddr_Hi,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG) {
+ DebugLoc dl, SelectionDAG &DAG) const {
unsigned NumOps = Outs.size();
// If call has no arguments then do nothing and return.
@@ -1140,7 +1158,7 @@ LowerIndirectCallArguments(SDValue Chain, SDValue InFlag,
SDValue PIC16TargetLowering::
LowerDirectCallArguments(SDValue ArgLabel, SDValue Chain, SDValue InFlag,
const SmallVectorImpl<ISD::OutputArg> &Outs,
- DebugLoc dl, SelectionDAG &DAG) {
+ DebugLoc dl, SelectionDAG &DAG) const {
unsigned NumOps = Outs.size();
std::string Name;
SDValue Arg, StoreAt;
@@ -1197,7 +1215,7 @@ LowerIndirectCallReturn(SDValue Chain, SDValue InFlag,
SDValue DataAddr_Lo, SDValue DataAddr_Hi,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals) const {
unsigned RetVals = Ins.size();
// If call does not have anything to return
@@ -1224,7 +1242,7 @@ SDValue PIC16TargetLowering::
LowerDirectCallReturn(SDValue RetLabel, SDValue Chain, SDValue InFlag,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals) const {
// Currently handling primitive types only. They will come in
// i8 parts
@@ -1264,7 +1282,7 @@ SDValue
PIC16TargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
- DebugLoc dl, SelectionDAG &DAG) {
+ DebugLoc dl, SelectionDAG &DAG) const {
// Number of values to return
unsigned NumRet = Outs.size();
@@ -1275,7 +1293,7 @@ PIC16TargetLowering::LowerReturn(SDValue Chain,
const Function *F = MF.getFunction();
std::string FuncName = F->getName();
- const char *tmpName = createESName(PAN::getFrameLabel(FuncName));
+ const char *tmpName = ESNames::createESName(PAN::getFrameLabel(FuncName));
SDValue ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8);
SDValue BS = DAG.getConstant(1, MVT::i8);
SDValue RetVal;
@@ -1292,7 +1310,7 @@ PIC16TargetLowering::LowerReturn(SDValue Chain,
void PIC16TargetLowering::
GetDataAddress(DebugLoc dl, SDValue Callee, SDValue &Chain,
SDValue &DataAddr_Lo, SDValue &DataAddr_Hi,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
assert (Callee.getOpcode() == PIC16ISD::PIC16Connect
&& "Don't know what to do of such callee!!");
SDValue ZeroOperand = DAG.getConstant(0, MVT::i8);
@@ -1358,7 +1376,7 @@ PIC16TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals) const {
// PIC16 target does not yet support tail call optimization.
isTailCall = false;
@@ -1409,7 +1427,7 @@ PIC16TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (IsDirectCall) {
// Considering the GlobalAddressNode case here.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- GlobalValue *GV = G->getGlobal();
+ const GlobalValue *GV = G->getGlobal();
Callee = DAG.getTargetGlobalAddress(GV, MVT::i8);
Name = G->getGlobal()->getName();
} else {// Considering the ExternalSymbol case here
@@ -1419,11 +1437,11 @@ PIC16TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
}
// Label for argument passing
- const char *argFrame = createESName(PAN::getArgsLabel(Name));
+ const char *argFrame = ESNames::createESName(PAN::getArgsLabel(Name));
ArgLabel = DAG.getTargetExternalSymbol(argFrame, MVT::i8);
// Label for reading return value
- const char *retName = createESName(PAN::getRetvalLabel(Name));
+ const char *retName = ESNames::createESName(PAN::getRetvalLabel(Name));
RetLabel = DAG.getTargetExternalSymbol(retName, MVT::i8);
} else {
// if indirect call
@@ -1476,7 +1494,7 @@ PIC16TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
DataAddr_Hi, Ins, dl, DAG, InVals);
}
-bool PIC16TargetLowering::isDirectLoad(const SDValue Op) {
+bool PIC16TargetLowering::isDirectLoad(const SDValue Op) const {
if (Op.getOpcode() == PIC16ISD::PIC16Load)
if (Op.getOperand(1).getOpcode() == ISD::TargetGlobalAddress
|| Op.getOperand(1).getOpcode() == ISD::TargetExternalSymbol)
@@ -1490,7 +1508,7 @@ bool PIC16TargetLowering::isDirectLoad(const SDValue Op) {
// no instruction that can operate on two registers. Most insns take
// one register and one memory operand (addwf) / Constant (addlw).
bool PIC16TargetLowering::NeedToConvertToMemOp(SDValue Op, unsigned &MemOp,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
// If one of the operand is a constant, return false.
if (Op.getOperand(0).getOpcode() == ISD::Constant ||
Op.getOperand(1).getOpcode() == ISD::Constant)
@@ -1512,7 +1530,9 @@ bool PIC16TargetLowering::NeedToConvertToMemOp(SDValue Op, unsigned &MemOp,
// Direct load operands are folded in binary operations. But before folding
// verify if this folding is legal. Fold only if it is legal otherwise
// convert this direct load to a separate memory operation.
- if(ISel->IsLegalToFold(Op.getOperand(0), Op.getNode(), Op.getNode()))
+ if (SelectionDAGISel::IsLegalToFold(Op.getOperand(0),
+ Op.getNode(), Op.getNode(),
+ CodeGenOpt::Default))
return false;
else
MemOp = 0;
@@ -1539,7 +1559,9 @@ bool PIC16TargetLowering::NeedToConvertToMemOp(SDValue Op, unsigned &MemOp,
// Direct load operands are folded in binary operations. But before folding
// verify if this folding is legal. Fold only if it is legal otherwise
// convert this direct load to a separate memory operation.
- if(ISel->IsLegalToFold(Op.getOperand(1), Op.getNode(), Op.getNode()))
+ if (SelectionDAGISel::IsLegalToFold(Op.getOperand(1),
+ Op.getNode(), Op.getNode(),
+ CodeGenOpt::Default))
return false;
else
MemOp = 1;
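With the back-pointer from PIC16TargetLowering to its SelectionDAGISel removed, fold legality is now asked through the static SelectionDAGISel::IsLegalToFold, which also takes an optimization level; this patch hard-codes CodeGenOpt::Default rather than threading the real level through. The call shape, sketched with illustrative operand names:

```cpp
// Static query; no ISel instance is needed. Operand/User/Root are
// placeholders, and CodeGenOpt::Default is what this patch passes,
// not necessarily the session's actual optimization level.
if (SelectionDAGISel::IsLegalToFold(Operand, User, Root,
                                    CodeGenOpt::Default)) {
  // Safe to fold the direct load into the binary operation.
}
```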
@@ -1550,7 +1572,7 @@ bool PIC16TargetLowering::NeedToConvertToMemOp(SDValue Op, unsigned &MemOp,
// LowerBinOp - Lower a commutative binary operation that does not
// affect status flag carry.
-SDValue PIC16TargetLowering::LowerBinOp(SDValue Op, SelectionDAG &DAG) {
+SDValue PIC16TargetLowering::LowerBinOp(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
// We should have handled larger operands in type legalizer itself.
@@ -1571,7 +1593,7 @@ SDValue PIC16TargetLowering::LowerBinOp(SDValue Op, SelectionDAG &DAG) {
// LowerADD - Lower all types of ADD operations including the ones
// that affects carry.
-SDValue PIC16TargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) {
+SDValue PIC16TargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const {
// We should have handled larger operands in type legalizer itself.
assert (Op.getValueType() == MVT::i8 && "illegal add to lower");
DebugLoc dl = Op.getDebugLoc();
@@ -1600,7 +1622,7 @@ SDValue PIC16TargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) {
return Op;
}
-SDValue PIC16TargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) {
+SDValue PIC16TargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
// We should have handled larger operands in type legalizer itself.
assert (Op.getValueType() == MVT::i8 && "illegal sub to lower");
@@ -1647,15 +1669,19 @@ SDValue PIC16TargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) {
return Op;
}
-void PIC16TargetLowering::InitReservedFrameCount(const Function *F) {
+void PIC16TargetLowering::InitReservedFrameCount(const Function *F,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ PIC16MachineFunctionInfo *FuncInfo = MF.getInfo<PIC16MachineFunctionInfo>();
+
unsigned NumArgs = F->arg_size();
bool isVoidFunc = (F->getReturnType()->getTypeID() == Type::VoidTyID);
if (isVoidFunc)
- ReservedFrameCount = NumArgs;
+ FuncInfo->setReservedFrameCount(NumArgs);
else
- ReservedFrameCount = NumArgs + 1;
+ FuncInfo->setReservedFrameCount(NumArgs + 1);
}
// LowerFormalArguments - Argument values are loaded from the
@@ -1669,7 +1695,8 @@ PIC16TargetLowering::LowerFormalArguments(SDValue Chain,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl,
SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals)
+ const {
unsigned NumArgVals = Ins.size();
// Get the callee's name to create the <fname>.args label to pass args.
@@ -1678,12 +1705,12 @@ PIC16TargetLowering::LowerFormalArguments(SDValue Chain,
std::string FuncName = F->getName();
// Reset the map of FI and TmpOffset
- ResetTmpOffsetMap();
+ ResetTmpOffsetMap(DAG);
// Initialize the ReserveFrameCount
- InitReservedFrameCount(F);
+ InitReservedFrameCount(F, DAG);
// Create the <fname>.args external symbol.
- const char *tmpName = createESName(PAN::getArgsLabel(FuncName));
+ const char *tmpName = ESNames::createESName(PAN::getArgsLabel(FuncName));
SDValue ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8);
// Load arg values from the label + offset.
@@ -1782,7 +1809,7 @@ static void LookThroughSetCC(SDValue &LHS, SDValue &RHS,
// Returns appropriate CMP insn and corresponding condition code in PIC16CC
SDValue PIC16TargetLowering::getPIC16Cmp(SDValue LHS, SDValue RHS,
unsigned CC, SDValue &PIC16CC,
- SelectionDAG &DAG, DebugLoc dl) {
+ SelectionDAG &DAG, DebugLoc dl) const {
PIC16CC::CondCodes CondCode = (PIC16CC::CondCodes) CC;
// PIC16 sub is literal - W. So Swap the operands and condition if needed.
@@ -1846,7 +1873,8 @@ SDValue PIC16TargetLowering::getPIC16Cmp(SDValue LHS, SDValue RHS,
}
-SDValue PIC16TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
+SDValue PIC16TargetLowering::LowerSELECT_CC(SDValue Op,
+ SelectionDAG &DAG) const {
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
@@ -1874,8 +1902,7 @@ SDValue PIC16TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
MachineBasicBlock *
PIC16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB,
- DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
+ MachineBasicBlock *BB) const {
const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
unsigned CC = (PIC16CC::CondCodes)MI->getOperand(3).getImm();
DebugLoc dl = MI->getDebugLoc();
@@ -1903,12 +1930,9 @@ PIC16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// Update machine-CFG edges by first adding all successors of the current
// block to the new block which will contain the Phi node for the select.
- // Also inform sdisel of the edge changes.
for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
- E = BB->succ_end(); I != E; ++I) {
- EM->insert(std::make_pair(*I, sinkMBB));
+ E = BB->succ_end(); I != E; ++I)
sinkMBB->addSuccessor(*I);
- }
// Next, remove all successors of the current block, and add the true
// and fallthrough blocks as its successors.
while (!BB->succ_empty())
@@ -1938,7 +1962,7 @@ PIC16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
}
-SDValue PIC16TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
+SDValue PIC16TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
SDValue LHS = Op.getOperand(2); // LHS of the condition.
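EmitInstrWithCustomInserter also loses its DenseMap edge-map parameter here: instruction selection no longer needs to be told about machine-CFG edge changes, so the select expansion updates successors directly. The successor shuffle the patch writes out by hand matches, as far as I can tell, the existing MachineBasicBlock helper:

```cpp
// Equivalent shape of the CFG update performed in the hunk above;
// transferSuccessors is believed to implement the same move.
sinkMBB->transferSuccessors(BB); // sink inherits BB's old successors
BB->addSuccessor(copy1MBB);      // fallthrough (true) block
BB->addSuccessor(sinkMBB);       // taken branch target
```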
diff --git a/lib/Target/PIC16/PIC16ISelLowering.h b/lib/Target/PIC16/PIC16ISelLowering.h
index de1452015f60..eea17f898365 100644
--- a/lib/Target/PIC16/PIC16ISelLowering.h
+++ b/lib/Target/PIC16/PIC16ISelLowering.h
@@ -18,7 +18,6 @@
#include "PIC16.h"
#include "PIC16Subtarget.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetLowering.h"
#include <map>
@@ -85,53 +84,52 @@ namespace llvm {
/// getSetCCResultType - Return the ISD::SETCC ValueType
virtual MVT::SimpleValueType getSetCCResultType(EVT ValType) const;
virtual MVT::SimpleValueType getCmpLibcallReturnType() const;
- SDValue LowerShift(SDValue Op, SelectionDAG &DAG);
- SDValue LowerMUL(SDValue Op, SelectionDAG &DAG);
- SDValue LowerADD(SDValue Op, SelectionDAG &DAG);
- SDValue LowerSUB(SDValue Op, SelectionDAG &DAG);
- SDValue LowerBinOp(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerADD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBinOp(SDValue Op, SelectionDAG &DAG) const;
// Call returns
SDValue
LowerDirectCallReturn(SDValue RetLabel, SDValue Chain, SDValue InFlag,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
SDValue
LowerIndirectCallReturn(SDValue Chain, SDValue InFlag,
SDValue DataAddr_Lo, SDValue DataAddr_Hi,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
// Call arguments
SDValue
LowerDirectCallArguments(SDValue ArgLabel, SDValue Chain, SDValue InFlag,
const SmallVectorImpl<ISD::OutputArg> &Outs,
- DebugLoc dl, SelectionDAG &DAG);
+ DebugLoc dl, SelectionDAG &DAG) const;
SDValue
LowerIndirectCallArguments(SDValue Chain, SDValue InFlag,
SDValue DataAddr_Lo, SDValue DataAddr_Hi,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG);
+ DebugLoc dl, SelectionDAG &DAG) const;
- SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG);
- SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue getPIC16Cmp(SDValue LHS, SDValue RHS, unsigned OrigCC, SDValue &CC,
- SelectionDAG &DAG, DebugLoc dl);
- virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *MBB,
- DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const;
+ SelectionDAG &DAG, DebugLoc dl) const;
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
-
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
virtual void ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG);
+ SelectionDAG &DAG) const;
virtual void LowerOperationWrapper(SDNode *N,
SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG);
+ SelectionDAG &DAG) const;
virtual SDValue
LowerFormalArguments(SDValue Chain,
@@ -139,7 +137,7 @@ namespace llvm {
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
LowerCall(SDValue Chain, SDValue Callee,
@@ -147,19 +145,19 @@ namespace llvm {
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
- DebugLoc dl, SelectionDAG &DAG);
+ DebugLoc dl, SelectionDAG &DAG) const;
- SDValue ExpandStore(SDNode *N, SelectionDAG &DAG);
- SDValue ExpandLoad(SDNode *N, SelectionDAG &DAG);
- SDValue ExpandGlobalAddress(SDNode *N, SelectionDAG &DAG);
- SDValue ExpandExternalSymbol(SDNode *N, SelectionDAG &DAG);
- SDValue ExpandFrameIndex(SDNode *N, SelectionDAG &DAG);
+ SDValue ExpandStore(SDNode *N, SelectionDAG &DAG) const;
+ SDValue ExpandLoad(SDNode *N, SelectionDAG &DAG) const;
+ SDValue ExpandGlobalAddress(SDNode *N, SelectionDAG &DAG) const;
+ SDValue ExpandExternalSymbol(SDNode *N, SelectionDAG &DAG) const;
+ SDValue ExpandFrameIndex(SDNode *N, SelectionDAG &DAG) const;
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue PerformPIC16LoadCombine(SDNode *N, DAGCombinerInfo &DCI) const;
@@ -168,13 +166,11 @@ namespace llvm {
// This function returns the Tmp Offset for FrameIndex. If any TmpOffset
// already exists for the FI then it returns the same, else it creates a
// new offset and returns it.
- unsigned GetTmpOffsetForFI(unsigned FI, unsigned slot_size);
- void ResetTmpOffsetMap() { FiTmpOffsetMap.clear(); SetTmpSize(0); }
- void InitReservedFrameCount(const Function *F);
-
- // Return the size of Tmp variable
- unsigned GetTmpSize() { return TmpSize; }
- void SetTmpSize(unsigned Size) { TmpSize = Size; }
+ unsigned GetTmpOffsetForFI(unsigned FI, unsigned slot_size,
+ MachineFunction &MF) const;
+ void ResetTmpOffsetMap(SelectionDAG &DAG) const;
+ void InitReservedFrameCount(const Function *F,
+ SelectionDAG &DAG) const;
/// getFunctionAlignment - Return the Log2 alignment of this function.
virtual unsigned getFunctionAlignment(const Function *) const {
@@ -184,43 +180,45 @@ namespace llvm {
private:
// If the Node is a BUILD_PAIR representing a direct Address,
// then this function will return true.
- bool isDirectAddress(const SDValue &Op);
+ bool isDirectAddress(const SDValue &Op) const;
// If the Node is a DirectAddress in ROM_SPACE then this
// function will return true
- bool isRomAddress(const SDValue &Op);
+ bool isRomAddress(const SDValue &Op) const;
// Extract the Lo and Hi component of Op.
void GetExpandedParts(SDValue Op, SelectionDAG &DAG, SDValue &Lo,
- SDValue &Hi);
+ SDValue &Hi) const;
// Load pointer can be a direct or indirect address. In PIC16 direct
// addresses need Banksel and Indirect addresses need to be loaded to
// FSR first. Handle address specific cases here.
void LegalizeAddress(SDValue Ptr, SelectionDAG &DAG, SDValue &Chain,
- SDValue &NewPtr, unsigned &Offset, DebugLoc dl);
+ SDValue &NewPtr, unsigned &Offset, DebugLoc dl) const;
// FrameIndex should be broken down into ExternalSymbol and FrameOffset.
void LegalizeFrameIndex(SDValue Op, SelectionDAG &DAG, SDValue &ES,
- int &Offset);
+ int &Offset) const;
// For indirect calls the data address of the callee frame needs to be
// extracted. This function fills the arguments DataAddr_Lo and
// DataAddr_Hi with the address of the callee frame.
void GetDataAddress(DebugLoc dl, SDValue Callee, SDValue &Chain,
SDValue &DataAddr_Lo, SDValue &DataAddr_Hi,
- SelectionDAG &DAG);
+ SelectionDAG &DAG) const;
// We cannot have both operands of a binary operation in W.
// This function is used to put one operand on the stack and generate a load.
- SDValue ConvertToMemOperand(SDValue Op, SelectionDAG &DAG, DebugLoc dl);
+ SDValue ConvertToMemOperand(SDValue Op, SelectionDAG &DAG,
+ DebugLoc dl) const;
// This function checks whether we need to put an operand of an operation
// on the stack and generate a load.
// DAG parameter is required to access DAG information during
// analysis.
- bool NeedToConvertToMemOp(SDValue Op, unsigned &MemOp, SelectionDAG &DAG);
+ bool NeedToConvertToMemOp(SDValue Op, unsigned &MemOp,
+ SelectionDAG &DAG) const;
/// Subtarget - Keep a pointer to the PIC16Subtarget around so that we can
/// make the right decision when generating code for different targets.
@@ -233,31 +231,15 @@ namespace llvm {
// To set and retrieve the lib call names.
void setPIC16LibcallName(PIC16ISD::PIC16Libcall Call, const char *Name);
- const char *getPIC16LibcallName(PIC16ISD::PIC16Libcall Call);
+ const char *getPIC16LibcallName(PIC16ISD::PIC16Libcall Call) const;
// Make PIC16 Libcall.
SDValue MakePIC16Libcall(PIC16ISD::PIC16Libcall Call, EVT RetVT,
const SDValue *Ops, unsigned NumOps, bool isSigned,
- SelectionDAG &DAG, DebugLoc dl);
+ SelectionDAG &DAG, DebugLoc dl) const;
// Check if operation has a direct load operand.
- inline bool isDirectLoad(const SDValue Op);
-
- public:
- // Keep a pointer to SelectionDAGISel to access its public
- // interface (It is required during legalization)
- SelectionDAGISel *ISel;
-
- private:
- // The frameindexes generated for spill/reload are stack based.
- // This maps maintain zero based indexes for these FIs.
- std::map<unsigned, unsigned> FiTmpOffsetMap;
- unsigned TmpSize;
-
- // These are the frames for return value and argument passing
- // These FrameIndices will be expanded to foo.frame external symbol
- // and all others will be expanded to foo.tmp external symbol.
- unsigned ReservedFrameCount;
+ inline bool isDirectLoad(const SDValue Op) const;
};
} // namespace llvm
diff --git a/lib/Target/PIC16/PIC16InstrInfo.cpp b/lib/Target/PIC16/PIC16InstrInfo.cpp
index 365e8b20b7a1..9e415e069a9c 100644
--- a/lib/Target/PIC16/PIC16InstrInfo.cpp
+++ b/lib/Target/PIC16/PIC16InstrInfo.cpp
@@ -71,14 +71,14 @@ void PIC16InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill, int FI,
const TargetRegisterClass *RC) const {
- PIC16TargetLowering *PTLI = TM.getTargetLowering();
+ const PIC16TargetLowering *PTLI = TM.getTargetLowering();
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
const Function *Func = MBB.getParent()->getFunction();
const std::string FuncName = Func->getName();
- const char *tmpName = createESName(PAN::getTempdataLabel(FuncName));
+ const char *tmpName = ESNames::createESName(PAN::getTempdataLabel(FuncName));
// On the order of operands here: think "movwf SrcReg, tmp_slot, offset".
if (RC == PIC16::GPRRegisterClass) {
@@ -86,7 +86,7 @@ void PIC16InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
//MachineRegisterInfo &RI = MF.getRegInfo();
BuildMI(MBB, I, DL, get(PIC16::movwf))
.addReg(SrcReg, getKillRegState(isKill))
- .addImm(PTLI->GetTmpOffsetForFI(FI, 1))
+ .addImm(PTLI->GetTmpOffsetForFI(FI, 1, *MBB.getParent()))
.addExternalSymbol(tmpName)
.addImm(1); // Emit banksel for it.
}
@@ -101,7 +101,7 @@ void PIC16InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
: PIC16::save_fsr1;
BuildMI(MBB, I, DL, get(opcode))
.addReg(SrcReg, getKillRegState(isKill))
- .addImm(PTLI->GetTmpOffsetForFI(FI, 3))
+ .addImm(PTLI->GetTmpOffsetForFI(FI, 3, *MBB.getParent()))
.addExternalSymbol(tmpName)
.addImm(1); // Emit banksel for it.
}
@@ -113,21 +113,21 @@ void PIC16InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
const TargetRegisterClass *RC) const {
- PIC16TargetLowering *PTLI = TM.getTargetLowering();
+ const PIC16TargetLowering *PTLI = TM.getTargetLowering();
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
const Function *Func = MBB.getParent()->getFunction();
const std::string FuncName = Func->getName();
- const char *tmpName = createESName(PAN::getTempdataLabel(FuncName));
+ const char *tmpName = ESNames::createESName(PAN::getTempdataLabel(FuncName));
// On the order of operands here: think "movf FrameIndex, W".
if (RC == PIC16::GPRRegisterClass) {
//MachineFunction &MF = *MBB.getParent();
//MachineRegisterInfo &RI = MF.getRegInfo();
BuildMI(MBB, I, DL, get(PIC16::movf), DestReg)
- .addImm(PTLI->GetTmpOffsetForFI(FI, 1))
+ .addImm(PTLI->GetTmpOffsetForFI(FI, 1, *MBB.getParent()))
.addExternalSymbol(tmpName)
.addImm(1); // Emit banksel for it.
}
@@ -141,7 +141,7 @@ void PIC16InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
unsigned opcode = (DestReg == PIC16::FSR0) ? PIC16::restore_fsr0
: PIC16::restore_fsr1;
BuildMI(MBB, I, DL, get(opcode), DestReg)
- .addImm(PTLI->GetTmpOffsetForFI(FI, 3))
+ .addImm(PTLI->GetTmpOffsetForFI(FI, 3, *MBB.getParent()))
.addExternalSymbol(tmpName)
.addImm(1); // Emit banksel for it.
}
diff --git a/lib/Target/PIC16/PIC16MachineFunctionInfo.h b/lib/Target/PIC16/PIC16MachineFunctionInfo.h
new file mode 100644
index 000000000000..bdf50867f2e1
--- /dev/null
+++ b/lib/Target/PIC16/PIC16MachineFunctionInfo.h
@@ -0,0 +1,52 @@
+//===- PIC16MachineFunctionInfo.h - PIC16 machine function info -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares PIC16-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PIC16MACHINEFUNCTIONINFO_H
+#define PIC16MACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// PIC16MachineFunctionInfo - This class is derived from MachineFunctionInfo
+/// and contains private PIC16 target-specific information for each
+/// MachineFunction.
+class PIC16MachineFunctionInfo : public MachineFunctionInfo {
+ // The frame indexes generated for spill/reload are stack based.
+ // This map maintains zero-based indexes for these FIs.
+ std::map<unsigned, unsigned> FiTmpOffsetMap;
+ unsigned TmpSize;
+
+ // These are the frames for return value and argument passing
+ // These FrameIndices will be expanded to foo.frame external symbol
+ // and all others will be expanded to foo.tmp external symbol.
+ unsigned ReservedFrameCount;
+
+public:
+ PIC16MachineFunctionInfo()
+ : TmpSize(0), ReservedFrameCount(0) {}
+
+ explicit PIC16MachineFunctionInfo(MachineFunction &MF)
+ : TmpSize(0), ReservedFrameCount(0) {}
+
+ std::map<unsigned, unsigned> &getFiTmpOffsetMap() { return FiTmpOffsetMap; }
+
+ unsigned getTmpSize() const { return TmpSize; }
+ void setTmpSize(unsigned Size) { TmpSize = Size; }
+
+ unsigned getReservedFrameCount() const { return ReservedFrameCount; }
+ void setReservedFrameCount(unsigned Count) { ReservedFrameCount = Count; }
+};
+
+} // End llvm namespace
+
+#endif
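One nit on the new header: it uses std::map for FiTmpOffsetMap but includes only MachineFunction.h, so it leans on a transitive include; a self-contained version would also pull in <map>. Usage is the standard lazy-construction pattern, sketched here with the non-void-function case from the lowering code above:

```cpp
#include <map> // needed explicitly by a self-contained version

// getInfo<T>() constructs the target's info object on first request,
// so no explicit registration is needed.
PIC16MachineFunctionInfo *FuncInfo =
    MF.getInfo<PIC16MachineFunctionInfo>();
FuncInfo->setReservedFrameCount(F->arg_size() + 1);
```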
diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp
index 865da35de3c5..c282521be324 100644
--- a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp
+++ b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp
@@ -172,7 +172,7 @@ void PIC16Cloner::CloneAutos(Function *F) {
VarName = I->getName().str();
if (PAN::isLocalToFunc(FnName, VarName)) {
// Auto variable for current function found. Clone it.
- GlobalVariable *GV = I;
+ const GlobalVariable *GV = I;
const Type *InitTy = GV->getInitializer()->getType();
GlobalVariable *ClonedGV =
diff --git a/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp b/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp
new file mode 100644
index 000000000000..76c6c6091e5a
--- /dev/null
+++ b/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp
@@ -0,0 +1,22 @@
+//===-- PIC16SelectionDAGInfo.cpp - PIC16 SelectionDAG Info ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PIC16SelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pic16-selectiondag-info"
+#include "PIC16SelectionDAGInfo.h"
+using namespace llvm;
+
+PIC16SelectionDAGInfo::PIC16SelectionDAGInfo() {
+}
+
+PIC16SelectionDAGInfo::~PIC16SelectionDAGInfo() {
+}
diff --git a/lib/Target/PIC16/PIC16SelectionDAGInfo.h b/lib/Target/PIC16/PIC16SelectionDAGInfo.h
new file mode 100644
index 000000000000..112480e50cc7
--- /dev/null
+++ b/lib/Target/PIC16/PIC16SelectionDAGInfo.h
@@ -0,0 +1,29 @@
+//===-- PIC16SelectionDAGInfo.h - PIC16 SelectionDAG Info -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PIC16 subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PIC16SELECTIONDAGINFO_H
+#define PIC16SELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class PIC16SelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ PIC16SelectionDAGInfo();
+ ~PIC16SelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/lib/Target/PIC16/PIC16TargetMachine.h b/lib/Target/PIC16/PIC16TargetMachine.h
index b11fdd5dba50..849845afd430 100644
--- a/lib/Target/PIC16/PIC16TargetMachine.h
+++ b/lib/Target/PIC16/PIC16TargetMachine.h
@@ -50,8 +50,8 @@ public:
return &(InstrInfo.getRegisterInfo());
}
- virtual PIC16TargetLowering *getTargetLowering() const {
- return const_cast<PIC16TargetLowering*>(&TLInfo);
+ virtual const PIC16TargetLowering *getTargetLowering() const {
+ return &TLInfo;
}
virtual bool addInstSelector(PassManagerBase &PM,
diff --git a/lib/Target/PIC16/PIC16TargetObjectFile.cpp b/lib/Target/PIC16/PIC16TargetObjectFile.cpp
index b891c18c4643..ff0f971cc382 100644
--- a/lib/Target/PIC16/PIC16TargetObjectFile.cpp
+++ b/lib/Target/PIC16/PIC16TargetObjectFile.cpp
@@ -8,7 +8,6 @@
//===----------------------------------------------------------------------===//
#include "PIC16TargetObjectFile.h"
-#include "PIC16ISelLowering.h"
#include "PIC16TargetMachine.h"
#include "PIC16Section.h"
#include "llvm/DerivedTypes.h"
@@ -27,7 +26,7 @@ PIC16TargetObjectFile::~PIC16TargetObjectFile() {
/// Find a pic16 section. Return null if not found. Do not create one.
PIC16Section *PIC16TargetObjectFile::
-findPIC16Section(const std::string &Name) {
+findPIC16Section(const std::string &Name) const {
/// Return it if we have an already existing one.
PIC16Section *Entry = SectionsByName[Name];
if (Entry)
@@ -134,7 +133,7 @@ void PIC16TargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &tm){
const MCSection *
PIC16TargetObjectFile::allocateUDATA(const GlobalVariable *GV) const {
assert(GV->hasInitializer() && "This global doesn't need space");
- Constant *C = GV->getInitializer();
+ const Constant *C = GV->getInitializer();
assert(C->isNullValue() && "Uninitialized globals have non-zero initializer");
// Find how much space this global needs.
@@ -169,7 +168,7 @@ PIC16TargetObjectFile::allocateUDATA(const GlobalVariable *GV) const {
const MCSection *
PIC16TargetObjectFile::allocateIDATA(const GlobalVariable *GV) const{
assert(GV->hasInitializer() && "This global doesn't need space");
- Constant *C = GV->getInitializer();
+ const Constant *C = GV->getInitializer();
assert(!C->isNullValue() && "Initialized globals have zero initializer");
assert(GV->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE &&
"can allocate initialized RAM data only");
diff --git a/lib/Target/PIC16/PIC16TargetObjectFile.h b/lib/Target/PIC16/PIC16TargetObjectFile.h
index cf8bf848e45e..b1eb9f9d854a 100644
--- a/lib/Target/PIC16/PIC16TargetObjectFile.h
+++ b/lib/Target/PIC16/PIC16TargetObjectFile.h
@@ -122,7 +122,7 @@ namespace llvm {
void Initialize(MCContext &Ctx, const TargetMachine &TM);
/// Return the section with the given Name. Null if not found.
- PIC16Section *findPIC16Section(const std::string &Name);
+ PIC16Section *findPIC16Section(const std::string &Name) const;
/// Override section allocations for user specified sections.
virtual const MCSection *
diff --git a/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt b/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt
index 236b264d8f80..42cd4862a98f 100644
--- a/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt
+++ b/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt
@@ -3,4 +3,4 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/
add_llvm_library(LLVMPowerPCAsmPrinter
PPCAsmPrinter.cpp
)
-add_dependencies(LLVMPowerPCAsmPrinter PowerPCCodeGenTable_gen) \ No newline at end of file
+add_dependencies(LLVMPowerPCAsmPrinter PowerPCCodeGenTable_gen)
diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
index 605656493c4b..e35dc579f2cd 100644
--- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
@@ -21,6 +21,7 @@
#include "PPCPredicates.h"
#include "PPCTargetMachine.h"
#include "PPCSubtarget.h"
+#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
@@ -199,8 +200,8 @@ namespace {
raw_ostream &O) {
const MachineOperand &MO = MI->getOperand(OpNo);
if (TM.getRelocationModel() != Reloc::Static) {
- if (MO.getType() == MachineOperand::MO_GlobalAddress) {
- GlobalValue *GV = MO.getGlobal();
+ if (MO.isGlobal()) {
+ const GlobalValue *GV = MO.getGlobal();
if (GV->isDeclaration() || GV->isWeakForLinker()) {
// Dynamically-resolved functions need a stub for the function.
MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$stub");
@@ -213,7 +214,7 @@ namespace {
return;
}
}
- if (MO.getType() == MachineOperand::MO_ExternalSymbol) {
+ if (MO.isSymbol()) {
SmallString<128> TempNameStr;
TempNameStr += StringRef(MO.getSymbolName());
TempNameStr += StringRef("$stub");
@@ -311,7 +312,7 @@ namespace {
void printTOCEntryLabel(const MachineInstr *MI, unsigned OpNo,
raw_ostream &O) {
const MachineOperand &MO = MI->getOperand(OpNo);
- assert(MO.getType() == MachineOperand::MO_GlobalAddress);
+ assert(MO.isGlobal());
MCSymbol *Sym = Mang->getSymbol(MO.getGlobal());
// Map symbol -> label of TOC entry.
@@ -405,7 +406,7 @@ void PPCAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) {
}
case MachineOperand::MO_GlobalAddress: {
// Computing the address of a global symbol, not calling it.
- GlobalValue *GV = MO.getGlobal();
+ const GlobalValue *GV = MO.getGlobal();
MCSymbol *SymToPrint;
// External or weakly linked global variables need non-lazily-resolved stubs
@@ -535,6 +536,23 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
SmallString<128> Str;
raw_svector_ostream O(Str);
+ if (MI->getOpcode() == TargetOpcode::DBG_VALUE) {
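+ // Emit the DBG_VALUE as a target-asm comment; the emitted line looks
+ // like, e.g. (comment marker and register names vary with the target
+ // syntax, so this sample is illustrative only):
+ // ; DEBUG_VALUE: myvar <- [r31+0]+8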
+ unsigned NOps = MI->getNumOperands();
+ assert(NOps==4);
+ O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
+ // cast away const; DIVariable and related classes do not take const operands for some reason.
+ DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
+ O << V.getName();
+ O << " <- ";
+ // Frame address. Currently handles register +- offset only.
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
+ O << '['; printOperand(MI, 0, O); O << '+'; printOperand(MI, 1, O);
+ O << ']';
+ O << "+";
+ printOperand(MI, NOps-2, O);
+ OutStreamer.EmitRawText(O.str());
+ return;
+ }
// Check for slwi/srwi mnemonics.
if (MI->getOpcode() == PPC::RLWINM) {
unsigned char SH = MI->getOperand(2).getImm();
@@ -649,18 +667,18 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
// Prime text sections so they are adjacent. This reduces the likelihood that
// a large data or debug section causes a branch to exceed the 16M limit.
- TargetLoweringObjectFileMachO &TLOFMacho =
- static_cast<TargetLoweringObjectFileMachO &>(getObjFileLowering());
+ const TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
OutStreamer.SwitchSection(TLOFMacho.getTextCoalSection());
if (TM.getRelocationModel() == Reloc::PIC_) {
OutStreamer.SwitchSection(
- TLOFMacho.getMachOSection("__TEXT", "__picsymbolstub1",
+ OutContext.getMachOSection("__TEXT", "__picsymbolstub1",
MCSectionMachO::S_SYMBOL_STUBS |
MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
32, SectionKind::getText()));
} else if (TM.getRelocationModel() == Reloc::DynamicNoPIC) {
OutStreamer.SwitchSection(
- TLOFMacho.getMachOSection("__TEXT","__symbol_stub1",
+ OutContext.getMachOSection("__TEXT","__symbol_stub1",
MCSectionMachO::S_SYMBOL_STUBS |
MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
16, SectionKind::getText()));
@@ -686,8 +704,8 @@ void PPCDarwinAsmPrinter::
EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
bool isPPC64 = TM.getTargetData()->getPointerSizeInBits() == 64;
- TargetLoweringObjectFileMachO &TLOFMacho =
- static_cast<TargetLoweringObjectFileMachO &>(getObjFileLowering());
+ const TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
// .lazy_symbol_pointer
const MCSection *LSPSection = TLOFMacho.getLazySymbolPointerSection();
@@ -695,10 +713,10 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
// Output stubs for dynamically-linked functions
if (TM.getRelocationModel() == Reloc::PIC_) {
const MCSection *StubSection =
- TLOFMacho.getMachOSection("__TEXT", "__picsymbolstub1",
- MCSectionMachO::S_SYMBOL_STUBS |
- MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
- 32, SectionKind::getText());
+ OutContext.getMachOSection("__TEXT", "__picsymbolstub1",
+ MCSectionMachO::S_SYMBOL_STUBS |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 32, SectionKind::getText());
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
OutStreamer.SwitchSection(StubSection);
EmitAlignment(4);
@@ -742,10 +760,10 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
}
const MCSection *StubSection =
- TLOFMacho.getMachOSection("__TEXT","__symbol_stub1",
- MCSectionMachO::S_SYMBOL_STUBS |
- MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
- 16, SectionKind::getText());
+ OutContext.getMachOSection("__TEXT","__symbol_stub1",
+ MCSectionMachO::S_SYMBOL_STUBS |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 16, SectionKind::getText());
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
MCSymbol *Stub = Stubs[i].first;
MCSymbol *RawSym = Stubs[i].second.getPointer();
@@ -782,8 +800,8 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
bool isPPC64 = TM.getTargetData()->getPointerSizeInBits() == 64;
// Darwin/PPC always uses mach-o.
- TargetLoweringObjectFileMachO &TLOFMacho =
- static_cast<TargetLoweringObjectFileMachO &>(getObjFileLowering());
+ const TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
MachineModuleInfoMachO &MMIMacho =
MMI->getObjFileInfo<MachineModuleInfoMachO>();
@@ -794,8 +812,8 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
if (MAI->doesSupportExceptionHandling() && MMI) {
// Add the (possibly multiple) personalities to the set of global values.
// Only referenced functions get into the Personalities list.
- const std::vector<Function *> &Personalities = MMI->getPersonalities();
- for (std::vector<Function *>::const_iterator I = Personalities.begin(),
+ const std::vector<const Function*> &Personalities = MMI->getPersonalities();
+ for (std::vector<const Function*>::const_iterator I = Personalities.begin(),
E = Personalities.end(); I != E; ++I) {
if (*I) {
MCSymbol *NLPSym = GetSymbolWithGlobalValueBase(*I, "$non_lazy_ptr");
diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt
index c997c5cfc7a0..7ffc5eb5f311 100644
--- a/lib/Target/PowerPC/CMakeLists.txt
+++ b/lib/Target/PowerPC/CMakeLists.txt
@@ -24,6 +24,7 @@ add_llvm_target(PowerPCCodeGen
PPCRegisterInfo.cpp
PPCSubtarget.cpp
PPCTargetMachine.cpp
+ PPCSelectionDAGInfo.cpp
)
target_link_libraries (LLVMPowerPCCodeGen LLVMSelectionDAG)
diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp
index f7c27d40a5b3..361fa70fb4c4 100644
--- a/lib/Target/PowerPC/PPCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp
@@ -202,7 +202,7 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
MachineRelocation R;
if (MO.isGlobal()) {
R = MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
- MO.getGlobal(), 0,
+ const_cast<GlobalValue *>(MO.getGlobal()), 0,
isa<Function>(MO.getGlobal()));
} else if (MO.isSymbol()) {
R = MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 1e323849d1ef..3d9f8aa6ee5c 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -16,7 +16,6 @@
#include "PPC.h"
#include "PPCPredicates.h"
#include "PPCTargetMachine.h"
-#include "PPCISelLowering.h"
#include "PPCHazardRecognizers.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -41,8 +40,8 @@ namespace {
/// instructions for SelectionDAG operations.
///
class PPCDAGToDAGISel : public SelectionDAGISel {
- PPCTargetMachine &TM;
- PPCTargetLowering &PPCLowering;
+ const PPCTargetMachine &TM;
+ const PPCTargetLowering &PPCLowering;
const PPCSubtarget &PPCSubTarget;
unsigned GlobalBaseReg;
public:
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 9cd01be54e42..6f1195393bd0 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -397,7 +397,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(const Type *Ty) const {
- TargetMachine &TM = getTargetMachine();
+ const TargetMachine &TM = getTargetMachine();
// Darwin passes everything on 4 byte boundary.
if (TM.getSubtarget<PPCSubtarget>().isDarwin())
return 4;
@@ -476,7 +476,7 @@ static bool isFloatingPointZero(SDValue Op) {
else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
// Maybe this has already been legalized into the constant pool?
if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
return CFP->getValueAPF().isZero();
}
return false;
@@ -1095,10 +1095,10 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
//===----------------------------------------------------------------------===//
SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
- Constant *C = CP->getConstVal();
+ const Constant *C = CP->getConstVal();
SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
SDValue Zero = DAG.getConstant(0, PtrVT);
// FIXME there isn't really any debug info here
@@ -1129,7 +1129,7 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
return Lo;
}
-SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
+SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
@@ -1163,16 +1163,17 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
}
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
llvm_unreachable("TLS not implemented for PPC.");
return SDValue(); // Not reached
}
-SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) {
+SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
+ SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
DebugLoc DL = Op.getDebugLoc();
- BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
SDValue TgtBA = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true);
SDValue Zero = DAG.getConstant(0, PtrVT);
SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, TgtBA, Zero);
@@ -1199,10 +1200,10 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) {
}
SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
- GlobalValue *GV = GSDN->getGlobal();
+ const GlobalValue *GV = GSDN->getGlobal();
SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
SDValue Zero = DAG.getConstant(0, PtrVT);
// FIXME there isn't really any debug info here
@@ -1247,7 +1248,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
false, false, 0);
}
-SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
+SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
DebugLoc dl = Op.getDebugLoc();
@@ -1291,17 +1292,14 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
}
SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
- int VarArgsFrameIndex,
- int VarArgsStackOffset,
- unsigned VarArgsNumGPR,
- unsigned VarArgsNumFPR,
- const PPCSubtarget &Subtarget) {
+ const PPCSubtarget &Subtarget) const {
llvm_unreachable("VAARG not yet implemented for the SVR4 ABI!");
return SDValue(); // Not reached
}
-SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) {
+SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue Trmp = Op.getOperand(1); // trampoline
SDValue FPtr = Op.getOperand(2); // nested function
@@ -1343,18 +1341,17 @@ SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) {
}
SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
- int VarArgsFrameIndex,
- int VarArgsStackOffset,
- unsigned VarArgsNumGPR,
- unsigned VarArgsNumFPR,
- const PPCSubtarget &Subtarget) {
+ const PPCSubtarget &Subtarget) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+
DebugLoc dl = Op.getDebugLoc();
if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
+ SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0,
false, false, 0);
@@ -1385,14 +1382,16 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
// } va_list[1];
- SDValue ArgGPR = DAG.getConstant(VarArgsNumGPR, MVT::i32);
- SDValue ArgFPR = DAG.getConstant(VarArgsNumFPR, MVT::i32);
+ SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), MVT::i32);
+ SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), MVT::i32);
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- SDValue StackOffsetFI = DAG.getFrameIndex(VarArgsStackOffset, PtrVT);
- SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
+ SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
+ PtrVT);
+ SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
+ PtrVT);
uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT);
@@ -1525,7 +1524,8 @@ PPCTargetLowering::LowerFormalArguments(SDValue Chain,
const SmallVectorImpl<ISD::InputArg>
&Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals)
+ const {
if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) {
return LowerFormalArguments_SVR4(Chain, CallConv, isVarArg, Ins,
dl, DAG, InVals);
@@ -1542,7 +1542,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
const SmallVectorImpl<ISD::InputArg>
&Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals) const {
// 32-bit SVR4 ABI Stack Frame Layout:
// +-----------------------------------+
@@ -1575,6 +1575,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Potential tail calls could cause overwriting of argument stack slots.
@@ -1688,24 +1689,27 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
};
const unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
- VarArgsNumGPR = CCInfo.getFirstUnallocated(GPArgRegs, NumGPArgRegs);
- VarArgsNumFPR = CCInfo.getFirstUnallocated(FPArgRegs, NumFPArgRegs);
+ FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs,
+ NumGPArgRegs));
+ FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs,
+ NumFPArgRegs));
// Make room for NumGPArgRegs and NumFPArgRegs.
int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
NumFPArgRegs * EVT(MVT::f64).getSizeInBits()/8;
- VarArgsStackOffset = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
- CCInfo.getNextStackOffset(),
- true, false);
+ FuncInfo->setVarArgsStackOffset(
+ MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
+ CCInfo.getNextStackOffset(),
+ true, false));
- VarArgsFrameIndex = MFI->CreateStackObject(Depth, 8, false);
- SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
+ FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false));
+ SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
// The fixed integer arguments of a variadic function are
// stored to the VarArgsFrameIndex on the stack.
unsigned GPRIndex = 0;
- for (; GPRIndex != VarArgsNumGPR; ++GPRIndex) {
+ for (; GPRIndex != FuncInfo->getVarArgsNumGPR(); ++GPRIndex) {
SDValue Val = DAG.getRegister(GPArgRegs[GPRIndex], PtrVT);
SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0,
false, false, 0);
@@ -1736,7 +1740,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
// The double arguments are stored to the VarArgsFrameIndex
// on the stack.
unsigned FPRIndex = 0;
- for (FPRIndex = 0; FPRIndex != VarArgsNumFPR; ++FPRIndex) {
+ for (FPRIndex = 0; FPRIndex != FuncInfo->getVarArgsNumFPR(); ++FPRIndex) {
SDValue Val = DAG.getRegister(FPArgRegs[FPRIndex], MVT::f64);
SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0,
false, false, 0);
@@ -1775,11 +1779,12 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
const SmallVectorImpl<ISD::InputArg>
&Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals) const {
// TODO: add description of PPC stack frame format, or at least some docs.
//
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
bool isPPC64 = PtrVT == MVT::i64;
@@ -2090,9 +2095,10 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
if (isVarArg) {
int Depth = ArgOffset;
- VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
- Depth, true, false);
- SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
+ FuncInfo->setVarArgsFrameIndex(
+ MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
+ Depth, true, false));
+ SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
// If this function is vararg, store any remaining integer argument regs
// to their spots on the stack so that they may be loaded by dereferencing the
@@ -2359,7 +2365,7 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
SDValue &LROpOut,
SDValue &FPOpOut,
bool isDarwinABI,
- DebugLoc dl) {
+ DebugLoc dl) const {
if (SPDiff) {
// Load the LR and FP stack slot for later adjusting.
EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
@@ -2582,7 +2588,7 @@ PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals) const {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCRetInfo(CallConv, isVarArg, getTargetMachine(),
@@ -2613,7 +2619,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
SDValue &Callee,
int SPDiff, unsigned NumBytes,
const SmallVectorImpl<ISD::InputArg> &Ins,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals) const {
std::vector<EVT> NodeTys;
SmallVector<SDValue, 8> Ops;
unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff,
@@ -2701,7 +2707,7 @@ PPCTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals) const {
if (isTailCall)
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
Ins, DAG);
@@ -2724,7 +2730,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals) const {
// See PPCTargetLowering::LowerFormalArguments_SVR4() for a description
// of the 32-bit SVR4 ABI stack frame layout.
@@ -2930,7 +2936,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals) const {
unsigned NumOps = Outs.size();
@@ -3291,7 +3297,7 @@ SDValue
PPCTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
- DebugLoc dl, SelectionDAG &DAG) {
+ DebugLoc dl, SelectionDAG &DAG) const {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
@@ -3323,7 +3329,7 @@ PPCTargetLowering::LowerReturn(SDValue Chain,
}
SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
- const PPCSubtarget &Subtarget) {
+ const PPCSubtarget &Subtarget) const {
// When we pop the dynamic allocation we need to restore the SP link.
DebugLoc dl = Op.getDebugLoc();
@@ -3407,7 +3413,7 @@ PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG,
- const PPCSubtarget &Subtarget) {
+ const PPCSubtarget &Subtarget) const {
// Get the inputs.
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
@@ -3428,7 +3434,7 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
/// possible.
-SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
+SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
// Not FP? Not a fsel.
if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
!Op.getOperand(2).getValueType().isFloatingPoint())
@@ -3502,7 +3508,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
// FIXME: Split this code up when LegalizeDAGTypes lands.
SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
- DebugLoc dl) {
+ DebugLoc dl) const {
assert(Op.getOperand(0).getValueType().isFloatingPoint());
SDValue Src = Op.getOperand(0);
if (Src.getValueType() == MVT::f32)
@@ -3537,7 +3543,8 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
false, false, 0);
}
-SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
+ SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
// Don't handle ppc_fp128 here; let it be lowered to a libcall.
if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
@@ -3586,7 +3593,8 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
return FP;
}
-SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) {
+SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
+ SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
/*
The rounding mode is in bits 30:31 of FPSR, and has the following
@@ -3649,7 +3657,7 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) {
ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
}
-SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) {
+SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
unsigned BitWidth = VT.getSizeInBits();
DebugLoc dl = Op.getDebugLoc();
@@ -3678,7 +3686,7 @@ SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) {
return DAG.getMergeValues(OutOps, 2, dl);
}
-SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) {
+SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
unsigned BitWidth = VT.getSizeInBits();
@@ -3707,7 +3715,7 @@ SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) {
return DAG.getMergeValues(OutOps, 2, dl);
}
-SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) {
+SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
EVT VT = Op.getValueType();
unsigned BitWidth = VT.getSizeInBits();
@@ -3808,7 +3816,8 @@ static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
// selects to a single instruction, return Op. Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
-SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
+SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
@@ -4050,7 +4059,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
/// return the code it can be lowered into. Worst case, it can always be
/// lowered into a vperm.
SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
@@ -4216,7 +4225,7 @@ static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
/// lower, do it, otherwise return null.
SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
// If this is a lowered altivec predicate compare, CompareOpc is set to the
// opcode number of the comparison.
DebugLoc dl = Op.getDebugLoc();
@@ -4284,12 +4293,12 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
// Create a stack slot that is 16-byte aligned.
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = getPointerTy();
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
// Store the input value into Value#0 of the stack slot.
@@ -4301,7 +4310,7 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
false, false, 0);
}
-SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) {
+SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
if (Op.getValueType() == MVT::v4i32) {
SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
@@ -4362,7 +4371,7 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) {
/// LowerOperation - Provide custom lowering hooks for some operations.
///
-SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Wasn't expecting to be able to lower this!");
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
@@ -4373,12 +4382,10 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG);
case ISD::VASTART:
- return LowerVASTART(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset,
- VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget);
+ return LowerVASTART(Op, DAG, PPCSubTarget);
case ISD::VAARG:
- return LowerVAARG(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset,
- VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget);
+ return LowerVAARG(Op, DAG, PPCSubTarget);
case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget);
case ISD::DYNAMIC_STACKALLOC:
@@ -4412,7 +4419,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue>&Results,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
DebugLoc dl = N->getDebugLoc();
switch (N->getOpcode()) {
default:
@@ -4677,8 +4684,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
MachineBasicBlock *
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB,
- DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
+ MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
// To "insert" these instructions we actually have to insert their
@@ -4716,12 +4722,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
F->insert(It, sinkMBB);
// Update machine-CFG edges by first adding all successors of the current
// block to the new block which will contain the Phi node for the select.
- // Also inform sdisel of the edge changes.
for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
- E = BB->succ_end(); I != E; ++I) {
- EM->insert(std::make_pair(*I, sinkMBB));
+ E = BB->succ_end(); I != E; ++I)
sinkMBB->addSuccessor(*I);
- }
// Next, remove all successors of the current block, and add the true
// and fallthrough blocks as its successors.
while (!BB->succ_empty())
@@ -5032,7 +5035,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
- TargetMachine &TM = getTargetMachine();
+ const TargetMachine &TM = getTargetMachine();
SelectionDAG &DAG = DCI.DAG;
DebugLoc dl = N->getDebugLoc();
switch (N->getOpcode()) {
@@ -5491,46 +5494,59 @@ bool PPCTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
return false;
}
-SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) {
+SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
+ SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
- // Depths > 0 not supported yet!
- if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0)
- return SDValue();
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ // Make sure the function does not optimize away the store of the RA to
+ // the stack.
MachineFunction &MF = DAG.getMachineFunction();
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setLRStoreRequired();
+ bool isPPC64 = PPCSubTarget.isPPC64();
+ bool isDarwinABI = PPCSubTarget.isDarwinABI();
+
+ if (Depth > 0) {
+ SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+ SDValue Offset =
+ DAG.getConstant(PPCFrameInfo::getReturnSaveOffset(isPPC64, isDarwinABI),
+ isPPC64 ? MVT::i64 : MVT::i32);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ FrameAddr, Offset),
+ NULL, 0, false, false, 0);
+ }
// Just load the return address off the stack.
SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
-
- // Make sure the function really does not optimize away the store of the RA
- // to the stack.
- FuncInfo->setLRStoreRequired();
- return DAG.getLoad(getPointerTy(), dl,
- DAG.getEntryNode(), RetAddrFI, NULL, 0,
- false, false, 0);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ RetAddrFI, NULL, 0, false, false, 0);
}
-SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
+SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
+ SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
- // Depths > 0 not supported yet!
- if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0)
- return SDValue();
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
bool isPPC64 = PtrVT == MVT::i64;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool is31 = (NoFramePointerElim || MFI->hasVarSizedObjects())
- && MFI->getStackSize();
-
- if (isPPC64)
- return DAG.getCopyFromReg(DAG.getEntryNode(), dl, is31 ? PPC::X31 : PPC::X1,
- MVT::i64);
- else
- return DAG.getCopyFromReg(DAG.getEntryNode(), dl, is31 ? PPC::R31 : PPC::R1,
- MVT::i32);
+ MFI->setFrameAddressIsTaken(true);
+ bool is31 = (DisableFramePointerElim(MF) || MFI->hasVarSizedObjects()) &&
+ MFI->getStackSize() &&
+ !MF.getFunction()->hasFnAttr(Attribute::Naked);
+ unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) :
+ (is31 ? PPC::R31 : PPC::R1);
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
+ PtrVT);
+ while (Depth--)
+ FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
+ FrameAddr, NULL, 0, false, false, 0);
+ return FrameAddr;
}
bool
@@ -5547,12 +5563,15 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
/// probably because the source does not need to be loaded. If
/// 'NonScalarIntSafe' is true, that means it's safe to return a
/// non-scalar-integer type, e.g. empty string source, constant, or loaded
-/// from memory. It returns EVT::Other if SelectionDAG should be responsible
-/// for determining it.
+/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
+/// constant so it does not need to be loaded.
+/// It returns EVT::Other if the type should be determined using generic
+/// target-independent logic.
EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
bool NonScalarIntSafe,
- SelectionDAG &DAG) const {
+ bool MemcpyStrSrc,
+ MachineFunction &MF) const {
if (this->PPCSubTarget.isPPC64()) {
return MVT::i64;
} else {
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index f816bddaf5a3..1d05f3d4ea69 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -233,14 +233,8 @@ namespace llvm {
}
class PPCTargetLowering : public TargetLowering {
- int VarArgsFrameIndex; // FrameIndex for start of varargs area.
- int VarArgsStackOffset; // StackOffset for start of stack
- // arguments.
- unsigned VarArgsNumGPR; // Index of the first unused integer
- // register for parameter passing.
- unsigned VarArgsNumFPR; // Index of the first unused double
- // register for parameter passing.
const PPCSubtarget &PPCSubTarget;
+
public:
explicit PPCTargetLowering(PPCTargetMachine &TM);
@@ -285,13 +279,13 @@ namespace llvm {
/// LowerOperation - Provide custom lowering hooks for some operations.
///
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
/// ReplaceNodeResults - Replace the results of a node with an illegal result
/// type with new values built out of custom code.
///
virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
- SelectionDAG &DAG);
+ SelectionDAG &DAG) const;
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
@@ -302,9 +296,9 @@ namespace llvm {
const SelectionDAG &DAG,
unsigned Depth = 0) const;
- virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *MBB,
- DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const;
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI,
MachineBasicBlock *MBB, bool is64Bit,
unsigned BinOpcode) const;
@@ -355,12 +349,14 @@ namespace llvm {
/// probably because the source does not need to be loaded. If
/// 'NonScalarIntSafe' is true, that means it's safe to return a
/// non-scalar-integer type, e.g. empty string source, constant, or loaded
- /// from memory. It returns EVT::Other if SelectionDAG should be responsible
- /// for determining it.
+ /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
+ /// constant so it does not need to be loaded.
+ /// It returns EVT::Other if the type should be determined using generic
+ /// target-independent logic.
virtual EVT
- getOptimalMemOpType(uint64_t Size,
- unsigned DstAlign, unsigned SrcAlign,
- bool NonScalarIntSafe, SelectionDAG &DAG) const;
+ getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
+ bool NonScalarIntSafe, bool MemcpyStrSrc,
+ MachineFunction &MF) const;
/// getFunctionAlignment - Return the Log2 alignment of this function.
virtual unsigned getFunctionAlignment(const Function *F) const;
@@ -382,46 +378,43 @@ namespace llvm {
SDValue &LROpOut,
SDValue &FPOpOut,
bool isDarwinABI,
- DebugLoc dl);
-
- SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG);
- SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG);
- SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG);
- SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG);
- SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG);
- SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG);
- SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG);
- SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG);
- SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG);
+ DebugLoc dl) const;
+
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
- int VarArgsFrameIndex, int VarArgsStackOffset,
- unsigned VarArgsNumGPR, unsigned VarArgsNumFPR,
- const PPCSubtarget &Subtarget);
- SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG, int VarArgsFrameIndex,
- int VarArgsStackOffset, unsigned VarArgsNumGPR,
- unsigned VarArgsNumFPR, const PPCSubtarget &Subtarget);
+ const PPCSubtarget &Subtarget) const;
+ SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const;
SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
- const PPCSubtarget &Subtarget);
+ const PPCSubtarget &Subtarget) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG,
- const PPCSubtarget &Subtarget);
- SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG);
- SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, DebugLoc dl);
- SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG);
- SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG);
- SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG);
- SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG);
- SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG);
- SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG);
- SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG);
- SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG);
- SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG);
- SDValue LowerMUL(SDValue Op, SelectionDAG &DAG);
+ const PPCSubtarget &Subtarget) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, DebugLoc dl) const;
+ SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
SDValue FinishCall(CallingConv::ID CallConv, DebugLoc dl, bool isTailCall,
bool isVarArg,
SelectionDAG &DAG,
@@ -431,14 +424,14 @@ namespace llvm {
SDValue &Callee,
int SPDiff, unsigned NumBytes,
const SmallVectorImpl<ISD::InputArg> &Ins,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
LowerCall(SDValue Chain, SDValue Callee,
@@ -446,26 +439,26 @@ namespace llvm {
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
- DebugLoc dl, SelectionDAG &DAG);
+ DebugLoc dl, SelectionDAG &DAG) const;
SDValue
LowerFormalArguments_Darwin(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
SDValue
LowerFormalArguments_SVR4(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
SDValue
LowerCall_Darwin(SDValue Chain, SDValue Callee,
@@ -473,14 +466,14 @@ namespace llvm {
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
SDValue
LowerCall_SVR4(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg, bool isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
};
}
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 6b0a282af09c..ae1fbd8220a7 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -24,10 +24,13 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/MC/MCAsmInfo.h"
-using namespace llvm;
+namespace llvm {
extern cl::opt<bool> EnablePPC32RS; // FIXME (64-bit): See PPCRegisterInfo.cpp.
extern cl::opt<bool> EnablePPC64RS; // FIXME (64-bit): See PPCRegisterInfo.cpp.
+}
+
+using namespace llvm;
PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm)
: TargetInstrInfoImpl(PPCInsts, array_lengthof(PPCInsts)), TM(tm),
@@ -642,6 +645,16 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MBB.insert(MI, NewMIs[i]);
}
+MachineInstr*
+PPCInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx, uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const {
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(PPC::DBG_VALUE));
+ addFrameReference(MIB, FrameIx, 0, false).addImm(Offset).addMetadata(MDPtr);
+ return &*MIB;
+}
+
/// foldMemoryOperand - PowerPC (like most RISCs) can only fold spills into
/// copy instructions, turning them into load/store instructions.
MachineInstr *PPCInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
@@ -778,6 +791,7 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
case PPC::DBG_LABEL:
case PPC::EH_LABEL:
case PPC::GC_LABEL:
+ case PPC::DBG_VALUE:
return 0;
default:
return 4; // PowerPC instructions are all 4 bytes
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 57facac94354..9fb6e7dce1f7 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -126,6 +126,12 @@ public:
unsigned DestReg, int FrameIndex,
const TargetRegisterClass *RC) const;
+ virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx,
+ uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const;
+
/// foldMemoryOperand - PowerPC (like most RISCs) can only fold spills into
/// copy instructions, turning them into load/store instructions.
virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index b359dd33bd0e..e2649c8b380f 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -58,6 +58,18 @@ private:
/// how the caller's stack pointer should be calculated (epilog/dynamicalloc).
bool HasFastCall;
+ /// VarArgsFrameIndex - FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+ /// VarArgsStackOffset - StackOffset for start of stack
+ /// arguments.
+ int VarArgsStackOffset;
+ /// VarArgsNumGPR - Index of the first unused integer
+ /// register for parameter passing.
+ unsigned VarArgsNumGPR;
+ /// VarArgsNumFPR - Index of the first unused double
+ /// register for parameter passing.
+ unsigned VarArgsNumFPR;
+
public:
explicit PPCFunctionInfo(MachineFunction &MF)
: FramePointerSaveIndex(0),
@@ -66,7 +78,11 @@ public:
LRStoreRequired(false),
MinReservedArea(0),
TailCallSPDelta(0),
- HasFastCall(false) {}
+ HasFastCall(false),
+ VarArgsFrameIndex(0),
+ VarArgsStackOffset(0),
+ VarArgsNumGPR(0),
+ VarArgsNumFPR(0) {}
int getFramePointerSaveIndex() const { return FramePointerSaveIndex; }
void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; }
@@ -96,6 +112,18 @@ public:
void setHasFastCall() { HasFastCall = true; }
bool hasFastCall() const { return HasFastCall;}
+
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+ void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
+
+ int getVarArgsStackOffset() const { return VarArgsStackOffset; }
+ void setVarArgsStackOffset(int Offset) { VarArgsStackOffset = Offset; }
+
+ unsigned getVarArgsNumGPR() const { return VarArgsNumGPR; }
+ void setVarArgsNumGPR(unsigned Num) { VarArgsNumGPR = Num; }
+
+ unsigned getVarArgsNumFPR() const { return VarArgsNumFPR; }
+ void setVarArgsNumFPR(unsigned Num) { VarArgsNumFPR = Num; }
};
} // end of namespace llvm
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 52d87cde803f..5f1e04eecb4a 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -43,7 +43,6 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include <cstdlib>
-using namespace llvm;
// FIXME This disables some code that aligns the stack to a boundary
// bigger than the default (16 bytes on Darwin) when there is a stack local
@@ -56,14 +55,19 @@ using namespace llvm;
#define ALIGN_STACK 0
// FIXME (64-bit): Eventually enable by default.
+namespace llvm {
cl::opt<bool> EnablePPC32RS("enable-ppc32-regscavenger",
- cl::init(false),
- cl::desc("Enable PPC32 register scavenger"),
- cl::Hidden);
+ cl::init(false),
+ cl::desc("Enable PPC32 register scavenger"),
+ cl::Hidden);
cl::opt<bool> EnablePPC64RS("enable-ppc64-regscavenger",
- cl::init(false),
- cl::desc("Enable PPC64 register scavenger"),
- cl::Hidden);
+ cl::init(false),
+ cl::desc("Enable PPC64 register scavenger"),
+ cl::Hidden);
+}
+
+using namespace llvm;
+
#define EnableRegisterScavenging \
((EnablePPC32RS && !Subtarget.isPPC64()) || \
(EnablePPC64RS && Subtarget.isPPC64()))
@@ -405,7 +409,10 @@ PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
//
static bool needsFP(const MachineFunction &MF) {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- return NoFramePointerElim || MFI->hasVarSizedObjects() ||
+ // Naked functions have no stack frame pushed, so we don't have a frame pointer.
+ if (MF.getFunction()->hasFnAttr(Attribute::Naked))
+ return false;
+ return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects() ||
(GuaranteedTailCallOpt && MF.getInfo<PPCFunctionInfo>()->hasFastCall());
}
@@ -790,7 +797,10 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// If we're not using a Frame Pointer that has been set to the value of the
// SP before having the stack size subtracted from it, then add the stack size
// to Offset to get the correct offset.
- Offset += MFI->getStackSize();
+ // Naked functions have stack size 0, although getStackSize may not reflect that
+ // because we didn't call all the pieces that compute it for naked functions.
+ if (!MF.getFunction()->hasFnAttr(Attribute::Naked))
+ Offset += MFI->getStackSize();
// If we can, encode the offset directly into the instruction. If this is a
// normal PPC "ri" instruction, any 16-bit value can be safely encoded. If
diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td
index d589414c0154..9664f1457171 100644
--- a/lib/Target/PowerPC/PPCSchedule.td
+++ b/lib/Target/PowerPC/PPCSchedule.td
@@ -15,8 +15,6 @@ def SLU : FuncUnit; // Store/load unit
def SRU : FuncUnit; // special register unit
def IU1 : FuncUnit; // integer unit 1 (simple)
def IU2 : FuncUnit; // integer unit 2 (complex)
-def IU3 : FuncUnit; // integer unit 3 (7450 simple)
-def IU4 : FuncUnit; // integer unit 4 (7450 simple)
def FPU1 : FuncUnit; // floating point unit 1
def FPU2 : FuncUnit; // floating point unit 2
def VPU : FuncUnit; // vector permutation unit
@@ -24,7 +22,6 @@ def VIU1 : FuncUnit; // vector integer unit 1 (simple)
def VIU2 : FuncUnit; // vector integer unit 2 (complex)
def VFPU : FuncUnit; // vector floating point unit
-
//===----------------------------------------------------------------------===//
// Instruction Itinerary classes used for PowerPC
//
diff --git a/lib/Target/PowerPC/PPCScheduleG3.td b/lib/Target/PowerPC/PPCScheduleG3.td
index f72194d6de0e..73447631b2ad 100644
--- a/lib/Target/PowerPC/PPCScheduleG3.td
+++ b/lib/Target/PowerPC/PPCScheduleG3.td
@@ -12,7 +12,8 @@
//===----------------------------------------------------------------------===//
-def G3Itineraries : ProcessorItineraries<[
+def G3Itineraries : ProcessorItineraries<
+ [IU1, IU2, FPU1, BPU, SRU, SLU], [
InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2]>]>,
InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2]>]>,
InstrItinData<IntDivW , [InstrStage<19, [IU1]>]>,
diff --git a/lib/Target/PowerPC/PPCScheduleG4.td b/lib/Target/PowerPC/PPCScheduleG4.td
index 92ed20f17ce5..7efc693fa8c9 100644
--- a/lib/Target/PowerPC/PPCScheduleG4.td
+++ b/lib/Target/PowerPC/PPCScheduleG4.td
@@ -11,7 +11,8 @@
//
//===----------------------------------------------------------------------===//
-def G4Itineraries : ProcessorItineraries<[
+def G4Itineraries : ProcessorItineraries<
+ [IU1, IU2, SLU, SRU, BPU, FPU1, VIU1, VIU2, VPU, VFPU], [
InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2]>]>,
InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2]>]>,
InstrItinData<IntDivW , [InstrStage<19, [IU1]>]>,
diff --git a/lib/Target/PowerPC/PPCScheduleG4Plus.td b/lib/Target/PowerPC/PPCScheduleG4Plus.td
index 7474ba494d10..15056c0cfe44 100644
--- a/lib/Target/PowerPC/PPCScheduleG4Plus.td
+++ b/lib/Target/PowerPC/PPCScheduleG4Plus.td
@@ -11,7 +11,11 @@
//
//===----------------------------------------------------------------------===//
-def G4PlusItineraries : ProcessorItineraries<[
+def IU3 : FuncUnit; // integer unit 3 (7450 simple)
+def IU4 : FuncUnit; // integer unit 4 (7450 simple)
+
+def G4PlusItineraries : ProcessorItineraries<
+ [IU1, IU2, IU3, IU4, BPU, SLU, FPU1, VFPU, VIU1, VIU2, VPU], [
InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
InstrItinData<IntDivW , [InstrStage<23, [IU2]>]>,
diff --git a/lib/Target/PowerPC/PPCScheduleG5.td b/lib/Target/PowerPC/PPCScheduleG5.td
index d28214715a76..2dffc48b238f 100644
--- a/lib/Target/PowerPC/PPCScheduleG5.td
+++ b/lib/Target/PowerPC/PPCScheduleG5.td
@@ -11,7 +11,8 @@
//
//===----------------------------------------------------------------------===//
-def G5Itineraries : ProcessorItineraries<[
+def G5Itineraries : ProcessorItineraries<
+ [IU1, IU2, SLU, BPU, FPU1, FPU2, VFPU, VIU1, VIU2, VPU], [
InstrItinData<IntGeneral , [InstrStage<2, [IU1, IU2]>]>,
InstrItinData<IntCompare , [InstrStage<3, [IU1, IU2]>]>,
InstrItinData<IntDivD , [InstrStage<68, [IU1]>]>,
diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp b/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..c0004a9b4197
--- /dev/null
+++ b/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
@@ -0,0 +1,22 @@
+//===-- PPCSelectionDAGInfo.cpp - PowerPC SelectionDAG Info ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PPCSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "powerpc-selectiondag-info"
+#include "PPCSelectionDAGInfo.h"
+using namespace llvm;
+
+PPCSelectionDAGInfo::PPCSelectionDAGInfo() {
+}
+
+PPCSelectionDAGInfo::~PPCSelectionDAGInfo() {
+}
diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.h b/lib/Target/PowerPC/PPCSelectionDAGInfo.h
new file mode 100644
index 000000000000..3ad341844082
--- /dev/null
+++ b/lib/Target/PowerPC/PPCSelectionDAGInfo.h
@@ -0,0 +1,29 @@
+//===-- PPCSelectionDAGInfo.h - PowerPC SelectionDAG Info -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PowerPC subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPCCSELECTIONDAGINFO_H
+#define POWERPCCSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class PPCSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ PPCSelectionDAGInfo();
+ ~PPCSelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index ac9ae2b43fd4..35e33a234c43 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -44,8 +44,8 @@ public:
virtual const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const PPCFrameInfo *getFrameInfo() const { return &FrameInfo; }
virtual PPCJITInfo *getJITInfo() { return &JITInfo; }
- virtual PPCTargetLowering *getTargetLowering() const {
- return const_cast<PPCTargetLowering*>(&TLInfo);
+ virtual const PPCTargetLowering *getTargetLowering() const {
+ return &TLInfo;
}
virtual const PPCRegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index 052a5756da9f..144bf5d3e3dc 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -263,19 +263,6 @@ if anyone cared enough about sincos.
//===---------------------------------------------------------------------===//
-Turn this into a single byte store with no load (the other 3 bytes are
-unmodified):
-
-define void @test(i32* %P) {
- %tmp = load i32* %P
- %tmp14 = or i32 %tmp, 3305111552
- %tmp15 = and i32 %tmp14, 3321888767
- store i32 %tmp15, i32* %P
- ret void
-}
-
-//===---------------------------------------------------------------------===//
-
quantum_sigma_x in 462.libquantum contains the following loop:
for(i=0; i<reg->size; i++)
@@ -1843,3 +1830,21 @@ entry:
//===---------------------------------------------------------------------===//
+We should use DSE + llvm.lifetime.end to delete dead vtable pointer updates.
+See GCC PR34949
+
+//===---------------------------------------------------------------------===//
+
+In this code:
+
+long foo(long x) {
+ return x > 1 ? x : 1;
+}
+
+LLVM emits a comparison with 1 instead of 0. 0 would be equivalent
+and cheaper on most targets.
+
+LLVM prefers comparisons with zero over non-zero in general, but in this
+case it chooses instead to keep the max operation obvious.
+
+//===---------------------------------------------------------------------===//
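
The max-operation note is easy to verify by hand: the two forms below agree for every input, since both return 1 for x <= 1 and x for x >= 2. A minimal standalone sketch:

    // Both functions compute max(x, 1); only the comparison constant
    // differs. Comparing against zero is the form the note calls
    // cheaper on most targets.
    long foo_as_written(long x) { return x > 1 ? x : 1; }
    long foo_zero_cmp(long x)   { return x > 0 ? x : 1; }
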
diff --git a/lib/Target/Sparc/AsmPrinter/CMakeLists.txt b/lib/Target/Sparc/AsmPrinter/CMakeLists.txt
index e3ca18e3b1b1..da629f6e63fb 100644
--- a/lib/Target/Sparc/AsmPrinter/CMakeLists.txt
+++ b/lib/Target/Sparc/AsmPrinter/CMakeLists.txt
@@ -3,4 +3,4 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/
add_llvm_library(LLVMSparcAsmPrinter
SparcAsmPrinter.cpp
)
-add_dependencies(LLVMSparcAsmPrinter SparcCodeGenTable_gen)
\ No newline at end of file
+add_dependencies(LLVMSparcAsmPrinter SparcCodeGenTable_gen)
+add_dependencies(LLVMSparcAsmPrinter SparcCodeGenTable_gen)
diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt
index 74f320a00035..684cadfb57f7 100644
--- a/lib/Target/Sparc/CMakeLists.txt
+++ b/lib/Target/Sparc/CMakeLists.txt
@@ -20,6 +20,7 @@ add_llvm_target(SparcCodeGen
SparcRegisterInfo.cpp
SparcSubtarget.cpp
SparcTargetMachine.cpp
+ SparcSelectionDAGInfo.cpp
)
target_link_libraries (LLVMSparcCodeGen LLVMSelectionDAG)
diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
index a7d1805e2a36..698923e3c9e0 100644
--- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp
+++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#include "SparcISelLowering.h"
#include "SparcTargetMachine.h"
#include "llvm/Intrinsics.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
@@ -68,7 +67,6 @@ private:
} // end anonymous namespace
SDNode* SparcDAGToDAGISel::getGlobalBaseReg() {
- MachineFunction *MF = BB->getParent();
unsigned GlobalBaseReg = TM.getInstrInfo()->getGlobalBaseReg(MF);
return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
}
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index 4e93ef05a1cc..f47e53acbfc1 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -14,6 +14,7 @@
#include "SparcISelLowering.h"
#include "SparcTargetMachine.h"
+#include "SparcMachineFunctionInfo.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -37,7 +38,7 @@ SDValue
SparcTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
- DebugLoc dl, SelectionDAG &DAG) {
+ DebugLoc dl, SelectionDAG &DAG) const {
// CCValAssign - represent the assignment of the return value to locations.
SmallVector<CCValAssign, 16> RVLocs;
@@ -85,10 +86,12 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
const SmallVectorImpl<ISD::InputArg>
&Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals)
+ const {
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
@@ -226,7 +229,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
// Store remaining ArgRegs to the stack if this is a varargs function.
if (isVarArg) {
// Remember the vararg offset for the va_start implementation.
- VarArgsFrameOffset = ArgOffset;
+ FuncInfo->setVarArgsFrameOffset(ArgOffset);
std::vector<SDValue> OutChains;
@@ -261,7 +264,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals) const {
// Sparc target does not yet support tail call optimization.
isTailCall = false;
@@ -752,8 +755,8 @@ static void LookThroughSetCC(SDValue &LHS, SDValue &RHS,
}
SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op,
- SelectionDAG &DAG) {
- GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ SelectionDAG &DAG) const {
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
// FIXME there isn't really any debug info here
DebugLoc dl = Op.getDebugLoc();
SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32);
@@ -773,11 +776,11 @@ SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op,
}
SDValue SparcTargetLowering::LowerConstantPool(SDValue Op,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
// FIXME there isn't really any debug info here
DebugLoc dl = Op.getDebugLoc();
- Constant *C = N->getConstVal();
+ const Constant *C = N->getConstVal();
SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment());
SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, CP);
SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, CP);
@@ -873,14 +876,18 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
}
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
- SparcTargetLowering &TLI) {
+ const SparcTargetLowering &TLI) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
+
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
DebugLoc dl = Op.getDebugLoc();
- SDValue Offset = DAG.getNode(ISD::ADD, dl, MVT::i32,
- DAG.getRegister(SP::I6, MVT::i32),
- DAG.getConstant(TLI.getVarArgsFrameOffset(),
- MVT::i32));
+ SDValue Offset =
+ DAG.getNode(ISD::ADD, dl, MVT::i32,
+ DAG.getRegister(SP::I6, MVT::i32),
+ DAG.getConstant(FuncInfo->getVarArgsFrameOffset(),
+ MVT::i32));
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), dl, Offset, Op.getOperand(1), SV, 0,
false, false, 0);
@@ -939,7 +946,7 @@ static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) {
SDValue SparcTargetLowering::
-LowerOperation(SDValue Op, SelectionDAG &DAG) {
+LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Should not custom lower this!");
// Frame & Return address. Currently unimplemented
@@ -961,8 +968,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) {
MachineBasicBlock *
SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB,
- DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
+ MachineBasicBlock *BB) const {
const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
unsigned BROpcode;
unsigned CC;
@@ -1006,12 +1012,9 @@ SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
F->insert(It, sinkMBB);
// Update machine-CFG edges by first adding all successors of the current
// block to the new block which will contain the Phi node for the select.
- // Also inform sdisel of the edge changes.
for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
- E = BB->succ_end(); I != E; ++I) {
- EM->insert(std::make_pair(*I, sinkMBB));
+ E = BB->succ_end(); I != E; ++I)
sinkMBB->addSuccessor(*I);
- }
// Next, remove all successors of the current block, and add the true
// and fallthrough blocks as its successors.
while (!BB->succ_empty())
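
The custom inserter above is standard select-pseudo expansion: it splits the block into a diamond and lets a PHI pick the value. A condensed sketch of the resulting CFG (the shape is the general pattern, not Sparc-specific):

    //    BB:       branch-on-CC --> sinkMBB    (true value flows in)
    //     |
    //    copy0MBB: fallthrough  --> sinkMBB    (false value)
    //
    //    sinkMBB:  %res = PHI [TrueV, BB], [FalseV, copy0MBB]
    //
    // With the EM (edge map) parameter gone, the inserter only rewires
    // machine-CFG successors; SelectionDAG ISel no longer needs to be
    // informed of the new edges.
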
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h
index 2ee73c1ac909..5ebdcacba57d 100644
--- a/lib/Target/Sparc/SparcISelLowering.h
+++ b/lib/Target/Sparc/SparcISelLowering.h
@@ -41,12 +41,9 @@ namespace llvm {
}
class SparcTargetLowering : public TargetLowering {
- int VarArgsFrameOffset; // Frame offset to start of varargs area.
public:
SparcTargetLowering(TargetMachine &TM);
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
-
- int getVarArgsFrameOffset() const { return VarArgsFrameOffset; }
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
/// computeMaskedBitsForTargetNode - Determine which of the bits specified
/// in Mask are known to be either zero or one and return them in the
@@ -58,9 +55,9 @@ namespace llvm {
const SelectionDAG &DAG,
unsigned Depth = 0) const;
- virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *MBB,
- DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const;
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
virtual const char *getTargetNodeName(unsigned Opcode) const;
@@ -82,7 +79,7 @@ namespace llvm {
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
LowerCall(SDValue Chain, SDValue Callee,
@@ -91,16 +88,16 @@ namespace llvm {
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
- DebugLoc dl, SelectionDAG &DAG);
+ DebugLoc dl, SelectionDAG &DAG) const;
- SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG);
- SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
};
} // end namespace llvm
diff --git a/lib/Target/Sparc/SparcMachineFunctionInfo.h b/lib/Target/Sparc/SparcMachineFunctionInfo.h
index 56d8708dbe7d..e34c1312810c 100644
--- a/lib/Target/Sparc/SparcMachineFunctionInfo.h
+++ b/lib/Target/Sparc/SparcMachineFunctionInfo.h
@@ -20,12 +20,20 @@ namespace llvm {
class SparcMachineFunctionInfo : public MachineFunctionInfo {
private:
unsigned GlobalBaseReg;
+
+ /// VarArgsFrameOffset - Frame offset to start of varargs area.
+ int VarArgsFrameOffset;
+
public:
- SparcMachineFunctionInfo() : GlobalBaseReg(0) {}
- explicit SparcMachineFunctionInfo(MachineFunction &MF) : GlobalBaseReg(0) {}
+ SparcMachineFunctionInfo() : GlobalBaseReg(0), VarArgsFrameOffset(0) {}
+ explicit SparcMachineFunctionInfo(MachineFunction &MF)
+ : GlobalBaseReg(0), VarArgsFrameOffset(0) {}
unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
+
+ int getVarArgsFrameOffset() const { return VarArgsFrameOffset; }
+ void setVarArgsFrameOffset(int Offset) { VarArgsFrameOffset = Offset; }
};
}
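
This header shows the pattern the whole commit applies across targets: per-function mutable state (here the vararg frame offset) moves off TargetLowering and into the target's MachineFunctionInfo, which is what lets the Lower* hooks become const. A short usage sketch mirroring the Sparc code above:

    // MF.getInfo<T>() lazily constructs the target's per-function info,
    // so a const TargetLowering can still record and retrieve state.
    MachineFunction &MF = DAG.getMachineFunction();
    SparcMachineFunctionInfo *FuncInfo =
        MF.getInfo<SparcMachineFunctionInfo>();
    FuncInfo->setVarArgsFrameOffset(ArgOffset);  // LowerFormalArguments
    int Off = FuncInfo->getVarArgsFrameOffset(); // LowerVASTART
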
diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.cpp b/lib/Target/Sparc/SparcSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..4825aa928636
--- /dev/null
+++ b/lib/Target/Sparc/SparcSelectionDAGInfo.cpp
@@ -0,0 +1,22 @@
+//===-- SparcSelectionDAGInfo.cpp - Sparc SelectionDAG Info ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SparcSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sparc-selectiondag-info"
+#include "SparcSelectionDAGInfo.h"
+using namespace llvm;
+
+SparcSelectionDAGInfo::SparcSelectionDAGInfo() {
+}
+
+SparcSelectionDAGInfo::~SparcSelectionDAGInfo() {
+}
diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.h b/lib/Target/Sparc/SparcSelectionDAGInfo.h
new file mode 100644
index 000000000000..bc1b561f6e86
--- /dev/null
+++ b/lib/Target/Sparc/SparcSelectionDAGInfo.h
@@ -0,0 +1,29 @@
+//===-- SparcSelectionDAGInfo.h - Sparc SelectionDAG Info -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Sparc subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCSELECTIONDAGINFO_H
+#define SPARCSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class SparcSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ SparcSelectionDAGInfo();
+ ~SparcSelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
index 5834d08457d7..1367a3112964 100644
--- a/lib/Target/Sparc/SparcTargetMachine.h
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -39,8 +39,8 @@ public:
virtual const SparcRegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
}
- virtual SparcTargetLowering* getTargetLowering() const {
- return const_cast<SparcTargetLowering*>(&TLInfo);
+ virtual const SparcTargetLowering* getTargetLowering() const {
+ return &TLInfo;
}
virtual const TargetData *getTargetData() const { return &DataLayout; }
diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt
index 81e51d89ad9f..880e56f0525b 100644
--- a/lib/Target/SystemZ/CMakeLists.txt
+++ b/lib/Target/SystemZ/CMakeLists.txt
@@ -18,6 +18,7 @@ add_llvm_target(SystemZCodeGen
SystemZRegisterInfo.cpp
SystemZSubtarget.cpp
SystemZTargetMachine.cpp
+ SystemZSelectionDAGInfo.cpp
)
target_link_libraries (LLVMSystemZCodeGen LLVMSelectionDAG)
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 8152e1dbe1a1..75d563b9c4d5 100644
--- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "SystemZ.h"
-#include "SystemZISelLowering.h"
#include "SystemZTargetMachine.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
@@ -85,7 +84,7 @@ namespace {
///
namespace {
class SystemZDAGToDAGISel : public SelectionDAGISel {
- SystemZTargetLowering &Lowering;
+ const SystemZTargetLowering &Lowering;
const SystemZSubtarget &Subtarget;
void getAddressOperandsRI(const SystemZRRIAddressMode &AM,
@@ -588,7 +587,7 @@ bool SystemZDAGToDAGISel::SelectLAAddr(SDNode *Op, SDValue Addr,
bool SystemZDAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
SDValue &Base, SDValue &Disp, SDValue &Index) {
if (ISD::isNON_EXTLoad(N.getNode()) &&
- IsLegalToFold(N, P, P))
+ IsLegalToFold(N, P, P, OptLevel))
return SelectAddrRRI20(P, N.getOperand(1), Base, Disp, Index);
return false;
}
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 6f4b30faaf60..e98f18b9a31e 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -158,7 +158,8 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) :
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
}
-SDValue SystemZTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
+ SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
case ISD::BR_CC: return LowerBR_CC(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
@@ -236,7 +237,8 @@ SystemZTargetLowering::LowerFormalArguments(SDValue Chain,
&Ins,
DebugLoc dl,
SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals)
+ const {
switch (CallConv) {
default:
@@ -254,7 +256,7 @@ SystemZTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals) const {
// SystemZ target does not yet support tail call optimization.
isTailCall = false;
@@ -280,7 +282,8 @@ SystemZTargetLowering::LowerCCCArguments(SDValue Chain,
&Ins,
DebugLoc dl,
SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals)
+ const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -293,7 +296,7 @@ SystemZTargetLowering::LowerCCCArguments(SDValue Chain,
CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
if (isVarArg)
- llvm_report_error("Varargs not supported yet");
+ report_fatal_error("Varargs not supported yet");
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
SDValue ArgValue;
@@ -371,7 +374,7 @@ SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
&Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
@@ -505,7 +508,7 @@ SystemZTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
const SmallVectorImpl<ISD::InputArg>
&Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals) const {
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
@@ -547,7 +550,7 @@ SDValue
SystemZTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
- DebugLoc dl, SelectionDAG &DAG) {
+ DebugLoc dl, SelectionDAG &DAG) const {
// CCValAssign - represent the assignment of the return value to a location
SmallVector<CCValAssign, 16> RVLocs;
@@ -600,7 +603,7 @@ SystemZTargetLowering::LowerReturn(SDValue Chain,
SDValue SystemZTargetLowering::EmitCmp(SDValue LHS, SDValue RHS,
ISD::CondCode CC, SDValue &SystemZCC,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
// FIXME: Emit a test if RHS is zero
bool isUnsigned = false;
@@ -678,7 +681,7 @@ SDValue SystemZTargetLowering::EmitCmp(SDValue LHS, SDValue RHS,
}
-SDValue SystemZTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
+SDValue SystemZTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
SDValue LHS = Op.getOperand(2);
@@ -692,7 +695,8 @@ SDValue SystemZTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
Chain, Dest, SystemZCC, Flag);
}
-SDValue SystemZTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
+SDValue SystemZTargetLowering::LowerSELECT_CC(SDValue Op,
+ SelectionDAG &DAG) const {
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
SDValue TrueV = Op.getOperand(2);
@@ -714,9 +718,9 @@ SDValue SystemZTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
}
SDValue SystemZTargetLowering::LowerGlobalAddress(SDValue Op,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
- GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
bool IsPic = getTargetMachine().getRelocationModel() == Reloc::PIC_;
@@ -753,7 +757,7 @@ SDValue SystemZTargetLowering::LowerGlobalAddress(SDValue Op,
// FIXME: PIC here
SDValue SystemZTargetLowering::LowerJumpTable(SDValue Op,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
@@ -765,7 +769,7 @@ SDValue SystemZTargetLowering::LowerJumpTable(SDValue Op,
// FIXME: PIC here
// FIXME: This is just a dirty hack. We need to lower cpool properly
SDValue SystemZTargetLowering::LowerConstantPool(SDValue Op,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
@@ -795,8 +799,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
MachineBasicBlock*
SystemZTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB,
- DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
+ MachineBasicBlock *BB) const {
const SystemZInstrInfo &TII = *TM.getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
assert((MI->getOpcode() == SystemZ::Select32 ||
@@ -827,10 +830,6 @@ SystemZTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BuildMI(BB, dl, TII.getBrCond(CC)).addMBB(copy1MBB);
F->insert(I, copy0MBB);
F->insert(I, copy1MBB);
- // Inform sdisel of the edge changes.
- for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
- SE = BB->succ_end(); SI != SE; ++SI)
- EM->insert(std::make_pair(*SI, copy1MBB));
// Update machine-CFG edges by transferring all successors of the current
// block to the new block which will contain the Phi node for the select.
copy1MBB->transferSuccessors(BB);
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index 36ff994d0319..94bd90617517 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -58,7 +58,7 @@ namespace llvm {
explicit SystemZTargetLowering(SystemZTargetMachine &TM);
/// LowerOperation - Provide custom lowering hooks for some operations.
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
/// getTargetNodeName - This method returns the name of a target specific
/// DAG node.
@@ -74,20 +74,19 @@ namespace llvm {
TargetLowering::ConstraintType
getConstraintType(const std::string &Constraint) const;
- SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG);
- SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG);
- SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG);
- SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG);
- SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue EmitCmp(SDValue LHS, SDValue RHS,
ISD::CondCode CC, SDValue &SystemZCC,
- SelectionDAG &DAG);
+ SelectionDAG &DAG) const;
MachineBasicBlock* EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB,
- DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const;
+ MachineBasicBlock *BB) const;
/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
@@ -101,7 +100,7 @@ namespace llvm {
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
SDValue LowerCCCArguments(SDValue Chain,
CallingConv::ID CallConv,
@@ -109,33 +108,33 @@ namespace llvm {
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl,
SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
- DebugLoc dl, SelectionDAG &DAG);
+ DebugLoc dl, SelectionDAG &DAG) const;
const SystemZSubtarget &Subtarget;
const SystemZTargetMachine &TM;
diff --git a/lib/Target/SystemZ/SystemZInstrBuilder.h b/lib/Target/SystemZ/SystemZInstrBuilder.h
index b69d2f6ce9ff..fa87061a7df3 100644
--- a/lib/Target/SystemZ/SystemZInstrBuilder.h
+++ b/lib/Target/SystemZ/SystemZInstrBuilder.h
@@ -44,7 +44,7 @@ struct SystemZAddressMode {
unsigned IndexReg;
int32_t Disp;
- GlobalValue *GV;
+ const GlobalValue *GV;
SystemZAddressMode() : BaseType(RegBase), IndexReg(0), Disp(0) {
Base.Reg = 0;
diff --git a/lib/Target/SystemZ/SystemZMCAsmInfo.cpp b/lib/Target/SystemZ/SystemZMCAsmInfo.cpp
index 1a0920609b15..f9ccc47b0b94 100644
--- a/lib/Target/SystemZ/SystemZMCAsmInfo.cpp
+++ b/lib/Target/SystemZ/SystemZMCAsmInfo.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "SystemZMCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
using namespace llvm;
@@ -21,7 +22,8 @@ SystemZMCAsmInfo::SystemZMCAsmInfo(const Target &T, const StringRef &TT) {
PCSymbol = ".";
}
-MCSection *SystemZMCAsmInfo::getNonexecutableStackSection(MCContext &Ctx) const{
- return MCSectionELF::Create(".note.GNU-stack", MCSectionELF::SHT_PROGBITS,
- 0, SectionKind::getMetadata(), false, Ctx);
+const MCSection *SystemZMCAsmInfo::
+getNonexecutableStackSection(MCContext &Ctx) const {
+ return Ctx.getELFSection(".note.GNU-stack", MCSectionELF::SHT_PROGBITS,
+ 0, SectionKind::getMetadata(), false);
}
diff --git a/lib/Target/SystemZ/SystemZMCAsmInfo.h b/lib/Target/SystemZ/SystemZMCAsmInfo.h
index 00cb99b34c05..87908f21f722 100644
--- a/lib/Target/SystemZ/SystemZMCAsmInfo.h
+++ b/lib/Target/SystemZ/SystemZMCAsmInfo.h
@@ -22,7 +22,7 @@ namespace llvm {
struct SystemZMCAsmInfo : public MCAsmInfo {
explicit SystemZMCAsmInfo(const Target &T, const StringRef &TT);
- virtual MCSection *getNonexecutableStackSection(MCContext &Ctx) const;
+ virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const;
};
} // namespace llvm
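
Switching from MCSectionELF::Create to Ctx.getELFSection moves section creation behind MCContext, which uniques sections, so repeated requests for ".note.GNU-stack" return the same MCSection*. A hedged sketch of the call (the meaning of the trailing bool is inferred from the old Create signature, not stated in this patch):

    const MCSection *S =
        Ctx.getELFSection(".note.GNU-stack", MCSectionELF::SHT_PROGBITS,
                          0 /*Flags*/, SectionKind::getMetadata(),
                          /*IsExplicit=*/false);
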
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index 302c418d1ec9..638fd17c9953 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -77,7 +77,7 @@ BitVector SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const
/// allocas or if frame pointer elimination is disabled.
bool SystemZRegisterInfo::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- return NoFramePointerElim || MFI->hasVarSizedObjects();
+ return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects();
}
void SystemZRegisterInfo::
@@ -200,7 +200,7 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
MachineInstr *MI =
BuildMI(MBB, MBBI, DL, TII.get(Opc), SystemZ::R15D)
- .addReg(SystemZ::R15D).addImm((isSub ? -(int64_t)ThisVal : ThisVal));
+ .addReg(SystemZ::R15D).addImm(isSub ? -ThisVal : ThisVal);
// The PSW implicit def is dead.
MI->getOperand(3).setIsDead();
Offset -= ThisVal;
diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..87c831b49479
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
@@ -0,0 +1,22 @@
+//===-- SystemZSelectionDAGInfo.cpp - SystemZ SelectionDAG Info -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SystemZSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "systemz-selectiondag-info"
+#include "SystemZSelectionDAGInfo.h"
+using namespace llvm;
+
+SystemZSelectionDAGInfo::SystemZSelectionDAGInfo() {
+}
+
+SystemZSelectionDAGInfo::~SystemZSelectionDAGInfo() {
+}
diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
new file mode 100644
index 000000000000..5292de91dc0a
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
@@ -0,0 +1,29 @@
+//===-- SystemZSelectionDAGInfo.h - SystemZ SelectionDAG Info ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the SystemZ subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZSELECTIONDAGINFO_H
+#define SYSTEMZSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class SystemZSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ SystemZSelectionDAGInfo();
+ ~SystemZSelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h
index 551aeb5a3e47..d3357cc76574 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -49,8 +49,8 @@ public:
return &InstrInfo.getRegisterInfo();
}
- virtual SystemZTargetLowering *getTargetLowering() const {
- return const_cast<SystemZTargetLowering*>(&TLInfo);
+ virtual const SystemZTargetLowering *getTargetLowering() const {
+ return &TLInfo;
}
virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp
index 643b3977461b..5870d8a87004 100644
--- a/lib/Target/TargetData.cpp
+++ b/lib/Target/TargetData.cpp
@@ -228,7 +228,7 @@ void TargetData::init(StringRef Desc) {
/// @note This has to exist, because this is a pass, but it should never be
/// used.
TargetData::TargetData() : ImmutablePass(&ID) {
- llvm_report_error("Bad TargetData ctor used. "
+ report_fatal_error("Bad TargetData ctor used. "
"Tool did not specify a TargetData to use?");
}
@@ -269,18 +269,8 @@ unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType,
return ABIInfo ? Alignments[i].ABIAlign : Alignments[i].PrefAlign;
// The best match so far depends on what we're looking for.
- if (AlignType == VECTOR_ALIGN && Alignments[i].AlignType == VECTOR_ALIGN) {
- // If this is a specification for a smaller vector type, we will fall back
- // to it. This happens because <128 x double> can be implemented in terms
- // of 64 <2 x double>.
- if (Alignments[i].TypeBitWidth < BitWidth) {
- // Verify that we pick the biggest of the fallbacks.
- if (BestMatchIdx == -1 ||
- Alignments[BestMatchIdx].TypeBitWidth < Alignments[i].TypeBitWidth)
- BestMatchIdx = i;
- }
- } else if (AlignType == INTEGER_ALIGN &&
- Alignments[i].AlignType == INTEGER_ALIGN) {
+ if (AlignType == INTEGER_ALIGN &&
+ Alignments[i].AlignType == INTEGER_ALIGN) {
// The "best match" for integers is the smallest size that is larger than
// the BitWidth requested.
if (Alignments[i].TypeBitWidth > BitWidth && (BestMatchIdx == -1 ||
@@ -303,10 +293,15 @@ unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType,
} else {
assert(AlignType == VECTOR_ALIGN && "Unknown alignment type!");
- // If we didn't find a vector size that is smaller or equal to this type,
- // then we will end up scalarizing this to its element type. Just return
- // the alignment of the element.
- return getAlignment(cast<VectorType>(Ty)->getElementType(), ABIInfo);
+ // By default, use natural alignment for vector types. This is consistent
+ // with what clang and llvm-gcc do.
+ unsigned Align = getTypeAllocSize(cast<VectorType>(Ty)->getElementType());
+ Align *= cast<VectorType>(Ty)->getNumElements();
+ // If the alignment is not a power of 2, round up to the next power of 2.
+ // This happens for non-power-of-2 length vectors.
+ if (Align & (Align-1))
+ Align = llvm::NextPowerOf2(Align);
+ return Align;
}
}
@@ -630,8 +625,8 @@ uint64_t TargetData::getIndexedOffset(const Type *ptrTy, Value* const* Indices,
Ty = cast<SequentialType>(Ty)->getElementType();
// Get the array index and the size of each array element.
- int64_t arrayIdx = cast<ConstantInt>(Indices[CurIDX])->getSExtValue();
- Result += arrayIdx * (int64_t)getTypeAllocSize(Ty);
+ if (int64_t arrayIdx = cast<ConstantInt>(Indices[CurIDX])->getSExtValue())
+ Result += arrayIdx * (int64_t)getTypeAllocSize(Ty);
}
}
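
The new vector fallback is a simple closed-form rule: natural alignment is element alloc size times element count, rounded up to a power of two. A standalone restatement that can be checked by hand:

    #include <cstdint>

    // Mirrors the logic added above. Example: <3 x float> gives
    // 4 * 3 = 12 bytes, which rounds up to 16.
    uint64_t NextPow2(uint64_t A) {
      uint64_t P = 1;
      while (P < A) P <<= 1;
      return P;
    }
    uint64_t NaturalVectorAlign(uint64_t EltAllocSize, uint64_t NumElts) {
      uint64_t Align = EltAllocSize * NumElts;
      if (Align & (Align - 1))   // not already a power of two
        Align = NextPow2(Align);
      return Align;
    }
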
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index 44722b39e311..b9372d04bb17 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -310,7 +310,7 @@ getExprForDwarfReference(const MCSymbol *Sym, Mangler *Mang,
switch (Encoding & 0xF0) {
default:
- llvm_report_error("We do not support this DWARF encoding yet!");
+ report_fatal_error("We do not support this DWARF encoding yet!");
case dwarf::DW_EH_PE_absptr:
// Do nothing special
return Res;
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index 88871e3580cc..ac67c91f1706 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -11,6 +11,8 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -25,6 +27,7 @@ namespace llvm {
bool LessPreciseFPMADOption;
bool PrintMachineCode;
bool NoFramePointerElim;
+ bool NoFramePointerElimNonLeaf;
bool NoExcessFPPrecision;
bool UnsafeFPMath;
bool FiniteOnlyFPMathOption;
@@ -33,8 +36,7 @@ namespace llvm {
FloatABI::ABIType FloatABIType;
bool NoImplicitFloat;
bool NoZerosInBSS;
- bool DwarfExceptionHandling;
- bool SjLjExceptionHandling;
+ bool JITExceptionHandling;
bool JITEmitDebugInfo;
bool JITEmitDebugInfoToDisk;
bool UnwindTablesMandatory;
@@ -58,6 +60,11 @@ DisableFPElim("disable-fp-elim",
cl::location(NoFramePointerElim),
cl::init(false));
static cl::opt<bool, true>
+DisableFPElimNonLeaf("disable-non-leaf-fp-elim",
+ cl::desc("Disable frame pointer elimination optimization for non-leaf funcs"),
+ cl::location(NoFramePointerElimNonLeaf),
+ cl::init(false));
+static cl::opt<bool, true>
DisableExcessPrecision("disable-excess-fp-precision",
cl::desc("Disable optimizations that may increase FP precision"),
cl::location(NoExcessFPPrecision),
@@ -107,14 +114,9 @@ DontPlaceZerosInBSS("nozero-initialized-in-bss",
cl::location(NoZerosInBSS),
cl::init(false));
static cl::opt<bool, true>
-EnableDwarfExceptionHandling("enable-eh",
- cl::desc("Emit DWARF exception handling (default if target supports)"),
- cl::location(DwarfExceptionHandling),
- cl::init(false));
-static cl::opt<bool, true>
-EnableSjLjExceptionHandling("enable-sjlj-eh",
- cl::desc("Emit SJLJ exception handling (default if target supports)"),
- cl::location(SjLjExceptionHandling),
+EnableJITExceptionHandling("jit-enable-eh",
+ cl::desc("Emit exception handling information"),
+ cl::location(JITExceptionHandling),
cl::init(false));
// In debug builds, make this default to true.
#ifdef NDEBUG
@@ -197,7 +199,14 @@ EnableStrongPHIElim(cl::Hidden, "strong-phi-elim",
cl::desc("Use strong PHI elimination."),
cl::location(StrongPHIElim),
cl::init(false));
-
+static cl::opt<bool>
+DataSections("fdata-sections",
+ cl::desc("Emit data into separate sections"),
+ cl::init(false));
+static cl::opt<bool>
+FunctionSections("ffunction-sections",
+ cl::desc("Emit functions into separate sections"),
+ cl::init(false));
//---------------------------------------------------------------------------
// TargetMachine Class
//
@@ -244,7 +253,35 @@ void TargetMachine::setAsmVerbosityDefault(bool V) {
AsmVerbosityDefault = V;
}
+bool TargetMachine::getFunctionSections() {
+ return FunctionSections;
+}
+
+bool TargetMachine::getDataSections() {
+ return DataSections;
+}
+
+void TargetMachine::setFunctionSections(bool V) {
+ FunctionSections = V;
+}
+
+void TargetMachine::setDataSections(bool V) {
+ DataSections = V;
+}
+
namespace llvm {
+ /// DisableFramePointerElim - This returns true if frame pointer elimination
+ /// optimization should be disabled for the given machine function.
+ bool DisableFramePointerElim(const MachineFunction &MF) {
+ if (NoFramePointerElim)
+ return true;
+ if (NoFramePointerElimNonLeaf) {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MFI->hasCalls();
+ }
+ return false;
+ }
+
  /// LessPreciseFPMAD - This flag returns true when the -enable-fp-mad option
  /// is specified on the command line. When this flag is off (default), the
  /// code generator is not allowed to generate mad (multiply add) if the
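
DisableFramePointerElim is the helper the SystemZ hasFP change earlier in this patch now calls into; the intended consumer pattern looks like this (a sketch mirroring that change, not new API):

    // Keep the frame pointer if elimination is disabled globally, if it
    // is disabled for non-leaf functions and this function makes calls,
    // or if the function has variable-sized stack objects.
    bool hasFPSketch(const MachineFunction &MF) {
      const MachineFrameInfo *MFI = MF.getFrameInfo();
      return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects();
    }
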
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 47873d14a911..da013505dabd 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -44,7 +44,7 @@ private:
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
X86Operand *ParseOperand();
- X86Operand *ParseMemOperand();
+ X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
bool ParseDirectiveWord(unsigned Size, SMLoc L);
@@ -368,14 +368,22 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
X86Operand *X86ATTAsmParser::ParseOperand() {
switch (getLexer().getKind()) {
default:
- return ParseMemOperand();
+ // Parse a memory operand with no segment register.
+ return ParseMemOperand(0, Parser.getTok().getLoc());
case AsmToken::Percent: {
- // FIXME: if a segment register, this could either be just the seg reg, or
- // the start of a memory operand.
+ // Read the register.
unsigned RegNo;
SMLoc Start, End;
if (ParseRegister(RegNo, Start, End)) return 0;
- return X86Operand::CreateReg(RegNo, Start, End);
+
+ // If this is a segment register followed by a ':', then this is the start
+ // of a memory reference, otherwise this is a normal register reference.
+ if (getLexer().isNot(AsmToken::Colon))
+ return X86Operand::CreateReg(RegNo, Start, End);
+
+ getParser().Lex(); // Eat the colon.
+ return ParseMemOperand(RegNo, Start);
}
case AsmToken::Dollar: {
// $42 -> immediate.
@@ -389,13 +397,10 @@ X86Operand *X86ATTAsmParser::ParseOperand() {
}
}
-/// ParseMemOperand: segment: disp(basereg, indexreg, scale)
-X86Operand *X86ATTAsmParser::ParseMemOperand() {
- SMLoc MemStart = Parser.getTok().getLoc();
-
- // FIXME: If SegReg ':' (e.g. %gs:), eat and remember.
- unsigned SegReg = 0;
-
+/// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
+/// has already been parsed if present.
+X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
+
// We have to disambiguate a parenthesized expression "(4+5)" from the start
// of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
// only way to do this without lookahead is to eat the '(' and see what is
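
The rewritten operand parser resolves the old FIXME with one token of lookahead: after reading a register, a following ':' marks it as the segment prefix of a memory operand rather than a plain register. A condensed sketch of the decision (error handling elided):

    //   %eax            -> register operand
    //   %gs:4(%ebx)     -> memory operand with SegReg = %gs
    //   4(%ebx)         -> memory operand with SegReg = 0
    X86Operand *ParseOperandSketch() {
      if (getLexer().getKind() != AsmToken::Percent)
        return ParseMemOperand(/*SegReg=*/0, Parser.getTok().getLoc());
      unsigned RegNo; SMLoc Start, End;
      if (ParseRegister(RegNo, Start, End)) return 0;
      if (getLexer().isNot(AsmToken::Colon))   // just a register
        return X86Operand::CreateReg(RegNo, Start, End);
      getParser().Lex();                       // eat the ':'
      return ParseMemOperand(RegNo, Start);
    }
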
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
index f5923969361d..8b0ed1ce34ce 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
@@ -474,9 +474,6 @@ void X86AsmPrinter::EmitStartOfAsmFile(Module &M) {
void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
if (Subtarget->isTargetDarwin()) {
// All darwin targets use mach-o.
- TargetLoweringObjectFileMachO &TLOFMacho =
- static_cast<TargetLoweringObjectFileMachO &>(getObjFileLowering());
-
MachineModuleInfoMachO &MMIMacho =
MMI->getObjFileInfo<MachineModuleInfoMachO>();
@@ -486,11 +483,11 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
Stubs = MMIMacho.GetFnStubList();
if (!Stubs.empty()) {
const MCSection *TheSection =
- TLOFMacho.getMachOSection("__IMPORT", "__jump_table",
- MCSectionMachO::S_SYMBOL_STUBS |
- MCSectionMachO::S_ATTR_SELF_MODIFYING_CODE |
- MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
- 5, SectionKind::getMetadata());
+ OutContext.getMachOSection("__IMPORT", "__jump_table",
+ MCSectionMachO::S_SYMBOL_STUBS |
+ MCSectionMachO::S_ATTR_SELF_MODIFYING_CODE |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 5, SectionKind::getMetadata());
OutStreamer.SwitchSection(TheSection);
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
@@ -512,9 +509,9 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
Stubs = MMIMacho.GetGVStubList();
if (!Stubs.empty()) {
const MCSection *TheSection =
- TLOFMacho.getMachOSection("__IMPORT", "__pointers",
- MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS,
- SectionKind::getMetadata());
+ OutContext.getMachOSection("__IMPORT", "__pointers",
+ MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS,
+ SectionKind::getMetadata());
OutStreamer.SwitchSection(TheSection);
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
@@ -587,8 +584,8 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
// Necessary for dllexport support
std::vector<const MCSymbol*> DLLExportedFns, DLLExportedGlobals;
- TargetLoweringObjectFileCOFF &TLOFCOFF =
- static_cast<TargetLoweringObjectFileCOFF&>(getObjFileLowering());
+ const TargetLoweringObjectFileCOFF &TLOFCOFF =
+ static_cast<const TargetLoweringObjectFileCOFF&>(getObjFileLowering());
for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I)
if (I->hasDLLExportLinkage())
@@ -617,8 +614,8 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
}
if (Subtarget->isTargetELF()) {
- TargetLoweringObjectFileELF &TLOFELF =
- static_cast<TargetLoweringObjectFileELF &>(getObjFileLowering());
+ const TargetLoweringObjectFileELF &TLOFELF =
+ static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h
index ee59289dc572..95984b22cdc3 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h
@@ -80,6 +80,8 @@ class VISIBILITY_HIDDEN X86AsmPrinter : public AsmPrinter {
bool runOnMachineFunction(MachineFunction &F);
void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
+
+ MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
};
} // end namespace llvm
diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
index a290eb0f2a5f..effc8ed38116 100644
--- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
+++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
@@ -169,6 +169,15 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
Expr = MCBinaryExpr::CreateSub(Expr,
MCSymbolRefExpr::Create(GetPICBaseSymbol(), Ctx),
Ctx);
+ if (MO.isJTI() && AsmPrinter.MAI->hasSetDirective()) {
+ // If .set directive is supported, use it to reduce the number of
+ // relocations the assembler will generate for differences between
+ // local labels. This is only safe when the symbols are in the same
+ // section so we are restricting it to jumptable references.
+ MCSymbol *Label = Ctx.CreateTempSymbol();
+ AsmPrinter.OutStreamer.EmitAssignment(Label, Expr);
+ Expr = MCSymbolRefExpr::Create(Label, Ctx);
+ }
break;
}
@@ -328,61 +337,36 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
raw_ostream &O) {
- // FIXME: if this is implemented for another target before it goes
- // away completely, the common part should be moved into AsmPrinter.
- O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
+ // Only the target-dependent form of DBG_VALUE should get here.
+ // Referencing the offset and metadata as NOps-2 and NOps-1 is
+ // probably portable to other targets; frame pointer location is not.
unsigned NOps = MI->getNumOperands();
+ assert(NOps==7);
+ O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
// cast away const; DIetc do not take const operands for some reason.
- DIVariable V((MDNode*)(MI->getOperand(NOps-1).getMetadata()));
+ DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
+ if (V.getContext().isSubprogram())
+ O << DISubprogram(V.getContext().getNode()).getDisplayName() << ":";
O << V.getName();
O << " <- ";
- if (NOps==3) {
- // Register or immediate value. Register 0 means undef.
- assert(MI->getOperand(0).isReg() ||
- MI->getOperand(0).isImm() ||
- MI->getOperand(0).isFPImm());
- if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg() == 0) {
- // Suppress offset in this case, it is not meaningful.
- O << "undef";
- OutStreamer.AddBlankLine();
- return;
- }
-
- if (MI->getOperand(0).isFPImm()) {
- // This is more naturally done in printOperand, but since the only use
- // of such an operand is in this comment and that is temporary (and it's
- // ugly), we prefer to keep this localized.
- // The include of Type.h may be removable when this code is.
- if (MI->getOperand(0).getFPImm()->getType()->isFloatTy() ||
- MI->getOperand(0).getFPImm()->getType()->isDoubleTy())
- MI->getOperand(0).print(O, &TM);
- else {
- // There is no good way to print long double. Convert a copy to
- // double. Ah well, it's only a comment.
- bool ignored;
- APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF());
- APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
- &ignored);
- O << "(long double) " << APF.convertToDouble();
- }
- } else
- printOperand(MI, 0, O);
- } else {
- if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg() == 0) {
- // Suppress offset in this case, it is not meaningful.
- O << "undef";
- OutStreamer.AddBlankLine();
- return;
- }
- // Frame address. Currently handles register +- offset only.
- assert(MI->getOperand(0).isReg() && MI->getOperand(3).isImm());
- O << '['; printOperand(MI, 0, O); O << '+'; printOperand(MI, 3, O);
- O << ']';
- }
+ // Frame address. Currently handles register +- offset only.
+ assert(MI->getOperand(0).isReg() && MI->getOperand(3).isImm());
+ O << '['; printOperand(MI, 0, O); O << '+'; printOperand(MI, 3, O);
+ O << ']';
O << "+";
printOperand(MI, NOps-2, O);
}
+MachineLocation
+X86AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const {
+ MachineLocation Location;
+ assert (MI->getNumOperands() == 7 && "Invalid no. of machine operands!");
+ // Frame address. Currently handles register +- offset only.
+ assert(MI->getOperand(0).isReg() && MI->getOperand(3).isImm());
+ Location.set(MI->getOperand(0).getReg(), MI->getOperand(3).getImm());
+ return Location;
+}
+
void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
X86MCInstLower MCInstLowering(OutContext, Mang, *this);
@@ -395,7 +379,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutStreamer.EmitRawText(StringRef(OS.str()));
}
return;
-
+
case X86::MOVPC32r: {
MCInst TmpInst;
// This is a pseudo op for a two instruction sequence with a label, which
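
The .set emission added in LowerSymbolOperand above replaces a symbol difference with a pre-bound temporary label, which assemblers supporting .set can fold without a relocation pair. Roughly (label names invented for illustration):

    //   .set  Ltmp0, LJTI0_0 - L0$pb   ; emitted via EmitAssignment
    //   ...   Ltmp0 ...                ; plain symbol ref afterwards
    //
    // The MO.isJTI() guard restricts this to jump-table entries, where
    // both symbols are known to be in the same section, so the
    // difference is safe to fold at assembly time.
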
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index 22285f193234..da6bb9140d05 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -13,6 +13,7 @@ tablegen(X86GenDAGISel.inc -gen-dag-isel)
tablegen(X86GenFastISel.inc -gen-fast-isel)
tablegen(X86GenCallingConv.inc -gen-callingconv)
tablegen(X86GenSubtarget.inc -gen-subtarget)
+tablegen(X86GenEDInfo.inc -gen-enhanced-disassembly-info)
set(sources
SSEDomainFix.cpp
@@ -33,6 +34,7 @@ set(sources
X86TargetMachine.cpp
X86TargetObjectFile.cpp
X86FastISel.cpp
+ X86SelectionDAGInfo.cpp
)
if( CMAKE_CL_64 )
diff --git a/lib/Target/X86/Disassembler/CMakeLists.txt b/lib/Target/X86/Disassembler/CMakeLists.txt
index 2a83a9c26858..9f91060179e6 100644
--- a/lib/Target/X86/Disassembler/CMakeLists.txt
+++ b/lib/Target/X86/Disassembler/CMakeLists.txt
@@ -4,4 +4,11 @@ add_llvm_library(LLVMX86Disassembler
X86Disassembler.cpp
X86DisassemblerDecoder.c
)
+# workaround for hanging compilation on MSVC9
+if( MSVC_VERSION EQUAL 1500 )
+set_property(
+ SOURCE X86Disassembler.cpp
+ PROPERTY COMPILE_FLAGS "/Od"
+ )
+endif()
add_dependencies(LLVMX86Disassembler X86CodeGenTable_gen)
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 7328dc0ba8d7..62e7357b8f34 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -17,6 +17,7 @@
#include "X86Disassembler.h"
#include "X86DisassemblerDecoder.h"
+#include "llvm/MC/EDInstInfo.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
@@ -26,6 +27,7 @@
#include "llvm/Support/raw_ostream.h"
#include "X86GenRegisterNames.inc"
+#include "X86GenEDInfo.inc"
using namespace llvm;
using namespace llvm::X86Disassembler;
@@ -69,6 +71,10 @@ X86GenericDisassembler::X86GenericDisassembler(DisassemblerMode mode) :
X86GenericDisassembler::~X86GenericDisassembler() {
}
+EDInstInfo *X86GenericDisassembler::getEDInfo() const {
+ return instInfoX86;
+}
+
/// regionReader - a callback function that wraps the readByte method from
/// MemoryObject.
///
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.h b/lib/Target/X86/Disassembler/X86Disassembler.h
index 0e6e0b0e519f..9c542628d709 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.h
+++ b/lib/Target/X86/Disassembler/X86Disassembler.h
@@ -94,6 +94,8 @@ namespace llvm {
class MCInst;
class MemoryObject;
class raw_ostream;
+
+struct EDInstInfo;
namespace X86Disassembler {
@@ -115,6 +117,9 @@ public:
const MemoryObject &region,
uint64_t address,
raw_ostream &vStream) const;
+
+ /// getEDInfo - See MCDisassembler.
+ EDInstInfo *getEDInfo() const;
private:
DisassemblerMode fMode;
};
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
index db694bc2f3c5..64f6b2dc7e8b 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@@ -1277,6 +1277,9 @@ static int readOperands(struct InternalInstruction* insn) {
case ENCODING_IB:
if (readImmediate(insn, 1))
return -1;
+ if (insn->spec->operands[index].type == TYPE_IMM3 &&
+ insn->immediates[insn->numImmediatesConsumed - 1] > 7)
+ return -1;
break;
case ENCODING_IW:
if (readImmediate(insn, 2))
@@ -1293,6 +1296,7 @@ static int readOperands(struct InternalInstruction* insn) {
case ENCODING_Iv:
if (readImmediate(insn, insn->immediateSize))
return -1;
+ break;
case ENCODING_Ia:
if (readImmediate(insn, insn->addressSize))
return -1;
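
Two independent decoder fixes land in readOperands here: ENCODING_Iv previously fell through into ENCODING_Ia and consumed a second immediate (hence the added break), and TYPE_IMM3 operands now reject values above 7, turning impossible encodings into decode failures. A self-contained restatement of the new range check:

    #include <stdint.h>

    /* TYPE_IMM3 admits only 0..7; the decoder now returns an error for
     * anything larger instead of producing a bogus instruction. */
    static int validImm3(uint64_t imm) {
      return imm <= 7;
    }
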
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
index c213f89ebc81..4a7cd57f2e23 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@@ -236,6 +236,7 @@ struct ContextDecision {
ENUM_ENTRY(TYPE_IMM16, "2-byte") \
ENUM_ENTRY(TYPE_IMM32, "4-byte") \
ENUM_ENTRY(TYPE_IMM64, "8-byte") \
+ ENUM_ENTRY(TYPE_IMM3, "1-byte immediate operand between 0 and 7") \
ENUM_ENTRY(TYPE_RM8, "1-byte register or memory operand") \
ENUM_ENTRY(TYPE_RM16, "2-byte") \
ENUM_ENTRY(TYPE_RM32, "4-byte") \
diff --git a/lib/Target/X86/SSEDomainFix.cpp b/lib/Target/X86/SSEDomainFix.cpp
index 4b546768b4f1..5e808450d1cd 100644
--- a/lib/Target/X86/SSEDomainFix.cpp
+++ b/lib/Target/X86/SSEDomainFix.cpp
@@ -159,7 +159,7 @@ int SSEDomainFixPass::RegIndex(unsigned reg) {
// We just need them to be consecutive, ordering doesn't matter.
assert(X86::XMM9 == X86::XMM0+NumRegs-1 && "Unexpected sort");
reg -= X86::XMM0;
- return reg < NumRegs ? reg : -1;
+ return reg < NumRegs ? (int) reg : -1;
}
DomainValue *SSEDomainFixPass::Alloc(int domain) {
@@ -216,8 +216,15 @@ void SSEDomainFixPass::Force(int rx, unsigned domain) {
if (LiveRegs && (dv = LiveRegs[rx])) {
if (dv->isCollapsed())
dv->addDomain(domain);
- else
+ else if (dv->hasDomain(domain))
Collapse(dv, domain);
+ else {
+ // This is an incompatible open DomainValue. Collapse it to whatever and force
+ // the new value into domain. This costs a domain crossing.
+ Collapse(dv, dv->getFirstDomain());
+ assert(LiveRegs[rx] && "Not live after collapse?");
+ LiveRegs[rx]->addDomain(domain);
+ }
} else {
// Set up basic collapsed DomainValue.
SetLiveReg(rx, Alloc(domain));
@@ -263,7 +270,7 @@ bool SSEDomainFixPass::Merge(DomainValue *A, DomainValue *B) {
void SSEDomainFixPass::enterBasicBlock() {
// Try to coalesce live-out registers from predecessors.
- for (MachineBasicBlock::const_livein_iterator i = MBB->livein_begin(),
+ for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(),
e = MBB->livein_end(); i != e; ++i) {
int rx = RegIndex(*i);
if (rx < 0) continue;
@@ -281,8 +288,9 @@ void SSEDomainFixPass::enterBasicBlock() {
// We have a live DomainValue from more than one predecessor.
if (LiveRegs[rx]->isCollapsed()) {
// We are already collapsed, but predecessor is not. Force him.
- if (!pdv->isCollapsed())
- Collapse(pdv, LiveRegs[rx]->getFirstDomain());
+ unsigned domain = LiveRegs[rx]->getFirstDomain();
+ if (!pdv->isCollapsed() && pdv->hasDomain(domain))
+ Collapse(pdv, domain);
continue;
}
@@ -290,7 +298,7 @@ void SSEDomainFixPass::enterBasicBlock() {
if (!pdv->isCollapsed())
Merge(LiveRegs[rx], pdv);
else
- Collapse(LiveRegs[rx], pdv->getFirstDomain());
+ Force(rx, pdv->getFirstDomain());
}
}
}
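
The reworked Force now covers every state a live register's DomainValue can be in; as a table summarizing the code above:

    //   collapsed                -> addDomain(domain); no cost
    //   open, already has domain -> Collapse(dv, domain); no cost
    //   open, lacks domain       -> Collapse to its first domain, then
    //                               add 'domain' to the now-collapsed
    //                               value: one domain crossing, as the
    //                               new comment notes
    //   no live DomainValue      -> allocate a collapsed value in
    //                               'domain'
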
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 9be38a4b56a9..22e89a57f63d 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -69,6 +69,12 @@ TargetAsmBackend *createX86_64AsmBackend(const Target &, const std::string &);
///
FunctionPass *createEmitX86CodeToMemory();
+/// createX86MaxStackAlignmentHeuristicPass - This function returns a pass
+/// which determines whether the frame pointer register should be
+/// reserved in case dynamic stack alignment is later required.
+///
+FunctionPass *createX86MaxStackAlignmentHeuristicPass();
+
extern Target TheX86_32Target, TheX86_64Target;
} // End llvm namespace
diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp
index 8e2928c3b713..ba9c1d0588e3 100644
--- a/lib/Target/X86/X86AsmBackend.cpp
+++ b/lib/Target/X86/X86AsmBackend.cpp
@@ -111,7 +111,7 @@ void X86AsmBackend::RelaxInstruction(const MCInstFragment *IF,
SmallString<256> Tmp;
raw_svector_ostream OS(Tmp);
IF->getInst().dump_pretty(OS);
- llvm_report_error("unexpected instruction to relax: " + OS.str());
+ report_fatal_error("unexpected instruction to relax: " + OS.str());
}
Res = IF->getInst();
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index 6638e110f4f4..8f026049cb84 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -79,7 +79,7 @@ namespace {
private:
void emitPCRelativeBlockAddress(MachineBasicBlock *MBB);
- void emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
+ void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
intptr_t Disp = 0, intptr_t PCAdj = 0,
bool Indirect = false);
void emitExternalSymbolAddress(const char *ES, unsigned Reloc);
@@ -163,7 +163,8 @@ void Emitter<CodeEmitter>::emitPCRelativeBlockAddress(MachineBasicBlock *MBB) {
/// this is part of a "take the address of a global" instruction.
///
template<class CodeEmitter>
-void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
+void Emitter<CodeEmitter>::emitGlobalAddress(const GlobalValue *GV,
+ unsigned Reloc,
intptr_t Disp /* = 0 */,
intptr_t PCAdj /* = 0 */,
bool Indirect /* = false */) {
@@ -174,9 +175,10 @@ void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
RelocCST = PCAdj;
MachineRelocation MR = Indirect
? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc,
- GV, RelocCST, false)
+ const_cast<GlobalValue *>(GV),
+ RelocCST, false)
: MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
- GV, RelocCST, false);
+ const_cast<GlobalValue *>(GV), RelocCST, false);
MCE.addRelocation(MR);
// The relocated value will be added to the displacement
if (Reloc == X86::reloc_absolute_dword)
@@ -378,6 +380,16 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
const MachineOperand &IndexReg = MI.getOperand(Op+2);
unsigned BaseReg = Base.getReg();
+
+ // Handle %rip relative addressing.
+ if (BaseReg == X86::RIP ||
+ (Is64BitMode && DispForReloc)) { // [disp32+RIP] in X86-64 mode
+ assert(IndexReg.getReg() == 0 && Is64BitMode &&
+ "Invalid rip-relative address");
+ MCE.emitByte(ModRMByte(0, RegOpcodeField, 5));
+ emitDisplacementField(DispForReloc, DispVal, PCAdj, true);
+ return;
+ }
// Indicate that the displacement will use a pcrel or absolute reference
// by default. MCEs able to resolve addresses on-the-fly use pcrel by default
@@ -445,7 +457,7 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
// Emit the normal disp32 encoding.
MCE.emitByte(ModRMByte(2, RegOpcodeField, 4));
ForceDisp32 = true;
- } else if (DispVal == 0 && getX86RegNum(BaseReg) != N86::EBP) {
+ } else if (DispVal == 0 && BaseRegNo != N86::EBP) {
// Emit no displacement ModR/M byte
MCE.emitByte(ModRMByte(0, RegOpcodeField, 4));
} else if (isDisp8(DispVal)) {
@@ -600,7 +612,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
// We allow inline assembler nodes with empty bodies - they can
// implicitly define registers, which is ok for JIT.
if (MI.getOperand(0).getSymbolName()[0])
- llvm_report_error("JIT does not support inline asm!");
+ report_fatal_error("JIT does not support inline asm!");
break;
case TargetOpcode::DBG_LABEL:
case TargetOpcode::GC_LABEL:
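The rip-relative hunk above relies on the x86-64 rule that ModRM mod=00 with r/m=101, which meant plain [disp32] on x86-32, is reinterpreted as [RIP + disp32]; that is why the displacement is then emitted as pc-relative. A standalone illustration of the byte layout (ModRMByte here mirrors the shape of the emitter's helper of the same name):

#include <assert.h>
#include <stdint.h>

// ModRM packs mod(2) | reg-or-opcode(3) | r/m(3), high bits to low.
static inline uint8_t ModRMByte(unsigned Mod, unsigned RegOp, unsigned RM) {
  return (uint8_t)((Mod << 6) | ((RegOp & 7) << 3) | (RM & 7));
}

int main() {
  assert(ModRMByte(0, 0, 5) == 0x05);  // [RIP+disp32] in 64-bit mode
  assert(ModRMByte(0, 0, 0) == 0x00);  // [RAX]: r/m names a base register
  assert(ModRMByte(2, 0, 4) == 0x84);  // SIB byte follows, with disp32
  return 0;
}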
diff --git a/lib/Target/X86/X86ELFWriterInfo.cpp b/lib/Target/X86/X86ELFWriterInfo.cpp
index 1597d2b31d22..f84995dcf342 100644
--- a/lib/Target/X86/X86ELFWriterInfo.cpp
+++ b/lib/Target/X86/X86ELFWriterInfo.cpp
@@ -26,7 +26,6 @@ using namespace llvm;
X86ELFWriterInfo::X86ELFWriterInfo(TargetMachine &TM)
: TargetELFWriterInfo(TM) {
- bool is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64;
EMachine = is64Bit ? EM_X86_64 : EM_386;
}
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 7849b51accc4..ff9208c2c1e9 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -15,7 +15,6 @@
#include "X86.h"
#include "X86InstrBuilder.h"
-#include "X86ISelLowering.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
@@ -56,12 +55,13 @@ public:
explicit X86FastISel(MachineFunction &mf,
DenseMap<const Value *, unsigned> &vm,
DenseMap<const BasicBlock *, MachineBasicBlock *> &bm,
- DenseMap<const AllocaInst *, int> &am
+ DenseMap<const AllocaInst *, int> &am,
+ std::vector<std::pair<MachineInstr*, unsigned> > &pn
#ifndef NDEBUG
- , SmallSet<Instruction*, 8> &cil
+ , SmallSet<const Instruction *, 8> &cil
#endif
)
- : FastISel(mf, vm, bm, am
+ : FastISel(mf, vm, bm, am, pn
#ifndef NDEBUG
, cil
#endif
@@ -72,16 +72,16 @@ public:
X86ScalarSSEf32 = Subtarget->hasSSE1();
}
- virtual bool TargetSelectInstruction(Instruction *I);
+ virtual bool TargetSelectInstruction(const Instruction *I);
#include "X86GenFastISel.inc"
private:
- bool X86FastEmitCompare(Value *LHS, Value *RHS, EVT VT);
+ bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT);
bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR);
- bool X86FastEmitStore(EVT VT, Value *Val,
+ bool X86FastEmitStore(EVT VT, const Value *Val,
const X86AddressMode &AM);
bool X86FastEmitStore(EVT VT, unsigned Val,
const X86AddressMode &AM);
@@ -89,32 +89,32 @@ private:
bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
unsigned &ResultReg);
- bool X86SelectAddress(Value *V, X86AddressMode &AM);
- bool X86SelectCallAddress(Value *V, X86AddressMode &AM);
+ bool X86SelectAddress(const Value *V, X86AddressMode &AM);
+ bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);
- bool X86SelectLoad(Instruction *I);
+ bool X86SelectLoad(const Instruction *I);
- bool X86SelectStore(Instruction *I);
+ bool X86SelectStore(const Instruction *I);
- bool X86SelectCmp(Instruction *I);
+ bool X86SelectCmp(const Instruction *I);
- bool X86SelectZExt(Instruction *I);
+ bool X86SelectZExt(const Instruction *I);
- bool X86SelectBranch(Instruction *I);
+ bool X86SelectBranch(const Instruction *I);
- bool X86SelectShift(Instruction *I);
+ bool X86SelectShift(const Instruction *I);
- bool X86SelectSelect(Instruction *I);
+ bool X86SelectSelect(const Instruction *I);
- bool X86SelectTrunc(Instruction *I);
+ bool X86SelectTrunc(const Instruction *I);
- bool X86SelectFPExt(Instruction *I);
- bool X86SelectFPTrunc(Instruction *I);
+ bool X86SelectFPExt(const Instruction *I);
+ bool X86SelectFPTrunc(const Instruction *I);
- bool X86SelectExtractValue(Instruction *I);
+ bool X86SelectExtractValue(const Instruction *I);
- bool X86VisitIntrinsicCall(IntrinsicInst &I);
- bool X86SelectCall(Instruction *I);
+ bool X86VisitIntrinsicCall(const IntrinsicInst &I);
+ bool X86SelectCall(const Instruction *I);
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isTailCall = false);
@@ -125,9 +125,9 @@ private:
return static_cast<const X86TargetMachine *>(&TM);
}
- unsigned TargetMaterializeConstant(Constant *C);
+ unsigned TargetMaterializeConstant(const Constant *C);
- unsigned TargetMaterializeAlloca(AllocaInst *C);
+ unsigned TargetMaterializeAlloca(const AllocaInst *C);
/// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
/// computed in an SSE register, not on the X87 floating point stack.
@@ -280,14 +280,14 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val,
return true;
}
-bool X86FastISel::X86FastEmitStore(EVT VT, Value *Val,
+bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
const X86AddressMode &AM) {
// Handle 'null' like i32/i64 0.
if (isa<ConstantPointerNull>(Val))
Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext()));
// If this is a store of a simple constant, fold the constant into the store.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
unsigned Opc = 0;
bool Signed = true;
switch (VT.getSimpleVT().SimpleTy) {
@@ -305,7 +305,7 @@ bool X86FastISel::X86FastEmitStore(EVT VT, Value *Val,
if (Opc) {
addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM)
- .addImm(Signed ? CI->getSExtValue() :
+ .addImm(Signed ? (uint64_t) CI->getSExtValue() :
CI->getZExtValue());
return true;
}
@@ -335,13 +335,13 @@ bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
/// X86SelectAddress - Attempt to fill in an address from the given value.
///
-bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) {
- User *U = NULL;
+bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
+ const User *U = NULL;
unsigned Opcode = Instruction::UserOp1;
- if (Instruction *I = dyn_cast<Instruction>(V)) {
+ if (const Instruction *I = dyn_cast<Instruction>(V)) {
Opcode = I->getOpcode();
U = I;
- } else if (ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
+ } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
Opcode = C->getOpcode();
U = C;
}
@@ -378,7 +378,7 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) {
case Instruction::Add: {
// Adds of constants are common and easy enough.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
// They have to fit in the 32-bit signed displacement field though.
if (isInt<32>(Disp)) {
@@ -399,16 +399,16 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) {
gep_type_iterator GTI = gep_type_begin(U);
// Iterate through the indices, folding what we can. Constants can be
// folded, and one dynamic index can be handled, if the scale is supported.
- for (User::op_iterator i = U->op_begin() + 1, e = U->op_end();
+ for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
i != e; ++i, ++GTI) {
- Value *Op = *i;
+ const Value *Op = *i;
if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
const StructLayout *SL = TD.getStructLayout(STy);
unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
Disp += SL->getElementOffset(Idx);
} else {
uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
// Constant-offset addressing.
Disp += CI->getSExtValue() * S;
} else if (IndexReg == 0 &&
@@ -446,7 +446,7 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) {
}
// Handle constant address.
- if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
// Can't handle alternate code models yet.
if (TM.getCodeModel() != CodeModel::Small)
return false;
@@ -457,7 +457,7 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) {
return false;
// Can't handle TLS yet.
- if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
if (GVar->isThreadLocal())
return false;
@@ -544,13 +544,13 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) {
/// X86SelectCallAddress - Attempt to fill in an address from the given value.
///
-bool X86FastISel::X86SelectCallAddress(Value *V, X86AddressMode &AM) {
- User *U = NULL;
+bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
+ const User *U = NULL;
unsigned Opcode = Instruction::UserOp1;
- if (Instruction *I = dyn_cast<Instruction>(V)) {
+ if (const Instruction *I = dyn_cast<Instruction>(V)) {
Opcode = I->getOpcode();
U = I;
- } else if (ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
+ } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
Opcode = C->getOpcode();
U = C;
}
@@ -575,7 +575,7 @@ bool X86FastISel::X86SelectCallAddress(Value *V, X86AddressMode &AM) {
}
// Handle constant address.
- if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
// Can't handle alternate code models yet.
if (TM.getCodeModel() != CodeModel::Small)
return false;
@@ -586,7 +586,7 @@ bool X86FastISel::X86SelectCallAddress(Value *V, X86AddressMode &AM) {
return false;
// Can't handle TLS or DLLImport.
- if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
if (GVar->isThreadLocal() || GVar->hasDLLImportLinkage())
return false;
@@ -627,7 +627,7 @@ bool X86FastISel::X86SelectCallAddress(Value *V, X86AddressMode &AM) {
/// X86SelectStore - Select and emit code to implement store instructions.
-bool X86FastISel::X86SelectStore(Instruction* I) {
+bool X86FastISel::X86SelectStore(const Instruction *I) {
EVT VT;
if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
return false;
@@ -641,7 +641,7 @@ bool X86FastISel::X86SelectStore(Instruction* I) {
/// X86SelectLoad - Select and emit code to implement load instructions.
///
-bool X86FastISel::X86SelectLoad(Instruction *I) {
+bool X86FastISel::X86SelectLoad(const Instruction *I) {
EVT VT;
if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true))
return false;
@@ -673,7 +673,7 @@ static unsigned X86ChooseCmpOpcode(EVT VT) {
/// X86ChooseCmpImmediateOpcode - If the RHS of a comparison is the constant
/// RHSC, return an opcode that works for the compare with that immediate
/// (e.g. CMP32ri); otherwise return 0.
-static unsigned X86ChooseCmpImmediateOpcode(EVT VT, ConstantInt *RHSC) {
+static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
switch (VT.getSimpleVT().SimpleTy) {
// Otherwise, we can't fold the immediate into this comparison.
default: return 0;
@@ -689,7 +689,8 @@ static unsigned X86ChooseCmpImmediateOpcode(EVT VT, ConstantInt *RHSC) {
}
}
-bool X86FastISel::X86FastEmitCompare(Value *Op0, Value *Op1, EVT VT) {
+bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1,
+ EVT VT) {
unsigned Op0Reg = getRegForValue(Op0);
if (Op0Reg == 0) return false;
@@ -700,7 +701,7 @@ bool X86FastISel::X86FastEmitCompare(Value *Op0, Value *Op1, EVT VT) {
// We have two options: compare with register or immediate. If the RHS of
// the compare is an immediate that we can fold into this compare, use
// CMPri, otherwise use CMPrr.
- if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
+ if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
BuildMI(MBB, DL, TII.get(CompareImmOpc)).addReg(Op0Reg)
.addImm(Op1C->getSExtValue());
@@ -718,8 +719,8 @@ bool X86FastISel::X86FastEmitCompare(Value *Op0, Value *Op1, EVT VT) {
return true;
}
-bool X86FastISel::X86SelectCmp(Instruction *I) {
- CmpInst *CI = cast<CmpInst>(I);
+bool X86FastISel::X86SelectCmp(const Instruction *I) {
+ const CmpInst *CI = cast<CmpInst>(I);
EVT VT;
if (!isTypeLegal(I->getOperand(0)->getType(), VT))
@@ -781,7 +782,7 @@ bool X86FastISel::X86SelectCmp(Instruction *I) {
return false;
}
- Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
+ const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
if (SwapArgs)
std::swap(Op0, Op1);
@@ -794,7 +795,7 @@ bool X86FastISel::X86SelectCmp(Instruction *I) {
return true;
}
-bool X86FastISel::X86SelectZExt(Instruction *I) {
+bool X86FastISel::X86SelectZExt(const Instruction *I) {
// Handle zero-extension from i1 to i8, which is common.
if (I->getType()->isIntegerTy(8) &&
I->getOperand(0)->getType()->isIntegerTy(1)) {
@@ -811,15 +812,15 @@ bool X86FastISel::X86SelectZExt(Instruction *I) {
}
-bool X86FastISel::X86SelectBranch(Instruction *I) {
+bool X86FastISel::X86SelectBranch(const Instruction *I) {
// Unconditional branches are selected by tablegen-generated code.
// Handle a conditional branch.
- BranchInst *BI = cast<BranchInst>(I);
+ const BranchInst *BI = cast<BranchInst>(I);
MachineBasicBlock *TrueMBB = MBBMap[BI->getSuccessor(0)];
MachineBasicBlock *FalseMBB = MBBMap[BI->getSuccessor(1)];
// Fold the common case of a conditional branch with a comparison.
- if (CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
+ if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
if (CI->hasOneUse()) {
EVT VT = TLI.getValueType(CI->getOperand(0)->getType());
@@ -866,7 +867,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) {
return false;
}
- Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
+ const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
if (SwapArgs)
std::swap(Op0, Op1);
@@ -901,7 +902,8 @@ bool X86FastISel::X86SelectBranch(Instruction *I) {
// looking for the SETO/SETB instruction. If an instruction modifies the
// EFLAGS register before we reach the SETO/SETB instruction, then we can't
// convert the branch into a JO/JB instruction.
- if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(EI->getAggregateOperand())){
+ if (const IntrinsicInst *CI =
+ dyn_cast<IntrinsicInst>(EI->getAggregateOperand())){
if (CI->getIntrinsicID() == Intrinsic::sadd_with_overflow ||
CI->getIntrinsicID() == Intrinsic::uadd_with_overflow) {
const MachineInstr *SetMI = 0;
@@ -956,7 +958,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) {
return true;
}
-bool X86FastISel::X86SelectShift(Instruction *I) {
+bool X86FastISel::X86SelectShift(const Instruction *I) {
unsigned CReg = 0, OpReg = 0, OpImm = 0;
const TargetRegisterClass *RC = NULL;
if (I->getType()->isIntegerTy(8)) {
@@ -1007,7 +1009,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) {
if (Op0Reg == 0) return false;
// Fold immediate in shl(x,3).
- if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
unsigned ResultReg = createResultReg(RC);
BuildMI(MBB, DL, TII.get(OpImm),
ResultReg).addReg(Op0Reg).addImm(CI->getZExtValue() & 0xff);
@@ -1032,7 +1034,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) {
return true;
}
-bool X86FastISel::X86SelectSelect(Instruction *I) {
+bool X86FastISel::X86SelectSelect(const Instruction *I) {
EVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true);
if (VT == MVT::Other || !isTypeLegal(I->getType(), VT))
return false;
@@ -1066,11 +1068,11 @@ bool X86FastISel::X86SelectSelect(Instruction *I) {
return true;
}
-bool X86FastISel::X86SelectFPExt(Instruction *I) {
+bool X86FastISel::X86SelectFPExt(const Instruction *I) {
// fpext from float to double.
if (Subtarget->hasSSE2() &&
I->getType()->isDoubleTy()) {
- Value *V = I->getOperand(0);
+ const Value *V = I->getOperand(0);
if (V->getType()->isFloatTy()) {
unsigned OpReg = getRegForValue(V);
if (OpReg == 0) return false;
@@ -1084,10 +1086,10 @@ bool X86FastISel::X86SelectFPExt(Instruction *I) {
return false;
}
-bool X86FastISel::X86SelectFPTrunc(Instruction *I) {
+bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
if (Subtarget->hasSSE2()) {
if (I->getType()->isFloatTy()) {
- Value *V = I->getOperand(0);
+ const Value *V = I->getOperand(0);
if (V->getType()->isDoubleTy()) {
unsigned OpReg = getRegForValue(V);
if (OpReg == 0) return false;
@@ -1102,7 +1104,7 @@ bool X86FastISel::X86SelectFPTrunc(Instruction *I) {
return false;
}
-bool X86FastISel::X86SelectTrunc(Instruction *I) {
+bool X86FastISel::X86SelectTrunc(const Instruction *I) {
if (Subtarget->is64Bit())
// All other cases should be handled by the tblgen generated code.
return false;
@@ -1139,11 +1141,11 @@ bool X86FastISel::X86SelectTrunc(Instruction *I) {
return true;
}
-bool X86FastISel::X86SelectExtractValue(Instruction *I) {
- ExtractValueInst *EI = cast<ExtractValueInst>(I);
- Value *Agg = EI->getAggregateOperand();
+bool X86FastISel::X86SelectExtractValue(const Instruction *I) {
+ const ExtractValueInst *EI = cast<ExtractValueInst>(I);
+ const Value *Agg = EI->getAggregateOperand();
- if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(Agg)) {
+ if (const IntrinsicInst *CI = dyn_cast<IntrinsicInst>(Agg)) {
switch (CI->getIntrinsicID()) {
default: break;
case Intrinsic::sadd_with_overflow:
@@ -1160,7 +1162,7 @@ bool X86FastISel::X86SelectExtractValue(Instruction *I) {
return false;
}
-bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) {
+bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
// FIXME: Handle more intrinsics.
switch (I.getIntrinsicID()) {
default: return false;
@@ -1168,8 +1170,8 @@ bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) {
// Emit code inline code to store the stack guard onto the stack.
EVT PtrTy = TLI.getPointerTy();
- Value *Op1 = I.getOperand(1); // The guard's value.
- AllocaInst *Slot = cast<AllocaInst>(I.getOperand(2));
+ const Value *Op1 = I.getOperand(1); // The guard's value.
+ const AllocaInst *Slot = cast<AllocaInst>(I.getOperand(2));
// Grab the frame index.
X86AddressMode AM;
@@ -1204,7 +1206,7 @@ bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) {
return true;
}
case Intrinsic::dbg_declare: {
- DbgDeclareInst *DI = cast<DbgDeclareInst>(&I);
+ const DbgDeclareInst *DI = cast<DbgDeclareInst>(&I);
X86AddressMode AM;
assert(DI->getAddress() && "Null address should be checked earlier!");
if (!X86SelectAddress(DI->getAddress(), AM))
@@ -1235,8 +1237,8 @@ bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) {
if (!isTypeLegal(RetTy, VT))
return false;
- Value *Op1 = I.getOperand(1);
- Value *Op2 = I.getOperand(2);
+ const Value *Op1 = I.getOperand(1);
+ const Value *Op2 = I.getOperand(2);
unsigned Reg1 = getRegForValue(Op1);
unsigned Reg2 = getRegForValue(Op2);
@@ -1277,20 +1279,20 @@ bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) {
}
}
-bool X86FastISel::X86SelectCall(Instruction *I) {
- CallInst *CI = cast<CallInst>(I);
- Value *Callee = I->getOperand(0);
+bool X86FastISel::X86SelectCall(const Instruction *I) {
+ const CallInst *CI = cast<CallInst>(I);
+ const Value *Callee = I->getOperand(0);
// Can't handle inline asm yet.
if (isa<InlineAsm>(Callee))
return false;
// Handle intrinsic calls.
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI))
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI))
return X86VisitIntrinsicCall(*II);
// Handle only C and fastcc calling conventions for now.
- CallSite CS(CI);
+ ImmutableCallSite CS(CI);
CallingConv::ID CC = CS.getCallingConv();
if (CC != CallingConv::C &&
CC != CallingConv::Fast &&
@@ -1322,7 +1324,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) {
if (!X86SelectCallAddress(Callee, CalleeAM))
return false;
unsigned CalleeOp = 0;
- GlobalValue *GV = 0;
+ const GlobalValue *GV = 0;
if (CalleeAM.GV != 0) {
GV = CalleeAM.GV;
} else if (CalleeAM.Base.Reg != 0) {
@@ -1338,7 +1340,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) {
}
// Deal with call operands first.
- SmallVector<Value*, 8> ArgVals;
+ SmallVector<const Value *, 8> ArgVals;
SmallVector<unsigned, 8> Args;
SmallVector<EVT, 8> ArgVTs;
SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
@@ -1346,7 +1348,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) {
ArgVals.reserve(CS.arg_size());
ArgVTs.reserve(CS.arg_size());
ArgFlags.reserve(CS.arg_size());
- for (CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+ for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
i != e; ++i) {
unsigned Arg = getRegForValue(*i);
if (Arg == 0)
@@ -1454,7 +1456,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) {
X86AddressMode AM;
AM.Base.Reg = StackPtr;
AM.Disp = LocMemOffset;
- Value *ArgVal = ArgVals[VA.getValNo()];
+ const Value *ArgVal = ArgVals[VA.getValNo()];
// If this is a really simple value, emit this with the Value* version of
// X86FastEmitStore. If it isn't simple, we don't want to do this, as it
@@ -1585,7 +1587,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) {
bool
-X86FastISel::TargetSelectInstruction(Instruction *I) {
+X86FastISel::TargetSelectInstruction(const Instruction *I) {
switch (I->getOpcode()) {
default: break;
case Instruction::Load:
@@ -1633,7 +1635,7 @@ X86FastISel::TargetSelectInstruction(Instruction *I) {
return false;
}
-unsigned X86FastISel::TargetMaterializeConstant(Constant *C) {
+unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
EVT VT;
if (!isTypeLegal(C->getType(), VT))
return false;
@@ -1728,7 +1730,7 @@ unsigned X86FastISel::TargetMaterializeConstant(Constant *C) {
return ResultReg;
}
-unsigned X86FastISel::TargetMaterializeAlloca(AllocaInst *C) {
+unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
// Fail on dynamic allocas. At this point, getRegForValue has already
// checked its CSE maps, so if we're here trying to handle a dynamic
// alloca, we're not going to succeed. X86SelectAddress has a
@@ -1753,12 +1755,13 @@ namespace llvm {
llvm::FastISel *X86::createFastISel(MachineFunction &mf,
DenseMap<const Value *, unsigned> &vm,
DenseMap<const BasicBlock *, MachineBasicBlock *> &bm,
- DenseMap<const AllocaInst *, int> &am
+ DenseMap<const AllocaInst *, int> &am,
+ std::vector<std::pair<MachineInstr*, unsigned> > &pn
#ifndef NDEBUG
- , SmallSet<Instruction*, 8> &cil
+ , SmallSet<const Instruction *, 8> &cil
#endif
) {
- return new X86FastISel(mf, vm, bm, am
+ return new X86FastISel(mf, vm, bm, am, pn
#ifndef NDEBUG
, cil
#endif
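Nearly all of the X86FastISel churn is one mechanical change: fast instruction selection now sees the IR as const, so Instruction*, Value*, GlobalValue* and friends become pointers-to-const, and CallSite becomes ImmutableCallSite, its read-only counterpart with the same accessors. A minimal usage sketch of the latter (the function itself is invented for illustration; the CallSite APIs are the ones the patch uses):

#include "llvm/Attributes.h"
#include "llvm/Instructions.h"
#include "llvm/Support/CallSite.h"
using namespace llvm;

// Sketch: inspect a call without mutating it, the same shape as the
// X86SelectCall argument loop above.
static unsigned countByValArgs(const CallInst *CI) {
  ImmutableCallSite CS(CI);           // const view of a call or invoke
  unsigned ByVal = 0, ArgNo = 1;      // attribute indices are 1-based
  for (ImmutableCallSite::arg_iterator I = CS.arg_begin(),
       E = CS.arg_end(); I != E; ++I, ++ArgNo)
    if (CS.paramHasAttr(ArgNo, Attribute::ByVal))
      ++ByVal;
  return ByVal;
}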
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 6d6fe771d9a9..93460ef308cc 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -1088,8 +1088,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
// 'f' constraint. These should be turned into the current ST(x) register
// in the machine instr. Also, any kills should be explicitly popped after
// the inline asm.
- unsigned Kills[7];
- unsigned NumKills = 0;
+ unsigned Kills = 0;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &Op = MI->getOperand(i);
if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
@@ -1103,7 +1102,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
// asm. We just remember it for now, and pop them all off at the end in
// a batch.
if (Op.isKill())
- Kills[NumKills++] = FPReg;
+ Kills |= 1U << FPReg;
}
// If this asm kills any FP registers (is the last use of them) we must
@@ -1114,9 +1113,11 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
// Note: this might be a non-optimal pop sequence. We might be able to do
// better by trying to pop in stack order or something.
MachineBasicBlock::iterator InsertPt = MI;
- while (NumKills)
- freeStackSlotAfter(InsertPt, Kills[--NumKills]);
-
+ while (Kills) {
+ unsigned FPReg = CountTrailingZeros_32(Kills);
+ freeStackSlotAfter(InsertPt, FPReg);
+ Kills &= ~(1U << FPReg);
+ }
// Don't delete the inline asm!
return;
}
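The fix above replaces an array of killed FP registers with a bitmask over the seven stack slots, drained with the standard lowest-set-bit loop. The idiom in isolation (CountTrailingZeros_32 is the existing helper from llvm/Support/MathExtras.h):

#include "llvm/Support/MathExtras.h"
using namespace llvm;

// Sketch: visit each set bit of Kills exactly once, lowest index first,
// clearing bits as they are handled.
static void drainKills(unsigned Kills) {
  while (Kills) {
    unsigned FPReg = CountTrailingZeros_32(Kills); // lowest set bit index
    // the pass calls freeStackSlotAfter(InsertPt, FPReg) at this point
    Kills &= ~(1U << FPReg);                       // clear that bit
  }
}

Note the visiting order changes from most-recently-recorded-first to lowest-register-first; the comment retained above already flags the pop sequence as possibly non-optimal, so the order was not load-bearing.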
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index da45dac807f6..fd8bb1e860f7 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -15,12 +15,10 @@
#define DEBUG_TYPE "x86-isel"
#include "X86.h"
#include "X86InstrBuilder.h"
-#include "X86ISelLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
-#include "llvm/GlobalValue.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/CFG.h"
@@ -57,25 +55,24 @@ namespace {
FrameIndexBase
} BaseType;
- struct { // This is really a union, discriminated by BaseType!
- SDValue Reg;
- int FrameIndex;
- } Base;
+ // This is really a union, discriminated by BaseType!
+ SDValue Base_Reg;
+ int Base_FrameIndex;
unsigned Scale;
SDValue IndexReg;
int32_t Disp;
SDValue Segment;
- GlobalValue *GV;
- Constant *CP;
- BlockAddress *BlockAddr;
+ const GlobalValue *GV;
+ const Constant *CP;
+ const BlockAddress *BlockAddr;
const char *ES;
int JT;
unsigned Align; // CP alignment.
unsigned char SymbolFlags; // X86II::MO_*
X86ISelAddressMode()
- : BaseType(RegBase), Scale(1), IndexReg(), Disp(0),
+ : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
Segment(), GV(0), CP(0), BlockAddr(0), ES(0), JT(-1), Align(0),
SymbolFlags(X86II::MO_NO_FLAG) {
}
@@ -85,7 +82,7 @@ namespace {
}
bool hasBaseOrIndexReg() const {
- return IndexReg.getNode() != 0 || Base.Reg.getNode() != 0;
+ return IndexReg.getNode() != 0 || Base_Reg.getNode() != 0;
}
/// isRIPRelative - Return true if this addressing mode is already RIP
@@ -93,24 +90,24 @@ namespace {
bool isRIPRelative() const {
if (BaseType != RegBase) return false;
if (RegisterSDNode *RegNode =
- dyn_cast_or_null<RegisterSDNode>(Base.Reg.getNode()))
+ dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode()))
return RegNode->getReg() == X86::RIP;
return false;
}
void setBaseReg(SDValue Reg) {
BaseType = RegBase;
- Base.Reg = Reg;
+ Base_Reg = Reg;
}
void dump() {
dbgs() << "X86ISelAddressMode " << this << '\n';
- dbgs() << "Base.Reg ";
- if (Base.Reg.getNode() != 0)
- Base.Reg.getNode()->dump();
+ dbgs() << "Base_Reg ";
+ if (Base_Reg.getNode() != 0)
+ Base_Reg.getNode()->dump();
else
dbgs() << "nul";
- dbgs() << " Base.FrameIndex " << Base.FrameIndex << '\n'
+ dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n'
<< " Scale" << Scale << '\n'
<< "IndexReg ";
if (IndexReg.getNode() != 0)
@@ -162,7 +159,7 @@ namespace {
class X86DAGToDAGISel : public SelectionDAGISel {
/// X86Lowering - This object fully describes how to lower LLVM code to an
/// X86-specific SelectionDAG.
- X86TargetLowering &X86Lowering;
+ const X86TargetLowering &X86Lowering;
/// Subtarget - Keep a pointer to the X86Subtarget around so that we can
/// make the right decision when generating code for different targets.
@@ -183,7 +180,7 @@ namespace {
return "X86 DAG->DAG Instruction Selection";
}
- virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF);
+ virtual void EmitFunctionEntryCode();
virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const;
@@ -235,8 +232,8 @@ namespace {
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment) {
Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
- CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) :
- AM.Base.Reg;
+ CurDAG->getTargetFrameIndex(AM.Base_FrameIndex, TLI.getPointerTy()) :
+ AM.Base_Reg;
Scale = getI8Imm(AM.Scale);
Index = AM.IndexReg;
// These are 32-bit even in 64-bit mode since RIP relative offset
@@ -546,11 +543,11 @@ void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
TII->get(X86::CALLpcrel32)).addExternalSymbol("__main");
}
-void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) {
+void X86DAGToDAGISel::EmitFunctionEntryCode() {
// If this is main, emit special code for main.
- MachineBasicBlock *BB = MF.begin();
- if (Fn.hasExternalLinkage() && Fn.getName() == "main")
- EmitSpecialCodeForMain(BB, MF.getFrameInfo());
+ if (const Function *Fn = MF->getFunction())
+ if (Fn->hasExternalLinkage() && Fn->getName() == "main")
+ EmitSpecialCodeForMain(MF->begin(), MF->getFrameInfo());
}
@@ -675,8 +672,8 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
// a smaller encoding and avoids a scaled-index.
if (AM.Scale == 2 &&
AM.BaseType == X86ISelAddressMode::RegBase &&
- AM.Base.Reg.getNode() == 0) {
- AM.Base.Reg = AM.IndexReg;
+ AM.Base_Reg.getNode() == 0) {
+ AM.Base_Reg = AM.IndexReg;
AM.Scale = 1;
}
@@ -687,15 +684,34 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
Subtarget->is64Bit() &&
AM.Scale == 1 &&
AM.BaseType == X86ISelAddressMode::RegBase &&
- AM.Base.Reg.getNode() == 0 &&
+ AM.Base_Reg.getNode() == 0 &&
AM.IndexReg.getNode() == 0 &&
AM.SymbolFlags == X86II::MO_NO_FLAG &&
AM.hasSymbolicDisplacement())
- AM.Base.Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
+ AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
return false;
}
+/// isLogicallyAddWithConstant - Return true if this node is semantically an
+/// add of a value with a constant integer.
+static bool isLogicallyAddWithConstant(SDValue V, SelectionDAG *CurDAG) {
+ // Check for (add x, Cst)
+ if (V->getOpcode() == ISD::ADD)
+ return isa<ConstantSDNode>(V->getOperand(1));
+
+ // Check for (or x, Cst), where Cst & x == 0.
+ if (V->getOpcode() != ISD::OR ||
+ !isa<ConstantSDNode>(V->getOperand(1)))
+ return false;
+
+ // Handle "X | C" as "X + C" iff X is known to have C bits clear.
+ ConstantSDNode *CN = cast<ConstantSDNode>(V->getOperand(1));
+
+ // Check to see if the LHS & C is zero.
+ return CurDAG->MaskedValueIsZero(V->getOperand(0), CN->getAPIntValue());
+}
+
bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
X86ISelListener &DeadNodes,
unsigned Depth) {
@@ -762,9 +778,9 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
case ISD::FrameIndex:
if (AM.BaseType == X86ISelAddressMode::RegBase
- && AM.Base.Reg.getNode() == 0) {
+ && AM.Base_Reg.getNode() == 0) {
AM.BaseType = X86ISelAddressMode::FrameIndexBase;
- AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
+ AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
return false;
}
break;
@@ -787,8 +803,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
// Okay, we know that we have a scale by now. However, if the scaled
// value is an add of something and a constant, we can fold the
// constant into the disp field here.
- if (ShVal.getNode()->getOpcode() == ISD::ADD &&
- isa<ConstantSDNode>(ShVal.getNode()->getOperand(1))) {
+ if (isLogicallyAddWithConstant(ShVal, CurDAG)) {
AM.IndexReg = ShVal.getNode()->getOperand(0);
ConstantSDNode *AddVal =
cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
@@ -816,7 +831,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
case X86ISD::MUL_IMM:
// X*[3,5,9] -> X+X*[2,4,8]
if (AM.BaseType == X86ISelAddressMode::RegBase &&
- AM.Base.Reg.getNode() == 0 &&
+ AM.Base_Reg.getNode() == 0 &&
AM.IndexReg.getNode() == 0) {
if (ConstantSDNode
*CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1)))
@@ -847,7 +862,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
Reg = N.getNode()->getOperand(0);
}
- AM.IndexReg = AM.Base.Reg = Reg;
+ AM.IndexReg = AM.Base_Reg = Reg;
return false;
}
}
@@ -892,8 +907,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
// If the base is a register with multiple uses, this
// transformation may save a mov.
if ((AM.BaseType == X86ISelAddressMode::RegBase &&
- AM.Base.Reg.getNode() &&
- !AM.Base.Reg.getNode()->hasOneUse()) ||
+ AM.Base_Reg.getNode() &&
+ !AM.Base_Reg.getNode()->hasOneUse()) ||
AM.BaseType == X86ISelAddressMode::FrameIndexBase)
--Cost;
// If the folded LHS was interesting, this transformation saves
@@ -963,9 +978,9 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
// see if we can just put each operand into a register and fold at least
// the add.
if (AM.BaseType == X86ISelAddressMode::RegBase &&
- !AM.Base.Reg.getNode() &&
+ !AM.Base_Reg.getNode() &&
!AM.IndexReg.getNode()) {
- AM.Base.Reg = N.getNode()->getOperand(0);
+ AM.Base_Reg = N.getNode()->getOperand(0);
AM.IndexReg = N.getNode()->getOperand(1);
AM.Scale = 1;
return false;
@@ -975,14 +990,11 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
case ISD::OR:
// Handle "X | C" as "X + C" iff X is known to have C bits clear.
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ if (isLogicallyAddWithConstant(N, CurDAG)) {
X86ISelAddressMode Backup = AM;
+ ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1));
uint64_t Offset = CN->getSExtValue();
- // Check to see if the LHS & C is zero.
- if (!CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue()))
- break;
-
// Start with the LHS as an addr mode.
if (!MatchAddressRecursively(N.getOperand(0), AM, DeadNodes, Depth+1) &&
// Address could not have picked a GV address for the displacement.
@@ -1127,7 +1139,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
/// specified addressing mode without any further recursion.
bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) {
// Is the base register already occupied?
- if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.getNode()) {
+ if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
// If so, check to see if the scale index register is set.
if (AM.IndexReg.getNode() == 0) {
AM.IndexReg = N;
@@ -1141,7 +1153,7 @@ bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) {
// Default, generate it as a register.
AM.BaseType = X86ISelAddressMode::RegBase;
- AM.Base.Reg = N;
+ AM.Base_Reg = N;
return false;
}
@@ -1157,8 +1169,8 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N, SDValue &Base,
EVT VT = N.getValueType();
if (AM.BaseType == X86ISelAddressMode::RegBase) {
- if (!AM.Base.Reg.getNode())
- AM.Base.Reg = CurDAG->getRegister(0, VT);
+ if (!AM.Base_Reg.getNode())
+ AM.Base_Reg = CurDAG->getRegister(0, VT);
}
if (!AM.IndexReg.getNode())
@@ -1185,7 +1197,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
PatternNodeWithChain.hasOneUse() &&
IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
- IsLegalToFold(N.getOperand(0), N.getNode(), Root)) {
+ IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp,Segment))
return false;
@@ -1202,7 +1214,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) &&
N.getOperand(0).getOperand(0).hasOneUse() &&
IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
- IsLegalToFold(N.getOperand(0), N.getNode(), Root)) {
+ IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
// Okay, this is a zero extending load. Fold it.
LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0));
if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
@@ -1234,10 +1246,10 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N,
EVT VT = N.getValueType();
unsigned Complexity = 0;
if (AM.BaseType == X86ISelAddressMode::RegBase)
- if (AM.Base.Reg.getNode())
+ if (AM.Base_Reg.getNode())
Complexity = 1;
else
- AM.Base.Reg = CurDAG->getRegister(0, VT);
+ AM.Base_Reg = CurDAG->getRegister(0, VT);
else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
Complexity = 4;
@@ -1265,7 +1277,7 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N,
Complexity += 2;
}
- if (AM.Disp && (AM.Base.Reg.getNode() || AM.IndexReg.getNode()))
+ if (AM.Disp && (AM.Base_Reg.getNode() || AM.IndexReg.getNode()))
Complexity++;
// If it isn't worth using an LEA, reject it.
@@ -1287,7 +1299,7 @@ bool X86DAGToDAGISel::SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base,
X86ISelAddressMode AM;
AM.GV = GA->getGlobal();
AM.Disp += GA->getOffset();
- AM.Base.Reg = CurDAG->getRegister(0, N.getValueType());
+ AM.Base_Reg = CurDAG->getRegister(0, N.getValueType());
AM.SymbolFlags = GA->getTargetFlags();
if (N.getValueType() == MVT::i32) {
@@ -1309,7 +1321,7 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
SDValue &Segment) {
if (!ISD::isNON_EXTLoad(N.getNode()) ||
!IsProfitableToFold(N, P, P) ||
- !IsLegalToFold(N, P, P))
+ !IsLegalToFold(N, P, P, OptLevel))
return false;
return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment);
@@ -1841,6 +1853,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
// Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
// use a smaller encoding.
+ if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse())
+ // Look past the truncate if CMP is the only use of it.
+ N0 = N0.getOperand(0);
if (N0.getNode()->getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
N0.getValueType() != MVT::i8 &&
X86::isZeroNode(N1)) {
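isLogicallyAddWithConstant, added above, rests on a bit-level identity: when x and C share no set bits, x + C produces no carries, so x + C, x | C and x ^ C all coincide; MaskedValueIsZero is what proves the disjointness. A self-contained check of the identity:

#include <assert.h>
#include <stdint.h>

int main() {
  // With (x & C) == 0 there are no carries, so OR equals ADD. This is what
  // lets the address matcher fold (or x, C) into the displacement exactly
  // as it folds (add x, C).
  uint32_t C = 0x3;                     // low two bits
  for (uint32_t i = 0; i < 4096; ++i) {
    uint32_t x = i << 2;                // guarantees (x & C) == 0
    assert((x | C) == x + C);
    assert((x ^ C) == x + C);
  }
  return 0;
}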
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 64702f1d5c9f..6ce9ab711bbc 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -57,14 +57,6 @@ STATISTIC(NumTailCalls, "Number of tail calls");
static cl::opt<bool>
DisableMMX("disable-mmx", cl::Hidden, cl::desc("Disable use of MMX"));
-// Disable16Bit - 16-bit operations typically have a larger encoding than
-// corresponding 32-bit instructions, and 16-bit code is slow on some
-// processors. This is an experimental flag to disable 16-bit operations
-// (which forces them to be Legalized to 32-bit operations).
-static cl::opt<bool>
-Disable16Bit("disable-16bit", cl::Hidden,
- cl::desc("Disable use of 16-bit instructions"));
-
// Forward declarations.
static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
SDValue V2);
@@ -120,8 +112,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// Set up the register classes.
addRegisterClass(MVT::i8, X86::GR8RegisterClass);
- if (!Disable16Bit)
- addRegisterClass(MVT::i16, X86::GR16RegisterClass);
+ addRegisterClass(MVT::i16, X86::GR16RegisterClass);
addRegisterClass(MVT::i32, X86::GR32RegisterClass);
if (Subtarget->is64Bit())
addRegisterClass(MVT::i64, X86::GR64RegisterClass);
@@ -130,11 +121,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// We don't accept any truncstore of integer registers.
setTruncStoreAction(MVT::i64, MVT::i32, Expand);
- if (!Disable16Bit)
- setTruncStoreAction(MVT::i64, MVT::i16, Expand);
+ setTruncStoreAction(MVT::i64, MVT::i16, Expand);
setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
- if (!Disable16Bit)
- setTruncStoreAction(MVT::i32, MVT::i16, Expand);
+ setTruncStoreAction(MVT::i32, MVT::i16, Expand);
setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
setTruncStoreAction(MVT::i16, MVT::i8, Expand);
@@ -285,13 +274,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::CTTZ , MVT::i8 , Custom);
setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
- if (Disable16Bit) {
- setOperationAction(ISD::CTTZ , MVT::i16 , Expand);
- setOperationAction(ISD::CTLZ , MVT::i16 , Expand);
- } else {
- setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
- setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
- }
+ setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
+ setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
@@ -308,19 +292,13 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SELECT , MVT::i1 , Promote);
// X86 wants to expand cmov itself.
setOperationAction(ISD::SELECT , MVT::i8 , Custom);
- if (Disable16Bit)
- setOperationAction(ISD::SELECT , MVT::i16 , Expand);
- else
- setOperationAction(ISD::SELECT , MVT::i16 , Custom);
+ setOperationAction(ISD::SELECT , MVT::i16 , Custom);
setOperationAction(ISD::SELECT , MVT::i32 , Custom);
setOperationAction(ISD::SELECT , MVT::f32 , Custom);
setOperationAction(ISD::SELECT , MVT::f64 , Custom);
setOperationAction(ISD::SELECT , MVT::f80 , Custom);
setOperationAction(ISD::SETCC , MVT::i8 , Custom);
- if (Disable16Bit)
- setOperationAction(ISD::SETCC , MVT::i16 , Expand);
- else
- setOperationAction(ISD::SETCC , MVT::i16 , Custom);
+ setOperationAction(ISD::SETCC , MVT::i16 , Custom);
setOperationAction(ISD::SETCC , MVT::i32 , Custom);
setOperationAction(ISD::SETCC , MVT::f32 , Custom);
setOperationAction(ISD::SETCC , MVT::f64 , Custom);
@@ -623,11 +601,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// FIXME: In order to prevent SSE instructions being expanded to MMX ones
// with -msoft-float, disable use of MMX as well.
if (!UseSoftFloat && !DisableMMX && Subtarget->hasMMX()) {
- addRegisterClass(MVT::v8i8, X86::VR64RegisterClass);
- addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
- addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);
- addRegisterClass(MVT::v2f32, X86::VR64RegisterClass);
- addRegisterClass(MVT::v1i64, X86::VR64RegisterClass);
+ addRegisterClass(MVT::v8i8, X86::VR64RegisterClass, false);
+ addRegisterClass(MVT::v4i16, X86::VR64RegisterClass, false);
+ addRegisterClass(MVT::v2i32, X86::VR64RegisterClass, false);
+ addRegisterClass(MVT::v2f32, X86::VR64RegisterClass, false);
+ addRegisterClass(MVT::v1i64, X86::VR64RegisterClass, false);
setOperationAction(ISD::ADD, MVT::v8i8, Legal);
setOperationAction(ISD::ADD, MVT::v4i16, Legal);
@@ -1067,23 +1045,27 @@ unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
}
/// getOptimalMemOpType - Returns the target specific optimal type for load
-/// and store operations as a result of memset, memcpy, and memmove lowering.
-/// If DstAlign is zero that means it's safe to destination alignment can
-/// satisfy any constraint. Similarly if SrcAlign is zero it means there
-/// isn't a need to check it against alignment requirement, probably because
-/// the source does not need to be loaded. If 'NonScalarIntSafe' is true, that
-/// means it's safe to return a non-scalar-integer type, e.g. constant string
-/// source or loaded from memory. It returns EVT::Other if SelectionDAG should
-/// be responsible for determining it.
+/// and store operations as a result of memset, memcpy, and memmove
+/// lowering. If DstAlign is zero, the destination alignment can satisfy
+/// any constraint. Similarly, if SrcAlign is zero, there is no need to
+/// check it against an alignment requirement, probably because the source
+/// does not need to be loaded. If
+/// 'NonScalarIntSafe' is true, that means it's safe to return a
+/// non-scalar-integer type, e.g. empty string source, constant, or loaded
+/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
+/// constant so it does not need to be loaded.
+/// It returns EVT::Other if the type should be determined using generic
+/// target-independent logic.
EVT
X86TargetLowering::getOptimalMemOpType(uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
bool NonScalarIntSafe,
- SelectionDAG &DAG) const {
+ bool MemcpyStrSrc,
+ MachineFunction &MF) const {
// FIXME: This turns off use of xmm stores for memset/memcpy on targets like
// linux. This is because the stack realignment code can't handle certain
// cases like PR2962. This should be removed when PR2962 is fixed.
- const Function *F = DAG.getMachineFunction().getFunction();
+ const Function *F = MF.getFunction();
if (NonScalarIntSafe &&
!F->hasFnAttr(Attribute::NoImplicitFloat)) {
if (Size >= 16 &&
@@ -1095,11 +1077,14 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
return MVT::v4i32;
if (Subtarget->hasSSE1())
return MVT::v4f32;
- } else if (Size >= 8 &&
+ } else if (!MemcpyStrSrc && Size >= 8 &&
!Subtarget->is64Bit() &&
Subtarget->getStackAlignment() >= 8 &&
- Subtarget->hasSSE2())
+ Subtarget->hasSSE2()) {
+ // Do not use f64 to lower memcpy if the source is a string constant.
+ // It's better to use i32 to avoid the loads.
return MVT::f64;
+ }
}
if (Subtarget->is64Bit() && Size >= 8)
return MVT::i64;
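Condensed, the selection the hunk above performs is a short preference ladder; restated as a plain function for reading (the names and flags here are a sketch, not the actual interface):

enum MemOpTy { V4I32_16B, F64_8B, I64_8B, I32_4B };

// Sketch of the same policy: widest safe type first, with the new
// MemcpyStrSrc flag vetoing only the 32-bit f64 case, since a constant
// string is cheaper to materialize through i32 immediates than FP loads.
static MemOpTy pickMemOpType(unsigned long long Size, bool Is64Bit,
                             bool HasSSE2, bool DstAlign16OK,
                             bool StackAlign8, bool MemcpyStrSrc) {
  if (Size >= 16 && HasSSE2 && DstAlign16OK)
    return V4I32_16B;                     // one XMM op per 16 bytes
  if (Size >= 8 && !Is64Bit && StackAlign8 && HasSSE2 && !MemcpyStrSrc)
    return F64_8B;                        // 8-byte FP moves on x86-32
  if (Is64Bit && Size >= 8)
    return I64_8B;
  return I32_4B;
}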
@@ -1182,7 +1167,7 @@ bool
X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<EVT> &OutTys,
const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
RVLocs, *DAG.getContext());
@@ -1193,7 +1178,9 @@ SDValue
X86TargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
- DebugLoc dl, SelectionDAG &DAG) {
+ DebugLoc dl, SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
@@ -1211,7 +1198,8 @@ X86TargetLowering::LowerReturn(SDValue Chain,
SmallVector<SDValue, 6> RetOps;
RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
// Operand #1 = Bytes To Pop
- RetOps.push_back(DAG.getTargetConstant(getBytesToPopOnReturn(), MVT::i16));
+ RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(),
+ MVT::i16));
// Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
@@ -1287,7 +1275,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals) const {
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
@@ -1304,7 +1292,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
// If this is x86-64, and we disabled SSE, we can't return FP values
if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
- llvm_report_error("SSE register return with SSE disabled");
+ report_fatal_error("SSE register return with SSE disabled");
}
// If this is a call to a function that returns an fp value on the floating
@@ -1384,7 +1372,8 @@ ArgsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
/// IsCalleePop - Determines whether the callee is required to pop its
/// own arguments. Callee pop is necessary to support tail calls.
-bool X86TargetLowering::IsCalleePop(bool IsVarArg, CallingConv::ID CallingConv){
+bool X86TargetLowering::IsCalleePop(bool IsVarArg,
+ CallingConv::ID CallingConv) const {
if (IsVarArg)
return false;
@@ -1457,7 +1446,7 @@ X86TargetLowering::LowerMemArgument(SDValue Chain,
DebugLoc dl, SelectionDAG &DAG,
const CCValAssign &VA,
MachineFrameInfo *MFI,
- unsigned i) {
+ unsigned i) const {
// Create the nodes corresponding to a load from this parameter slot.
ISD::ArgFlagsTy Flags = Ins[i].Flags;
bool AlwaysUseMutable = FuncIsMadeTailCallSafe(CallConv);
@@ -1496,7 +1485,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl,
SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals)
+ const {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
@@ -1607,7 +1597,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
// the start of the first vararg value... for expansion of llvm.va_start.
if (isVarArg) {
if (Is64Bit || CallConv != CallingConv::X86_FastCall) {
- VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize, true, false);
+ FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize,
+ true, false));
}
if (Is64Bit) {
unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0;
@@ -1655,16 +1646,17 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
// For X86-64, if there are vararg parameters that are passed via
// registers, then we must store them to their spots on the stack so they
// may be loaded by dereferencing the result of va_next.
- VarArgsGPOffset = NumIntRegs * 8;
- VarArgsFPOffset = TotalNumIntRegs * 8 + NumXMMRegs * 16;
- RegSaveFrameIndex = MFI->CreateStackObject(TotalNumIntRegs * 8 +
- TotalNumXMMRegs * 16, 16,
- false);
+ FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
+ FuncInfo->setVarArgsFPOffset(TotalNumIntRegs * 8 + NumXMMRegs * 16);
+ FuncInfo->setRegSaveFrameIndex(
+ MFI->CreateStackObject(TotalNumIntRegs * 8 + TotalNumXMMRegs * 16, 16,
+ false));
// Store the integer parameter registers.
SmallVector<SDValue, 8> MemOps;
- SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
- unsigned Offset = VarArgsGPOffset;
+ SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
+ getPointerTy());
+ unsigned Offset = FuncInfo->getVarArgsGPOffset();
for (; NumIntRegs != TotalNumIntRegs; ++NumIntRegs) {
SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
DAG.getIntPtrConstant(Offset));
@@ -1673,7 +1665,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
SDValue Store =
DAG.getStore(Val.getValue(1), dl, Val, FIN,
- PseudoSourceValue::getFixedStack(RegSaveFrameIndex),
+ PseudoSourceValue::getFixedStack(
+ FuncInfo->getRegSaveFrameIndex()),
Offset, false, false, 0);
MemOps.push_back(Store);
Offset += 8;
@@ -1688,8 +1681,10 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
SDValue ALVal = DAG.getCopyFromReg(DAG.getEntryNode(), dl, AL, MVT::i8);
SaveXMMOps.push_back(ALVal);
- SaveXMMOps.push_back(DAG.getIntPtrConstant(RegSaveFrameIndex));
- SaveXMMOps.push_back(DAG.getIntPtrConstant(VarArgsFPOffset));
+ SaveXMMOps.push_back(DAG.getIntPtrConstant(
+ FuncInfo->getRegSaveFrameIndex()));
+ SaveXMMOps.push_back(DAG.getIntPtrConstant(
+ FuncInfo->getVarArgsFPOffset()));
for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) {
unsigned VReg = MF.addLiveIn(XMMArgRegs[NumXMMRegs],
@@ -1710,22 +1705,22 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
// Some CCs need callee pop.
if (IsCalleePop(isVarArg, CallConv)) {
- BytesToPopOnReturn = StackSize; // Callee pops everything.
+ FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
} else {
- BytesToPopOnReturn = 0; // Callee pops nothing.
+ FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
// If this is an sret function, the return should pop the hidden pointer.
if (!Is64Bit && !IsTailCallConvention(CallConv) && ArgsAreStructReturn(Ins))
- BytesToPopOnReturn = 4;
+ FuncInfo->setBytesToPopOnReturn(4);
}
if (!Is64Bit) {
- RegSaveFrameIndex = 0xAAAAAAA; // RegSaveFrameIndex is X86-64 only.
+ // RegSaveFrameIndex is X86-64 only.
+ FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
if (CallConv == CallingConv::X86_FastCall)
- VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs.
+ // fastcc functions can't have varargs.
+ FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
}
- FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);
-
return Chain;
}
@@ -1734,7 +1729,7 @@ X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
SDValue StackPtr, SDValue Arg,
DebugLoc dl, SelectionDAG &DAG,
const CCValAssign &VA,
- ISD::ArgFlagsTy Flags) {
+ ISD::ArgFlagsTy Flags) const {
const unsigned FirstStackArgOffset = (Subtarget->isTargetWin64() ? 32 : 0);
unsigned LocMemOffset = FirstStackArgOffset + VA.getLocMemOffset();
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
@@ -1753,7 +1748,7 @@ SDValue
X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
SDValue &OutRetAddr, SDValue Chain,
bool IsTailCall, bool Is64Bit,
- int FPDiff, DebugLoc dl) {
+ int FPDiff, DebugLoc dl) const {
// Adjust the Return address stack slot.
EVT VT = getPointerTy();
OutRetAddr = getReturnAddressFrameIndex(DAG);
@@ -1790,7 +1785,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
bool Is64Bit = Subtarget->is64Bit();
bool IsStructRet = CallIsStructReturn(Outs);
@@ -2060,7 +2055,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// We should use extra load for direct calls to dllimported functions in
// non-JIT mode.
- GlobalValue *GV = G->getGlobal();
+ const GlobalValue *GV = G->getGlobal();
if (!GV->hasDLLImportLinkage()) {
unsigned char OpFlags = 0;
@@ -2219,8 +2214,9 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
/// GetAlignedArgumentStackSize - Round the stack size up to e.g. 16n + 12
/// bytes so that, together with the 4-byte return address slot, a 16 byte
/// alignment requirement is met.
-unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
- SelectionDAG& DAG) {
+unsigned
+X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
+ SelectionDAG& DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
const TargetMachine &TM = MF.getTarget();
const TargetFrameInfo &TFI = *TM.getFrameInfo();
@@ -2308,9 +2304,11 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// If -tailcallopt is specified, make fastcc functions tail-callable.
const MachineFunction &MF = DAG.getMachineFunction();
const Function *CallerF = DAG.getMachineFunction().getFunction();
+ CallingConv::ID CallerCC = CallerF->getCallingConv();
+ bool CCMatch = CallerCC == CalleeCC;
+
if (GuaranteedTailCallOpt) {
- if (IsTailCallConvention(CalleeCC) &&
- CallerF->getCallingConv() == CalleeCC)
+ if (IsTailCallConvention(CalleeCC) && CCMatch)
return true;
return false;
}
@@ -2348,13 +2346,43 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
CCState CCInfo(CalleeCC, false, getTargetMachine(),
RVLocs, *DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
- for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
CCValAssign &VA = RVLocs[i];
if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1)
return false;
}
}
+ // If the calling conventions do not match, then we'd better make sure the
+ // results are returned in the same way as what the caller expects.
+ if (!CCMatch) {
+ SmallVector<CCValAssign, 16> RVLocs1;
+ CCState CCInfo1(CalleeCC, false, getTargetMachine(),
+ RVLocs1, *DAG.getContext());
+ CCInfo1.AnalyzeCallResult(Ins, RetCC_X86);
+
+ SmallVector<CCValAssign, 16> RVLocs2;
+ CCState CCInfo2(CallerCC, false, getTargetMachine(),
+ RVLocs2, *DAG.getContext());
+ CCInfo2.AnalyzeCallResult(Ins, RetCC_X86);
+
+ if (RVLocs1.size() != RVLocs2.size())
+ return false;
+ for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
+ if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
+ return false;
+ if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
+ return false;
+ if (RVLocs1[i].isRegLoc()) {
+ if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
+ return false;
+ } else {
+ if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
+ return false;
+ }
+ }
+ }
+
// If the callee takes no arguments then go on to check the results of the
// call.
if (!Outs.empty()) {
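The new block handles tail calls across differing calling conventions: run the return-value analysis once under the callee's convention and once under the caller's, then require the two CCValAssign lists to agree slot for slot, since after the tail call only the callee's placement will physically happen. The comparison, distilled into a standalone predicate (a sketch; the code above inlines it):

#include "llvm/CodeGen/CallingConvLower.h"
using namespace llvm;

// Two location vectors are compatible only if every return value lands in
// the same kind of slot, with the same register or stack offset.
static bool returnLocsMatch(const SmallVectorImpl<CCValAssign> &A,
                            const SmallVectorImpl<CCValAssign> &B) {
  if (A.size() != B.size())
    return false;
  for (unsigned i = 0, e = A.size(); i != e; ++i) {
    if (A[i].isRegLoc() != B[i].isRegLoc() ||
        A[i].getLocInfo() != B[i].getLocInfo())
      return false;
    if (A[i].isRegLoc()) {
      if (A[i].getLocReg() != B[i].getLocReg())
        return false;
    } else if (A[i].getLocMemOffset() != B[i].getLocMemOffset()) {
      return false;
    }
  }
  return true;
}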
@@ -2401,12 +2429,13 @@ FastISel *
X86TargetLowering::createFastISel(MachineFunction &mf,
DenseMap<const Value *, unsigned> &vm,
DenseMap<const BasicBlock*, MachineBasicBlock*> &bm,
- DenseMap<const AllocaInst *, int> &am
+ DenseMap<const AllocaInst *, int> &am,
+ std::vector<std::pair<MachineInstr*, unsigned> > &pn
#ifndef NDEBUG
- , SmallSet<Instruction*, 8> &cil
+ , SmallSet<const Instruction *, 8> &cil
#endif
- ) {
- return X86::createFastISel(mf, vm, bm, am
+ ) const {
+ return X86::createFastISel(mf, vm, bm, am, pn
#ifndef NDEBUG
, cil
#endif
@@ -2419,7 +2448,7 @@ X86TargetLowering::createFastISel(MachineFunction &mf,
//===----------------------------------------------------------------------===//
-SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
+SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
int ReturnAddrIndex = FuncInfo->getRAIndex();
@@ -3440,7 +3469,7 @@ unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, int NumElems,
/// FIXME: split into pslldqi, psrldqi, palignr variants.
static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
- int NumElems = SVOp->getValueType(0).getVectorNumElements();
+ unsigned NumElems = SVOp->getValueType(0).getVectorNumElements();
isLeft = true;
unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems, true, DAG);
@@ -3452,11 +3481,12 @@ static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
}
bool SeenV1 = false;
bool SeenV2 = false;
- for (int i = NumZeros; i < NumElems; ++i) {
- int Val = isLeft ? (i - NumZeros) : i;
- int Idx = SVOp->getMaskElt(isLeft ? i : (i - NumZeros));
- if (Idx < 0)
+ for (unsigned i = NumZeros; i < NumElems; ++i) {
+ unsigned Val = isLeft ? (i - NumZeros) : i;
+ int Idx_ = SVOp->getMaskElt(isLeft ? i : (i - NumZeros));
+ if (Idx_ < 0)
continue;
+ unsigned Idx = (unsigned) Idx_;
if (Idx < NumElems)
SeenV1 = true;
else {
@@ -3479,7 +3509,8 @@ static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
///
static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
unsigned NumNonZero, unsigned NumZero,
- SelectionDAG &DAG, TargetLowering &TLI) {
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
if (NumNonZero > 8)
return SDValue();
@@ -3524,8 +3555,9 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
///
static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
- unsigned NumNonZero, unsigned NumZero,
- SelectionDAG &DAG, TargetLowering &TLI) {
+ unsigned NumNonZero, unsigned NumZero,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
if (NumNonZero > 4)
return SDValue();
@@ -3567,7 +3599,7 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
SDValue
X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
// Check if the scalar load can be widened into a vector load. And if
// the address is "base + cst" see if the cst can be "absorbed" into
@@ -3699,7 +3731,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
}
SDValue
-X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
+X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
// All zero's are handled with pxor, all one's are handled with pcmpeqd.
if (ISD::isBuildVectorAllZeros(Op.getNode())
@@ -3965,7 +3997,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
}
SDValue
-X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
+X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
// We support concatenate two MMX registers and place them in a MMX
// register. This is better than doing a stack convert.
DebugLoc dl = Op.getDebugLoc();
@@ -3998,7 +4030,8 @@ X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
// 4. [all] mov + pshuflw + pshufhw + N x (pextrw + pinsrw)
static
SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
- SelectionDAG &DAG, X86TargetLowering &TLI) {
+ SelectionDAG &DAG,
+ const X86TargetLowering &TLI) {
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
DebugLoc dl = SVOp->getDebugLoc();
@@ -4241,7 +4274,8 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
// 3. [all] v8i16 shuffle + N x pextrw + rotate + pinsrw
static
SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
- SelectionDAG &DAG, X86TargetLowering &TLI) {
+ SelectionDAG &DAG,
+ const X86TargetLowering &TLI) {
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
DebugLoc dl = SVOp->getDebugLoc();
@@ -4387,7 +4421,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
static
SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
SelectionDAG &DAG,
- TargetLowering &TLI, DebugLoc dl) {
+ const TargetLowering &TLI, DebugLoc dl) {
EVT VT = SVOp->getValueType(0);
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
@@ -4619,7 +4653,7 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
}
SDValue
-X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
+X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
@@ -4806,7 +4840,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
SDValue
X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
if (VT.getSizeInBits() == 8) {
@@ -4860,7 +4894,8 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
SDValue
-X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
if (!isa<ConstantSDNode>(Op.getOperand(1)))
return SDValue();
@@ -4924,7 +4959,8 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
}
SDValue
-X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG){
+X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op,
+ SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
EVT EltVT = VT.getVectorElementType();
DebugLoc dl = Op.getDebugLoc();
@@ -4973,7 +5009,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG){
}
SDValue
-X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
EVT EltVT = VT.getVectorElementType();
@@ -5002,7 +5038,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
}
SDValue
-X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
+X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
if (Op.getValueType() == MVT::v2f32)
return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f32,
@@ -5033,7 +5069,7 @@ X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
// be used to form addressing mode. These wrapped nodes will be selected
// into MOV32ri.
SDValue
-X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
+X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
// In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
@@ -5066,7 +5102,7 @@ X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
return Result;
}
-SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
+SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
// In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
@@ -5100,7 +5136,7 @@ SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
}
SDValue
-X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) {
+X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const {
const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
// In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
@@ -5136,12 +5172,12 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) {
}
SDValue
-X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) {
+X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
// Create the TargetBlockAddressAddress node.
unsigned char OpFlags =
Subtarget->ClassifyBlockAddressReference();
CodeModel::Model M = getTargetMachine().getCodeModel();
- BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
DebugLoc dl = Op.getDebugLoc();
SDValue Result = DAG.getBlockAddress(BA, getPointerTy(),
/*isTarget=*/true, OpFlags);
@@ -5210,7 +5246,7 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
}
SDValue
-X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) {
+X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
return LowerGlobalAddress(GV, Op.getDebugLoc(), Offset, DAG);
@@ -5310,7 +5346,7 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
}
SDValue
-X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) {
+X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
// TODO: implement the "local dynamic" model
  // TODO: implement the "initial exec" model for pic executables
assert(Subtarget->isTargetELF() &&
@@ -5346,7 +5382,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) {
/// LowerShift - Lower SRA_PARTS and friends, which return two i32 values and
/// take a 2 x i32 value to shift plus a shift amount.
-SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) {
+SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
EVT VT = Op.getValueType();
unsigned VTBits = VT.getSizeInBits();
@@ -5390,7 +5426,8 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) {
return DAG.getMergeValues(Ops, 2, dl);
}
-SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
+ SelectionDAG &DAG) const {
EVT SrcVT = Op.getOperand(0).getValueType();
if (SrcVT.isVector()) {
@@ -5426,7 +5463,7 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
SDValue StackSlot,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
// Build the FILD
DebugLoc dl = Op.getDebugLoc();
SDVTList Tys;
@@ -5463,7 +5500,8 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
}
// LowerUINT_TO_FP_i64 - 64-bit unsigned integer to double expansion.
-SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) {
+SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
+ SelectionDAG &DAG) const {
// This algorithm is not obvious. Here it is in C code, more or less:
/*
double uint64_to_double( uint32_t hi, uint32_t lo ) {
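// The C sketch above is cut off by the diff context; what follows is a
// hedged, self-contained scalar rendition of the same exponent-bias trick
// (assumes little-endian IEEE-754 doubles), not the SSE2 sequence the
// lowering actually emits.
#include <cstdint>
#include <cstring>
static double uint64_to_double_sketch(std::uint64_t x) {
  // Splice each 32-bit half into the mantissa of a double whose exponent
  // places it at 2^52 (low half) or 2^84 (high half)...
  std::uint64_t lo = (x & 0xFFFFFFFFu) | 0x4330000000000000ULL; // 2^52 + lo
  std::uint64_t hi = (x >> 32)         | 0x4530000000000000ULL; // 2^84 + hi*2^32
  double dlo, dhi;
  std::memcpy(&dlo, &lo, sizeof dlo);
  std::memcpy(&dhi, &hi, sizeof dhi);
  // ...then subtract the biases; each difference is exact, and the final
  // add performs the single required rounding.
  return (dhi - 0x1.0p84) + (dlo - 0x1.0p52);
}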
@@ -5547,7 +5585,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) {
}
// LowerUINT_TO_FP_i32 - 32-bit unsigned integer to float expansion.
-SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) {
+SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
+ SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
// FP constant to bias correct the final result.
SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL),
@@ -5592,7 +5631,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) {
return Sub;
}
-SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
+ SelectionDAG &DAG) const {
SDValue N0 = Op.getOperand(0);
DebugLoc dl = Op.getDebugLoc();
@@ -5628,7 +5668,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
}
std::pair<SDValue,SDValue> X86TargetLowering::
-FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) {
+FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) const {
DebugLoc dl = Op.getDebugLoc();
EVT DstTy = Op.getValueType();
@@ -5690,7 +5730,8 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) {
return std::make_pair(FIST, StackSlot);
}
-SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
+SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op,
+ SelectionDAG &DAG) const {
if (Op.getValueType().isVector()) {
if (Op.getValueType() == MVT::v2i32 &&
Op.getOperand(0).getValueType() == MVT::v2f64) {
@@ -5709,7 +5750,8 @@ SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
FIST, StackSlot, NULL, 0, false, false, 0);
}
-SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) {
+SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op,
+ SelectionDAG &DAG) const {
std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, false);
SDValue FIST = Vals.first, StackSlot = Vals.second;
assert(FIST.getNode() && "Unexpected failure");
@@ -5719,7 +5761,8 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) {
FIST, StackSlot, NULL, 0, false, false, 0);
}
-SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) {
+SDValue X86TargetLowering::LowerFABS(SDValue Op,
+ SelectionDAG &DAG) const {
LLVMContext *Context = DAG.getContext();
DebugLoc dl = Op.getDebugLoc();
EVT VT = Op.getValueType();
@@ -5746,7 +5789,7 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask);
}
-SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) {
+SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const {
LLVMContext *Context = DAG.getContext();
DebugLoc dl = Op.getDebugLoc();
EVT VT = Op.getValueType();
@@ -5781,7 +5824,7 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) {
}
}
-SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
+SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
LLVMContext *Context = DAG.getContext();
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
@@ -5857,7 +5900,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
/// Emit nodes that will be selected as "test Op0,Op0", or something
/// equivalent.
SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
// CF and OF aren't always set the way we want. Determine which
@@ -5885,15 +5928,20 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
unsigned NumOperands = 0;
switch (Op.getNode()->getOpcode()) {
case ISD::ADD:
- // Due to an isel shortcoming, be conservative if this add is likely to
- // be selected as part of a load-modify-store instruction. When the root
- // node in a match is a store, isel doesn't know how to remap non-chain
- // non-flag uses of other nodes in the match, such as the ADD in this
- // case. This leads to the ADD being left around and reselected, with
- // the result being two adds in the output.
+ // Due to an isel shortcoming, be conservative if this add is
+ // likely to be selected as part of a load-modify-store
+ // instruction. When the root node in a match is a store, isel
+ // doesn't know how to remap non-chain non-flag uses of other
+ // nodes in the match, such as the ADD in this case. This leads
+ // to the ADD being left around and reselected, with the result
+ // being two adds in the output. Alas, even if none of our users
+ // are stores, that doesn't prove we're O.K. Ergo, if we have
+ // any parents that aren't CopyToReg or SETCC, eschew INC/DEC.
+ // A better fix seems to require climbing the DAG back to the
+ // root, and it doesn't seem to be worth the effort.
for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
- UE = Op.getNode()->use_end(); UI != UE; ++UI)
- if (UI->getOpcode() == ISD::STORE)
+ UE = Op.getNode()->use_end(); UI != UE; ++UI)
+ if (UI->getOpcode() != ISD::CopyToReg && UI->getOpcode() != ISD::SETCC)
goto default_case;
if (ConstantSDNode *C =
dyn_cast<ConstantSDNode>(Op.getNode()->getOperand(1))) {
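// Hedged illustration of the conservatism described above. For a source
// pattern like the following, isel wants to fold load+add+store into one
// read-modify-write instruction; converting the ADD node into INC early
// would leave the original ADD to be reselected, yielding two adds.
void bump(int *p) {
  *p += 1;   // (store (add (load p), 1), p) -> addl $1, (%rdi) ideally
}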
@@ -5988,7 +6036,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
/// Emit nodes that will be selected as "cmp Op0,Op1", or something
/// equivalent.
SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op1))
if (C->getAPIntValue() == 0)
return EmitTest(Op0, X86CC, DAG);
@@ -5999,8 +6047,8 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
/// LowerToBT - Result of 'and' is compared against zero. Turn it into a BT node
/// if it's possible.
-static SDValue LowerToBT(SDValue And, ISD::CondCode CC,
- DebugLoc dl, SelectionDAG &DAG) {
+SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
+ DebugLoc dl, SelectionDAG &DAG) const {
SDValue Op0 = And.getOperand(0);
SDValue Op1 = And.getOperand(1);
if (Op0.getOpcode() == ISD::TRUNCATE)
@@ -6031,11 +6079,13 @@ static SDValue LowerToBT(SDValue And, ISD::CondCode CC,
}
if (LHS.getNode()) {
- // If LHS is i8, promote it to i16 with any_extend. There is no i8 BT
+ // If LHS is i8, promote it to i32 with any_extend. There is no i8 BT
// instruction. Since the shift amount is in-range-or-undefined, we know
- // that doing a bittest on the i16 value is ok. We extend to i32 because
+ // that doing a bittest on the i32 value is ok. We extend to i32 because
// the encoding for the i16 version is larger than the i32 version.
- if (LHS.getValueType() == MVT::i8)
+ // Also promote i16 to i32 for performance / code size reasons.
+ if (LHS.getValueType() == MVT::i8 ||
+ LHS.getValueType() == MVT::i16)
LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
// If the operand types disagree, extend the shift amount to match. Since
@@ -6052,7 +6102,7 @@ static SDValue LowerToBT(SDValue And, ISD::CondCode CC,
return SDValue();
}
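// Hedged illustration of the shape LowerToBT matches: a test of a single
// variable bit. With this lowering, code like the following can select to
// bt + setcc rather than a shift plus an and (exact asm varies by
// subtarget).
static bool bit_is_set(unsigned x, unsigned n) {
  return (x & (1u << n)) != 0;   // (and x, (shl 1, n)) != 0  ==>  bt x, n
}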
-SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
+SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
@@ -6088,7 +6138,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1));
}
- bool isFP = Op.getOperand(1).getValueType().isFloatingPoint();
+ bool isFP = Op1.getValueType().isFloatingPoint();
unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG);
if (X86CC == X86::COND_INVALID)
return SDValue();
@@ -6106,7 +6156,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
DAG.getConstant(X86CC, MVT::i8), Cond);
}
-SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
+SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
SDValue Cond;
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
@@ -6242,7 +6292,7 @@ static bool isX86LogicalCmp(SDValue Op) {
return false;
}
-SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) {
+SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
bool addTest = true;
SDValue Cond = Op.getOperand(0);
DebugLoc dl = Op.getDebugLoc();
@@ -6362,7 +6412,7 @@ static bool isXor1OfSetCC(SDValue Op) {
return false;
}
-SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) {
+SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
bool addTest = true;
SDValue Chain = Op.getOperand(0);
SDValue Cond = Op.getOperand(1);
@@ -6514,7 +6564,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) {
// correct sequence.
SDValue
X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
assert(Subtarget->isTargetCygMing() &&
"This should be used only on Cygwin/Mingw targets");
DebugLoc dl = Op.getDebugLoc();
@@ -6550,7 +6600,7 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
SDValue Size, unsigned Align,
bool isVolatile,
const Value *DstSV,
- uint64_t DstSVOff) {
+ uint64_t DstSVOff) const {
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
// If not DWORD aligned or size is more than the threshold, call the library.
@@ -6689,8 +6739,10 @@ X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
SDValue Chain, SDValue Dst, SDValue Src,
SDValue Size, unsigned Align,
bool isVolatile, bool AlwaysInline,
- const Value *DstSV, uint64_t DstSVOff,
- const Value *SrcSV, uint64_t SrcSVOff) {
+ const Value *DstSV,
+ uint64_t DstSVOff,
+ const Value *SrcSV,
+ uint64_t SrcSVOff) const {
  // This requires the copy size to be a constant, preferably
// within a subtarget-specific limit.
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
@@ -6720,7 +6772,7 @@ X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
Count, InFlag);
InFlag = Chain.getValue(1);
Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
- X86::EDI,
+ X86::EDI,
Dst, InFlag);
InFlag = Chain.getValue(1);
Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI :
@@ -6756,14 +6808,18 @@ X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
&Results[0], Results.size());
}
-SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) {
+SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
DebugLoc dl = Op.getDebugLoc();
if (!Subtarget->is64Bit()) {
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
- SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
+ SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
+ getPointerTy());
return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0,
false, false, 0);
}
@@ -6777,7 +6833,8 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) {
SDValue FIN = Op.getOperand(1);
// Store gp_offset
SDValue Store = DAG.getStore(Op.getOperand(0), dl,
- DAG.getConstant(VarArgsGPOffset, MVT::i32),
+ DAG.getConstant(FuncInfo->getVarArgsGPOffset(),
+ MVT::i32),
FIN, SV, 0, false, false, 0);
MemOps.push_back(Store);
@@ -6785,14 +6842,16 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) {
FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(),
FIN, DAG.getIntPtrConstant(4));
Store = DAG.getStore(Op.getOperand(0), dl,
- DAG.getConstant(VarArgsFPOffset, MVT::i32),
+ DAG.getConstant(FuncInfo->getVarArgsFPOffset(),
+ MVT::i32),
FIN, SV, 0, false, false, 0);
MemOps.push_back(Store);
// Store ptr to overflow_arg_area
FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(),
FIN, DAG.getIntPtrConstant(4));
- SDValue OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
+ SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
+ getPointerTy());
Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 0,
false, false, 0);
MemOps.push_back(Store);
@@ -6800,7 +6859,8 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) {
// Store ptr to reg_save_area.
FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(),
FIN, DAG.getIntPtrConstant(8));
- SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
+ SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
+ getPointerTy());
Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 0,
false, false, 0);
MemOps.push_back(Store);
@@ -6808,18 +6868,18 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) {
&MemOps[0], MemOps.size());
}
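// Hedged reference sketch of the SysV x86-64 va_list record that the four
// stores above populate, field by field and in the same order:
struct VAList64Sketch {
  unsigned gp_offset;      // offset 0:  next general-purpose register slot
  unsigned fp_offset;      // offset 4:  next floating-point register slot
  void *overflow_arg_area; // offset 8:  arguments passed on the stack
  void *reg_save_area;     // offset 16: spilled argument registers
};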
-SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) {
+SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
// X86-64 va_list is a struct { i32, i32, i8*, i8* }.
assert(Subtarget->is64Bit() && "This code only handles 64-bit va_arg!");
SDValue Chain = Op.getOperand(0);
SDValue SrcPtr = Op.getOperand(1);
SDValue SrcSV = Op.getOperand(2);
- llvm_report_error("VAArgInst is not yet implemented for x86-64!");
+ report_fatal_error("VAArgInst is not yet implemented for x86-64!");
return SDValue();
}
-SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) {
+SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
// X86-64 va_list is a struct { i32, i32, i8*, i8* }.
assert(Subtarget->is64Bit() && "This code only handles 64-bit va_copy!");
SDValue Chain = Op.getOperand(0);
@@ -6835,7 +6895,7 @@ SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) {
}
SDValue
-X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
+X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
switch (IntNo) {
@@ -7076,7 +7136,8 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
}
}
-SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) {
+SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op,
+ SelectionDAG &DAG) const {
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
DebugLoc dl = Op.getDebugLoc();
@@ -7097,7 +7158,7 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) {
RetAddrFI, NULL, 0, false, false, 0);
}
-SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
+SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
MFI->setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
@@ -7112,12 +7173,11 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
}
SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
return DAG.getIntPtrConstant(2*TD->getPointerSize());
}
-SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG)
-{
+SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
SDValue Chain = Op.getOperand(0);
SDValue Offset = Op.getOperand(1);
@@ -7141,7 +7201,7 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG)
}
SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
SDValue Root = Op.getOperand(0);
SDValue Trmp = Op.getOperand(1); // trampoline
SDValue FPtr = Op.getOperand(2); // nested function
@@ -7232,7 +7292,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32;
if (InRegCount > 2) {
- llvm_report_error("Nest register in use - reduce number of inreg parameters!");
+ report_fatal_error("Nest register in use - reduce number of inreg parameters!");
}
}
break;
@@ -7281,7 +7341,8 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
}
}
-SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) {
+SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
+ SelectionDAG &DAG) const {
/*
The rounding mode is in bits 11:10 of FPSR, and has the following
settings:
@@ -7343,7 +7404,7 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) {
ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
}
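// Hedged scalar sketch of the mapping this lowering computes: x87 control
// word bits 11:10 encode 0=nearest, 1=-inf, 2=+inf, 3=zero, while
// FLT_ROUNDS reports 1, 3, 2, 0 respectively. Packing that table into the
// byte 0x2D gives a branch-free translation:
static int fltRoundsFromX87RC(unsigned rc) {   // rc = (fpcw >> 10) & 3
  return (0x2D >> (rc * 2)) & 3;               // {1,3,2,0}, two bits each
}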
-SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) {
+SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
EVT OpVT = VT;
unsigned NumBits = VT.getSizeInBits();
@@ -7377,7 +7438,7 @@ SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) {
return Op;
}
-SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) {
+SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
EVT OpVT = VT;
unsigned NumBits = VT.getSizeInBits();
@@ -7407,7 +7468,7 @@ SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) {
return Op;
}
-SDValue X86TargetLowering::LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) {
+SDValue X86TargetLowering::LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT == MVT::v2i64 && "Only know how to lower V2I64 multiply");
DebugLoc dl = Op.getDebugLoc();
@@ -7452,7 +7513,7 @@ SDValue X86TargetLowering::LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) {
}
-SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) {
+SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
// Lower the "add/sub/mul with overflow" instruction into a regular ins plus
// a "setcc" instruction that checks the overflow flag. The "brcond" lowering
// looks for this combo and may remove the "setcc" instruction if the "setcc"
@@ -7520,7 +7581,7 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) {
return Sum;
}
-SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) {
+SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const {
EVT T = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
unsigned Reg = 0;
@@ -7551,7 +7612,7 @@ SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) {
}
SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
assert(Subtarget->is64Bit() && "Result not type legalized?");
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
SDValue TheChain = Op.getOperand(0);
@@ -7569,7 +7630,7 @@ SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
return DAG.getMergeValues(Ops, 2, dl);
}
-SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) {
+SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const {
SDNode *Node = Op.getNode();
DebugLoc dl = Node->getDebugLoc();
EVT T = Node->getValueType(0);
@@ -7585,7 +7646,7 @@ SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) {
/// LowerOperation - Provide custom lowering hooks for some operations.
///
-SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Should not custom lower this!");
case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op,DAG);
@@ -7643,7 +7704,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
void X86TargetLowering::
ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results,
- SelectionDAG &DAG, unsigned NewOp) {
+ SelectionDAG &DAG, unsigned NewOp) const {
EVT T = Node->getValueType(0);
DebugLoc dl = Node->getDebugLoc();
assert (T == MVT::i64 && "Only know how to expand i64 atomics");
@@ -7668,7 +7729,7 @@ ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results,
/// with a new node built out of custom code.
void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue>&Results,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
DebugLoc dl = N->getDebugLoc();
switch (N->getOpcode()) {
default:
@@ -7941,9 +8002,9 @@ bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const {
bool
X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
EVT VT) const {
- // Only do shuffles on 128-bit vector types for now.
+ // Very little shuffling can be done for 64-bit vectors right now.
if (VT.getSizeInBits() == 64)
- return false;
+ return isPALIGNRMask(M, VT, Subtarget->hasSSSE3());
// FIXME: pshufb, blends, shifts.
return (VT.getVectorNumElements() == 2 ||
@@ -8448,8 +8509,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
MachineBasicBlock *
X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
- MachineBasicBlock *BB,
- DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
+ MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
@@ -8478,12 +8538,9 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
F->insert(It, sinkMBB);
// Update machine-CFG edges by first adding all successors of the current
// block to the new block which will contain the Phi node for the select.
- // Also inform sdisel of the edge changes.
for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
- E = BB->succ_end(); I != E; ++I) {
- EM->insert(std::make_pair(*I, sinkMBB));
+ E = BB->succ_end(); I != E; ++I)
sinkMBB->addSuccessor(*I);
- }
// Next, remove all successors of the current block, and add the true
// and fallthrough blocks as its successors.
while (!BB->succ_empty())
@@ -8495,27 +8552,22 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
// copy0MBB:
// %FalseValue = ...
// # fallthrough to sinkMBB
- BB = copy0MBB;
-
- // Update machine-CFG edges
- BB->addSuccessor(sinkMBB);
+ copy0MBB->addSuccessor(sinkMBB);
// sinkMBB:
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
- BB = sinkMBB;
- BuildMI(BB, DL, TII->get(X86::PHI), MI->getOperand(0).getReg())
+ BuildMI(sinkMBB, DL, TII->get(X86::PHI), MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
.addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
- return BB;
+ return sinkMBB;
}
MachineBasicBlock *
X86TargetLowering::EmitLoweredMingwAlloca(MachineInstr *MI,
- MachineBasicBlock *BB,
- DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
+ MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
MachineFunction *F = BB->getParent();
@@ -8538,12 +8590,11 @@ X86TargetLowering::EmitLoweredMingwAlloca(MachineInstr *MI,
MachineBasicBlock *
X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB,
- DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
+ MachineBasicBlock *BB) const {
switch (MI->getOpcode()) {
default: assert(false && "Unexpected instr type to insert");
case X86::MINGW_ALLOCA:
- return EmitLoweredMingwAlloca(MI, BB, EM);
+ return EmitLoweredMingwAlloca(MI, BB);
case X86::CMOV_GR8:
case X86::CMOV_V1I64:
case X86::CMOV_FR32:
@@ -8556,7 +8607,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::CMOV_RFP32:
case X86::CMOV_RFP64:
case X86::CMOV_RFP80:
- return EmitLoweredSelect(MI, BB, EM);
+ return EmitLoweredSelect(MI, BB);
case X86::FP32_TO_INT16_IN_MEM:
case X86::FP32_TO_INT32_IN_MEM:
@@ -8638,21 +8689,6 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
return BB;
}
- // DBG_VALUE. Only the frame index case is done here.
- case X86::DBG_VALUE: {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- DebugLoc DL = MI->getDebugLoc();
- X86AddressMode AM;
- MachineFunction *F = BB->getParent();
- AM.BaseType = X86AddressMode::FrameIndexBase;
- AM.Base.FrameIndex = MI->getOperand(0).getImm();
- addFullAddress(BuildMI(BB, DL, TII->get(X86::DBG_VALUE)), AM).
- addImm(MI->getOperand(1).getImm()).
- addMetadata(MI->getOperand(2).getMetadata());
- F->DeleteMachineInstr(MI); // Remove pseudo.
- return BB;
- }
-
// String/text processing lowering.
case X86::PCMPISTRM128REG:
return EmitPCMP(MI, BB, 3, false /* in-mem */);
@@ -8874,7 +8910,8 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
/// node is a GlobalAddress + offset.
bool X86TargetLowering::isGAPlusOffset(SDNode *N,
- GlobalValue* &GA, int64_t &Offset) const{
+ const GlobalValue* &GA,
+ int64_t &Offset) const {
if (N->getOpcode() == X86ISD::Wrapper) {
if (isa<GlobalAddressSDNode>(N->getOperand(0))) {
GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
@@ -9558,9 +9595,13 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
}
static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
EVT VT = N->getValueType(0);
- if (VT != MVT::i64 || !Subtarget->is64Bit())
+ if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
return SDValue();
// fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
@@ -9570,6 +9611,8 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
std::swap(N0, N1);
if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
return SDValue();
+ if (!N0.hasOneUse() || !N1.hasOneUse())
+ return SDValue();
SDValue ShAmt0 = N0.getOperand(1);
if (ShAmt0.getValueType() != MVT::i8)
@@ -9592,10 +9635,11 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
std::swap(ShAmt0, ShAmt1);
}
+ unsigned Bits = VT.getSizeInBits();
if (ShAmt1.getOpcode() == ISD::SUB) {
SDValue Sum = ShAmt1.getOperand(0);
if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) {
- if (SumC->getSExtValue() == 64 &&
+ if (SumC->getSExtValue() == Bits &&
ShAmt1.getOperand(1) == ShAmt0)
return DAG.getNode(Opc, DL, VT,
Op0, Op1,
@@ -9605,7 +9649,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
} else if (ConstantSDNode *ShAmt1C = dyn_cast<ConstantSDNode>(ShAmt1)) {
ConstantSDNode *ShAmt0C = dyn_cast<ConstantSDNode>(ShAmt0);
if (ShAmt0C &&
- ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue() == 64)
+ ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue() == Bits)
return DAG.getNode(Opc, DL, VT,
N0.getOperand(0), N1.getOperand(0),
DAG.getNode(ISD::TRUNCATE, DL,
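// Hedged source-level example of the pattern this combine now matches at
// i16/i32/i64 (not just i64): a double shift spelled with or/shl/srl.
// Assumes 0 < c < 32 so neither shift is undefined.
static unsigned shld32(unsigned x, unsigned y, unsigned c) {
  return (x << c) | (y >> (32 - c));   // selects to shld on x86
}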
@@ -9769,8 +9813,9 @@ static SDValue PerformBTCombine(SDNode *N,
unsigned BitWidth = Op1.getValueSizeInBits();
APInt DemandedMask = APInt::getLowBitsSet(BitWidth, Log2_32(BitWidth));
APInt KnownZero, KnownOne;
- TargetLowering::TargetLoweringOpt TLO(DAG);
- TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+ !DCI.isBeforeLegalizeOps());
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLO.ShrinkDemandedConstant(Op1, DemandedMask) ||
TLI.SimplifyDemandedBits(Op1, DemandedMask, KnownZero, KnownOne, TLO))
DCI.CommitTargetLoweringOpt(TLO);
@@ -9883,7 +9928,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SHL:
case ISD::SRA:
case ISD::SRL: return PerformShiftCombine(N, DAG, Subtarget);
- case ISD::OR: return PerformOrCombine(N, DAG, Subtarget);
+ case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget);
case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
case X86ISD::FXOR:
case X86ISD::FOR: return PerformFORCombine(N, DAG);
@@ -9897,6 +9942,111 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
return SDValue();
}
+/// isTypeDesirableForOp - Return true if the target has native support for
+/// the specified value type and it is 'desirable' to use the type for the
+/// given node type. e.g. On x86 i16 is legal, but undesirable since i16
+/// instruction encodings are longer and some i16 instructions are slow.
+bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
+ if (!isTypeLegal(VT))
+ return false;
+ if (VT != MVT::i16)
+ return true;
+
+ switch (Opc) {
+ default:
+ return true;
+ case ISD::LOAD:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SUB:
+ case ISD::ADD:
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ return false;
+ }
+}
+
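// Hedged illustration of why i16 is flagged undesirable above: the 16-bit
// form of an ALU op carries the 0x66 operand-size prefix and so encodes one
// byte longer (e.g. add $1,%ax is 66 83 C0 01 versus 83 C0 01 for
// add $1,%eax; verify exact bytes with an assembler).
static unsigned short add16(unsigned short a) { return a + 1; }
static unsigned add32(unsigned a) { return a + 1; }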
+static bool MayFoldLoad(SDValue Op) {
+ return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
+}
+
+static bool MayFoldIntoStore(SDValue Op) {
+ return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
+}
+
+/// IsDesirableToPromoteOp - This method queries the target whether it is
+/// beneficial for dag combiner to promote the specified node. If true, it
+/// should return the desired promotion type by reference.
+bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
+ EVT VT = Op.getValueType();
+ if (VT != MVT::i16)
+ return false;
+
+ bool Promote = false;
+ bool Commute = false;
+ switch (Op.getOpcode()) {
+ default: break;
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ // If the non-extending load has a single use and it's not live out, then it
+ // might be folded.
+ if (LD->getExtensionType() == ISD::NON_EXTLOAD /*&&
+ Op.hasOneUse()*/) {
+ for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+ UE = Op.getNode()->use_end(); UI != UE; ++UI) {
+ // The only case where we'd want to promote LOAD (rather than it being
+ // promoted as an operand) is when its only use is liveout.
+ if (UI->getOpcode() != ISD::CopyToReg)
+ return false;
+ }
+ }
+ Promote = true;
+ break;
+ }
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ Promote = true;
+ break;
+ case ISD::SHL:
+ case ISD::SRL: {
+ SDValue N0 = Op.getOperand(0);
+ // Look out for (store (shl (load), x)).
+ if (MayFoldLoad(N0) && MayFoldIntoStore(Op))
+ return false;
+ Promote = true;
+ break;
+ }
+ case ISD::ADD:
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ Commute = true;
+ // fallthrough
+ case ISD::SUB: {
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ if (!Commute && MayFoldLoad(N1))
+ return false;
+ // Avoid disabling potential load folding opportunities.
+ if (MayFoldLoad(N0) && (!isa<ConstantSDNode>(N1) || MayFoldIntoStore(Op)))
+ return false;
+ if (MayFoldLoad(N1) && (!isa<ConstantSDNode>(N0) || MayFoldIntoStore(Op)))
+ return false;
+ Promote = true;
+ }
+ }
+
+ PVT = MVT::i32;
+ return Promote;
+}
+
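// Hedged example of the load/store-folding case the SHL/SRL arm above
// protects: for an in-place i16 shift, staying at i16 lets isel fold the
// load and store into a single memory-operand shift; promoting to i32
// would split it into separate load, shift, and store.
static void shl_in_place(unsigned short *p, int s) {
  *p <<= s;   // (store (shl (load p), s), p) -> shlw %cl, (%rdi) ideally
}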
//===----------------------------------------------------------------------===//
// X86 Inline Assembly Support
//===----------------------------------------------------------------------===//
@@ -10159,7 +10309,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
return;
}
- GlobalValue *GV = GA->getGlobal();
+ const GlobalValue *GV = GA->getGlobal();
// If we require an extra load to get this address, as in PIC mode, we
// can't accept it.
if (isGlobalStubReference(Subtarget->ClassifyGlobalReference(GV,
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 1026480ef0a6..440601f98276 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -374,12 +374,6 @@ namespace llvm {
//===--------------------------------------------------------------------===//
// X86TargetLowering - X86 Implementation of the TargetLowering interface
class X86TargetLowering : public TargetLowering {
- int VarArgsFrameIndex; // FrameIndex for start of varargs area.
- int RegSaveFrameIndex; // X86-64 vararg func register save area.
- unsigned VarArgsGPOffset; // X86-64 vararg func int reg offset.
- unsigned VarArgsFPOffset; // X86-64 vararg func fp reg offset.
- int BytesToPopOnReturn; // Number of arg bytes ret should pop.
-
public:
explicit X86TargetLowering(X86TargetMachine &TM);
@@ -401,11 +395,6 @@ namespace llvm {
getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
unsigned JTI, MCContext &Ctx) const;
- // Return the number of bytes that a function should pop when it returns (in
- // addition to the space used by the return address).
- //
- unsigned getBytesToPopOnReturn() const { return BytesToPopOnReturn; }
-
/// getStackPtrReg - Return the stack pointer register we are using: either
/// ESP or RSP.
unsigned getStackPtrReg() const { return X86StackPtr; }
@@ -424,12 +413,14 @@ namespace llvm {
/// probably because the source does not need to be loaded. If
/// 'NonScalarIntSafe' is true, that means it's safe to return a
/// non-scalar-integer type, e.g. empty string source, constant, or loaded
- /// from memory. It returns EVT::Other if SelectionDAG should be responsible
- /// for determining it.
+ /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
+ /// constant so it does not need to be loaded.
+ /// It returns EVT::Other if the type should be determined using generic
+ /// target-independent logic.
virtual EVT
- getOptimalMemOpType(uint64_t Size,
- unsigned DstAlign, unsigned SrcAlign,
- bool NonScalarIntSafe, SelectionDAG &DAG) const;
+ getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
+ bool NonScalarIntSafe, bool MemcpyStrSrc,
+ MachineFunction &MF) const;
/// allowsUnalignedMemoryAccesses - Returns true if the target allows
    /// unaligned memory accesses of the specified type.
@@ -439,20 +430,32 @@ namespace llvm {
/// LowerOperation - Provide custom lowering hooks for some operations.
///
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
/// ReplaceNodeResults - Replace the results of node with an illegal result
/// type with new values built out of custom code.
///
virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
- SelectionDAG &DAG);
+ SelectionDAG &DAG) const;
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
- virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *MBB,
- DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const;
+ /// isTypeDesirableForOp - Return true if the target has native support for
+ /// the specified value type and it is 'desirable' to use the type for the
+ /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
+ /// instruction encodings are longer and some i16 instructions are slow.
+ virtual bool isTypeDesirableForOp(unsigned Opc, EVT VT) const;
+
+ /// IsDesirableToPromoteOp - Return true if it is profitable for the dag
+ /// combiner to promote the specified node. If true, it should return the
+ /// desired promotion type by reference.
+ virtual bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
/// getTargetNodeName - This method returns the name of a target specific
@@ -473,9 +476,9 @@ namespace llvm {
unsigned Depth = 0) const;
virtual bool
- isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) const;
+ isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const;
- SDValue getReturnAddressFrameIndex(SelectionDAG &DAG);
+ SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
virtual bool ExpandInlineAsm(CallInst *CI) const;
@@ -560,7 +563,7 @@ namespace llvm {
return !X86ScalarSSEf64 || VT == MVT::f80;
}
- virtual const X86Subtarget* getSubtarget() {
+ virtual const X86Subtarget* getSubtarget() const {
return Subtarget;
}
@@ -577,11 +580,12 @@ namespace llvm {
createFastISel(MachineFunction &mf,
DenseMap<const Value *, unsigned> &,
DenseMap<const BasicBlock *, MachineBasicBlock *> &,
- DenseMap<const AllocaInst *, int> &
+ DenseMap<const AllocaInst *, int> &,
+ std::vector<std::pair<MachineInstr*, unsigned> > &
#ifndef NDEBUG
- , SmallSet<Instruction*, 8> &
+ , SmallSet<const Instruction *, 8> &
#endif
- );
+ ) const;
/// getFunctionAlignment - Return the Log2 alignment of this function.
virtual unsigned getFunctionAlignment(const Function *F) const;
@@ -616,17 +620,17 @@ namespace llvm {
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
SDValue LowerMemArgument(SDValue Chain,
CallingConv::ID CallConv,
const SmallVectorImpl<ISD::InputArg> &ArgInfo,
DebugLoc dl, SelectionDAG &DAG,
const CCValAssign &VA, MachineFrameInfo *MFI,
- unsigned i);
+ unsigned i) const;
SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
DebugLoc dl, SelectionDAG &DAG,
const CCValAssign &VA,
- ISD::ArgFlagsTy Flags);
+ ISD::ArgFlagsTy Flags) const;
// Call lowering helpers.
@@ -641,114 +645,120 @@ namespace llvm {
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
SelectionDAG& DAG) const;
- bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv);
+ bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv) const;
SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
SDValue Chain, bool IsTailCall, bool Is64Bit,
- int FPDiff, DebugLoc dl);
+ int FPDiff, DebugLoc dl) const;
CCAssignFn *CCAssignFnForNode(CallingConv::ID CallConv) const;
- unsigned GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG &DAG);
+ unsigned GetAlignedArgumentStackSize(unsigned StackSize,
+ SelectionDAG &DAG) const;
std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
- bool isSigned);
+ bool isSigned) const;
SDValue LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
- SelectionDAG &DAG);
- SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG);
- SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG);
- SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG);
- SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG);
- SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG);
- SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG);
- SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG);
- SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG);
- SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG);
- SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG);
+ SelectionDAG &DAG) const;
+ SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
int64_t Offset, SelectionDAG &DAG) const;
- SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG);
- SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG);
- SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG);
- SDValue LowerShift(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
- SelectionDAG &DAG);
- SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG);
- SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG);
- SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG);
- SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG);
- SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG);
- SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG);
- SDValue LowerFABS(SDValue Op, SelectionDAG &DAG);
- SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG);
- SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG);
- SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG);
- SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG);
- SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG);
- SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG);
- SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG);
- SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG);
- SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG);
- SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG);
- SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG);
- SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG);
- SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG);
- SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG);
- SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG);
- SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG);
- SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG);
- SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG);
- SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG);
- SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG);
- SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG);
- SDValue LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG);
- SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG);
-
- SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG);
- SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG);
- SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG);
+ SelectionDAG &DAG) const;
+ SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerToBT(SDValue And, ISD::CondCode CC,
+ DebugLoc dl, SelectionDAG &DAG) const;
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const;
virtual SDValue
LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
- DebugLoc dl, SelectionDAG &DAG);
+ DebugLoc dl, SelectionDAG &DAG) const;
virtual bool
CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<EVT> &OutTys,
const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags,
- SelectionDAG &DAG);
+ SelectionDAG &DAG) const;
void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG, unsigned NewOp);
+ SelectionDAG &DAG, unsigned NewOp) const;
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
SDValue Chain,
SDValue Dst, SDValue Src,
SDValue Size, unsigned Align,
bool isVolatile,
- const Value *DstSV, uint64_t DstSVOff);
+ const Value *DstSV,
+ uint64_t DstSVOff) const;
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
SDValue Chain,
SDValue Dst, SDValue Src,
SDValue Size, unsigned Align,
bool isVolatile, bool AlwaysInline,
- const Value *DstSV, uint64_t DstSVOff,
- const Value *SrcSV, uint64_t SrcSVOff);
+ const Value *DstSV,
+ uint64_t DstSVOff,
+ const Value *SrcSV,
+ uint64_t SrcSVOff) const;
/// Utility function to emit string processing sse4.2 instructions
/// that return in xmm0.
@@ -796,30 +806,29 @@ namespace llvm {
MachineBasicBlock *BB) const;
MachineBasicBlock *EmitLoweredSelect(MachineInstr *I,
- MachineBasicBlock *BB,
- DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const;
+ MachineBasicBlock *BB) const;
MachineBasicBlock *EmitLoweredMingwAlloca(MachineInstr *MI,
- MachineBasicBlock *BB,
- DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const;
+ MachineBasicBlock *BB) const;
/// Emit nodes that will be selected as "test Op0,Op0", or something
/// equivalent, for use with the given x86 condition code.
- SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG);
+ SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG) const;
/// Emit nodes that will be selected as "cmp Op0,Op1", or something
/// equivalent, for use with the given x86 condition code.
SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
- SelectionDAG &DAG);
+ SelectionDAG &DAG) const;
};
namespace X86 {
FastISel *createFastISel(MachineFunction &mf,
DenseMap<const Value *, unsigned> &,
DenseMap<const BasicBlock *, MachineBasicBlock *> &,
- DenseMap<const AllocaInst *, int> &
+ DenseMap<const AllocaInst *, int> &,
+ std::vector<std::pair<MachineInstr*, unsigned> > &
#ifndef NDEBUG
- , SmallSet<Instruction*, 8> &
+ , SmallSet<const Instruction*, 8> &
#endif
);
}
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index eef2ca078967..f5c3dbf2ad28 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -2086,6 +2086,11 @@ def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
x86_subreg_8bit_hi))>,
Requires<[In64BitMode]>;
+def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
+ (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
+ GR32_ABCD)),
+ x86_subreg_8bit_hi))>,
+ Requires<[In64BitMode]>;
def : Pat<(srl GR16:$src, (i8 8)),
(EXTRACT_SUBREG
(MOVZX32_NOREXrr8
@@ -2156,21 +2161,6 @@ def : Pat<(sra GR64:$src1, (and CL, 63)),
def : Pat<(store (sra (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
(SAR64mCL addr:$dst)>;
-// Double shift patterns
-def : Pat<(shrd GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm)),
- (SHRD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>;
-
-def : Pat<(store (shrd (loadi64 addr:$dst), (i8 imm:$amt1),
- GR64:$src2, (i8 imm)), addr:$dst),
- (SHRD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>;
-
-def : Pat<(shld GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm)),
- (SHLD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>;
-
-def : Pat<(store (shld (loadi64 addr:$dst), (i8 imm:$amt1),
- GR64:$src2, (i8 imm)), addr:$dst),
- (SHLD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>;
-
// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
let AddedComplexity = 5 in { // Try this before the selecting to OR
def : Pat<(or_is_add GR64:$src1, i64immSExt8:$src2),
@@ -2294,7 +2284,7 @@ def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert GR64:$src))]>;
-def MOV64toSDrm : RPDI<0x6E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
+def MOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>;
@@ -2347,15 +2337,3 @@ let isTwoAddress = 1 in {
}
defm PINSRQ : SS41I_insert64<0x22, "pinsrq">;
-
-// -disable-16bit support.
-def : Pat<(truncstorei16 (i16 imm:$src), addr:$dst),
- (MOV16mi addr:$dst, imm:$src)>;
-def : Pat<(truncstorei16 GR64:$src, addr:$dst),
- (MOV16mr addr:$dst, (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit))>;
-def : Pat<(i64 (sextloadi16 addr:$dst)),
- (MOVSX64rm16 addr:$dst)>;
-def : Pat<(i64 (zextloadi16 addr:$dst)),
- (MOVZX64rm16 addr:$dst)>;
-def : Pat<(i64 (extloadi16 addr:$dst)),
- (MOVZX64rm16 addr:$dst)>;
diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h
index c475b56d12f4..5a82a7b1979b 100644
--- a/lib/Target/X86/X86InstrBuilder.h
+++ b/lib/Target/X86/X86InstrBuilder.h
@@ -49,7 +49,7 @@ struct X86AddressMode {
unsigned Scale;
unsigned IndexReg;
int Disp;
- GlobalValue *GV;
+ const GlobalValue *GV;
unsigned GVOpFlags;
X86AddressMode()
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td
index e6d1fee16f61..0aae4a8653b0 100644
--- a/lib/Target/X86/X86InstrFPStack.td
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -327,8 +327,8 @@ def TST_F : FPI<0xE4, RawFrm, (outs), (ins), "ftst">, D9;
// Versions of FP instructions that take a single memory operand. Added for the
// disassembler; remove once they are covered by patterns elsewhere.
-def FCOM32m : FPI<0xD8, MRM2m, (outs), (ins f32mem:$src), "fcom{l}\t$src">;
-def FCOMP32m : FPI<0xD8, MRM3m, (outs), (ins f32mem:$src), "fcomp{l}\t$src">;
+def FCOM32m : FPI<0xD8, MRM2m, (outs), (ins f32mem:$src), "fcom{s}\t$src">;
+def FCOMP32m : FPI<0xD8, MRM3m, (outs), (ins f32mem:$src), "fcomp{s}\t$src">;
def FLDENVm : FPI<0xD9, MRM4m, (outs), (ins f32mem:$src), "fldenv\t$src">;
def FSTENVm : FPI<0xD9, MRM6m, (outs f32mem:$dst), (ins), "fnstenv\t$dst">;
@@ -336,15 +336,15 @@ def FSTENVm : FPI<0xD9, MRM6m, (outs f32mem:$dst), (ins), "fnstenv\t$dst">;
def FICOM32m : FPI<0xDA, MRM2m, (outs), (ins i32mem:$src), "ficom{l}\t$src">;
def FICOMP32m: FPI<0xDA, MRM3m, (outs), (ins i32mem:$src), "ficomp{l}\t$src">;
-def FCOM64m : FPI<0xDC, MRM2m, (outs), (ins f64mem:$src), "fcom{ll}\t$src">;
-def FCOMP64m : FPI<0xDC, MRM3m, (outs), (ins f64mem:$src), "fcomp{ll}\t$src">;
+def FCOM64m : FPI<0xDC, MRM2m, (outs), (ins f64mem:$src), "fcom{l}\t$src">;
+def FCOMP64m : FPI<0xDC, MRM3m, (outs), (ins f64mem:$src), "fcomp{l}\t$src">;
def FRSTORm : FPI<0xDD, MRM4m, (outs f32mem:$dst), (ins), "frstor\t$dst">;
def FSAVEm : FPI<0xDD, MRM6m, (outs f32mem:$dst), (ins), "fnsave\t$dst">;
def FNSTSWm : FPI<0xDD, MRM7m, (outs f32mem:$dst), (ins), "fnstsw\t$dst">;
-def FICOM16m : FPI<0xDE, MRM2m, (outs), (ins i16mem:$src), "ficom{w}\t$src">;
-def FICOMP16m: FPI<0xDE, MRM3m, (outs), (ins i16mem:$src), "ficomp{w}\t$src">;
+def FICOM16m : FPI<0xDE, MRM2m, (outs), (ins i16mem:$src), "ficom{s}\t$src">;
+def FICOMP16m: FPI<0xDE, MRM3m, (outs), (ins i16mem:$src), "ficomp{s}\t$src">;
def FBLDm : FPI<0xDF, MRM4m, (outs), (ins f32mem:$src), "fbld\t$src">;
def FBSTPm : FPI<0xDF, MRM6m, (outs f32mem:$dst), (ins), "fbstp\t$dst">;
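Background on the mnemonic fixes above (AT&T/GAS convention, stated here as context rather than taken from the patch itself): the suffix on an x87 memory operand encodes its width — for floating-point operands `s` is 32-bit single and `l` is 64-bit double, while for integer operands `s` is 16-bit and `l` is 32-bit. Hence `fcom{s}`/`fcomp{s}` for f32mem, `fcom{l}`/`fcomp{l}` for f64mem, and `ficom{s}`/`ficomp{s}` for i16mem.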
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index ccb7b055b079..a21bfb9ea9d0 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -1684,6 +1685,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// Start from the bottom of the block and work up, examining the
// terminator instructions.
MachineBasicBlock::iterator I = MBB.end();
+ MachineBasicBlock::iterator UnCondBrIter = MBB.end();
while (I != MBB.begin()) {
--I;
if (I->isDebugValue())
@@ -1701,6 +1703,8 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// Handle unconditional branches.
if (I->getOpcode() == X86::JMP_4) {
+ UnCondBrIter = I;
+
if (!AllowModify) {
TBB = I->getOperand(0).getMBB();
continue;
@@ -1718,10 +1722,11 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
TBB = 0;
I->eraseFromParent();
I = MBB.end();
+ UnCondBrIter = MBB.end();
continue;
}
- // TBB is used to indicate the unconditinal destination.
+ // TBB is used to indicate the unconditional destination.
TBB = I->getOperand(0).getMBB();
continue;
}
@@ -1733,6 +1738,45 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// Working from the bottom, handle the first conditional branch.
if (Cond.empty()) {
+ MachineBasicBlock *TargetBB = I->getOperand(0).getMBB();
+ if (AllowModify && UnCondBrIter != MBB.end() &&
+ MBB.isLayoutSuccessor(TargetBB)) {
+ // If we can modify the code and it ends in something like:
+ //
+ // jCC L1
+ // jmp L2
+ // L1:
+ // ...
+ // L2:
+ //
+ // Then we can change this to:
+ //
+ // jnCC L2
+ // L1:
+ // ...
+ // L2:
+ //
+ // Which is a bit more efficient: the original conditional branch
+ // targeted the fall-through block, so inverting the condition lets
+ // us fall through instead of jumping over the unconditional branch.
+ BranchCode = GetOppositeBranchCondition(BranchCode);
+ unsigned JNCC = GetCondBranchFromCond(BranchCode);
+ MachineBasicBlock::iterator OldInst = I;
+
+ BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(JNCC))
+ .addMBB(UnCondBrIter->getOperand(0).getMBB());
+ BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JMP_4))
+ .addMBB(TargetBB);
+ MBB.addSuccessor(TargetBB);
+
+ OldInst->eraseFromParent();
+ UnCondBrIter->eraseFromParent();
+
+ // Restart the analysis.
+ UnCondBrIter = MBB.end();
+ I = MBB.end();
+ continue;
+ }
+
FBB = TBB;
TBB = I->getOperand(0).getMBB();
Cond.push_back(MachineOperand::CreateImm(BranchCode));
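A standalone C++ sketch (not part of the patch) of why the inversion above is behavior-preserving: for every condition value the rewritten block reaches the same successor.

    #include <cassert>

    // before: jCC L1; jmp L2; L1: ...   -- L1 is the layout successor
    static int before(bool cond) { return cond ? 1 : 2; }
    // after:  jnCC L2; L1: ...          -- fall through to L1 when cond holds
    static int after(bool cond) { if (!cond) return 2; return 1; }

    int main() {
      assert(before(false) == after(false) && before(true) == after(true));
      return 0;
    }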
@@ -2276,6 +2320,19 @@ bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
return true;
}
+MachineInstr*
+X86InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx, uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const {
+ X86AddressMode AM;
+ AM.BaseType = X86AddressMode::FrameIndexBase;
+ AM.Base.FrameIndex = FrameIx;
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(X86::DBG_VALUE));
+ addFullAddress(MIB, AM).addImm(Offset).addMetadata(MDPtr);
+ return &*MIB;
+}
+
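A hedged sketch of a call site for the hook above; the surrounding names (TII, InsertPos, FrameIx, Offset, MDPtr, DL) are placeholders, not code from this patch:

    // Hypothetical caller in target-independent codegen: ask the target to
    // build a DBG_VALUE describing a variable that lives in stack slot
    // FrameIx, then place it into the block.
    if (MachineInstr *DbgMI =
          TII->emitFrameIndexDebugValue(MF, FrameIx, Offset, MDPtr, DL))
      MBB->insert(InsertPos, DbgMI);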
static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
const SmallVectorImpl<MachineOperand> &MOs,
MachineInstr *MI,
@@ -2586,7 +2643,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
Ty = Type::getDoubleTy(MF.getFunction()->getContext());
else
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);
- Constant *C = LoadMI->getOpcode() == X86::V_SETALLONES ?
+ const Constant *C = LoadMI->getOpcode() == X86::V_SETALLONES ?
Constant::getAllOnesValue(Ty) :
Constant::getNullValue(Ty);
unsigned CPI = MCP.getConstantPoolIndex(C, Alignment);
@@ -3406,6 +3463,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
}
case TargetOpcode::DBG_LABEL:
case TargetOpcode::EH_LABEL:
+ case TargetOpcode::DBG_VALUE:
break;
case TargetOpcode::IMPLICIT_DEF:
case TargetOpcode::KILL:
@@ -3603,7 +3661,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
std::string msg;
raw_string_ostream Msg(msg);
Msg << "Cannot determine size: " << MI;
- llvm_report_error(Msg.str());
+ report_fatal_error(Msg.str());
}
@@ -3709,3 +3767,9 @@ void X86InstrInfo::SetSSEDomain(MachineInstr *MI, unsigned Domain) const {
assert(table && "Cannot change domain");
MI->setDesc(get(table[Domain-1]));
}
+
+/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
+ NopInst.setOpcode(X86::NOOP);
+}
+
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index f0bdd06cce88..df99c7fd63c4 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -623,6 +623,12 @@ public:
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI) const;
+ virtual
+ MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx, uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const;
+
/// foldMemoryOperand - If this target supports it, fold a load or store of
/// the specified stack slot into the specified machine instruction for the
/// specified operand(s). If this is possible, the target should perform the
@@ -687,6 +693,8 @@ public:
int64_t Offset1, int64_t Offset2,
unsigned NumLoads) const;
+ virtual void getNoopForMachoTarget(MCInst &NopInst) const;
+
virtual
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 940b439d22a9..a2754eac2154 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -487,34 +487,6 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
return (~KnownZero0 & ~KnownZero1) == 0;
}]>;
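A standalone C++ sketch (not part of the patch) of the identity the or_is_add predicate relies on: when the known-zero masks prove the operands share no set bits, no bit position can produce a carry, so OR and ADD compute the same value.

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t a = 0xF0F00000u, b = 0x0F0F1234u;
      assert((a & b) == 0);        // disjoint set bits -- the KnownZero test
      assert((a | b) == a + b);    // hence the selector may emit ADD for OR
      return 0;
    }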
-// 'shld' and 'shrd' instruction patterns. Note that even though these have
-// the srl and shl in their patterns, the C++ code must still check for them,
-// because predicates are tested before children nodes are explored.
-
-def shrd : PatFrag<(ops node:$src1, node:$amt1, node:$src2, node:$amt2),
- (or (srl node:$src1, node:$amt1),
- (shl node:$src2, node:$amt2)), [{
- assert(N->getOpcode() == ISD::OR);
- return N->getOperand(0).getOpcode() == ISD::SRL &&
- N->getOperand(1).getOpcode() == ISD::SHL &&
- isa<ConstantSDNode>(N->getOperand(0).getOperand(1)) &&
- isa<ConstantSDNode>(N->getOperand(1).getOperand(1)) &&
- N->getOperand(0).getConstantOperandVal(1) ==
- N->getValueSizeInBits(0) - N->getOperand(1).getConstantOperandVal(1);
-}]>;
-
-def shld : PatFrag<(ops node:$src1, node:$amt1, node:$src2, node:$amt2),
- (or (shl node:$src1, node:$amt1),
- (srl node:$src2, node:$amt2)), [{
- assert(N->getOpcode() == ISD::OR);
- return N->getOperand(0).getOpcode() == ISD::SHL &&
- N->getOperand(1).getOpcode() == ISD::SRL &&
- isa<ConstantSDNode>(N->getOperand(0).getOperand(1)) &&
- isa<ConstantSDNode>(N->getOperand(1).getOperand(1)) &&
- N->getOperand(0).getConstantOperandVal(1) ==
- N->getValueSizeInBits(0) - N->getOperand(1).getConstantOperandVal(1);
-}]>;
-
//===----------------------------------------------------------------------===//
// Instruction list...
//
@@ -781,11 +753,11 @@ def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[]>;
}
let Defs = [ESP], Uses = [ESP], neverHasSideEffects = 1, mayStore = 1 in {
-def PUSH32i8 : Ii8<0x6a, RawFrm, (outs), (ins i8imm:$imm),
- "push{l}\t$imm", []>;
-def PUSH32i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
+def PUSHi8 : Ii8<0x6a, RawFrm, (outs), (ins i32i8imm:$imm),
"push{l}\t$imm", []>;
-def PUSH32i32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm),
+def PUSHi16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
+ "push{w}\t$imm", []>, OpSize;
+def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm),
"push{l}\t$imm", []>;
}
@@ -809,10 +781,11 @@ let isTwoAddress = 1 in // GR32 = bswap GR32
let Defs = [EFLAGS] in {
def BSF16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"bsf{w}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, EFLAGS, (X86bsf GR16:$src))]>, TB;
+ [(set GR16:$dst, EFLAGS, (X86bsf GR16:$src))]>, TB, OpSize;
def BSF16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"bsf{w}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, EFLAGS, (X86bsf (loadi16 addr:$src)))]>, TB;
+ [(set GR16:$dst, EFLAGS, (X86bsf (loadi16 addr:$src)))]>, TB,
+ OpSize;
def BSF32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"bsf{l}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))]>, TB;
@@ -822,10 +795,11 @@ def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
def BSR16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"bsr{w}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))]>, TB;
+ [(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))]>, TB, OpSize;
def BSR16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"bsr{w}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, EFLAGS, (X86bsr (loadi16 addr:$src)))]>, TB;
+ [(set GR16:$dst, EFLAGS, (X86bsr (loadi16 addr:$src)))]>, TB,
+ OpSize;
def BSR32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"bsr{l}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))]>, TB;
@@ -4476,7 +4450,11 @@ def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
// avoid partial-register updates.
def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>;
def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>;
-def : Pat<(i32 (anyext GR16:$src)), (MOVZX32rr16 GR16:$src)>;
+
+// Except for i16 -> i32, since isel expects i16 ops to be promoted to i32.
+def : Pat<(i32 (anyext GR16:$src)),
+ (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, x86_subreg_16bit)>;
+
//===----------------------------------------------------------------------===//
// Some peepholes
@@ -4537,11 +4515,11 @@ def : Pat<(i8 (trunc GR16:$src)),
// h-register tricks
def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))),
- (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
x86_subreg_8bit_hi)>,
Requires<[In32BitMode]>;
def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))),
- (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
x86_subreg_8bit_hi)>,
Requires<[In32BitMode]>;
def : Pat<(srl GR16:$src, (i8 8)),
@@ -4566,6 +4544,11 @@ def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
GR32_ABCD)),
x86_subreg_8bit_hi))>,
Requires<[In32BitMode]>;
+def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
+ (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
+ GR32_ABCD)),
+ x86_subreg_8bit_hi))>,
+ Requires<[In32BitMode]>;
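A standalone C++ sketch (not part of the patch) of the identity behind the added pattern: masking with 0xff00 before shifting extracts the same byte as shifting before masking, namely the high 8-bit subregister (AH/BH/CH/DH).

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t x = 0x12345678u;
      assert(((x & 0xff00u) >> 8) == ((x >> 8) & 0xffu)); // both yield 0x56
      return 0;
    }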
// (shl x, 1) ==> (add x, x)
def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>;
@@ -4612,111 +4595,13 @@ def : Pat<(store (sra (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
def : Pat<(store (sra (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
(SAR32mCL addr:$dst)>;
-// (or (x >> c) | (y << (32 - c))) ==> (shrd32 x, y, c)
-def : Pat<(or (srl GR32:$src1, CL:$amt),
- (shl GR32:$src2, (sub 32, CL:$amt))),
- (SHRD32rrCL GR32:$src1, GR32:$src2)>;
-
-def : Pat<(store (or (srl (loadi32 addr:$dst), CL:$amt),
- (shl GR32:$src2, (sub 32, CL:$amt))), addr:$dst),
- (SHRD32mrCL addr:$dst, GR32:$src2)>;
-
-def : Pat<(or (srl GR32:$src1, (i8 (trunc ECX:$amt))),
- (shl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))),
- (SHRD32rrCL GR32:$src1, GR32:$src2)>;
-
-def : Pat<(store (or (srl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))),
- (shl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))),
- addr:$dst),
- (SHRD32mrCL addr:$dst, GR32:$src2)>;
-
-def : Pat<(shrd GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm/*:$amt2*/)),
- (SHRD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>;
-
-def : Pat<(store (shrd (loadi32 addr:$dst), (i8 imm:$amt1),
- GR32:$src2, (i8 imm/*:$amt2*/)), addr:$dst),
- (SHRD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>;
-
-// (or (x << c) | (y >> (32 - c))) ==> (shld32 x, y, c)
-def : Pat<(or (shl GR32:$src1, CL:$amt),
- (srl GR32:$src2, (sub 32, CL:$amt))),
- (SHLD32rrCL GR32:$src1, GR32:$src2)>;
-
-def : Pat<(store (or (shl (loadi32 addr:$dst), CL:$amt),
- (srl GR32:$src2, (sub 32, CL:$amt))), addr:$dst),
- (SHLD32mrCL addr:$dst, GR32:$src2)>;
-
-def : Pat<(or (shl GR32:$src1, (i8 (trunc ECX:$amt))),
- (srl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))),
- (SHLD32rrCL GR32:$src1, GR32:$src2)>;
-
-def : Pat<(store (or (shl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))),
- (srl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))),
- addr:$dst),
- (SHLD32mrCL addr:$dst, GR32:$src2)>;
-
-def : Pat<(shld GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm/*:$amt2*/)),
- (SHLD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>;
-
-def : Pat<(store (shld (loadi32 addr:$dst), (i8 imm:$amt1),
- GR32:$src2, (i8 imm/*:$amt2*/)), addr:$dst),
- (SHLD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>;
-
-// (or (x >> c) | (y << (16 - c))) ==> (shrd16 x, y, c)
-def : Pat<(or (srl GR16:$src1, CL:$amt),
- (shl GR16:$src2, (sub 16, CL:$amt))),
- (SHRD16rrCL GR16:$src1, GR16:$src2)>;
-
-def : Pat<(store (or (srl (loadi16 addr:$dst), CL:$amt),
- (shl GR16:$src2, (sub 16, CL:$amt))), addr:$dst),
- (SHRD16mrCL addr:$dst, GR16:$src2)>;
-
-def : Pat<(or (srl GR16:$src1, (i8 (trunc CX:$amt))),
- (shl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))),
- (SHRD16rrCL GR16:$src1, GR16:$src2)>;
-
-def : Pat<(store (or (srl (loadi16 addr:$dst), (i8 (trunc CX:$amt))),
- (shl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))),
- addr:$dst),
- (SHRD16mrCL addr:$dst, GR16:$src2)>;
-
-def : Pat<(shrd GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm/*:$amt2*/)),
- (SHRD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>;
-
-def : Pat<(store (shrd (loadi16 addr:$dst), (i8 imm:$amt1),
- GR16:$src2, (i8 imm/*:$amt2*/)), addr:$dst),
- (SHRD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>;
-
-// (or (x << c) | (y >> (16 - c))) ==> (shld16 x, y, c)
-def : Pat<(or (shl GR16:$src1, CL:$amt),
- (srl GR16:$src2, (sub 16, CL:$amt))),
- (SHLD16rrCL GR16:$src1, GR16:$src2)>;
-
-def : Pat<(store (or (shl (loadi16 addr:$dst), CL:$amt),
- (srl GR16:$src2, (sub 16, CL:$amt))), addr:$dst),
- (SHLD16mrCL addr:$dst, GR16:$src2)>;
-
-def : Pat<(or (shl GR16:$src1, (i8 (trunc CX:$amt))),
- (srl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))),
- (SHLD16rrCL GR16:$src1, GR16:$src2)>;
-
-def : Pat<(store (or (shl (loadi16 addr:$dst), (i8 (trunc CX:$amt))),
- (srl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))),
- addr:$dst),
- (SHLD16mrCL addr:$dst, GR16:$src2)>;
-
-def : Pat<(shld GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm/*:$amt2*/)),
- (SHLD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>;
-
-def : Pat<(store (shld (loadi16 addr:$dst), (i8 imm:$amt1),
- GR16:$src2, (i8 imm/*:$amt2*/)), addr:$dst),
- (SHLD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>;
-
// (anyext (setcc_carry)) -> (setcc_carry)
def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
(SETB_C16r)>;
def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
(SETB_C32r)>;
+def : Pat<(i32 (anyext (i16 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C32r)>;
// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
let AddedComplexity = 5 in { // Try this before the selecting to OR
@@ -4907,18 +4792,6 @@ def : Pat<(and GR16:$src1, i16immSExt8:$src2),
def : Pat<(and GR32:$src1, i32immSExt8:$src2),
(AND32ri8 GR32:$src1, i32immSExt8:$src2)>;
-// -disable-16bit support.
-def : Pat<(truncstorei16 (i16 imm:$src), addr:$dst),
- (MOV16mi addr:$dst, imm:$src)>;
-def : Pat<(truncstorei16 GR32:$src, addr:$dst),
- (MOV16mr addr:$dst, (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit))>;
-def : Pat<(i32 (sextloadi16 addr:$dst)),
- (MOVSX32rm16 addr:$dst)>;
-def : Pat<(i32 (zextloadi16 addr:$dst)),
- (MOVZX32rm16 addr:$dst)>;
-def : Pat<(i32 (extloadi16 addr:$dst)),
- (MOVZX32rm16 addr:$dst)>;
-
//===----------------------------------------------------------------------===//
// Floating Point Stack Support
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 1c81c5e981ad..744af50e3e45 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -117,7 +117,7 @@ def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
"movd\t{$src, $dst|$dst, $src}", []>;
def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs), (ins GR32:$dst, VR64:$src),
"movd\t{$src, $dst|$dst, $src}", []>;
-def MMX_MOVQ64gmr : MMXRI<0x7E, MRMDestMem, (outs),
+def MMX_MOVQ64gmr : MMXRI<0x7F, MRMDestMem, (outs),
(ins i64mem:$dst, VR64:$src),
"movq\t{$src, $dst|$dst, $src}", []>;
@@ -167,6 +167,9 @@ let neverHasSideEffects = 1 in
def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst), (ins VR64:$src),
"movq2dq\t{$src, $dst|$dst, $src}", []>;
+def MMX_MOVFR642Qrr: SSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins FR64:$src),
+ "movdq2q\t{$src, $dst|$dst, $src}", []>;
+
def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
"movntq\t{$src, $dst|$dst, $src}",
[(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)]>;
@@ -569,6 +572,14 @@ def : Pat<(f64 (bitconvert (v4i16 VR64:$src))),
(MMX_MOVQ2FR64rr VR64:$src)>;
def : Pat<(f64 (bitconvert (v8i8 VR64:$src))),
(MMX_MOVQ2FR64rr VR64:$src)>;
+def : Pat<(v1i64 (bitconvert (f64 FR64:$src))),
+ (MMX_MOVFR642Qrr FR64:$src)>;
+def : Pat<(v2i32 (bitconvert (f64 FR64:$src))),
+ (MMX_MOVFR642Qrr FR64:$src)>;
+def : Pat<(v4i16 (bitconvert (f64 FR64:$src))),
+ (MMX_MOVFR642Qrr FR64:$src)>;
+def : Pat<(v8i8 (bitconvert (f64 FR64:$src))),
+ (MMX_MOVFR642Qrr FR64:$src)>;
let AddedComplexity = 20 in {
def : Pat<(v2i32 (X86vzmovl (bc_v2i32 (load_mmx addr:$src)))),
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 11f7e27c4fc3..212958025bde 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -2878,6 +2878,7 @@ let Constraints = "$src1 = $dst" in {
}
}
+let ImmT = NoImm in { // None of these have i8 immediate fields.
defm PHADDW : SS3I_binop_rm_int_16<0x01, "phaddw",
int_x86_ssse3_phadd_w,
int_x86_ssse3_phadd_w_128>;
@@ -2902,6 +2903,7 @@ defm PMADDUBSW : SS3I_binop_rm_int_8 <0x04, "pmaddubsw",
defm PMULHRSW : SS3I_binop_rm_int_16<0x0B, "pmulhrsw",
int_x86_ssse3_pmul_hr_sw,
int_x86_ssse3_pmul_hr_sw_128, 1>;
+
defm PSHUFB : SS3I_binop_rm_int_8 <0x00, "pshufb",
int_x86_ssse3_pshuf_b,
int_x86_ssse3_pshuf_b_128>;
@@ -2914,7 +2916,9 @@ defm PSIGNW : SS3I_binop_rm_int_16<0x09, "psignw",
defm PSIGND : SS3I_binop_rm_int_32<0x0A, "psignd",
int_x86_ssse3_psign_d,
int_x86_ssse3_psign_d_128>;
+}
+// palignr patterns.
let Constraints = "$src1 = $dst" in {
def PALIGNR64rr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2, i8imm:$src3),
@@ -2935,26 +2939,29 @@ let Constraints = "$src1 = $dst" in {
[]>, OpSize;
}
-// palignr patterns.
-def : Pat<(int_x86_ssse3_palign_r VR64:$src1, VR64:$src2, (i8 imm:$src3)),
- (PALIGNR64rr VR64:$src1, VR64:$src2, (BYTE_imm imm:$src3))>,
+let AddedComplexity = 5 in {
+
+def : Pat<(v1i64 (palign:$src3 VR64:$src1, VR64:$src2)),
+ (PALIGNR64rr VR64:$src2, VR64:$src1,
+ (SHUFFLE_get_palign_imm VR64:$src3))>,
Requires<[HasSSSE3]>;
-def : Pat<(int_x86_ssse3_palign_r VR64:$src1,
- (memop64 addr:$src2),
- (i8 imm:$src3)),
- (PALIGNR64rm VR64:$src1, addr:$src2, (BYTE_imm imm:$src3))>,
+def : Pat<(v2i32 (palign:$src3 VR64:$src1, VR64:$src2)),
+ (PALIGNR64rr VR64:$src2, VR64:$src1,
+ (SHUFFLE_get_palign_imm VR64:$src3))>,
Requires<[HasSSSE3]>;
-
-def : Pat<(int_x86_ssse3_palign_r_128 VR128:$src1, VR128:$src2, (i8 imm:$src3)),
- (PALIGNR128rr VR128:$src1, VR128:$src2, (BYTE_imm imm:$src3))>,
+def : Pat<(v2f32 (palign:$src3 VR64:$src1, VR64:$src2)),
+ (PALIGNR64rr VR64:$src2, VR64:$src1,
+ (SHUFFLE_get_palign_imm VR64:$src3))>,
Requires<[HasSSSE3]>;
-def : Pat<(int_x86_ssse3_palign_r_128 VR128:$src1,
- (memopv2i64 addr:$src2),
- (i8 imm:$src3)),
- (PALIGNR128rm VR128:$src1, addr:$src2, (BYTE_imm imm:$src3))>,
+def : Pat<(v4i16 (palign:$src3 VR64:$src1, VR64:$src2)),
+ (PALIGNR64rr VR64:$src2, VR64:$src1,
+ (SHUFFLE_get_palign_imm VR64:$src3))>,
+ Requires<[HasSSSE3]>;
+def : Pat<(v8i8 (palign:$src3 VR64:$src1, VR64:$src2)),
+ (PALIGNR64rr VR64:$src2, VR64:$src1,
+ (SHUFFLE_get_palign_imm VR64:$src3))>,
Requires<[HasSSSE3]>;
-let AddedComplexity = 5 in {
def : Pat<(v4i32 (palign:$src3 VR128:$src1, VR128:$src2)),
(PALIGNR128rr VR128:$src2, VR128:$src1,
(SHUFFLE_get_palign_imm VR128:$src3))>,
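A standalone C++ byte-level model (not part of the patch, assuming only the documented semantics) of what the 64-bit PALIGNR computes: the concatenation of the tied destination (high half) and the source (low half) is shifted right by the immediate, which is also why the patterns above swap $src1 and $src2 when mapping the shuffle onto the instruction.

    #include <cassert>
    #include <cstdint>

    // palignr(hi, lo, n): shift the 16-byte value hi:lo right by n bytes and
    // keep the low 8 bytes.
    static uint64_t palignr64(uint64_t hi, uint64_t lo, unsigned n) {
      if (n >= 16) return 0;
      if (n >= 8)  return hi >> ((n - 8) * 8);
      if (n == 0)  return lo;
      return (lo >> (n * 8)) | (hi << ((8 - n) * 8));
    }

    int main() {
      // Shifting right by 3 bytes pulls the low 3 bytes of hi into the top.
      assert(palignr64(0x1122334455667788ULL, 0x99AABBCCDDEEFF00ULL, 3) ==
             0x66778899AABBCCDDULL);
      return 0;
    }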
@@ -3510,7 +3517,7 @@ defm DPPS : SS41I_binop_rmi_int<0x40, "dpps",
defm DPPD : SS41I_binop_rmi_int<0x41, "dppd",
int_x86_sse41_dppd, 1>;
defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw",
- int_x86_sse41_mpsadbw, 1>;
+ int_x86_sse41_mpsadbw, 0>;
/// SS41I_ternary_int - SSE 4.1 ternary operator
diff --git a/lib/Target/X86/X86MCAsmInfo.cpp b/lib/Target/X86/X86MCAsmInfo.cpp
index d257ee38c5c7..2b8720bac343 100644
--- a/lib/Target/X86/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/X86MCAsmInfo.cpp
@@ -14,6 +14,7 @@
#include "X86MCAsmInfo.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
@@ -95,9 +96,10 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
Data64bitsDirective = 0;
}
-MCSection *X86ELFMCAsmInfo::getNonexecutableStackSection(MCContext &Ctx) const {
- return MCSectionELF::Create(".note.GNU-stack", MCSectionELF::SHT_PROGBITS,
- 0, SectionKind::getMetadata(), false, Ctx);
+const MCSection *X86ELFMCAsmInfo::
+getNonexecutableStackSection(MCContext &Ctx) const {
+ return Ctx.getELFSection(".note.GNU-stack", MCSectionELF::SHT_PROGBITS,
+ 0, SectionKind::getMetadata(), false);
}
X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) {
diff --git a/lib/Target/X86/X86MCAsmInfo.h b/lib/Target/X86/X86MCAsmInfo.h
index 69716bfc07f6..581522567d09 100644
--- a/lib/Target/X86/X86MCAsmInfo.h
+++ b/lib/Target/X86/X86MCAsmInfo.h
@@ -27,7 +27,7 @@ namespace llvm {
struct X86ELFMCAsmInfo : public MCAsmInfo {
explicit X86ELFMCAsmInfo(const Triple &Triple);
- virtual MCSection *getNonexecutableStackSection(MCContext &Ctx) const;
+ virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const;
};
struct X86MCAsmInfoCOFF : public MCAsmInfoCOFF {
diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h
index 4b2529bccf01..06043ecd3f30 100644
--- a/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/lib/Target/X86/X86MachineFunctionInfo.h
@@ -31,7 +31,8 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
/// stack frame in bytes.
unsigned CalleeSavedFrameSize;
- /// BytesToPopOnReturn - Number of bytes function pops on return.
+ /// BytesToPopOnReturn - Number of bytes the function pops on return (in
+ /// addition to the space used by the return address).
/// Used on Windows platforms for stdcall & fastcall name decoration.
unsigned BytesToPopOnReturn;
@@ -52,6 +53,19 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
/// relocation models.
unsigned GlobalBaseReg;
+ /// ReserveFP - Whether the function should reserve the frame pointer
+ /// register during register allocation, even if no frame pointer ends up
+ /// being used.
+ bool ReserveFP;
+
+ /// VarArgsFrameIndex - FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+ /// RegSaveFrameIndex - X86-64 vararg func register save area.
+ int RegSaveFrameIndex;
+ /// VarArgsGPOffset - X86-64 vararg func int reg offset.
+ unsigned VarArgsGPOffset;
+ /// VarArgsFPOffset - X86-64 vararg func fp reg offset.
+ unsigned VarArgsFPOffset;
+
public:
X86MachineFunctionInfo() : ForceFramePointer(false),
CalleeSavedFrameSize(0),
@@ -59,7 +73,11 @@ public:
ReturnAddrIndex(0),
TailCallReturnAddrDelta(0),
SRetReturnReg(0),
- GlobalBaseReg(0) {}
+ GlobalBaseReg(0),
+ VarArgsFrameIndex(0),
+ RegSaveFrameIndex(0),
+ VarArgsGPOffset(0),
+ VarArgsFPOffset(0) {}
explicit X86MachineFunctionInfo(MachineFunction &MF)
: ForceFramePointer(false),
@@ -68,7 +86,12 @@ public:
ReturnAddrIndex(0),
TailCallReturnAddrDelta(0),
SRetReturnReg(0),
- GlobalBaseReg(0) {}
+ GlobalBaseReg(0),
+ ReserveFP(false),
+ VarArgsFrameIndex(0),
+ RegSaveFrameIndex(0),
+ VarArgsGPOffset(0),
+ VarArgsFPOffset(0) {}
bool getForceFramePointer() const { return ForceFramePointer;}
void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; }
@@ -90,6 +113,21 @@ public:
unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
+
+ bool getReserveFP() const { return ReserveFP; }
+ void setReserveFP(bool reserveFP) { ReserveFP = reserveFP; }
+
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+ void setVarArgsFrameIndex(int Idx) { VarArgsFrameIndex = Idx; }
+
+ int getRegSaveFrameIndex() const { return RegSaveFrameIndex; }
+ void setRegSaveFrameIndex(int Idx) { RegSaveFrameIndex = Idx; }
+
+ unsigned getVarArgsGPOffset() const { return VarArgsGPOffset; }
+ void setVarArgsGPOffset(unsigned Offset) { VarArgsGPOffset = Offset; }
+
+ unsigned getVarArgsFPOffset() const { return VarArgsFPOffset; }
+ void setVarArgsFPOffset(unsigned Offset) { VarArgsFPOffset = Offset; }
};
} // End llvm namespace
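A hedged sketch of how the new per-function vararg state is intended to be populated; the real call site lives in X86ISelLowering.cpp (not in this hunk), and FrameIdx, RegSaveIdx, NumIntRegs and NumXMMRegs are placeholders:

    // During LowerFormalArguments of a vararg function (hypothetical values):
    X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
    FuncInfo->setVarArgsFrameIndex(FrameIdx);      // first stack vararg
    FuncInfo->setRegSaveFrameIndex(RegSaveIdx);    // x86-64 register-save area
    FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);  // GPR slots already used
    FuncInfo->setVarArgsFPOffset(6 * 8 + NumXMMRegs * 16); // after 6 GPR slots
    // LowerVASTART later reads these back through the matching getters.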
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 32f28a520074..a3e04b098600 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -439,7 +439,7 @@ bool X86RegisterInfo::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const MachineModuleInfo &MMI = MF.getMMI();
- return (NoFramePointerElim ||
+ return (DisableFramePointerElim(MF) ||
needsStackRealignment(MF) ||
MFI->hasVarSizedObjects() ||
MFI->isFrameAddressTaken() ||
@@ -464,7 +464,7 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
// variable-sized allocas.
// FIXME: Temporary disable the error - it seems to be too conservative.
if (0 && requiresRealignment && MFI->hasVarSizedObjects())
- llvm_report_error(
+ report_fatal_error(
"Stack realignment in presense of dynamic allocas is not supported");
return (requiresRealignment && !MFI->hasVarSizedObjects());
@@ -608,8 +608,12 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int FrameIndex = MI.getOperand(i).getIndex();
unsigned BasePtr;
+ unsigned Opc = MI.getOpcode();
+ bool AfterFPPop = Opc == X86::TAILJMPm64 || Opc == X86::TAILJMPm;
if (needsStackRealignment(MF))
BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr);
+ else if (AfterFPPop)
+ BasePtr = StackPtr;
else
BasePtr = (hasFP(MF) ? FramePtr : StackPtr);
@@ -618,16 +622,22 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(i).ChangeToRegister(BasePtr, false);
// Now add the frame object offset to the offset from EBP.
+ int FIOffset;
+ if (AfterFPPop) {
+ // Tail call jmp happens after FP is popped.
+ const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ FIOffset = MFI->getObjectOffset(FrameIndex) - TFI.getOffsetOfLocalArea();
+ } else
+ FIOffset = getFrameIndexOffset(MF, FrameIndex);
+
if (MI.getOperand(i+3).isImm()) {
// Offset is a 32-bit integer.
- int Offset = getFrameIndexOffset(MF, FrameIndex) +
- (int)(MI.getOperand(i + 3).getImm());
-
+ int Offset = FIOffset + (int)(MI.getOperand(i + 3).getImm());
MI.getOperand(i + 3).ChangeToImmediate(Offset);
} else {
// Offset is symbolic. This is extremely rare.
- uint64_t Offset = getFrameIndexOffset(MF, FrameIndex) +
- (uint64_t)MI.getOperand(i+3).getOffset();
+ uint64_t Offset = FIOffset + (uint64_t)MI.getOperand(i+3).getOffset();
MI.getOperand(i+3).setOffset(Offset);
}
return 0;
@@ -1487,3 +1497,46 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) {
}
#include "X86GenRegisterInfo.inc"
+
+namespace {
+ struct MSAH : public MachineFunctionPass {
+ static char ID;
+ MSAH() : MachineFunctionPass(&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ const X86TargetMachine *TM =
+ static_cast<const X86TargetMachine *>(&MF.getTarget());
+ const X86RegisterInfo *X86RI = TM->getRegisterInfo();
+ MachineRegisterInfo &RI = MF.getRegInfo();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+ unsigned StackAlignment = X86RI->getStackAlignment();
+
+ // Be over-conservative: scan over all vreg defs and find whether vector
+ // registers are used. If yes, a vector register may be spilled and thus
+ // require dynamic stack realignment.
+ for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister;
+ RegNum < RI.getLastVirtReg(); ++RegNum)
+ if (RI.getRegClass(RegNum)->getAlignment() > StackAlignment) {
+ FuncInfo->setReserveFP(true);
+ return true;
+ }
+
+ // Nothing to do
+ return false;
+ }
+
+ virtual const char *getPassName() const {
+ return "X86 Maximal Stack Alignment Check";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+
+ char MSAH::ID = 0;
+}
+
+FunctionPass*
+llvm::createX86MaxStackAlignmentHeuristicPass() { return new MSAH(); }
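For reference, the new pass is registered later in this same patch (see the X86TargetMachine.cpp hunk below); a minimal sketch of that wiring:

    bool X86TargetMachine::addPreRegAlloc(PassManagerBase &PM,
                                          CodeGenOpt::Level OptLevel) {
      // Run the heuristic before register allocation so the allocation-order
      // hooks can observe ReserveFP via X86MachineFunctionInfo.
      PM.add(createX86MaxStackAlignmentHeuristicPass());
      return false;  // -print-machineinstr shouldn't print after this.
    }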
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index 76b8f7a953c9..49a6ca092814 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -352,11 +352,12 @@ def GR8 : RegisterClass<"X86", [i8], 8,
const TargetMachine &TM = MF.getTarget();
const TargetRegisterInfo *RI = TM.getRegisterInfo();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
// Does the function dedicate RBP / EBP to being a frame ptr?
if (!Subtarget.is64Bit())
// In 32-bit mode, none of the 8-bit registers aliases EBP or ESP.
return begin() + 8;
- else if (RI->hasFP(MF))
+ else if (RI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate SPL or BPL.
return array_endof(X86_GR8_AO_64) - 1;
else
@@ -396,9 +397,10 @@ def GR16 : RegisterClass<"X86", [i16], 16,
const TargetMachine &TM = MF.getTarget();
const TargetRegisterInfo *RI = TM.getRegisterInfo();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
if (Subtarget.is64Bit()) {
// Does the function dedicate RBP to being a frame ptr?
- if (RI->hasFP(MF))
+ if (RI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate SP or BP.
return array_endof(X86_GR16_AO_64) - 1;
else
@@ -406,7 +408,7 @@ def GR16 : RegisterClass<"X86", [i16], 16,
return array_endof(X86_GR16_AO_64);
} else {
// Does the function dedicate EBP to being a frame ptr?
- if (RI->hasFP(MF))
+ if (RI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate SP or BP.
return begin() + 6;
else
@@ -447,9 +449,10 @@ def GR32 : RegisterClass<"X86", [i32], 32,
const TargetMachine &TM = MF.getTarget();
const TargetRegisterInfo *RI = TM.getRegisterInfo();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
if (Subtarget.is64Bit()) {
// Does the function dedicate RBP to being a frame ptr?
- if (RI->hasFP(MF))
+ if (RI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate ESP or EBP.
return array_endof(X86_GR32_AO_64) - 1;
else
@@ -457,7 +460,7 @@ def GR32 : RegisterClass<"X86", [i32], 32,
return array_endof(X86_GR32_AO_64);
} else {
// Does the function dedicate EBP to being a frame ptr?
- if (RI->hasFP(MF))
+ if (RI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate ESP or EBP.
return begin() + 6;
else
@@ -484,9 +487,11 @@ def GR64 : RegisterClass<"X86", [i64], 64,
const TargetMachine &TM = MF.getTarget();
const TargetRegisterInfo *RI = TM.getRegisterInfo();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
if (!Subtarget.is64Bit())
return begin(); // None of these are allocatable in 32-bit.
- if (RI->hasFP(MF)) // Does the function dedicate RBP to being a frame ptr?
+ // Does the function dedicate RBP to being a frame ptr?
+ if (RI->hasFP(MF) || MFI->getReserveFP())
return end()-3; // If so, don't allocate RIP, RSP or RBP
else
return end()-2; // If not, just don't allocate RIP or RSP
@@ -589,8 +594,9 @@ def GR16_NOREX : RegisterClass<"X86", [i16], 16,
GR16_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
// Does the function dedicate RBP / EBP to being a frame ptr?
- if (RI->hasFP(MF))
+ if (RI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate SP or BP.
return end() - 2;
else
@@ -611,8 +617,9 @@ def GR32_NOREX : RegisterClass<"X86", [i32], 32,
GR32_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
// Does the function dedicate RBP / EBP to being a frame ptr?
- if (RI->hasFP(MF))
+ if (RI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate ESP or EBP.
return end() - 2;
else
@@ -633,8 +640,9 @@ def GR64_NOREX : RegisterClass<"X86", [i64], 64,
GR64_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
// Does the function dedicate RBP to being a frame ptr?
- if (RI->hasFP(MF))
+ if (RI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate RIP, RSP or RBP.
return end() - 3;
else
@@ -675,9 +683,10 @@ def GR32_NOSP : RegisterClass<"X86", [i32], 32,
const TargetMachine &TM = MF.getTarget();
const TargetRegisterInfo *RI = TM.getRegisterInfo();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
if (Subtarget.is64Bit()) {
// Does the function dedicate RBP to being a frame ptr?
- if (RI->hasFP(MF))
+ if (RI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate EBP.
return array_endof(X86_GR32_NOSP_AO_64) - 1;
else
@@ -685,7 +694,7 @@ def GR32_NOSP : RegisterClass<"X86", [i32], 32,
return array_endof(X86_GR32_NOSP_AO_64);
} else {
// Does the function dedicate EBP to being a frame ptr?
- if (RI->hasFP(MF))
+ if (RI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate EBP.
return begin() + 6;
else
@@ -710,9 +719,11 @@ def GR64_NOSP : RegisterClass<"X86", [i64], 64,
const TargetMachine &TM = MF.getTarget();
const TargetRegisterInfo *RI = TM.getRegisterInfo();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
if (!Subtarget.is64Bit())
return begin(); // None of these are allocatable in 32-bit.
- if (RI->hasFP(MF)) // Does the function dedicate RBP to being a frame ptr?
+ // Does the function dedicate RBP to being a frame ptr?
+ if (RI->hasFP(MF) || MFI->getReserveFP())
return end()-1; // If so, don't allocate RBP
else
return end(); // If not, any reg in this class is ok.
@@ -733,8 +744,9 @@ def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64,
{
const TargetMachine &TM = MF.getTarget();
const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
// Does the function dedicate RBP to being a frame ptr?
- if (RI->hasFP(MF))
+ if (RI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate RBP.
return end() - 1;
else
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
new file mode 100644
index 000000000000..cd87b82d31a6
--- /dev/null
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -0,0 +1,22 @@
+//===-- X86SelectionDAGInfo.cpp - X86 SelectionDAG Info -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the X86SelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-selectiondag-info"
+#include "X86SelectionDAGInfo.h"
+using namespace llvm;
+
+X86SelectionDAGInfo::X86SelectionDAGInfo() {
+}
+
+X86SelectionDAGInfo::~X86SelectionDAGInfo() {
+}
diff --git a/lib/Target/X86/X86SelectionDAGInfo.h b/lib/Target/X86/X86SelectionDAGInfo.h
new file mode 100644
index 000000000000..983475461f5e
--- /dev/null
+++ b/lib/Target/X86/X86SelectionDAGInfo.h
@@ -0,0 +1,29 @@
+//===-- X86SelectionDAGInfo.h - X86 SelectionDAG Info -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the X86 subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86SELECTIONDAGINFO_H
+#define X86SELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class X86SelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ X86SelectionDAGInfo();
+ ~X86SelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 8a873f04df4e..646af91517fe 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -85,8 +85,7 @@ protected:
bool IsUAMemFast;
/// HasVectorUAMem - True if SIMD operations can have unaligned memory
- /// operands. This may require setting a feature bit in the
- /// processor.
+ /// operands. This may require setting a feature bit in the processor.
bool HasVectorUAMem;
/// DarwinVers - Nonzero if this is a darwin platform: the numeric
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index c608e56c8fbf..f39904ef7eb7 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -17,12 +17,17 @@
#include "llvm/PassManager.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/CommandLine.h"
+
using namespace llvm;
+static cl::opt<bool> DisableSSEDomain("disable-sse-domain",
+ cl::init(false), cl::Hidden,
+ cl::desc("Disable SSE Domain Fixing"));
+
static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
Triple TheTriple(TT);
switch (TheTriple.getOS()) {
@@ -161,6 +166,7 @@ bool X86TargetMachine::addInstSelector(PassManagerBase &PM,
bool X86TargetMachine::addPreRegAlloc(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
+ PM.add(createX86MaxStackAlignmentHeuristicPass());
return false; // -print-machineinstr shouldn't print after this.
}
@@ -172,7 +178,8 @@ bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM,
bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
- if (OptLevel != CodeGenOpt::None && Subtarget.hasSSE2()) {
+ if (OptLevel != CodeGenOpt::None && Subtarget.hasSSE2() &&
+ !DisableSSEDomain) {
PM.add(createSSEDomainFixPass());
return true;
}
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index ae7b5b29af14..dc4234c4a90a 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -51,8 +51,8 @@ public:
virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
virtual X86JITInfo *getJITInfo() { return &JITInfo; }
virtual const X86Subtarget *getSubtargetImpl() const{ return &Subtarget; }
- virtual X86TargetLowering *getTargetLowering() const {
- return const_cast<X86TargetLowering*>(&TLInfo);
+ virtual const X86TargetLowering *getTargetLowering() const {
+ return &TLInfo;
}
virtual const X86RegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
diff --git a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
index 5801b40b7e90..c100c590135e 100644
--- a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
@@ -123,7 +123,7 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
switch (GV->getLinkage()) {
case GlobalValue::AppendingLinkage:
- llvm_report_error("AppendingLinkage is not supported by this target!");
+ report_fatal_error("AppendingLinkage is not supported by this target!");
case GlobalValue::LinkOnceAnyLinkage:
case GlobalValue::LinkOnceODRLinkage:
case GlobalValue::WeakAnyLinkage:
@@ -148,7 +148,7 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
llvm_unreachable("Unknown linkage type!");
}
- EmitAlignment(Align, GV, 2);
+ EmitAlignment(Align > 2 ? Align : 2, GV);
unsigned Size = TD->getTypeAllocSize(C->getType());
if (GV->isThreadLocal()) {
diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt
index 0965323b998a..1b8e7edfc7ca 100644
--- a/lib/Target/XCore/CMakeLists.txt
+++ b/lib/Target/XCore/CMakeLists.txt
@@ -11,7 +11,6 @@ tablegen(XCoreGenCallingConv.inc -gen-callingconv)
tablegen(XCoreGenSubtarget.inc -gen-subtarget)
add_llvm_target(XCore
- MCSectionXCore.cpp
XCoreFrameInfo.cpp
XCoreInstrInfo.cpp
XCoreISelDAGToDAG.cpp
@@ -21,4 +20,5 @@ add_llvm_target(XCore
XCoreSubtarget.cpp
XCoreTargetMachine.cpp
XCoreTargetObjectFile.cpp
+ XCoreSelectionDAGInfo.cpp
)
diff --git a/lib/Target/XCore/MCSectionXCore.cpp b/lib/Target/XCore/MCSectionXCore.cpp
deleted file mode 100644
index 5acceafe9ea3..000000000000
--- a/lib/Target/XCore/MCSectionXCore.cpp
+++ /dev/null
@@ -1,35 +0,0 @@
-//===- MCSectionXCore.cpp - XCore-specific section representation ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the MCSectionXCore class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MCSectionXCore.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-MCSectionXCore *
-MCSectionXCore::Create(const StringRef &Section, unsigned Type,
- unsigned Flags, SectionKind K,
- bool isExplicit, MCContext &Ctx) {
- return new (Ctx) MCSectionXCore(Section, Type, Flags, K, isExplicit);
-}
-
-
-/// PrintTargetSpecificSectionFlags - This handles the XCore-specific cp/dp
-/// section flags.
-void MCSectionXCore::PrintTargetSpecificSectionFlags(const MCAsmInfo &MAI,
- raw_ostream &OS) const {
- if (getFlags() & MCSectionXCore::SHF_CP_SECTION)
- OS << 'c';
- if (getFlags() & MCSectionXCore::SHF_DP_SECTION)
- OS << 'd';
-}
diff --git a/lib/Target/XCore/MCSectionXCore.h b/lib/Target/XCore/MCSectionXCore.h
deleted file mode 100644
index 02f8f95572c8..000000000000
--- a/lib/Target/XCore/MCSectionXCore.h
+++ /dev/null
@@ -1,54 +0,0 @@
-//===- MCSectionXCore.h - XCore-specific section representation -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the MCSectionXCore class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_MCSECTION_XCORE_H
-#define LLVM_MCSECTION_XCORE_H
-
-#include "llvm/MC/MCSectionELF.h"
-
-namespace llvm {
-
-class MCSectionXCore : public MCSectionELF {
- MCSectionXCore(const StringRef &Section, unsigned Type, unsigned Flags,
- SectionKind K, bool isExplicit)
- : MCSectionELF(Section, Type, Flags, K, isExplicit) {}
-
-public:
-
- enum {
- /// SHF_CP_SECTION - All sections with the "c" flag are grouped together
- /// by the linker to form the constant pool and the cp register is set to
- /// the start of the constant pool by the boot code.
- SHF_CP_SECTION = FIRST_TARGET_DEP_FLAG,
-
- /// SHF_DP_SECTION - All sections with the "d" flag are grouped together
- /// by the linker to form the data section and the dp register is set to
- /// the start of the section by the boot code.
- SHF_DP_SECTION = FIRST_TARGET_DEP_FLAG << 1
- };
-
- static MCSectionXCore *Create(const StringRef &Section, unsigned Type,
- unsigned Flags, SectionKind K,
- bool isExplicit, MCContext &Ctx);
-
-
- /// PrintTargetSpecificSectionFlags - This handles the XCore-specific cp/dp
- /// section flags.
- virtual void PrintTargetSpecificSectionFlags(const MCAsmInfo &MAI,
- raw_ostream &OS) const;
-
-};
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index 1615547b4152..5564ddf133ea 100644
--- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "XCore.h"
-#include "XCoreISelLowering.h"
#include "XCoreTargetMachine.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
@@ -40,7 +39,7 @@ using namespace llvm;
///
namespace {
class XCoreDAGToDAGISel : public SelectionDAGISel {
- XCoreTargetLowering &Lowering;
+ const XCoreTargetLowering &Lowering;
const XCoreSubtarget &Subtarget;
public:
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 27e523324664..3990b8b3c012 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -158,7 +158,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
}
SDValue XCoreTargetLowering::
-LowerOperation(SDValue Op, SelectionDAG &DAG) {
+LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode())
{
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
@@ -187,7 +187,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) {
/// type with new values built out of custom code.
void XCoreTargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue>&Results,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
switch (N->getOpcode()) {
default:
llvm_unreachable("Don't know how to custom expand this!");
@@ -210,7 +210,7 @@ getFunctionAlignment(const Function *) const {
//===----------------------------------------------------------------------===//
SDValue XCoreTargetLowering::
-LowerSELECT_CC(SDValue Op, SelectionDAG &DAG)
+LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
{
DebugLoc dl = Op.getDebugLoc();
SDValue Cond = DAG.getNode(ISD::SETCC, dl, MVT::i32, Op.getOperand(2),
@@ -220,7 +220,8 @@ LowerSELECT_CC(SDValue Op, SelectionDAG &DAG)
}
SDValue XCoreTargetLowering::
-getGlobalAddressWrapper(SDValue GA, GlobalValue *GV, SelectionDAG &DAG)
+getGlobalAddressWrapper(SDValue GA, const GlobalValue *GV,
+ SelectionDAG &DAG) const
{
// FIXME there is no actual debug info here
DebugLoc dl = GA.getDebugLoc();
@@ -241,9 +242,9 @@ getGlobalAddressWrapper(SDValue GA, GlobalValue *GV, SelectionDAG &DAG)
}
SDValue XCoreTargetLowering::
-LowerGlobalAddress(SDValue Op, SelectionDAG &DAG)
+LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
{
- GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32);
// If it's a debug information descriptor, don't mess with it.
if (DAG.isVerifiedDebugInfoDesc(Op))
@@ -262,12 +263,12 @@ static inline bool isZeroLengthArray(const Type *Ty) {
}
SDValue XCoreTargetLowering::
-LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG)
+LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
{
// FIXME there isn't really debug info here
DebugLoc dl = Op.getDebugLoc();
// transform to label + getid() * size
- GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32);
const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
if (!GVar) {
@@ -296,18 +297,18 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG)
}
SDValue XCoreTargetLowering::
-LowerBlockAddress(SDValue Op, SelectionDAG &DAG)
+LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const
{
DebugLoc DL = Op.getDebugLoc();
- BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
SDValue Result = DAG.getBlockAddress(BA, getPointerTy(), /*isTarget=*/true);
return DAG.getNode(XCoreISD::PCRelativeWrapper, DL, getPointerTy(), Result);
}
SDValue XCoreTargetLowering::
-LowerConstantPool(SDValue Op, SelectionDAG &DAG)
+LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
{
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
// FIXME there isn't really debug info here
@@ -329,7 +330,7 @@ unsigned XCoreTargetLowering::getJumpTableEncoding() const {
}
SDValue XCoreTargetLowering::
-LowerBR_JT(SDValue Op, SelectionDAG &DAG)
+LowerBR_JT(SDValue Op, SelectionDAG &DAG) const
{
SDValue Chain = Op.getOperand(0);
SDValue Table = Op.getOperand(1);
@@ -391,7 +392,7 @@ IsWordAlignedBasePlusConstantOffset(SDValue Addr, SDValue &AlignedBase,
}
SDValue XCoreTargetLowering::
-LowerLOAD(SDValue Op, SelectionDAG &DAG)
+LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
LoadSDNode *LD = cast<LoadSDNode>(Op);
assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
@@ -494,7 +495,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG)
}
SDValue XCoreTargetLowering::
-LowerSTORE(SDValue Op, SelectionDAG &DAG)
+LowerSTORE(SDValue Op, SelectionDAG &DAG) const
{
StoreSDNode *ST = cast<StoreSDNode>(Op);
assert(!ST->isTruncatingStore() && "Unexpected store type");
@@ -554,7 +555,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG)
}
SDValue XCoreTargetLowering::
-LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG)
+LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const
{
assert(Op.getValueType() == MVT::i32 && Op.getOpcode() == ISD::SMUL_LOHI &&
"Unexpected operand to lower!");
@@ -571,7 +572,7 @@ LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG)
}
SDValue XCoreTargetLowering::
-LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG)
+LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const
{
assert(Op.getValueType() == MVT::i32 && Op.getOpcode() == ISD::UMUL_LOHI &&
"Unexpected operand to lower!");
@@ -647,7 +648,7 @@ isADDADDMUL(SDValue Op, SDValue &Mul0, SDValue &Mul1, SDValue &Addend0,
}
SDValue XCoreTargetLowering::
-TryExpandADDWithMul(SDNode *N, SelectionDAG &DAG)
+TryExpandADDWithMul(SDNode *N, SelectionDAG &DAG) const
{
SDValue Mul;
SDValue Other;
@@ -707,7 +708,7 @@ TryExpandADDWithMul(SDNode *N, SelectionDAG &DAG)
}
SDValue XCoreTargetLowering::
-ExpandADDSUB(SDNode *N, SelectionDAG &DAG)
+ExpandADDSUB(SDNode *N, SelectionDAG &DAG) const
{
assert(N->getValueType(0) == MVT::i64 &&
(N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
@@ -747,7 +748,7 @@ ExpandADDSUB(SDNode *N, SelectionDAG &DAG)
}
SDValue XCoreTargetLowering::
-LowerVAARG(SDValue Op, SelectionDAG &DAG)
+LowerVAARG(SDValue Op, SelectionDAG &DAG) const
{
llvm_unreachable("unimplemented");
// FIXME: Arguments passed by reference need an extra dereference.
@@ -769,7 +770,7 @@ LowerVAARG(SDValue Op, SelectionDAG &DAG)
}
SDValue XCoreTargetLowering::
-LowerVASTART(SDValue Op, SelectionDAG &DAG)
+LowerVASTART(SDValue Op, SelectionDAG &DAG) const
{
DebugLoc dl = Op.getDebugLoc();
// vastart stores the address of the VarArgsFrameIndex slot into the
@@ -782,7 +783,8 @@ LowerVASTART(SDValue Op, SelectionDAG &DAG)
false, false, 0);
}
-SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
+SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op,
+ SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
// Depths > 0 not supported yet!
if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0)
@@ -812,7 +814,7 @@ XCoreTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals) const {
// XCore target does not yet support tail call optimization.
isTailCall = false;
@@ -839,7 +841,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals) const {
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
@@ -962,7 +964,7 @@ XCoreTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals) const {
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
@@ -994,7 +996,8 @@ XCoreTargetLowering::LowerFormalArguments(SDValue Chain,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl,
SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals)
+ const {
switch (CallConv)
{
default:
@@ -1018,7 +1021,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
&Ins,
DebugLoc dl,
SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
@@ -1132,7 +1135,7 @@ bool XCoreTargetLowering::
CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<EVT> &OutTys,
const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
RVLocs, *DAG.getContext());
@@ -1143,7 +1146,7 @@ SDValue
XCoreTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
- DebugLoc dl, SelectionDAG &DAG) {
+ DebugLoc dl, SelectionDAG &DAG) const {
// CCValAssign - represent the assignment of
// the return value to a location
@@ -1194,8 +1197,7 @@ XCoreTargetLowering::LowerReturn(SDValue Chain,
MachineBasicBlock *
XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB,
- DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
+ MachineBasicBlock *BB) const {
const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
assert((MI->getOpcode() == XCore::SELECT_CC) &&
@@ -1225,12 +1227,9 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
F->insert(It, sinkMBB);
// Update machine-CFG edges by first adding all successors of the current
// block to the new block which will contain the Phi node for the select.
- // Also inform sdisel of the edge changes.
for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
- E = BB->succ_end(); I != E; ++I) {
- EM->insert(std::make_pair(*I, sinkMBB));
+ E = BB->succ_end(); I != E; ++I)
sinkMBB->addSuccessor(*I);
- }
// Next, remove all successors of the current block, and add the true
// and fallthrough blocks as its successors.
while (!BB->succ_empty())
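
The custom inserter above hand-splits the block containing a SELECT_CC, and with the sdisel edge map (the removed EM parameter) gone, the CFG update shrinks to a plain successor transfer. A minimal sketch of that idiom, assuming the removeSuccessor call that falls just past the truncated hunk:

// Give sinkMBB (the block that will hold the select's PHI) all of BB's
// current successors, then detach them from BB so the true and
// fallthrough blocks can be added as BB's only successors.
for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
       E = BB->succ_end(); I != E; ++I)
  sinkMBB->addSuccessor(*I);
while (!BB->succ_empty())
  BB->removeSuccessor(BB->succ_begin());
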
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index 3ccdeec141bb..d8d2a3aa7315 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -83,21 +83,21 @@ namespace llvm {
virtual unsigned getJumpTableEncoding() const;
/// LowerOperation - Provide custom lowering hooks for some operations.
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
/// ReplaceNodeResults - Replace the results of node with an illegal result
/// type with new values built out of custom code.
///
virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
- SelectionDAG &DAG);
+ SelectionDAG &DAG) const;
/// getTargetNodeName - This method returns the name of a target specific
// DAG node.
virtual const char *getTargetNodeName(unsigned Opcode) const;
- virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *MBB,
- DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const;
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
virtual bool isLegalAddressingMode(const AddrMode &AM,
const Type *Ty) const;
@@ -115,37 +115,37 @@ namespace llvm {
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
SDValue LowerCCCCallTo(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
- SDValue getReturnAddressFrameIndex(SelectionDAG &DAG);
- SDValue getGlobalAddressWrapper(SDValue GA, GlobalValue *GV,
- SelectionDAG &DAG);
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
+ SDValue getGlobalAddressWrapper(SDValue GA, const GlobalValue *GV,
+ SelectionDAG &DAG) const;
// Lower Operand specifics
- SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG);
- SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG);
- SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG);
- SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG);
- SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG);
- SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG);
- SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG);
- SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG);
- SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG);
- SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG);
- SDValue LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG);
- SDValue LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG);
- SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
// Inline asm support
std::vector<unsigned>
@@ -153,8 +153,8 @@ namespace llvm {
EVT VT) const;
// Expand specifics
- SDValue TryExpandADDWithMul(SDNode *Op, SelectionDAG &DAG);
- SDValue ExpandADDSUB(SDNode *Op, SelectionDAG &DAG);
+ SDValue TryExpandADDWithMul(SDNode *Op, SelectionDAG &DAG) const;
+ SDValue ExpandADDSUB(SDNode *Op, SelectionDAG &DAG) const;
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
@@ -171,7 +171,7 @@ namespace llvm {
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
LowerCall(SDValue Chain, SDValue Callee,
@@ -180,19 +180,19 @@ namespace llvm {
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals);
+ SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
- DebugLoc dl, SelectionDAG &DAG);
+ DebugLoc dl, SelectionDAG &DAG) const;
virtual bool
CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<EVT> &OutTys,
const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags,
- SelectionDAG &DAG);
+ SelectionDAG &DAG) const;
};
}
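
The thread running through both XCoreISelLowering files is a const-correctness migration: every custom lowering hook becomes a const member function, and the GlobalValue and BlockAddress pointers it handles become pointers-to-const. The resulting hook shape, as a sketch against the TargetLowering interface of this era (FooTargetLowering is hypothetical; constructor elided):

class FooTargetLowering : public TargetLowering {
public:
  // Hooks may no longer mutate the lowering object, which is what lets
  // TargetMachine::getTargetLowering() return a const pointer (see the
  // XCoreTargetMachine.h hunk further below, which drops its const_cast).
  virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue getGlobalAddressWrapper(SDValue GA, const GlobalValue *GV,
                                  SelectionDAG &DAG) const;
};
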
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index ab71d0535444..0cfb358617e8 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -113,7 +113,7 @@ XCoreRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const {
}
bool XCoreRegisterInfo::hasFP(const MachineFunction &MF) const {
- return NoFramePointerElim || MF.getFrameInfo()->hasVarSizedObjects();
+ return DisableFramePointerElim(MF) || MF.getFrameInfo()->hasVarSizedObjects();
}
// This function eliminates ADJCALLSTACKDOWN,
@@ -225,12 +225,9 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned FramePtr = XCore::R10;
if (!isUs) {
- if (!RS) {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "eliminateFrameIndex Frame size too big: " << Offset;
- llvm_report_error(Msg.str());
- }
+ if (!RS)
+ report_fatal_error("eliminateFrameIndex Frame size too big: " +
+ Twine(Offset));
unsigned ScratchReg = RS->scavengeRegister(XCore::GRRegsRegisterClass, II,
SPAdj);
loadConstant(MBB, II, ScratchReg, Offset, dl);
@@ -278,12 +275,9 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
} else {
bool isU6 = isImmU6(Offset);
- if (!isU6 && !isImmU16(Offset)) {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "eliminateFrameIndex Frame size too big: " << Offset;
- llvm_report_error(Msg.str());
- }
+ if (!isU6 && !isImmU16(Offset))
+ report_fatal_error("eliminateFrameIndex Frame size too big: " +
+ Twine(Offset));
switch (MI.getOpcode()) {
int NewOpcode;
@@ -360,10 +354,7 @@ loadConstant(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
// TODO use mkmsk if possible.
if (!isImmU16(Value)) {
// TODO use constant pool.
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "loadConstant value too big " << Value;
- llvm_report_error(Msg.str());
+ report_fatal_error("loadConstant value too big " + Twine(Value));
}
int Opcode = isImmU6(Value) ? XCore::LDC_ru6 : XCore::LDC_lru6;
BuildMI(MBB, I, dl, TII.get(Opcode), DstReg).addImm(Value);
@@ -375,12 +366,8 @@ storeToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
assert(Offset%4 == 0 && "Misaligned stack offset");
Offset/=4;
bool isU6 = isImmU6(Offset);
- if (!isU6 && !isImmU16(Offset)) {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "storeToStack offset too big " << Offset;
- llvm_report_error(Msg.str());
- }
+ if (!isU6 && !isImmU16(Offset))
+ report_fatal_error("storeToStack offset too big " + Twine(Offset));
int Opcode = isU6 ? XCore::STWSP_ru6 : XCore::STWSP_lru6;
BuildMI(MBB, I, dl, TII.get(Opcode))
.addReg(SrcReg)
@@ -393,12 +380,8 @@ loadFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
assert(Offset%4 == 0 && "Misaligned stack offset");
Offset/=4;
bool isU6 = isImmU6(Offset);
- if (!isU6 && !isImmU16(Offset)) {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "loadFromStack offset too big " << Offset;
- llvm_report_error(Msg.str());
- }
+ if (!isU6 && !isImmU16(Offset))
+ report_fatal_error("loadFromStack offset too big " + Twine(Offset));
int Opcode = isU6 ? XCore::LDWSP_ru6 : XCore::LDWSP_lru6;
BuildMI(MBB, I, dl, TII.get(Opcode), DstReg)
.addImm(Offset);
@@ -425,10 +408,7 @@ void XCoreRegisterInfo::emitPrologue(MachineFunction &MF) const {
if (!isU6 && !isImmU16(FrameSize)) {
// FIXME could emit multiple instructions.
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "emitPrologue Frame size too big: " << FrameSize;
- llvm_report_error(Msg.str());
+ report_fatal_error("emitPrologue Frame size too big: " + Twine(FrameSize));
}
bool emitFrameMoves = needsFrameMoves(MF);
@@ -549,10 +529,7 @@ void XCoreRegisterInfo::emitEpilogue(MachineFunction &MF,
if (!isU6 && !isImmU16(FrameSize)) {
// FIXME could emit multiple instructions.
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "emitEpilogue Frame size too big: " << FrameSize;
- llvm_report_error(Msg.str());
+ report_fatal_error("emitEpilogue Frame size too big: " + Twine(FrameSize));
}
if (FrameSize) {
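
Every diagnostic in XCoreRegisterInfo.cpp gets the same rewrite: the std::string, raw_string_ostream and llvm_report_error triple collapses into one report_fatal_error call, with Twine handling the concatenation. The idiom in isolation (checkFrameOffset is illustrative, not part of the patch):

#include "llvm/ADT/Twine.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;

static void checkFrameOffset(int Offset, bool InRange) {
  if (!InRange)
    // Twine concatenates lazily; the message string is only
    // materialized inside report_fatal_error itself.
    report_fatal_error("frame offset out of range: " + Twine(Offset));
}
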
diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..6aac23753101
--- /dev/null
+++ b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
@@ -0,0 +1,22 @@
+//===-- XCoreSelectionDAGInfo.cpp - XCore SelectionDAG Info ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the XCoreSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "xcore-selectiondag-info"
+#include "XCoreSelectionDAGInfo.h"
+using namespace llvm;
+
+XCoreSelectionDAGInfo::XCoreSelectionDAGInfo() {
+}
+
+XCoreSelectionDAGInfo::~XCoreSelectionDAGInfo() {
+}
diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.h b/lib/Target/XCore/XCoreSelectionDAGInfo.h
new file mode 100644
index 000000000000..fd9671681fc6
--- /dev/null
+++ b/lib/Target/XCore/XCoreSelectionDAGInfo.h
@@ -0,0 +1,29 @@
+//===-- XCoreSelectionDAGInfo.h - XCore SelectionDAG Info -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the XCore subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCORESELECTIONDAGINFO_H
+#define XCORESELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class XCoreSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ XCoreSelectionDAGInfo();
+ ~XCoreSelectionDAGInfo();
+};
+
+}
+
+#endif
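
The two new files introduce an empty TargetSelectionDAGInfo subclass; the expected consumer is the target machine, which would own one instance and expose it next to the other target hooks. The wiring below is not part of this patch, just a plausible sketch (constructor elided):

class XCoreTargetMachine : public LLVMTargetMachine {
  XCoreSelectionDAGInfo TSInfo;  // owned by the target machine
public:
  virtual const XCoreSelectionDAGInfo *getSelectionDAGInfo() const {
    return &TSInfo;
  }
};
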
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index b0b1464dbe0c..701a6f1dfafc 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -36,8 +36,8 @@ public:
virtual const XCoreInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const XCoreFrameInfo *getFrameInfo() const { return &FrameInfo; }
virtual const XCoreSubtarget *getSubtargetImpl() const { return &Subtarget; }
- virtual XCoreTargetLowering *getTargetLowering() const {
- return const_cast<XCoreTargetLowering*>(&TLInfo);
+ virtual const XCoreTargetLowering *getTargetLowering() const {
+ return &TLInfo;
}
virtual const TargetRegisterInfo *getRegisterInfo() const {
diff --git a/lib/Target/XCore/XCoreTargetObjectFile.cpp b/lib/Target/XCore/XCoreTargetObjectFile.cpp
index 7de3b55d38f6..cdf5a5371e22 100644
--- a/lib/Target/XCore/XCoreTargetObjectFile.cpp
+++ b/lib/Target/XCore/XCoreTargetObjectFile.cpp
@@ -9,7 +9,8 @@
#include "XCoreTargetObjectFile.h"
#include "XCoreSubtarget.h"
-#include "MCSectionXCore.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -18,34 +19,31 @@ void XCoreTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
DataSection =
- MCSectionXCore::Create(".dp.data", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE |
- MCSectionXCore::SHF_DP_SECTION,
- SectionKind::getDataRel(), false, getContext());
+ Ctx.getELFSection(".dp.data", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE |
+ MCSectionELF::XCORE_SHF_DP_SECTION,
+ SectionKind::getDataRel(), false);
BSSSection =
- MCSectionXCore::Create(".dp.bss", MCSectionELF::SHT_NOBITS,
- MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE |
- MCSectionXCore::SHF_DP_SECTION,
- SectionKind::getBSS(), false, getContext());
+ Ctx.getELFSection(".dp.bss", MCSectionELF::SHT_NOBITS,
+ MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE |
+ MCSectionELF::XCORE_SHF_DP_SECTION,
+ SectionKind::getBSS(), false);
MergeableConst4Section =
- MCSectionXCore::Create(".cp.rodata.cst4", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE |
- MCSectionXCore::SHF_CP_SECTION,
- SectionKind::getMergeableConst4(), false,
- getContext());
+ Ctx.getELFSection(".cp.rodata.cst4", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE |
+ MCSectionELF::XCORE_SHF_CP_SECTION,
+ SectionKind::getMergeableConst4(), false);
MergeableConst8Section =
- MCSectionXCore::Create(".cp.rodata.cst8", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE |
- MCSectionXCore::SHF_CP_SECTION,
- SectionKind::getMergeableConst8(), false,
- getContext());
+ Ctx.getELFSection(".cp.rodata.cst8", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE |
+ MCSectionELF::XCORE_SHF_CP_SECTION,
+ SectionKind::getMergeableConst8(), false);
MergeableConst16Section =
- MCSectionXCore::Create(".cp.rodata.cst16", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE |
- MCSectionXCore::SHF_CP_SECTION,
- SectionKind::getMergeableConst16(), false,
- getContext());
+ Ctx.getELFSection(".cp.rodata.cst16", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE |
+ MCSectionELF::XCORE_SHF_CP_SECTION,
+ SectionKind::getMergeableConst16(), false);
// TLS globals are lowered in the backend to arrays indexed by the current
// thread id. After lowering they require no special handling by the linker
@@ -54,11 +52,10 @@ void XCoreTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){
TLSBSSSection = BSSSection;
ReadOnlySection =
- MCSectionXCore::Create(".cp.rodata", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC |
- MCSectionXCore::SHF_CP_SECTION,
- SectionKind::getReadOnlyWithRel(), false,
- getContext());
+ Ctx.getELFSection(".cp.rodata", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC |
+ MCSectionELF::XCORE_SHF_CP_SECTION,
+ SectionKind::getReadOnlyWithRel(), false);
// Dynamic linking is not supported. Data with relocations is placed in the
// same section as data without relocations.
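
XCoreTargetObjectFile.cpp retires the MCSectionXCore factory: sections now come from MCContext::getELFSection, with the XCore data-pointer and constant-pool flags folded into MCSectionELF as XCORE_SHF_DP_SECTION and XCORE_SHF_CP_SECTION. Since the context creates the sections, repeated requests for the same name are uniqued rather than duplicated. The idiom in isolation (Ctx is the MCContext passed to Initialize):

const MCSection *DPData =
  Ctx.getELFSection(".dp.data", MCSectionELF::SHT_PROGBITS,
                    MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE |
                    MCSectionELF::XCORE_SHF_DP_SECTION,
                    SectionKind::getDataRel(), false);
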
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 40a87e880d7b..89f213e2ac36 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -64,7 +64,7 @@ namespace {
CallGraphSCCPass::getAnalysisUsage(AU);
}
- virtual bool runOnSCC(std::vector<CallGraphNode *> &SCC);
+ virtual bool runOnSCC(CallGraphSCC &SCC);
static char ID; // Pass identification, replacement for typeid
explicit ArgPromotion(unsigned maxElements = 3)
: CallGraphSCCPass(&ID), maxElements(maxElements) {}
@@ -91,20 +91,21 @@ Pass *llvm::createArgumentPromotionPass(unsigned maxElements) {
return new ArgPromotion(maxElements);
}
-bool ArgPromotion::runOnSCC(std::vector<CallGraphNode *> &SCC) {
+bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
bool Changed = false, LocalChange;
do { // Iterate until we stop promoting from this SCC.
LocalChange = false;
// Attempt to promote arguments from all functions in this SCC.
- for (unsigned i = 0, e = SCC.size(); i != e; ++i)
- if (CallGraphNode *CGN = PromoteArguments(SCC[i])) {
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ if (CallGraphNode *CGN = PromoteArguments(*I)) {
LocalChange = true;
- SCC[i] = CGN;
+ SCC.ReplaceNode(*I, CGN);
}
+ }
Changed |= LocalChange; // Remember that we changed something.
} while (LocalChange);
-
+
return Changed;
}
@@ -873,8 +874,14 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
NF_CGN->stealCalledFunctionsFrom(CG[F]);
- // Now that the old function is dead, delete it.
- delete CG.removeFunctionFromModule(F);
+ // Now that the old function is dead, delete it. If there is a dangling
+ // reference to the CallGraphNode, just leave the dead function around for
+ // someone else to nuke.
+ CallGraphNode *CGN = CG[F];
+ if (CGN->getNumReferences() == 0)
+ delete CG.removeFunctionFromModule(CGN);
+ else
+ F->setLinkage(Function::ExternalLinkage);
return NF_CGN;
}
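
ArgumentPromotion is the first of several passes in this patch ported from runOnSCC(std::vector<CallGraphNode*>&) to the new CallGraphSCC interface: iteration goes through begin()/end(), and replacing a node in place goes through ReplaceNode instead of assignment into the vector. The generic shape, as a sketch (MyPass and visit are hypothetical):

struct MyPass : public CallGraphSCCPass {
  static char ID;
  MyPass() : CallGraphSCCPass(&ID) {}

  virtual bool runOnSCC(CallGraphSCC &SCC) {
    bool Changed = false;
    for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
      if (Function *F = (*I)->getFunction())  // null for external nodes
        Changed |= visit(F);
    return Changed;
  }
  bool visit(Function *F);
};
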
diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt
index 92bef3bb75e9..65483e8fed63 100644
--- a/lib/Transforms/IPO/CMakeLists.txt
+++ b/lib/Transforms/IPO/CMakeLists.txt
@@ -23,3 +23,5 @@ add_llvm_library(LLVMipo
StripSymbols.cpp
StructRetPromotion.cpp
)
+
+target_link_libraries (LLVMipo LLVMScalarOpts LLVMInstCombine)
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 227602d19f56..6443dd4f47a6 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -243,6 +243,9 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
if (cast<CallInst>(Call)->isTailCall())
cast<CallInst>(New)->setTailCall();
}
+ if (MDNode *N = Call->getDbgMetadata())
+ New->setDbgMetadata(N);
+
Args.clear();
if (!Call->use_empty())
@@ -694,18 +697,6 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
AttrListPtr NewPAL = AttrListPtr::get(AttributesVec.begin(),
AttributesVec.end());
- // Work around LLVM bug PR56: the CWriter cannot emit varargs functions which
- // have zero fixed arguments.
- //
- // Note that we apply this hack for a vararg fuction that does not have any
- // arguments anymore, but did have them before (so don't bother fixing
- // functions that were already broken wrt CWriter).
- bool ExtraArgHack = false;
- if (Params.empty() && FTy->isVarArg() && FTy->getNumParams() != 0) {
- ExtraArgHack = true;
- Params.push_back(Type::getInt32Ty(F->getContext()));
- }
-
// Create the new function type based on the recomputed parameters.
FunctionType *NFTy = FunctionType::get(NRetTy, Params, FTy->isVarArg());
@@ -755,9 +746,6 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
}
- if (ExtraArgHack)
- Args.push_back(UndefValue::get(Type::getInt32Ty(F->getContext())));
-
// Push any varargs arguments on the list. Don't forget their attributes.
for (CallSite::arg_iterator E = CS.arg_end(); I != E; ++I, ++i) {
Args.push_back(*I);
@@ -785,6 +773,9 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
if (cast<CallInst>(Call)->isTailCall())
cast<CallInst>(New)->setTailCall();
}
+ if (MDNode *N = Call->getDbgMetadata())
+ New->setDbgMetadata(N);
+
Args.clear();
if (!Call->use_empty()) {
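
Both call-rewriting paths in DeadArgumentElimination now copy the old call's debug metadata onto its replacement, so source locations survive the rewrite. Factored into a helper for illustration (copyDbg is not part of the patch):

static void copyDbg(Instruction *Old, Instruction *New) {
  if (MDNode *N = Old->getDbgMetadata())
    New->setDbgMetadata(N);
}
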
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 298d5cf39162..9bd7af61c531 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -44,20 +44,20 @@ namespace {
FunctionAttrs() : CallGraphSCCPass(&ID) {}
// runOnSCC - Analyze the SCC, performing the transformation if possible.
- bool runOnSCC(std::vector<CallGraphNode *> &SCC);
+ bool runOnSCC(CallGraphSCC &SCC);
// AddReadAttrs - Deduce readonly/readnone attributes for the SCC.
- bool AddReadAttrs(const std::vector<CallGraphNode *> &SCC);
+ bool AddReadAttrs(const CallGraphSCC &SCC);
// AddNoCaptureAttrs - Deduce nocapture attributes for the SCC.
- bool AddNoCaptureAttrs(const std::vector<CallGraphNode *> &SCC);
+ bool AddNoCaptureAttrs(const CallGraphSCC &SCC);
// IsFunctionMallocLike - Does this function allocate new memory?
bool IsFunctionMallocLike(Function *F,
SmallPtrSet<Function*, 8> &) const;
// AddNoAliasAttrs - Deduce noalias attributes for the SCC.
- bool AddNoAliasAttrs(const std::vector<CallGraphNode *> &SCC);
+ bool AddNoAliasAttrs(const CallGraphSCC &SCC);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
@@ -123,19 +123,19 @@ bool FunctionAttrs::PointsToLocalMemory(Value *V) {
}
/// AddReadAttrs - Deduce readonly/readnone attributes for the SCC.
-bool FunctionAttrs::AddReadAttrs(const std::vector<CallGraphNode *> &SCC) {
+bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
SmallPtrSet<Function*, 8> SCCNodes;
// Fill SCCNodes with the elements of the SCC. Used for quickly
// looking up whether a given CallGraphNode is in this SCC.
- for (unsigned i = 0, e = SCC.size(); i != e; ++i)
- SCCNodes.insert(SCC[i]->getFunction());
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
+ SCCNodes.insert((*I)->getFunction());
// Check if any of the functions in the SCC read or write memory. If they
// write memory then they can't be marked readnone or readonly.
bool ReadsMemory = false;
- for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
- Function *F = SCC[i]->getFunction();
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
if (F == 0)
// External node - may write memory. Just give up.
@@ -210,8 +210,8 @@ bool FunctionAttrs::AddReadAttrs(const std::vector<CallGraphNode *> &SCC) {
// Success! Functions in this SCC do not access memory, or only read memory.
// Give them the appropriate attribute.
bool MadeChange = false;
- for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
- Function *F = SCC[i]->getFunction();
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
if (F->doesNotAccessMemory())
// Already perfect!
@@ -239,13 +239,13 @@ bool FunctionAttrs::AddReadAttrs(const std::vector<CallGraphNode *> &SCC) {
}
/// AddNoCaptureAttrs - Deduce nocapture attributes for the SCC.
-bool FunctionAttrs::AddNoCaptureAttrs(const std::vector<CallGraphNode *> &SCC) {
+bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
bool Changed = false;
// Check each function in turn, determining which pointer arguments are not
// captured.
- for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
- Function *F = SCC[i]->getFunction();
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
if (F == 0)
// External node - skip it;
@@ -334,18 +334,18 @@ bool FunctionAttrs::IsFunctionMallocLike(Function *F,
}
/// AddNoAliasAttrs - Deduce noalias attributes for the SCC.
-bool FunctionAttrs::AddNoAliasAttrs(const std::vector<CallGraphNode *> &SCC) {
+bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) {
SmallPtrSet<Function*, 8> SCCNodes;
// Fill SCCNodes with the elements of the SCC. Used for quickly
// looking up whether a given CallGraphNode is in this SCC.
- for (unsigned i = 0, e = SCC.size(); i != e; ++i)
- SCCNodes.insert(SCC[i]->getFunction());
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
+ SCCNodes.insert((*I)->getFunction());
// Check each function in turn, determining which functions return noalias
// pointers.
- for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
- Function *F = SCC[i]->getFunction();
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
if (F == 0)
// External node - skip it;
@@ -370,8 +370,8 @@ bool FunctionAttrs::AddNoAliasAttrs(const std::vector<CallGraphNode *> &SCC) {
}
bool MadeChange = false;
- for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
- Function *F = SCC[i]->getFunction();
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
if (F->doesNotAlias(0) || !F->getReturnType()->isPointerTy())
continue;
@@ -383,7 +383,7 @@ bool FunctionAttrs::AddNoAliasAttrs(const std::vector<CallGraphNode *> &SCC) {
return MadeChange;
}
-bool FunctionAttrs::runOnSCC(std::vector<CallGraphNode *> &SCC) {
+bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) {
bool Changed = AddReadAttrs(SCC);
Changed |= AddNoCaptureAttrs(SCC);
Changed |= AddNoAliasAttrs(SCC);
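
FunctionAttrs shows why SCC granularity matters: readonly, readnone and noalias are only sound if they hold for every function in the cycle, so each deduction runs in two phases over the whole SCC. Schematically, under the new interface:

// Phase 1: collect members and give up if any of them (or an external
// node, where getFunction() returns null) breaks the property.
SmallPtrSet<Function*, 8> SCCNodes;
for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
  SCCNodes.insert((*I)->getFunction());
// Phase 2: only if phase 1 succeeded, annotate every member, using
// SCCNodes for O(1) membership tests on callees inside the cycle.
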
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index ddff5ef8b36c..b429213a7db3 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -143,7 +143,8 @@ struct GlobalStatus {
static bool SafeToDestroyConstant(const Constant *C) {
if (isa<GlobalValue>(C)) return false;
- for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E; ++UI)
+ for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E;
+ ++UI)
if (const Constant *CU = dyn_cast<Constant>(*UI)) {
if (!SafeToDestroyConstant(CU)) return false;
} else
@@ -158,7 +159,8 @@ static bool SafeToDestroyConstant(const Constant *C) {
///
static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
SmallPtrSet<const PHINode*, 16> &PHIUsers) {
- for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
+ for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
+ ++UI)
if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(*UI)) {
GS.HasNonInstructionUser = true;
@@ -185,7 +187,8 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
// value, not an aggregate), keep more specific information about
// stores.
if (GS.StoredType != GlobalStatus::isStored) {
- if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(SI->getOperand(1))){
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(
+ SI->getOperand(1))) {
Value *StoredVal = SI->getOperand(0);
if (StoredVal == GV->getInitializer()) {
if (GS.StoredType < GlobalStatus::isInitializerStored)
@@ -610,62 +613,69 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
/// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified
/// value will trap if the value is dynamically null. PHIs keeps track of any
/// phi nodes we've seen to avoid reprocessing them.
-static bool AllUsesOfValueWillTrapIfNull(Value *V,
- SmallPtrSet<PHINode*, 8> &PHIs) {
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
- if (isa<LoadInst>(*UI)) {
+static bool AllUsesOfValueWillTrapIfNull(const Value *V,
+ SmallPtrSet<const PHINode*, 8> &PHIs) {
+ for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
+ ++UI) {
+ const User *U = *UI;
+
+ if (isa<LoadInst>(U)) {
// Will trap.
- } else if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
+ } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
if (SI->getOperand(0) == V) {
- //cerr << "NONTRAPPING USE: " << **UI;
+ //cerr << "NONTRAPPING USE: " << *U;
return false; // Storing the value.
}
- } else if (CallInst *CI = dyn_cast<CallInst>(*UI)) {
+ } else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
if (CI->getCalledValue() != V) {
- //cerr << "NONTRAPPING USE: " << **UI;
+ //cerr << "NONTRAPPING USE: " << *U;
return false; // Not calling the ptr
}
- } else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) {
+ } else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) {
if (II->getCalledValue() != V) {
- //cerr << "NONTRAPPING USE: " << **UI;
+ //cerr << "NONTRAPPING USE: " << *U;
return false; // Not calling the ptr
}
- } else if (BitCastInst *CI = dyn_cast<BitCastInst>(*UI)) {
+ } else if (const BitCastInst *CI = dyn_cast<BitCastInst>(U)) {
if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false;
- } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI)) {
+ } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false;
- } else if (PHINode *PN = dyn_cast<PHINode>(*UI)) {
+ } else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
// If we've already seen this phi node, ignore it, it has already been
// checked.
if (PHIs.insert(PN) && !AllUsesOfValueWillTrapIfNull(PN, PHIs))
return false;
- } else if (isa<ICmpInst>(*UI) &&
+ } else if (isa<ICmpInst>(U) &&
isa<ConstantPointerNull>(UI->getOperand(1))) {
// Ignore icmp X, null
} else {
- //cerr << "NONTRAPPING USE: " << **UI;
+ //cerr << "NONTRAPPING USE: " << *U;
return false;
}
+ }
return true;
}
/// AllUsesOfLoadedValueWillTrapIfNull - Return true if all uses of any loads
/// from GV will trap if the loaded value is null. Note that this also permits
/// comparisons of the loaded value against null, as a special case.
-static bool AllUsesOfLoadedValueWillTrapIfNull(GlobalVariable *GV) {
- for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI!=E; ++UI)
- if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
- SmallPtrSet<PHINode*, 8> PHIs;
+static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) {
+ for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end();
+ UI != E; ++UI) {
+ const User *U = *UI;
+
+ if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ SmallPtrSet<const PHINode*, 8> PHIs;
if (!AllUsesOfValueWillTrapIfNull(LI, PHIs))
return false;
- } else if (isa<StoreInst>(*UI)) {
+ } else if (isa<StoreInst>(U)) {
// Ignore stores to the global.
} else {
// We don't know or understand this user, bail out.
- //cerr << "UNKNOWN USER OF GLOBAL!: " << **UI;
+ //cerr << "UNKNOWN USER OF GLOBAL!: " << *U;
return false;
}
-
+ }
return true;
}
@@ -682,16 +692,17 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
Changed = true;
}
} else if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
- if (I->getOperand(0) == V) {
+ CallSite CS(I);
+ if (CS.getCalledValue() == V) {
// Calling through the pointer! Turn into a direct call, but be careful
// that the pointer is not also being passed as an argument.
- I->setOperand(0, NewV);
+ CS.setCalledFunction(NewV);
Changed = true;
bool PassedAsArg = false;
- for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i)
- if (I->getOperand(i) == V) {
+ for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
+ if (CS.getArgument(i) == V) {
PassedAsArg = true;
- I->setOperand(i, NewV);
+ CS.setArgument(i, NewV);
}
if (PassedAsArg) {
@@ -938,29 +949,31 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
/// to make sure that there are no complex uses of V. We permit simple things
/// like dereferencing the pointer, but not storing through the address, unless
/// it is to the specified global.
-static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Instruction *V,
- GlobalVariable *GV,
- SmallPtrSet<PHINode*, 8> &PHIs) {
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
- Instruction *Inst = cast<Instruction>(*UI);
-
+static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
+ const GlobalVariable *GV,
+ SmallPtrSet<const PHINode*, 8> &PHIs) {
+ for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end();
+ UI != E; ++UI) {
+ const Instruction *Inst = cast<Instruction>(*UI);
+
if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) {
continue; // Fine, ignore.
}
- if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
if (SI->getOperand(0) == V && SI->getOperand(1) != GV)
return false; // Storing the pointer itself... bad.
continue; // Otherwise, storing through it, or storing into GV... fine.
}
- if (isa<GetElementPtrInst>(Inst)) {
+ // Must index into the array and into the struct.
+ if (isa<GetElementPtrInst>(Inst) && Inst->getNumOperands() >= 3) {
if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs))
return false;
continue;
}
- if (PHINode *PN = dyn_cast<PHINode>(Inst)) {
+ if (const PHINode *PN = dyn_cast<PHINode>(Inst)) {
// PHIs are ok if all uses are ok. Don't infinitely recurse through PHI
// cycles.
if (PHIs.insert(PN))
@@ -969,7 +982,7 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Instruction *V,
continue;
}
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) {
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) {
if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs))
return false;
continue;
@@ -1029,11 +1042,12 @@ static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
/// of a load) are simple enough to perform heap SRA on. This permits GEP's
/// that index through the array and struct field, icmps of null, and PHIs.
static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
- SmallPtrSet<const PHINode*, 32> &LoadUsingPHIs,
- SmallPtrSet<const PHINode*, 32> &LoadUsingPHIsPerLoad) {
+ SmallPtrSet<const PHINode*, 32> &LoadUsingPHIs,
+ SmallPtrSet<const PHINode*, 32> &LoadUsingPHIsPerLoad) {
// We permit two users of the load: setcc comparing against the null
// pointer, and a getelementptr of a specific form.
- for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
+ for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
+ ++UI) {
const Instruction *User = cast<Instruction>(*UI);
// Comparison against null is ok.
@@ -1084,8 +1098,8 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV,
Instruction *StoredVal) {
SmallPtrSet<const PHINode*, 32> LoadUsingPHIs;
SmallPtrSet<const PHINode*, 32> LoadUsingPHIsPerLoad;
- for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;
- ++UI)
+ for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end();
+ UI != E; ++UI)
if (const LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs,
LoadUsingPHIsPerLoad))
@@ -1098,8 +1112,8 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV,
// that all inputs the to the PHI nodes are in the same equivalence sets.
// Check to verify that all operands of the PHIs are either PHIS that can be
// transformed, loads from GV, or MI itself.
- for (SmallPtrSet<const PHINode*, 32>::const_iterator I = LoadUsingPHIs.begin(),
- E = LoadUsingPHIs.end(); I != E; ++I) {
+ for (SmallPtrSet<const PHINode*, 32>::const_iterator I =
+ LoadUsingPHIs.begin(), E = LoadUsingPHIs.end(); I != E; ++I) {
const PHINode *PN = *I;
for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) {
Value *InVal = PN->getIncomingValue(op);
@@ -1448,6 +1462,9 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
const Type *AllocTy,
Module::global_iterator &GVI,
TargetData *TD) {
+ if (!TD)
+ return false;
+
// If this is a malloc of an abstract type, don't touch it.
if (!AllocTy->isSized())
return false;
@@ -1466,66 +1483,66 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// malloc to be stored into the specified global, loaded setcc'd, and
// GEP'd. These are all things we could transform to using the global
// for.
- {
- SmallPtrSet<PHINode*, 8> PHIs;
- if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs))
- return false;
- }
+ SmallPtrSet<const PHINode*, 8> PHIs;
+ if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs))
+ return false;
// If we have a global that is only initialized with a fixed size malloc,
// transform the program to use global memory instead of malloc'd memory.
// This eliminates dynamic allocation, avoids an indirection accessing the
// data, and exposes the resultant global to further GlobalOpt.
// We cannot optimize the malloc if we cannot determine malloc array size.
- if (Value *NElems = getMallocArraySize(CI, TD, true)) {
- if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
- // Restrict this transformation to only working on small allocations
- // (2048 bytes currently), as we don't want to introduce a 16M global or
- // something.
- if (TD &&
- NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) {
- GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD);
- return true;
- }
+ Value *NElems = getMallocArraySize(CI, TD, true);
+ if (!NElems)
+ return false;
+
+ if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
+ // Restrict this transformation to only working on small allocations
+ // (2048 bytes currently), as we don't want to introduce a 16M global or
+ // something.
+ if (NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) {
+ GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD);
+ return true;
+ }
- // If the allocation is an array of structures, consider transforming this
- // into multiple malloc'd arrays, one for each field. This is basically
- // SRoA for malloc'd memory.
-
- // If this is an allocation of a fixed size array of structs, analyze as a
- // variable size array. malloc [100 x struct],1 -> malloc struct, 100
- if (NElems == ConstantInt::get(CI->getOperand(1)->getType(), 1))
- if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
- AllocTy = AT->getElementType();
+ // If the allocation is an array of structures, consider transforming this
+ // into multiple malloc'd arrays, one for each field. This is basically
+ // SRoA for malloc'd memory.
+
+ // If this is an allocation of a fixed size array of structs, analyze as a
+ // variable size array. malloc [100 x struct],1 -> malloc struct, 100
+ if (NElems == ConstantInt::get(CI->getOperand(1)->getType(), 1))
+ if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
+ AllocTy = AT->getElementType();
- if (const StructType *AllocSTy = dyn_cast<StructType>(AllocTy)) {
- // This the structure has an unreasonable number of fields, leave it
- // alone.
- if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
- AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) {
-
- // If this is a fixed size array, transform the Malloc to be an alloc of
- // structs. malloc [100 x struct],1 -> malloc struct, 100
- if (const ArrayType *AT =
- dyn_cast<ArrayType>(getMallocAllocatedType(CI))) {
- const Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
- unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes();
- Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
- Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
- Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy,
- AllocSize, NumElements,
- CI->getName());
- Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI);
- CI->replaceAllUsesWith(Cast);
- CI->eraseFromParent();
- CI = dyn_cast<BitCastInst>(Malloc) ?
- extractMallocCallFromBitCast(Malloc) : cast<CallInst>(Malloc);
- }
-
- GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true),TD);
- return true;
- }
+ const StructType *AllocSTy = dyn_cast<StructType>(AllocTy);
+ if (!AllocSTy)
+ return false;
+
+ // If the structure has an unreasonable number of fields, leave it
+ // alone.
+ if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
+ AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) {
+
+ // If this is a fixed size array, transform the Malloc to be an alloc of
+ // structs. malloc [100 x struct],1 -> malloc struct, 100
+ if (const ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI))) {
+ const Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
+ unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes();
+ Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
+ Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
+ Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy,
+ AllocSize, NumElements,
+ CI->getName());
+ Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI);
+ CI->replaceAllUsesWith(Cast);
+ CI->eraseFromParent();
+ CI = dyn_cast<BitCastInst>(Malloc) ?
+ extractMallocCallFromBitCast(Malloc) : cast<CallInst>(Malloc);
}
+
+ GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true),TD);
+ return true;
}
return false;
@@ -1689,8 +1706,8 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
if (GS.StoredType == GlobalStatus::isStoredOnce && GS.StoredOnceValue)
DEBUG(dbgs() << " StoredOnceValue = " << *GS.StoredOnceValue << "\n");
if (GS.AccessingFunction && !GS.HasMultipleAccessingFunctions)
- DEBUG(dbgs() << " AccessingFunction = " << GS.AccessingFunction->getName()
- << "\n");
+ DEBUG(dbgs() << " AccessingFunction = "
+ << GS.AccessingFunction->getName() << "\n");
DEBUG(dbgs() << " HasMultipleAccessingFunctions = "
<< GS.HasMultipleAccessingFunctions << "\n");
DEBUG(dbgs() << " HasNonInstructionUser = "
@@ -2278,10 +2295,10 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
}
// Cannot handle inline asm.
- if (isa<InlineAsm>(CI->getOperand(0))) return false;
+ if (isa<InlineAsm>(CI->getCalledValue())) return false;
// Resolve function pointers.
- Function *Callee = dyn_cast<Function>(getVal(Values, CI->getOperand(0)));
+ Function *Callee = dyn_cast<Function>(getVal(Values, CI->getCalledValue()));
if (!Callee) return false; // Cannot resolve.
SmallVector<Constant*, 8> Formals;
@@ -2500,7 +2517,7 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
continue;
// Do not perform the transform if multiple aliases potentially target the
- // aliasee. This check also ensures that it is safe to replace the section
+ // aliasee. This check also ensures that it is safe to replace the section
// and other attributes of the aliasee with those of the alias.
if (!hasOneUse)
continue;
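
The GlobalOpt changes mix const-correctness (the use-walkers now take const values and const-use iterators) with a switch to the CallSite abstraction, which addresses callee and arguments uniformly for both CallInst and InvokeInst instead of via hard-coded operand numbers. The CallSite idiom from OptimizeAwayTrappingUsesOfValue, in isolation:

CallSite CS(I);  // I is known to be a CallInst or InvokeInst
if (CS.getCalledValue() == V) {
  CS.setCalledFunction(NewV);  // call through the new callee
  for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
    if (CS.getArgument(i) == V)
      CS.setArgument(i, NewV);  // V may also be passed as an argument
}
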
diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp
index 83e8624fe09d..340b70eb0268 100644
--- a/lib/Transforms/IPO/IPO.cpp
+++ b/lib/Transforms/IPO/IPO.cpp
@@ -62,6 +62,15 @@ void LLVMAddPruneEHPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createPruneEHPass());
}
+void LLVMAddIPSCCPPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createIPSCCPPass());
+}
+
+void LLVMAddInternalizePass(LLVMPassManagerRef PM, unsigned AllButMain) {
+ unwrap(PM)->add(createInternalizePass(AllButMain != 0));
+}
+
void LLVMAddRaiseAllocationsPass(LLVMPassManagerRef PM) {
// FIXME: Remove in LLVM 3.0.
}
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 03ec72c03043..b785bb0a9390 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -73,16 +73,14 @@ InlinedArrayAllocasTy;
/// available from other functions inlined into the caller. If we are able to
/// inline this call site we attempt to reuse already available allocas or add
/// any new allocas to the set if not possible.
-static bool InlineCallIfPossible(CallSite CS, CallGraph &CG,
- const TargetData *TD,
+static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
InlinedArrayAllocasTy &InlinedArrayAllocas) {
Function *Callee = CS.getCalledFunction();
Function *Caller = CS.getCaller();
// Try to inline the function. Get the list of static allocas that were
// inlined.
- SmallVector<AllocaInst*, 16> StaticAllocas;
- if (!InlineFunction(CS, &CG, TD, &StaticAllocas))
+ if (!InlineFunction(CS, IFI))
return false;
// If the inlined function had a higher stack protection level than the
@@ -119,9 +117,9 @@ static bool InlineCallIfPossible(CallSite CS, CallGraph &CG,
// Loop over all the allocas we have so far and see if they can be merged with
// a previously inlined alloca. If not, remember that we had it.
- for (unsigned AllocaNo = 0, e = StaticAllocas.size();
+ for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size();
AllocaNo != e; ++AllocaNo) {
- AllocaInst *AI = StaticAllocas[AllocaNo];
+ AllocaInst *AI = IFI.StaticAllocas[AllocaNo];
// Don't bother trying to merge array allocations (they will usually be
// canonicalized to be an allocation *of* an array), or allocations whose
@@ -292,14 +290,29 @@ bool Inliner::shouldInline(CallSite CS) {
return true;
}
-bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) {
+/// InlineHistoryIncludes - Return true if the specified inline history ID
+/// indicates an inline history that includes the specified function.
+static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
+ const SmallVectorImpl<std::pair<Function*, int> > &InlineHistory) {
+ while (InlineHistoryID != -1) {
+ assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
+ "Invalid inline history ID");
+ if (InlineHistory[InlineHistoryID].first == F)
+ return true;
+ InlineHistoryID = InlineHistory[InlineHistoryID].second;
+ }
+ return false;
+}
+
+
+bool Inliner::runOnSCC(CallGraphSCC &SCC) {
CallGraph &CG = getAnalysis<CallGraph>();
const TargetData *TD = getAnalysisIfAvailable<TargetData>();
SmallPtrSet<Function*, 8> SCCFunctions;
DEBUG(dbgs() << "Inliner visiting SCC:");
- for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
- Function *F = SCC[i]->getFunction();
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
if (F) SCCFunctions.insert(F);
DEBUG(dbgs() << " " << (F ? F->getName() : "INDIRECTNODE"));
}
@@ -307,10 +320,16 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) {
// Scan through and identify all call sites ahead of time so that we only
// inline call sites in the original functions, not call sites that result
// from inlining other functions.
- SmallVector<CallSite, 16> CallSites;
-
- for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
- Function *F = SCC[i]->getFunction();
+ SmallVector<std::pair<CallSite, int>, 16> CallSites;
+
+ // When inlining a callee produces new call sites, we want to keep track of
+ // the fact that they were inlined from the callee. This allows us to avoid
+ // infinite inlining in some obscure cases. To represent this, we use an
+ // index into the InlineHistory vector.
+ SmallVector<std::pair<Function*, int>, 8> InlineHistory;
+
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
if (!F) continue;
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
@@ -327,22 +346,27 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) {
if (CS.getCalledFunction() && CS.getCalledFunction()->isDeclaration())
continue;
- CallSites.push_back(CS);
+ CallSites.push_back(std::make_pair(CS, -1));
}
}
DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n");
+ // If there are no calls in this function, exit early.
+ if (CallSites.empty())
+ return false;
+
// Now that we have all of the call sites, move the ones to functions in the
// current SCC to the end of the list.
unsigned FirstCallInSCC = CallSites.size();
for (unsigned i = 0; i < FirstCallInSCC; ++i)
- if (Function *F = CallSites[i].getCalledFunction())
+ if (Function *F = CallSites[i].first.getCalledFunction())
if (SCCFunctions.count(F))
std::swap(CallSites[i--], CallSites[--FirstCallInSCC]);
InlinedArrayAllocasTy InlinedArrayAllocas;
+ InlineFunctionInfo InlineInfo(&CG, TD);
// Now that we have all of the call sites, loop over them and inline them if
// it looks profitable to do so.
@@ -353,7 +377,7 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) {
// Iterate over the outer loop because inlining functions can cause indirect
// calls to become direct calls.
for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
- CallSite CS = CallSites[CSi];
+ CallSite CS = CallSites[CSi].first;
Function *Caller = CS.getCaller();
Function *Callee = CS.getCalledFunction();
@@ -375,16 +399,42 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) {
// We can only inline direct calls to non-declarations.
if (Callee == 0 || Callee->isDeclaration()) continue;
+ // If this call site was obtained by inlining another function, verify
+ // that the inline path for the function did not include the callee
+ // itself. If so, we'd be recursively inlining the same function,
+ // which would provide the same call sites, which would cause us to
+ // inline infinitely.
+ int InlineHistoryID = CallSites[CSi].second;
+ if (InlineHistoryID != -1 &&
+ InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory))
+ continue;
+
// If the policy determines that we should inline this function,
// try to do so.
if (!shouldInline(CS))
continue;
- // Attempt to inline the function...
- if (!InlineCallIfPossible(CS, CG, TD, InlinedArrayAllocas))
+ // Attempt to inline the function.
+ if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas))
continue;
++NumInlined;
-
+
+ // If inlining this function gave us any new call sites, throw them
+ // onto our worklist to process. They are useful inline candidates.
+ if (!InlineInfo.InlinedCalls.empty()) {
+ // Create a new inline history entry for this, so that we remember
+ // that these new callsites came about due to inlining Callee.
+ int NewHistoryID = InlineHistory.size();
+ InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID));
+
+ for (unsigned i = 0, e = InlineInfo.InlinedCalls.size();
+ i != e; ++i) {
+ Value *Ptr = InlineInfo.InlinedCalls[i];
+ CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID));
+ }
+ }
+
// Update the cached cost info with the inlined call.
growCachedCostInfo(Caller, Callee);
}
@@ -417,7 +467,7 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) {
// swap/pop_back for efficiency, but do not use it if doing so would
// move a call site to a function in this SCC before the
// 'FirstCallInSCC' barrier.
- if (SCC.size() == 1) {
+ if (SCC.isSingular()) {
std::swap(CallSites[CSi], CallSites.back());
CallSites.pop_back();
} else {
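
The inline-history machinery above deserves a worked example. Each InlineHistory entry is a (function, parent-ID) pair forming a parent-pointer chain, with -1 as the terminator; a call site carries the ID of the chain that produced it. Assuming functions A and B:

SmallVector<std::pair<Function*, int>, 8> InlineHistory;
// Inlining A at a top-level call site creates entry 0 with no parent:
InlineHistory.push_back(std::make_pair(A, -1));    // ID 0
// Inlining B at a call site revealed by that inline chains onto it:
InlineHistory.push_back(std::make_pair(B, 0));     // ID 1
// A call to A revealed by inlining B carries history ID 1, and
// InlineHistoryIncludes(A, 1, InlineHistory) walks 1 -> 0 -> -1,
// finds A at entry 0, and returns true, so A is not inlined again.
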
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index f8ec72227380..07525eaada5e 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -120,15 +120,17 @@ Function* PartialInliner::unswitchFunction(Function* F) {
// Extract the body of the if.
Function* extractedFunction = ExtractCodeRegion(DT, toExtract);
+ InlineFunctionInfo IFI;
+
// Inline the top-level if test into all callers.
std::vector<User*> Users(duplicateFunction->use_begin(),
duplicateFunction->use_end());
for (std::vector<User*>::iterator UI = Users.begin(), UE = Users.end();
UI != UE; ++UI)
- if (CallInst* CI = dyn_cast<CallInst>(*UI))
- InlineFunction(CI);
- else if (InvokeInst* II = dyn_cast<InvokeInst>(*UI))
- InlineFunction(II);
+ if (CallInst *CI = dyn_cast<CallInst>(*UI))
+ InlineFunction(CI, IFI);
+ else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI))
+ InlineFunction(II, IFI);
// Ditch the duplicate, since we're done with it, and rewrite all remaining
// users (function pointers, etc.) back to the original function.
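
PartialInlining adopts the same InlineFunctionInfo-based entry point as the Inliner: InlineFunction now takes an info object that carries the optional CallGraph and TargetData and collects the static allocas and new call sites produced by inlining. The minimal call shape, as used above:

InlineFunctionInfo IFI;  // no call graph or TargetData needed here
if (CallInst *CI = dyn_cast<CallInst>(U))
  InlineFunction(CI, IFI);
else if (InvokeInst *II = dyn_cast<InvokeInst>(U))
  InlineFunction(II, IFI);
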
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index 161246bc2598..de6099cc1daa 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -40,7 +40,7 @@ namespace {
PruneEH() : CallGraphSCCPass(&ID) {}
// runOnSCC - Analyze the SCC, performing the transformation if possible.
- bool runOnSCC(std::vector<CallGraphNode *> &SCC);
+ bool runOnSCC(CallGraphSCC &SCC);
bool SimplifyFunction(Function *F);
void DeleteBasicBlock(BasicBlock *BB);
@@ -54,20 +54,20 @@ X("prune-eh", "Remove unused exception handling info");
Pass *llvm::createPruneEHPass() { return new PruneEH(); }
-bool PruneEH::runOnSCC(std::vector<CallGraphNode *> &SCC) {
+bool PruneEH::runOnSCC(CallGraphSCC &SCC) {
SmallPtrSet<CallGraphNode *, 8> SCCNodes;
CallGraph &CG = getAnalysis<CallGraph>();
bool MadeChange = false;
// Fill SCCNodes with the elements of the SCC. Used for quickly
// looking up whether a given CallGraphNode is in this SCC.
- for (unsigned i = 0, e = SCC.size(); i != e; ++i)
- SCCNodes.insert(SCC[i]);
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
+ SCCNodes.insert(*I);
// First pass, scan all of the functions in the SCC, simplifying them
// according to what we know.
- for (unsigned i = 0, e = SCC.size(); i != e; ++i)
- if (Function *F = SCC[i]->getFunction())
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
+ if (Function *F = (*I)->getFunction())
MadeChange |= SimplifyFunction(F);
// Next, check to see if any callees might throw or if there are any external
@@ -78,9 +78,9 @@ bool PruneEH::runOnSCC(std::vector<CallGraphNode *> &SCC) {
// obviously the SCC might throw.
//
bool SCCMightUnwind = false, SCCMightReturn = false;
- for (unsigned i = 0, e = SCC.size();
- (!SCCMightUnwind || !SCCMightReturn) && i != e; ++i) {
- Function *F = SCC[i]->getFunction();
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end();
+ (!SCCMightUnwind || !SCCMightReturn) && I != E; ++I) {
+ Function *F = (*I)->getFunction();
if (F == 0) {
SCCMightUnwind = true;
SCCMightReturn = true;
@@ -132,7 +132,7 @@ bool PruneEH::runOnSCC(std::vector<CallGraphNode *> &SCC) {
// If the SCC doesn't unwind or doesn't throw, note this fact.
if (!SCCMightUnwind || !SCCMightReturn)
- for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
Attributes NewAttributes = Attribute::None;
if (!SCCMightUnwind)
@@ -140,19 +140,20 @@ bool PruneEH::runOnSCC(std::vector<CallGraphNode *> &SCC) {
if (!SCCMightReturn)
NewAttributes |= Attribute::NoReturn;
- const AttrListPtr &PAL = SCC[i]->getFunction()->getAttributes();
+ Function *F = (*I)->getFunction();
+ const AttrListPtr &PAL = F->getAttributes();
const AttrListPtr &NPAL = PAL.addAttr(~0, NewAttributes);
if (PAL != NPAL) {
MadeChange = true;
- SCC[i]->getFunction()->setAttributes(NPAL);
+ F->setAttributes(NPAL);
}
}
- for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
// Convert any invoke instructions to non-throwing functions in this node
// into call instructions with a branch. This makes the exception blocks
// dead.
- if (Function *F = SCC[i]->getFunction())
+ if (Function *F = (*I)->getFunction())
MadeChange |= SimplifyFunction(F);
}
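PruneEH and the other CallGraphSCC passes in this patch stop indexing into a raw std::vector&lt;CallGraphNode*&gt; and code against the new CallGraphSCC interface instead: iterate with begin()/end(), test size with isSingular(), and mutate through ReplaceNode(). A reduced model of that interface, as a sketch only; the real class also keeps the pass manager's bookkeeping in sync:

```cpp
#include <algorithm>
#include <cassert>
#include <vector>

struct CallGraphNode {};

class CallGraphSCC {
  std::vector<CallGraphNode *> Nodes;
public:
  typedef std::vector<CallGraphNode *>::const_iterator iterator;
  iterator begin() const { return Nodes.begin(); }
  iterator end() const { return Nodes.end(); }
  bool isSingular() const { return Nodes.size() == 1; }
  void ReplaceNode(CallGraphNode *Old, CallGraphNode *New) {
    std::vector<CallGraphNode *>::iterator I =
        std::find(Nodes.begin(), Nodes.end(), Old);
    assert(I != Nodes.end() && "node not in SCC");
    *I = New; // the real class would also update pass-manager state here
  }
  void add(CallGraphNode *N) { Nodes.push_back(N); }
};

int main() {
  CallGraphNode A, B;
  CallGraphSCC SCC;
  SCC.add(&A);
  SCC.ReplaceNode(&A, &B); // what SRETPromotion now does instead of SCC[i] = NewNode
  return SCC.isSingular() ? 0 : 1;
}
```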
diff --git a/lib/Transforms/IPO/StructRetPromotion.cpp b/lib/Transforms/IPO/StructRetPromotion.cpp
index dda32d02c873..473e83cec45e 100644
--- a/lib/Transforms/IPO/StructRetPromotion.cpp
+++ b/lib/Transforms/IPO/StructRetPromotion.cpp
@@ -48,7 +48,7 @@ namespace {
CallGraphSCCPass::getAnalysisUsage(AU);
}
- virtual bool runOnSCC(std::vector<CallGraphNode *> &SCC);
+ virtual bool runOnSCC(CallGraphSCC &SCC);
static char ID; // Pass identification, replacement for typeid
SRETPromotion() : CallGraphSCCPass(&ID) {}
@@ -69,12 +69,12 @@ Pass *llvm::createStructRetPromotionPass() {
return new SRETPromotion();
}
-bool SRETPromotion::runOnSCC(std::vector<CallGraphNode *> &SCC) {
+bool SRETPromotion::runOnSCC(CallGraphSCC &SCC) {
bool Changed = false;
- for (unsigned i = 0, e = SCC.size(); i != e; ++i)
- if (CallGraphNode *NewNode = PromoteReturn(SCC[i])) {
- SCC[i] = NewNode;
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
+ if (CallGraphNode *NewNode = PromoteReturn(*I)) {
+ SCC.ReplaceNode(*I, NewNode);
Changed = true;
}
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 3fb3de75075e..8586054fce0e 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1735,16 +1735,12 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
- if (RHS->isOne() && Op0->hasOneUse()) {
+ if (RHS->isOne() && Op0->hasOneUse())
// xor (cmp A, B), true = not (cmp A, B) = !cmp A, B
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(Op0))
- return new ICmpInst(ICI->getInversePredicate(),
- ICI->getOperand(0), ICI->getOperand(1));
-
- if (FCmpInst *FCI = dyn_cast<FCmpInst>(Op0))
- return new FCmpInst(FCI->getInversePredicate(),
- FCI->getOperand(0), FCI->getOperand(1));
- }
+ if (CmpInst *CI = dyn_cast<CmpInst>(Op0))
+ return CmpInst::Create(CI->getOpcode(),
+ CI->getInversePredicate(),
+ CI->getOperand(0), CI->getOperand(1));
// fold (xor(zext(cmp)), 1) and (xor(sext(cmp)), -1) to ext(!cmp).
if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
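The visitXor change merges the separate icmp and fcmp cases: both instruction kinds share CmpInst predicates, every predicate has an inverse, and CmpInst::Create can rebuild either kind from its opcode. A self-contained check of the underlying identity, using a two-predicate toy enum in place of the real one:

```cpp
#include <cassert>

enum Predicate { ICMP_EQ, ICMP_NE };

static Predicate getInversePredicate(Predicate P) {
  return P == ICMP_EQ ? ICMP_NE : ICMP_EQ;
}

// xor (cmp A, B), true  ==>  cmp' A, B   where cmp' is the inverse predicate.
static bool evalFoldedXor(Predicate P, int A, int B) {
  Predicate Inv = getInversePredicate(P);
  return Inv == ICMP_EQ ? (A == B) : (A != B);
}

int main() {
  // The fold is sound iff cmp'(A, B) == !cmp(A, B) for every input.
  for (int A = -2; A <= 2; ++A)
    for (int B = -2; B <= 2; ++B) {
      assert(evalFoldedXor(ICMP_EQ, A, B) == !(A == B));
      assert(evalFoldedXor(ICMP_NE, A, B) == !(A != B));
    }
  return 0;
}
```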
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index e025b053765b..38e7b6ec2d1f 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -59,29 +59,32 @@ static unsigned EnforceKnownAlignment(Value *V,
// Treat this like a bitcast.
return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
}
- break;
+ return Align;
+ }
+ case Instruction::Alloca: {
+ AllocaInst *AI = cast<AllocaInst>(V);
+ // If there is a requested alignment and if this is an alloca, round up.
+ if (AI->getAlignment() >= PrefAlign)
+ return AI->getAlignment();
+ AI->setAlignment(PrefAlign);
+ return PrefAlign;
}
}
if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
// If there is a large requested alignment and we can, bump up the alignment
// of the global.
- if (!GV->isDeclaration()) {
- if (GV->getAlignment() >= PrefAlign)
- Align = GV->getAlignment();
- else {
- GV->setAlignment(PrefAlign);
- Align = PrefAlign;
- }
- }
- } else if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
- // If there is a requested alignment and if this is an alloca, round up.
- if (AI->getAlignment() >= PrefAlign)
- Align = AI->getAlignment();
- else {
- AI->setAlignment(PrefAlign);
- Align = PrefAlign;
- }
+ if (GV->isDeclaration()) return Align;
+
+ if (GV->getAlignment() >= PrefAlign)
+ return GV->getAlignment();
+ // We can only increase the alignment of the global if it has no alignment
+ // specified or if it is not assigned a section. If it is assigned a
+ // section, the global could be densely packed with other objects in the
+ // section, and increasing the alignment could cause padding issues.
+ if (!GV->hasSection() || GV->getAlignment() == 0)
+ GV->setAlignment(PrefAlign);
+ return GV->getAlignment();
}
return Align;
@@ -287,7 +290,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
const Type *Tys[3] = { CI.getOperand(1)->getType(),
CI.getOperand(2)->getType(),
CI.getOperand(3)->getType() };
- CI.setOperand(0,
+ CI.setCalledFunction(
Intrinsic::getDeclaration(M, MemCpyID, Tys, 3));
Changed = true;
}
@@ -526,7 +529,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// X + 0 -> {X, false}
if (RHS->isZero()) {
Constant *V[] = {
- UndefValue::get(II->getOperand(0)->getType()),
+ UndefValue::get(II->getCalledValue()->getType()),
ConstantInt::getFalse(II->getContext())
};
Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
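The EnforceKnownAlignment rewrite replaces nested if/else that funneled through a mutable Align local with early returns, and adds the guard that a global placed in an explicit section must not have its alignment raised. The alloca half reduces to a round-up-and-return, sketched standalone below (toy struct, not llvm::AllocaInst):

```cpp
#include <cassert>

struct AllocaLike { unsigned Alignment; };

static unsigned enforceAlignment(AllocaLike &AI, unsigned PrefAlign) {
  if (AI.Alignment >= PrefAlign)
    return AI.Alignment;    // already at least as strict; leave it alone
  AI.Alignment = PrefAlign; // otherwise raise it to the preferred alignment
  return PrefAlign;
}

int main() {
  AllocaLike A = { 4 };
  assert(enforceAlignment(A, 16) == 16 && A.Alignment == 16);
  assert(enforceAlignment(A, 8) == 16); // alignment is never lowered
  return 0;
}
```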
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index a68fc6df4768..eb7628e741bd 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1323,7 +1323,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(Src)) {
// Okay, we have (bitcast (shuffle ..)). Check to see if this is
- // a bitconvert to a vector with the same # elts.
+ // a bitcast to a vector with the same # elts.
if (SVI->hasOneUse() && DestTy->isVectorTy() &&
cast<VectorType>(DestTy)->getNumElements() ==
SVI->getType()->getNumElements() &&
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 72fd5588d120..861cf92d281e 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1325,7 +1325,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
// If we have a signed (X % (2^c)) == 0, turn it into an unsigned one.
if (RHSV == 0 && isa<ConstantInt>(BO->getOperand(1)) &&BO->hasOneUse()){
const APInt &V = cast<ConstantInt>(BO->getOperand(1))->getValue();
- if (V.sgt(APInt(V.getBitWidth(), 1)) && V.isPowerOf2()) {
+ if (V.sgt(1) && V.isPowerOf2()) {
Value *NewRem =
Builder->CreateURem(BO->getOperand(0), BO->getOperand(1),
BO->getName());
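The shortened V.sgt(1) relies on APInt comparison overloads that accept a plain integer, so the caller no longer constructs a temporary APInt of matching width. A toy model of the overload pair, assuming widths of at most 64 bits (the real APInt handles arbitrary widths):

```cpp
#include <cassert>
#include <stdint.h>

struct TinyAPInt {
  unsigned BitWidth;
  int64_t Val; // toy restriction: widths up to 64 bits only
  bool sgt(const TinyAPInt &RHS) const { return Val > RHS.Val; }
  bool sgt(int64_t RHS) const { return Val > RHS; } // the convenience overload
};

int main() {
  TinyAPInt V = { 32, 8 };
  TinyAPInt One = { 32, 1 };
  assert(V.sgt(One)); // old style: build a second APInt of matching width
  assert(V.sgt(1));   // new style: compare against the immediate directly
  return 0;
}
```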
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 2fc932594f7b..c958cde0917b 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -13,6 +13,7 @@
#include "InstCombine.h"
#include "llvm/Support/PatternMatch.h"
+#include "llvm/Analysis/InstructionSimplify.h"
using namespace llvm;
using namespace PatternMatch;
@@ -421,49 +422,30 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
Value *TrueVal = SI.getTrueValue();
Value *FalseVal = SI.getFalseValue();
- // select true, X, Y -> X
- // select false, X, Y -> Y
- if (ConstantInt *C = dyn_cast<ConstantInt>(CondVal))
- return ReplaceInstUsesWith(SI, C->getZExtValue() ? TrueVal : FalseVal);
-
- // select C, X, X -> X
- if (TrueVal == FalseVal)
- return ReplaceInstUsesWith(SI, TrueVal);
-
- if (isa<UndefValue>(TrueVal)) // select C, undef, X -> X
- return ReplaceInstUsesWith(SI, FalseVal);
- if (isa<UndefValue>(FalseVal)) // select C, X, undef -> X
- return ReplaceInstUsesWith(SI, TrueVal);
- if (isa<UndefValue>(CondVal)) { // select undef, X, Y -> X or Y
- if (isa<Constant>(TrueVal))
- return ReplaceInstUsesWith(SI, TrueVal);
- else
- return ReplaceInstUsesWith(SI, FalseVal);
- }
+ if (Value *V = SimplifySelectInst(CondVal, TrueVal, FalseVal, TD))
+ return ReplaceInstUsesWith(SI, V);
if (SI.getType()->isIntegerTy(1)) {
if (ConstantInt *C = dyn_cast<ConstantInt>(TrueVal)) {
if (C->getZExtValue()) {
// Change: A = select B, true, C --> A = or B, C
return BinaryOperator::CreateOr(CondVal, FalseVal);
- } else {
- // Change: A = select B, false, C --> A = and !B, C
- Value *NotCond =
- InsertNewInstBefore(BinaryOperator::CreateNot(CondVal,
- "not."+CondVal->getName()), SI);
- return BinaryOperator::CreateAnd(NotCond, FalseVal);
}
+ // Change: A = select B, false, C --> A = and !B, C
+ Value *NotCond =
+ InsertNewInstBefore(BinaryOperator::CreateNot(CondVal,
+ "not."+CondVal->getName()), SI);
+ return BinaryOperator::CreateAnd(NotCond, FalseVal);
} else if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) {
if (C->getZExtValue() == false) {
// Change: A = select B, C, false --> A = and B, C
return BinaryOperator::CreateAnd(CondVal, TrueVal);
- } else {
- // Change: A = select B, C, true --> A = or !B, C
- Value *NotCond =
- InsertNewInstBefore(BinaryOperator::CreateNot(CondVal,
- "not."+CondVal->getName()), SI);
- return BinaryOperator::CreateOr(NotCond, TrueVal);
}
+ // Change: A = select B, C, true --> A = or !B, C
+ Value *NotCond =
+ InsertNewInstBefore(BinaryOperator::CreateNot(CondVal,
+ "not."+CondVal->getName()), SI);
+ return BinaryOperator::CreateOr(NotCond, TrueVal);
}
// select a, b, a -> a&b
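visitSelectInst now delegates the constant/undef folds to SimplifySelectInst (hence the new InstructionSimplify.h include), making the same logic reusable outside InstCombine. The deleted folds, restated as one standalone pure function over a toy value type; this is a sketch of what the shared helper covers, not its actual implementation:

```cpp
#include <cassert>
#include <cstddef>

struct Val {
  bool IsUndef;
  bool IsConstBool; // only meaningful for the condition
  bool BoolVal;
};

// Returns the operand the select folds to, or NULL when no fold applies.
static const Val *simplifySelect(const Val *Cond, const Val *T, const Val *F) {
  if (Cond->IsConstBool)        // select true, X, Y -> X ; select false -> Y
    return Cond->BoolVal ? T : F;
  if (T == F)                   // select C, X, X -> X
    return T;
  if (T->IsUndef)               // select C, undef, X -> X
    return F;
  if (F->IsUndef)               // select C, X, undef -> X
    return T;
  if (Cond->IsUndef)            // select undef, X, Y -> either; pick X
    return T;
  return NULL;
}

int main() {
  Val True = { false, true, true };
  Val X = { false, false, false }, Y = { false, false, false };
  assert(simplifySelect(&True, &X, &Y) == &X);
  return 0;
}
```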
diff --git a/lib/Transforms/Makefile b/lib/Transforms/Makefile
index ea4a1158acc7..e527be25decb 100644
--- a/lib/Transforms/Makefile
+++ b/lib/Transforms/Makefile
@@ -13,7 +13,7 @@ PARALLEL_DIRS = Utils Instrumentation Scalar InstCombine IPO Hello
include $(LEVEL)/Makefile.config
# No support for plugins on windows targets
-ifeq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW))
+ifeq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW Minix))
PARALLEL_DIRS := $(filter-out Hello, $(PARALLEL_DIRS))
endif
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index 683c1c2fd708..57788642f23f 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -21,7 +21,6 @@ add_llvm_library(LLVMScalarOpts
Reassociate.cpp
Reg2Mem.cpp
SCCP.cpp
- SCCVN.cpp
Scalar.cpp
ScalarReplAggregates.cpp
SimplifyCFGPass.cpp
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 642d59da044f..321def7eb619 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -1217,7 +1217,7 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
return ConstantFoldLoadFromConstPtr(Src, &TD);
}
-
+namespace {
struct AvailableValueInBlock {
/// BB - The basic block in question.
@@ -1291,6 +1291,8 @@ struct AvailableValueInBlock {
}
};
+}
+
/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
/// construct SSA form, allowing us to eliminate LI. This returns the value
/// that should be used at LI's definition site.
@@ -1333,8 +1335,8 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
return V;
}
-static bool isLifetimeStart(Instruction *Inst) {
- if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
+static bool isLifetimeStart(const Instruction *Inst) {
+ if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
return II->getIntrinsicID() == Intrinsic::lifetime_start;
return false;
}
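The other GVN change above wraps a file-local helper struct in an anonymous namespace, the C++ idiom for internal linkage:

```cpp
namespace {
// Internal linkage: this definition is private to its translation unit, so it
// cannot collide at link time with a same-named type in another .cpp file.
struct AvailableValueInBlock {
  // ... fields local to GVN.cpp ...
};
} // end anonymous namespace
```

Without it, two object files each defining a class with the same name can silently violate the one-definition rule.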
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 6605666e45d1..36bea6795542 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -97,6 +97,8 @@ namespace {
private:
+ void EliminateIVComparisons();
+ void EliminateIVRemainders();
void RewriteNonIntegerIVs(Loop *L);
ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
@@ -133,6 +135,24 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
BasicBlock *ExitingBlock,
BranchInst *BI,
SCEVExpander &Rewriter) {
+ // Special case: If the backedge-taken count is a UDiv, it's very likely a
+ // UDiv that ScalarEvolution produced in order to compute a precise
+ // expression, rather than a UDiv from the user's code. If we can't find a
+ // UDiv in the code with some simple searching, assume the former and forgo
+ // rewriting the loop.
+ if (isa<SCEVUDivExpr>(BackedgeTakenCount)) {
+ ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!OrigCond) return 0;
+ const SCEV *R = SE->getSCEV(OrigCond->getOperand(1));
+ R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1));
+ if (R != BackedgeTakenCount) {
+ const SCEV *L = SE->getSCEV(OrigCond->getOperand(0));
+ L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1));
+ if (L != BackedgeTakenCount)
+ return 0;
+ }
+ }
+
// If the exiting block is not the same as the backedge block, we must compare
// against the preincremented value, otherwise we prefer to compare against
// the post-incremented value.
@@ -142,12 +162,12 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
// Add one to the "backedge-taken" count to get the trip count.
// If this addition may overflow, we have to be more pessimistic and
// cast the induction variable before doing the add.
- const SCEV *Zero = SE->getIntegerSCEV(0, BackedgeTakenCount->getType());
+ const SCEV *Zero = SE->getConstant(BackedgeTakenCount->getType(), 0);
const SCEV *N =
SE->getAddExpr(BackedgeTakenCount,
- SE->getIntegerSCEV(1, BackedgeTakenCount->getType()));
+ SE->getConstant(BackedgeTakenCount->getType(), 1));
if ((isa<SCEVConstant>(N) && !N->isZero()) ||
- SE->isLoopGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
+ SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
// No overflow. Cast the sum.
RHS = SE->getTruncateOrZeroExtend(N, IndVar->getType());
} else {
@@ -155,7 +175,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
IndVar->getType());
RHS = SE->getAddExpr(RHS,
- SE->getIntegerSCEV(1, IndVar->getType()));
+ SE->getConstant(IndVar->getType(), 1));
}
// The BackedgeTaken expression contains the number of times that the
@@ -336,6 +356,116 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
SE->forgetLoop(L);
}
+void IndVarSimplify::EliminateIVComparisons() {
+ SmallVector<WeakVH, 16> DeadInsts;
+
+ // Look for ICmp users.
+ for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E; ++I) {
+ IVStrideUse &UI = *I;
+ ICmpInst *ICmp = dyn_cast<ICmpInst>(UI.getUser());
+ if (!ICmp) continue;
+
+ bool Swapped = UI.getOperandValToReplace() == ICmp->getOperand(1);
+ ICmpInst::Predicate Pred = ICmp->getPredicate();
+ if (Swapped) Pred = ICmpInst::getSwappedPredicate(Pred);
+
+ // Get the SCEVs for the ICmp operands.
+ const SCEV *S = IU->getReplacementExpr(UI);
+ const SCEV *X = SE->getSCEV(ICmp->getOperand(!Swapped));
+
+ // Simplify unnecessary loops away.
+ const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent());
+ S = SE->getSCEVAtScope(S, ICmpLoop);
+ X = SE->getSCEVAtScope(X, ICmpLoop);
+
+ // If the condition is always true or always false, replace it with
+ // a constant value.
+ if (SE->isKnownPredicate(Pred, S, X))
+ ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext()));
+ else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X))
+ ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext()));
+ else
+ continue;
+
+ DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
+ DeadInsts.push_back(ICmp);
+ }
+
+ // Now that we're done iterating through lists, clean up any instructions
+ // which are now dead.
+ while (!DeadInsts.empty())
+ if (Instruction *Inst =
+ dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
+ RecursivelyDeleteTriviallyDeadInstructions(Inst);
+}
+
+void IndVarSimplify::EliminateIVRemainders() {
+ SmallVector<WeakVH, 16> DeadInsts;
+
+ // Look for SRem and URem users.
+ for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E; ++I) {
+ IVStrideUse &UI = *I;
+ BinaryOperator *Rem = dyn_cast<BinaryOperator>(UI.getUser());
+ if (!Rem) continue;
+
+ bool isSigned = Rem->getOpcode() == Instruction::SRem;
+ if (!isSigned && Rem->getOpcode() != Instruction::URem)
+ continue;
+
+ // We're only interested in the case where we know something about
+ // the numerator.
+ if (UI.getOperandValToReplace() != Rem->getOperand(0))
+ continue;
+
+ // Get the SCEVs for the Rem operands.
+ const SCEV *S = SE->getSCEV(Rem->getOperand(0));
+ const SCEV *X = SE->getSCEV(Rem->getOperand(1));
+
+ // Simplify unnecessary loops away.
+ const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent());
+ S = SE->getSCEVAtScope(S, ICmpLoop);
+ X = SE->getSCEVAtScope(X, ICmpLoop);
+
+ // i % n --> i if i is in [0,n).
+ if ((!isSigned || SE->isKnownNonNegative(S)) &&
+ SE->isKnownPredicate(isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
+ S, X))
+ Rem->replaceAllUsesWith(Rem->getOperand(0));
+ else {
+ // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n).
+ const SCEV *LessOne =
+ SE->getMinusSCEV(S, SE->getConstant(S->getType(), 1));
+ if ((!isSigned || SE->isKnownNonNegative(LessOne)) &&
+ SE->isKnownPredicate(isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
+ LessOne, X)) {
+ ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ,
+ Rem->getOperand(0), Rem->getOperand(1),
+ "tmp");
+ SelectInst *Sel =
+ SelectInst::Create(ICmp,
+ ConstantInt::get(Rem->getType(), 0),
+ Rem->getOperand(0), "tmp", Rem);
+ Rem->replaceAllUsesWith(Sel);
+ } else
+ continue;
+ }
+
+ // Inform IVUsers about the new users.
+ if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0)))
+ IU->AddUsersIfInteresting(I);
+
+ DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
+ DeadInsts.push_back(Rem);
+ }
+
+ // Now that we're done iterating through lists, clean up any instructions
+ // which are now dead.
+ while (!DeadInsts.empty())
+ if (Instruction *Inst =
+ dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
+ RecursivelyDeleteTriviallyDeadInstructions(Inst);
+}
+
bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
IU = &getAnalysis<IVUsers>();
LI = &getAnalysis<LoopInfo>();
@@ -362,6 +492,12 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount))
RewriteLoopExitValues(L, Rewriter);
+ // Simplify ICmp IV users.
+ EliminateIVComparisons();
+
+ // Simplify SRem and URem IV users.
+ EliminateIVRemainders();
+
// Compute the type of the largest recurrence expression, and decide whether
// a canonical induction variable should be inserted.
const Type *LargestType = 0;
@@ -454,6 +590,46 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
return Changed;
}
+// FIXME: It is an extremely bad idea to indvar substitute anything more
+// complex than affine induction variables. Doing so will put expensive
+// polynomial evaluations inside of the loop, and the strength reduction pass
+// currently can only reduce affine polynomials. For now just disable
+// indvar subst on anything more complex than an affine addrec, unless
+// it can be expanded to a trivial value.
+static bool isSafe(const SCEV *S, const Loop *L) {
+ // Loop-invariant values are safe.
+ if (S->isLoopInvariant(L)) return true;
+
+ // Affine addrecs are safe. Non-affine are not, because LSR doesn't know how
+ // to transform them into efficient code.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
+ return AR->isAffine();
+
+ // An add is safe if all its operands are safe.
+ if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) {
+ for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(),
+ E = Commutative->op_end(); I != E; ++I)
+ if (!isSafe(*I, L)) return false;
+ return true;
+ }
+
+ // A cast is safe if its operand is.
+ if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
+ return isSafe(C->getOperand(), L);
+
+ // A udiv is safe if its operands are.
+ if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S))
+ return isSafe(UD->getLHS(), L) &&
+ isSafe(UD->getRHS(), L);
+
+ // SCEVUnknown is always safe.
+ if (isa<SCEVUnknown>(S))
+ return true;
+
+ // Nothing else is safe.
+ return false;
+}
+
void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
SmallVector<WeakVH, 16> DeadInsts;
@@ -465,7 +641,6 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
// the need for the code evaluation methods to insert induction variables
// of different sizes.
for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) {
- const SCEV *Stride = UI->getStride();
Value *Op = UI->getOperandValToReplace();
const Type *UseTy = Op->getType();
Instruction *User = UI->getUser();
@@ -486,7 +661,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
// currently can only reduce affine polynomials. For now just disable
// indvar subst on anything more complex than an affine addrec, unless
// it can be expanded to a trivial value.
- if (!AR->isLoopInvariant(L) && !Stride->isLoopInvariant(L))
+ if (!isSafe(AR, L))
continue;
// Determine the insertion point for this user. By default, insert
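Among the IndVarSimplify additions, EliminateIVRemainders rests on two arithmetic identities that hold whenever the numerator's range is known; ScalarEvolution supplies the range facts in the pass. The identities themselves are easy to verify exhaustively for small values:

```cpp
// A standalone check of the two rewrites in EliminateIVRemainders:
//   i % n       -> i                      when i is known to be in [0, n)
//   (i+1) % n   -> (i+1) == n ? 0 : i+1   when i is known to be in [0, n)
#include <cassert>

int main() {
  for (int n = 1; n < 50; ++n)
    for (int i = 0; i < n; ++i) {
      assert(i % n == i);                                 // first rewrite
      assert((i + 1) % n == ((i + 1) == n ? 0 : i + 1));  // second rewrite
    }
  return 0;
}
```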
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index a6489ecc2dcf..df05b712dcbf 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -670,8 +670,10 @@ bool JumpThreading::ProcessBranchOnDuplicateCond(BasicBlock *PredBB,
Value *OldCond = DestBI->getCondition();
DestBI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
BranchDir));
- ConstantFoldTerminator(BB);
+ // Delete dead instructions before we fold the branch. Folding the branch
+ // can eliminate edges from the CFG which can end up deleting OldCond.
RecursivelyDeleteTriviallyDeadInstructions(OldCond);
+ ConstantFoldTerminator(BB);
return true;
}
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index d7ace342fcba..73473952912e 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -683,16 +683,18 @@ void LICM::PromoteValuesInLoop() {
// to LI as we are loading or storing. Since we know that the value is
// stored in this loop, this will always succeed.
for (Value::use_iterator UI = Ptr->use_begin(), E = Ptr->use_end();
- UI != E; ++UI)
- if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
+ UI != E; ++UI) {
+ User *U = *UI;
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
LoadValue = LI;
break;
- } else if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
if (SI->getOperand(1) == Ptr) {
LoadValue = SI->getOperand(0);
break;
}
}
+ }
assert(LoadValue && "No store through the pointer found!");
PointerValueNumbers.push_back(LoadValue); // Remember this for later.
}
diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp
index 16d3f2f703ff..101ff5b20019 100644
--- a/lib/Transforms/Scalar/LoopIndexSplit.cpp
+++ b/lib/Transforms/Scalar/LoopIndexSplit.cpp
@@ -948,6 +948,25 @@ bool LoopIndexSplit::splitLoop() {
if (!IVBasedValues.count(SplitCondition->getOperand(!SVOpNum)))
return false;
+ // Check for side effects.
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I) {
+ BasicBlock *BB = *I;
+
+ assert(DT->dominates(Header, BB));
+ if (DT->properlyDominates(SplitCondition->getParent(), BB))
+ continue;
+
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
+ BI != BE; ++BI) {
+ Instruction *Inst = BI;
+
+ if (!Inst->isSafeToSpeculativelyExecute() && !isa<PHINode>(Inst)
+ && !isa<BranchInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst))
+ return false;
+ }
+ }
+
// Normalize loop conditions so that it is easier to calculate new loop
// bounds.
if (IVisGT(*ExitCondition) || IVisGE(*ExitCondition)) {
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 625a75d6cca9..cf3d16f05dbb 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -221,7 +221,7 @@ static void DoInitialMatch(const SCEV *S, Loop *L,
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
if (!AR->getStart()->isZero()) {
DoInitialMatch(AR->getStart(), L, Good, Bad, SE, DT);
- DoInitialMatch(SE.getAddRecExpr(SE.getIntegerSCEV(0, AR->getType()),
+ DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
AR->getStepRecurrence(SE),
AR->getLoop()),
L, Good, Bad, SE, DT);
@@ -262,11 +262,15 @@ void Formula::InitialMatch(const SCEV *S, Loop *L,
SmallVector<const SCEV *, 4> Bad;
DoInitialMatch(S, L, Good, Bad, SE, DT);
if (!Good.empty()) {
- BaseRegs.push_back(SE.getAddExpr(Good));
+ const SCEV *Sum = SE.getAddExpr(Good);
+ if (!Sum->isZero())
+ BaseRegs.push_back(Sum);
AM.HasBaseReg = true;
}
if (!Bad.empty()) {
- BaseRegs.push_back(SE.getAddExpr(Bad));
+ const SCEV *Sum = SE.getAddExpr(Bad);
+ if (!Sum->isZero())
+ BaseRegs.push_back(Sum);
AM.HasBaseReg = true;
}
}
@@ -375,7 +379,7 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
bool IgnoreSignificantBits = false) {
// Handle the trivial case, which works for any SCEV type.
if (LHS == RHS)
- return SE.getIntegerSCEV(1, LHS->getType());
+ return SE.getConstant(LHS->getType(), 1);
// Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do some
// folding.
@@ -450,7 +454,7 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
if (C->getValue()->getValue().getMinSignedBits() <= 64) {
- S = SE.getIntegerSCEV(0, C->getType());
+ S = SE.getConstant(C->getType(), 0);
return C->getValue()->getSExtValue();
}
} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
@@ -473,7 +477,7 @@ static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
- S = SE.getIntegerSCEV(0, GV->getType());
+ S = SE.getConstant(GV->getType(), 0);
return GV;
}
} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
@@ -781,10 +785,10 @@ struct LSRFixup {
/// will be replaced.
Value *OperandValToReplace;
- /// PostIncLoop - If this user is to use the post-incremented value of an
+ /// PostIncLoops - If this user is to use the post-incremented value of an
/// induction variable, this set is non-empty and holds the loops
/// associated with the induction variable.
- const Loop *PostIncLoop;
+ PostIncLoopSet PostIncLoops;
/// LUIdx - The index of the LSRUse describing the expression which
/// this fixup needs, minus an offset (below).
@@ -795,6 +799,8 @@ struct LSRFixup {
/// offsets, for example in an unrolled loop.
int64_t Offset;
+ bool isUseFullyOutsideLoop(const Loop *L) const;
+
LSRFixup();
void print(raw_ostream &OS) const;
@@ -804,9 +810,24 @@ struct LSRFixup {
}
LSRFixup::LSRFixup()
- : UserInst(0), OperandValToReplace(0), PostIncLoop(0),
+ : UserInst(0), OperandValToReplace(0),
LUIdx(~size_t(0)), Offset(0) {}
+/// isUseFullyOutsideLoop - Test whether this fixup always uses its
+/// value outside of the given loop.
+bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
+ // PHI nodes use their value in their incoming blocks.
+ if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == OperandValToReplace &&
+ L->contains(PN->getIncomingBlock(i)))
+ return false;
+ return true;
+ }
+
+ return !L->contains(UserInst);
+}
+
void LSRFixup::print(raw_ostream &OS) const {
OS << "UserInst=";
// Store is common and interesting enough to be worth special-casing.
@@ -821,9 +842,10 @@ void LSRFixup::print(raw_ostream &OS) const {
OS << ", OperandValToReplace=";
WriteAsOperand(OS, OperandValToReplace, /*PrintType=*/false);
- if (PostIncLoop) {
+ for (PostIncLoopSet::const_iterator I = PostIncLoops.begin(),
+ E = PostIncLoops.end(); I != E; ++I) {
OS << ", PostIncLoop=";
- WriteAsOperand(OS, PostIncLoop->getHeader(), /*PrintType=*/false);
+ WriteAsOperand(OS, (*I)->getHeader(), /*PrintType=*/false);
}
if (LUIdx != ~size_t(0))
@@ -1135,6 +1157,7 @@ class LSRInstance {
IVUsers &IU;
ScalarEvolution &SE;
DominatorTree &DT;
+ LoopInfo &LI;
const TargetLowering *const TLI;
Loop *const L;
bool Changed;
@@ -1214,6 +1237,13 @@ public:
DenseSet<const SCEV *> &VisitedRegs) const;
void Solve(SmallVectorImpl<const Formula *> &Solution) const;
+ BasicBlock::iterator
+ HoistInsertPosition(BasicBlock::iterator IP,
+ const SmallVectorImpl<Instruction *> &Inputs) const;
+ BasicBlock::iterator AdjustInsertPositionForExpand(BasicBlock::iterator IP,
+ const LSRFixup &LF,
+ const LSRUse &LU) const;
+
Value *Expand(const LSRFixup &LF,
const Formula &F,
BasicBlock::iterator IP,
@@ -1427,16 +1457,30 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
return Cond;
- const SCEV *One = SE.getIntegerSCEV(1, BackedgeTakenCount->getType());
+ const SCEV *One = SE.getConstant(BackedgeTakenCount->getType(), 1);
// Add one to the backedge-taken count to get the trip count.
const SCEV *IterationCount = SE.getAddExpr(BackedgeTakenCount, One);
-
- // Check for a max calculation that matches the pattern.
- if (!isa<SCEVSMaxExpr>(IterationCount) && !isa<SCEVUMaxExpr>(IterationCount))
+ if (IterationCount != SE.getSCEV(Sel)) return Cond;
+
+ // Check for a max calculation that matches the pattern. There's no check
+ // for ICMP_ULE here because the comparison would be with zero, which
+ // isn't interesting.
+ CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
+ const SCEVNAryExpr *Max = 0;
+ if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
+ Pred = ICmpInst::ICMP_SLE;
+ Max = S;
+ } else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
+ Pred = ICmpInst::ICMP_SLT;
+ Max = S;
+ } else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {
+ Pred = ICmpInst::ICMP_ULT;
+ Max = U;
+ } else {
+ // No match; bail.
return Cond;
- const SCEVNAryExpr *Max = cast<SCEVNAryExpr>(IterationCount);
- if (Max != SE.getSCEV(Sel)) return Cond;
+ }
// To handle a max with more than two operands, this optimization would
// require additional checking and setup.
@@ -1445,7 +1489,13 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
const SCEV *MaxLHS = Max->getOperand(0);
const SCEV *MaxRHS = Max->getOperand(1);
- if (!MaxLHS || MaxLHS != One) return Cond;
+
+ // ScalarEvolution canonicalizes constants to the left. For < and >, look
+ // for a comparison with 1. For <= and >=, a comparison with zero.
+ if (!MaxLHS ||
+ (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One)))
+ return Cond;
+
// Check the relevant induction variable for conformance to
// the pattern.
const SCEV *IV = SE.getSCEV(Cond->getOperand(0));
@@ -1461,16 +1511,29 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
// Check the right operand of the select, and remember it, as it will
// be used in the new comparison instruction.
Value *NewRHS = 0;
- if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)
+ if (ICmpInst::isTrueWhenEqual(Pred)) {
+ // Look for n+1, and grab n.
+ if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1)))
+ if (isa<ConstantInt>(BO->getOperand(1)) &&
+ cast<ConstantInt>(BO->getOperand(1))->isOne() &&
+ SE.getSCEV(BO->getOperand(0)) == MaxRHS)
+ NewRHS = BO->getOperand(0);
+ if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2)))
+ if (isa<ConstantInt>(BO->getOperand(1)) &&
+ cast<ConstantInt>(BO->getOperand(1))->isOne() &&
+ SE.getSCEV(BO->getOperand(0)) == MaxRHS)
+ NewRHS = BO->getOperand(0);
+ if (!NewRHS)
+ return Cond;
+ } else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)
NewRHS = Sel->getOperand(1);
else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS)
NewRHS = Sel->getOperand(2);
- if (!NewRHS) return Cond;
+ else
+ llvm_unreachable("Max doesn't match expected pattern!");
// Determine the new comparison opcode. It may be signed or unsigned,
// and the original comparison may be either equality or inequality.
- CmpInst::Predicate Pred =
- isa<SCEVSMaxExpr>(Max) ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT;
if (Cond->getPredicate() == CmpInst::ICMP_EQ)
Pred = CmpInst::getInversePredicate(Pred);
@@ -1545,8 +1608,9 @@ LSRInstance::OptimizeLoopTermCond() {
!DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) {
// Conservatively assume there may be reuse if the quotient of their
// strides could be a legal scale.
- const SCEV *A = CondUse->getStride();
- const SCEV *B = UI->getStride();
+ const SCEV *A = IU.getStride(*CondUse, L);
+ const SCEV *B = IU.getStride(*UI, L);
+ if (!A || !B) continue;
if (SE.getTypeSizeInBits(A->getType()) !=
SE.getTypeSizeInBits(B->getType())) {
if (SE.getTypeSizeInBits(A->getType()) >
@@ -1598,8 +1662,7 @@ LSRInstance::OptimizeLoopTermCond() {
ExitingBlock->getInstList().insert(TermBr, Cond);
// Clone the IVUse, as the old use still exists!
- CondUse = &IU.AddUser(CondUse->getStride(), CondUse->getOffset(),
- Cond, CondUse->getOperandValToReplace());
+ CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace());
TermBr->replaceUsesOfWith(OldCond, Cond);
}
}
@@ -1607,9 +1670,7 @@ LSRInstance::OptimizeLoopTermCond() {
// If we get to here, we know that we can transform the setcc instruction to
// use the post-incremented version of the IV, allowing us to coalesce the
// live ranges for the IV correctly.
- CondUse->setOffset(SE.getMinusSCEV(CondUse->getOffset(),
- CondUse->getStride()));
- CondUse->setIsUseOfPostIncrementedValue(true);
+ CondUse->transformToPostInc(L);
Changed = true;
PostIncs.insert(Cond);
@@ -1717,19 +1778,24 @@ void LSRInstance::CollectInterestingTypesAndFactors() {
SmallSetVector<const SCEV *, 4> Strides;
// Collect interesting types and strides.
+ SmallVector<const SCEV *, 4> Worklist;
for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
- const SCEV *Stride = UI->getStride();
+ const SCEV *Expr = IU.getExpr(*UI);
// Collect interesting types.
- Types.insert(SE.getEffectiveSCEVType(Stride->getType()));
-
- // Add the stride for this loop.
- Strides.insert(Stride);
-
- // Add strides for other mentioned loops.
- for (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(UI->getOffset());
- AR; AR = dyn_cast<SCEVAddRecExpr>(AR->getStart()))
- Strides.insert(AR->getStepRecurrence(SE));
+ Types.insert(SE.getEffectiveSCEVType(Expr->getType()));
+
+ // Add strides for mentioned loops.
+ Worklist.push_back(Expr);
+ do {
+ const SCEV *S = Worklist.pop_back_val();
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ Strides.insert(AR->getStepRecurrence(SE));
+ Worklist.push_back(AR->getStart());
+ } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ Worklist.insert(Worklist.end(), Add->op_begin(), Add->op_end());
+ }
+ } while (!Worklist.empty());
}
// Compute interesting factors from the set of interesting strides.
@@ -1776,8 +1842,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
LSRFixup &LF = getNewFixup();
LF.UserInst = UI->getUser();
LF.OperandValToReplace = UI->getOperandValToReplace();
- if (UI->isUseOfPostIncrementedValue())
- LF.PostIncLoop = L;
+ LF.PostIncLoops = UI->getPostIncLoops();
LSRUse::KindType Kind = LSRUse::Basic;
const Type *AccessTy = 0;
@@ -1786,7 +1851,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
AccessTy = getAccessType(LF.UserInst);
}
- const SCEV *S = IU.getCanonicalExpr(*UI);
+ const SCEV *S = IU.getExpr(*UI);
// Equality (== and !=) ICmps are special. We can rewrite (i == N) as
// (N - i == 0), and this allows (N - i) to be the expression that we work
@@ -1824,7 +1889,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
LF.LUIdx = P.first;
LF.Offset = P.second;
LSRUse &LU = Uses[LF.LUIdx];
- LU.AllFixupsOutsideLoop &= !L->contains(LF.UserInst);
+ LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
// If this is the first use of this LSRUse, give it a formula.
if (LU.Formulae.empty()) {
@@ -1918,9 +1983,17 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
continue;
// Ignore uses which are part of other SCEV expressions, to avoid
// analyzing them multiple times.
- if (SE.isSCEVable(UserInst->getType()) &&
- !isa<SCEVUnknown>(SE.getSCEV(const_cast<Instruction *>(UserInst))))
- continue;
+ if (SE.isSCEVable(UserInst->getType())) {
+ const SCEV *UserS = SE.getSCEV(const_cast<Instruction *>(UserInst));
+ // If the user is a no-op, look through to its uses.
+ if (!isa<SCEVUnknown>(UserS))
+ continue;
+ if (UserS == U) {
+ Worklist.push_back(
+ SE.getUnknown(const_cast<Instruction *>(UserInst)));
+ continue;
+ }
+ }
// Ignore icmp instructions which are already being analyzed.
if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
unsigned OtherIdx = !UI.getOperandNo();
@@ -1936,7 +2009,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
LF.LUIdx = P.first;
LF.Offset = P.second;
LSRUse &LU = Uses[LF.LUIdx];
- LU.AllFixupsOutsideLoop &= L->contains(LF.UserInst);
+ LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
InsertSupplementalFormula(U, LU, LF.LUIdx);
CountRegisters(LU.Formulae.back(), Uses.size() - 1);
break;
@@ -1959,7 +2032,7 @@ static void CollectSubexprs(const SCEV *S, const SCEVConstant *C,
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
// Split a non-zero base out of an addrec.
if (!AR->getStart()->isZero()) {
- CollectSubexprs(SE.getAddRecExpr(SE.getIntegerSCEV(0, AR->getType()),
+ CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
AR->getStepRecurrence(SE),
AR->getLoop()), C, Ops, SE);
CollectSubexprs(AR->getStart(), C, Ops, SE);
@@ -2020,8 +2093,11 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
LU.Kind, LU.AccessTy, TLI, SE))
continue;
+ const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
+ if (InnerSum->isZero())
+ continue;
Formula F = Base;
- F.BaseRegs[i] = SE.getAddExpr(InnerAddOps);
+ F.BaseRegs[i] = InnerSum;
F.BaseRegs.push_back(*J);
if (InsertFormula(LU, LUIdx, F))
// If that formula hadn't been seen before, recurse to find more like
@@ -2102,7 +2178,7 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs - *I;
if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I,
LU.Kind, LU.AccessTy, TLI)) {
- F.BaseRegs[i] = SE.getAddExpr(G, SE.getIntegerSCEV(*I, G->getType()));
+ F.BaseRegs[i] = SE.getAddExpr(G, SE.getConstant(G->getType(), *I));
(void)InsertFormula(LU, LUIdx, F);
}
@@ -2165,7 +2241,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
// Compensate for the use having MinOffset built into it.
F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Offset - LU.MinOffset;
- const SCEV *FactorS = SE.getIntegerSCEV(Factor, IntTy);
+ const SCEV *FactorS = SE.getConstant(IntTy, Factor);
// Check that multiplying with each base register doesn't overflow.
for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {
@@ -2227,7 +2303,7 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx,
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
if (const SCEVAddRecExpr *AR =
dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i])) {
- const SCEV *FactorS = SE.getIntegerSCEV(Factor, IntTy);
+ const SCEV *FactorS = SE.getConstant(IntTy, Factor);
if (FactorS->isZero())
continue;
// Divide out the factor, ignoring high bits, since we'll be
@@ -2426,7 +2502,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
if (C->getValue()->getValue().isNegative() !=
(NewF.AM.BaseOffs < 0) &&
(C->getValue()->getValue().abs() * APInt(BitWidth, F.AM.Scale))
- .ule(APInt(BitWidth, NewF.AM.BaseOffs).abs()))
+ .ule(abs64(NewF.AM.BaseOffs)))
continue;
// OK, looks good.
@@ -2454,7 +2530,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
if (C->getValue()->getValue().isNegative() !=
(NewF.AM.BaseOffs < 0) &&
C->getValue()->getValue().abs()
- .ule(APInt(BitWidth, NewF.AM.BaseOffs).abs()))
+ .ule(abs64(NewF.AM.BaseOffs)))
goto skip_formula;
// Ok, looks good.
@@ -2776,37 +2852,33 @@ static BasicBlock *getImmediateDominator(BasicBlock *BB, DominatorTree &DT) {
return Node->getBlock();
}
-Value *LSRInstance::Expand(const LSRFixup &LF,
- const Formula &F,
- BasicBlock::iterator IP,
- SCEVExpander &Rewriter,
- SmallVectorImpl<WeakVH> &DeadInsts) const {
- const LSRUse &LU = Uses[LF.LUIdx];
-
- // Then, collect some instructions which we will remain dominated by when
- // expanding the replacement. These must be dominated by any operands that
- // will be required in the expansion.
- SmallVector<Instruction *, 4> Inputs;
- if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace))
- Inputs.push_back(I);
- if (LU.Kind == LSRUse::ICmpZero)
- if (Instruction *I =
- dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
- Inputs.push_back(I);
- if (LF.PostIncLoop) {
- if (!L->contains(LF.UserInst))
- Inputs.push_back(L->getLoopLatch()->getTerminator());
- else
- Inputs.push_back(IVIncInsertPos);
- }
-
- // Then, climb up the immediate dominator tree as far as we can go while
- // still being dominated by the input positions.
+/// HoistInsertPosition - Helper for AdjustInsertPositionForExpand. Climb up
+/// the dominator tree as far as we can go while still being dominated by the
+/// input positions. This helps canonicalize the insert position, which
+/// encourages sharing.
+BasicBlock::iterator
+LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
+ const SmallVectorImpl<Instruction *> &Inputs)
+ const {
for (;;) {
+ const Loop *IPLoop = LI.getLoopFor(IP->getParent());
+ unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;
+
+ BasicBlock *IDom;
+ for (BasicBlock *Rung = IP->getParent(); ; Rung = IDom) {
+ IDom = getImmediateDominator(Rung, DT);
+ if (!IDom) return IP;
+
+ // Don't climb into a loop though.
+ const Loop *IDomLoop = LI.getLoopFor(IDom);
+ unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
+ if (IDomDepth <= IPLoopDepth &&
+ (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
+ break;
+ }
+
bool AllDominate = true;
Instruction *BetterPos = 0;
- BasicBlock *IDom = getImmediateDominator(IP->getParent(), DT);
- if (!IDom) break;
Instruction *Tentative = IDom->getTerminator();
for (SmallVectorImpl<Instruction *>::const_iterator I = Inputs.begin(),
E = Inputs.end(); I != E; ++I) {
@@ -2815,6 +2887,8 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
AllDominate = false;
break;
}
+ // Attempt to find an insert position in the middle of the block,
+ // instead of at the end, so that it can be used for other expansions.
if (IDom == Inst->getParent() &&
(!BetterPos || DT.dominates(BetterPos, Inst)))
BetterPos = next(BasicBlock::iterator(Inst));
@@ -2826,12 +2900,77 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
else
IP = Tentative;
}
+
+ return IP;
+}
+
+/// AdjustInsertPositionForExpand - Determine an input position which will be
+/// dominated by the operands and which will dominate the result.
+BasicBlock::iterator
+LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator IP,
+ const LSRFixup &LF,
+ const LSRUse &LU) const {
+ // Collect some instructions which must be dominated by the
+ // expanding replacement. These must be dominated by any operands that
+ // will be required in the expansion.
+ SmallVector<Instruction *, 4> Inputs;
+ if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace))
+ Inputs.push_back(I);
+ if (LU.Kind == LSRUse::ICmpZero)
+ if (Instruction *I =
+ dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
+ Inputs.push_back(I);
+ if (LF.PostIncLoops.count(L)) {
+ if (LF.isUseFullyOutsideLoop(L))
+ Inputs.push_back(L->getLoopLatch()->getTerminator());
+ else
+ Inputs.push_back(IVIncInsertPos);
+ }
+ // The expansion must also be dominated by the increment positions of any
+ // loops for which it is using post-inc mode.
+ for (PostIncLoopSet::const_iterator I = LF.PostIncLoops.begin(),
+ E = LF.PostIncLoops.end(); I != E; ++I) {
+ const Loop *PIL = *I;
+ if (PIL == L) continue;
+
+ // Be dominated by the loop exit.
+ SmallVector<BasicBlock *, 4> ExitingBlocks;
+ PIL->getExitingBlocks(ExitingBlocks);
+ if (!ExitingBlocks.empty()) {
+ BasicBlock *BB = ExitingBlocks[0];
+ for (unsigned i = 1, e = ExitingBlocks.size(); i != e; ++i)
+ BB = DT.findNearestCommonDominator(BB, ExitingBlocks[i]);
+ Inputs.push_back(BB->getTerminator());
+ }
+ }
+
+ // Then, climb up the immediate dominator tree as far as we can go while
+ // still being dominated by the input positions.
+ IP = HoistInsertPosition(IP, Inputs);
+
+ // Don't insert instructions before PHI nodes.
while (isa<PHINode>(IP)) ++IP;
+
+ // Ignore debug intrinsics.
while (isa<DbgInfoIntrinsic>(IP)) ++IP;
+ return IP;
+}
+
+Value *LSRInstance::Expand(const LSRFixup &LF,
+ const Formula &F,
+ BasicBlock::iterator IP,
+ SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts) const {
+ const LSRUse &LU = Uses[LF.LUIdx];
+
+ // Determine an input position which will be dominated by the operands and
+ // which will dominate the result.
+ IP = AdjustInsertPositionForExpand(IP, LF, LU);
+
// Inform the Rewriter if we have a post-increment use, so that it can
// perform an advantageous expansion.
- Rewriter.setPostInc(LF.PostIncLoop);
+ Rewriter.setPostInc(LF.PostIncLoops);
// This is the type that the user actually needs.
const Type *OpTy = LF.OperandValToReplace->getType();
@@ -2855,24 +2994,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
const SCEV *Reg = *I;
assert(!Reg->isZero() && "Zero allocated in a base register!");
- // If we're expanding for a post-inc user for the add-rec's loop, make the
- // post-inc adjustment.
- const SCEV *Start = Reg;
- while (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Start)) {
- if (AR->getLoop() == LF.PostIncLoop) {
- Reg = SE.getAddExpr(Reg, AR->getStepRecurrence(SE));
- // If the user is inside the loop, insert the code after the increment
- // so that it is dominated by its operand. If the original insert point
- // was already dominated by the increment, keep it, because there may
- // be loop-variant operands that need to be respected also.
- if (L->contains(LF.UserInst) && !DT.dominates(IVIncInsertPos, IP)) {
- IP = IVIncInsertPos;
- while (isa<DbgInfoIntrinsic>(IP)) ++IP;
- }
- break;
- }
- Start = AR->getStart();
- }
+ // If we're expanding for a post-inc user, make the post-inc adjustment.
+ PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
+ Reg = TransformForPostIncUse(Denormalize, Reg,
+ LF.UserInst, LF.OperandValToReplace,
+ Loops, SE, DT);
Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP)));
}
@@ -2889,11 +3015,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
if (F.AM.Scale != 0) {
const SCEV *ScaledS = F.ScaledReg;
- // If we're expanding for a post-inc user for the add-rec's loop, make the
- // post-inc adjustment.
- if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ScaledS))
- if (AR->getLoop() == LF.PostIncLoop)
- ScaledS = SE.getAddExpr(ScaledS, AR->getStepRecurrence(SE));
+ // If we're expanding for a post-inc user, make the post-inc adjustment.
+ PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
+ ScaledS = TransformForPostIncUse(Denormalize, ScaledS,
+ LF.UserInst, LF.OperandValToReplace,
+ Loops, SE, DT);
if (LU.Kind == LSRUse::ICmpZero) {
// An interesting way of "folding" with an icmp is to use a negated
@@ -2907,8 +3033,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// which is expected to be matched as part of the address.
ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP));
ScaledS = SE.getMulExpr(ScaledS,
- SE.getIntegerSCEV(F.AM.Scale,
- ScaledS->getType()));
+ SE.getConstant(ScaledS->getType(), F.AM.Scale));
Ops.push_back(ScaledS);
// Flush the operand list to suppress SCEVExpander hoisting.
@@ -2949,12 +3074,12 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// Emit instructions summing all the operands.
const SCEV *FullS = Ops.empty() ?
- SE.getIntegerSCEV(0, IntTy) :
+ SE.getConstant(IntTy, 0) :
SE.getAddExpr(Ops);
Value *FullV = Rewriter.expandCodeFor(FullS, Ty, IP);
// We're done expanding now, so reset the rewriter.
- Rewriter.setPostInc(0);
+ Rewriter.clearPostInc();
// An ICmpZero Formula represents an ICmp which we're handling as a
// comparison against zero. Now that we've expanded an expression for that
@@ -3118,6 +3243,7 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
: IU(P->getAnalysis<IVUsers>()),
SE(P->getAnalysis<ScalarEvolution>()),
DT(P->getAnalysis<DominatorTree>()),
+ LI(P->getAnalysis<LoopInfo>()),
TLI(tli), L(l), Changed(false), IVIncInsertPos(0) {
// If LoopSimplify form is not available, stay out of trouble.
@@ -3274,9 +3400,10 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
// We split critical edges, so we change the CFG. However, we do update
// many analyses if they are around.
AU.addPreservedID(LoopSimplifyID);
- AU.addPreserved<LoopInfo>();
AU.addPreserved("domfrontier");
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequired<DominatorTree>();
AU.addPreserved<DominatorTree>();
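One subtle piece of the LSR rework is isUseFullyOutsideLoop: a PHI consumes each incoming value at the end of the corresponding predecessor block, not at the PHI's own location, so "outside the loop" has to be judged per incoming edge. A reduced model with blocks as integers (toy types, not the LLVM IR classes):

```cpp
#include <cstddef>
#include <set>
#include <utility>
#include <vector>

typedef int Block;

struct Loop {
  std::set<Block> Blocks;
  bool contains(Block B) const { return Blocks.count(B) != 0; }
};

// A PHI use, reduced to (incoming block, does this slot carry our operand?).
struct PhiUse {
  std::vector<std::pair<Block, bool> > Incoming;
};

static bool isUseFullyOutsideLoop(const PhiUse &PN, const Loop &L) {
  for (std::size_t i = 0, e = PN.Incoming.size(); i != e; ++i)
    if (PN.Incoming[i].second && L.contains(PN.Incoming[i].first))
      return false; // consumed on an edge whose source is inside the loop
  return true;
}

int main() {
  Loop L;
  L.Blocks.insert(1); // block 1 is inside the loop
  PhiUse PN;
  PN.Incoming.push_back(std::make_pair(1, true)); // operand flows in from block 1
  return isUseFullyOutsideLoop(PN, L) ? 1 : 0;    // expect "not fully outside"
}
```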
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 3918738cc8be..ae7bf40e0e1a 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -34,6 +34,7 @@
#include "llvm/Instructions.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/Dominators.h"
@@ -677,15 +678,22 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
LoopProcessWorklist.push_back(NewLoop);
redoLoop = true;
+ // Keep a WeakVH holding onto LIC. If the first call to RewriteLoopBody
+ // deletes the instruction (for example by simplifying a PHI that feeds into
+ // the condition that we're unswitching on), we don't rewrite the second
+ // iteration.
+ WeakVH LICHandle(LIC);
+
// Now we rewrite the original code to know that the condition is true and the
// new code to know that the condition is false.
RewriteLoopBodyWithConditionConstant(L, LIC, Val, false);
-
- // It's possible that simplifying one loop could cause the other to be
- // deleted. If so, don't simplify it.
- if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop)
- RewriteLoopBodyWithConditionConstant(NewLoop, LIC, Val, true);
+ // It's possible that simplifying one loop could cause the other to be
+ // changed to another value or a constant. If it's a constant, don't simplify
+ // it.
+ if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop &&
+ LICHandle && !isa<Constant>(LICHandle))
+ RewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val, true);
}
/// RemoveFromWorklist - Remove all instances of I from the worklist vector
@@ -981,45 +989,16 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
continue;
}
+ // See if instruction simplification can hack this up. This is common for
+ // things like "select false, X, Y" after unswitching made the condition be
+ // 'false'.
+ if (Value *V = SimplifyInstruction(I)) {
+ ReplaceUsesOfWith(I, V, Worklist, L, LPM);
+ continue;
+ }
+
// Special case hacks that appear commonly in unswitched code.
- switch (I->getOpcode()) {
- case Instruction::Select:
- if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(0))) {
- ReplaceUsesOfWith(I, I->getOperand(!CB->getZExtValue()+1), Worklist, L,
- LPM);
- continue;
- }
- break;
- case Instruction::And:
- if (isa<ConstantInt>(I->getOperand(0)) &&
- // constant -> RHS
- I->getOperand(0)->getType()->isIntegerTy(1))
- cast<BinaryOperator>(I)->swapOperands();
- if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(1)))
- if (CB->getType()->isIntegerTy(1)) {
- if (CB->isOne()) // X & 1 -> X
- ReplaceUsesOfWith(I, I->getOperand(0), Worklist, L, LPM);
- else // X & 0 -> 0
- ReplaceUsesOfWith(I, I->getOperand(1), Worklist, L, LPM);
- continue;
- }
- break;
- case Instruction::Or:
- if (isa<ConstantInt>(I->getOperand(0)) &&
- // constant -> RHS
- I->getOperand(0)->getType()->isIntegerTy(1))
- cast<BinaryOperator>(I)->swapOperands();
- if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(1)))
- if (CB->getType()->isIntegerTy(1)) {
- if (CB->isOne()) // X | 1 -> 1
- ReplaceUsesOfWith(I, I->getOperand(1), Worklist, L, LPM);
- else // X | 0 -> X
- ReplaceUsesOfWith(I, I->getOperand(0), Worklist, L, LPM);
- continue;
- }
- break;
- case Instruction::Br: {
- BranchInst *BI = cast<BranchInst>(I);
+ if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
if (BI->isUnconditional()) {
// If BI's parent is the only pred of the successor, fold the two blocks
// together.
@@ -1052,13 +1031,13 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
LPM->deleteSimpleAnalysisValue(Succ, L);
Succ->eraseFromParent();
++NumSimplify;
- break;
+ continue;
}
if (ConstantInt *CB = dyn_cast<ConstantInt>(BI->getCondition())){
// Conditional branch. Turn it into an unconditional branch, then
// remove dead blocks.
- break; // FIXME: Enable.
+ continue; // FIXME: Enable.
DEBUG(dbgs() << "Folded branch: " << *BI);
BasicBlock *DeadSucc = BI->getSuccessor(CB->getZExtValue());
@@ -1072,8 +1051,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
RemoveBlockIfDead(DeadSucc, Worklist, L);
}
- break;
- }
+ continue;
}
}
}
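The LoopUnswitch fix leans on WeakVH, a handle that reads as null once the instruction it tracks is deleted; that is how the second rewrite notices the first one destroyed LIC. A toy model of that behavior only: the real WeakVH hooks into LLVM's value-handle callbacks, while this version routes deletion through an explicit registry so the example stays standalone:

```cpp
#include <cassert>
#include <set>

struct Value {};
static std::set<Value *> LiveValues; // stand-in for the value-handle machinery

struct WeakVHModel {
  Value *V;
  explicit WeakVHModel(Value *V) : V(V) {}
  // "Null if deleted": report the pointer only while it is still live.
  operator Value *() const { return LiveValues.count(V) ? V : 0; }
};

int main() {
  Value *LIC = new Value();
  LiveValues.insert(LIC);
  WeakVHModel LICHandle(LIC);
  assert(LICHandle != 0); // safe to use for the first rewrite
  LiveValues.erase(LIC);  // simulate the rewrite deleting LIC
  delete LIC;
  assert(LICHandle == 0); // so the second rewrite is skipped
  return 0;
}
```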
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 3b305ae766e8..3611b8ebe562 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -744,7 +744,7 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
const Type *ArgTys[3] = { M->getRawDest()->getType(),
M->getRawSource()->getType(),
M->getLength()->getType() };
- M->setOperand(0,Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, ArgTys, 3));
+ M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, ArgTys, 3));
// MemDep may have overly conservative information about this instruction, just
// conservatively flush it from the cache.
diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp
index 7a6eec3d46b4..13222ac22004 100644
--- a/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -46,10 +46,11 @@ namespace {
bool valueEscapes(const Instruction *Inst) const {
const BasicBlock *BB = Inst->getParent();
for (Value::const_use_iterator UI = Inst->use_begin(),E = Inst->use_end();
- UI != E; ++UI)
- if (cast<Instruction>(*UI)->getParent() != BB ||
- isa<PHINode>(*UI))
+ UI != E; ++UI) {
+ const Instruction *I = cast<Instruction>(*UI);
+ if (I->getParent() != BB || isa<PHINode>(I))
return true;
+ }
return false;
}
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 4f09bee53a81..907ece8fcce9 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -317,7 +317,10 @@ private:
void markConstant(LatticeVal &IV, Value *V, Constant *C) {
if (!IV.markConstant(C)) return;
DEBUG(dbgs() << "markConstant: " << *C << ": " << *V << '\n');
- InstWorkList.push_back(V);
+ if (IV.isOverdefined())
+ OverdefinedInstWorkList.push_back(V);
+ else
+ InstWorkList.push_back(V);
}
void markConstant(Value *V, Constant *C) {
@@ -327,9 +330,13 @@ private:
void markForcedConstant(Value *V, Constant *C) {
assert(!V->getType()->isStructTy() && "Should use other method");
- ValueState[V].markForcedConstant(C);
+ LatticeVal &IV = ValueState[V];
+ IV.markForcedConstant(C);
DEBUG(dbgs() << "markForcedConstant: " << *C << ": " << *V << '\n');
- InstWorkList.push_back(V);
+ if (IV.isOverdefined())
+ OverdefinedInstWorkList.push_back(V);
+ else
+ InstWorkList.push_back(V);
}
@@ -1445,6 +1452,8 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// After a zero extend, we know the top part is zero. SExt doesn't have
// to be handled here, because we don't know whether the top part is 1's
// or 0's.
+ case Instruction::SIToFP: // some FP values are not possible, just use 0.
+ case Instruction::UIToFP: // some FP values are not possible, just use 0.
markForcedConstant(I, Constant::getNullValue(ITy));
return true;
case Instruction::Mul:
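The two new SIToFP/UIToFP cases rest on a small observation about undef resolution: an int-to-FP cast of an undef integer cannot produce every FP bit pattern, so the solver cannot soundly pick an arbitrary FP constant for the result; it forces the one value reachable from a concrete input, namely the conversion of 0. A hedged standalone restatement (hypothetical helper, not the solver's actual interface):

#include "llvm/Constants.h"
#include "llvm/Instruction.h"
using namespace llvm;

// For an int-to-FP cast whose undef input we are free to choose, return the
// result to force: input 0 converts to +0.0 of the result type. Returns
// null for opcodes this sketch does not cover.
static Constant *forcedResultForUndefIntToFP(Instruction *I) {
  switch (I->getOpcode()) {
  case Instruction::SIToFP:
  case Instruction::UIToFP:
    return Constant::getNullValue(I->getType());
  default:
    return 0;
  }
}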
diff --git a/lib/Transforms/Scalar/SCCVN.cpp b/lib/Transforms/Scalar/SCCVN.cpp
deleted file mode 100644
index 9685a2945f8c..000000000000
--- a/lib/Transforms/Scalar/SCCVN.cpp
+++ /dev/null
@@ -1,716 +0,0 @@
-//===- SCCVN.cpp - Eliminate redundant values -----------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass performs global value numbering to eliminate fully redundant
-// instructions. This is based on the paper "SCC-based Value Numbering"
-// by Cooper, et al.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "sccvn"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/BasicBlock.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Operator.h"
-#include "llvm/Value.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/SparseBitVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
-using namespace llvm;
-
-STATISTIC(NumSCCVNInstr, "Number of instructions deleted by SCCVN");
-STATISTIC(NumSCCVNPhi, "Number of phis deleted by SCCVN");
-
-//===----------------------------------------------------------------------===//
-// ValueTable Class
-//===----------------------------------------------------------------------===//
-
-/// This class holds the mapping between values and value numbers. It is used
-/// as an efficient mechanism to determine the expression-wise equivalence of
-/// two values.
-namespace {
- struct Expression {
- enum ExpressionOpcode { ADD, FADD, SUB, FSUB, MUL, FMUL,
- UDIV, SDIV, FDIV, UREM, SREM,
- FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ,
- ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE,
- ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ,
- FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE,
- FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE,
- FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT,
- SHUFFLE, SELECT, TRUNC, ZEXT, SEXT, FPTOUI,
- FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT,
- PTRTOINT, INTTOPTR, BITCAST, GEP, CALL, CONSTANT,
- INSERTVALUE, EXTRACTVALUE, EMPTY, TOMBSTONE };
-
- ExpressionOpcode opcode;
- const Type* type;
- SmallVector<uint32_t, 4> varargs;
-
- Expression() { }
- Expression(ExpressionOpcode o) : opcode(o) { }
-
- bool operator==(const Expression &other) const {
- if (opcode != other.opcode)
- return false;
- else if (opcode == EMPTY || opcode == TOMBSTONE)
- return true;
- else if (type != other.type)
- return false;
- else {
- if (varargs.size() != other.varargs.size())
- return false;
-
- for (size_t i = 0; i < varargs.size(); ++i)
- if (varargs[i] != other.varargs[i])
- return false;
-
- return true;
- }
- }
-
- bool operator!=(const Expression &other) const {
- return !(*this == other);
- }
- };
-
- class ValueTable {
- private:
- DenseMap<Value*, uint32_t> valueNumbering;
- DenseMap<Expression, uint32_t> expressionNumbering;
- DenseMap<Value*, uint32_t> constantsNumbering;
-
- uint32_t nextValueNumber;
-
- Expression::ExpressionOpcode getOpcode(BinaryOperator* BO);
- Expression::ExpressionOpcode getOpcode(CmpInst* C);
- Expression::ExpressionOpcode getOpcode(CastInst* C);
- Expression create_expression(BinaryOperator* BO);
- Expression create_expression(CmpInst* C);
- Expression create_expression(ShuffleVectorInst* V);
- Expression create_expression(ExtractElementInst* C);
- Expression create_expression(InsertElementInst* V);
- Expression create_expression(SelectInst* V);
- Expression create_expression(CastInst* C);
- Expression create_expression(GetElementPtrInst* G);
- Expression create_expression(CallInst* C);
- Expression create_expression(Constant* C);
- Expression create_expression(ExtractValueInst* C);
- Expression create_expression(InsertValueInst* C);
- public:
- ValueTable() : nextValueNumber(1) { }
- uint32_t computeNumber(Value *V);
- uint32_t lookup(Value *V);
- void add(Value *V, uint32_t num);
- void clear();
- void clearExpressions();
- void erase(Value *v);
- unsigned size();
- void verifyRemoved(const Value *) const;
- };
-}
-
-namespace llvm {
-template <> struct DenseMapInfo<Expression> {
- static inline Expression getEmptyKey() {
- return Expression(Expression::EMPTY);
- }
-
- static inline Expression getTombstoneKey() {
- return Expression(Expression::TOMBSTONE);
- }
-
- static unsigned getHashValue(const Expression e) {
- unsigned hash = e.opcode;
-
- hash = ((unsigned)((uintptr_t)e.type >> 4) ^
- (unsigned)((uintptr_t)e.type >> 9));
-
- for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(),
- E = e.varargs.end(); I != E; ++I)
- hash = *I + hash * 37;
-
- return hash;
- }
- static bool isEqual(const Expression &LHS, const Expression &RHS) {
- return LHS == RHS;
- }
-};
-template <>
-struct isPodLike<Expression> { static const bool value = true; };
-
-}
-
-//===----------------------------------------------------------------------===//
-// ValueTable Internal Functions
-//===----------------------------------------------------------------------===//
-Expression::ExpressionOpcode ValueTable::getOpcode(BinaryOperator* BO) {
- switch(BO->getOpcode()) {
- default: // THIS SHOULD NEVER HAPPEN
- llvm_unreachable("Binary operator with unknown opcode?");
- case Instruction::Add: return Expression::ADD;
- case Instruction::FAdd: return Expression::FADD;
- case Instruction::Sub: return Expression::SUB;
- case Instruction::FSub: return Expression::FSUB;
- case Instruction::Mul: return Expression::MUL;
- case Instruction::FMul: return Expression::FMUL;
- case Instruction::UDiv: return Expression::UDIV;
- case Instruction::SDiv: return Expression::SDIV;
- case Instruction::FDiv: return Expression::FDIV;
- case Instruction::URem: return Expression::UREM;
- case Instruction::SRem: return Expression::SREM;
- case Instruction::FRem: return Expression::FREM;
- case Instruction::Shl: return Expression::SHL;
- case Instruction::LShr: return Expression::LSHR;
- case Instruction::AShr: return Expression::ASHR;
- case Instruction::And: return Expression::AND;
- case Instruction::Or: return Expression::OR;
- case Instruction::Xor: return Expression::XOR;
- }
-}
-
-Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) {
- if (isa<ICmpInst>(C)) {
- switch (C->getPredicate()) {
- default: // THIS SHOULD NEVER HAPPEN
- llvm_unreachable("Comparison with unknown predicate?");
- case ICmpInst::ICMP_EQ: return Expression::ICMPEQ;
- case ICmpInst::ICMP_NE: return Expression::ICMPNE;
- case ICmpInst::ICMP_UGT: return Expression::ICMPUGT;
- case ICmpInst::ICMP_UGE: return Expression::ICMPUGE;
- case ICmpInst::ICMP_ULT: return Expression::ICMPULT;
- case ICmpInst::ICMP_ULE: return Expression::ICMPULE;
- case ICmpInst::ICMP_SGT: return Expression::ICMPSGT;
- case ICmpInst::ICMP_SGE: return Expression::ICMPSGE;
- case ICmpInst::ICMP_SLT: return Expression::ICMPSLT;
- case ICmpInst::ICMP_SLE: return Expression::ICMPSLE;
- }
- } else {
- switch (C->getPredicate()) {
- default: // THIS SHOULD NEVER HAPPEN
- llvm_unreachable("Comparison with unknown predicate?");
- case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ;
- case FCmpInst::FCMP_OGT: return Expression::FCMPOGT;
- case FCmpInst::FCMP_OGE: return Expression::FCMPOGE;
- case FCmpInst::FCMP_OLT: return Expression::FCMPOLT;
- case FCmpInst::FCMP_OLE: return Expression::FCMPOLE;
- case FCmpInst::FCMP_ONE: return Expression::FCMPONE;
- case FCmpInst::FCMP_ORD: return Expression::FCMPORD;
- case FCmpInst::FCMP_UNO: return Expression::FCMPUNO;
- case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ;
- case FCmpInst::FCMP_UGT: return Expression::FCMPUGT;
- case FCmpInst::FCMP_UGE: return Expression::FCMPUGE;
- case FCmpInst::FCMP_ULT: return Expression::FCMPULT;
- case FCmpInst::FCMP_ULE: return Expression::FCMPULE;
- case FCmpInst::FCMP_UNE: return Expression::FCMPUNE;
- }
- }
-}
-
-Expression::ExpressionOpcode ValueTable::getOpcode(CastInst* C) {
- switch(C->getOpcode()) {
- default: // THIS SHOULD NEVER HAPPEN
- llvm_unreachable("Cast operator with unknown opcode?");
- case Instruction::Trunc: return Expression::TRUNC;
- case Instruction::ZExt: return Expression::ZEXT;
- case Instruction::SExt: return Expression::SEXT;
- case Instruction::FPToUI: return Expression::FPTOUI;
- case Instruction::FPToSI: return Expression::FPTOSI;
- case Instruction::UIToFP: return Expression::UITOFP;
- case Instruction::SIToFP: return Expression::SITOFP;
- case Instruction::FPTrunc: return Expression::FPTRUNC;
- case Instruction::FPExt: return Expression::FPEXT;
- case Instruction::PtrToInt: return Expression::PTRTOINT;
- case Instruction::IntToPtr: return Expression::INTTOPTR;
- case Instruction::BitCast: return Expression::BITCAST;
- }
-}
-
-Expression ValueTable::create_expression(CallInst* C) {
- Expression e;
-
- e.type = C->getType();
- e.opcode = Expression::CALL;
-
- e.varargs.push_back(lookup(C->getCalledFunction()));
- for (CallInst::op_iterator I = C->op_begin()+1, E = C->op_end();
- I != E; ++I)
- e.varargs.push_back(lookup(*I));
-
- return e;
-}
-
-Expression ValueTable::create_expression(BinaryOperator* BO) {
- Expression e;
- e.varargs.push_back(lookup(BO->getOperand(0)));
- e.varargs.push_back(lookup(BO->getOperand(1)));
- e.type = BO->getType();
- e.opcode = getOpcode(BO);
-
- return e;
-}
-
-Expression ValueTable::create_expression(CmpInst* C) {
- Expression e;
-
- e.varargs.push_back(lookup(C->getOperand(0)));
- e.varargs.push_back(lookup(C->getOperand(1)));
- e.type = C->getType();
- e.opcode = getOpcode(C);
-
- return e;
-}
-
-Expression ValueTable::create_expression(CastInst* C) {
- Expression e;
-
- e.varargs.push_back(lookup(C->getOperand(0)));
- e.type = C->getType();
- e.opcode = getOpcode(C);
-
- return e;
-}
-
-Expression ValueTable::create_expression(ShuffleVectorInst* S) {
- Expression e;
-
- e.varargs.push_back(lookup(S->getOperand(0)));
- e.varargs.push_back(lookup(S->getOperand(1)));
- e.varargs.push_back(lookup(S->getOperand(2)));
- e.type = S->getType();
- e.opcode = Expression::SHUFFLE;
-
- return e;
-}
-
-Expression ValueTable::create_expression(ExtractElementInst* E) {
- Expression e;
-
- e.varargs.push_back(lookup(E->getOperand(0)));
- e.varargs.push_back(lookup(E->getOperand(1)));
- e.type = E->getType();
- e.opcode = Expression::EXTRACT;
-
- return e;
-}
-
-Expression ValueTable::create_expression(InsertElementInst* I) {
- Expression e;
-
- e.varargs.push_back(lookup(I->getOperand(0)));
- e.varargs.push_back(lookup(I->getOperand(1)));
- e.varargs.push_back(lookup(I->getOperand(2)));
- e.type = I->getType();
- e.opcode = Expression::INSERT;
-
- return e;
-}
-
-Expression ValueTable::create_expression(SelectInst* I) {
- Expression e;
-
- e.varargs.push_back(lookup(I->getCondition()));
- e.varargs.push_back(lookup(I->getTrueValue()));
- e.varargs.push_back(lookup(I->getFalseValue()));
- e.type = I->getType();
- e.opcode = Expression::SELECT;
-
- return e;
-}
-
-Expression ValueTable::create_expression(GetElementPtrInst* G) {
- Expression e;
-
- e.varargs.push_back(lookup(G->getPointerOperand()));
- e.type = G->getType();
- e.opcode = Expression::GEP;
-
- for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end();
- I != E; ++I)
- e.varargs.push_back(lookup(*I));
-
- return e;
-}
-
-Expression ValueTable::create_expression(ExtractValueInst* E) {
- Expression e;
-
- e.varargs.push_back(lookup(E->getAggregateOperand()));
- for (ExtractValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
- II != IE; ++II)
- e.varargs.push_back(*II);
- e.type = E->getType();
- e.opcode = Expression::EXTRACTVALUE;
-
- return e;
-}
-
-Expression ValueTable::create_expression(InsertValueInst* E) {
- Expression e;
-
- e.varargs.push_back(lookup(E->getAggregateOperand()));
- e.varargs.push_back(lookup(E->getInsertedValueOperand()));
- for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
- II != IE; ++II)
- e.varargs.push_back(*II);
- e.type = E->getType();
- e.opcode = Expression::INSERTVALUE;
-
- return e;
-}
-
-//===----------------------------------------------------------------------===//
-// ValueTable External Functions
-//===----------------------------------------------------------------------===//
-
-/// add - Insert a value into the table with a specified value number.
-void ValueTable::add(Value *V, uint32_t num) {
- valueNumbering[V] = num;
-}
-
-/// computeNumber - Returns the value number for the specified value, assigning
-/// it a new number if it did not have one before.
-uint32_t ValueTable::computeNumber(Value *V) {
- if (uint32_t v = valueNumbering[V])
- return v;
- else if (uint32_t v= constantsNumbering[V])
- return v;
-
- if (!isa<Instruction>(V)) {
- constantsNumbering[V] = nextValueNumber;
- return nextValueNumber++;
- }
-
- Instruction* I = cast<Instruction>(V);
- Expression exp;
- switch (I->getOpcode()) {
- case Instruction::Add:
- case Instruction::FAdd:
- case Instruction::Sub:
- case Instruction::FSub:
- case Instruction::Mul:
- case Instruction::FMul:
- case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::FDiv:
- case Instruction::URem:
- case Instruction::SRem:
- case Instruction::FRem:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- case Instruction::And:
- case Instruction::Or :
- case Instruction::Xor:
- exp = create_expression(cast<BinaryOperator>(I));
- break;
- case Instruction::ICmp:
- case Instruction::FCmp:
- exp = create_expression(cast<CmpInst>(I));
- break;
- case Instruction::Trunc:
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- case Instruction::BitCast:
- exp = create_expression(cast<CastInst>(I));
- break;
- case Instruction::Select:
- exp = create_expression(cast<SelectInst>(I));
- break;
- case Instruction::ExtractElement:
- exp = create_expression(cast<ExtractElementInst>(I));
- break;
- case Instruction::InsertElement:
- exp = create_expression(cast<InsertElementInst>(I));
- break;
- case Instruction::ShuffleVector:
- exp = create_expression(cast<ShuffleVectorInst>(I));
- break;
- case Instruction::ExtractValue:
- exp = create_expression(cast<ExtractValueInst>(I));
- break;
- case Instruction::InsertValue:
- exp = create_expression(cast<InsertValueInst>(I));
- break;
- case Instruction::GetElementPtr:
- exp = create_expression(cast<GetElementPtrInst>(I));
- break;
- default:
- valueNumbering[V] = nextValueNumber;
- return nextValueNumber++;
- }
-
- uint32_t& e = expressionNumbering[exp];
- if (!e) e = nextValueNumber++;
- valueNumbering[V] = e;
-
- return e;
-}
-
-/// lookup - Returns the value number of the specified value. Returns 0 if
-/// the value has not yet been numbered.
-uint32_t ValueTable::lookup(Value *V) {
- if (!isa<Instruction>(V)) {
- if (!constantsNumbering.count(V))
- constantsNumbering[V] = nextValueNumber++;
- return constantsNumbering[V];
- }
-
- return valueNumbering[V];
-}
-
-/// clear - Remove all entries from the ValueTable
-void ValueTable::clear() {
- valueNumbering.clear();
- expressionNumbering.clear();
- constantsNumbering.clear();
- nextValueNumber = 1;
-}
-
-void ValueTable::clearExpressions() {
- expressionNumbering.clear();
- constantsNumbering.clear();
- nextValueNumber = 1;
-}
-
-/// erase - Remove a value from the value numbering
-void ValueTable::erase(Value *V) {
- valueNumbering.erase(V);
-}
-
-/// verifyRemoved - Verify that the value is removed from all internal data
-/// structures.
-void ValueTable::verifyRemoved(const Value *V) const {
- for (DenseMap<Value*, uint32_t>::const_iterator
- I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
- assert(I->first != V && "Inst still occurs in value numbering map!");
- }
-}
-
-//===----------------------------------------------------------------------===//
-// SCCVN Pass
-//===----------------------------------------------------------------------===//
-
-namespace {
-
- struct ValueNumberScope {
- ValueNumberScope* parent;
- DenseMap<uint32_t, Value*> table;
- SparseBitVector<128> availIn;
- SparseBitVector<128> availOut;
-
- ValueNumberScope(ValueNumberScope* p) : parent(p) { }
- };
-
- class SCCVN : public FunctionPass {
- bool runOnFunction(Function &F);
- public:
- static char ID; // Pass identification, replacement for typeid
- SCCVN() : FunctionPass(&ID) { }
-
- private:
- ValueTable VT;
- DenseMap<BasicBlock*, ValueNumberScope*> BBMap;
-
- // This transformation requires dominator postdominator info
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<DominatorTree>();
-
- AU.addPreserved<DominatorTree>();
- AU.setPreservesCFG();
- }
- };
-
- char SCCVN::ID = 0;
-}
-
-// createSCCVNPass - The public interface to this file...
-FunctionPass *llvm::createSCCVNPass() { return new SCCVN(); }
-
-static RegisterPass<SCCVN> X("sccvn",
- "SCC Value Numbering");
-
-static Value *lookupNumber(ValueNumberScope *Locals, uint32_t num) {
- while (Locals) {
- DenseMap<uint32_t, Value*>::iterator I = Locals->table.find(num);
- if (I != Locals->table.end())
- return I->second;
- Locals = Locals->parent;
- }
-
- return 0;
-}
-
-bool SCCVN::runOnFunction(Function& F) {
- // Implement the RPO version of the SCCVN algorithm. Conceptually,
- // we optimistically assume that all instructions with the same opcode have
- // the same VN. Then we deepen our comparison by one level, so that all
- // instructions whose operands have the same opcodes get the same VN. We
- // iterate this process until the partitioning stops changing, at which
- // point we have computed a full numbering.
- ReversePostOrderTraversal<Function*> RPOT(&F);
- bool done = false;
- while (!done) {
- done = true;
- VT.clearExpressions();
- for (ReversePostOrderTraversal<Function*>::rpo_iterator I = RPOT.begin(),
- E = RPOT.end(); I != E; ++I) {
- BasicBlock* BB = *I;
- for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
- BI != BE; ++BI) {
- uint32_t origVN = VT.lookup(BI);
- uint32_t newVN = VT.computeNumber(BI);
- if (origVN != newVN)
- done = false;
- }
- }
- }
-
- // Now, do a dominator walk, eliminating simple, dominated redundancies as we
- // go. Also, build the ValueNumberScope structure that will be used for
- // computing full availability.
- DominatorTree& DT = getAnalysis<DominatorTree>();
- bool changed = false;
- for (df_iterator<DomTreeNode*> DI = df_begin(DT.getRootNode()),
- DE = df_end(DT.getRootNode()); DI != DE; ++DI) {
- BasicBlock* BB = DI->getBlock();
- if (DI->getIDom())
- BBMap[BB] = new ValueNumberScope(BBMap[DI->getIDom()->getBlock()]);
- else
- BBMap[BB] = new ValueNumberScope(0);
-
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
- uint32_t num = VT.lookup(I);
- Value* repl = lookupNumber(BBMap[BB], num);
-
- if (repl) {
- if (isa<PHINode>(I))
- ++NumSCCVNPhi;
- else
- ++NumSCCVNInstr;
- I->replaceAllUsesWith(repl);
- Instruction* OldInst = I;
- ++I;
- BBMap[BB]->table[num] = repl;
- OldInst->eraseFromParent();
- changed = true;
- } else {
- BBMap[BB]->table[num] = I;
- BBMap[BB]->availOut.set(num);
-
- ++I;
- }
- }
- }
-
- // Perform a forward data-flow to compute availability at all points on
- // the CFG.
- do {
- changed = false;
- for (ReversePostOrderTraversal<Function*>::rpo_iterator I = RPOT.begin(),
- E = RPOT.end(); I != E; ++I) {
- BasicBlock* BB = *I;
- ValueNumberScope *VNS = BBMap[BB];
-
- SparseBitVector<128> preds;
- bool first = true;
- for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
- PI != PE; ++PI) {
- if (first) {
- preds = BBMap[*PI]->availOut;
- first = false;
- } else {
- preds &= BBMap[*PI]->availOut;
- }
- }
-
- changed |= (VNS->availIn |= preds);
- changed |= (VNS->availOut |= preds);
- }
- } while (changed);
-
- // Use full availability information to perform non-dominated replacements.
- SSAUpdater SSU;
- for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
- if (!BBMap.count(FI)) continue;
- for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
- BI != BE; ) {
- uint32_t num = VT.lookup(BI);
- if (!BBMap[FI]->availIn.test(num)) {
- ++BI;
- continue;
- }
-
- SSU.Initialize(BI);
-
- SmallPtrSet<BasicBlock*, 8> visited;
- SmallVector<BasicBlock*, 8> stack;
- visited.insert(FI);
- for (pred_iterator PI = pred_begin(FI), PE = pred_end(FI);
- PI != PE; ++PI)
- if (!visited.count(*PI))
- stack.push_back(*PI);
-
- while (!stack.empty()) {
- BasicBlock* CurrBB = stack.pop_back_val();
- visited.insert(CurrBB);
-
- ValueNumberScope* S = BBMap[CurrBB];
- if (S->table.count(num)) {
- SSU.AddAvailableValue(CurrBB, S->table[num]);
- } else {
- for (pred_iterator PI = pred_begin(CurrBB), PE = pred_end(CurrBB);
- PI != PE; ++PI)
- if (!visited.count(*PI))
- stack.push_back(*PI);
- }
- }
-
- Value* repl = SSU.GetValueInMiddleOfBlock(FI);
- BI->replaceAllUsesWith(repl);
- Instruction* CurInst = BI;
- ++BI;
- BBMap[FI]->table[num] = repl;
- if (isa<PHINode>(CurInst))
- ++NumSCCVNPhi;
- else
- ++NumSCCVNInstr;
-
- CurInst->eraseFromParent();
- }
- }
-
- VT.clear();
- for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator
- I = BBMap.begin(), E = BBMap.end(); I != E; ++I)
- delete I->second;
- BBMap.clear();
-
- return changed;
-}
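Though the file is deleted, the same hash-consing idea underlies the GVN pass that remains in tree: two instructions receive the same value number exactly when their opcode and operand numbers match. A toy standalone model of the expressionNumbering table above (plain C++ for illustration; the original used DenseMap and a richer Expression key):

#include <map>
#include <utility>

// Identical (opcode, lhs-number, rhs-number) triples share one value number,
// so the second of two structurally identical binary operators is redundant.
static unsigned valueNumber(unsigned Opcode, unsigned LHS, unsigned RHS) {
  static std::map<std::pair<unsigned, std::pair<unsigned, unsigned> >,
                  unsigned> Table;
  static unsigned NextValueNumber = 1;
  unsigned &N = Table[std::make_pair(Opcode, std::make_pair(LHS, RHS))];
  if (!N)
    N = NextValueNumber++;
  return N;
}
// valueNumber(Add, VN(x), VN(y)) yields the same number for both
// "%a = add %x, %y" and "%b = add %x, %y", so %b collapses into %a,
// exactly as the dominator walk in the deleted runOnFunction did.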
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 6211beb70ba2..5ca9ce376f25 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -130,14 +130,7 @@ namespace {
void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
SmallVector<AllocaInst*, 32> &NewElts);
- bool CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,
- bool &SawVec, uint64_t Offset, unsigned AllocaSize);
- void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
- Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType,
- uint64_t Offset, IRBuilder<> &Builder);
- Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
- uint64_t Offset, IRBuilder<> &Builder);
- static Instruction *isOnlyCopiedFromConstantGlobal(AllocaInst *AI);
+ static MemTransferInst *isOnlyCopiedFromConstantGlobal(AllocaInst *AI);
};
}
@@ -150,6 +143,596 @@ FunctionPass *llvm::createScalarReplAggregatesPass(signed int Threshold) {
}
+//===----------------------------------------------------------------------===//
+// Convert To Scalar Optimization.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// ConvertToScalarInfo - This class implements the "Convert To Scalar"
+/// optimization, which scans the uses of an alloca and determines if it can
+/// rewrite it in terms of a single new alloca that can be mem2reg'd.
+class ConvertToScalarInfo {
+ /// AllocaSize - The size of the alloca being considered.
+ unsigned AllocaSize;
+ const TargetData &TD;
+
+ /// IsNotTrivial - This is set to true if there is some access to the object
+ /// which means that mem2reg can't promote it.
+ bool IsNotTrivial;
+
+ /// VectorTy - This tracks the type that we should promote the alloca to if
+ /// it is possible to turn it into a vector. This starts out null, and if it
+ /// isn't possible to turn into a vector type, it gets set to VoidTy.
+ const Type *VectorTy;
+
+ /// HadAVector - True if there is at least one vector access to the alloca.
+ /// We don't want to turn random arrays into vectors and use vector element
+ /// insert/extract, but if there are element accesses to something that is
+ /// also declared as a vector, we do want to promote to a vector.
+ bool HadAVector;
+
+public:
+ explicit ConvertToScalarInfo(unsigned Size, const TargetData &td)
+ : AllocaSize(Size), TD(td) {
+ IsNotTrivial = false;
+ VectorTy = 0;
+ HadAVector = false;
+ }
+
+ AllocaInst *TryConvert(AllocaInst *AI);
+
+private:
+ bool CanConvertToScalar(Value *V, uint64_t Offset);
+ void MergeInType(const Type *In, uint64_t Offset);
+ void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
+
+ Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType,
+ uint64_t Offset, IRBuilder<> &Builder);
+ Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
+ uint64_t Offset, IRBuilder<> &Builder);
+};
+} // end anonymous namespace.
+
+/// TryConvert - Analyze the specified alloca, and if it is safe to do so,
+/// rewrite it to be a new alloca which is mem2reg'able. This returns the new
+/// alloca if possible or null if not.
+AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
+ // If we can't convert this scalar, or if mem2reg can trivially do it, bail
+ // out.
+ if (!CanConvertToScalar(AI, 0) || !IsNotTrivial)
+ return 0;
+
+ // If we were able to find a vector type that can handle this with
+ // insert/extract elements, and if there was at least one use that had
+ // a vector type, promote this to a vector. We don't want to promote
+ // random stuff that doesn't use vectors (e.g. <9 x double>) because then
+ // we just get a lot of insert/extracts. If at least one vector is
+ // involved, then we probably really do have a union of vector/array.
+ const Type *NewTy;
+ if (VectorTy && VectorTy->isVectorTy() && HadAVector) {
+ DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = "
+ << *VectorTy << '\n');
+ NewTy = VectorTy; // Use the vector type.
+ } else {
+ DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
+ // Create and insert the integer alloca.
+ NewTy = IntegerType::get(AI->getContext(), AllocaSize*8);
+ }
+ AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin());
+ ConvertUsesToScalar(AI, NewAI, 0);
+ return NewAI;
+}
+
+/// MergeInType - Add the 'In' type to the accumulated vector type (VectorTy)
+/// so far at the offset specified by Offset (which is specified in bytes).
+///
+/// There are two cases we handle here:
+/// 1) A union of vector types of the same size and potentially its elements.
+/// Here we turn element accesses into insert/extract element operations.
+/// This promotes a <4 x float> with a store of float to the third element
+/// into a <4 x float> that uses insert element.
+/// 2) A fully general blob of memory, which we turn into some (potentially
+/// large) integer type with extract and insert operations where the loads
+/// and stores would mutate the memory. We mark this by setting VectorTy
+/// to VoidTy.
+void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
+ // If we already decided to turn this into a blob of integer memory, there is
+ // nothing to be done.
+ if (VectorTy && VectorTy->isVoidTy())
+ return;
+
+ // If this could be contributing to a vector, analyze it.
+
+ // If the In type is a vector that is the same size as the alloca, see if it
+ // matches the existing VecTy.
+ if (const VectorType *VInTy = dyn_cast<VectorType>(In)) {
+ // Remember if we saw a vector type.
+ HadAVector = true;
+
+ if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) {
+ // If we're storing/loading a vector of the right size, allow it as a
+ // vector. If this is the first vector we see, remember the type so that
+ // we know the element size. If this is a subsequent access of a differing
+ // type with the same size, keep the existing type; worst case we can
+ // bitcast the resultant vectors.
+ if (VectorTy == 0)
+ VectorTy = VInTy;
+ return;
+ }
+ } else if (In->isFloatTy() || In->isDoubleTy() ||
+ (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 &&
+ isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
+ // If we're accessing something that could be an element of a vector, see
+ // if the implied vector agrees with what we already have and if Offset is
+ // compatible with it.
+ unsigned EltSize = In->getPrimitiveSizeInBits()/8;
+ if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 &&
+ (VectorTy == 0 ||
+ cast<VectorType>(VectorTy)->getElementType()
+ ->getPrimitiveSizeInBits()/8 == EltSize)) {
+ if (VectorTy == 0)
+ VectorTy = VectorType::get(In, AllocaSize/EltSize);
+ return;
+ }
+ }
+
+ // Otherwise, we have a case that we can't handle with an optimized vector
+ // form. We can still turn this into a large integer.
+ VectorTy = Type::getVoidTy(In->getContext());
+}
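+
+// Hedged restatement of the element-size test above (hypothetical helper,
+// not used by the pass; sizes in bytes): an access is vector-compatible
+// when it lands on an element boundary, tiles the alloca evenly, and
+// agrees with any element size already chosen (0 = none chosen yet).
+static bool vectorCompatible(uint64_t Offset, unsigned EltSize,
+                             unsigned AllocaSize, unsigned CurEltSize) {
+  if (Offset % EltSize != 0 || AllocaSize % EltSize != 0)
+    return false;
+  return CurEltSize == 0 || CurEltSize == EltSize;
+}
+// e.g. a float store at byte 8 of a 16-byte alloca seen as <4 x float>:
+// vectorCompatible(8, 4, 16, 4) == true; a double access at byte 0 fails
+// (vectorCompatible(0, 8, 16, 4) == false) and forces the integer fallback.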
+
+/// CanConvertToScalar - V is a pointer. If we can convert the pointee and all
+/// its accesses to a single vector type, return true and set VectorTy to
+/// the new type. If we could convert the alloca into a single promotable
+/// integer, return true but set VectorTy to VoidTy. Further, if the use is not
+/// a completely trivial use that mem2reg could promote, set IsNotTrivial.
+/// Offset is the current offset from the base of the alloca being analyzed.
+///
+/// If we see at least one access to the value as a vector type, set the
+/// HadAVector flag.
+bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ // Don't break volatile loads.
+ if (LI->isVolatile())
+ return false;
+ MergeInType(LI->getType(), Offset);
+ continue;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ // Storing the pointer, not into the value?
+ if (SI->getOperand(0) == V || SI->isVolatile()) return false;
+ MergeInType(SI->getOperand(0)->getType(), Offset);
+ continue;
+ }
+
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
+ IsNotTrivial = true; // Can't be mem2reg'd.
+ if (!CanConvertToScalar(BCI, Offset))
+ return false;
+ continue;
+ }
+
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
+ // If this is a GEP with variable indices, we can't handle it.
+ if (!GEP->hasAllConstantIndices())
+ return false;
+
+ // Compute the offset that this GEP adds to the pointer.
+ SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
+ uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
+ &Indices[0], Indices.size());
+ // See if all uses can be converted.
+ if (!CanConvertToScalar(GEP, Offset+GEPOffset))
+ return false;
+ IsNotTrivial = true; // Can't be mem2reg'd.
+ continue;
+ }
+
+ // If this is a constant sized memset of a constant value (e.g. 0) we can
+ // handle it.
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
+ // Store of constant value and constant size.
+ if (!isa<ConstantInt>(MSI->getValue()) ||
+ !isa<ConstantInt>(MSI->getLength()))
+ return false;
+ IsNotTrivial = true; // Can't be mem2reg'd.
+ continue;
+ }
+
+ // If this is a memcpy or memmove into or out of the whole allocation, we
+ // can handle it like a load or store of the scalar type.
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
+ ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength());
+ if (Len == 0 || Len->getZExtValue() != AllocaSize || Offset != 0)
+ return false;
+
+ IsNotTrivial = true; // Can't be mem2reg'd.
+ continue;
+ }
+
+ // Otherwise, we cannot handle this!
+ return false;
+ }
+
+ return true;
+}
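+
+// Hedged worked example of the constant-GEP folding above, stated in
+// equivalent C terms (hypothetical types; assumes <cstddef>/<stdint.h>
+// definitions are visible): for an alloca of type {i32, [4 x i16]}, the GEP
+// indices "0, 1, 2" fold to one byte offset, just as getIndexedOffset does.
+struct GEPExampleTy { int32_t a; int16_t b[4]; };
+static const size_t GEPExampleOffset =
+    offsetof(GEPExampleTy, b) + 2 * sizeof(int16_t); // 4 + 2*2 == 8 bytes
+// The recursive CanConvertToScalar call above then analyzes the GEP's uses
+// at Offset + 8.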
+
+/// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca
+/// directly. This happens when we are converting an "integer union" to a
+/// single integer scalar, or when we are converting a "vector union" to a
+/// vector with insert/extractelement instructions.
+///
+/// Offset is an offset from the original alloca, in bits that need to be
+/// shifted to the right. By the end of this, there should be no uses of Ptr.
+void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
+ uint64_t Offset) {
+ while (!Ptr->use_empty()) {
+ Instruction *User = cast<Instruction>(Ptr->use_back());
+
+ if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) {
+ ConvertUsesToScalar(CI, NewAI, Offset);
+ CI->eraseFromParent();
+ continue;
+ }
+
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
+ // Compute the offset that this GEP adds to the pointer.
+ SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
+ uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
+ &Indices[0], Indices.size());
+ ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8);
+ GEP->eraseFromParent();
+ continue;
+ }
+
+ IRBuilder<> Builder(User->getParent(), User);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ // The load is a bit extract from NewAI shifted right by Offset bits.
+ Value *LoadedVal = Builder.CreateLoad(NewAI, "tmp");
+ Value *NewLoadVal
+ = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, Builder);
+ LI->replaceAllUsesWith(NewLoadVal);
+ LI->eraseFromParent();
+ continue;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ assert(SI->getOperand(0) != Ptr && "Consistency error!");
+ Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
+ Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset,
+ Builder);
+ Builder.CreateStore(New, NewAI);
+ SI->eraseFromParent();
+
+ // If the load we just inserted is now dead, then the inserted store
+ // overwrote the entire thing.
+ if (Old->use_empty())
+ Old->eraseFromParent();
+ continue;
+ }
+
+ // If this is a constant sized memset of a constant value (e.g. 0) we can
+ // transform it into a store of the expanded constant value.
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
+ assert(MSI->getRawDest() == Ptr && "Consistency error!");
+ unsigned NumBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
+ if (NumBytes != 0) {
+ unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue();
+
+ // Compute the value replicated the right number of times.
+ APInt APVal(NumBytes*8, Val);
+
+ // Splat the value if non-zero.
+ if (Val)
+ for (unsigned i = 1; i != NumBytes; ++i)
+ APVal |= APVal << 8;
+
+ Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
+ Value *New = ConvertScalar_InsertValue(
+ ConstantInt::get(User->getContext(), APVal),
+ Old, Offset, Builder);
+ Builder.CreateStore(New, NewAI);
+
+ // If the load we just inserted is now dead, then the memset overwrote
+ // the entire thing.
+ if (Old->use_empty())
+ Old->eraseFromParent();
+ }
+ MSI->eraseFromParent();
+ continue;
+ }
+
+ // If this is a memcpy or memmove into or out of the whole allocation, we
+ // can handle it like a load or store of the scalar type.
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
+ assert(Offset == 0 && "must be store to start of alloca");
+
+ // If the source and destination both point to the same alloca, then this is
+ // a noop copy-to-self; just delete it. Otherwise, emit a load and store
+ // as appropriate.
+ AllocaInst *OrigAI = cast<AllocaInst>(Ptr->getUnderlyingObject(0));
+
+ if (MTI->getSource()->getUnderlyingObject(0) != OrigAI) {
+ // Dest must be OrigAI, change this to be a load from the original
+ // pointer (bitcasted), then a store to our new alloca.
+ assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
+ Value *SrcPtr = MTI->getSource();
+ SrcPtr = Builder.CreateBitCast(SrcPtr, NewAI->getType());
+
+ LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
+ SrcVal->setAlignment(MTI->getAlignment());
+ Builder.CreateStore(SrcVal, NewAI);
+ } else if (MTI->getDest()->getUnderlyingObject(0) != OrigAI) {
+ // Src must be OrigAI, change this to be a load from NewAI then a store
+ // through the original dest pointer (bitcasted).
+ assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
+ LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval");
+
+ Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), NewAI->getType());
+ StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr);
+ NewStore->setAlignment(MTI->getAlignment());
+ } else {
+ // Noop transfer. Src == Dst
+ }
+
+ MTI->eraseFromParent();
+ continue;
+ }
+
+ llvm_unreachable("Unsupported operation!");
+ }
+}
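+
+// Standalone check of the memset splat loop above (hypothetical helper, not
+// used by the pass; uint64_t stands in for APInt, so NumBytes <= 8 here).
+static uint64_t splatByte(unsigned NumBytes, unsigned char Val) {
+  uint64_t V = Val;
+  for (unsigned i = 1; i != NumBytes; ++i)
+    V |= V << 8;  // after the loop, the low NumBytes bytes all equal Val
+  return V;
+}
+// splatByte(4, 0xAB) == 0xABABABABULL: a memset(p, 0xAB, 4) of the alloca
+// becomes a single i32 store of that constant.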
+
+/// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer
+/// or vector value FromVal, extracting the bits from the offset specified by
+/// Offset. This returns the value, which is of type ToType.
+///
+/// This happens when we are converting an "integer union" to a single
+/// integer scalar, or when we are converting a "vector union" to a vector with
+/// insert/extractelement instructions.
+///
+/// Offset is an offset from the original alloca, in bits that need to be
+/// shifted to the right.
+Value *ConvertToScalarInfo::
+ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
+ uint64_t Offset, IRBuilder<> &Builder) {
+ // If the load is of the whole new alloca, no conversion is needed.
+ if (FromVal->getType() == ToType && Offset == 0)
+ return FromVal;
+
+ // If the result alloca is a vector type, this is either an element
+ // access or a bitcast to another vector type of the same size.
+ if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) {
+ if (ToType->isVectorTy())
+ return Builder.CreateBitCast(FromVal, ToType, "tmp");
+
+ // Otherwise it must be an element access.
+ unsigned Elt = 0;
+ if (Offset) {
+ unsigned EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType());
+ Elt = Offset/EltSize;
+ assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
+ }
+ // Return the element extracted out of it.
+ Value *V = Builder.CreateExtractElement(FromVal, ConstantInt::get(
+ Type::getInt32Ty(FromVal->getContext()), Elt), "tmp");
+ if (V->getType() != ToType)
+ V = Builder.CreateBitCast(V, ToType, "tmp");
+ return V;
+ }
+
+ // If ToType is a first class aggregate, extract out each of the pieces and
+ // use insertvalue's to form the FCA.
+ if (const StructType *ST = dyn_cast<StructType>(ToType)) {
+ const StructLayout &Layout = *TD.getStructLayout(ST);
+ Value *Res = UndefValue::get(ST);
+ for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
+ Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i),
+ Offset+Layout.getElementOffsetInBits(i),
+ Builder);
+ Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
+ }
+ return Res;
+ }
+
+ if (const ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
+ uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
+ Value *Res = UndefValue::get(AT);
+ for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
+ Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
+ Offset+i*EltSize, Builder);
+ Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
+ }
+ return Res;
+ }
+
+ // Otherwise, this must be a union that was converted to an integer value.
+ const IntegerType *NTy = cast<IntegerType>(FromVal->getType());
+
+ // If this is a big-endian system and the load is narrower than the
+ // full alloca type, we need to do a shift to get the right bits.
+ int ShAmt = 0;
+ if (TD.isBigEndian()) {
+ // On big-endian machines, the lowest bit is stored at the bit offset
+ // from the pointer given by getTypeStoreSizeInBits. This matters for
+ // integers with a bitwidth that is not a multiple of 8.
+ ShAmt = TD.getTypeStoreSizeInBits(NTy) -
+ TD.getTypeStoreSizeInBits(ToType) - Offset;
+ } else {
+ ShAmt = Offset;
+ }
+
+ // Note: we support negative shift amounts (handled with shl), which are
+ // otherwise not defined. We do this to support (f.e.) loads off the end
+ // of a structure where only some bits are used.
+ if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth())
+ FromVal = Builder.CreateLShr(FromVal,
+ ConstantInt::get(FromVal->getType(),
+ ShAmt), "tmp");
+ else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())
+ FromVal = Builder.CreateShl(FromVal,
+ ConstantInt::get(FromVal->getType(),
+ -ShAmt), "tmp");
+
+ // Finally, unconditionally truncate the integer to the right width.
+ unsigned LIBitWidth = TD.getTypeSizeInBits(ToType);
+ if (LIBitWidth < NTy->getBitWidth())
+ FromVal =
+ Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(),
+ LIBitWidth), "tmp");
+ else if (LIBitWidth > NTy->getBitWidth())
+ FromVal =
+ Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(),
+ LIBitWidth), "tmp");
+
+ // If the result is an integer, this is a trunc or bitcast.
+ if (ToType->isIntegerTy()) {
+ // Should be done.
+ } else if (ToType->isFloatingPointTy() || ToType->isVectorTy()) {
+ // Just do a bitcast, we know the sizes match up.
+ FromVal = Builder.CreateBitCast(FromVal, ToType, "tmp");
+ } else {
+ // Otherwise must be a pointer.
+ FromVal = Builder.CreateIntToPtr(FromVal, ToType, "tmp");
+ }
+ assert(FromVal->getType() == ToType && "Didn't convert right?");
+ return FromVal;
+}
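+
+// Quick numeric check of the endian-dependent shift above (hypothetical
+// helper, not used by the pass; all widths in bits).
+static int extractShiftAmount(bool BigEndian, unsigned AllocaBits,
+                              unsigned LoadBits, unsigned OffsetBits) {
+  // Big-endian counts from the high end of the stored bits; little-endian
+  // uses the bit offset directly, matching the ShAmt computation above.
+  return BigEndian ? (int)(AllocaBits - LoadBits - OffsetBits)
+                   : (int)OffsetBits;
+}
+// For an i8 load at byte offset 1 from an i32-backed alloca:
+//   extractShiftAmount(false, 32, 8, 8) == 8   -> lshr by 8, then trunc
+//   extractShiftAmount(true,  32, 8, 8) == 16  -> lshr by 16, then trunc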
+
+/// ConvertScalar_InsertValue - Insert the value "SV" into the existing integer
+/// or vector value "Old" at the offset specified by Offset.
+///
+/// This happens when we are converting an "integer union" to a
+/// single integer scalar, or when we are converting a "vector union" to a
+/// vector with insert/extractelement instructions.
+///
+/// Offset is an offset from the original alloca, in bits that need to be
+/// shifted to the right.
+Value *ConvertToScalarInfo::
+ConvertScalar_InsertValue(Value *SV, Value *Old,
+ uint64_t Offset, IRBuilder<> &Builder) {
+ // Convert the stored type to the actual type, shift it left to insert
+ // then 'or' into place.
+ const Type *AllocaType = Old->getType();
+ LLVMContext &Context = Old->getContext();
+
+ if (const VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
+ uint64_t VecSize = TD.getTypeAllocSizeInBits(VTy);
+ uint64_t ValSize = TD.getTypeAllocSizeInBits(SV->getType());
+
+ // Changing the whole vector with memset or with an access of a different
+ // vector type?
+ if (ValSize == VecSize)
+ return Builder.CreateBitCast(SV, AllocaType, "tmp");
+
+ uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType());
+
+ // Must be an element insertion.
+ unsigned Elt = Offset/EltSize;
+
+ if (SV->getType() != VTy->getElementType())
+ SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp");
+
+ SV = Builder.CreateInsertElement(Old, SV,
+ ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt),
+ "tmp");
+ return SV;
+ }
+
+ // If SV is a first-class aggregate value, insert each value recursively.
+ if (const StructType *ST = dyn_cast<StructType>(SV->getType())) {
+ const StructLayout &Layout = *TD.getStructLayout(ST);
+ for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
+ Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
+ Old = ConvertScalar_InsertValue(Elt, Old,
+ Offset+Layout.getElementOffsetInBits(i),
+ Builder);
+ }
+ return Old;
+ }
+
+ if (const ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
+ uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
+ for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
+ Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
+ Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, Builder);
+ }
+ return Old;
+ }
+
+ // If SV is a float, convert it to the appropriate integer type.
+ // If it is a pointer, do the same.
+ unsigned SrcWidth = TD.getTypeSizeInBits(SV->getType());
+ unsigned DestWidth = TD.getTypeSizeInBits(AllocaType);
+ unsigned SrcStoreWidth = TD.getTypeStoreSizeInBits(SV->getType());
+ unsigned DestStoreWidth = TD.getTypeStoreSizeInBits(AllocaType);
+ if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy())
+ SV = Builder.CreateBitCast(SV,
+ IntegerType::get(SV->getContext(),SrcWidth), "tmp");
+ else if (SV->getType()->isPointerTy())
+ SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getContext()), "tmp");
+
+ // Zero extend or truncate the value if needed.
+ if (SV->getType() != AllocaType) {
+ if (SV->getType()->getPrimitiveSizeInBits() <
+ AllocaType->getPrimitiveSizeInBits())
+ SV = Builder.CreateZExt(SV, AllocaType, "tmp");
+ else {
+ // Truncation may be needed if storing more than the alloca can hold
+ // (undefined behavior).
+ SV = Builder.CreateTrunc(SV, AllocaType, "tmp");
+ SrcWidth = DestWidth;
+ SrcStoreWidth = DestStoreWidth;
+ }
+ }
+
+ // If this is a big-endian system and the store is narrower than the
+ // full alloca type, we need to do a shift to get the right bits.
+ int ShAmt = 0;
+ if (TD.isBigEndian()) {
+ // On big-endian machines, the lowest bit is stored at the bit offset
+ // from the pointer given by getTypeStoreSizeInBits. This matters for
+ // integers with a bitwidth that is not a multiple of 8.
+ ShAmt = DestStoreWidth - SrcStoreWidth - Offset;
+ } else {
+ ShAmt = Offset;
+ }
+
+ // Note: we support negative shift amounts (handled with lshr), which are
+ // otherwise not defined. We do this to support (f.e.) stores off the end
+ // of a structure where only some bits in the structure are set.
+ APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth));
+ if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) {
+ SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(),
+ ShAmt), "tmp");
+ Mask <<= ShAmt;
+ } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) {
+ SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(),
+ -ShAmt), "tmp");
+ Mask = Mask.lshr(-ShAmt);
+ }
+
+ // Mask out the bits we are about to insert from the old value, and or
+ // in the new bits.
+ if (SrcWidth != DestWidth) {
+ assert(DestWidth > SrcWidth);
+ Old = Builder.CreateAnd(Old, ConstantInt::get(Context, ~Mask), "mask");
+ SV = Builder.CreateOr(Old, SV, "ins");
+ }
+ return SV;
+}
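+
+// Mirror-image check for the insert path (hypothetical helper, not used by
+// the pass; little-endian case, uint64_t stands in for APInt): mask out the
+// destination bits, shift the new value into place, and or it in.
+static uint64_t insertBits(uint64_t Old, uint64_t Val,
+                           unsigned SrcWidth, unsigned ShAmt) {
+  uint64_t Mask =
+      (SrcWidth == 64 ? ~0ULL : ((1ULL << SrcWidth) - 1)) << ShAmt;
+  return (Old & ~Mask) | ((Val << ShAmt) & Mask);
+}
+// e.g. storing an i8 0xFF at byte 2 of an i32-backed alloca:
+// insertBits(0x11223344, 0xFF, 8, 16) == 0x11FF3344.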
+
+
+//===----------------------------------------------------------------------===//
+// SRoA Driver
+//===----------------------------------------------------------------------===//
+
+
bool SROA::runOnFunction(Function &F) {
TD = getAnalysisIfAvailable<TargetData>();
@@ -202,6 +785,7 @@ bool SROA::performPromotion(Function &F) {
return Changed;
}
+
/// ShouldAttemptScalarRepl - Decide if an alloca is a good candidate for
/// SROA. It must be a struct or array type with a small number of elements.
static bool ShouldAttemptScalarRepl(AllocaInst *AI) {
@@ -216,6 +800,7 @@ static bool ShouldAttemptScalarRepl(AllocaInst *AI) {
return false;
}
+
// performScalarRepl - This algorithm is a simple worklist driven algorithm,
// which runs on all of the malloc/alloca instructions in the function, removing
// them if they are only used by getelementptr instructions.
@@ -223,7 +808,7 @@ static bool ShouldAttemptScalarRepl(AllocaInst *AI) {
bool SROA::performScalarRepl(Function &F) {
std::vector<AllocaInst*> WorkList;
- // Scan the entry basic block, adding any alloca's and mallocs to the worklist
+ // Scan the entry basic block, adding allocas to the worklist.
BasicBlock &BB = F.getEntryBlock();
for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
if (AllocaInst *A = dyn_cast<AllocaInst>(I))
@@ -239,6 +824,7 @@ bool SROA::performScalarRepl(Function &F) {
// with unused elements.
if (AI->use_empty()) {
AI->eraseFromParent();
+ Changed = true;
continue;
}
@@ -251,10 +837,10 @@ bool SROA::performScalarRepl(Function &F) {
// the constant global instead. This is commonly produced by the CFE by
// constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
// is only subsequently read.
- if (Instruction *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) {
+ if (MemTransferInst *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) {
DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n');
DEBUG(dbgs() << " memcpy = " << *TheCopy << '\n');
- Constant *TheSrc = cast<Constant>(TheCopy->getOperand(2));
+ Constant *TheSrc = cast<Constant>(TheCopy->getSource());
AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType()));
TheCopy->eraseFromParent(); // Don't mutate the global.
AI->eraseFromParent();
@@ -271,7 +857,10 @@ bool SROA::performScalarRepl(Function &F) {
// Do not promote [0 x %struct].
if (AllocaSize == 0) continue;
-
+
+ // Do not promote any struct whose size is too big.
+ if (AllocaSize > SRThreshold) continue;
+
// If the alloca looks like a good candidate for scalar replacement, and if
// all its users can be transformed, then split up the aggregate into its
// separate elements.
@@ -281,48 +870,20 @@ bool SROA::performScalarRepl(Function &F) {
continue;
}
- // Do not promote any struct whose size is too big.
- if (AllocaSize > SRThreshold) continue;
-
// If we can turn this aggregate value (potentially with casts) into a
// simple scalar value that can be mem2reg'd into a register value.
// IsNotTrivial tracks whether this is something that mem2reg could have
// promoted itself. If so, we don't want to transform it needlessly. Note
// that we can't just check based on the type: the alloca may be of an i32
// but that has pointer arithmetic to set byte 3 of it or something.
- bool IsNotTrivial = false;
- const Type *VectorTy = 0;
- bool HadAVector = false;
- if (CanConvertToScalar(AI, IsNotTrivial, VectorTy, HadAVector,
- 0, unsigned(AllocaSize)) && IsNotTrivial) {
- AllocaInst *NewAI;
- // If we were able to find a vector type that can handle this with
- // insert/extract elements, and if there was at least one use that had
- // a vector type, promote this to a vector. We don't want to promote
- // random stuff that doesn't use vectors (e.g. <9 x double>) because then
- // we just get a lot of insert/extracts. If at least one vector is
- // involved, then we probably really do have a union of vector/array.
- if (VectorTy && VectorTy->isVectorTy() && HadAVector) {
- DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = "
- << *VectorTy << '\n');
-
- // Create and insert the vector alloca.
- NewAI = new AllocaInst(VectorTy, 0, "", AI->getParent()->begin());
- ConvertUsesToScalar(AI, NewAI, 0);
- } else {
- DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
-
- // Create and insert the integer alloca.
- const Type *NewTy = IntegerType::get(AI->getContext(), AllocaSize*8);
- NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin());
- ConvertUsesToScalar(AI, NewAI, 0);
- }
+ if (AllocaInst *NewAI =
+ ConvertToScalarInfo((unsigned)AllocaSize, *TD).TryConvert(AI)) {
NewAI->takeName(AI);
AI->eraseFromParent();
++NumConverted;
Changed = true;
continue;
- }
+ }
// Otherwise, couldn't process this alloca.
}
@@ -698,7 +1259,6 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
// that doesn't have anything to do with the alloca that we are promoting. For
// memset, this Value* stays null.
Value *OtherPtr = 0;
- LLVMContext &Context = MI->getContext();
unsigned MemAlignment = MI->getAlignment();
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcopy
if (Inst == MTI->getRawDest())
@@ -756,7 +1316,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
}
// Process each element of the aggregate.
- Value *TheFn = MI->getOperand(0);
+ Value *TheFn = MI->getCalledValue();
const Type *BytePtrTy = MI->getRawDest()->getType();
bool SROADest = MI->getRawDest() == Inst;
@@ -775,12 +1335,11 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
MI);
uint64_t EltOffset;
const PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType());
- if (const StructType *ST =
- dyn_cast<StructType>(OtherPtrTy->getElementType())) {
+ const Type *OtherTy = OtherPtrTy->getElementType();
+ if (const StructType *ST = dyn_cast<StructType>(OtherTy)) {
EltOffset = TD->getStructLayout(ST)->getElementOffset(i);
} else {
- const Type *EltTy =
- cast<SequentialType>(OtherPtr->getType())->getElementType();
+ const Type *EltTy = cast<SequentialType>(OtherTy)->getElementType();
EltOffset = TD->getTypeAllocSize(EltTy)*i;
}
@@ -832,7 +1391,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
}
// Convert the integer value to the appropriate type.
- StoreVal = ConstantInt::get(Context, TotalVal);
+ StoreVal = ConstantInt::get(CI->getContext(), TotalVal);
if (ValTy->isPointerTy())
StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy);
else if (ValTy->isFloatingPointTy())
@@ -1174,509 +1733,6 @@ bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
return true;
}
-/// MergeInType - Add the 'In' type to the accumulated type (Accum) so far at
-/// the offset specified by Offset (which is specified in bytes).
-///
-/// There are two cases we handle here:
-/// 1) A union of vector types of the same size and potentially its elements.
-/// Here we turn element accesses into insert/extract element operations.
-/// This promotes a <4 x float> with a store of float to the third element
-/// into a <4 x float> that uses insert element.
-/// 2) A fully general blob of memory, which we turn into some (potentially
-/// large) integer type with extract and insert operations where the loads
-/// and stores would mutate the memory.
-static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy,
- unsigned AllocaSize, const TargetData &TD,
- LLVMContext &Context) {
- // If this could be contributing to a vector, analyze it.
- if (VecTy != Type::getVoidTy(Context)) { // either null or a vector type.
-
- // If the In type is a vector that is the same size as the alloca, see if it
- // matches the existing VecTy.
- if (const VectorType *VInTy = dyn_cast<VectorType>(In)) {
- if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) {
- // If we're storing/loading a vector of the right size, allow it as a
- // vector. If this is the first vector we see, remember the type so that
- // we know the element size.
- if (VecTy == 0)
- VecTy = VInTy;
- return;
- }
- } else if (In->isFloatTy() || In->isDoubleTy() ||
- (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 &&
- isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
- // If we're accessing something that could be an element of a vector, see
- // if the implied vector agrees with what we already have and if Offset is
- // compatible with it.
- unsigned EltSize = In->getPrimitiveSizeInBits()/8;
- if (Offset % EltSize == 0 &&
- AllocaSize % EltSize == 0 &&
- (VecTy == 0 ||
- cast<VectorType>(VecTy)->getElementType()
- ->getPrimitiveSizeInBits()/8 == EltSize)) {
- if (VecTy == 0)
- VecTy = VectorType::get(In, AllocaSize/EltSize);
- return;
- }
- }
- }
-
- // Otherwise, we have a case that we can't handle with an optimized vector
- // form. We can still turn this into a large integer.
- VecTy = Type::getVoidTy(Context);
-}
-
-/// CanConvertToScalar - V is a pointer. If we can convert the pointee and all
-/// its accesses to a single vector type, return true and set VecTy to
-/// the new type. If we could convert the alloca into a single promotable
-/// integer, return true but set VecTy to VoidTy. Further, if the use is not a
-/// completely trivial use that mem2reg could promote, set IsNotTrivial. Offset
-/// is the current offset from the base of the alloca being analyzed.
-///
-/// If we see at least one access to the value that is as a vector type, set the
-/// SawVec flag.
-bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,
- bool &SawVec, uint64_t Offset,
- unsigned AllocaSize) {
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
- Instruction *User = cast<Instruction>(*UI);
-
- if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
- // Don't break volatile loads.
- if (LI->isVolatile())
- return false;
- MergeInType(LI->getType(), Offset, VecTy,
- AllocaSize, *TD, V->getContext());
- SawVec |= LI->getType()->isVectorTy();
- continue;
- }
-
- if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
- // Storing the pointer, not into the value?
- if (SI->getOperand(0) == V || SI->isVolatile()) return 0;
- MergeInType(SI->getOperand(0)->getType(), Offset,
- VecTy, AllocaSize, *TD, V->getContext());
- SawVec |= SI->getOperand(0)->getType()->isVectorTy();
- continue;
- }
-
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
- if (!CanConvertToScalar(BCI, IsNotTrivial, VecTy, SawVec, Offset,
- AllocaSize))
- return false;
- IsNotTrivial = true;
- continue;
- }
-
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
- // If this is a GEP with variable indices, we can't handle it.
- if (!GEP->hasAllConstantIndices())
- return false;
-
- // Compute the offset that this GEP adds to the pointer.
- SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
- uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(),
- &Indices[0], Indices.size());
- // See if all uses can be converted.
- if (!CanConvertToScalar(GEP, IsNotTrivial, VecTy, SawVec,Offset+GEPOffset,
- AllocaSize))
- return false;
- IsNotTrivial = true;
- continue;
- }
-
- // If this is a constant sized memset of a constant value (e.g. 0) we can
- // handle it.
- if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
- // Store of constant value and constant size.
- if (isa<ConstantInt>(MSI->getValue()) &&
- isa<ConstantInt>(MSI->getLength())) {
- IsNotTrivial = true;
- continue;
- }
- }
-
- // If this is a memcpy or memmove into or out of the whole allocation, we
- // can handle it like a load or store of the scalar type.
- if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
- if (ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength()))
- if (Len->getZExtValue() == AllocaSize && Offset == 0) {
- IsNotTrivial = true;
- continue;
- }
- }
-
- // Otherwise, we cannot handle this!
- return false;
- }
-
- return true;
-}
-
-/// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca
-/// directly. This happens when we are converting an "integer union" to a
-/// single integer scalar, or when we are converting a "vector union" to a
-/// vector with insert/extractelement instructions.
-///
-/// Offset is an offset from the original alloca, in bits that need to be
-/// shifted to the right. By the end of this, there should be no uses of Ptr.
-void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) {
- while (!Ptr->use_empty()) {
- Instruction *User = cast<Instruction>(Ptr->use_back());
-
- if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) {
- ConvertUsesToScalar(CI, NewAI, Offset);
- CI->eraseFromParent();
- continue;
- }
-
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
- // Compute the offset that this GEP adds to the pointer.
- SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
- uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(),
- &Indices[0], Indices.size());
- ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8);
- GEP->eraseFromParent();
- continue;
- }
-
- IRBuilder<> Builder(User->getParent(), User);
-
- if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
- // The load is a bit extract from NewAI shifted right by Offset bits.
- Value *LoadedVal = Builder.CreateLoad(NewAI, "tmp");
- Value *NewLoadVal
- = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, Builder);
- LI->replaceAllUsesWith(NewLoadVal);
- LI->eraseFromParent();
- continue;
- }
-
- if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
- assert(SI->getOperand(0) != Ptr && "Consistency error!");
- Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
- Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset,
- Builder);
- Builder.CreateStore(New, NewAI);
- SI->eraseFromParent();
-
- // If the load we just inserted is now dead, then the inserted store
- // overwrote the entire thing.
- if (Old->use_empty())
- Old->eraseFromParent();
- continue;
- }
-
- // If this is a constant sized memset of a constant value (e.g. 0) we can
- // transform it into a store of the expanded constant value.
- if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
- assert(MSI->getRawDest() == Ptr && "Consistency error!");
- unsigned NumBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
- if (NumBytes != 0) {
- unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue();
-
- // Compute the value replicated the right number of times.
- APInt APVal(NumBytes*8, Val);
-
- // Splat the value if non-zero.
- if (Val)
- for (unsigned i = 1; i != NumBytes; ++i)
- APVal |= APVal << 8;
-
- Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
- Value *New = ConvertScalar_InsertValue(
- ConstantInt::get(User->getContext(), APVal),
- Old, Offset, Builder);
- Builder.CreateStore(New, NewAI);
-
- // If the load we just inserted is now dead, then the memset overwrote
- // the entire thing.
- if (Old->use_empty())
- Old->eraseFromParent();
- }
- MSI->eraseFromParent();
- continue;
- }
-
- // If this is a memcpy or memmove into or out of the whole allocation, we
- // can handle it like a load or store of the scalar type.
- if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
- assert(Offset == 0 && "must be store to start of alloca");
-
- // If the source and destination are both to the same alloca, then this is
- // a noop copy-to-self, just delete it. Otherwise, emit a load and store
- // as appropriate.
- AllocaInst *OrigAI = cast<AllocaInst>(Ptr->getUnderlyingObject(0));
-
- if (MTI->getSource()->getUnderlyingObject(0) != OrigAI) {
- // Dest must be OrigAI, change this to be a load from the original
- // pointer (bitcasted), then a store to our new alloca.
- assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
- Value *SrcPtr = MTI->getSource();
- SrcPtr = Builder.CreateBitCast(SrcPtr, NewAI->getType());
-
- LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
- SrcVal->setAlignment(MTI->getAlignment());
- Builder.CreateStore(SrcVal, NewAI);
- } else if (MTI->getDest()->getUnderlyingObject(0) != OrigAI) {
- // Src must be OrigAI, change this to be a load from NewAI then a store
- // through the original dest pointer (bitcasted).
- assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
- LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval");
-
- Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), NewAI->getType());
- StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr);
- NewStore->setAlignment(MTI->getAlignment());
- } else {
- // Noop transfer. Src == Dst
- }
-
- MTI->eraseFromParent();
- continue;
- }
-
- llvm_unreachable("Unsupported operation!");
- }
-}
-
-/// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer
-/// or vector value FromVal, extracting the bits from the offset specified by
-/// Offset. This returns the value, which is of type ToType.
-///
-/// This happens when we are converting an "integer union" to a single
-/// integer scalar, or when we are converting a "vector union" to a vector with
-/// insert/extractelement instructions.
-///
-/// Offset is an offset from the original alloca, in bits that need to be
-/// shifted to the right.
-Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
- uint64_t Offset, IRBuilder<> &Builder) {
- // If the load is of the whole new alloca, no conversion is needed.
- if (FromVal->getType() == ToType && Offset == 0)
- return FromVal;
-
- // If the result alloca is a vector type, this is either an element
- // access or a bitcast to another vector type of the same size.
- if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) {
- if (ToType->isVectorTy())
- return Builder.CreateBitCast(FromVal, ToType, "tmp");
-
- // Otherwise it must be an element access.
- unsigned Elt = 0;
- if (Offset) {
- unsigned EltSize = TD->getTypeAllocSizeInBits(VTy->getElementType());
- Elt = Offset/EltSize;
- assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
- }
- // Return the element extracted out of it.
- Value *V = Builder.CreateExtractElement(FromVal, ConstantInt::get(
- Type::getInt32Ty(FromVal->getContext()), Elt), "tmp");
- if (V->getType() != ToType)
- V = Builder.CreateBitCast(V, ToType, "tmp");
- return V;
- }
-
- // If ToType is a first class aggregate, extract out each of the pieces and
- // use insertvalue's to form the FCA.
- if (const StructType *ST = dyn_cast<StructType>(ToType)) {
- const StructLayout &Layout = *TD->getStructLayout(ST);
- Value *Res = UndefValue::get(ST);
- for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
- Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i),
- Offset+Layout.getElementOffsetInBits(i),
- Builder);
- Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
- }
- return Res;
- }
-
- if (const ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
- uint64_t EltSize = TD->getTypeAllocSizeInBits(AT->getElementType());
- Value *Res = UndefValue::get(AT);
- for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
- Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
- Offset+i*EltSize, Builder);
- Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
- }
- return Res;
- }
-
- // Otherwise, this must be a union that was converted to an integer value.
- const IntegerType *NTy = cast<IntegerType>(FromVal->getType());
-
- // If this is a big-endian system and the load is narrower than the
- // full alloca type, we need to do a shift to get the right bits.
- int ShAmt = 0;
- if (TD->isBigEndian()) {
- // On big-endian machines, the lowest bit is stored at the bit offset
- // from the pointer given by getTypeStoreSizeInBits. This matters for
- // integers with a bitwidth that is not a multiple of 8.
- ShAmt = TD->getTypeStoreSizeInBits(NTy) -
- TD->getTypeStoreSizeInBits(ToType) - Offset;
- } else {
- ShAmt = Offset;
- }
-
- // Note: we support negative shift amounts (via shl), which are not defined.
- // We do this to support (e.g.) loads off the end of a structure where
- // only some bits are used.
- if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth())
- FromVal = Builder.CreateLShr(FromVal,
- ConstantInt::get(FromVal->getType(),
- ShAmt), "tmp");
- else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())
- FromVal = Builder.CreateShl(FromVal,
- ConstantInt::get(FromVal->getType(),
- -ShAmt), "tmp");
-
- // Finally, unconditionally truncate the integer to the right width.
- unsigned LIBitWidth = TD->getTypeSizeInBits(ToType);
- if (LIBitWidth < NTy->getBitWidth())
- FromVal =
- Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(),
- LIBitWidth), "tmp");
- else if (LIBitWidth > NTy->getBitWidth())
- FromVal =
- Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(),
- LIBitWidth), "tmp");
-
- // If the result is an integer, this is a trunc or bitcast.
- if (ToType->isIntegerTy()) {
- // Should be done.
- } else if (ToType->isFloatingPointTy() || ToType->isVectorTy()) {
- // Just do a bitcast, we know the sizes match up.
- FromVal = Builder.CreateBitCast(FromVal, ToType, "tmp");
- } else {
- // Otherwise must be a pointer.
- FromVal = Builder.CreateIntToPtr(FromVal, ToType, "tmp");
- }
- assert(FromVal->getType() == ToType && "Didn't convert right?");
- return FromVal;
-}
-
-/// ConvertScalar_InsertValue - Insert the value "SV" into the existing integer
-/// or vector value "Old" at the offset specified by Offset.
-///
-/// This happens when we are converting an "integer union" to a
-/// single integer scalar, or when we are converting a "vector union" to a
-/// vector with insert/extractelement instructions.
-///
-/// Offset is an offset from the original alloca, in bits that need to be
-/// shifted to the right.
-Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old,
- uint64_t Offset, IRBuilder<> &Builder) {
-
- // Convert the stored type to the actual type, shift it left to insert
- // then 'or' into place.
- const Type *AllocaType = Old->getType();
- LLVMContext &Context = Old->getContext();
-
- if (const VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
- uint64_t VecSize = TD->getTypeAllocSizeInBits(VTy);
- uint64_t ValSize = TD->getTypeAllocSizeInBits(SV->getType());
-
- // Changing the whole vector with memset or with an access of a different
- // vector type?
- if (ValSize == VecSize)
- return Builder.CreateBitCast(SV, AllocaType, "tmp");
-
- uint64_t EltSize = TD->getTypeAllocSizeInBits(VTy->getElementType());
-
- // Must be an element insertion.
- unsigned Elt = Offset/EltSize;
-
- if (SV->getType() != VTy->getElementType())
- SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp");
-
- SV = Builder.CreateInsertElement(Old, SV,
- ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt),
- "tmp");
- return SV;
- }
-
- // If SV is a first-class aggregate value, insert each value recursively.
- if (const StructType *ST = dyn_cast<StructType>(SV->getType())) {
- const StructLayout &Layout = *TD->getStructLayout(ST);
- for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
- Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
- Old = ConvertScalar_InsertValue(Elt, Old,
- Offset+Layout.getElementOffsetInBits(i),
- Builder);
- }
- return Old;
- }
-
- if (const ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
- uint64_t EltSize = TD->getTypeAllocSizeInBits(AT->getElementType());
- for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
- Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
- Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, Builder);
- }
- return Old;
- }
-
- // If SV is a float, convert it to the appropriate integer type.
- // If it is a pointer, do the same.
- unsigned SrcWidth = TD->getTypeSizeInBits(SV->getType());
- unsigned DestWidth = TD->getTypeSizeInBits(AllocaType);
- unsigned SrcStoreWidth = TD->getTypeStoreSizeInBits(SV->getType());
- unsigned DestStoreWidth = TD->getTypeStoreSizeInBits(AllocaType);
- if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy())
- SV = Builder.CreateBitCast(SV,
- IntegerType::get(SV->getContext(),SrcWidth), "tmp");
- else if (SV->getType()->isPointerTy())
- SV = Builder.CreatePtrToInt(SV, TD->getIntPtrType(SV->getContext()), "tmp");
-
- // Zero extend or truncate the value if needed.
- if (SV->getType() != AllocaType) {
- if (SV->getType()->getPrimitiveSizeInBits() <
- AllocaType->getPrimitiveSizeInBits())
- SV = Builder.CreateZExt(SV, AllocaType, "tmp");
- else {
- // Truncation may be needed if storing more than the alloca can hold
- // (undefined behavior).
- SV = Builder.CreateTrunc(SV, AllocaType, "tmp");
- SrcWidth = DestWidth;
- SrcStoreWidth = DestStoreWidth;
- }
- }
-
- // If this is a big-endian system and the store is narrower than the
- // full alloca type, we need to do a shift to get the right bits.
- int ShAmt = 0;
- if (TD->isBigEndian()) {
- // On big-endian machines, the lowest bit is stored at the bit offset
- // from the pointer given by getTypeStoreSizeInBits. This matters for
- // integers with a bitwidth that is not a multiple of 8.
- ShAmt = DestStoreWidth - SrcStoreWidth - Offset;
- } else {
- ShAmt = Offset;
- }
-
- // Note: we support negative shift amounts (via shr), which are not defined.
- // We do this to support (e.g.) stores off the end of a structure where
- // only some bits in the structure are set.
- APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth));
- if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) {
- SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(),
- ShAmt), "tmp");
- Mask <<= ShAmt;
- } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) {
- SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(),
- -ShAmt), "tmp");
- Mask = Mask.lshr(-ShAmt);
- }
-
- // Mask out the bits we are about to insert from the old value, and or
- // in the new bits.
- if (SrcWidth != DestWidth) {
- assert(DestWidth > SrcWidth);
- Old = Builder.CreateAnd(Old, ConstantInt::get(Context, ~Mask), "mask");
- SV = Builder.CreateOr(Old, SV, "ins");
- }
- return SV;
-}
-
/// PointsToConstantGlobal - Return true if V (possibly indirectly) points to
@@ -1699,21 +1755,23 @@ static bool PointsToConstantGlobal(Value *V) {
/// the uses. If we see a memcpy/memmove that targets an unoffset pointer to
/// the alloca, and if the source pointer is a pointer to a constant global, we
/// can optimize this.
-static bool isOnlyCopiedFromConstantGlobal(Value *V, Instruction *&TheCopy,
+static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
bool isOffset) {
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
- if (LoadInst *LI = dyn_cast<LoadInst>(*UI))
+ User *U = cast<Instruction>(*UI);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(U))
// Ignore non-volatile loads, they are always ok.
if (!LI->isVolatile())
continue;
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI)) {
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
// If uses of the bitcast are ok, we are ok.
if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset))
return false;
continue;
}
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(*UI)) {
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
// If the GEP has all zero indices, it doesn't offset the pointer. If it
// doesn't, it does.
if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy,
@@ -1724,7 +1782,8 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, Instruction *&TheCopy,
// If this isn't our memcpy/memmove, reject it as something we can't
// handle.
- if (!isa<MemTransferInst>(*UI))
+ MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
+ if (MI == 0)
return false;
// If we already have seen a copy, reject the second one.
@@ -1737,10 +1796,8 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, Instruction *&TheCopy,
// If the memintrinsic isn't using the alloca as the dest, reject it.
if (UI.getOperandNo() != 1) return false;
- MemIntrinsic *MI = cast<MemIntrinsic>(*UI);
-
// If the source of the memcpy/move is not a constant global, reject it.
- if (!PointsToConstantGlobal(MI->getOperand(2)))
+ if (!PointsToConstantGlobal(MI->getSource()))
return false;
// Otherwise, the transform is safe. Remember the copy instruction.
@@ -1752,8 +1809,8 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, Instruction *&TheCopy,
/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
/// modified by a copy from a constant global. If we can prove this, we can
/// replace any uses of the alloca with uses of the global directly.
-Instruction *SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI) {
- Instruction *TheCopy = 0;
+MemTransferInst *SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI) {
+ MemTransferInst *TheCopy = 0;
if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false))
return TheCopy;
return 0;
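For the record, the memset handling deleted in the big hunk above replicated the fill byte across an alloca-wide integer by repeated shift-and-or (APVal |= APVal << 8). A self-contained toy of the same splat on a plain 64-bit integer; the APInt original handles arbitrary widths:

    #include <cassert>
    #include <cstdint>

    // Replicate one byte value across NumBytes bytes, one byte per step.
    static uint64_t splatByte(unsigned NumBytes, uint8_t Val) {
      assert(NumBytes >= 1 && NumBytes <= 8 && "toy version caps at 8");
      uint64_t Splat = Val;
      for (unsigned i = 1; i != NumBytes; ++i)
        Splat |= Splat << 8;  // extend the replicated prefix by one byte
      return Splat;           // splatByte(4, 0xAB) == 0xABABABAB
    }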
diff --git a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
index 4464961a0799..c3408e77807f 100644
--- a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
@@ -93,7 +93,8 @@ InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs,
// Inline the call, taking care of what code ends up where.
NewBlock = SplitBlock(NextInst->getParent(), NextInst, this);
- bool B = InlineFunction(Call, 0, TD);
+ InlineFunctionInfo IFI(0, TD);
+ bool B = InlineFunction(Call, IFI);
assert(B && "half_powr didn't inline?"); B=B;
BasicBlock *NewBody = NewBlock->getSinglePredecessor();
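This is the first of several call sites migrated to the new InlineFunction interface: the call graph and TargetData now ride in an InlineFunctionInfo, which also reports results back. A sketch of the new convention against the headers this patch touches (the (CG, TD) constructor order is inferred from the IFI(0, TD) call sites here, so treat it as an assumption):

    #include "llvm/Support/CallSite.h"
    #include "llvm/Transforms/Utils/Cloning.h"
    using namespace llvm;

    static bool inlineWithInfo(CallSite CS, CallGraph *CG,
                               const TargetData *TD) {
      InlineFunctionInfo IFI(CG, TD);  // assumed (CallGraph*, TargetData*)
      if (!InlineFunction(CS, IFI))
        return false;
      // On success, IFI.StaticAllocas and IFI.InlinedCalls describe the
      // inlined body, replacing the old optional output vector.
      return true;
    }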
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 162d902cfa4c..5ad5de2ce6a5 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -59,6 +59,8 @@
#include "llvm/Instructions.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Support/CallSite.h"
#include "llvm/Support/CFG.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
@@ -328,15 +330,6 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
if (&BB->front() == Ret) // Make sure there is something before the ret...
return false;
- // If the return is in the entry block, then making this transformation would
- // turn infinite recursion into an infinite loop. This transformation is ok
- // in theory, but breaks some code like:
- // double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call
- // disable this xform in this case, because the code generator will lower the
- // call to fabs into inline code.
- if (BB == &F->getEntryBlock())
- return false;
-
// Scan backwards from the return, checking to see if there is a tail call in
// this block. If so, set CI to it.
CallInst *CI;
@@ -356,6 +349,25 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail)
return false;
+ // As a special case, detect code like this:
+ // double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call
+ // and disable this xform in this case, because the code generator will
+ // lower the call to fabs into inline code.
+ if (BB == &F->getEntryBlock() &&
+ &BB->front() == CI && &*++BB->begin() == Ret &&
+ callIsSmall(F)) {
+ // A single-block function with just a call and a return. Check that
+ // the arguments match.
+ CallSite::arg_iterator I = CallSite(CI).arg_begin(),
+ E = CallSite(CI).arg_end();
+ Function::arg_iterator FI = F->arg_begin(),
+ FE = F->arg_end();
+ for (; I != E && FI != FE; ++I, ++FI)
+ if (*I != &*FI) break;
+ if (I == E && FI == FE)
+ return false;
+ }
+
// If we are introducing accumulator recursion to eliminate associative
// operations after the call instruction, this variable contains the initial
// value for the accumulator. If this value is set, we actually perform
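The restored guard is now conditional: it only blocks the transform when the entry block is exactly one small call plus a return, and every call operand is literally the corresponding formal parameter. A standalone sketch of that identity check, with invented types:

    #include <cstddef>
    #include <vector>

    struct Param { int Id; };  // stand-in for a formal Argument

    // Pointer identity, not value equality, mirroring the lockstep
    // CallSite/Function::arg_iterator walk added above.
    static bool argsAreFormalParams(
        const std::vector<const Param*> &CallArgs,
        const std::vector<Param> &Formals) {
      if (CallArgs.size() != Formals.size())
        return false;
      for (std::size_t i = 0; i != CallArgs.size(); ++i)
        if (CallArgs[i] != &Formals[i])
          return false;
      return true;
    }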
diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp
index c70bab5492ef..ea9d1c1b1468 100644
--- a/lib/Transforms/Utils/AddrModeMatcher.cpp
+++ b/lib/Transforms/Utils/AddrModeMatcher.cpp
@@ -434,19 +434,21 @@ static bool FindAllMemoryUses(Instruction *I,
// Loop over all the uses, recursively processing them.
for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
UI != E; ++UI) {
- if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
+ User *U = *UI;
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
MemoryUses.push_back(std::make_pair(LI, UI.getOperandNo()));
continue;
}
- if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
unsigned opNo = UI.getOperandNo();
if (opNo == 0) return true; // Storing addr, not into addr.
MemoryUses.push_back(std::make_pair(SI, opNo));
continue;
}
- if (CallInst *CI = dyn_cast<CallInst>(*UI)) {
+ if (CallInst *CI = dyn_cast<CallInst>(U)) {
InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
if (IA == 0) return true;
@@ -456,7 +458,7 @@ static bool FindAllMemoryUses(Instruction *I,
continue;
}
- if (FindAllMemoryUses(cast<Instruction>(*UI), MemoryUses, ConsideredInsts,
+ if (FindAllMemoryUses(cast<Instruction>(U), MemoryUses, ConsideredInsts,
TLI))
return true;
}
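The hunks above only hoist *UI into a local, but the surrounding walk deserves a sketch: memory uses get recorded, anything the matcher cannot model aborts, and other users are walked recursively with a visited set so shared users are processed once. A toy rendering with invented types:

    #include <set>
    #include <vector>

    struct Inst {
      bool IsLoadOrStore = false;
      bool EscapesAddr = false;  // e.g. address handed to a real call
      std::vector<Inst*> Users;
    };

    // Returns true when some use escapes the addressing-mode model.
    static bool findAllMemoryUses(Inst *I,
                                  std::vector<Inst*> &MemoryUses,
                                  std::set<Inst*> &Considered) {
      if (!Considered.insert(I).second)
        return false;                     // already processed this user
      for (Inst *U : I->Users) {
        if (U->IsLoadOrStore) { MemoryUses.push_back(U); continue; }
        if (U->EscapesAddr) return true;  // give up: address escapes
        if (findAllMemoryUses(U, MemoryUses, Considered))
          return true;
      }
      return false;
    }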
diff --git a/lib/Transforms/Utils/BasicInliner.cpp b/lib/Transforms/Utils/BasicInliner.cpp
index c580b8fed98c..f0e31efa30c4 100644
--- a/lib/Transforms/Utils/BasicInliner.cpp
+++ b/lib/Transforms/Utils/BasicInliner.cpp
@@ -129,7 +129,8 @@ void BasicInlinerImpl::inlineFunctions() {
}
// Inline
- if (InlineFunction(CS, NULL, TD)) {
+ InlineFunctionInfo IFI(0, TD);
+ if (InlineFunction(CS, IFI)) {
if (Callee->use_empty() && (Callee->hasLocalLinkage() ||
Callee->hasAvailableExternallyLinkage()))
DeadFunctions.insert(Callee);
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index fff817928f34..767fa3a0a6b3 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -90,15 +90,15 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
/// EmitStrNCpy - Emit a call to the strncpy function to the builder, for the
/// specified pointer arguments.
Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
- IRBuilder<> &B, const TargetData *TD) {
+ IRBuilder<> &B, const TargetData *TD, StringRef Name) {
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[2];
AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture);
AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
const Type *I8Ptr = B.getInt8PtrTy();
- Value *StrNCpy = M->getOrInsertFunction("strncpy", AttrListPtr::get(AWI, 2),
- I8Ptr, I8Ptr, I8Ptr,
- Len->getType(), NULL);
+ Value *StrNCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI, 2),
+ I8Ptr, I8Ptr, I8Ptr,
+ Len->getType(), NULL);
CallInst *CI = B.CreateCall3(StrNCpy, CastToCStr(Dst, B), CastToCStr(Src, B),
Len, "strncpy");
if (const Function *F = dyn_cast<Function>(StrNCpy->stripPointerCasts()))
@@ -373,15 +373,29 @@ void llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
SimplifyFortifiedLibCalls::~SimplifyFortifiedLibCalls() { }
bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
+ // We really need TargetData for later.
+ if (!TD) return false;
+
this->CI = CI;
- StringRef Name = CI->getCalledFunction()->getName();
+ Function *Callee = CI->getCalledFunction();
+ StringRef Name = Callee->getName();
+ const FunctionType *FT = Callee->getFunctionType();
BasicBlock *BB = CI->getParent();
- IRBuilder<> B(CI->getParent()->getContext());
+ LLVMContext &Context = CI->getParent()->getContext();
+ IRBuilder<> B(Context);
// Set the builder to the instruction after the call.
B.SetInsertPoint(BB, CI);
if (Name == "__memcpy_chk") {
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(Context) ||
+ FT->getParamType(3) != TD->getIntPtrType(Context))
+ return false;
+
if (isFoldable(4, 3, false)) {
EmitMemCpy(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3),
1, false, B, TD);
@@ -397,6 +411,14 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
}
if (Name == "__memmove_chk") {
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(Context) ||
+ FT->getParamType(3) != TD->getIntPtrType(Context))
+ return false;
+
if (isFoldable(4, 3, false)) {
EmitMemMove(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3),
1, false, B, TD);
@@ -407,6 +429,14 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
}
if (Name == "__memset_chk") {
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isIntegerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(Context) ||
+ FT->getParamType(3) != TD->getIntPtrType(Context))
+ return false;
+
if (isFoldable(4, 3, false)) {
Value *Val = B.CreateIntCast(CI->getOperand(2), B.getInt8Ty(),
false);
@@ -418,6 +448,15 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
}
if (Name == "__strcpy_chk" || Name == "__stpcpy_chk") {
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 3 ||
+ FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
+ FT->getParamType(2) != TD->getIntPtrType(Context))
+ return false;
+
+
// If a) we don't have any length information, or b) we know this will
// fit then just lower to a plain st[rp]cpy. Otherwise we'll keep our
// st[rp]cpy_chk call which may fail at runtime if the size is too long.
@@ -432,10 +471,18 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
return false;
}
- if (Name == "__strncpy_chk") {
+ if (Name == "__strncpy_chk" || Name == "__stpncpy_chk") {
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
+ !FT->getParamType(2)->isIntegerTy() ||
+ FT->getParamType(3) != TD->getIntPtrType(Context))
+ return false;
+
if (isFoldable(4, 3, false)) {
Value *Ret = EmitStrNCpy(CI->getOperand(1), CI->getOperand(2),
- CI->getOperand(3), B, TD);
+ CI->getOperand(3), B, TD, Name.substr(2, 7));
replaceCall(Ret);
return true;
}
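Two details of these fortified-libcall hunks are easy to miss. First, Name.substr(2, 7) peels the leading "__" and trailing "_chk" off "__strncpy_chk" or "__stpncpy_chk", so EmitStrNCpy emits "strncpy" or "stpncpy" accordingly. Second, every handler now rejects a mismatched prototype before folding; a toy of the __memcpy_chk shape, with an invented enum standing in for the FunctionType comparisons:

    #include <vector>

    enum class Ty { Ptr, IntPtr, Other };
    struct Proto { Ty Ret; std::vector<Ty> Params; };

    // Accept only: ptr __memcpy_chk(ptr, ptr, intptr, intptr), with the
    // return type matching the first parameter (both Ptr here).
    static bool isMemcpyChkProto(const Proto &P) {
      return P.Params.size() == 4 && P.Ret == Ty::Ptr &&
             P.Params[0] == Ty::Ptr && P.Params[1] == Ty::Ptr &&
             P.Params[2] == Ty::IntPtr && P.Params[3] == Ty::IntPtr;
    }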
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 62fc2ec10b14..8ad66dd01afc 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -23,7 +23,7 @@
#include "llvm/LLVMContext.h"
#include "llvm/Metadata.h"
#include "llvm/Support/CFG.h"
-#include "llvm/Transforms/Utils/ValueMapper.h"
+#include "ValueMapper.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/ADT/SmallVector.h"
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index a163f89cc37f..b87c082793e0 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -17,7 +17,7 @@
#include "llvm/DerivedTypes.h"
#include "llvm/TypeSymbolTable.h"
#include "llvm/Constant.h"
-#include "llvm/Transforms/Utils/ValueMapper.h"
+#include "ValueMapper.h"
using namespace llvm;
/// CloneModule - Return an exact copy of the specified module. This is not as
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index b20849485306..b51f751e1317 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -751,7 +751,7 @@ ExtractCodeRegion(const std::vector<BasicBlock*> &code) {
// verifyFunction(*oldFunction);
DEBUG(if (verifyFunction(*newFunction))
- llvm_report_error("verifyFunction failed!"));
+ report_fatal_error("verifyFunction failed!"));
return newFunction;
}
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 75c9ccdd7a93..91390bc7beca 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -15,7 +15,6 @@
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
-#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
@@ -29,13 +28,11 @@
#include "llvm/Support/CallSite.h"
using namespace llvm;
-bool llvm::InlineFunction(CallInst *CI, CallGraph *CG, const TargetData *TD,
- SmallVectorImpl<AllocaInst*> *StaticAllocas) {
- return InlineFunction(CallSite(CI), CG, TD, StaticAllocas);
+bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI) {
+ return InlineFunction(CallSite(CI), IFI);
}
-bool llvm::InlineFunction(InvokeInst *II, CallGraph *CG, const TargetData *TD,
- SmallVectorImpl<AllocaInst*> *StaticAllocas) {
- return InlineFunction(CallSite(II), CG, TD, StaticAllocas);
+bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI) {
+ return InlineFunction(CallSite(II), IFI);
}
@@ -75,7 +72,7 @@ static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
II->setAttributes(CI->getAttributes());
// Make sure that anything using the call now uses the invoke! This also
- // updates the CallGraph if present.
+ // updates the CallGraph if present, because it uses a WeakVH.
CI->replaceAllUsesWith(II);
// Delete the unconditional branch inserted by splitBasicBlock
@@ -173,7 +170,8 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
static void UpdateCallGraphAfterInlining(CallSite CS,
Function::iterator FirstNewBlock,
DenseMap<const Value*, Value*> &ValueMap,
- CallGraph &CG) {
+ InlineFunctionInfo &IFI) {
+ CallGraph &CG = *IFI.CG;
const Function *Caller = CS.getInstruction()->getParent()->getParent();
const Function *Callee = CS.getCalledFunction();
CallGraphNode *CalleeNode = CG[Callee];
@@ -201,8 +199,27 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
// If the call was inlined, but then constant folded, there is no edge to
// add. Check for this case.
- if (Instruction *NewCall = dyn_cast<Instruction>(VMI->second))
- CallerNode->addCalledFunction(CallSite::get(NewCall), I->second);
+ Instruction *NewCall = dyn_cast<Instruction>(VMI->second);
+ if (NewCall == 0) continue;
+
+ // Remember that this call site got inlined for the client of
+ // InlineFunction.
+ IFI.InlinedCalls.push_back(NewCall);
+
+ // It's possible that inlining the callsite will cause it to go from an
+ // indirect to a direct call by resolving a function pointer. If this
+ // happens, set the callee of the new call site to a more precise
+ // destination. This can also happen if the call graph node of the caller
+ // was just unnecessarily imprecise.
+ if (I->second->getFunction() == 0)
+ if (Function *F = CallSite(NewCall).getCalledFunction()) {
+ // Indirect call site resolved to direct call.
+ CallerNode->addCalledFunction(CallSite::get(NewCall), CG[F]);
+
+ continue;
+ }
+
+ CallerNode->addCalledFunction(CallSite::get(NewCall), I->second);
}
// Update the call graph by deleting the edge from Callee to Caller. We must
@@ -219,13 +236,15 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
// exists in the instruction stream. Similarly this will inline a recursive
// function by one level.
//
-bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD,
- SmallVectorImpl<AllocaInst*> *StaticAllocas) {
+bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
Instruction *TheCall = CS.getInstruction();
LLVMContext &Context = TheCall->getContext();
assert(TheCall->getParent() && TheCall->getParent()->getParent() &&
"Instruction not in function!");
+ // If IFI has any state in it, zap it before we fill it in.
+ IFI.reset();
+
const Function *CalledFunc = CS.getCalledFunction();
if (CalledFunc == 0 || // Can't inline external function or indirect
CalledFunc->isDeclaration() || // call, or call to a vararg function!
@@ -292,7 +311,7 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD,
// Create the alloca. If we have TargetData, use nice alignment.
unsigned Align = 1;
- if (TD) Align = TD->getPrefTypeAlignment(AggTy);
+ if (IFI.TD) Align = IFI.TD->getPrefTypeAlignment(AggTy);
Value *NewAlloca = new AllocaInst(AggTy, 0, Align,
I->getName(),
&*Caller->begin()->begin());
@@ -305,11 +324,11 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD,
Value *SrcCast = new BitCastInst(*AI, VoidPtrTy, "tmp", TheCall);
Value *Size;
- if (TD == 0)
+ if (IFI.TD == 0)
Size = ConstantExpr::getSizeOf(AggTy);
else
Size = ConstantInt::get(Type::getInt64Ty(Context),
- TD->getTypeStoreSize(AggTy));
+ IFI.TD->getTypeStoreSize(AggTy));
// Always generate a memcpy of alignment 1 here because we don't know
// the alignment of the src pointer. Other optimizations can infer
@@ -323,7 +342,7 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD,
CallInst::Create(MemCpyFn, CallArgs, CallArgs+5, "", TheCall);
// If we have a call graph, update it.
- if (CG) {
+ if (CallGraph *CG = IFI.CG) {
CallGraphNode *MemCpyCGN = CG->getOrInsertFunction(MemCpyFn);
CallGraphNode *CallerNode = (*CG)[Caller];
CallerNode->addCalledFunction(TheMemCpy, MemCpyCGN);
@@ -342,14 +361,14 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD,
// (which can happen, e.g., because an argument was constant), but we'll be
// happy with whatever the cloner can do.
CloneAndPruneFunctionInto(Caller, CalledFunc, ValueMap, Returns, ".i",
- &InlinedFunctionInfo, TD, TheCall);
+ &InlinedFunctionInfo, IFI.TD, TheCall);
// Remember the first block that is newly cloned over.
FirstNewBlock = LastBlock; ++FirstNewBlock;
// Update the callgraph if requested.
- if (CG)
- UpdateCallGraphAfterInlining(CS, FirstNewBlock, ValueMap, *CG);
+ if (IFI.CG)
+ UpdateCallGraphAfterInlining(CS, FirstNewBlock, ValueMap, IFI);
}
// If there are any alloca instructions in the block that used to be the entry
@@ -376,13 +395,13 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD,
// Keep track of the static allocas that we inline into the caller if the
// StaticAllocas pointer is non-null.
- if (StaticAllocas) StaticAllocas->push_back(AI);
+ IFI.StaticAllocas.push_back(AI);
// Scan for the block of allocas that we can move over, and move them
// all at once.
while (isa<AllocaInst>(I) &&
isa<Constant>(cast<AllocaInst>(I)->getArraySize())) {
- if (StaticAllocas) StaticAllocas->push_back(cast<AllocaInst>(I));
+ IFI.StaticAllocas.push_back(cast<AllocaInst>(I));
++I;
}
@@ -406,7 +425,7 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD,
// If we are preserving the callgraph, add edges to the stacksave/restore
// functions for the calls we insert.
CallGraphNode *StackSaveCGN = 0, *StackRestoreCGN = 0, *CallerNode = 0;
- if (CG) {
+ if (CallGraph *CG = IFI.CG) {
StackSaveCGN = CG->getOrInsertFunction(StackSave);
StackRestoreCGN = CG->getOrInsertFunction(StackRestore);
CallerNode = (*CG)[Caller];
@@ -415,13 +434,13 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD,
// Insert the llvm.stacksave.
CallInst *SavedPtr = CallInst::Create(StackSave, "savedstack",
FirstNewBlock->begin());
- if (CG) CallerNode->addCalledFunction(SavedPtr, StackSaveCGN);
+ if (IFI.CG) CallerNode->addCalledFunction(SavedPtr, StackSaveCGN);
// Insert a call to llvm.stackrestore before any return instructions in the
// inlined function.
for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
CallInst *CI = CallInst::Create(StackRestore, SavedPtr, "", Returns[i]);
- if (CG) CallerNode->addCalledFunction(CI, StackRestoreCGN);
+ if (IFI.CG) CallerNode->addCalledFunction(CI, StackRestoreCGN);
}
// Count the number of StackRestore calls we insert.
@@ -434,7 +453,7 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD,
BB != E; ++BB)
if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
CallInst *CI = CallInst::Create(StackRestore, SavedPtr, "", UI);
- if (CG) CallerNode->addCalledFunction(CI, StackRestoreCGN);
+ if (IFI.CG) CallerNode->addCalledFunction(CI, StackRestoreCGN);
++NumStackRestores;
}
}
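One behavioral refinement hides in the call-graph update above: when the old edge pointed at an imprecise node (I->second->getFunction() == 0) but the cloned call site resolves to a concrete function, the new edge targets that function's node instead. The decision, reduced to a toy:

    // Invented structures; only the selection rule is from the patch.
    struct Node { bool HasConcreteFn; };

    static Node *refinedCallee(Node *OldEdgeCallee,
                               Node *ResolvedAtNewSite) {
      if (!OldEdgeCallee->HasConcreteFn && ResolvedAtNewSite)
        return ResolvedAtNewSite;  // indirect call resolved to direct
      return OldEdgeCallee;        // old edge was already precise
    }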
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index ac59b4d7b3e8..84fd1eb175df 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -183,8 +183,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
// For the first iteration of the loop, we should use the precloned values for
// PHI nodes. Insert associations now.
- typedef DenseMap<const Value*, Value*> ValueMapTy;
- ValueMapTy LastValueMap;
+ typedef DenseMap<const Value*, Value*> ValueToValueMapTy;
+ ValueToValueMapTy LastValueMap;
std::vector<PHINode*> OrigPHINode;
for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
PHINode *PN = cast<PHINode>(I);
@@ -205,7 +205,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
for (std::vector<BasicBlock*>::iterator BB = LoopBlocks.begin(),
E = LoopBlocks.end(); BB != E; ++BB) {
- ValueMapTy ValueMap;
+ ValueToValueMapTy ValueMap;
BasicBlock *New = CloneBasicBlock(*BB, ValueMap, "." + Twine(It));
Header->getParent()->getBasicBlockList().push_back(New);
@@ -224,7 +224,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
// Update our running map of newest clones
LastValueMap[*BB] = New;
- for (ValueMapTy::iterator VI = ValueMap.begin(), VE = ValueMap.end();
+ for (ValueToValueMapTy::iterator VI = ValueMap.begin(), VE = ValueMap.end();
VI != VE; ++VI)
LastValueMap[VI->first] = VI->second;
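Beyond the typedef rename, the context shows the unroller's map composition: after cloning one iteration, that iteration's old-to-new map is folded into LastValueMap so the next iteration remaps against the newest clones. The merge, extracted as a sketch:

    #include <map>

    // Fold one iteration's old->new mapping into the running map of
    // newest clones, as in the loop over ValueMap above.
    template <typename V>
    static void mergeIntoLatest(std::map<const V*, V*> &LastValueMap,
                                const std::map<const V*, V*> &IterMap) {
      for (typename std::map<const V*, V*>::const_iterator
             VI = IterMap.begin(), VE = IterMap.end(); VI != VE; ++VI)
        LastValueMap[VI->first] = VI->second;
    }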
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index bbbcc1adae74..0ed8c7227b07 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -70,15 +70,18 @@ namespace {
// Used for expensive EH support.
const Type *JBLinkTy;
GlobalVariable *JBListHead;
- Constant *SetJmpFn, *LongJmpFn;
+ Constant *SetJmpFn, *LongJmpFn, *StackSaveFn, *StackRestoreFn;
+ bool useExpensiveEHSupport;
// We peek in TLI to grab the target's jmp_buf size and alignment
const TargetLowering *TLI;
public:
static char ID; // Pass identification, replacement for typeid
- explicit LowerInvoke(const TargetLowering *tli = NULL)
- : FunctionPass(&ID), TLI(tli) { }
+ explicit LowerInvoke(const TargetLowering *tli = NULL,
+ bool useExpensiveEHSupport = ExpensiveEHSupport)
+ : FunctionPass(&ID), useExpensiveEHSupport(useExpensiveEHSupport),
+ TLI(tli) { }
bool doInitialization(Module &M);
bool runOnFunction(Function &F);
@@ -94,7 +97,8 @@ namespace {
bool insertCheapEHSupport(Function &F);
void splitLiveRangesLiveAcrossInvokes(std::vector<InvokeInst*> &Invokes);
void rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
- AllocaInst *InvokeNum, SwitchInst *CatchSwitch);
+ AllocaInst *InvokeNum, AllocaInst *StackPtr,
+ SwitchInst *CatchSwitch);
bool insertExpensiveEHSupport(Function &F);
};
}
@@ -107,7 +111,11 @@ const PassInfo *const llvm::LowerInvokePassID = &X;
// Public Interface To the LowerInvoke pass.
FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI) {
- return new LowerInvoke(TLI);
+ return new LowerInvoke(TLI, ExpensiveEHSupport);
+}
+FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI,
+ bool useExpensiveEHSupport) {
+ return new LowerInvoke(TLI, useExpensiveEHSupport);
}
// doInitialization - Make sure that there is a prototype for abort in the
@@ -116,7 +124,7 @@ bool LowerInvoke::doInitialization(Module &M) {
const Type *VoidPtrTy =
Type::getInt8PtrTy(M.getContext());
AbortMessage = 0;
- if (ExpensiveEHSupport) {
+ if (useExpensiveEHSupport) {
// Insert a type for the linked list of jump buffers.
unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0;
JBSize = JBSize ? JBSize : 200;
@@ -160,6 +168,8 @@ bool LowerInvoke::doInitialization(Module &M) {
#endif
LongJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::longjmp);
+ StackSaveFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave);
+ StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore);
}
// We need the 'write' and 'abort' functions for both models.
@@ -175,7 +185,7 @@ bool LowerInvoke::doInitialization(Module &M) {
}
void LowerInvoke::createAbortMessage(Module *M) {
- if (ExpensiveEHSupport) {
+ if (useExpensiveEHSupport) {
// The abort message for expensive EH support tells the user that the
// program 'unwound' without an 'invoke' instruction.
Constant *Msg =
@@ -229,7 +239,8 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) {
std::vector<Value*> CallArgs(II->op_begin(), II->op_end() - 3);
// Insert a normal call instruction...
CallInst *NewCall = CallInst::Create(II->getCalledValue(),
- CallArgs.begin(), CallArgs.end(), "",II);
+ CallArgs.begin(), CallArgs.end(),
+ "",II);
NewCall->takeName(II);
NewCall->setCallingConv(II->getCallingConv());
NewCall->setAttributes(II->getAttributes());
@@ -270,6 +281,7 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) {
/// specified invoke instruction with a call.
void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
AllocaInst *InvokeNum,
+ AllocaInst *StackPtr,
SwitchInst *CatchSwitch) {
ConstantInt *InvokeNoC = ConstantInt::get(Type::getInt32Ty(II->getContext()),
InvokeNo);
@@ -288,12 +300,22 @@ void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
// Insert a store of the invoke num before the invoke and store zero into the
// location afterward.
new StoreInst(InvokeNoC, InvokeNum, true, II); // volatile
+
+ // Insert a store of the stack ptr before the invoke, so we can restore it
+ // later in the exception case.
+ CallInst* StackSaveRet = CallInst::Create(StackSaveFn, "ssret", II);
+ new StoreInst(StackSaveRet, StackPtr, true, II); // volatile
BasicBlock::iterator NI = II->getNormalDest()->getFirstNonPHI();
// nonvolatile.
new StoreInst(Constant::getNullValue(Type::getInt32Ty(II->getContext())),
InvokeNum, false, NI);
+ Instruction *StackPtrLoad =
+ new LoadInst(StackPtr, "stackptr.restore", true,
+ II->getUnwindDest()->getFirstNonPHI());
+ CallInst::Create(StackRestoreFn, StackPtrLoad, "")->insertAfter(StackPtrLoad);
+
// Add a switch case to our unwind block.
CatchSwitch->addCase(InvokeNoC, II->getUnwindDest());
@@ -500,6 +522,12 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
BasicBlock *CatchBB =
BasicBlock::Create(F.getContext(), "setjmp.catch", &F);
+ // Create an alloca which keeps track of the stack pointer before every
+ // invoke, this allows us to properly restore the stack pointer after
+ // long jumping.
+ AllocaInst *StackPtr = new AllocaInst(Type::getInt8PtrTy(F.getContext()), 0,
+ "stackptr", EntryBB->begin());
+
// Create an alloca which keeps track of which invoke is currently
// executing. For normal calls it contains zero.
AllocaInst *InvokeNum = new AllocaInst(Type::getInt32Ty(F.getContext()), 0,
@@ -546,7 +574,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
// At this point, we are all set up, rewrite each invoke instruction.
for (unsigned i = 0, e = Invokes.size(); i != e; ++i)
- rewriteExpensiveInvoke(Invokes[i], i+1, InvokeNum, CatchSwitch);
+ rewriteExpensiveInvoke(Invokes[i], i+1, InvokeNum, StackPtr, CatchSwitch);
}
// We know that there is at least one unwind.
@@ -622,7 +650,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
}
bool LowerInvoke::runOnFunction(Function &F) {
- if (ExpensiveEHSupport)
+ if (useExpensiveEHSupport)
return insertExpensiveEHSupport(F);
else
return insertCheapEHSupport(F);
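The new stacksave/stackrestore pair closes a hole in the setjmp scheme: longjmp returns into a frame whose stack pointer may have moved past dynamic allocas executed after the jump buffer was filled, so the unwind path reloads the pointer saved just before the call. The shape in plain setjmp/longjmp terms (illustrative only, not the pass's output):

    #include <csetjmp>
    #include <cstdio>

    static std::jmp_buf Buf;
    static void *SavedSP;  // plays the role of the %stackptr alloca

    static void callee() { std::longjmp(Buf, 1); }  // the "unwind"

    int main() {
      if (setjmp(Buf) == 0) {
        SavedSP = nullptr;  // llvm.stacksave result stored here (volatile)
        callee();           // the rewritten invoke
      } else {
        // Landing pad: the pass reloads SavedSP and calls
        // llvm.stackrestore before dispatching on the catch switch.
        std::printf("unwound; stack pointer restored from SavedSP\n");
      }
      return 0;
    }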
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index a31235a1f5cb..25d50dbf2a8b 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -11,34 +11,52 @@
//
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "ssaupdater"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Instructions.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/AlignOf.h"
+#include "llvm/Support/Allocator.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-typedef DenseMap<BasicBlock*, TrackingVH<Value> > AvailableValsTy;
-typedef std::vector<std::pair<BasicBlock*, TrackingVH<Value> > >
- IncomingPredInfoTy;
-
+/// BBInfo - Per-basic block information used internally by SSAUpdater.
+/// The predecessors of each block are cached here since pred_iterator is
+/// slow and we need to iterate over the blocks at least a few times.
+class SSAUpdater::BBInfo {
+public:
+ BasicBlock *BB; // Back-pointer to the corresponding block.
+ Value *AvailableVal; // Value to use in this block.
+ BBInfo *DefBB; // Block that defines the available value.
+ int BlkNum; // Postorder number.
+ BBInfo *IDom; // Immediate dominator.
+ unsigned NumPreds; // Number of predecessor blocks.
+ BBInfo **Preds; // Array[NumPreds] of predecessor blocks.
+ PHINode *PHITag; // Marker for existing PHIs that match.
+
+ BBInfo(BasicBlock *ThisBB, Value *V)
+ : BB(ThisBB), AvailableVal(V), DefBB(V ? this : 0), BlkNum(0), IDom(0),
+ NumPreds(0), Preds(0), PHITag(0) { }
+};
+
+typedef DenseMap<BasicBlock*, SSAUpdater::BBInfo*> BBMapTy;
+
+typedef DenseMap<BasicBlock*, Value*> AvailableValsTy;
static AvailableValsTy &getAvailableVals(void *AV) {
return *static_cast<AvailableValsTy*>(AV);
}
-static IncomingPredInfoTy &getIncomingPredInfo(void *IPI) {
- return *static_cast<IncomingPredInfoTy*>(IPI);
+static BBMapTy *getBBMap(void *BM) {
+ return static_cast<BBMapTy*>(BM);
}
-
SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI)
- : AV(0), PrototypeValue(0), IPI(0), InsertedPHIs(NewPHI) {}
+ : AV(0), PrototypeValue(0), BM(0), InsertedPHIs(NewPHI) {}
SSAUpdater::~SSAUpdater() {
delete &getAvailableVals(AV);
- delete &getIncomingPredInfo(IPI);
}
/// Initialize - Reset this object to get ready for a new set of SSA
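The BBInfo class added above is the backbone of the rewrite that follows: the updater materializes one record per backwards-reachable block, caches predecessors in a flat array, and then runs postorder numbering, dominator computation, and PHI placement over those records instead of recursing through pred_iterator. A compiling stand-in for the record, with an invented Block type:

    #include <vector>

    struct Block;  // stand-in for llvm::BasicBlock

    // One record per block, mirroring SSAUpdater::BBInfo; predecessors
    // are gathered once so later passes iterate plain arrays.
    struct BlockInfo {
      Block *BB = nullptr;            // back-pointer to the block
      void *AvailableVal = nullptr;   // live-out value, if already known
      int BlkNum = 0;                 // postorder number (assigned later)
      BlockInfo *IDom = nullptr;      // immediate dominator (computed later)
      std::vector<BlockInfo*> Preds;  // cached predecessor records
    };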
@@ -48,11 +66,6 @@ void SSAUpdater::Initialize(Value *ProtoValue) {
AV = new AvailableValsTy();
else
getAvailableVals(AV).clear();
-
- if (IPI == 0)
- IPI = new IncomingPredInfoTy();
- else
- getIncomingPredInfo(IPI).clear();
PrototypeValue = ProtoValue;
}
@@ -73,7 +86,7 @@ void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) {
/// IsEquivalentPHI - Check if PHI has the same incoming value as specified
/// in ValueMapping for each predecessor block.
-static bool IsEquivalentPHI(PHINode *PHI,
+static bool IsEquivalentPHI(PHINode *PHI,
DenseMap<BasicBlock*, Value*> &ValueMapping) {
unsigned PHINumValues = PHI->getNumIncomingValues();
if (PHINumValues != ValueMapping.size())
@@ -89,38 +102,12 @@ static bool IsEquivalentPHI(PHINode *PHI,
return true;
}
-/// GetExistingPHI - Check if BB already contains a phi node that is equivalent
-/// to the specified mapping from predecessor blocks to incoming values.
-static Value *GetExistingPHI(BasicBlock *BB,
- DenseMap<BasicBlock*, Value*> &ValueMapping) {
- PHINode *SomePHI;
- for (BasicBlock::iterator It = BB->begin();
- (SomePHI = dyn_cast<PHINode>(It)); ++It) {
- if (IsEquivalentPHI(SomePHI, ValueMapping))
- return SomePHI;
- }
- return 0;
-}
-
-/// GetExistingPHI - Check if BB already contains an equivalent phi node.
-/// The InputIt type must be an iterator over std::pair<BasicBlock*, Value*>
-/// objects that specify the mapping from predecessor blocks to incoming values.
-template<typename InputIt>
-static Value *GetExistingPHI(BasicBlock *BB, const InputIt &I,
- const InputIt &E) {
- // Avoid creating the mapping if BB has no phi nodes at all.
- if (!isa<PHINode>(BB->begin()))
- return 0;
- DenseMap<BasicBlock*, Value*> ValueMapping(I, E);
- return GetExistingPHI(BB, ValueMapping);
-}
-
/// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is
/// live at the end of the specified block.
Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) {
- assert(getIncomingPredInfo(IPI).empty() && "Unexpected Internal State");
+ assert(BM == 0 && "Unexpected Internal State");
Value *Res = GetValueAtEndOfBlockInternal(BB);
- assert(getIncomingPredInfo(IPI).empty() && "Unexpected Internal State");
+ assert(BM == 0 && "Unexpected Internal State");
return Res;
}
@@ -146,7 +133,7 @@ Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) {
Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
// If there is no definition of the renamed variable in this block, just use
// GetValueAtEndOfBlock to do our work.
- if (!getAvailableVals(AV).count(BB))
+ if (!HasValueForBlock(BB))
return GetValueAtEndOfBlock(BB);
// Otherwise, we have the hard case. Get the live-in values for each
@@ -193,10 +180,18 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
if (SingularValue != 0)
return SingularValue;
- // Otherwise, we do need a PHI.
- if (Value *ExistingPHI = GetExistingPHI(BB, PredValues.begin(),
- PredValues.end()))
- return ExistingPHI;
+ // Otherwise, we do need a PHI: check to see if we already have one available
+ // in this block that produces the right value.
+ if (isa<PHINode>(BB->begin())) {
+ DenseMap<BasicBlock*, Value*> ValueMapping(PredValues.begin(),
+ PredValues.end());
+ PHINode *SomePHI;
+ for (BasicBlock::iterator It = BB->begin();
+ (SomePHI = dyn_cast<PHINode>(It)); ++It) {
+ if (IsEquivalentPHI(SomePHI, ValueMapping))
+ return SomePHI;
+ }
+ }
// Ok, we have no way out, insert a new one now.
PHINode *InsertedPHI = PHINode::Create(PrototypeValue->getType(),
@@ -226,7 +221,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
/// which use their value in the corresponding predecessor.
void SSAUpdater::RewriteUse(Use &U) {
Instruction *User = cast<Instruction>(U.getUser());
-
+
Value *V;
if (PHINode *UserPN = dyn_cast<PHINode>(User))
V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U));
@@ -236,161 +231,427 @@ void SSAUpdater::RewriteUse(Use &U) {
U.set(V);
}
-
/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry
/// for the specified BB and if so, return it. If not, construct SSA form by
-/// walking predecessors inserting PHI nodes as needed until we get to a block
-/// where the value is available.
-///
+/// first calculating the required placement of PHIs and then inserting new
+/// PHIs where needed.
Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
AvailableValsTy &AvailableVals = getAvailableVals(AV);
+ if (Value *V = AvailableVals[BB])
+ return V;
+
+ // Pool allocation used internally by GetValueAtEndOfBlock.
+ BumpPtrAllocator Allocator;
+ BBMapTy BBMapObj;
+ BM = &BBMapObj;
+
+ SmallVector<BBInfo*, 100> BlockList;
+ BuildBlockList(BB, &BlockList, &Allocator);
- // Query AvailableVals by doing an insertion of null.
- std::pair<AvailableValsTy::iterator, bool> InsertRes =
- AvailableVals.insert(std::make_pair(BB, TrackingVH<Value>()));
-
- // Handle the case when the insertion fails because we have already seen BB.
- if (!InsertRes.second) {
- // If the insertion failed, there are two cases. The first case is that the
- // value is already available for the specified block. If we get this, just
- // return the value.
- if (InsertRes.first->second != 0)
- return InsertRes.first->second;
-
- // Otherwise, if the value we find is null, then the value is not
- // known but it is being computed elsewhere in our recursion. This means
- // that we have a cycle. Handle this by inserting a PHI node and returning
- // it. When we get back to the first instance of the recursion we will fill
- // in the PHI node.
- return InsertRes.first->second =
- PHINode::Create(PrototypeValue->getType(), PrototypeValue->getName(),
- &BB->front());
+ // Special case: bail out if BB is unreachable.
+ if (BlockList.size() == 0) {
+ BM = 0;
+ return UndefValue::get(PrototypeValue->getType());
}
- // Okay, the value isn't in the map and we just inserted a null in the entry
- // to indicate that we're processing the block. Since we have no idea what
- // value is in this block, we have to recurse through our predecessors.
- //
- // While we're walking our predecessors, we keep track of them in a vector,
- // then insert a PHI node in the end if we actually need one. We could use a
- // smallvector here, but that would take a lot of stack space for every level
- // of the recursion, just use IncomingPredInfo as an explicit stack.
- IncomingPredInfoTy &IncomingPredInfo = getIncomingPredInfo(IPI);
- unsigned FirstPredInfoEntry = IncomingPredInfo.size();
-
- // As we're walking the predecessors, keep track of whether they are all
- // producing the same value. If so, this value will capture it, if not, it
- // will get reset to null. We distinguish the no-predecessor case explicitly
- // below.
- TrackingVH<Value> ExistingValue;
+ FindDominators(&BlockList);
+ FindPHIPlacement(&BlockList);
+ FindAvailableVals(&BlockList);
- // We can get our predecessor info by walking the pred_iterator list, but it
- // is relatively slow. If we already have PHI nodes in this block, walk one
- // of them to get the predecessor list instead.
+ BM = 0;
+ return BBMapObj[BB]->DefBB->AvailableVal;
+}
+
+/// FindPredecessorBlocks - Put the predecessors of Info->BB into the Preds
+/// vector, set Info->NumPreds, and allocate space in Info->Preds.
+static void FindPredecessorBlocks(SSAUpdater::BBInfo *Info,
+ SmallVectorImpl<BasicBlock*> *Preds,
+ BumpPtrAllocator *Allocator) {
+ // We can get our predecessor info by walking the pred_iterator list,
+ // but it is relatively slow. If we already have PHI nodes in this
+ // block, walk one of them to get the predecessor list instead.
+ BasicBlock *BB = Info->BB;
if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) {
- for (unsigned i = 0, e = SomePhi->getNumIncomingValues(); i != e; ++i) {
- BasicBlock *PredBB = SomePhi->getIncomingBlock(i);
- Value *PredVal = GetValueAtEndOfBlockInternal(PredBB);
- IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal));
+ for (unsigned PI = 0, E = SomePhi->getNumIncomingValues(); PI != E; ++PI)
+ Preds->push_back(SomePhi->getIncomingBlock(PI));
+ } else {
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ Preds->push_back(*PI);
+ }
- // Set ExistingValue to singular value from all predecessors so far.
- if (i == 0)
- ExistingValue = PredVal;
- else if (PredVal != ExistingValue)
- ExistingValue = 0;
+ Info->NumPreds = Preds->size();
+ Info->Preds = static_cast<SSAUpdater::BBInfo**>
+ (Allocator->Allocate(Info->NumPreds * sizeof(SSAUpdater::BBInfo*),
+ AlignOf<SSAUpdater::BBInfo*>::Alignment));
+}
+
+/// BuildBlockList - Starting from the specified basic block, traverse back
+/// through its predecessors until reaching blocks with known values. Create
+/// BBInfo structures for the blocks and append them to the block list.
+void SSAUpdater::BuildBlockList(BasicBlock *BB, BlockListTy *BlockList,
+ BumpPtrAllocator *Allocator) {
+ AvailableValsTy &AvailableVals = getAvailableVals(AV);
+ BBMapTy *BBMap = getBBMap(BM);
+ SmallVector<BBInfo*, 10> RootList;
+ SmallVector<BBInfo*, 64> WorkList;
+
+ BBInfo *Info = new (*Allocator) BBInfo(BB, 0);
+ (*BBMap)[BB] = Info;
+ WorkList.push_back(Info);
+
+ // Search backward from BB, creating BBInfos along the way and stopping when
+ // reaching blocks that define the value. Record those defining blocks on
+ // the RootList.
+ SmallVector<BasicBlock*, 10> Preds;
+ while (!WorkList.empty()) {
+ Info = WorkList.pop_back_val();
+ Preds.clear();
+ FindPredecessorBlocks(Info, &Preds, Allocator);
+
+ // Treat an unreachable predecessor as a definition with 'undef'.
+ if (Info->NumPreds == 0) {
+ Info->AvailableVal = UndefValue::get(PrototypeValue->getType());
+ Info->DefBB = Info;
+ RootList.push_back(Info);
+ continue;
}
- } else {
- bool isFirstPred = true;
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
- BasicBlock *PredBB = *PI;
- Value *PredVal = GetValueAtEndOfBlockInternal(PredBB);
- IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal));
- // Set ExistingValue to singular value from all predecessors so far.
- if (isFirstPred) {
- ExistingValue = PredVal;
- isFirstPred = false;
- } else if (PredVal != ExistingValue)
- ExistingValue = 0;
+ for (unsigned p = 0; p != Info->NumPreds; ++p) {
+ BasicBlock *Pred = Preds[p];
+ // Check if BBMap already has a BBInfo for the predecessor block.
+ BBMapTy::value_type &BBMapBucket = BBMap->FindAndConstruct(Pred);
+ if (BBMapBucket.second) {
+ Info->Preds[p] = BBMapBucket.second;
+ continue;
+ }
+
+ // Create a new BBInfo for the predecessor.
+ Value *PredVal = AvailableVals.lookup(Pred);
+ BBInfo *PredInfo = new (*Allocator) BBInfo(Pred, PredVal);
+ BBMapBucket.second = PredInfo;
+ Info->Preds[p] = PredInfo;
+
+ if (PredInfo->AvailableVal) {
+ RootList.push_back(PredInfo);
+ continue;
+ }
+ WorkList.push_back(PredInfo);
+ }
+ }
+
+ // Now that we know what blocks are backwards-reachable from the starting
+ // block, do a forward depth-first traversal to assign postorder numbers
+ // to those blocks.
+ BBInfo *PseudoEntry = new (*Allocator) BBInfo(0, 0);
+ unsigned BlkNum = 1;
+
+ // Initialize the worklist with the roots from the backward traversal.
+ while (!RootList.empty()) {
+ Info = RootList.pop_back_val();
+ Info->IDom = PseudoEntry;
+ Info->BlkNum = -1;
+ WorkList.push_back(Info);
+ }
+
+ while (!WorkList.empty()) {
+ Info = WorkList.back();
+
+ if (Info->BlkNum == -2) {
+ // All the successors have been handled; assign the postorder number.
+ Info->BlkNum = BlkNum++;
+ // If not a root, put it on the BlockList.
+ if (!Info->AvailableVal)
+ BlockList->push_back(Info);
+ WorkList.pop_back();
+ continue;
+ }
+
+ // Leave this entry on the worklist, but set its BlkNum to mark that its
+ // successors have been put on the worklist. When it returns to the top
+ // of the list, after handling its successors, it will be assigned a number.
+ Info->BlkNum = -2;
+
+ // Add unvisited successors to the work list.
+ for (succ_iterator SI = succ_begin(Info->BB), E = succ_end(Info->BB);
+ SI != E; ++SI) {
+ BBInfo *SuccInfo = (*BBMap)[*SI];
+ if (!SuccInfo || SuccInfo->BlkNum)
+ continue;
+ SuccInfo->BlkNum = -1;
+ WorkList.push_back(SuccInfo);
}
}
+ PseudoEntry->BlkNum = BlkNum;
+}
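// To illustrate the numbering on a hypothetical diamond CFG (A->B, A->C,
// B->D, C->D) where A defines the value and D is the queried block: the
// backward walk makes A the sole root, and one possible postorder assignment
// from the forward DFS is D=1, C=2, B=3, A=4, with PseudoEntry taking 5.
// Larger numbers lie closer to the roots, which is the invariant that
// IntersectDominators relies on.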
- // If there are no predecessors, then we must have found an unreachable block
- // just return 'undef'. Since there are no predecessors, InsertRes must not
- // be invalidated.
- if (IncomingPredInfo.size() == FirstPredInfoEntry)
- return InsertRes.first->second = UndefValue::get(PrototypeValue->getType());
-
- /// Look up BB's entry in AvailableVals. 'InsertRes' may be invalidated. If
- /// this block is involved in a loop, a no-entry PHI node will have been
- /// inserted as InsertedVal. Otherwise, we'll still have the null we inserted
- /// above.
- TrackingVH<Value> &InsertedVal = AvailableVals[BB];
-
- // If the predecessor values are not all the same, then check to see if there
- // is an existing PHI that can be used.
- if (!ExistingValue)
- ExistingValue = GetExistingPHI(BB,
- IncomingPredInfo.begin()+FirstPredInfoEntry,
- IncomingPredInfo.end());
-
- // If there is an existing value we can use, then we don't need to insert a
- // PHI. This is the simple and common case.
- if (ExistingValue) {
- // If a PHI node got inserted, replace it with the existing value and delete
- // it.
- if (InsertedVal) {
- PHINode *OldVal = cast<PHINode>(InsertedVal);
- // Be careful about dead loops. These RAUW's also update InsertedVal.
- if (InsertedVal != ExistingValue)
- OldVal->replaceAllUsesWith(ExistingValue);
- else
- OldVal->replaceAllUsesWith(UndefValue::get(InsertedVal->getType()));
- OldVal->eraseFromParent();
- } else {
- InsertedVal = ExistingValue;
+/// IntersectDominators - This is the dataflow lattice "meet" operation for
+/// finding dominators. Given two basic blocks, it walks up the dominator
+/// tree until it finds a common dominator of both. It uses the postorder
+/// numbers of the blocks to determine how to do that.
+static SSAUpdater::BBInfo *IntersectDominators(SSAUpdater::BBInfo *Blk1,
+ SSAUpdater::BBInfo *Blk2) {
+ while (Blk1 != Blk2) {
+ while (Blk1->BlkNum < Blk2->BlkNum) {
+ Blk1 = Blk1->IDom;
+ if (!Blk1)
+ return Blk2;
}
+ while (Blk2->BlkNum < Blk1->BlkNum) {
+ Blk2 = Blk2->IDom;
+ if (!Blk2)
+ return Blk1;
+ }
+ }
+ return Blk1;
+}
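// Continuing the hypothetical diamond (D=1, C=2, B=3, A=4): to intersect D's
// predecessors B and C, C has the smaller postorder number and climbs first
// to IDom(C) = A (4); then B (3) < A (4), so B climbs to A as well. The two
// walks meet at A, which becomes D's immediate dominator.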
- // Either path through the 'if' should have set InsertedVal -> ExistingVal.
- assert((InsertedVal == ExistingValue || isa<UndefValue>(InsertedVal)) &&
- "RAUW didn't change InsertedVal to be ExistingValue");
+/// FindDominators - Calculate the dominator tree for the subset of the CFG
+/// corresponding to the basic blocks on the BlockList. This uses the
+/// algorithm from: "A Simple, Fast Dominance Algorithm" by Cooper, Harvey and
+/// Kennedy, published in Software--Practice and Experience, 2001, 4:1-10.
+/// Because the CFG subset does not include any edges leading into blocks that
+/// define the value, the results are not the usual dominator tree. The CFG
+/// subset has a single pseudo-entry node with edges to a set of root nodes
+/// for blocks that define the value. The dominators for this subset CFG are
+/// not the standard dominators but they are adequate for placing PHIs within
+/// the subset CFG.
+void SSAUpdater::FindDominators(BlockListTy *BlockList) {
+ bool Changed;
+ do {
+ Changed = false;
+ // Iterate over the list in reverse order, i.e., forward on CFG edges.
+ for (BlockListTy::reverse_iterator I = BlockList->rbegin(),
+ E = BlockList->rend(); I != E; ++I) {
+ BBInfo *Info = *I;
+
+ // Start with the first predecessor.
+ assert(Info->NumPreds > 0 && "unreachable block");
+ BBInfo *NewIDom = Info->Preds[0];
+
+ // Iterate through the block's other predecessors.
+ for (unsigned p = 1; p != Info->NumPreds; ++p) {
+ BBInfo *Pred = Info->Preds[p];
+ NewIDom = IntersectDominators(NewIDom, Pred);
+ }
+
+ // Check if the IDom value has changed.
+ if (NewIDom != Info->IDom) {
+ Info->IDom = NewIDom;
+ Changed = true;
+ }
+ }
+ } while (Changed);
+}
- // Drop the entries we added in IncomingPredInfo to restore the stack.
- IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry,
- IncomingPredInfo.end());
- return ExistingValue;
+/// IsDefInDomFrontier - Search up the dominator tree from Pred to IDom for
+/// any blocks containing definitions of the value. If one is found, then the
+/// successor of Pred is in the dominance frontier for the definition, and
+/// this function returns true.
+static bool IsDefInDomFrontier(const SSAUpdater::BBInfo *Pred,
+ const SSAUpdater::BBInfo *IDom) {
+ for (; Pred != IDom; Pred = Pred->IDom) {
+ if (Pred->DefBB == Pred)
+ return true;
}
+ return false;
+}
- // Otherwise, we do need a PHI: insert one now if we don't already have one.
- if (InsertedVal == 0)
- InsertedVal = PHINode::Create(PrototypeValue->getType(),
- PrototypeValue->getName(), &BB->front());
+/// FindPHIPlacement - PHIs are needed in the iterated dominance frontiers of
+/// the known definitions. Iteratively add PHIs in the dom frontiers until
+/// nothing changes. Along the way, keep track of the nearest dominating
+/// definitions for non-PHI blocks.
+void SSAUpdater::FindPHIPlacement(BlockListTy *BlockList) {
+ bool Changed;
+ do {
+ Changed = false;
+ // Iterate over the list in reverse order, i.e., forward on CFG edges.
+ for (BlockListTy::reverse_iterator I = BlockList->rbegin(),
+ E = BlockList->rend(); I != E; ++I) {
+ BBInfo *Info = *I;
+
+ // If this block already needs a PHI, there is nothing to do here.
+ if (Info->DefBB == Info)
+ continue;
+
+ // Default to use the same def as the immediate dominator.
+ BBInfo *NewDefBB = Info->IDom->DefBB;
+ for (unsigned p = 0; p != Info->NumPreds; ++p) {
+ if (IsDefInDomFrontier(Info->Preds[p], Info->IDom)) {
+ // Need a PHI here.
+ NewDefBB = Info;
+ break;
+ }
+ }
+
+ // Check if anything changed.
+ if (NewDefBB != Info->DefBB) {
+ Info->DefBB = NewDefBB;
+ Changed = true;
+ }
+ }
+ } while (Changed);
+}
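// A hypothetical case that does place a PHI: if AddAvailableValue recorded
// definitions in two blocks B and C that both branch to D, then B and C are
// roots, D's IDom is the pseudo-entry, and IsDefInDomFrontier(B, D->IDom)
// immediately sees B's own definition. D therefore ends up with
// DefBB == itself, marking it as a PHI block.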
- PHINode *InsertedPHI = cast<PHINode>(InsertedVal);
- InsertedPHI->reserveOperandSpace(IncomingPredInfo.size()-FirstPredInfoEntry);
+/// FindAvailableVals - If this block requires a PHI, first check if an existing
+/// PHI matches the PHI placement and reaching definitions computed earlier,
+/// and if not, create a new PHI. Visit all the block's predecessors to
+/// calculate the available value for each one and fill in the incoming values
+/// for a new PHI.
+void SSAUpdater::FindAvailableVals(BlockListTy *BlockList) {
+ AvailableValsTy &AvailableVals = getAvailableVals(AV);
- // Fill in all the predecessors of the PHI.
- for (IncomingPredInfoTy::iterator I =
- IncomingPredInfo.begin()+FirstPredInfoEntry,
- E = IncomingPredInfo.end(); I != E; ++I)
- InsertedPHI->addIncoming(I->second, I->first);
+ // Go through the block list in forward order (i.e., backward through the CFG)
+ // and check if existing PHIs can be used. If not, create empty PHIs where
+ // they are needed.
+ for (BlockListTy::iterator I = BlockList->begin(), E = BlockList->end();
+ I != E; ++I) {
+ BBInfo *Info = *I;
+ // Check if there needs to be a PHI in BB.
+ if (Info->DefBB != Info)
+ continue;
+
+ // Look for an existing PHI.
+ FindExistingPHI(Info->BB, BlockList);
+ if (Info->AvailableVal)
+ continue;
+
+ PHINode *PHI = PHINode::Create(PrototypeValue->getType(),
+ PrototypeValue->getName(),
+ &Info->BB->front());
+ PHI->reserveOperandSpace(Info->NumPreds);
+ Info->AvailableVal = PHI;
+ AvailableVals[Info->BB] = PHI;
+ }
- // Drop the entries we added in IncomingPredInfo to restore the stack.
- IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry,
- IncomingPredInfo.end());
+ // Now go back through the block list in reverse order to fill in the arguments
+ // for any new PHIs added in the forward traversal.
+ for (BlockListTy::reverse_iterator I = BlockList->rbegin(),
+ E = BlockList->rend(); I != E; ++I) {
+ BBInfo *Info = *I;
+
+ if (Info->DefBB != Info) {
+ // Record the available value at join nodes to speed up subsequent
+ // uses of this SSAUpdater for the same value.
+ if (Info->NumPreds > 1)
+ AvailableVals[Info->BB] = Info->DefBB->AvailableVal;
+ continue;
+ }
- // See if the PHI node can be merged to a single value. This can happen in
- // loop cases when we get a PHI of itself and one other value.
- if (Value *ConstVal = InsertedPHI->hasConstantValue()) {
- InsertedPHI->replaceAllUsesWith(ConstVal);
- InsertedPHI->eraseFromParent();
- InsertedVal = ConstVal;
- } else {
- DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n");
+ // Check if this block contains a newly added PHI.
+ PHINode *PHI = dyn_cast<PHINode>(Info->AvailableVal);
+ if (!PHI || PHI->getNumIncomingValues() == Info->NumPreds)
+ continue;
+
+ // Iterate through the block's predecessors.
+ for (unsigned p = 0; p != Info->NumPreds; ++p) {
+ BBInfo *PredInfo = Info->Preds[p];
+ BasicBlock *Pred = PredInfo->BB;
+ // Skip to the nearest preceding definition.
+ if (PredInfo->DefBB != PredInfo)
+ PredInfo = PredInfo->DefBB;
+ PHI->addIncoming(PredInfo->AvailableVal, Pred);
+ }
+
+ DEBUG(dbgs() << " Inserted PHI: " << *PHI << "\n");
// If the client wants to know about all new instructions, tell it.
- if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
+ if (InsertedPHIs) InsertedPHIs->push_back(PHI);
+ }
+}
+
+/// FindExistingPHI - Look through the PHI nodes in a block to see if any of
+/// them match what is needed.
+void SSAUpdater::FindExistingPHI(BasicBlock *BB, BlockListTy *BlockList) {
+ PHINode *SomePHI;
+ for (BasicBlock::iterator It = BB->begin();
+ (SomePHI = dyn_cast<PHINode>(It)); ++It) {
+ if (CheckIfPHIMatches(SomePHI)) {
+ RecordMatchingPHI(SomePHI);
+ break;
+ }
+ // Match failed: clear all the PHITag values.
+ for (BlockListTy::iterator I = BlockList->begin(), E = BlockList->end();
+ I != E; ++I)
+ (*I)->PHITag = 0;
+ }
+}
+
+/// CheckIfPHIMatches - Check if a PHI node matches the placement and values
+/// in the BBMap.
+bool SSAUpdater::CheckIfPHIMatches(PHINode *PHI) {
+ BBMapTy *BBMap = getBBMap(BM);
+ SmallVector<PHINode*, 20> WorkList;
+ WorkList.push_back(PHI);
+
+ // Mark that the block containing this PHI has been visited.
+ (*BBMap)[PHI->getParent()]->PHITag = PHI;
+
+ while (!WorkList.empty()) {
+ PHI = WorkList.pop_back_val();
+
+ // Iterate through the PHI's incoming values.
+ for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
+ Value *IncomingVal = PHI->getIncomingValue(i);
+ BBInfo *PredInfo = (*BBMap)[PHI->getIncomingBlock(i)];
+ // Skip to the nearest preceding definition.
+ if (PredInfo->DefBB != PredInfo)
+ PredInfo = PredInfo->DefBB;
+
+ // Check if it matches the expected value.
+ if (PredInfo->AvailableVal) {
+ if (IncomingVal == PredInfo->AvailableVal)
+ continue;
+ return false;
+ }
+
+ // Check if the value is a PHI in the correct block.
+ PHINode *IncomingPHIVal = dyn_cast<PHINode>(IncomingVal);
+ if (!IncomingPHIVal || IncomingPHIVal->getParent() != PredInfo->BB)
+ return false;
+
+ // If this block has already been visited, check if this PHI matches.
+ if (PredInfo->PHITag) {
+ if (IncomingPHIVal == PredInfo->PHITag)
+ continue;
+ return false;
+ }
+ PredInfo->PHITag = IncomingPHIVal;
+
+ WorkList.push_back(IncomingPHIVal);
+ }
}
+ return true;
+}
- return InsertedVal;
+/// RecordMatchingPHI - For a PHI node that matches, record it and its input
+/// PHIs in both the BBMap and the AvailableVals mapping.
+void SSAUpdater::RecordMatchingPHI(PHINode *PHI) {
+ BBMapTy *BBMap = getBBMap(BM);
+ AvailableValsTy &AvailableVals = getAvailableVals(AV);
+ SmallVector<PHINode*, 20> WorkList;
+ WorkList.push_back(PHI);
+
+ // Record this PHI.
+ BasicBlock *BB = PHI->getParent();
+ AvailableVals[BB] = PHI;
+ (*BBMap)[BB]->AvailableVal = PHI;
+
+ while (!WorkList.empty()) {
+ PHI = WorkList.pop_back_val();
+
+ // Iterate through the PHI's incoming values.
+ for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
+ PHINode *IncomingPHIVal = dyn_cast<PHINode>(PHI->getIncomingValue(i));
+ if (!IncomingPHIVal) continue;
+ BB = IncomingPHIVal->getParent();
+ BBInfo *Info = (*BBMap)[BB];
+ if (!Info || Info->AvailableVal)
+ continue;
+
+ // Record the PHI and add it to the worklist.
+ AvailableVals[BB] = IncomingPHIVal;
+ Info->AvailableVal = IncomingPHIVal;
+ WorkList.push_back(IncomingPHIVal);
+ }
+ }
}
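As a minimal sketch of how a client drives the rewritten machinery, assuming
only the SSAUpdater interface this file implements (Initialize with a
prototype value, AddAvailableValue, GetValueAtEndOfBlock); the values and
blocks are hypothetical:

  SmallVector<PHINode*, 8> NewPHIs;
  SSAUpdater SSA(&NewPHIs);                   // collects any PHIs created
  SSA.Initialize(OrigVal);                    // prototype supplies type/name
  SSA.AddAvailableValue(DefBB1, OrigVal);     // known definitions become roots
  SSA.AddAvailableValue(DefBB2, NewVal);
  Value *V = SSA.GetValueAtEndOfBlock(UseBB); // runs the phases above

Results are cached in AvailableVals, so repeated queries for the same block do
not recompute the dominator information.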
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index 60450487cd78..87ce631ca626 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -12,7 +12,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Utils/ValueMapper.h"
+#include "ValueMapper.h"
#include "llvm/Type.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
@@ -20,7 +20,7 @@
#include "llvm/ADT/SmallVector.h"
using namespace llvm;
-Value *llvm::MapValue(const Value *V, ValueMapTy &VM) {
+Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) {
Value *&VMSlot = VM[V];
if (VMSlot) return VMSlot; // Does it exist in the map yet?
@@ -127,7 +127,7 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) {
/// RemapInstruction - Convert the instruction operands from referencing the
/// current values into those specified by ValueMap.
///
-void llvm::RemapInstruction(Instruction *I, ValueMapTy &ValueMap) {
+void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &ValueMap) {
for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) {
Value *V = MapValue(*op, ValueMap);
assert(V && "Referenced value not in value map!");
diff --git a/include/llvm/Transforms/Utils/ValueMapper.h b/lib/Transforms/Utils/ValueMapper.h
index ed3341364181..d61c24c6a685 100644
--- a/include/llvm/Transforms/Utils/ValueMapper.h
+++ b/lib/Transforms/Utils/ValueMapper.h
@@ -20,10 +20,10 @@
namespace llvm {
class Value;
class Instruction;
- typedef DenseMap<const Value *, Value *> ValueMapTy;
+ typedef DenseMap<const Value *, Value *> ValueToValueMapTy;
- Value *MapValue(const Value *V, ValueMapTy &VM);
- void RemapInstruction(Instruction *I, ValueMapTy &VM);
+ Value *MapValue(const Value *V, ValueToValueMapTy &VM);
+ void RemapInstruction(Instruction *I, ValueToValueMapTy &VM);
} // End llvm namespace
#endif
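The rename is mechanical, but for context, a hypothetical cloning loop built
on these two entry points might look like:

  ValueToValueMapTy VMap;
  VMap[OldArg] = NewArg;            // seed with externally known mappings
  for (BasicBlock::iterator I = NewBB->begin(), E = NewBB->end(); I != E; ++I)
    RemapInstruction(&*I, VMap);    // rewrites each operand via MapValue

MapValue memoizes everything it resolves in VMap, so operands that appear
repeatedly are only translated once.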
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
index f6a6076df7bc..6c1aa5ed10c6 100644
--- a/lib/VMCore/AsmWriter.cpp
+++ b/lib/VMCore/AsmWriter.cpp
@@ -227,13 +227,15 @@ void TypePrinting::CalcTypeName(const Type *Ty,
const StructType *STy = cast<StructType>(Ty);
if (STy->isPacked())
OS << '<';
- OS << "{ ";
+ OS << '{';
for (StructType::element_iterator I = STy->element_begin(),
E = STy->element_end(); I != E; ++I) {
+ OS << ' ';
CalcTypeName(*I, TypeStack, OS);
- if (next(I) != STy->element_end())
+ if (next(I) == STy->element_end())
+ OS << ' ';
+ else
OS << ',';
- OS << ' ';
}
OS << '}';
if (STy->isPacked())
@@ -242,13 +244,15 @@ void TypePrinting::CalcTypeName(const Type *Ty,
}
case Type::UnionTyID: {
const UnionType *UTy = cast<UnionType>(Ty);
- OS << "union { ";
+ OS << "union {";
for (StructType::element_iterator I = UTy->element_begin(),
E = UTy->element_end(); I != E; ++I) {
+ OS << ' ';
CalcTypeName(*I, TypeStack, OS);
- if (next(I) != UTy->element_end())
+ if (next(I) == UTy->element_end())
+ OS << ' ';
+ else
OS << ',';
- OS << ' ';
}
OS << '}';
break;
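The visible effect on printed types: a non-empty struct such as { i32, i8 }
keeps its interior spacing, while an empty struct now prints as {} and an
empty union as union {}, rather than the old { } and union { } with a stray
space.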
diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp
index 4d06b6668169..0144210767d8 100644
--- a/lib/VMCore/AutoUpgrade.cpp
+++ b/lib/VMCore/AutoUpgrade.cpp
@@ -19,6 +19,7 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/IRBuilder.h"
#include <cstring>
using namespace llvm;
@@ -277,8 +278,13 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
// Calls to these intrinsics are transformed into vector multiplies.
NewFn = 0;
return true;
+ } else if (Name.compare(5, 18, "x86.ssse3.palign.r", 18) == 0 ||
+ Name.compare(5, 22, "x86.ssse3.palign.r.128", 22) == 0) {
+ // Calls to these intrinsics are transformed into vector shuffles, shifts,
+ // or 0.
+ NewFn = 0;
+ return true;
}
-
break;
}
@@ -420,6 +426,118 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
// Remove upgraded multiply.
CI->eraseFromParent();
+ } else if (F->getName() == "llvm.x86.ssse3.palign.r") {
+ Value *Op1 = CI->getOperand(1);
+ Value *Op2 = CI->getOperand(2);
+ Value *Op3 = CI->getOperand(3);
+ unsigned shiftVal = cast<ConstantInt>(Op3)->getZExtValue();
+ Value *Rep;
+ IRBuilder<> Builder(C);
+ Builder.SetInsertPoint(CI->getParent(), CI);
+
+ // If palignr is shifting the pair of input vectors less than 9 bytes,
+ // emit a shuffle instruction.
+ if (shiftVal <= 8) {
+ const Type *IntTy = Type::getInt32Ty(C);
+ const Type *EltTy = Type::getInt8Ty(C);
+ const Type *VecTy = VectorType::get(EltTy, 8);
+
+ Op2 = Builder.CreateBitCast(Op2, VecTy);
+ Op1 = Builder.CreateBitCast(Op1, VecTy);
+
+ llvm::SmallVector<llvm::Constant*, 8> Indices;
+ for (unsigned i = 0; i != 8; ++i)
+ Indices.push_back(ConstantInt::get(IntTy, shiftVal + i));
+
+ Value *SV = ConstantVector::get(Indices.begin(), Indices.size());
+ Rep = Builder.CreateShuffleVector(Op2, Op1, SV, "palignr");
+ Rep = Builder.CreateBitCast(Rep, F->getReturnType());
+ }
+
+ // If palignr is shifting the pair of input vectors more than 8 but less
+ // than 16 bytes, emit a logical right shift of the destination.
+ else if (shiftVal < 16) {
+ // MMX has these as 1 x i64 vectors for some odd optimization reasons.
+ const Type *EltTy = Type::getInt64Ty(C);
+ const Type *VecTy = VectorType::get(EltTy, 1);
+
+ Op1 = Builder.CreateBitCast(Op1, VecTy, "cast");
+ Op2 = ConstantInt::get(VecTy, (shiftVal-8) * 8);
+
+ // Emit the shift through the MMX psrl.q intrinsic.
+ Function *I =
+ Intrinsic::getDeclaration(F->getParent(), Intrinsic::x86_mmx_psrl_q);
+ Rep = Builder.CreateCall2(I, Op1, Op2, "palignr");
+ }
+
+ // If palignr is shifting the pair of vectors 16 bytes or more, emit zero.
+ else {
+ Rep = Constant::getNullValue(F->getReturnType());
+ }
+
+ // Replace any uses with our new instruction.
+ if (!CI->use_empty())
+ CI->replaceAllUsesWith(Rep);
+
+ // Remove upgraded instruction.
+ CI->eraseFromParent();
+
+ } else if (F->getName() == "llvm.x86.ssse3.palign.r.128") {
+ Value *Op1 = CI->getOperand(1);
+ Value *Op2 = CI->getOperand(2);
+ Value *Op3 = CI->getOperand(3);
+ unsigned shiftVal = cast<ConstantInt>(Op3)->getZExtValue();
+ Value *Rep;
+ IRBuilder<> Builder(C);
+ Builder.SetInsertPoint(CI->getParent(), CI);
+
+ // If palignr is shifting the pair of input vectors less than 17 bytes,
+ // emit a shuffle instruction.
+ if (shiftVal <= 16) {
+ const Type *IntTy = Type::getInt32Ty(C);
+ const Type *EltTy = Type::getInt8Ty(C);
+ const Type *VecTy = VectorType::get(EltTy, 16);
+
+ Op2 = Builder.CreateBitCast(Op2, VecTy);
+ Op1 = Builder.CreateBitCast(Op1, VecTy);
+
+ llvm::SmallVector<llvm::Constant*, 16> Indices;
+ for (unsigned i = 0; i != 16; ++i)
+ Indices.push_back(ConstantInt::get(IntTy, shiftVal + i));
+
+ Value *SV = ConstantVector::get(Indices.begin(), Indices.size());
+ Rep = Builder.CreateShuffleVector(Op2, Op1, SV, "palignr");
+ Rep = Builder.CreateBitCast(Rep, F->getReturnType());
+ }
+
+ // If palignr is shifting the pair of input vectors more than 16 but less
+ // than 32 bytes, emit a logical right shift of the destination.
+ else if (shiftVal < 32) {
+ const Type *EltTy = Type::getInt64Ty(C);
+ const Type *VecTy = VectorType::get(EltTy, 2);
+ const Type *IntTy = Type::getInt32Ty(C);
+
+ Op1 = Builder.CreateBitCast(Op1, VecTy, "cast");
+ Op2 = ConstantInt::get(IntTy, (shiftVal-16) * 8);
+
+ // Emit the shift through the SSE2 psrl.dq intrinsic.
+ Function *I =
+ Intrinsic::getDeclaration(F->getParent(), Intrinsic::x86_sse2_psrl_dq);
+ Rep = Builder.CreateCall2(I, Op1, Op2, "palignr");
+ }
+
+ // If palignr is shifting the pair of vectors 32 bytes or more, emit zero.
+ else {
+ Rep = Constant::getNullValue(F->getReturnType());
+ }
+
+ // Replace any uses with our new instruction.
+ if (!CI->use_empty())
+ CI->replaceAllUsesWith(Rep);
+
+ // Remove upgraded instruction.
+ CI->eraseFromParent();
+
} else {
llvm_unreachable("Unknown function for CallInst upgrade.");
}
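A worked example of the 128-bit path, with hypothetical shift amounts:
shiftVal == 4 emits a <16 x i8> shufflevector selecting bytes 4 through 19 of
the concatenated 32-byte input pair; shiftVal == 20 emits a psrl.dq of the
first operand by (20-16)*8 == 32 bits; and shiftVal == 33 folds the call to
the zero vector.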
diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp
index 1553bd51342d..00b009401dcc 100644
--- a/lib/VMCore/Constants.cpp
+++ b/lib/VMCore/Constants.cpp
@@ -1224,20 +1224,20 @@ Constant *ConstantExpr::getCast(unsigned oc, Constant *C, const Type *Ty) {
Constant *ConstantExpr::getZExtOrBitCast(Constant *C, const Type *Ty) {
if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
- return getCast(Instruction::BitCast, C, Ty);
- return getCast(Instruction::ZExt, C, Ty);
+ return getBitCast(C, Ty);
+ return getZExt(C, Ty);
}
Constant *ConstantExpr::getSExtOrBitCast(Constant *C, const Type *Ty) {
if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
- return getCast(Instruction::BitCast, C, Ty);
- return getCast(Instruction::SExt, C, Ty);
+ return getBitCast(C, Ty);
+ return getSExt(C, Ty);
}
Constant *ConstantExpr::getTruncOrBitCast(Constant *C, const Type *Ty) {
if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
- return getCast(Instruction::BitCast, C, Ty);
- return getCast(Instruction::Trunc, C, Ty);
+ return getBitCast(C, Ty);
+ return getTrunc(C, Ty);
}
Constant *ConstantExpr::getPointerCast(Constant *S, const Type *Ty) {
@@ -1245,8 +1245,8 @@ Constant *ConstantExpr::getPointerCast(Constant *S, const Type *Ty) {
assert((Ty->isIntegerTy() || Ty->isPointerTy()) && "Invalid cast");
if (Ty->isIntegerTy())
- return getCast(Instruction::PtrToInt, S, Ty);
- return getCast(Instruction::BitCast, S, Ty);
+ return getPtrToInt(S, Ty);
+ return getBitCast(S, Ty);
}
Constant *ConstantExpr::getIntegerCast(Constant *C, const Type *Ty,
@@ -1450,12 +1450,6 @@ Constant *ConstantExpr::getCompareTy(unsigned short predicate,
Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2,
unsigned Flags) {
- // API compatibility: Adjust integer opcodes to floating-point opcodes.
- if (C1->getType()->isFPOrFPVectorTy()) {
- if (Opcode == Instruction::Add) Opcode = Instruction::FAdd;
- else if (Opcode == Instruction::Sub) Opcode = Instruction::FSub;
- else if (Opcode == Instruction::Mul) Opcode = Instruction::FMul;
- }
#ifndef NDEBUG
switch (Opcode) {
case Instruction::Add:
@@ -1523,8 +1517,8 @@ Constant* ConstantExpr::getSizeOf(const Type* Ty) {
Constant *GEPIdx = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1);
Constant *GEP = getGetElementPtr(
Constant::getNullValue(PointerType::getUnqual(Ty)), &GEPIdx, 1);
- return getCast(Instruction::PtrToInt, GEP,
- Type::getInt64Ty(Ty->getContext()));
+ return getPtrToInt(GEP,
+ Type::getInt64Ty(Ty->getContext()));
}
Constant* ConstantExpr::getAlignOf(const Type* Ty) {
@@ -1537,8 +1531,8 @@ Constant* ConstantExpr::getAlignOf(const Type* Ty) {
Constant *One = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1);
Constant *Indices[2] = { Zero, One };
Constant *GEP = getGetElementPtr(NullPtr, Indices, 2);
- return getCast(Instruction::PtrToInt, GEP,
- Type::getInt64Ty(Ty->getContext()));
+ return getPtrToInt(GEP,
+ Type::getInt64Ty(Ty->getContext()));
}
Constant* ConstantExpr::getOffsetOf(const StructType* STy, unsigned FieldNo) {
@@ -1555,8 +1549,8 @@ Constant* ConstantExpr::getOffsetOf(const Type* Ty, Constant *FieldNo) {
};
Constant *GEP = getGetElementPtr(
Constant::getNullValue(PointerType::getUnqual(Ty)), GEPIdx, 2);
- return getCast(Instruction::PtrToInt, GEP,
- Type::getInt64Ty(Ty->getContext()));
+ return getPtrToInt(GEP,
+ Type::getInt64Ty(Ty->getContext()));
}
Constant *ConstantExpr::getCompare(unsigned short pred,
@@ -1840,9 +1834,6 @@ Constant *ConstantExpr::getExtractValue(Constant *Agg,
}
Constant* ConstantExpr::getNeg(Constant* C) {
- // API compatibility: Adjust integer opcodes to floating-point opcodes.
- if (C->getType()->isFPOrFPVectorTy())
- return getFNeg(C);
assert(C->getType()->isIntOrIntVectorTy() &&
"Cannot NEG a nonintegral value!");
return get(Instruction::Sub,
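With the silent integer-to-FP opcode remapping removed here (and from
BinaryOperator below), callers must request the floating-point forms
themselves. A hypothetical caller, with F1 and F2 floating-point constants:

  Constant *Sum = ConstantExpr::getFAdd(F1, F2); // was get(Instruction::Add, ...)
  Constant *Neg = ConstantExpr::getFNeg(F1);     // getNeg now asserts on FP types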
diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp
index 634407ca13ff..bbf1375ab0c7 100644
--- a/lib/VMCore/Core.cpp
+++ b/lib/VMCore/Core.cpp
@@ -119,6 +119,11 @@ void LLVMDumpModule(LLVMModuleRef M) {
unwrap(M)->dump();
}
+/*--.. Operations on inline assembler ......................................--*/
+void LLVMSetModuleInlineAsm(LLVMModuleRef M, const char *Asm) {
+ unwrap(M)->setModuleInlineAsm(StringRef(Asm));
+}
+
/*===-- Operations on types -----------------------------------------------===*/
@@ -322,8 +327,7 @@ LLVMTypeRef LLVMUnionTypeInContext(LLVMContextRef C, LLVMTypeRef *ElementTypes,
return wrap(UnionType::get(&Tys[0], Tys.size()));
}
-LLVMTypeRef LLVMUnionType(LLVMTypeRef *ElementTypes,
- unsigned ElementCount, int Packed) {
+LLVMTypeRef LLVMUnionType(LLVMTypeRef *ElementTypes, unsigned ElementCount) {
return LLVMUnionTypeInContext(LLVMGetGlobalContext(), ElementTypes,
ElementCount);
}
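A hypothetical use of the new binding from client code:

  LLVMModuleRef M = LLVMModuleCreateWithName("m");
  LLVMSetModuleInlineAsm(M, ".globl flag\nflag: .long 1\n");

The string is stored on the module and emitted as the module-level asm block
of the textual IR.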
diff --git a/lib/VMCore/Dominators.cpp b/lib/VMCore/Dominators.cpp
index 34417505814f..10a866fab622 100644
--- a/lib/VMCore/Dominators.cpp
+++ b/lib/VMCore/Dominators.cpp
@@ -30,9 +30,9 @@ using namespace llvm;
// Always verify dominfo if expensive checking is enabled.
#ifdef XDEBUG
-bool VerifyDomInfo = true;
+static bool VerifyDomInfo = true;
#else
-bool VerifyDomInfo = false;
+static bool VerifyDomInfo = false;
#endif
static cl::opt<bool,true>
VerifyDomInfoX("verify-dom-info", cl::location(VerifyDomInfo),
@@ -119,7 +119,7 @@ void DominanceFrontier::verifyAnalysis() const {
assert(!compare(OtherDF) && "Invalid DominanceFrontier info!");
}
-// NewBB is split and now it has one successor. Update dominace frontier to
+// NewBB is split and now it has one successor. Update dominance frontier to
// reflect this change.
void DominanceFrontier::splitBlock(BasicBlock *NewBB) {
assert(NewBB->getTerminator()->getNumSuccessors() == 1
@@ -129,7 +129,7 @@ void DominanceFrontier::splitBlock(BasicBlock *NewBB) {
SmallVector<BasicBlock*, 8> PredBlocks;
for (pred_iterator PI = pred_begin(NewBB), PE = pred_end(NewBB);
PI != PE; ++PI)
- PredBlocks.push_back(*PI);
+ PredBlocks.push_back(*PI);
if (PredBlocks.empty())
// If NewBB does not have any predecessors then it is a entry block.
diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp
index 4609a64213bf..f64b220c3fde 100644
--- a/lib/VMCore/Instructions.cpp
+++ b/lib/VMCore/Instructions.cpp
@@ -30,80 +30,6 @@ using namespace llvm;
// CallSite Class
//===----------------------------------------------------------------------===//
-#define CALLSITE_DELEGATE_GETTER(METHOD) \
- Instruction *II = getInstruction(); \
- return isCall() \
- ? cast<CallInst>(II)->METHOD \
- : cast<InvokeInst>(II)->METHOD
-
-#define CALLSITE_DELEGATE_SETTER(METHOD) \
- Instruction *II = getInstruction(); \
- if (isCall()) \
- cast<CallInst>(II)->METHOD; \
- else \
- cast<InvokeInst>(II)->METHOD
-
-CallingConv::ID CallSite::getCallingConv() const {
- CALLSITE_DELEGATE_GETTER(getCallingConv());
-}
-void CallSite::setCallingConv(CallingConv::ID CC) {
- CALLSITE_DELEGATE_SETTER(setCallingConv(CC));
-}
-const AttrListPtr &CallSite::getAttributes() const {
- CALLSITE_DELEGATE_GETTER(getAttributes());
-}
-void CallSite::setAttributes(const AttrListPtr &PAL) {
- CALLSITE_DELEGATE_SETTER(setAttributes(PAL));
-}
-bool CallSite::paramHasAttr(uint16_t i, Attributes attr) const {
- CALLSITE_DELEGATE_GETTER(paramHasAttr(i, attr));
-}
-uint16_t CallSite::getParamAlignment(uint16_t i) const {
- CALLSITE_DELEGATE_GETTER(getParamAlignment(i));
-}
-
-/// @brief Return true if the call should not be inlined.
-bool CallSite::isNoInline() const {
- CALLSITE_DELEGATE_GETTER(isNoInline());
-}
-
-void CallSite::setIsNoInline(bool Value) {
- CALLSITE_DELEGATE_GETTER(setIsNoInline(Value));
-}
-
-
-bool CallSite::doesNotAccessMemory() const {
- CALLSITE_DELEGATE_GETTER(doesNotAccessMemory());
-}
-void CallSite::setDoesNotAccessMemory(bool doesNotAccessMemory) {
- CALLSITE_DELEGATE_SETTER(setDoesNotAccessMemory(doesNotAccessMemory));
-}
-bool CallSite::onlyReadsMemory() const {
- CALLSITE_DELEGATE_GETTER(onlyReadsMemory());
-}
-void CallSite::setOnlyReadsMemory(bool onlyReadsMemory) {
- CALLSITE_DELEGATE_SETTER(setOnlyReadsMemory(onlyReadsMemory));
-}
-bool CallSite::doesNotReturn() const {
- CALLSITE_DELEGATE_GETTER(doesNotReturn());
-}
-void CallSite::setDoesNotReturn(bool doesNotReturn) {
- CALLSITE_DELEGATE_SETTER(setDoesNotReturn(doesNotReturn));
-}
-bool CallSite::doesNotThrow() const {
- CALLSITE_DELEGATE_GETTER(doesNotThrow());
-}
-void CallSite::setDoesNotThrow(bool doesNotThrow) {
- CALLSITE_DELEGATE_SETTER(setDoesNotThrow(doesNotThrow));
-}
-
-bool CallSite::hasArgument(const Value *Arg) const {
- for (arg_iterator AI = this->arg_begin(), E = this->arg_end(); AI != E; ++AI)
- if (AI->get() == Arg)
- return true;
- return false;
-}
-
User::op_iterator CallSite::getCallee() const {
Instruction *II(getInstruction());
return isCall()
@@ -111,9 +37,6 @@ User::op_iterator CallSite::getCallee() const {
: cast<InvokeInst>(II)->op_end() - 3; // Skip BB, BB, Function
}
-#undef CALLSITE_DELEGATE_GETTER
-#undef CALLSITE_DELEGATE_SETTER
-
//===----------------------------------------------------------------------===//
// TerminatorInst Class
//===----------------------------------------------------------------------===//
@@ -1639,43 +1562,29 @@ const Type* ExtractValueInst::getIndexedType(const Type *Agg,
// BinaryOperator Class
//===----------------------------------------------------------------------===//
-/// AdjustIType - Map Add, Sub, and Mul to FAdd, FSub, and FMul when the
-/// type is floating-point, to help provide compatibility with an older API.
-///
-static BinaryOperator::BinaryOps AdjustIType(BinaryOperator::BinaryOps iType,
- const Type *Ty) {
- // API compatibility: Adjust integer opcodes to floating-point opcodes.
- if (Ty->isFPOrFPVectorTy()) {
- if (iType == BinaryOperator::Add) iType = BinaryOperator::FAdd;
- else if (iType == BinaryOperator::Sub) iType = BinaryOperator::FSub;
- else if (iType == BinaryOperator::Mul) iType = BinaryOperator::FMul;
- }
- return iType;
-}
-
BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2,
const Type *Ty, const Twine &Name,
Instruction *InsertBefore)
- : Instruction(Ty, AdjustIType(iType, Ty),
+ : Instruction(Ty, iType,
OperandTraits<BinaryOperator>::op_begin(this),
OperandTraits<BinaryOperator>::operands(this),
InsertBefore) {
Op<0>() = S1;
Op<1>() = S2;
- init(AdjustIType(iType, Ty));
+ init(iType);
setName(Name);
}
BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2,
const Type *Ty, const Twine &Name,
BasicBlock *InsertAtEnd)
- : Instruction(Ty, AdjustIType(iType, Ty),
+ : Instruction(Ty, iType,
OperandTraits<BinaryOperator>::op_begin(this),
OperandTraits<BinaryOperator>::operands(this),
InsertAtEnd) {
Op<0>() = S1;
Op<1>() = S2;
- init(AdjustIType(iType, Ty));
+ init(iType);
setName(Name);
}
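This is the instruction-level counterpart of the Constants.cpp change above: a
hypothetical caller building a float add now writes
BinaryOperator::Create(Instruction::FAdd, X, Y, "sum", InsertPt) explicitly,
since Add is no longer remapped for floating-point types.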
@@ -2060,7 +1969,7 @@ unsigned CastInst::isEliminableCastPair(
// FPEXT < FloatPt n/a FloatPt n/a
// PTRTOINT n/a Pointer n/a Integral Unsigned
// INTTOPTR n/a Integral Unsigned Pointer n/a
- // BITCONVERT = FirstClass n/a FirstClass n/a
+ // BITCAST = FirstClass n/a FirstClass n/a
//
// NOTE: some transforms are safe, but we consider them to be non-profitable.
// For example, we could merge "fptoui double to i32" + "zext i32 to i64",
diff --git a/lib/VMCore/LLVMContext.cpp b/lib/VMCore/LLVMContext.cpp
index 3244f2842c4f..4d61363b9394 100644
--- a/lib/VMCore/LLVMContext.cpp
+++ b/lib/VMCore/LLVMContext.cpp
@@ -17,6 +17,7 @@
#include "llvm/Constants.h"
#include "llvm/Instruction.h"
#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/SourceMgr.h"
#include "LLVMContextImpl.h"
using namespace llvm;
@@ -33,6 +34,10 @@ LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) {
}
LLVMContext::~LLVMContext() { delete pImpl; }
+//===----------------------------------------------------------------------===//
+// Recoverable Backend Errors
+//===----------------------------------------------------------------------===//
+
void LLVMContext::setInlineAsmDiagnosticHandler(void *DiagHandler,
void *DiagContext) {
pImpl->InlineAsmDiagHandler = DiagHandler;
@@ -51,6 +56,39 @@ void *LLVMContext::getInlineAsmDiagnosticContext() const {
return pImpl->InlineAsmDiagContext;
}
+void LLVMContext::emitError(StringRef ErrorStr) {
+ emitError(0U, ErrorStr);
+}
+
+void LLVMContext::emitError(const Instruction *I, StringRef ErrorStr) {
+ unsigned LocCookie = 0;
+ if (const MDNode *SrcLoc = I->getMetadata("srcloc")) {
+ if (SrcLoc->getNumOperands() != 0)
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(SrcLoc->getOperand(0)))
+ LocCookie = CI->getZExtValue();
+ }
+ return emitError(LocCookie, ErrorStr);
+}
+
+void LLVMContext::emitError(unsigned LocCookie, StringRef ErrorStr) {
+ // If there is no error handler installed, just print the error and exit.
+ if (pImpl->InlineAsmDiagHandler == 0) {
+ errs() << "error: " << ErrorStr << "\n";
+ exit(1);
+ }
+
+ // If we do have an error handler, we can report the error and keep going.
+ SMDiagnostic Diag("", "error: " + ErrorStr.str());
+
+ ((SourceMgr::DiagHandlerTy)(intptr_t)pImpl->InlineAsmDiagHandler)
+ (Diag, pImpl->InlineAsmDiagContext, LocCookie);
+
+}
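// A hypothetical client of this hook, matching the three-argument
// DiagHandlerTy cast above (the handler and state names are assumed):
//
//   static void MyHandler(const SMDiagnostic &D, void *Ctx, unsigned Cookie) {
//     D.Print("mytool", errs());
//   }
//   ...
//   Context.setInlineAsmDiagnosticHandler((void*)(intptr_t)&MyHandler, &State);
//   Context.emitError(BadInst, "invalid operand for inline asm");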
+
+//===----------------------------------------------------------------------===//
+// Metadata Kind Uniquing
+//===----------------------------------------------------------------------===//
+
#ifndef NDEBUG
/// isValidName - Return true if Name is a valid custom metadata handler name.
static bool isValidName(StringRef MDName) {
diff --git a/lib/VMCore/LLVMContextImpl.cpp b/lib/VMCore/LLVMContextImpl.cpp
index e71157f44020..9e41a0815608 100644
--- a/lib/VMCore/LLVMContextImpl.cpp
+++ b/lib/VMCore/LLVMContextImpl.cpp
@@ -13,6 +13,7 @@
#include "LLVMContextImpl.h"
#include <algorithm>
+using namespace llvm;
LLVMContextImpl::LLVMContextImpl(LLVMContext &C)
: TheTrueVal(0), TheFalseVal(0),
diff --git a/lib/VMCore/LeaksContext.h b/lib/VMCore/LeaksContext.h
index abff090b8796..b9e59d46b7ad 100644
--- a/lib/VMCore/LeaksContext.h
+++ b/lib/VMCore/LeaksContext.h
@@ -14,7 +14,8 @@
#include "llvm/Value.h"
#include "llvm/ADT/SmallPtrSet.h"
-using namespace llvm;
+
+namespace llvm {
template <class T>
struct PrinterTrait {
@@ -87,3 +88,5 @@ private:
const T* Cache;
const char* Name;
};
+
+}
diff --git a/lib/VMCore/Makefile b/lib/VMCore/Makefile
index 4395ecfda05b..03a4fc707deb 100644
--- a/lib/VMCore/Makefile
+++ b/lib/VMCore/Makefile
@@ -1,4 +1,4 @@
-##===- lib/VMCore/Makefile ------------------------------*- Makefile -*-===##
+##===- lib/VMCore/Makefile ---------------------------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp
index 72de0321c3aa..092fe00a5369 100644
--- a/lib/VMCore/Metadata.cpp
+++ b/lib/VMCore/Metadata.cpp
@@ -178,6 +178,13 @@ void MDNode::destroy() {
free(this);
}
+/// isFunctionLocalValue - Return true if this is a value that would require a
+/// function-local MDNode.
+static bool isFunctionLocalValue(Value *V) {
+ return isa<Instruction>(V) || isa<Argument>(V) || isa<BasicBlock>(V) ||
+ (isa<MDNode>(V) && cast<MDNode>(V)->isFunctionLocal());
+}
+
MDNode *MDNode::getMDNode(LLVMContext &Context, Value *const *Vals,
unsigned NumVals, FunctionLocalness FL,
bool Insert) {
@@ -188,8 +195,7 @@ MDNode *MDNode::getMDNode(LLVMContext &Context, Value *const *Vals,
for (unsigned i = 0; i != NumVals; ++i) {
Value *V = Vals[i];
if (!V) continue;
- if (isa<Instruction>(V) || isa<Argument>(V) || isa<BasicBlock>(V) ||
- (isa<MDNode>(V) && cast<MDNode>(V)->isFunctionLocal())) {
+ if (isFunctionLocalValue(V)) {
isFunctionLocal = true;
break;
}
@@ -262,6 +268,13 @@ void MDNode::setIsNotUniqued() {
void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) {
Value *From = *Op;
+ // It is possible that someone did GV->RAUW(inst), replacing a global variable
+ // with an instruction or some other function-local object. If this is a
+ // non-function-local MDNode, it can't point to a function-local object.
+ // Handle this case by implicitly dropping the MDNode reference to null.
+ if (!isFunctionLocal() && To && isFunctionLocalValue(To))
+ To = 0;
+
if (From == To)
return;
diff --git a/lib/VMCore/Pass.cpp b/lib/VMCore/Pass.cpp
index 6b941f34996e..a60877db2f62 100644
--- a/lib/VMCore/Pass.cpp
+++ b/lib/VMCore/Pass.cpp
@@ -318,6 +318,8 @@ static PassRegistrar *getPassRegistrar() {
return PassRegistrarObj;
}
+namespace {
+
// FIXME: We use ManagedCleanup to erase the pass registrar on shutdown.
// Unfortunately, passes are registered with static ctors, and having
// llvm_shutdown clear this map prevents successful ressurection after
@@ -329,7 +331,9 @@ void cleanupPassRegistrar(void*) {
PassRegistrarObj = 0;
}
}
-ManagedCleanup<&cleanupPassRegistrar> registrarCleanup;
+ManagedCleanup<&cleanupPassRegistrar> registrarCleanup ATTRIBUTE_USED;
+
+}
// getPassInfo - Return the PassInfo data structure that corresponds to this
// pass...
diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp
index 6ca35ac0260f..b28fdebd5232 100644
--- a/lib/VMCore/PassManager.cpp
+++ b/lib/VMCore/PassManager.cpp
@@ -1293,9 +1293,8 @@ void FunctionPassManager::add(Pass *P) {
bool FunctionPassManager::run(Function &F) {
if (F.isMaterializable()) {
std::string errstr;
- if (F.Materialize(&errstr)) {
- llvm_report_error("Error reading bitcode file: " + errstr);
- }
+ if (F.Materialize(&errstr))
+ report_fatal_error("Error reading bitcode file: " + Twine(errstr));
}
return FPM->run(F);
}
diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp
index 5f9c11fc0ed9..845b523c2421 100644
--- a/lib/VMCore/Type.cpp
+++ b/lib/VMCore/Type.cpp
@@ -380,6 +380,10 @@ const Type *Type::getPPC_FP128Ty(LLVMContext &C) {
return &C.pImpl->PPC_FP128Ty;
}
+const IntegerType *Type::getIntNTy(LLVMContext &C, unsigned N) {
+ return IntegerType::get(C, N);
+}
+
const IntegerType *Type::getInt1Ty(LLVMContext &C) {
return &C.pImpl->Int1Ty;
}
@@ -420,6 +424,10 @@ const PointerType *Type::getPPC_FP128PtrTy(LLVMContext &C, unsigned AS) {
return getPPC_FP128Ty(C)->getPointerTo(AS);
}
+const PointerType *Type::getIntNPtrTy(LLVMContext &C, unsigned N, unsigned AS) {
+ return getIntNTy(C, N)->getPointerTo(AS);
+}
+
const PointerType *Type::getInt1PtrTy(LLVMContext &C, unsigned AS) {
return getInt1Ty(C)->getPointerTo(AS);
}
diff --git a/lib/VMCore/TypeSymbolTable.cpp b/lib/VMCore/TypeSymbolTable.cpp
index b4daf0f63144..d68a44bd6711 100644
--- a/lib/VMCore/TypeSymbolTable.cpp
+++ b/lib/VMCore/TypeSymbolTable.cpp
@@ -126,13 +126,15 @@ void TypeSymbolTable::refineAbstractType(const DerivedType *OldType,
// faster to remove them all in one pass.
//
for (iterator I = begin(), E = end(); I != E; ++I) {
- if (I->second == (Type*)OldType) { // FIXME when Types aren't const.
+ // FIXME when Types aren't const.
+ if (I->second == const_cast<DerivedType *>(OldType)) {
#if DEBUG_ABSTYPE
dbgs() << "Removing type " << OldType->getDescription() << "\n";
#endif
OldType->removeAbstractTypeUser(this);
- I->second = (Type*)NewType; // TODO FIXME when types aren't const
+ // TODO FIXME when types aren't const
+ I->second = const_cast<Type *>(NewType);
if (NewType->isAbstract()) {
#if DEBUG_ABSTYPE
dbgs() << "Added type " << NewType->getDescription() << "\n";
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index c18168d8a5c7..6ad427218d7b 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -93,7 +93,7 @@ namespace { // Anonymous namespace for class
}
if (Broken)
- llvm_report_error("Broken module, no Basic Block terminator!");
+ report_fatal_error("Broken module, no Basic Block terminator!");
return false;
}
@@ -176,6 +176,10 @@ namespace {
/// Types - keep track of the types that have been checked already.
TypeSet Types;
+ /// MDNodes - keep track of the metadata nodes that have been checked
+ /// already.
+ SmallPtrSet<MDNode *, 32> MDNodes;
+
Verifier()
: FunctionPass(&ID),
Broken(false), RealPass(true), action(AbortProcessAction),
@@ -244,6 +248,10 @@ namespace {
I != E; ++I)
visitGlobalAlias(*I);
+ for (Module::named_metadata_iterator I = M.named_metadata_begin(),
+ E = M.named_metadata_end(); I != E; ++I)
+ visitNamedMDNode(*I);
+
// If the module is broken, abort at this time.
return abortIfBroken();
}
@@ -284,6 +292,8 @@ namespace {
void visitGlobalValue(GlobalValue &GV);
void visitGlobalVariable(GlobalVariable &GV);
void visitGlobalAlias(GlobalAlias &GA);
+ void visitNamedMDNode(NamedMDNode &NMD);
+ void visitMDNode(MDNode &MD, Function *F);
void visitFunction(Function &F);
void visitBasicBlock(BasicBlock &BB);
using InstVisitor<Verifier>::visit;
@@ -333,8 +343,6 @@ namespace {
int VT, unsigned ArgNo, std::string &Suffix);
void VerifyIntrinsicPrototype(Intrinsic::ID ID, Function *F,
unsigned RetNum, unsigned ParamNum, ...);
- void VerifyFunctionLocalMetadata(MDNode *N, Function *F,
- SmallPtrSet<MDNode *, 32> &Visited);
void VerifyParameterAttrs(Attributes Attrs, const Type *Ty,
bool isReturnValue, const Value *V);
void VerifyFunctionAttrs(const FunctionType *FT, const AttrListPtr &Attrs,
@@ -489,6 +497,54 @@ void Verifier::visitGlobalAlias(GlobalAlias &GA) {
visitGlobalValue(GA);
}
+void Verifier::visitNamedMDNode(NamedMDNode &NMD) {
+ for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i) {
+ MDNode *MD = NMD.getOperand(i);
+ if (!MD)
+ continue;
+
+ Assert2(!MD->isFunctionLocal(),
+ "Named metadata operand cannot be function local!", &NMD, MD);
+ visitMDNode(*MD, 0);
+ }
+}
+
+void Verifier::visitMDNode(MDNode &MD, Function *F) {
+ // Only visit each node once. Metadata can be mutually recursive, so this
+ // avoids infinite recursion here, as well as being an optimization.
+ if (!MDNodes.insert(&MD))
+ return;
+
+ for (unsigned i = 0, e = MD.getNumOperands(); i != e; ++i) {
+ Value *Op = MD.getOperand(i);
+ if (!Op)
+ continue;
+ if (isa<Constant>(Op) || isa<MDString>(Op) || isa<NamedMDNode>(Op))
+ continue;
+ if (MDNode *N = dyn_cast<MDNode>(Op)) {
+ Assert2(MD.isFunctionLocal() || !N->isFunctionLocal(),
+ "Global metadata operand cannot be function local!", &MD, N);
+ visitMDNode(*N, F);
+ continue;
+ }
+ Assert2(MD.isFunctionLocal(), "Invalid operand for global metadata!", &MD, Op);
+
+ // If this was an instruction, bb, or argument, verify that it is in the
+ // function that we expect.
+ Function *ActualF = 0;
+ if (Instruction *I = dyn_cast<Instruction>(Op))
+ ActualF = I->getParent()->getParent();
+ else if (BasicBlock *BB = dyn_cast<BasicBlock>(Op))
+ ActualF = BB->getParent();
+ else if (Argument *A = dyn_cast<Argument>(Op))
+ ActualF = A->getParent();
+ assert(ActualF && "Unimplemented function local metadata case!");
+
+ Assert2(ActualF == F, "function-local metadata used in wrong function",
+ &MD, Op);
+ }
+}
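// For example (hypothetical IR), a node !{i32 %x} is function-local because
// its operand is an instruction result; the checks above reject it as an
// operand of named or global metadata, and reject its use from any function
// other than the one defining %x.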
+
void Verifier::verifyTypeSymbolTable(TypeSymbolTable &ST) {
for (TypeSymbolTable::iterator I = ST.begin(), E = ST.end(); I != E; ++I)
VerifyType(I->second);
@@ -1553,38 +1609,6 @@ void Verifier::VerifyType(const Type *Ty) {
}
}
-/// VerifyFunctionLocalMetadata - Verify that the specified MDNode is local to
-/// specified Function.
-void Verifier::VerifyFunctionLocalMetadata(MDNode *N, Function *F,
- SmallPtrSet<MDNode *, 32> &Visited) {
- assert(N->isFunctionLocal() && "Should only be called on function-local MD");
-
- // Only visit each node once.
- if (!Visited.insert(N))
- return;
-
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
- Value *V = N->getOperand(i);
- if (!V) continue;
-
- Function *ActualF = 0;
- if (Instruction *I = dyn_cast<Instruction>(V))
- ActualF = I->getParent()->getParent();
- else if (BasicBlock *BB = dyn_cast<BasicBlock>(V))
- ActualF = BB->getParent();
- else if (Argument *A = dyn_cast<Argument>(V))
- ActualF = A->getParent();
- else if (MDNode *MD = dyn_cast<MDNode>(V))
- if (MD->isFunctionLocal())
- VerifyFunctionLocalMetadata(MD, F, Visited);
-
- // If this was an instruction, bb, or argument, verify that it is in the
- // function that we expect.
- Assert1(ActualF == 0 || ActualF == F,
- "function-local metadata used in wrong function", N);
- }
-}
-
// Flags used by TableGen to mark intrinsic parameters with the
// LLVMExtendedElementVectorType and LLVMTruncatedElementVectorType classes.
static const unsigned ExtendedElementVectorType = 0x40000000;
@@ -1604,11 +1628,8 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
// If the intrinsic takes MDNode arguments, verify that they are either global
// or are local to *this* function.
for (unsigned i = 1, e = CI.getNumOperands(); i != e; ++i)
- if (MDNode *MD = dyn_cast<MDNode>(CI.getOperand(i))) {
- if (!MD->isFunctionLocal()) continue;
- SmallPtrSet<MDNode *, 32> Visited;
- VerifyFunctionLocalMetadata(MD, CI.getParent()->getParent(), Visited);
- }
+ if (MDNode *MD = dyn_cast<MDNode>(CI.getOperand(i)))
+ visitMDNode(*MD, CI.getParent()->getParent());
switch (ID) {
default:
@@ -1933,7 +1954,9 @@ FunctionPass *llvm::createVerifierPass(VerifierFailureAction action) {
}
-// verifyFunction - Create
+/// verifyFunction - Check a function for errors, printing messages on stderr.
+/// Return true if the function is corrupt.
+///
bool llvm::verifyFunction(const Function &f, VerifierFailureAction action) {
Function &F = const_cast<Function&>(f);
assert(!F.isDeclaration() && "Cannot verify external functions");
diff --git a/runtime/Makefile b/runtime/Makefile
index 7209867c2f6c..d0e85d58264f 100644
--- a/runtime/Makefile
+++ b/runtime/Makefile
@@ -20,7 +20,7 @@ ifeq ($(ARCH), Sparc)
PARALLEL_DIRS := $(filter-out libprofile, $(PARALLEL_DIRS))
endif
-ifeq ($(TARGET_OS), $(filter $(TARGET_OS), Cygwin MingW))
+ifeq ($(TARGET_OS), $(filter $(TARGET_OS), Cygwin MingW Minix))
PARALLEL_DIRS := $(filter-out libprofile, $(PARALLEL_DIRS))
endif
diff --git a/runtime/libprofile/exported_symbols.lst b/runtime/libprofile/exported_symbols.lst
index aafafb68a030..f45ff4760189 100644
--- a/runtime/libprofile/exported_symbols.lst
+++ b/runtime/libprofile/exported_symbols.lst
@@ -1,4 +1,3 @@
-
llvm_start_edge_profiling
llvm_start_opt_edge_profiling
llvm_start_basic_block_tracing
diff --git a/test/Analysis/BasicAA/cas.ll b/test/Analysis/BasicAA/cas.ll
index 4ce7811634e2..8dd3695d6d57 100644
--- a/test/Analysis/BasicAA/cas.ll
+++ b/test/Analysis/BasicAA/cas.ll
@@ -12,4 +12,4 @@ define i32 @main() {
ret i32 %d
}
-declare i32 @llvm.atomic.swap.i32.p0i32(i32*, i32) nounwind \ No newline at end of file
+declare i32 @llvm.atomic.swap.i32.p0i32(i32*, i32) nounwind
diff --git a/test/Analysis/BasicAA/empty.ll b/test/Analysis/BasicAA/empty.ll
new file mode 100644
index 000000000000..689efec26adb
--- /dev/null
+++ b/test/Analysis/BasicAA/empty.ll
@@ -0,0 +1,10 @@
+; RUN: opt < %s -aa-eval -print-all-alias-modref-info -disable-output \
+; RUN: |& grep {NoAlias: \{\}\\* \[%\]p, \{\}\\* \[%\]q}
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+define void @foo({}* %p, {}* %q) {
+ store {} {}, {}* %p
+ store {} {}, {}* %q
+ ret void
+}
diff --git a/test/Analysis/CallGraph/2008-09-09-DirectCall.ll b/test/Analysis/CallGraph/2008-09-09-DirectCall.ll
index 6e34209a0e36..784d6c7ba1b1 100644
--- a/test/Analysis/CallGraph/2008-09-09-DirectCall.ll
+++ b/test/Analysis/CallGraph/2008-09-09-DirectCall.ll
@@ -1,5 +1,5 @@
; RUN: opt < %s -print-callgraph -disable-output |& \
-; RUN: grep {Calls function 'callee'} | count 2
+; RUN: grep {calls function 'callee'} | count 2
define internal void @callee(...) {
entry:
diff --git a/test/Analysis/CallGraph/2008-09-09-UsedByGlobal.ll b/test/Analysis/CallGraph/2008-09-09-UsedByGlobal.ll
index 12849b7aa96c..0c5ef92ef71e 100644
--- a/test/Analysis/CallGraph/2008-09-09-UsedByGlobal.ll
+++ b/test/Analysis/CallGraph/2008-09-09-UsedByGlobal.ll
@@ -1,5 +1,4 @@
-; RUN: opt < %s -print-callgraph -disable-output |& \
-; RUN: grep {Calls function}
+; RUN: opt < %s -print-callgraph -disable-output |& grep {calls function}
@a = global void ()* @f ; <void ()**> [#uses=0]
diff --git a/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll b/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll
index 06637b5eaee7..335bbaf9ad09 100644
--- a/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll
+++ b/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll
@@ -1,6 +1,5 @@
; RUN: opt < %s -analyze -scalar-evolution |& \
; RUN: grep {Loop %bb: backedge-taken count is (7 + (-1 \\* %argc))}
-; XFAIL: *
define i32 @main(i32 %argc, i8** %argv) nounwind {
entry:
diff --git a/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll b/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll
index 102acc65650f..fa9f21af3713 100644
--- a/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll
+++ b/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll
@@ -1,6 +1,9 @@
; RUN: opt < %s -analyze -scalar-evolution |& grep {/u 3}
; XFAIL: *
+; This is a tricky testcase for unsigned wrap detection which ScalarEvolution
+; doesn't yet know how to do.
+
define i32 @f(i32 %x) nounwind readnone {
entry:
%0 = icmp ugt i32 %x, 999 ; <i1> [#uses=1]
diff --git a/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll b/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll
index 226221b297f9..25a0434b29e2 100644
--- a/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll
+++ b/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll
@@ -1,5 +1,4 @@
; RUN: opt < %s -analyze -scalar-evolution | grep {backedge-taken count is 255}
-; XFAIL: *
define i32 @foo(i32 %x, i32 %y, i32* %lam, i32* %alp) nounwind {
bb1.thread:
diff --git a/test/Analysis/ScalarEvolution/2008-12-11-SMaxOverflow.ll b/test/Analysis/ScalarEvolution/2008-12-11-SMaxOverflow.ll
index 33a74795d2ea..12254e37dcc7 100644
--- a/test/Analysis/ScalarEvolution/2008-12-11-SMaxOverflow.ll
+++ b/test/Analysis/ScalarEvolution/2008-12-11-SMaxOverflow.ll
@@ -1,5 +1,7 @@
-; RUN: opt < %s -analyze -scalar-evolution | grep {0 smax}
-; XFAIL: *
+; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
+
+; CHECK: @f
+; CHECK: Loop %bb16.preheader: backedge-taken count is (-1 + %c.idx.val)
define i32 @f(i32 %c.idx.val) {
diff --git a/test/Analysis/ScalarEvolution/sle.ll b/test/Analysis/ScalarEvolution/sle.ll
new file mode 100644
index 000000000000..f38f6b63dce4
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/sle.ll
@@ -0,0 +1,27 @@
+; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s
+
+; ScalarEvolution should be able to use nsw information to prove that
+; this loop has a finite trip count.
+
+; CHECK: @le
+; CHECK: Loop %for.body: backedge-taken count is %n
+; CHECK: Loop %for.body: max backedge-taken count is 9223372036854775807
+
+define void @le(i64 %n, double* nocapture %p) nounwind {
+entry:
+ %cmp6 = icmp slt i64 %n, 0 ; <i1> [#uses=1]
+ br i1 %cmp6, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ] ; <i64> [#uses=2]
+ %arrayidx = getelementptr double* %p, i64 %i ; <double*> [#uses=2]
+ %t4 = load double* %arrayidx ; <double> [#uses=1]
+ %mul = fmul double %t4, 2.200000e+00 ; <double> [#uses=1]
+ store double %mul, double* %arrayidx
+ %i.next = add nsw i64 %i, 1 ; <i64> [#uses=2]
+ %cmp = icmp sgt i64 %i.next, %n ; <i1> [#uses=1]
+ br i1 %cmp, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
diff --git a/test/Analysis/ScalarEvolution/undefined.ll b/test/Analysis/ScalarEvolution/undefined.ll
new file mode 100644
index 000000000000..b1f44460af6b
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/undefined.ll
@@ -0,0 +1,39 @@
+; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s
+
+; ScalarEvolution shouldn't attempt to interpret expressions which have
+; undefined results.
+
+define void @foo(i64 %x) {
+
+ %a = udiv i64 %x, 0
+; CHECK: --> (%x /u 0)
+
+ %B = shl i64 %x, 64
+; CHECK: --> %B
+
+ %b = ashr i64 %B, 64
+; CHECK: --> %b
+
+ %c = lshr i64 %x, 64
+; CHECK: --> %c
+
+ %d = shl i64 %x, 64
+; CHECK: --> %d
+
+ %E = shl i64 %x, -1
+; CHECK: --> %E
+
+ %e = ashr i64 %E, -1
+; CHECK: --> %e
+
+ %f = lshr i64 %x, -1
+; CHECK: --> %f
+
+ %g = shl i64 %x, -1
+; CHECK: --> %g
+
+ %h = bitcast i64 undef to i64
+; CHECK: --> undef
+
+ ret void
+}
diff --git a/test/Analysis/ScalarEvolution/unsimplified-loop.ll b/test/Analysis/ScalarEvolution/unsimplified-loop.ll
new file mode 100644
index 000000000000..a3175077b686
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/unsimplified-loop.ll
@@ -0,0 +1,29 @@
+; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s
+
+; This loop has no preheader, multiple backedges, etc., but ScalarEvolution
+; should still be able to analyze it.
+
+; CHECK: %i = phi i64 [ 5, %entry ], [ 5, %alt ], [ %i.next, %loop.a ], [ %i.next, %loop.b ]
+; CHECK-NEXT: --> {5,+,1}<%loop>
+
+define void @foo(i1 %p, i1 %q, i1 %s, i1 %u) {
+entry:
+ br i1 %p, label %loop, label %alt
+
+alt:
+ br i1 %s, label %loop, label %exit
+
+loop:
+ %i = phi i64 [ 5, %entry ], [ 5, %alt ], [ %i.next, %loop.a ], [ %i.next, %loop.b ]
+ %i.next = add i64 %i, 1
+ br i1 %q, label %loop.a, label %loop.b
+
+loop.a:
+ br label %loop
+
+loop.b:
+ br i1 %u, label %loop, label %exit
+
+exit:
+ ret void
+}
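
For comparison, the loop-simplify canonicalization that most loop passes rely on would rewrite such a loop to have a dedicated preheader and a single merged backedge; the point of this test is that SCEV copes without it. A sketch of the canonical shape (hypothetical, block names assumed):

define void @canonical(i1 %q) {
entry:
  br label %preheader

preheader:                        ; dedicated preheader: one edge into the loop
  br label %loop

loop:
  %i = phi i64 [ 5, %preheader ], [ %i.next, %latch ]
  %i.next = add i64 %i, 1
  br label %latch

latch:                            ; all backedges merged into one
  br i1 %q, label %loop, label %exit

exit:
  ret void
}
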
diff --git a/test/Assembler/metadata.ll b/test/Assembler/metadata.ll
index a52de87650b7..50f27b4eb45f 100644
--- a/test/Assembler/metadata.ll
+++ b/test/Assembler/metadata.ll
@@ -19,4 +19,4 @@ declare void @llvm.dbg.func.start(metadata) nounwind readnone
!foo = !{ !0 }
!bar = !{ !1 }
-; !foo = !{ !0, !"foo" } \ No newline at end of file
+; !foo = !{ !0, !"foo" }
diff --git a/test/Bitcode/memcpy.ll b/test/Bitcode/memcpy.ll
index b6573b5f6216..299eb1ed41e3 100644
--- a/test/Bitcode/memcpy.ll
+++ b/test/Bitcode/memcpy.ll
@@ -20,4 +20,4 @@ declare void @llvm.memset.i32(i8*, i8, i32, i32)
declare void @llvm.memmove.i32(i8*, i8*, i32, i32)
-declare void @llvm.memmove.i64(i8*, i8*, i32, i32)
+declare void @llvm.memmove.i64(i8*, i8*, i64, i32)
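
This fix is type-directed: the .i32/.i64 suffix on these intrinsics names the type of the length operand (the third parameter), so the .i64 variant must take an i64 length. Side by side (signatures as used in this test; the final i32 is the alignment operand):

declare void @llvm.memmove.i32(i8*, i8*, i32, i32)   ; i32 length, i32 align
declare void @llvm.memmove.i64(i8*, i8*, i64, i32)   ; i64 length, i32 align
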
diff --git a/test/Bitcode/sse41_pmulld.ll b/test/Bitcode/sse41_pmulld.ll
index caf85479bbc2..6872cc0b712d 100644
--- a/test/Bitcode/sse41_pmulld.ll
+++ b/test/Bitcode/sse41_pmulld.ll
@@ -1,2 +1,2 @@
; RUN: llvm-dis < %s.bc | not grep {i32 @llvm\\.pmulld}
-; RUN: llvm-dis < %s.bc | grep mul \ No newline at end of file
+; RUN: llvm-dis < %s.bc | grep mul
diff --git a/test/Bitcode/ssse3_palignr.ll b/test/Bitcode/ssse3_palignr.ll
new file mode 100644
index 000000000000..d596dd5eb363
--- /dev/null
+++ b/test/Bitcode/ssse3_palignr.ll
@@ -0,0 +1 @@
+; RUN: llvm-dis < %s.bc | not grep {@llvm\\.palign}
diff --git a/test/Bitcode/ssse3_palignr.ll.bc b/test/Bitcode/ssse3_palignr.ll.bc
new file mode 100644
index 000000000000..642f4dedc417
--- /dev/null
+++ b/test/Bitcode/ssse3_palignr.ll.bc
Binary files differ
diff --git a/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll b/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll
index f2a8ee1a1424..52937c1dd9fc 100644
--- a/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll
+++ b/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll
@@ -1,7 +1,5 @@
; RUN: llc < %s -march=arm -enable-tail-merge | grep bl.*baz | count 1
; RUN: llc < %s -march=arm -enable-tail-merge | grep bl.*quux | count 1
-; RUN: llc < %s -march=arm -enable-tail-merge -enable-eh | grep bl.*baz | count 1
-; RUN: llc < %s -march=arm -enable-tail-merge -enable-eh | grep bl.*quux | count 1
; Check that calls to baz and quux are tail-merged.
; PR1628
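
Tail merging is the branch-folder optimization these tests exercise: when two predecessors end in an identical instruction sequence, that tail is emitted once and both paths branch to it, which is why each call is expected exactly once (count 1). A minimal IR shape that produces mergeable tails (hypothetical sketch, not the test's actual body):

define void @t(i1 %p) nounwind {
entry:
  br i1 %p, label %a, label %b

a:
  call void @baz(i32 1)
  br label %done

b:
  call void @baz(i32 1)                ; identical tail: after branch folding
  br label %done                       ; the bl to baz is emitted only once

done:
  ret void
}

declare void @baz(i32)
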
diff --git a/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll b/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll
index 275850581154..c925fa83ef36 100644
--- a/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll
+++ b/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll
@@ -1,7 +1,5 @@
; RUN: llc < %s -march=arm -enable-tail-merge | grep bl.*baz | count 1
; RUN: llc < %s -march=arm -enable-tail-merge | grep bl.*quux | count 1
-; RUN: llc < %s -march=arm -enable-tail-merge -enable-eh | grep bl.*baz | count 1
-; RUN: llc < %s -march=arm -enable-tail-merge -enable-eh | grep bl.*quux | count 1
; Check that calls to baz and quux are tail-merged.
; PR1628
diff --git a/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll b/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll
index 061bf5e851b0..9df5af59a79f 100644
--- a/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll
+++ b/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll
@@ -2,10 +2,6 @@
; RUN: llc < %s -march=arm | grep bl.*quux | count 1
; RUN: llc < %s -march=arm -enable-tail-merge=0 | grep bl.*baz | count 2
; RUN: llc < %s -march=arm -enable-tail-merge=0 | grep bl.*quux | count 2
-; RUN: llc < %s -march=arm -enable-eh | grep bl.*baz | count 1
-; RUN: llc < %s -march=arm -enable-eh | grep bl.*quux | count 1
-; RUN: llc < %s -march=arm -enable-tail-merge=0 -enable-eh | grep bl.*baz | count 2
-; RUN: llc < %s -march=arm -enable-tail-merge=0 -enable-eh | grep bl.*quux | count 2
; Check that tail merging is the default on ARM, and that -enable-tail-merge=0 works.
; PR1628
diff --git a/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll b/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
index b6cf880a3001..c0ad65f81b6b 100644
--- a/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
+++ b/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
@@ -86,7 +86,7 @@ bb: ; preds = %entry
return: ; preds = %bb
ret void
}
-;CHECK: L_LSDA_1:
+;CHECK: L_LSDA_0:
declare arm_apcscc void @_ZdlPv(i8*) nounwind
diff --git a/test/CodeGen/ARM/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/ARM/2010-04-07-DbgValueOtherTargets.ll
new file mode 100644
index 000000000000..8a24cfa39785
--- /dev/null
+++ b/test/CodeGen/ARM/2010-04-07-DbgValueOtherTargets.ll
@@ -0,0 +1,33 @@
+; RUN: llc -O0 -march=arm -asm-verbose < %s | FileCheck %s
+; Check that DEBUG_VALUE comments come through on a variety of targets.
+
+%tart.reflect.ComplexType = type { double, double }
+
+@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
+
+define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+entry:
+; CHECK: DEBUG_VALUE
+ tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
+ tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
+ ret i32 3
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
+
+!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
+!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
+!3 = metadata !{metadata !4, metadata !6, metadata !7}
+!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
+!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
+!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
+!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
+!9 = metadata !{i32 458763, metadata !10} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
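
For reference, the llvm.dbg.value intrinsic used above binds a value to a source-level variable: in this older three-operand form, the first operand wraps the value, the second is an offset (normally 0), and the third refers to the variable's debug descriptor. With -O0 and -asm-verbose, the backend prints such bindings as DEBUG_VALUE comments, which is what the CHECK line looks for. Annotated (same call as in the test; operand roles as assumed here):

  tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
  ; operand 0: wrapped value, operand 1: offset, operand 2: variable (!8)
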
diff --git a/test/CodeGen/ARM/2010-04-09-NeonSelect.ll b/test/CodeGen/ARM/2010-04-09-NeonSelect.ll
new file mode 100644
index 000000000000..71e0b0a36ce7
--- /dev/null
+++ b/test/CodeGen/ARM/2010-04-09-NeonSelect.ll
@@ -0,0 +1,23 @@
+; RUN: llc -march=arm -mattr=+neon < %s
+; Radar 7770501: Don't crash on SELECT and SELECT_CC with NEON vector values.
+
+define arm_apcscc void @vDSP_FFT16_copv(float* nocapture %O, float* nocapture %I, i32 %Direction) nounwind {
+entry:
+ %.22 = select i1 undef, <4 x float> undef, <4 x float> zeroinitializer ; <<4 x float>> [#uses=1]
+ %0 = fadd <4 x float> undef, %.22 ; <<4 x float>> [#uses=1]
+ %1 = fsub <4 x float> %0, undef ; <<4 x float>> [#uses=1]
+ %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> <i32 2, i32 6, i32 3, i32 7> ; <<4 x float>> [#uses=1]
+ %3 = shufflevector <4 x float> undef, <4 x float> %2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> ; <<4 x float>> [#uses=1]
+ %4 = fmul <4 x float> %3, <float 0.000000e+00, float 0x3FED906BC0000000, float 0x3FE6A09E60000000, float 0xBFD87DE2A0000000> ; <<4 x float>> [#uses=1]
+ %5 = fadd <4 x float> undef, %4 ; <<4 x float>> [#uses=1]
+ %6 = fadd <4 x float> undef, %5 ; <<4 x float>> [#uses=1]
+ %7 = fadd <4 x float> undef, %6 ; <<4 x float>> [#uses=1]
+ br i1 undef, label %bb4, label %bb3
+
+bb3: ; preds = %entry
+ %8 = shufflevector <4 x float> undef, <4 x float> %7, <4 x i32> <i32 2, i32 6, i32 3, i32 7> ; <<4 x float>> [#uses=0]
+ ret void
+
+bb4: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/ARM/2010-04-13-v2f64SplitArg.ll b/test/CodeGen/ARM/2010-04-13-v2f64SplitArg.ll
new file mode 100644
index 000000000000..4f71b83b9495
--- /dev/null
+++ b/test/CodeGen/ARM/2010-04-13-v2f64SplitArg.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin -mcpu=cortex-a8
+; Radar 7855014
+
+define arm_apcscc void @test1(i32 %f0, i32 %f1, i32 %f2, <4 x i32> %f3) nounwind {
+entry:
+ unreachable
+}
diff --git a/test/CodeGen/ARM/2010-04-14-SplitVector.ll b/test/CodeGen/ARM/2010-04-14-SplitVector.ll
new file mode 100644
index 000000000000..42f98521e30c
--- /dev/null
+++ b/test/CodeGen/ARM/2010-04-14-SplitVector.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=arm -mcpu=arm1136jf-s
+; Radar 7854640
+
+define arm_apcscc void @test() nounwind {
+bb:
+ br i1 undef, label %bb9, label %bb10
+
+bb9:
+ %tmp63 = bitcast <4 x float> zeroinitializer to i128
+ %tmp64 = trunc i128 %tmp63 to i32
+ br label %bb10
+
+bb10:
+ %0 = phi i32 [ %tmp64, %bb9 ], [ undef, %bb ]
+ ret void
+}
diff --git a/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll b/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll
new file mode 100644
index 000000000000..ed7bca8510d3
--- /dev/null
+++ b/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s
+; PR6847
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64-n32"
+target triple = "armv4t-apple-darwin10"
+
+define hidden arm_apcscc i32 @__addvsi3(i32 %a, i32 %b) nounwind {
+entry:
+ tail call void @llvm.dbg.value(metadata !{i32 %b}, i64 0, metadata !0)
+ %0 = add nsw i32 %b, %a, !dbg !9 ; <i32> [#uses=1]
+ ret i32 %0, !dbg !11
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!0 = metadata !{i32 524545, metadata !1, metadata !"b", metadata !2, i32 93, metadata !6} ; [ DW_TAG_arg_variable ]
+!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"__addvsi3", metadata !"__addvsi3", metadata !"__addvsi3", metadata !2, i32 94, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 524329, metadata !"libgcc2.c", metadata !"/Users/bwilson/local/nightly/test-2010-04-14/build/llvmgcc.roots/llvmgcc~obj/src/gcc", metadata !3} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 524305, i32 0, i32 1, metadata !"libgcc2.c", metadata !"/Users/bwilson/local/nightly/test-2010-04-14/build/llvmgcc.roots/llvmgcc~obj/src/gcc", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 00)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{metadata !6, metadata !6, metadata !6}
+!6 = metadata !{i32 524310, metadata !2, metadata !"SItype", metadata !7, i32 152, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_typedef ]
+!7 = metadata !{i32 524329, metadata !"libgcc2.h", metadata !"/Users/bwilson/local/nightly/test-2010-04-14/build/llvmgcc.roots/llvmgcc~obj/src/gcc", metadata !3} ; [ DW_TAG_file_type ]
+!8 = metadata !{i32 524324, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 95, i32 0, metadata !10, null}
+!10 = metadata !{i32 524299, metadata !1, i32 94, i32 0} ; [ DW_TAG_lexical_block ]
+!11 = metadata !{i32 100, i32 0, metadata !10, null}
diff --git a/test/CodeGen/ARM/arguments7.ll b/test/CodeGen/ARM/arguments7.ll
index 038e417b333a..fa97ee821b39 100644
--- a/test/CodeGen/ARM/arguments7.ll
+++ b/test/CodeGen/ARM/arguments7.ll
@@ -6,4 +6,4 @@ define double @f(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, double %b) {
ret double %tmp
}
-declare double @g(double)
+declare double @g(i32, i32, i32, i32, double)
diff --git a/test/CodeGen/ARM/arguments8.ll b/test/CodeGen/ARM/arguments8.ll
index 6999a4d4f656..abe059bf234e 100644
--- a/test/CodeGen/ARM/arguments8.ll
+++ b/test/CodeGen/ARM/arguments8.ll
@@ -6,4 +6,4 @@ define i64 @f(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i64 %b) {
ret i64 %tmp
}
-declare i64 @g(i64)
+declare i64 @g(i32, i32, i32, i32, i64)
diff --git a/test/CodeGen/ARM/bfx.ll b/test/CodeGen/ARM/bfx.ll
new file mode 100644
index 000000000000..fcca191a016e
--- /dev/null
+++ b/test/CodeGen/ARM/bfx.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -march=arm -mattr=+v7a | FileCheck %s
+
+define i32 @sbfx1(i32 %a) {
+; CHECK: sbfx1
+; CHECK: sbfx r0, r0, #7, #11
+ %t1 = lshr i32 %a, 7
+ %t2 = trunc i32 %t1 to i11
+ %t3 = sext i11 %t2 to i32
+ ret i32 %t3
+}
+
+define i32 @ubfx1(i32 %a) {
+; CHECK: ubfx1
+; CHECK: ubfx r0, r0, #7, #11
+ %t1 = lshr i32 %a, 7
+ %t2 = trunc i32 %t1 to i11
+ %t3 = zext i11 %t2 to i32
+ ret i32 %t3
+}
+
+define i32 @ubfx2(i32 %a) {
+; CHECK: ubfx2
+; CHECK: ubfx r0, r0, #7, #11
+ %t1 = lshr i32 %a, 7
+ %t2 = and i32 %t1, 2047
+ ret i32 %t2
+}
+
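
All three patterns above describe an 11-bit field starting at bit 7, which is why each maps to "..., #7, #11": shifting right by 7 moves the field down to bit 0, and either a trunc-to-i11 followed by sext/zext or an 'and' with 2047 (= 2^11 - 1 = 0x7ff) isolates those 11 bits. The mask form, annotated (same logic as @ubfx2 above):

  %t1 = lshr i32 %a, 7           ; bring bit 7 of the field down to bit 0
  %t2 = and i32 %t1, 2047        ; 2047 = (1 << 11) - 1 keeps the low 11 bits
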
diff --git a/test/CodeGen/ARM/globals.ll b/test/CodeGen/ARM/globals.ll
index adb449739743..ccb14280df9b 100644
--- a/test/CodeGen/ARM/globals.ll
+++ b/test/CodeGen/ARM/globals.ll
@@ -11,23 +11,23 @@ define i32 @test1() {
}
; DarwinStatic: _test1:
-; DarwinStatic: ldr r0, LCPI1_0
+; DarwinStatic: ldr r0, LCPI0_0
; DarwinStatic: ldr r0, [r0]
; DarwinStatic: bx lr
; DarwinStatic: .align 2
-; DarwinStatic: LCPI1_0:
+; DarwinStatic: LCPI0_0:
; DarwinStatic: .long {{_G$}}
; DarwinDynamic: _test1:
-; DarwinDynamic: ldr r0, LCPI1_0
+; DarwinDynamic: ldr r0, LCPI0_0
; DarwinDynamic: ldr r0, [r0]
; DarwinDynamic: ldr r0, [r0]
; DarwinDynamic: bx lr
; DarwinDynamic: .align 2
-; DarwinDynamic: LCPI1_0:
+; DarwinDynamic: LCPI0_0:
; DarwinDynamic: .long L_G$non_lazy_ptr
; DarwinDynamic: .section __DATA,__nl_symbol_ptr,non_lazy_symbol_pointers
@@ -39,15 +39,15 @@ define i32 @test1() {
; DarwinPIC: _test1:
-; DarwinPIC: ldr r0, LCPI1_0
-; DarwinPIC: LPC1_0:
+; DarwinPIC: ldr r0, LCPI0_0
+; DarwinPIC: LPC0_0:
; DarwinPIC: ldr r0, [pc, r0]
; DarwinPIC: ldr r0, [r0]
; DarwinPIC: bx lr
; DarwinPIC: .align 2
-; DarwinPIC: LCPI1_0:
-; DarwinPIC: .long L_G$non_lazy_ptr-(LPC1_0+8)
+; DarwinPIC: LCPI0_0:
+; DarwinPIC: .long L_G$non_lazy_ptr-(LPC0_0+8)
; DarwinPIC: .section __DATA,__nl_symbol_ptr,non_lazy_symbol_pointers
; DarwinPIC: .align 2
@@ -58,18 +58,18 @@ define i32 @test1() {
; LinuxPIC: test1:
-; LinuxPIC: ldr r0, .LCPI1_0
-; LinuxPIC: ldr r1, .LCPI1_1
+; LinuxPIC: ldr r0, .LCPI0_0
+; LinuxPIC: ldr r1, .LCPI0_1
-; LinuxPIC: .LPC1_0:
+; LinuxPIC: .LPC0_0:
; LinuxPIC: add r0, pc, r0
; LinuxPIC: ldr r0, [r1, r0]
; LinuxPIC: ldr r0, [r0]
; LinuxPIC: bx lr
; LinuxPIC: .align 2
-; LinuxPIC: .LCPI1_0:
-; LinuxPIC: .long _GLOBAL_OFFSET_TABLE_-(.LPC1_0+8)
+; LinuxPIC: .LCPI0_0:
+; LinuxPIC: .long _GLOBAL_OFFSET_TABLE_-(.LPC0_0+8)
; LinuxPIC: .align 2
-; LinuxPIC: .LCPI1_1:
+; LinuxPIC: .LCPI0_1:
; LinuxPIC: .long G(GOT)
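
The LCPI/LPC renumberings in this and the neighboring tests appear to stem from one change: constant-pool and PC-relative labels embed the function's number, and that numbering is now zero-based, so the first function in a module emits LCPI0_* rather than LCPI1_*. The label scheme, annotated (format inferred from the CHECK lines above):

; LCPI<function-index>_<pool-entry-index>:
; e.g. the first constant-pool entry of the first function:
; LCPI0_0:
;         .long _G
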
diff --git a/test/CodeGen/ARM/hidden-vis-3.ll b/test/CodeGen/ARM/hidden-vis-3.ll
index 3bd710ae949f..fc8b2febc320 100644
--- a/test/CodeGen/ARM/hidden-vis-3.ll
+++ b/test/CodeGen/ARM/hidden-vis-3.ll
@@ -5,9 +5,9 @@
define i32 @t() nounwind readonly {
entry:
-; CHECK: LCPI1_0:
+; CHECK: LCPI0_0:
; CHECK-NEXT: .long _x
-; CHECK: LCPI1_1:
+; CHECK: LCPI0_1:
; CHECK-NEXT: .long _y
%0 = load i32* @x, align 4 ; <i32> [#uses=1]
diff --git a/test/CodeGen/ARM/ifcvt2.ll b/test/CodeGen/ARM/ifcvt2.ll
index ce57d736c167..d9cac8022b24 100644
--- a/test/CodeGen/ARM/ifcvt2.ll
+++ b/test/CodeGen/ARM/ifcvt2.ll
@@ -1,7 +1,8 @@
-; RUN: llc < %s -march=arm
-; RUN: llc < %s -march=arm | grep bxlt | count 1
-; RUN: llc < %s -march=arm | grep bxgt | count 1
-; RUN: llc < %s -march=arm | grep bxge | count 1
+; RUN: llc < %s -march=arm > %t
+; RUN: grep bxlt %t | count 1
+; RUN: grep bxgt %t | count 1
+; RUN: not grep bxge %t
+; RUN: not grep bxle %t
define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
%tmp2 = icmp sgt i32 %c, 10
diff --git a/test/CodeGen/ARM/ifcvt5.ll b/test/CodeGen/ARM/ifcvt5.ll
index 8677ce535971..bca2ae346a6f 100644
--- a/test/CodeGen/ARM/ifcvt5.ll
+++ b/test/CodeGen/ARM/ifcvt5.ll
@@ -9,7 +9,7 @@ entry:
ret void
}
-define void @t1(i32 %a, i32 %b) {
+define i32 @t1(i32 %a, i32 %b) {
; CHECK: t1:
; CHECK: ldmialt sp!, {r7, pc}
entry:
@@ -18,8 +18,8 @@ entry:
cond_true: ; preds = %entry
tail call void @foo( i32 %b )
- ret void
+ ret i32 0
UnifiedReturnBlock: ; preds = %entry
- ret void
+ ret i32 1
}
diff --git a/test/CodeGen/ARM/sbfx.ll b/test/CodeGen/ARM/sbfx.ll
index 6f1d87d2c17b..d29693e4cf92 100644
--- a/test/CodeGen/ARM/sbfx.ll
+++ b/test/CodeGen/ARM/sbfx.ll
@@ -12,7 +12,7 @@ entry:
define i32 @f2(i32 %a) {
entry:
; CHECK: f2:
-; CHECK: ubfx r0, r0, #0, #20
+; CHECK: bfc r0, #20, #12
%tmp = shl i32 %a, 12
%tmp2 = lshr i32 %tmp, 12
ret i32 %tmp2
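
The new expected output reflects that clearing the top bits is cheaper here than extracting the bottom ones: shl by 12 followed by lshr by 12 on an i32 keeps bits 0-19 and zeroes bits 20-31, which is exactly what "bfc r0, #20, #12" (bit-field clear: 12 bits starting at bit 20) does in one instruction. The equivalence, annotated (same IR as @f2 above):

  %tmp = shl i32 %a, 12          ; push the top 12 bits out ...
  %tmp2 = lshr i32 %tmp, 12      ; ... and shift back: bits 20-31 are now 0,
                                 ; i.e. 'bfc r0, #20, #12'
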
diff --git a/test/CodeGen/ARM/vadd.ll b/test/CodeGen/ARM/vadd.ll
index 9fa530750aa1..9bb8bf561045 100644
--- a/test/CodeGen/ARM/vadd.ll
+++ b/test/CodeGen/ARM/vadd.ll
@@ -41,7 +41,7 @@ define <2 x float> @vaddf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: vadd.f32
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
- %tmp3 = add <2 x float> %tmp1, %tmp2
+ %tmp3 = fadd <2 x float> %tmp1, %tmp2
ret <2 x float> %tmp3
}
@@ -86,7 +86,7 @@ define <4 x float> @vaddQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK: vadd.f32
%tmp1 = load <4 x float>* %A
%tmp2 = load <4 x float>* %B
- %tmp3 = add <4 x float> %tmp1, %tmp2
+ %tmp3 = fadd <4 x float> %tmp1, %tmp2
ret <4 x float> %tmp3
}
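
These updates (and the matching ones in the vld/vst, vmla, vmls, vmul, and vneg tests below) track the IR change that split floating-point arithmetic into dedicated opcodes: add/sub/mul are now integer-only, and float and float-vector operands require fadd/fsub/fmul. Side by side (hypothetical operands):

  %si = add  <4 x i32>   %a, %b    ; integer add
  %sf = fadd <4 x float> %x, %y    ; floating-point add (formerly spelled 'add')
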
diff --git a/test/CodeGen/ARM/vld1.ll b/test/CodeGen/ARM/vld1.ll
index f5383aafb2bb..c61ea8c9a789 100644
--- a/test/CodeGen/ARM/vld1.ll
+++ b/test/CodeGen/ARM/vld1.ll
@@ -10,28 +10,32 @@ define <8 x i8> @vld1i8(i8* %A) nounwind {
define <4 x i16> @vld1i16(i16* %A) nounwind {
;CHECK: vld1i16:
;CHECK: vld1.16
- %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i16* %A)
+ %tmp0 = bitcast i16* %A to i8*
+ %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %tmp0)
ret <4 x i16> %tmp1
}
define <2 x i32> @vld1i32(i32* %A) nounwind {
;CHECK: vld1i32:
;CHECK: vld1.32
- %tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i32* %A)
+ %tmp0 = bitcast i32* %A to i8*
+ %tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %tmp0)
ret <2 x i32> %tmp1
}
define <2 x float> @vld1f(float* %A) nounwind {
;CHECK: vld1f:
;CHECK: vld1.32
- %tmp1 = call <2 x float> @llvm.arm.neon.vld1.v2f32(float* %A)
+ %tmp0 = bitcast float* %A to i8*
+ %tmp1 = call <2 x float> @llvm.arm.neon.vld1.v2f32(i8* %tmp0)
ret <2 x float> %tmp1
}
define <1 x i64> @vld1i64(i64* %A) nounwind {
;CHECK: vld1i64:
;CHECK: vld1.64
- %tmp1 = call <1 x i64> @llvm.arm.neon.vld1.v1i64(i64* %A)
+ %tmp0 = bitcast i64* %A to i8*
+ %tmp1 = call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %tmp0)
ret <1 x i64> %tmp1
}
@@ -45,28 +49,32 @@ define <16 x i8> @vld1Qi8(i8* %A) nounwind {
define <8 x i16> @vld1Qi16(i16* %A) nounwind {
;CHECK: vld1Qi16:
;CHECK: vld1.16
- %tmp1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i16* %A)
+ %tmp0 = bitcast i16* %A to i8*
+ %tmp1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %tmp0)
ret <8 x i16> %tmp1
}
define <4 x i32> @vld1Qi32(i32* %A) nounwind {
;CHECK: vld1Qi32:
;CHECK: vld1.32
- %tmp1 = call <4 x i32> @llvm.arm.neon.vld1.v4i32(i32* %A)
+ %tmp0 = bitcast i32* %A to i8*
+ %tmp1 = call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %tmp0)
ret <4 x i32> %tmp1
}
define <4 x float> @vld1Qf(float* %A) nounwind {
;CHECK: vld1Qf:
;CHECK: vld1.32
- %tmp1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(float* %A)
+ %tmp0 = bitcast float* %A to i8*
+ %tmp1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %tmp0)
ret <4 x float> %tmp1
}
define <2 x i64> @vld1Qi64(i64* %A) nounwind {
;CHECK: vld1Qi64:
;CHECK: vld1.64
- %tmp1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64(i64* %A)
+ %tmp0 = bitcast i64* %A to i8*
+ %tmp1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64(i8* %tmp0)
ret <2 x i64> %tmp1
}
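
All of the NEON load/store updates in this patch follow one mechanical recipe: the vld*/vst* intrinsics now take an i8* pointer regardless of element type, so callers bitcast their typed pointer first. The idiom, sketched for one variant (declaration assumed to match this test's usage):

declare <4 x i16> @llvm.arm.neon.vld1.v4i16(i8*) nounwind readonly

define <4 x i16> @load4x16(i16* %A) nounwind {
  %p = bitcast i16* %A to i8*                           ; erase the element type
  %v = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %p)
  ret <4 x i16> %v
}
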
diff --git a/test/CodeGen/ARM/vld2.ll b/test/CodeGen/ARM/vld2.ll
index 23f7d2ca0cd3..0838636ce742 100644
--- a/test/CodeGen/ARM/vld2.ll
+++ b/test/CodeGen/ARM/vld2.ll
@@ -24,7 +24,8 @@ define <8 x i8> @vld2i8(i8* %A) nounwind {
define <4 x i16> @vld2i16(i16* %A) nounwind {
;CHECK: vld2i16:
;CHECK: vld2.16
- %tmp1 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i16* %A)
+ %tmp0 = bitcast i16* %A to i8*
+ %tmp1 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8* %tmp0)
%tmp2 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 1
%tmp4 = add <4 x i16> %tmp2, %tmp3
@@ -34,7 +35,8 @@ define <4 x i16> @vld2i16(i16* %A) nounwind {
define <2 x i32> @vld2i32(i32* %A) nounwind {
;CHECK: vld2i32:
;CHECK: vld2.32
- %tmp1 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i32* %A)
+ %tmp0 = bitcast i32* %A to i8*
+ %tmp1 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8* %tmp0)
%tmp2 = extractvalue %struct.__neon_int32x2x2_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp1, 1
%tmp4 = add <2 x i32> %tmp2, %tmp3
@@ -44,17 +46,19 @@ define <2 x i32> @vld2i32(i32* %A) nounwind {
define <2 x float> @vld2f(float* %A) nounwind {
;CHECK: vld2f:
;CHECK: vld2.32
- %tmp1 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(float* %A)
+ %tmp0 = bitcast float* %A to i8*
+ %tmp1 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8* %tmp0)
%tmp2 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 1
- %tmp4 = add <2 x float> %tmp2, %tmp3
+ %tmp4 = fadd <2 x float> %tmp2, %tmp3
ret <2 x float> %tmp4
}
define <1 x i64> @vld2i64(i64* %A) nounwind {
;CHECK: vld2i64:
;CHECK: vld1.64
- %tmp1 = call %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i64* %A)
+ %tmp0 = bitcast i64* %A to i8*
+ %tmp1 = call %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8* %tmp0)
%tmp2 = extractvalue %struct.__neon_int64x1x2_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int64x1x2_t %tmp1, 1
%tmp4 = add <1 x i64> %tmp2, %tmp3
@@ -74,7 +78,8 @@ define <16 x i8> @vld2Qi8(i8* %A) nounwind {
define <8 x i16> @vld2Qi16(i16* %A) nounwind {
;CHECK: vld2Qi16:
;CHECK: vld2.16
- %tmp1 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i16* %A)
+ %tmp0 = bitcast i16* %A to i8*
+ %tmp1 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8* %tmp0)
%tmp2 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 1
%tmp4 = add <8 x i16> %tmp2, %tmp3
@@ -84,7 +89,8 @@ define <8 x i16> @vld2Qi16(i16* %A) nounwind {
define <4 x i32> @vld2Qi32(i32* %A) nounwind {
;CHECK: vld2Qi32:
;CHECK: vld2.32
- %tmp1 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i32* %A)
+ %tmp0 = bitcast i32* %A to i8*
+ %tmp1 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp0)
%tmp2 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 1
%tmp4 = add <4 x i32> %tmp2, %tmp3
@@ -94,10 +100,11 @@ define <4 x i32> @vld2Qi32(i32* %A) nounwind {
define <4 x float> @vld2Qf(float* %A) nounwind {
;CHECK: vld2Qf:
;CHECK: vld2.32
- %tmp1 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(float* %A)
+ %tmp0 = bitcast float* %A to i8*
+ %tmp1 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(i8* %tmp0)
%tmp2 = extractvalue %struct.__neon_float32x4x2_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp1, 1
- %tmp4 = add <4 x float> %tmp2, %tmp3
+ %tmp4 = fadd <4 x float> %tmp2, %tmp3
ret <4 x float> %tmp4
}
diff --git a/test/CodeGen/ARM/vld3.ll b/test/CodeGen/ARM/vld3.ll
index 207dc6a22e45..65a24486bc62 100644
--- a/test/CodeGen/ARM/vld3.ll
+++ b/test/CodeGen/ARM/vld3.ll
@@ -24,7 +24,8 @@ define <8 x i8> @vld3i8(i8* %A) nounwind {
define <4 x i16> @vld3i16(i16* %A) nounwind {
;CHECK: vld3i16:
;CHECK: vld3.16
- %tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i16* %A)
+ %tmp0 = bitcast i16* %A to i8*
+ %tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8* %tmp0)
%tmp2 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 2
%tmp4 = add <4 x i16> %tmp2, %tmp3
@@ -34,7 +35,8 @@ define <4 x i16> @vld3i16(i16* %A) nounwind {
define <2 x i32> @vld3i32(i32* %A) nounwind {
;CHECK: vld3i32:
;CHECK: vld3.32
- %tmp1 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i32* %A)
+ %tmp0 = bitcast i32* %A to i8*
+ %tmp1 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8* %tmp0)
%tmp2 = extractvalue %struct.__neon_int32x2x3_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp1, 2
%tmp4 = add <2 x i32> %tmp2, %tmp3
@@ -44,17 +46,19 @@ define <2 x i32> @vld3i32(i32* %A) nounwind {
define <2 x float> @vld3f(float* %A) nounwind {
;CHECK: vld3f:
;CHECK: vld3.32
- %tmp1 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(float* %A)
+ %tmp0 = bitcast float* %A to i8*
+ %tmp1 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(i8* %tmp0)
%tmp2 = extractvalue %struct.__neon_float32x2x3_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp1, 2
- %tmp4 = add <2 x float> %tmp2, %tmp3
+ %tmp4 = fadd <2 x float> %tmp2, %tmp3
ret <2 x float> %tmp4
}
define <1 x i64> @vld3i64(i64* %A) nounwind {
;CHECK: vld3i64:
;CHECK: vld1.64
- %tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i64* %A)
+ %tmp0 = bitcast i64* %A to i8*
+ %tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8* %tmp0)
%tmp2 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 2
%tmp4 = add <1 x i64> %tmp2, %tmp3
@@ -76,7 +80,8 @@ define <8 x i16> @vld3Qi16(i16* %A) nounwind {
;CHECK: vld3Qi16:
;CHECK: vld3.16
;CHECK: vld3.16
- %tmp1 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16(i16* %A)
+ %tmp0 = bitcast i16* %A to i8*
+ %tmp1 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16(i8* %tmp0)
%tmp2 = extractvalue %struct.__neon_int16x8x3_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp1, 2
%tmp4 = add <8 x i16> %tmp2, %tmp3
@@ -87,7 +92,8 @@ define <4 x i32> @vld3Qi32(i32* %A) nounwind {
;CHECK: vld3Qi32:
;CHECK: vld3.32
;CHECK: vld3.32
- %tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i32* %A)
+ %tmp0 = bitcast i32* %A to i8*
+ %tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8* %tmp0)
%tmp2 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 2
%tmp4 = add <4 x i32> %tmp2, %tmp3
@@ -98,10 +104,11 @@ define <4 x float> @vld3Qf(float* %A) nounwind {
;CHECK: vld3Qf:
;CHECK: vld3.32
;CHECK: vld3.32
- %tmp1 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32(float* %A)
+ %tmp0 = bitcast float* %A to i8*
+ %tmp1 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32(i8* %tmp0)
%tmp2 = extractvalue %struct.__neon_float32x4x3_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_float32x4x3_t %tmp1, 2
- %tmp4 = add <4 x float> %tmp2, %tmp3
+ %tmp4 = fadd <4 x float> %tmp2, %tmp3
ret <4 x float> %tmp4
}
diff --git a/test/CodeGen/ARM/vld4.ll b/test/CodeGen/ARM/vld4.ll
index 0624f2977ea4..e0b870638a18 100644
--- a/test/CodeGen/ARM/vld4.ll
+++ b/test/CodeGen/ARM/vld4.ll
@@ -24,7 +24,8 @@ define <8 x i8> @vld4i8(i8* %A) nounwind {
define <4 x i16> @vld4i16(i16* %A) nounwind {
;CHECK: vld4i16:
;CHECK: vld4.16
- %tmp1 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i16* %A)
+ %tmp0 = bitcast i16* %A to i8*
+ %tmp1 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8* %tmp0)
%tmp2 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 2
%tmp4 = add <4 x i16> %tmp2, %tmp3
@@ -34,7 +35,8 @@ define <4 x i16> @vld4i16(i16* %A) nounwind {
define <2 x i32> @vld4i32(i32* %A) nounwind {
;CHECK: vld4i32:
;CHECK: vld4.32
- %tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i32* %A)
+ %tmp0 = bitcast i32* %A to i8*
+ %tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8* %tmp0)
%tmp2 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 2
%tmp4 = add <2 x i32> %tmp2, %tmp3
@@ -44,17 +46,19 @@ define <2 x i32> @vld4i32(i32* %A) nounwind {
define <2 x float> @vld4f(float* %A) nounwind {
;CHECK: vld4f:
;CHECK: vld4.32
- %tmp1 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(float* %A)
+ %tmp0 = bitcast float* %A to i8*
+ %tmp1 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(i8* %tmp0)
%tmp2 = extractvalue %struct.__neon_float32x2x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp1, 2
- %tmp4 = add <2 x float> %tmp2, %tmp3
+ %tmp4 = fadd <2 x float> %tmp2, %tmp3
ret <2 x float> %tmp4
}
define <1 x i64> @vld4i64(i64* %A) nounwind {
;CHECK: vld4i64:
;CHECK: vld1.64
- %tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i64* %A)
+ %tmp0 = bitcast i64* %A to i8*
+ %tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8* %tmp0)
%tmp2 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 2
%tmp4 = add <1 x i64> %tmp2, %tmp3
@@ -76,7 +80,8 @@ define <8 x i16> @vld4Qi16(i16* %A) nounwind {
;CHECK: vld4Qi16:
;CHECK: vld4.16
;CHECK: vld4.16
- %tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i16* %A)
+ %tmp0 = bitcast i16* %A to i8*
+ %tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8* %tmp0)
%tmp2 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 2
%tmp4 = add <8 x i16> %tmp2, %tmp3
@@ -87,7 +92,8 @@ define <4 x i32> @vld4Qi32(i32* %A) nounwind {
;CHECK: vld4Qi32:
;CHECK: vld4.32
;CHECK: vld4.32
- %tmp1 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32(i32* %A)
+ %tmp0 = bitcast i32* %A to i8*
+ %tmp1 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32(i8* %tmp0)
%tmp2 = extractvalue %struct.__neon_int32x4x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp1, 2
%tmp4 = add <4 x i32> %tmp2, %tmp3
@@ -98,10 +104,11 @@ define <4 x float> @vld4Qf(float* %A) nounwind {
;CHECK: vld4Qf:
;CHECK: vld4.32
;CHECK: vld4.32
- %tmp1 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4.v4f32(float* %A)
+ %tmp0 = bitcast float* %A to i8*
+ %tmp1 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4.v4f32(i8* %tmp0)
%tmp2 = extractvalue %struct.__neon_float32x4x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_float32x4x4_t %tmp1, 2
- %tmp4 = add <4 x float> %tmp2, %tmp3
+ %tmp4 = fadd <4 x float> %tmp2, %tmp3
ret <4 x float> %tmp4
}
diff --git a/test/CodeGen/ARM/vldlane.ll b/test/CodeGen/ARM/vldlane.ll
index 53881a3f924e..b32c59019f4c 100644
--- a/test/CodeGen/ARM/vldlane.ll
+++ b/test/CodeGen/ARM/vldlane.ll
@@ -23,8 +23,9 @@ define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vld2lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld2lanei16:
;CHECK: vld2.16
+ %tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>* %B
- %tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+ %tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
%tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 1
%tmp5 = add <4 x i16> %tmp3, %tmp4
@@ -34,8 +35,9 @@ define <4 x i16> @vld2lanei16(i16* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vld2lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld2lanei32:
;CHECK: vld2.32
+ %tmp0 = bitcast i32* %A to i8*
%tmp1 = load <2 x i32>* %B
- %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+ %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
%tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1
%tmp5 = add <2 x i32> %tmp3, %tmp4
@@ -45,19 +47,21 @@ define <2 x i32> @vld2lanei32(i32* %A, <2 x i32>* %B) nounwind {
define <2 x float> @vld2lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld2lanef:
;CHECK: vld2.32
+ %tmp0 = bitcast float* %A to i8*
%tmp1 = load <2 x float>* %B
- %tmp2 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+ %tmp2 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
%tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 1
- %tmp5 = add <2 x float> %tmp3, %tmp4
+ %tmp5 = fadd <2 x float> %tmp3, %tmp4
ret <2 x float> %tmp5
}
define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld2laneQi16:
;CHECK: vld2.16
+ %tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>* %B
- %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
+ %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
%tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1
%tmp5 = add <8 x i16> %tmp3, %tmp4
@@ -67,8 +71,9 @@ define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld2laneQi32:
;CHECK: vld2.32
+ %tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>* %B
- %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2)
+ %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2)
%tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 1
%tmp5 = add <4 x i32> %tmp3, %tmp4
@@ -78,11 +83,12 @@ define <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
define <4 x float> @vld2laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld2laneQf:
;CHECK: vld2.32
+ %tmp0 = bitcast float* %A to i8*
%tmp1 = load <4 x float>* %B
- %tmp2 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
+ %tmp2 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
%tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 1
- %tmp5 = add <4 x float> %tmp3, %tmp4
+ %tmp5 = fadd <4 x float> %tmp3, %tmp4
ret <4 x float> %tmp5
}
@@ -120,8 +126,9 @@ define <8 x i8> @vld3lanei8(i8* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vld3lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld3lanei16:
;CHECK: vld3.16
+ %tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>* %B
- %tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+ %tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
%tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 2
@@ -133,8 +140,9 @@ define <4 x i16> @vld3lanei16(i16* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vld3lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld3lanei32:
;CHECK: vld3.32
+ %tmp0 = bitcast i32* %A to i8*
%tmp1 = load <2 x i32>* %B
- %tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+ %tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
%tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 2
@@ -146,21 +154,23 @@ define <2 x i32> @vld3lanei32(i32* %A, <2 x i32>* %B) nounwind {
define <2 x float> @vld3lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld3lanef:
;CHECK: vld3.32
+ %tmp0 = bitcast float* %A to i8*
%tmp1 = load <2 x float>* %B
- %tmp2 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+ %tmp2 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
%tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 2
- %tmp6 = add <2 x float> %tmp3, %tmp4
- %tmp7 = add <2 x float> %tmp5, %tmp6
+ %tmp6 = fadd <2 x float> %tmp3, %tmp4
+ %tmp7 = fadd <2 x float> %tmp5, %tmp6
ret <2 x float> %tmp7
}
define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld3laneQi16:
;CHECK: vld3.16
+ %tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>* %B
- %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
+ %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
%tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2
@@ -172,8 +182,9 @@ define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vld3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld3laneQi32:
;CHECK: vld3.32
+ %tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>* %B
- %tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 3)
+ %tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 3)
%tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 2
@@ -185,13 +196,14 @@ define <4 x i32> @vld3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
define <4 x float> @vld3laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld3laneQf:
;CHECK: vld3.32
+ %tmp0 = bitcast float* %A to i8*
%tmp1 = load <4 x float>* %B
- %tmp2 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
+ %tmp2 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
%tmp3 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 2
- %tmp6 = add <4 x float> %tmp3, %tmp4
- %tmp7 = add <4 x float> %tmp5, %tmp6
+ %tmp6 = fadd <4 x float> %tmp3, %tmp4
+ %tmp7 = fadd <4 x float> %tmp5, %tmp6
ret <4 x float> %tmp7
}
@@ -231,8 +243,9 @@ define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vld4lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld4lanei16:
;CHECK: vld4.16
+ %tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>* %B
- %tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+ %tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
%tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 2
@@ -246,8 +259,9 @@ define <4 x i16> @vld4lanei16(i16* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vld4lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld4lanei32:
;CHECK: vld4.32
+ %tmp0 = bitcast i32* %A to i8*
%tmp1 = load <2 x i32>* %B
- %tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+ %tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
%tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 2
@@ -261,23 +275,25 @@ define <2 x i32> @vld4lanei32(i32* %A, <2 x i32>* %B) nounwind {
define <2 x float> @vld4lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld4lanef:
;CHECK: vld4.32
+ %tmp0 = bitcast float* %A to i8*
%tmp1 = load <2 x float>* %B
- %tmp2 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+ %tmp2 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
%tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 2
%tmp6 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 3
- %tmp7 = add <2 x float> %tmp3, %tmp4
- %tmp8 = add <2 x float> %tmp5, %tmp6
- %tmp9 = add <2 x float> %tmp7, %tmp8
+ %tmp7 = fadd <2 x float> %tmp3, %tmp4
+ %tmp8 = fadd <2 x float> %tmp5, %tmp6
+ %tmp9 = fadd <2 x float> %tmp7, %tmp8
ret <2 x float> %tmp9
}
define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld4laneQi16:
;CHECK: vld4.16
+ %tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>* %B
- %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
+ %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
%tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 2
@@ -291,8 +307,9 @@ define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vld4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld4laneQi32:
;CHECK: vld4.32
+ %tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>* %B
- %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1)
+ %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1)
%tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 2
@@ -306,15 +323,16 @@ define <4 x i32> @vld4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
define <4 x float> @vld4laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld4laneQf:
;CHECK: vld4.32
+ %tmp0 = bitcast float* %A to i8*
%tmp1 = load <4 x float>* %B
- %tmp2 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
+ %tmp2 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
%tmp3 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 2
%tmp6 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 3
- %tmp7 = add <4 x float> %tmp3, %tmp4
- %tmp8 = add <4 x float> %tmp5, %tmp6
- %tmp9 = add <4 x float> %tmp7, %tmp8
+ %tmp7 = fadd <4 x float> %tmp3, %tmp4
+ %tmp8 = fadd <4 x float> %tmp5, %tmp6
+ %tmp9 = fadd <4 x float> %tmp7, %tmp8
ret <4 x float> %tmp9
}
diff --git a/test/CodeGen/ARM/vmla.ll b/test/CodeGen/ARM/vmla.ll
index 840521827413..77cf10ad3e68 100644
--- a/test/CodeGen/ARM/vmla.ll
+++ b/test/CodeGen/ARM/vmla.ll
@@ -39,8 +39,8 @@ define <2 x float> @vmlaf32(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) n
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
%tmp3 = load <2 x float>* %C
- %tmp4 = mul <2 x float> %tmp2, %tmp3
- %tmp5 = add <2 x float> %tmp1, %tmp4
+ %tmp4 = fmul <2 x float> %tmp2, %tmp3
+ %tmp5 = fadd <2 x float> %tmp1, %tmp4
ret <2 x float> %tmp5
}
@@ -83,8 +83,8 @@ define <4 x float> @vmlaQf32(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C)
%tmp1 = load <4 x float>* %A
%tmp2 = load <4 x float>* %B
%tmp3 = load <4 x float>* %C
- %tmp4 = mul <4 x float> %tmp2, %tmp3
- %tmp5 = add <4 x float> %tmp1, %tmp4
+ %tmp4 = fmul <4 x float> %tmp2, %tmp3
+ %tmp5 = fadd <4 x float> %tmp1, %tmp4
ret <4 x float> %tmp5
}
diff --git a/test/CodeGen/ARM/vmls.ll b/test/CodeGen/ARM/vmls.ll
index c89552e6f9ea..2b70a7878ced 100644
--- a/test/CodeGen/ARM/vmls.ll
+++ b/test/CodeGen/ARM/vmls.ll
@@ -39,8 +39,8 @@ define <2 x float> @vmlsf32(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) n
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
%tmp3 = load <2 x float>* %C
- %tmp4 = mul <2 x float> %tmp2, %tmp3
- %tmp5 = sub <2 x float> %tmp1, %tmp4
+ %tmp4 = fmul <2 x float> %tmp2, %tmp3
+ %tmp5 = fsub <2 x float> %tmp1, %tmp4
ret <2 x float> %tmp5
}
@@ -83,8 +83,8 @@ define <4 x float> @vmlsQf32(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C)
%tmp1 = load <4 x float>* %A
%tmp2 = load <4 x float>* %B
%tmp3 = load <4 x float>* %C
- %tmp4 = mul <4 x float> %tmp2, %tmp3
- %tmp5 = sub <4 x float> %tmp1, %tmp4
+ %tmp4 = fmul <4 x float> %tmp2, %tmp3
+ %tmp5 = fsub <4 x float> %tmp1, %tmp4
ret <4 x float> %tmp5
}
diff --git a/test/CodeGen/ARM/vmul.ll b/test/CodeGen/ARM/vmul.ll
index 325da5deabe5..1d9168021279 100644
--- a/test/CodeGen/ARM/vmul.ll
+++ b/test/CodeGen/ARM/vmul.ll
@@ -32,7 +32,7 @@ define <2 x float> @vmulf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: vmul.f32
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
- %tmp3 = mul <2 x float> %tmp1, %tmp2
+ %tmp3 = fmul <2 x float> %tmp1, %tmp2
ret <2 x float> %tmp3
}
@@ -77,7 +77,7 @@ define <4 x float> @vmulQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK: vmul.f32
%tmp1 = load <4 x float>* %A
%tmp2 = load <4 x float>* %B
- %tmp3 = mul <4 x float> %tmp1, %tmp2
+ %tmp3 = fmul <4 x float> %tmp1, %tmp2
ret <4 x float> %tmp3
}
diff --git a/test/CodeGen/ARM/vneg.ll b/test/CodeGen/ARM/vneg.ll
index 7764e87c6ac6..4a10732458e8 100644
--- a/test/CodeGen/ARM/vneg.ll
+++ b/test/CodeGen/ARM/vneg.ll
@@ -28,7 +28,7 @@ define <2 x float> @vnegf32(<2 x float>* %A) nounwind {
;CHECK: vnegf32:
;CHECK: vneg.f32
%tmp1 = load <2 x float>* %A
- %tmp2 = sub <2 x float> < float -0.000000e+00, float -0.000000e+00 >, %tmp1
+ %tmp2 = fsub <2 x float> < float -0.000000e+00, float -0.000000e+00 >, %tmp1
ret <2 x float> %tmp2
}
@@ -60,7 +60,7 @@ define <4 x float> @vnegQf32(<4 x float>* %A) nounwind {
;CHECK: vnegQf32:
;CHECK: vneg.f32
%tmp1 = load <4 x float>* %A
- %tmp2 = sub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %tmp1
+ %tmp2 = fsub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %tmp1
ret <4 x float> %tmp2
}
diff --git a/test/CodeGen/ARM/vst1.ll b/test/CodeGen/ARM/vst1.ll
index 602b124ffad9..95414c308914 100644
--- a/test/CodeGen/ARM/vst1.ll
+++ b/test/CodeGen/ARM/vst1.ll
@@ -11,32 +11,36 @@ define void @vst1i8(i8* %A, <8 x i8>* %B) nounwind {
define void @vst1i16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst1i16:
;CHECK: vst1.16
+ %tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>* %B
- call void @llvm.arm.neon.vst1.v4i16(i16* %A, <4 x i16> %tmp1)
+ call void @llvm.arm.neon.vst1.v4i16(i8* %tmp0, <4 x i16> %tmp1)
ret void
}
define void @vst1i32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst1i32:
;CHECK: vst1.32
+ %tmp0 = bitcast i32* %A to i8*
%tmp1 = load <2 x i32>* %B
- call void @llvm.arm.neon.vst1.v2i32(i32* %A, <2 x i32> %tmp1)
+ call void @llvm.arm.neon.vst1.v2i32(i8* %tmp0, <2 x i32> %tmp1)
ret void
}
define void @vst1f(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst1f:
;CHECK: vst1.32
+ %tmp0 = bitcast float* %A to i8*
%tmp1 = load <2 x float>* %B
- call void @llvm.arm.neon.vst1.v2f32(float* %A, <2 x float> %tmp1)
+ call void @llvm.arm.neon.vst1.v2f32(i8* %tmp0, <2 x float> %tmp1)
ret void
}
define void @vst1i64(i64* %A, <1 x i64>* %B) nounwind {
;CHECK: vst1i64:
;CHECK: vst1.64
+ %tmp0 = bitcast i64* %A to i8*
%tmp1 = load <1 x i64>* %B
- call void @llvm.arm.neon.vst1.v1i64(i64* %A, <1 x i64> %tmp1)
+ call void @llvm.arm.neon.vst1.v1i64(i8* %tmp0, <1 x i64> %tmp1)
ret void
}
@@ -51,32 +55,36 @@ define void @vst1Qi8(i8* %A, <16 x i8>* %B) nounwind {
define void @vst1Qi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst1Qi16:
;CHECK: vst1.16
+ %tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>* %B
- call void @llvm.arm.neon.vst1.v8i16(i16* %A, <8 x i16> %tmp1)
+ call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %tmp1)
ret void
}
define void @vst1Qi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst1Qi32:
;CHECK: vst1.32
+ %tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>* %B
- call void @llvm.arm.neon.vst1.v4i32(i32* %A, <4 x i32> %tmp1)
+ call void @llvm.arm.neon.vst1.v4i32(i8* %tmp0, <4 x i32> %tmp1)
ret void
}
define void @vst1Qf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vst1Qf:
;CHECK: vst1.32
+ %tmp0 = bitcast float* %A to i8*
%tmp1 = load <4 x float>* %B
- call void @llvm.arm.neon.vst1.v4f32(float* %A, <4 x float> %tmp1)
+ call void @llvm.arm.neon.vst1.v4f32(i8* %tmp0, <4 x float> %tmp1)
ret void
}
define void @vst1Qi64(i64* %A, <2 x i64>* %B) nounwind {
;CHECK: vst1Qi64:
;CHECK: vst1.64
+ %tmp0 = bitcast i64* %A to i8*
%tmp1 = load <2 x i64>* %B
- call void @llvm.arm.neon.vst1.v2i64(i64* %A, <2 x i64> %tmp1)
+ call void @llvm.arm.neon.vst1.v2i64(i8* %tmp0, <2 x i64> %tmp1)
ret void
}
diff --git a/test/CodeGen/ARM/vst2.ll b/test/CodeGen/ARM/vst2.ll
index 17d6bee0f56c..3c98a2cbe60d 100644
--- a/test/CodeGen/ARM/vst2.ll
+++ b/test/CodeGen/ARM/vst2.ll
@@ -11,32 +11,36 @@ define void @vst2i8(i8* %A, <8 x i8>* %B) nounwind {
define void @vst2i16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst2i16:
;CHECK: vst2.16
+ %tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>* %B
- call void @llvm.arm.neon.vst2.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1)
+ call void @llvm.arm.neon.vst2.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1)
ret void
}
define void @vst2i32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst2i32:
;CHECK: vst2.32
+ %tmp0 = bitcast i32* %A to i8*
%tmp1 = load <2 x i32>* %B
- call void @llvm.arm.neon.vst2.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1)
+ call void @llvm.arm.neon.vst2.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1)
ret void
}
define void @vst2f(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst2f:
;CHECK: vst2.32
+ %tmp0 = bitcast float* %A to i8*
%tmp1 = load <2 x float>* %B
- call void @llvm.arm.neon.vst2.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1)
+ call void @llvm.arm.neon.vst2.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1)
ret void
}
define void @vst2i64(i64* %A, <1 x i64>* %B) nounwind {
;CHECK: vst2i64:
;CHECK: vst1.64
+ %tmp0 = bitcast i64* %A to i8*
%tmp1 = load <1 x i64>* %B
- call void @llvm.arm.neon.vst2.v1i64(i64* %A, <1 x i64> %tmp1, <1 x i64> %tmp1)
+ call void @llvm.arm.neon.vst2.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1)
ret void
}
@@ -51,24 +55,27 @@ define void @vst2Qi8(i8* %A, <16 x i8>* %B) nounwind {
define void @vst2Qi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst2Qi16:
;CHECK: vst2.16
+ %tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>* %B
- call void @llvm.arm.neon.vst2.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1)
+ call void @llvm.arm.neon.vst2.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1)
ret void
}
define void @vst2Qi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst2Qi32:
;CHECK: vst2.32
+ %tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>* %B
- call void @llvm.arm.neon.vst2.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1)
+ call void @llvm.arm.neon.vst2.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1)
ret void
}
define void @vst2Qf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vst2Qf:
;CHECK: vst2.32
+ %tmp0 = bitcast float* %A to i8*
%tmp1 = load <4 x float>* %B
- call void @llvm.arm.neon.vst2.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1)
+ call void @llvm.arm.neon.vst2.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1)
ret void
}
diff --git a/test/CodeGen/ARM/vst3.ll b/test/CodeGen/ARM/vst3.ll
index a831a0c08ce9..2599bc0db933 100644
--- a/test/CodeGen/ARM/vst3.ll
+++ b/test/CodeGen/ARM/vst3.ll
@@ -11,32 +11,36 @@ define void @vst3i8(i8* %A, <8 x i8>* %B) nounwind {
define void @vst3i16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst3i16:
;CHECK: vst3.16
+ %tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>* %B
- call void @llvm.arm.neon.vst3.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1)
+ call void @llvm.arm.neon.vst3.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1)
ret void
}
define void @vst3i32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst3i32:
;CHECK: vst3.32
+ %tmp0 = bitcast i32* %A to i8*
%tmp1 = load <2 x i32>* %B
- call void @llvm.arm.neon.vst3.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1)
+ call void @llvm.arm.neon.vst3.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1)
ret void
}
define void @vst3f(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst3f:
;CHECK: vst3.32
+ %tmp0 = bitcast float* %A to i8*
%tmp1 = load <2 x float>* %B
- call void @llvm.arm.neon.vst3.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1)
+ call void @llvm.arm.neon.vst3.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1)
ret void
}
define void @vst3i64(i64* %A, <1 x i64>* %B) nounwind {
;CHECK: vst3i64:
;CHECK: vst1.64
+ %tmp0 = bitcast i64* %A to i8*
%tmp1 = load <1 x i64>* %B
- call void @llvm.arm.neon.vst3.v1i64(i64* %A, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1)
+ call void @llvm.arm.neon.vst3.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1)
ret void
}
@@ -53,8 +57,9 @@ define void @vst3Qi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst3Qi16:
;CHECK: vst3.16
;CHECK: vst3.16
+ %tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>* %B
- call void @llvm.arm.neon.vst3.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1)
+ call void @llvm.arm.neon.vst3.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1)
ret void
}
@@ -62,8 +67,9 @@ define void @vst3Qi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst3Qi32:
;CHECK: vst3.32
;CHECK: vst3.32
+ %tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>* %B
- call void @llvm.arm.neon.vst3.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1)
+ call void @llvm.arm.neon.vst3.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1)
ret void
}
@@ -71,8 +77,9 @@ define void @vst3Qf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vst3Qf:
;CHECK: vst3.32
;CHECK: vst3.32
+ %tmp0 = bitcast float* %A to i8*
%tmp1 = load <4 x float>* %B
- call void @llvm.arm.neon.vst3.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1)
+ call void @llvm.arm.neon.vst3.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1)
ret void
}
diff --git a/test/CodeGen/ARM/vst4.ll b/test/CodeGen/ARM/vst4.ll
index d92c017c30b2..878f0efaa480 100644
--- a/test/CodeGen/ARM/vst4.ll
+++ b/test/CodeGen/ARM/vst4.ll
@@ -11,32 +11,36 @@ define void @vst4i8(i8* %A, <8 x i8>* %B) nounwind {
define void @vst4i16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst4i16:
;CHECK: vst4.16
+ %tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>* %B
- call void @llvm.arm.neon.vst4.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1)
+ call void @llvm.arm.neon.vst4.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1)
ret void
}
define void @vst4i32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst4i32:
;CHECK: vst4.32
+ %tmp0 = bitcast i32* %A to i8*
%tmp1 = load <2 x i32>* %B
- call void @llvm.arm.neon.vst4.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1)
+ call void @llvm.arm.neon.vst4.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1)
ret void
}
define void @vst4f(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst4f:
;CHECK: vst4.32
+ %tmp0 = bitcast float* %A to i8*
%tmp1 = load <2 x float>* %B
- call void @llvm.arm.neon.vst4.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1)
+ call void @llvm.arm.neon.vst4.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1)
ret void
}
define void @vst4i64(i64* %A, <1 x i64>* %B) nounwind {
;CHECK: vst4i64:
;CHECK: vst1.64
+ %tmp0 = bitcast i64* %A to i8*
%tmp1 = load <1 x i64>* %B
- call void @llvm.arm.neon.vst4.v1i64(i64* %A, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1)
+ call void @llvm.arm.neon.vst4.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1)
ret void
}
@@ -53,8 +57,9 @@ define void @vst4Qi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst4Qi16:
;CHECK: vst4.16
;CHECK: vst4.16
+ %tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>* %B
- call void @llvm.arm.neon.vst4.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1)
+ call void @llvm.arm.neon.vst4.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1)
ret void
}
@@ -62,8 +67,9 @@ define void @vst4Qi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst4Qi32:
;CHECK: vst4.32
;CHECK: vst4.32
+ %tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>* %B
- call void @llvm.arm.neon.vst4.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1)
+ call void @llvm.arm.neon.vst4.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1)
ret void
}
@@ -71,8 +77,9 @@ define void @vst4Qf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vst4Qf:
;CHECK: vst4.32
;CHECK: vst4.32
+ %tmp0 = bitcast float* %A to i8*
%tmp1 = load <4 x float>* %B
- call void @llvm.arm.neon.vst4.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1)
+ call void @llvm.arm.neon.vst4.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1)
ret void
}
diff --git a/test/CodeGen/ARM/vstlane.ll b/test/CodeGen/ARM/vstlane.ll
index 3bfb14f17b77..cf50756d465e 100644
--- a/test/CodeGen/ARM/vstlane.ll
+++ b/test/CodeGen/ARM/vstlane.ll
@@ -11,48 +11,54 @@ define void @vst2lanei8(i8* %A, <8 x i8>* %B) nounwind {
define void @vst2lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst2lanei16:
;CHECK: vst2.16
+ %tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>* %B
- call void @llvm.arm.neon.vst2lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
ret void
}
define void @vst2lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst2lanei32:
;CHECK: vst2.32
+ %tmp0 = bitcast i32* %A to i8*
%tmp1 = load <2 x i32>* %B
- call void @llvm.arm.neon.vst2lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
ret void
}
define void @vst2lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst2lanef:
;CHECK: vst2.32
+ %tmp0 = bitcast float* %A to i8*
%tmp1 = load <2 x float>* %B
- call void @llvm.arm.neon.vst2lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst2lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
ret void
}
define void @vst2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst2laneQi16:
;CHECK: vst2.16
+ %tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>* %B
- call void @llvm.arm.neon.vst2lane.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
ret void
}
define void @vst2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst2laneQi32:
;CHECK: vst2.32
+ %tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>* %B
- call void @llvm.arm.neon.vst2lane.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2)
+ call void @llvm.arm.neon.vst2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2)
ret void
}
define void @vst2laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vst2laneQf:
;CHECK: vst2.32
+ %tmp0 = bitcast float* %A to i8*
%tmp1 = load <4 x float>* %B
- call void @llvm.arm.neon.vst2lane.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, i32 3)
+ call void @llvm.arm.neon.vst2lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 3)
ret void
}
@@ -76,48 +82,54 @@ define void @vst3lanei8(i8* %A, <8 x i8>* %B) nounwind {
define void @vst3lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst3lanei16:
;CHECK: vst3.16
+ %tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>* %B
- call void @llvm.arm.neon.vst3lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
ret void
}
define void @vst3lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst3lanei32:
;CHECK: vst3.32
+ %tmp0 = bitcast i32* %A to i8*
%tmp1 = load <2 x i32>* %B
- call void @llvm.arm.neon.vst3lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
ret void
}
define void @vst3lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst3lanef:
;CHECK: vst3.32
+ %tmp0 = bitcast float* %A to i8*
%tmp1 = load <2 x float>* %B
- call void @llvm.arm.neon.vst3lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst3lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
ret void
}
define void @vst3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst3laneQi16:
;CHECK: vst3.16
+ %tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>* %B
- call void @llvm.arm.neon.vst3lane.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 6)
+ call void @llvm.arm.neon.vst3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 6)
ret void
}
define void @vst3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst3laneQi32:
;CHECK: vst3.32
+ %tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>* %B
- call void @llvm.arm.neon.vst3lane.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 0)
+ call void @llvm.arm.neon.vst3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 0)
ret void
}
define void @vst3laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vst3laneQf:
;CHECK: vst3.32
+ %tmp0 = bitcast float* %A to i8*
%tmp1 = load <4 x float>* %B
- call void @llvm.arm.neon.vst3lane.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst3lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
ret void
}
@@ -142,48 +154,54 @@ define void @vst4lanei8(i8* %A, <8 x i8>* %B) nounwind {
define void @vst4lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst4lanei16:
;CHECK: vst4.16
+ %tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>* %B
- call void @llvm.arm.neon.vst4lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
ret void
}
define void @vst4lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst4lanei32:
;CHECK: vst4.32
+ %tmp0 = bitcast i32* %A to i8*
%tmp1 = load <2 x i32>* %B
- call void @llvm.arm.neon.vst4lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
ret void
}
define void @vst4lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst4lanef:
;CHECK: vst4.32
+ %tmp0 = bitcast float* %A to i8*
%tmp1 = load <2 x float>* %B
- call void @llvm.arm.neon.vst4lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst4lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
ret void
}
define void @vst4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst4laneQi16:
;CHECK: vst4.16
+ %tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>* %B
- call void @llvm.arm.neon.vst4lane.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 7)
+ call void @llvm.arm.neon.vst4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 7)
ret void
}
define void @vst4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst4laneQi32:
;CHECK: vst4.32
+ %tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>* %B
- call void @llvm.arm.neon.vst4lane.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2)
+ call void @llvm.arm.neon.vst4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2)
ret void
}
define void @vst4laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vst4laneQf:
;CHECK: vst4.32
+ %tmp0 = bitcast float* %A to i8*
%tmp1 = load <4 x float>* %B
- call void @llvm.arm.neon.vst4lane.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst4lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
ret void
}
diff --git a/test/CodeGen/ARM/vsub.ll b/test/CodeGen/ARM/vsub.ll
index 8f0055fd4103..3416de76f123 100644
--- a/test/CodeGen/ARM/vsub.ll
+++ b/test/CodeGen/ARM/vsub.ll
@@ -41,7 +41,7 @@ define <2 x float> @vsubf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: vsub.f32
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
- %tmp3 = sub <2 x float> %tmp1, %tmp2
+ %tmp3 = fsub <2 x float> %tmp1, %tmp2
ret <2 x float> %tmp3
}
@@ -86,7 +86,7 @@ define <4 x float> @vsubQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK: vsub.f32
%tmp1 = load <4 x float>* %A
%tmp2 = load <4 x float>* %B
- %tmp3 = sub <4 x float> %tmp1, %tmp2
+ %tmp3 = fsub <4 x float> %tmp1, %tmp2
ret <4 x float> %tmp3
}
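The vsub.ll hunks above, like the vtrn/vuzp/vzip hunks below, switch floating-point vector arithmetic to the dedicated instructions: LLVM split fadd, fsub, and fmul off from their integer counterparts, and these tests now use the floating-point spellings. A two-line sketch of the distinction:

  %d = fsub <2 x float> %a, %b   ; floating-point subtract
  %i = sub <2 x i32> %x, %y      ; integer subtract keeps the plain spelling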
diff --git a/test/CodeGen/ARM/vtrn.ll b/test/CodeGen/ARM/vtrn.ll
index 5122b0981e96..10bb10ac24a1 100644
--- a/test/CodeGen/ARM/vtrn.ll
+++ b/test/CodeGen/ARM/vtrn.ll
@@ -44,7 +44,7 @@ define <2 x float> @vtrnf(<2 x float>* %A, <2 x float>* %B) nounwind {
%tmp2 = load <2 x float>* %B
%tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 0, i32 2>
%tmp4 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 1, i32 3>
- %tmp5 = add <2 x float> %tmp3, %tmp4
+ %tmp5 = fadd <2 x float> %tmp3, %tmp4
ret <2 x float> %tmp5
}
@@ -92,6 +92,6 @@ define <4 x float> @vtrnQf(<4 x float>* %A, <4 x float>* %B) nounwind {
%tmp2 = load <4 x float>* %B
%tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
%tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
- %tmp5 = add <4 x float> %tmp3, %tmp4
+ %tmp5 = fadd <4 x float> %tmp3, %tmp4
ret <4 x float> %tmp5
}
diff --git a/test/CodeGen/ARM/vuzp.ll b/test/CodeGen/ARM/vuzp.ll
index e531718d94aa..6cef188d76dd 100644
--- a/test/CodeGen/ARM/vuzp.ll
+++ b/test/CodeGen/ARM/vuzp.ll
@@ -70,6 +70,6 @@ define <4 x float> @vuzpQf(<4 x float>* %A, <4 x float>* %B) nounwind {
%tmp2 = load <4 x float>* %B
%tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
- %tmp5 = add <4 x float> %tmp3, %tmp4
+ %tmp5 = fadd <4 x float> %tmp3, %tmp4
ret <4 x float> %tmp5
}
diff --git a/test/CodeGen/ARM/vzip.ll b/test/CodeGen/ARM/vzip.ll
index 32f7e0d02c44..a9ecdcab42d7 100644
--- a/test/CodeGen/ARM/vzip.ll
+++ b/test/CodeGen/ARM/vzip.ll
@@ -70,6 +70,6 @@ define <4 x float> @vzipQf(<4 x float>* %A, <4 x float>* %B) nounwind {
%tmp2 = load <4 x float>* %B
%tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
%tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
- %tmp5 = add <4 x float> %tmp3, %tmp4
+ %tmp5 = fadd <4 x float> %tmp3, %tmp4
ret <4 x float> %tmp5
}
diff --git a/test/CodeGen/Alpha/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/Alpha/2010-04-07-DbgValueOtherTargets.ll
new file mode 100644
index 000000000000..cf3f0b90037d
--- /dev/null
+++ b/test/CodeGen/Alpha/2010-04-07-DbgValueOtherTargets.ll
@@ -0,0 +1,33 @@
+; RUN: llc -O0 -march=alpha -asm-verbose < %s | FileCheck %s
+; Check that DEBUG_VALUE comments come through on a variety of targets.
+
+%tart.reflect.ComplexType = type { double, double }
+
+@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
+
+define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+entry:
+; CHECK: DEBUG_VALUE
+ tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
+ tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
+ ret i32 3
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
+
+!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
+!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
+!3 = metadata !{metadata !4, metadata !6, metadata !7}
+!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
+!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
+!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
+!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
+!9 = metadata !{i32 458763, metadata !10} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
diff --git a/test/CodeGen/Blackfin/jumptable.ll b/test/CodeGen/Blackfin/jumptable.ll
index 5f49e9d193e4..263533c00097 100644
--- a/test/CodeGen/Blackfin/jumptable.ll
+++ b/test/CodeGen/Blackfin/jumptable.ll
@@ -1,8 +1,8 @@
; RUN: llc < %s -march=bfin -verify-machineinstrs | FileCheck %s
; CHECK: .section .rodata
-; CHECK: JTI1_0:
-; CHECK: .long .BB1_1
+; CHECK: JTI0_0:
+; CHECK: .long .BB0_1
define i32 @oper(i32 %op, i32 %A, i32 %B) {
entry:
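The JTI1_0 to JTI0_0 update reflects private-label numbering that now starts at 0 for the first function in a module; the label scheme is JTI<function>_<table>, and the same renumbering shows up below in the Mips $CPI checks, the Thumb LCPI/LBB checks, and the PowerPC "L...$pb" checks. For the first function's first jump table the expected assembly is:

JTI0_0:
        .long .BB0_1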
diff --git a/test/CodeGen/CellSPU/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/CellSPU/2010-04-07-DbgValueOtherTargets.ll
new file mode 100644
index 000000000000..45d53c8c975f
--- /dev/null
+++ b/test/CodeGen/CellSPU/2010-04-07-DbgValueOtherTargets.ll
@@ -0,0 +1,33 @@
+; RUN: llc -O0 -march=cellspu -asm-verbose < %s | FileCheck %s
+; Check that DEBUG_VALUE comments come through on a variety of targets.
+
+%tart.reflect.ComplexType = type { double, double }
+
+@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
+
+define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+entry:
+; CHECK: DEBUG_VALUE
+ tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
+ tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
+ ret i32 3
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
+
+!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
+!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
+!3 = metadata !{metadata !4, metadata !6, metadata !7}
+!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
+!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
+!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
+!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
+!9 = metadata !{i32 458763, metadata !10} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
diff --git a/test/CodeGen/CellSPU/bss.ll b/test/CodeGen/CellSPU/bss.ll
index 05a0f5003931..327800d09cbf 100644
--- a/test/CodeGen/CellSPU/bss.ll
+++ b/test/CodeGen/CellSPU/bss.ll
@@ -1,5 +1,11 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep "\.section" %t1.s | grep "\.bss" | count 1
+; RUN: llc < %s -march=cellspu | FileCheck %s
@bssVar = global i32 zeroinitializer
+; CHECK: .section .bss
+; CHECK-NEXT: .globl
+
+@localVar = internal global i32 zeroinitializer
+; CHECK-NOT: .lcomm
+; CHECK: .local
+; CHECK-NEXT: .comm
diff --git a/test/CodeGen/CellSPU/crash.ll b/test/CodeGen/CellSPU/crash.ll
new file mode 100644
index 000000000000..cc2ab71db3b3
--- /dev/null
+++ b/test/CodeGen/CellSPU/crash.ll
@@ -0,0 +1,8 @@
+; RUN: llc %s -march=cellspu -o -
+declare i8 @return_i8()
+declare i16 @return_i16()
+define void @testfunc() {
+ %rv1 = call i8 @return_i8()
+ %rv2 = call i16 @return_i16()
+ ret void
+}
\ No newline at end of file
diff --git a/test/CodeGen/Generic/2007-05-03-EHTypeInfo.ll b/test/CodeGen/Generic/2007-05-03-EHTypeInfo.ll
index bb774b45d8ee..81347a23b864 100644
--- a/test/CodeGen/Generic/2007-05-03-EHTypeInfo.ll
+++ b/test/CodeGen/Generic/2007-05-03-EHTypeInfo.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -enable-eh
+; RUN: llc < %s
%struct.exception = type { i8, i8, i32, i8*, i8*, i32, i8* }
@program_error = external global %struct.exception ; <%struct.exception*> [#uses=1]
diff --git a/test/CodeGen/Generic/2007-12-17-InvokeAsm.ll b/test/CodeGen/Generic/2007-12-17-InvokeAsm.ll
index 4cc1e7c181ed..5df2200dec76 100644
--- a/test/CodeGen/Generic/2007-12-17-InvokeAsm.ll
+++ b/test/CodeGen/Generic/2007-12-17-InvokeAsm.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -enable-eh
+; RUN: llc < %s
define fastcc void @bc__support__high_resolution_time__initialize_clock_rate() {
entry:
diff --git a/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll b/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll
index d2e97a477b44..00e027b8d3c8 100644
--- a/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll
+++ b/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -enable-eh
+; RUN: llc < %s
; PR1833
%struct.__class_type_info_pseudo = type { %struct.__type_info_pseudo }
diff --git a/test/CodeGen/Generic/crash.ll b/test/CodeGen/Generic/crash.ll
new file mode 100644
index 000000000000..7218565617fc
--- /dev/null
+++ b/test/CodeGen/Generic/crash.ll
@@ -0,0 +1,8 @@
+; RUN: llc %s -o -
+
+; PR6332
+%struct.AVCodecTag = type opaque
+@ff_codec_bmp_tags = external global [0 x %struct.AVCodecTag]
+@tags = global [1 x %struct.AVCodecTag*] [%struct.AVCodecTag* getelementptr
+inbounds ([0 x %struct.AVCodecTag]* @ff_codec_bmp_tags, i32 0, i32 0)]
+
diff --git a/test/CodeGen/MBlaze/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/MBlaze/2010-04-07-DbgValueOtherTargets.ll
new file mode 100644
index 000000000000..854352a70111
--- /dev/null
+++ b/test/CodeGen/MBlaze/2010-04-07-DbgValueOtherTargets.ll
@@ -0,0 +1,33 @@
+; RUN: llc -O0 -march=mblaze -asm-verbose < %s | FileCheck %s
+; Check that DEBUG_VALUE comments come through on a variety of targets.
+
+%tart.reflect.ComplexType = type { double, double }
+
+@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
+
+define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+entry:
+; CHECK: DEBUG_VALUE
+ tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
+ tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
+ ret i32 3
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
+
+!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
+!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
+!3 = metadata !{metadata !4, metadata !6, metadata !7}
+!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
+!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
+!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
+!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
+!9 = metadata !{i32 458763, metadata !10} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
diff --git a/test/CodeGen/MSP430/2009-05-17-Rot.ll b/test/CodeGen/MSP430/2009-05-17-Rot.ll
index 2ae005259d4f..d622aa71164b 100644
--- a/test/CodeGen/MSP430/2009-05-17-Rot.ll
+++ b/test/CodeGen/MSP430/2009-05-17-Rot.ll
@@ -14,4 +14,4 @@ define i16 @rol1u16(i16 %x.arg) nounwind {
return:
%6 = load i16* %retval
ret i16 %6
-}
\ No newline at end of file
+}
diff --git a/test/CodeGen/MSP430/2009-05-17-Shift.ll b/test/CodeGen/MSP430/2009-05-17-Shift.ll
index 25aff60c2b3f..e23df7851666 100644
--- a/test/CodeGen/MSP430/2009-05-17-Shift.ll
+++ b/test/CodeGen/MSP430/2009-05-17-Shift.ll
@@ -12,4 +12,4 @@ return:
%3 = load i16* %retval
ret i16 %3
-}
\ No newline at end of file
+}
diff --git a/test/CodeGen/MSP430/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/MSP430/2010-04-07-DbgValueOtherTargets.ll
new file mode 100644
index 000000000000..8de044cf48ba
--- /dev/null
+++ b/test/CodeGen/MSP430/2010-04-07-DbgValueOtherTargets.ll
@@ -0,0 +1,33 @@
+; RUN: llc -O0 -march=msp430 -asm-verbose < %s | FileCheck %s
+; Check that DEBUG_VALUE comments come through on a variety of targets.
+
+%tart.reflect.ComplexType = type { double, double }
+
+@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
+
+define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+entry:
+; CHECK: DEBUG_VALUE
+ tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
+ tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
+ ret i32 3
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
+
+!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
+!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
+!3 = metadata !{metadata !4, metadata !6, metadata !7}
+!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
+!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
+!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
+!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
+!9 = metadata !{i32 458763, metadata !10} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
diff --git a/test/CodeGen/MSP430/2010-05-01-CombinerAnd.ll b/test/CodeGen/MSP430/2010-05-01-CombinerAnd.ll
new file mode 100644
index 000000000000..99100377034b
--- /dev/null
+++ b/test/CodeGen/MSP430/2010-05-01-CombinerAnd.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s
+; PR7001
+
+target datalayout = "e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"
+target triple = "msp430-elf"
+
+define i16 @main() nounwind {
+entry:
+ br label %while.cond
+
+while.cond: ; preds = %while.body, %entry
+ br i1 undef, label %land.rhs, label %land.end
+
+land.rhs: ; preds = %while.cond
+ br label %land.end
+
+land.end: ; preds = %land.rhs, %while.cond
+ %0 = phi i1 [ false, %while.cond ], [ undef, %land.rhs ] ; <i1> [#uses=1]
+ br i1 %0, label %while.body, label %while.end
+
+while.body: ; preds = %land.end
+ %tmp4 = load i16* undef ; <i16> [#uses=0]
+ br label %while.cond
+
+while.end: ; preds = %land.end
+ ret i16 undef
+}
diff --git a/test/CodeGen/MSP430/indirectbr.ll b/test/CodeGen/MSP430/indirectbr.ll
new file mode 100644
index 000000000000..2a62c9135c6a
--- /dev/null
+++ b/test/CodeGen/MSP430/indirectbr.ll
@@ -0,0 +1,41 @@
+; RUN: llc -march=msp430 < %s
+
+@nextaddr = global i8* null ; <i8**> [#uses=2]
+@C.0.2070 = private constant [5 x i8*] [i8* blockaddress(@foo, %L1), i8* blockaddress(@foo, %L2), i8* blockaddress(@foo, %L3), i8* blockaddress(@foo, %L4), i8* blockaddress(@foo, %L5)] ; <[5 x i8*]*> [#uses=1]
+
+define internal i16 @foo(i16 %i) nounwind {
+entry:
+ %0 = load i8** @nextaddr, align 4 ; <i8*> [#uses=2]
+ %1 = icmp eq i8* %0, null ; <i1> [#uses=1]
+ br i1 %1, label %bb3, label %bb2
+
+bb2: ; preds = %bb3, %entry
+ %gotovar.4.0 = phi i8* [ %gotovar.4.0.pre, %bb3 ], [ %0, %entry ] ; <i8*> [#uses=1]
+ indirectbr i8* %gotovar.4.0, [label %L5, label %L4, label %L3, label %L2, label %L1]
+
+bb3: ; preds = %entry
+ %2 = getelementptr inbounds [5 x i8*]* @C.0.2070, i16 0, i16 %i ; <i8**> [#uses=1]
+ %gotovar.4.0.pre = load i8** %2, align 4 ; <i8*> [#uses=1]
+ br label %bb2
+
+L5: ; preds = %bb2
+ br label %L4
+
+L4: ; preds = %L5, %bb2
+ %res.0 = phi i16 [ 385, %L5 ], [ 35, %bb2 ] ; <i16> [#uses=1]
+ br label %L3
+
+L3: ; preds = %L4, %bb2
+ %res.1 = phi i16 [ %res.0, %L4 ], [ 5, %bb2 ] ; <i16> [#uses=1]
+ br label %L2
+
+L2: ; preds = %L3, %bb2
+ %res.2 = phi i16 [ %res.1, %L3 ], [ 1, %bb2 ] ; <i16> [#uses=1]
+ %phitmp = mul i16 %res.2, 6 ; <i16> [#uses=1]
+ br label %L1
+
+L1: ; preds = %L2, %bb2
+ %res.3 = phi i16 [ %phitmp, %L2 ], [ 2, %bb2 ] ; <i16> [#uses=1]
+ store i8* blockaddress(@foo, %L5), i8** @nextaddr, align 4
+ ret i16 %res.3
+}
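This test and indirectbr2.ll just below exercise indirectbr with blockaddress constants: the branch jumps to a computed i8* block address and must list every possible destination so the CFG stays explicit. A stripped-down sketch of the construct, with illustrative names:

@dest = global i8* blockaddress(@f, %target)

define i16 @f() nounwind {
entry:
  %addr = load i8** @dest                 ; computed target stored as i8*
  indirectbr i8* %addr, [label %target]

target:
  ret i16 1
}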
diff --git a/test/CodeGen/MSP430/indirectbr2.ll b/test/CodeGen/MSP430/indirectbr2.ll
new file mode 100644
index 000000000000..93cfb2506bb4
--- /dev/null
+++ b/test/CodeGen/MSP430/indirectbr2.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=msp430 < %s | FileCheck %s
+@C.0.2070 = private constant [5 x i8*] [i8* blockaddress(@foo, %L1), i8* blockaddress(@foo, %L2), i8* blockaddress(@foo, %L3), i8* blockaddress(@foo, %L4), i8* blockaddress(@foo, %L5)] ; <[5 x i8*]*> [#uses=1]
+
+define internal i16 @foo(i16 %i) nounwind {
+entry:
+ %tmp1 = getelementptr inbounds [5 x i8*]* @C.0.2070, i16 0, i16 %i ; <i8**> [#uses=1]
+ %gotovar.4.0 = load i8** %tmp1, align 4 ; <i8*> [#uses=1]
+; CHECK: mov.w .LC.0.2070(r15), pc
+ indirectbr i8* %gotovar.4.0, [label %L5, label %L4, label %L3, label %L2, label %L1]
+
+L5: ; preds = %bb2
+ br label %L4
+
+L4: ; preds = %L5, %bb2
+ %res.0 = phi i16 [ 385, %L5 ], [ 35, %entry ] ; <i16> [#uses=1]
+ br label %L3
+
+L3: ; preds = %L4, %bb2
+ %res.1 = phi i16 [ %res.0, %L4 ], [ 5, %entry ] ; <i16> [#uses=1]
+ br label %L2
+
+L2: ; preds = %L3, %bb2
+ %res.2 = phi i16 [ %res.1, %L3 ], [ 1, %entry ] ; <i16> [#uses=1]
+ br label %L1
+
+L1: ; preds = %L2, %bb2
+ %res.3 = phi i16 [ %res.2, %L2 ], [ 2, %entry ] ; <i16> [#uses=1]
+ ret i16 %res.3
+}
diff --git a/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll b/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll
index 636b318014a5..b8d68269af42 100644
--- a/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll
+++ b/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll
@@ -4,7 +4,7 @@ target triple = "mips-unknown-linux"
define float @h() nounwind readnone {
entry:
-; CHECK: lw $2, %got($CPI1_0)($gp)
-; CHECK: lwc1 $f0, %lo($CPI1_0)($2)
+; CHECK: lw $2, %got($CPI0_0)($gp)
+; CHECK: lwc1 $f0, %lo($CPI0_0)($2)
ret float 0x400B333340000000
}
diff --git a/test/CodeGen/Mips/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/Mips/2010-04-07-DbgValueOtherTargets.ll
new file mode 100644
index 000000000000..4161c1d686e6
--- /dev/null
+++ b/test/CodeGen/Mips/2010-04-07-DbgValueOtherTargets.ll
@@ -0,0 +1,33 @@
+; RUN: llc -O0 -march=mips -asm-verbose < %s | FileCheck %s
+; Check that DEBUG_VALUE comments come through on a variety of targets.
+
+%tart.reflect.ComplexType = type { double, double }
+
+@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
+
+define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+entry:
+; CHECK: DEBUG_VALUE
+ tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
+ tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
+ ret i32 3
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
+
+!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
+!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
+!3 = metadata !{metadata !4, metadata !6, metadata !7}
+!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
+!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
+!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
+!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
+!9 = metadata !{i32 458763, metadata !10} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
diff --git a/test/CodeGen/PIC16/2009-11-20-NewNode.ll b/test/CodeGen/PIC16/2009-11-20-NewNode.ll
index f9d66ca63471..d68f0f41c4a5 100644
--- a/test/CodeGen/PIC16/2009-11-20-NewNode.ll
+++ b/test/CodeGen/PIC16/2009-11-20-NewNode.ll
@@ -1,6 +1,5 @@
; RUN: llc -march=pic16 < %s
; PR5558
-; XFAIL: vg_leak
define i64 @_strtoll_r(i16 %base) nounwind {
entry:
diff --git a/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll b/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll
index 439ef14d8b24..0c1167409447 100644
--- a/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll
+++ b/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -enable-eh
+; RUN: llc < %s
;; Formerly crashed, see PR 1508
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
target triple = "powerpc64-apple-darwin8"
diff --git a/test/CodeGen/PowerPC/2008-01-25-EmptyFunction.ll b/test/CodeGen/PowerPC/2008-01-25-EmptyFunction.ll
index a05245d542fc..db2ab877ff7d 100644
--- a/test/CodeGen/PowerPC/2008-01-25-EmptyFunction.ll
+++ b/test/CodeGen/PowerPC/2008-01-25-EmptyFunction.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc32 | grep .byte
+; RUN: llc < %s -march=ppc32 | grep nop
target triple = "powerpc-apple-darwin8"
diff --git a/test/CodeGen/PowerPC/2010-02-04-EmptyGlobal.ll b/test/CodeGen/PowerPC/2010-02-04-EmptyGlobal.ll
index 32ddb3484812..1ba11d3fd036 100644
--- a/test/CodeGen/PowerPC/2010-02-04-EmptyGlobal.ll
+++ b/test/CodeGen/PowerPC/2010-02-04-EmptyGlobal.ll
@@ -8,4 +8,13 @@
; CHECK: .globl __cmd
; CHECK-NEXT: .align 3
; CHECK-NEXT: __cmd:
-; CHECK-NEXT: .space 1
+; CHECK-NEXT: .byte 0
+
+; PR6340
+
+%Ty = type { i32, {}, i32 }
+@k = global %Ty { i32 1, {} zeroinitializer, i32 3 }
+
+; CHECK: _k:
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long 3
diff --git a/test/CodeGen/PowerPC/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/PowerPC/2010-04-07-DbgValueOtherTargets.ll
new file mode 100644
index 000000000000..f48f32f8fb17
--- /dev/null
+++ b/test/CodeGen/PowerPC/2010-04-07-DbgValueOtherTargets.ll
@@ -0,0 +1,33 @@
+; RUN: llc -O0 -march=ppc32 -asm-verbose < %s | FileCheck %s
+; Check that DEBUG_VALUE comments come through on a variety of targets.
+
+%tart.reflect.ComplexType = type { double, double }
+
+@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
+
+define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+entry:
+; CHECK: DEBUG_VALUE
+ tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
+ tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
+ ret i32 3
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
+
+!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
+!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
+!3 = metadata !{metadata !4, metadata !6, metadata !7}
+!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
+!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
+!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
+!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
+!9 = metadata !{i32 458763, metadata !10} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
diff --git a/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll b/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll
new file mode 100644
index 000000000000..b10920a6c10d
--- /dev/null
+++ b/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin -mcpu=g5 | FileCheck %s
+
+declare i8* @llvm.frameaddress(i32) nounwind readnone
+
+define i8* @g2() nounwind readnone {
+entry:
+; CHECK: _g2:
+; CHECK: lwz r3, 0(r1)
+ %0 = tail call i8* @llvm.frameaddress(i32 1) ; <i8*> [#uses=1]
+ ret i8* %0
+}
+
+declare i8* @llvm.returnaddress(i32) nounwind readnone
+
+define i8* @g() nounwind readnone {
+entry:
+; CHECK: _g:
+; CHECK: mflr r0
+; CHECK: stw r0, 8(r1)
+; CHECK: lwz r3, 0(r1)
+; CHECK: lwz r3, 8(r3)
+ %0 = tail call i8* @llvm.returnaddress(i32 1) ; <i8*> [#uses=1]
+ ret i8* %0
+}
diff --git a/test/CodeGen/PowerPC/indirectbr.ll b/test/CodeGen/PowerPC/indirectbr.ll
index 9b76ecc43db9..ab8d9dca5dc4 100644
--- a/test/CodeGen/PowerPC/indirectbr.ll
+++ b/test/CodeGen/PowerPC/indirectbr.ll
@@ -43,8 +43,8 @@ L2: ; preds = %L3, %bb2
L1: ; preds = %L2, %bb2
%res.3 = phi i32 [ %phitmp, %L2 ], [ 2, %bb2 ] ; <i32> [#uses=1]
-; PIC: addis r4, r4, ha16(Ltmp0-"L1$pb")
-; PIC: li r6, lo16(Ltmp0-"L1$pb")
+; PIC: addis r4, r4, ha16(Ltmp0-"L0$pb")
+; PIC: li r6, lo16(Ltmp0-"L0$pb")
; PIC: add r4, r4, r6
; PIC: stw r4
; STATIC: li r5, lo16(Ltmp0)
diff --git a/test/CodeGen/PowerPC/rlwimi-keep-rsh.ll b/test/CodeGen/PowerPC/rlwimi-keep-rsh.ll
index 7bce01c00afa..3dc8061a52a7 100644
--- a/test/CodeGen/PowerPC/rlwimi-keep-rsh.ll
+++ b/test/CodeGen/PowerPC/rlwimi-keep-rsh.ll
@@ -25,4 +25,4 @@ entry:
return:
ret void
; CHECK: blr
-}
\ No newline at end of file
+}
diff --git a/test/CodeGen/SPARC/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/SPARC/2010-04-07-DbgValueOtherTargets.ll
new file mode 100644
index 000000000000..f66ee216089d
--- /dev/null
+++ b/test/CodeGen/SPARC/2010-04-07-DbgValueOtherTargets.ll
@@ -0,0 +1,33 @@
+; RUN: llc -O0 -march=sparc -asm-verbose < %s | FileCheck %s
+; Check that DEBUG_VALUE comments come through on a variety of targets.
+
+%tart.reflect.ComplexType = type { double, double }
+
+@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
+
+define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+entry:
+; CHECK: DEBUG_VALUE
+ tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
+ tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
+ ret i32 3
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
+
+!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
+!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
+!3 = metadata !{metadata !4, metadata !6, metadata !7}
+!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
+!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
+!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
+!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
+!9 = metadata !{i32 458763, metadata !10} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
diff --git a/test/CodeGen/SystemZ/00-RetVoid.ll b/test/CodeGen/SystemZ/00-RetVoid.ll
index de23795ab08a..6f3cbac738f8 100644
--- a/test/CodeGen/SystemZ/00-RetVoid.ll
+++ b/test/CodeGen/SystemZ/00-RetVoid.ll
@@ -3,4 +3,4 @@
define void @foo() {
entry:
ret void
-}
\ No newline at end of file
+}
diff --git a/test/CodeGen/SystemZ/01-RetArg.ll b/test/CodeGen/SystemZ/01-RetArg.ll
index 9ab2097a0c89..8e1ff49c26fa 100644
--- a/test/CodeGen/SystemZ/01-RetArg.ll
+++ b/test/CodeGen/SystemZ/01-RetArg.ll
@@ -3,4 +3,4 @@
define i64 @foo(i64 %a, i64 %b) {
entry:
ret i64 %b
-}
\ No newline at end of file
+}
diff --git a/test/CodeGen/SystemZ/02-RetAdd.ll b/test/CodeGen/SystemZ/02-RetAdd.ll
index 9ff9b6ac3833..d5dfa220ad23 100644
--- a/test/CodeGen/SystemZ/02-RetAdd.ll
+++ b/test/CodeGen/SystemZ/02-RetAdd.ll
@@ -3,4 +3,4 @@ define i64 @foo(i64 %a, i64 %b) {
entry:
%c = add i64 %a, %b
ret i64 %c
-}
\ No newline at end of file
+}
diff --git a/test/CodeGen/SystemZ/02-RetAddImm.ll b/test/CodeGen/SystemZ/02-RetAddImm.ll
index 6d73e4d42ab2..40f6cce936bd 100644
--- a/test/CodeGen/SystemZ/02-RetAddImm.ll
+++ b/test/CodeGen/SystemZ/02-RetAddImm.ll
@@ -3,4 +3,4 @@ define i64 @foo(i64 %a, i64 %b) {
entry:
%c = add i64 %a, 1
ret i64 %c
-}
\ No newline at end of file
+}
diff --git a/test/CodeGen/SystemZ/02-RetAnd.ll b/test/CodeGen/SystemZ/02-RetAnd.ll
index 1492f9dbee75..b568a57f8bee 100644
--- a/test/CodeGen/SystemZ/02-RetAnd.ll
+++ b/test/CodeGen/SystemZ/02-RetAnd.ll
@@ -4,4 +4,4 @@ define i64 @foo(i64 %a, i64 %b) {
entry:
%c = and i64 %a, %b
ret i64 %c
-}
\ No newline at end of file
+}
diff --git a/test/CodeGen/SystemZ/02-RetNeg.ll b/test/CodeGen/SystemZ/02-RetNeg.ll
index 7f3380dc16ac..3f6ba2f27fd4 100644
--- a/test/CodeGen/SystemZ/02-RetNeg.ll
+++ b/test/CodeGen/SystemZ/02-RetNeg.ll
@@ -4,4 +4,4 @@ define i64 @foo(i64 %a) {
entry:
%c = sub i64 0, %a
ret i64 %c
-}
\ No newline at end of file
+}
diff --git a/test/CodeGen/SystemZ/02-RetOr.ll b/test/CodeGen/SystemZ/02-RetOr.ll
index 1e8134d2ddcc..a1ddb63d04ab 100644
--- a/test/CodeGen/SystemZ/02-RetOr.ll
+++ b/test/CodeGen/SystemZ/02-RetOr.ll
@@ -3,4 +3,4 @@ define i64 @foo(i64 %a, i64 %b) {
entry:
%c = or i64 %a, %b
ret i64 %c
-}
\ No newline at end of file
+}
diff --git a/test/CodeGen/SystemZ/02-RetSub.ll b/test/CodeGen/SystemZ/02-RetSub.ll
index 1c4514f36c93..98e1861365fd 100644
--- a/test/CodeGen/SystemZ/02-RetSub.ll
+++ b/test/CodeGen/SystemZ/02-RetSub.ll
@@ -4,4 +4,4 @@ define i64 @foo(i64 %a, i64 %b) {
entry:
%c = sub i64 %a, %b
ret i64 %c
-}
\ No newline at end of file
+}
diff --git a/test/CodeGen/SystemZ/02-RetSubImm.ll b/test/CodeGen/SystemZ/02-RetSubImm.ll
index 4f91cb073997..8479fbf8656e 100644
--- a/test/CodeGen/SystemZ/02-RetSubImm.ll
+++ b/test/CodeGen/SystemZ/02-RetSubImm.ll
@@ -4,4 +4,4 @@ define i64 @foo(i64 %a, i64 %b) {
entry:
%c = sub i64 %a, 1
ret i64 %c
-}
\ No newline at end of file
+}
diff --git a/test/CodeGen/SystemZ/02-RetXor.ll b/test/CodeGen/SystemZ/02-RetXor.ll
index a9439bf452a7..4d1adf2f367c 100644
--- a/test/CodeGen/SystemZ/02-RetXor.ll
+++ b/test/CodeGen/SystemZ/02-RetXor.ll
@@ -3,4 +3,4 @@ define i64 @foo(i64 %a, i64 %b) {
entry:
%c = xor i64 %a, %b
ret i64 %c
-}
\ No newline at end of file
+}
diff --git a/test/CodeGen/SystemZ/02-RetXorImm.ll b/test/CodeGen/SystemZ/02-RetXorImm.ll
index ea4b8290df3b..473bbf74f5bc 100644
--- a/test/CodeGen/SystemZ/02-RetXorImm.ll
+++ b/test/CodeGen/SystemZ/02-RetXorImm.ll
@@ -3,4 +3,4 @@ define i64 @foo(i64 %a, i64 %b) {
entry:
%c = xor i64 %a, 1
ret i64 %c
-}
\ No newline at end of file
+}
diff --git a/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll b/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll
index 564d3438310a..5457b12afcba 100644
--- a/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll
+++ b/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll
@@ -13,4 +13,4 @@ define i32 @bar(float %a) {
entry:
%b = bitcast float %a to i32
ret i32 %b
-}
\ No newline at end of file
+}
diff --git a/test/CodeGen/SystemZ/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/SystemZ/2010-04-07-DbgValueOtherTargets.ll
new file mode 100644
index 000000000000..610aa40197e3
--- /dev/null
+++ b/test/CodeGen/SystemZ/2010-04-07-DbgValueOtherTargets.ll
@@ -0,0 +1,33 @@
+; RUN: llc -O0 -march=systemz -asm-verbose < %s | FileCheck %s
+; Check that DEBUG_VALUE comments come through on a variety of targets.
+
+%tart.reflect.ComplexType = type { double, double }
+
+@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
+
+define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+entry:
+; CHECK: DEBUG_VALUE
+ tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
+ tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
+ ret i32 3
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
+
+!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
+!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
+!3 = metadata !{metadata !4, metadata !6, metadata !7}
+!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
+!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
+!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
+!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
+!9 = metadata !{i32 458763, metadata !10} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
diff --git a/test/CodeGen/Thumb/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/Thumb/2010-04-07-DbgValueOtherTargets.ll
new file mode 100644
index 000000000000..6b6c14f40871
--- /dev/null
+++ b/test/CodeGen/Thumb/2010-04-07-DbgValueOtherTargets.ll
@@ -0,0 +1,33 @@
+; RUN: llc -O0 -march=thumb -asm-verbose < %s | FileCheck %s
+; Check that DEBUG_VALUE comments come through on a variety of targets.
+
+%tart.reflect.ComplexType = type { double, double }
+
+@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
+
+define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+entry:
+; CHECK: DEBUG_VALUE
+ tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
+ tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
+ ret i32 3
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
+
+!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
+!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
+!3 = metadata !{metadata !4, metadata !6, metadata !7}
+!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
+!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
+!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
+!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
+!9 = metadata !{i32 458763, metadata !10} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
diff --git a/test/CodeGen/Thumb/machine-licm.ll b/test/CodeGen/Thumb/machine-licm.ll
index dae1412b9130..a69a64f5c15d 100644
--- a/test/CodeGen/Thumb/machine-licm.ll
+++ b/test/CodeGen/Thumb/machine-licm.ll
@@ -15,12 +15,12 @@ entry:
bb.nph: ; preds = %entry
; CHECK: BB#1
-; CHECK: ldr.n r2, LCPI1_0
+; CHECK: ldr.n r2, LCPI0_0
; CHECK: add r2, pc
; CHECK: ldr r{{[0-9]+}}, [r2]
-; CHECK: LBB1_2
-; CHECK: LCPI1_0:
-; CHECK-NOT: LCPI1_1:
+; CHECK: LBB0_2
+; CHECK: LCPI0_0:
+; CHECK-NOT: LCPI0_1:
; CHECK: .section
%.pre = load i32* @GV, align 4 ; <i32> [#uses=1]
br label %bb
diff --git a/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll b/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
index bfb7f6eabc7f..e3086a332a8b 100644
--- a/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
+++ b/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
@@ -1,6 +1,5 @@
; RUN: llc < %s -mtriple=thumbv7-eabi -mcpu=cortex-a8 -float-abi=hard | FileCheck %s
-
-; A fix for PR5204 will require this check to be changed.
+; PR5204
%"struct.__gnu_cxx::__normal_iterator<char*,std::basic_string<char, std::char_traits<char>, std::allocator<char> > >" = type { i8* }
%"struct.__gnu_cxx::new_allocator<char>" = type <{ i8 }>
@@ -11,11 +10,9 @@
define weak arm_aapcs_vfpcc i32 @_ZNKSs7compareERKSs(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) {
; CHECK: _ZNKSs7compareERKSs:
-; CHECK: it ne
-; CHECK-NEXT: ldmiane.w
-; CHECK-NEXT: itt eq
-; CHECK-NEXT: subeq.w
-; CHECK-NEXT: ldmiaeq.w
+; CHECK: it eq
+; CHECK-NEXT: subeq.w r0, r6, r8
+; CHECK-NEXT: ldmia.w sp, {r4, r5, r6, r8, r9, pc}
entry:
%0 = tail call arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this) ; <i32> [#uses=3]
%1 = tail call arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) ; <i32> [#uses=3]
diff --git a/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll b/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll
new file mode 100644
index 000000000000..fea2dca6a273
--- /dev/null
+++ b/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -O3 | FileCheck %s
+; rdar://7493908
+
+; Make sure the result of the first dynamic_alloc isn't copied back to sp more
+; than once. We'll deal with poor codegen later.
+
+define arm_apcscc void @t() nounwind ssp {
+entry:
+; CHECK: t:
+; CHECK: mov r0, sp
+; CHECK: bfc r0, #0, #3
+; CHECK: subs r0, #16
+; CHECK: mov sp, r0
+; Yes, this is stupid codegen, but it's correct.
+; CHECK: mov r0, sp
+; CHECK: bfc r0, #0, #3
+; CHECK: subs r0, #16
+; CHECK: mov sp, r0
+ %size = mul i32 8, 2
+ %vla_a = alloca i8, i32 %size, align 8
+ %vla_b = alloca i8, i32 %size, align 8
+ unreachable
+}
diff --git a/test/CodeGen/Thumb2/2010-04-26-CopyRegCrash.ll b/test/CodeGen/Thumb2/2010-04-26-CopyRegCrash.ll
new file mode 100644
index 000000000000..950b67e6a596
--- /dev/null
+++ b/test/CodeGen/Thumb2/2010-04-26-CopyRegCrash.ll
@@ -0,0 +1,73 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin
+; Radar 7896289
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
+target triple = "thumbv7-apple-darwin10"
+
+define arm_apcscc void @test(i32 %mode) nounwind optsize noinline {
+entry:
+ br i1 undef, label %return, label %bb3
+
+bb3: ; preds = %entry
+ br i1 undef, label %bb15, label %bb18
+
+bb15: ; preds = %bb3
+ unreachable
+
+bb18: ; preds = %bb3
+ switch i32 %mode, label %return [
+ i32 0, label %bb26
+ i32 1, label %bb56
+ i32 2, label %bb107
+ i32 6, label %bb150.preheader
+ i32 9, label %bb310.preheader
+ i32 13, label %bb414.preheader
+ i32 15, label %bb468.preheader
+ i32 16, label %bb522.preheader
+ ]
+
+bb150.preheader: ; preds = %bb18
+ br i1 undef, label %bb154, label %bb160
+
+bb310.preheader: ; preds = %bb18
+ unreachable
+
+bb414.preheader: ; preds = %bb18
+ unreachable
+
+bb468.preheader: ; preds = %bb18
+ unreachable
+
+bb522.preheader: ; preds = %bb18
+ unreachable
+
+bb26: ; preds = %bb18
+ unreachable
+
+bb56: ; preds = %bb18
+ unreachable
+
+bb107: ; preds = %bb18
+ br label %bb110
+
+bb110: ; preds = %bb122, %bb107
+ %asmtmp.i.i179 = tail call i16 asm "rev16 $0, $1\0A", "=l,l"(i16 undef) nounwind ; <i16> [#uses=1]
+ %asmtmp.i.i178 = tail call i16 asm "rev16 $0, $1\0A", "=l,l"(i16 %asmtmp.i.i179) nounwind ; <i16> [#uses=1]
+ store i16 %asmtmp.i.i178, i16* undef, align 2
+ br i1 undef, label %bb122, label %bb121
+
+bb121: ; preds = %bb110
+ br label %bb122
+
+bb122: ; preds = %bb121, %bb110
+ br label %bb110
+
+bb154: ; preds = %bb150.preheader
+ unreachable
+
+bb160: ; preds = %bb150.preheader
+ unreachable
+
+return: ; preds = %bb18, %entry
+ ret void
+}
diff --git a/test/CodeGen/Thumb2/bfx.ll b/test/CodeGen/Thumb2/bfx.ll
new file mode 100644
index 000000000000..489349d61552
--- /dev/null
+++ b/test/CodeGen/Thumb2/bfx.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+
+define i32 @sbfx1(i32 %a) {
+; CHECK: sbfx1
+; CHECK: sbfx r0, r0, #7, #11
+ %t1 = lshr i32 %a, 7
+ %t2 = trunc i32 %t1 to i11
+ %t3 = sext i11 %t2 to i32
+ ret i32 %t3
+}
+
+define i32 @ubfx1(i32 %a) {
+; CHECK: ubfx1
+; CHECK: ubfx r0, r0, #7, #11
+ %t1 = lshr i32 %a, 7
+ %t2 = trunc i32 %t1 to i11
+ %t3 = zext i11 %t2 to i32
+ ret i32 %t3
+}
+
+define i32 @ubfx2(i32 %a) {
+; CHECK: ubfx2
+; CHECK: ubfx r0, r0, #7, #11
+ %t1 = lshr i32 %a, 7
+ %t2 = and i32 %t1, 2047
+ ret i32 %t2
+}
+
diff --git a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
index 2b209319792d..6c453499a431 100644
--- a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
+++ b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | grep vmov.f32 | count 3
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | grep vmov.f32 | count 1
define arm_apcscc void @fht(float* nocapture %fz, i16 signext %n) nounwind {
entry:
diff --git a/test/CodeGen/Thumb2/load-global.ll b/test/CodeGen/Thumb2/load-global.ll
index 92866700bd65..46e053ca4ea5 100644
--- a/test/CodeGen/Thumb2/load-global.ll
+++ b/test/CodeGen/Thumb2/load-global.ll
@@ -14,7 +14,7 @@ define i32 @test1() {
; PIC: _test1
; PIC: add r0, pc
-; PIC: .long L_G$non_lazy_ptr-(LPC1_0+4)
+; PIC: .long L_G$non_lazy_ptr-(LPC0_0+4)
; LINUX: test1
; LINUX: .long G(GOT)
diff --git a/test/CodeGen/Thumb2/lsr-deficiency.ll b/test/CodeGen/Thumb2/lsr-deficiency.ll
index ac2cd34e4b34..2038606aa8c4 100644
--- a/test/CodeGen/Thumb2/lsr-deficiency.ll
+++ b/test/CodeGen/Thumb2/lsr-deficiency.ll
@@ -19,7 +19,7 @@ entry:
br label %bb
bb: ; preds = %bb, %entry
-; CHECK: LBB1_1:
+; CHECK: LBB0_1:
; CHECK: cmp r2, #0
; CHECK: sub.w r9, r2, #1
; CHECK: mov r2, r9
diff --git a/test/CodeGen/Thumb2/machine-licm.ll b/test/CodeGen/Thumb2/machine-licm.ll
index 53ff53725c57..c298aa2dd936 100644
--- a/test/CodeGen/Thumb2/machine-licm.ll
+++ b/test/CodeGen/Thumb2/machine-licm.ll
@@ -17,22 +17,22 @@ entry:
bb.nph: ; preds = %entry
; CHECK: BB#1
-; CHECK: ldr.n r2, LCPI1_0
+; CHECK: ldr.n r2, LCPI0_0
; CHECK: ldr r2, [r2]
; CHECK: ldr r3, [r2]
-; CHECK: LBB1_2
-; CHECK: LCPI1_0:
+; CHECK: LBB0_2
+; CHECK: LCPI0_0:
; CHECK-NOT: LCPI1_1:
; CHECK: .section
; PIC: BB#1
-; PIC: ldr.n r2, LCPI1_0
+; PIC: ldr.n r2, LCPI0_0
; PIC: add r2, pc
; PIC: ldr r2, [r2]
; PIC: ldr r3, [r2]
-; PIC: LBB1_2
-; PIC: LCPI1_0:
-; PIC-NOT: LCPI1_1:
+; PIC: LBB0_2
+; PIC: LCPI0_0:
+; PIC-NOT: LCPI0_1:
; PIC: .section
%.pre = load i32* @GV, align 4 ; <i32> [#uses=1]
br label %bb
diff --git a/test/CodeGen/Thumb2/thumb2-branch.ll b/test/CodeGen/Thumb2/thumb2-branch.ll
index 129838457b26..1d2af7a54745 100644
--- a/test/CodeGen/Thumb2/thumb2-branch.ll
+++ b/test/CodeGen/Thumb2/thumb2-branch.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 | FileCheck %s
-define void @f1(i32 %a, i32 %b, i32* %v) {
+define i32 @f1(i32 %a, i32 %b, i32* %v) {
entry:
; CHECK: f1:
; CHECK: bne LBB
@@ -9,13 +9,13 @@ entry:
cond_true: ; preds = %entry
store i32 0, i32* %v
- ret void
+ ret i32 0
return: ; preds = %entry
- ret void
+ ret i32 1
}
-define void @f2(i32 %a, i32 %b, i32* %v) {
+define i32 @f2(i32 %a, i32 %b, i32* %v) {
entry:
; CHECK: f2:
; CHECK: bge LBB
@@ -24,13 +24,13 @@ entry:
cond_true: ; preds = %entry
store i32 0, i32* %v
- ret void
+ ret i32 0
return: ; preds = %entry
- ret void
+ ret i32 1
}
-define void @f3(i32 %a, i32 %b, i32* %v) {
+define i32 @f3(i32 %a, i32 %b, i32* %v) {
entry:
; CHECK: f3:
; CHECK: bhs LBB
@@ -39,13 +39,13 @@ entry:
cond_true: ; preds = %entry
store i32 0, i32* %v
- ret void
+ ret i32 0
return: ; preds = %entry
- ret void
+ ret i32 1
}
-define void @f4(i32 %a, i32 %b, i32* %v) {
+define i32 @f4(i32 %a, i32 %b, i32* %v) {
entry:
; CHECK: f4:
; CHECK: blo LBB
@@ -54,8 +54,8 @@ entry:
cond_true: ; preds = %entry
store i32 0, i32* %v
- ret void
+ ret i32 0
return: ; preds = %entry
- ret void
+ ret i32 1
}
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt3.ll b/test/CodeGen/Thumb2/thumb2-ifcvt3.ll
index 496158c9a0c0..e465c00eae9f 100644
--- a/test/CodeGen/Thumb2/thumb2-ifcvt3.ll
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt3.ll
@@ -23,7 +23,7 @@ bb52: ; preds = %newFuncRoot
; CHECK: movne
; CHECK: moveq
; CHECK: pop
-; CHECK-NEXT: LBB1_1:
+; CHECK-NEXT: LBB0_1:
%0 = load i64* @posed, align 4 ; <i64> [#uses=3]
%1 = sub i64 %0, %.reload78 ; <i64> [#uses=1]
%2 = ashr i64 %1, 1 ; <i64> [#uses=3]
diff --git a/test/CodeGen/X86/2007-01-08-InstrSched.ll b/test/CodeGen/X86/2007-01-08-InstrSched.ll
index 58e186b4c4f8..ef19d72150a0 100644
--- a/test/CodeGen/X86/2007-01-08-InstrSched.ll
+++ b/test/CodeGen/X86/2007-01-08-InstrSched.ll
@@ -11,12 +11,12 @@ define float @foo(float %x) nounwind {
%tmp14 = fadd float %tmp12, %tmp7
ret float %tmp14
-; CHECK: mulss LCPI1_0(%rip)
-; CHECK: mulss LCPI1_1(%rip)
+; CHECK: mulss LCPI0_0(%rip)
+; CHECK: mulss LCPI0_1(%rip)
; CHECK: addss
-; CHECK: mulss LCPI1_2(%rip)
+; CHECK: mulss LCPI0_2(%rip)
; CHECK: addss
-; CHECK: mulss LCPI1_3(%rip)
+; CHECK: mulss LCPI0_3(%rip)
; CHECK: addss
; CHECK: ret
}
diff --git a/test/CodeGen/X86/2007-05-05-Personality.ll b/test/CodeGen/X86/2007-05-05-Personality.ll
index c92783e5e4eb..a9b17d3b8f36 100644
--- a/test/CodeGen/X86/2007-05-05-Personality.ll
+++ b/test/CodeGen/X86/2007-05-05-Personality.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i686-pc-linux-gnu -enable-eh -o - | grep zPL
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -o - | grep zPL
@error = external global i8 ; <i8*> [#uses=2]
diff --git a/test/CodeGen/X86/2007-05-07-InvokeSRet.ll b/test/CodeGen/X86/2007-05-07-InvokeSRet.ll
index a3ff2f60c8d7..ae49bd002208 100644
--- a/test/CodeGen/X86/2007-05-07-InvokeSRet.ll
+++ b/test/CodeGen/X86/2007-05-07-InvokeSRet.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i686-pc-linux-gnu -enable-eh -disable-fp-elim | not grep {addl .12, %esp}
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -disable-fp-elim | not grep {addl .12, %esp}
; PR1398
%struct.S = type { i32, i32 }
diff --git a/test/CodeGen/X86/2007-06-04-tailmerge4.ll b/test/CodeGen/X86/2007-06-04-tailmerge4.ll
index baf2377c5a02..d5ec0898b9a9 100644
--- a/test/CodeGen/X86/2007-06-04-tailmerge4.ll
+++ b/test/CodeGen/X86/2007-06-04-tailmerge4.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -enable-eh -asm-verbose | grep invcont131
+; RUN: llc < %s -asm-verbose | grep invcont131
; PR 1496: tail merge was incorrectly removing this block
; ModuleID = 'report.1.bc'
diff --git a/test/CodeGen/X86/2007-09-17-ObjcFrameEH.ll b/test/CodeGen/X86/2007-09-17-ObjcFrameEH.ll
index 56ee2a314990..c3403a0b4eeb 100644
--- a/test/CodeGen/X86/2007-09-17-ObjcFrameEH.ll
+++ b/test/CodeGen/X86/2007-09-17-ObjcFrameEH.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin -enable-eh | grep {isNullOrNil].eh"} | count 2
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin | grep {isNullOrNil].eh"} | count 2
%struct.NSString = type { }
%struct._objc__method_prototype_list = type opaque
diff --git a/test/CodeGen/X86/2008-01-25-EmptyFunction.ll b/test/CodeGen/X86/2008-01-25-EmptyFunction.ll
index 387645f74366..b936686798f0 100644
--- a/test/CodeGen/X86/2008-01-25-EmptyFunction.ll
+++ b/test/CodeGen/X86/2008-01-25-EmptyFunction.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | grep {.byte 0}
+; RUN: llc < %s -march=x86 | grep nop
target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/X86/2008-07-11-SpillerBug.ll b/test/CodeGen/X86/2008-07-11-SpillerBug.ll
index 548b44db6d27..d0023b28c6dd 100644
--- a/test/CodeGen/X86/2008-07-11-SpillerBug.ll
+++ b/test/CodeGen/X86/2008-07-11-SpillerBug.ll
@@ -3,6 +3,7 @@
; CHECK: andl $65534, %
; CHECK-NEXT: movl %
+; CHECK-NEXT: movzwl
; CHECK-NEXT: movl $17
@g_5 = external global i16 ; <i16*> [#uses=2]
diff --git a/test/CodeGen/X86/2008-10-16-SpillerBug.ll b/test/CodeGen/X86/2008-10-16-SpillerBug.ll
index b8ca364d1798..87305a0b3116 100644
--- a/test/CodeGen/X86/2008-10-16-SpillerBug.ll
+++ b/test/CodeGen/X86/2008-10-16-SpillerBug.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin | grep {andl.*7.*edi}
+; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin -stats |& grep asm-printer | grep 41
+; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin | FileCheck %s
%struct.XXDActiveTextureTargets = type { i64, i64, i64, i64, i64, i64 }
%struct.XXDAlphaTest = type { float, i16, i8, i8 }
@@ -61,11 +62,15 @@
define void @t(%struct.XXDState* %gldst, <4 x float>* %prgrm, <4 x float>** %buffs, %struct._XXVMConstants* %cnstn, %struct.YYToken* %pstrm, %struct.XXVMVPContext* %vmctx, %struct.XXVMTextures* %txtrs, %struct.XXVMVPStack* %vpstk, <4 x float>* %atr0, <4 x float>* %atr1, <4 x float>* %atr2, <4 x float>* %atr3, <4 x float>* %vtx0, <4 x float>* %vtx1, <4 x float>* %vtx2, <4 x float>* %vtx3, [4 x <4 x float>]* %tmpGbl, i32* %oldMsk, <4 x i32>* %adrGbl, i64 %key_token) nounwind {
entry:
+; CHECK: t:
%0 = trunc i64 %key_token to i32 ; <i32> [#uses=1]
%1 = getelementptr %struct.YYToken* %pstrm, i32 %0 ; <%struct.YYToken*> [#uses=5]
br label %bb1132
bb51: ; preds = %bb1132
+; CHECK: .align 4
+; CHECK: xorl %ecx, %ecx
+; CHECK: andl $7
%2 = getelementptr %struct.YYToken* %1, i32 %operation.0.rec, i32 0, i32 0 ; <i16*> [#uses=1]
%3 = load i16* %2, align 1 ; <i16> [#uses=3]
%4 = lshr i16 %3, 6 ; <i16> [#uses=1]
diff --git a/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll b/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
index a6cabc4fd331..5eba9b9c3022 100644
--- a/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
+++ b/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
@@ -3,7 +3,7 @@
; Make sure the copy after inline asm is not coalesced away.
; CHECK: ## InlineAsm End
-; CHECK-NEXT: BB1_2:
+; CHECK-NEXT: BB0_2:
; CHECK-NEXT: movl %esi, %eax
diff --git a/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll b/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
index abbe97ac193e..69f644f5834b 100644
--- a/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
+++ b/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
@@ -4,7 +4,7 @@
; rdar://6808032
; CHECK: pextrw $14
-; CHECK-NEXT: movzbl
+; CHECK-NEXT: shrl $8
; CHECK-NEXT: (%ebp)
; CHECK-NEXT: pinsrw
diff --git a/test/CodeGen/X86/2009-05-28-DAGCombineCrash.ll b/test/CodeGen/X86/2009-05-28-DAGCombineCrash.ll
index 2fd42f40d891..1d1462075492 100644
--- a/test/CodeGen/X86/2009-05-28-DAGCombineCrash.ll
+++ b/test/CodeGen/X86/2009-05-28-DAGCombineCrash.ll
@@ -1,38 +1,15 @@
; RUN: llc < %s -march=x86-64
- %struct.tempsym_t = type { i8*, i8*, i8*, i8*, i32, i32, i32, i32, i32 }
-
-define fastcc signext i8 @S_next_symbol(%struct.tempsym_t* %symptr) nounwind ssp {
+define fastcc void @S_next_symbol(i448* %P) nounwind ssp {
entry:
- br label %bb116
-
-bb: ; preds = %bb116
- switch i8 undef, label %bb14 [
- i8 9, label %bb116
- i8 32, label %bb116
- i8 10, label %bb116
- i8 13, label %bb116
- i8 12, label %bb116
- ]
+ br label %bb14
bb14: ; preds = %bb
- br i1 undef, label %bb75, label %bb115
-
-bb75: ; preds = %bb14
- %srcval16 = load i448* null, align 8 ; <i448> [#uses=1]
+ %srcval16 = load i448* %P, align 8 ; <i448> [#uses=1]
%tmp = zext i32 undef to i448 ; <i448> [#uses=1]
%tmp15 = shl i448 %tmp, 288 ; <i448> [#uses=1]
%mask = and i448 %srcval16, -2135987035423586845985235064014169866455883682256196619149693890381755748887481053010428711403521 ; <i448> [#uses=1]
%ins = or i448 %tmp15, %mask ; <i448> [#uses=1]
- store i448 %ins, i448* null, align 8
- ret i8 1
-
-bb115: ; preds = %bb14
- ret i8 1
-
-bb116: ; preds = %bb, %bb, %bb, %bb, %bb, %entry
- br i1 undef, label %bb, label %bb117
-
-bb117: ; preds = %bb116
- ret i8 0
+ store i448 %ins, i448* %P, align 8
+ ret void
}
diff --git a/test/CodeGen/X86/2009-11-16-MachineLICM.ll b/test/CodeGen/X86/2009-11-16-MachineLICM.ll
index 8f274df918d1..2ac688fd80af 100644
--- a/test/CodeGen/X86/2009-11-16-MachineLICM.ll
+++ b/test/CodeGen/X86/2009-11-16-MachineLICM.ll
@@ -15,7 +15,7 @@ bb.nph: ; preds = %entry
br label %bb
bb: ; preds = %bb, %bb.nph
-; CHECK: LBB1_2:
+; CHECK: LBB0_2:
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %bb ] ; <i64> [#uses=2]
%tmp9 = shl i64 %indvar, 2 ; <i64> [#uses=4]
%tmp1016 = or i64 %tmp9, 1 ; <i64> [#uses=1]
diff --git a/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll b/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll
index 26bf09c723a0..d33f93ea7b27 100644
--- a/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll
+++ b/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll
@@ -13,7 +13,7 @@ entry:
br label %bb1
bb1:
-; CHECK: LBB1_1:
+; CHECK: LBB0_1:
; CHECK: movaps %xmm0, (%rsp)
%tmp2 = phi i32 [ %tmp3, %bb1 ], [ 0, %entry ]
call void @llvm.memcpy.i64(i8* %tmp1, i8* getelementptr inbounds ([28 x i8]* @str, i64 0, i64 0), i64 28, i32 1)
diff --git a/test/CodeGen/X86/2010-01-07-UAMemFeature.ll b/test/CodeGen/X86/2010-01-07-UAMemFeature.ll
index 3728f15d969c..bb24adb41817 100644
--- a/test/CodeGen/X86/2010-01-07-UAMemFeature.ll
+++ b/test/CodeGen/X86/2010-01-07-UAMemFeature.ll
@@ -6,6 +6,6 @@ target triple = "x86_64-unknown-linux-gnu"
define <4 x float> @foo(<4 x float>* %P, <4 x float> %In) nounwind {
%A = load <4 x float>* %P, align 4
- %B = add <4 x float> %A, %In
+ %B = fadd <4 x float> %A, %In
ret <4 x float> %B
}
diff --git a/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll b/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll
index 172e1c73d568..c6936362048b 100644
--- a/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll
+++ b/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll
@@ -10,7 +10,7 @@ entry:
; CHECK: movl $1
; CHECK: movl (%ebp), %eax
; CHECK: movl 4(%ebp), %edx
-; CHECK: LBB1_1:
+; CHECK: LBB0_1:
; CHECK-NOT: movl $1
; CHECK-NOT: movl $0
; CHECK: addl
diff --git a/test/CodeGen/X86/2010-04-06-SSEDomainFixCrash.ll b/test/CodeGen/X86/2010-04-06-SSEDomainFixCrash.ll
new file mode 100644
index 000000000000..ef1798d1ae43
--- /dev/null
+++ b/test/CodeGen/X86/2010-04-06-SSEDomainFixCrash.ll
@@ -0,0 +1,64 @@
+; RUN: llc < %s -O3 -relocation-model=pic -disable-fp-elim -mcpu=nocona
+;
+; This test case is reduced from Bullet. It crashes SSEDomainFix.
+;
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+target triple = "i386-apple-darwin10.0"
+
+declare i32 @_ZN11HullLibrary16CreateConvexHullERK8HullDescR10HullResult(i8*, i8* nocapture, i8* nocapture) ssp align 2
+
+define void @_ZN17btSoftBodyHelpers4DrawEP10btSoftBodyP12btIDebugDrawi(i8* %psb, i8* %idraw, i32 %drawflags) ssp align 2 {
+entry:
+ br i1 undef, label %bb92, label %bb58
+
+bb58: ; preds = %entry
+ %0 = invoke i32 @_ZN11HullLibrary16CreateConvexHullERK8HullDescR10HullResult(i8* undef, i8* undef, i8* undef)
+ to label %invcont64 unwind label %lpad159 ; <i32> [#uses=0]
+
+invcont64: ; preds = %bb58
+ br i1 undef, label %invcont65, label %bb.i.i
+
+bb.i.i: ; preds = %invcont64
+ %1 = load <4 x float>* undef, align 16 ; <<4 x float>> [#uses=5]
+ br i1 undef, label %bb.nph.i.i, label %invcont65
+
+bb.nph.i.i: ; preds = %bb.i.i
+ %tmp22.i.i = bitcast <4 x float> %1 to i128 ; <i128> [#uses=1]
+ %tmp23.i.i = trunc i128 %tmp22.i.i to i32 ; <i32> [#uses=1]
+ %2 = bitcast i32 %tmp23.i.i to float ; <float> [#uses=1]
+ %tmp6.i = extractelement <4 x float> %1, i32 1 ; <float> [#uses=1]
+ %tmp2.i = extractelement <4 x float> %1, i32 2 ; <float> [#uses=1]
+ br label %bb1.i.i
+
+bb1.i.i: ; preds = %bb1.i.i, %bb.nph.i.i
+ %.tmp6.0.i.i = phi float [ %tmp2.i, %bb.nph.i.i ], [ %5, %bb1.i.i ] ; <float> [#uses=1]
+ %.tmp5.0.i.i = phi float [ %tmp6.i, %bb.nph.i.i ], [ %4, %bb1.i.i ] ; <float> [#uses=1]
+ %.tmp.0.i.i = phi float [ %2, %bb.nph.i.i ], [ %3, %bb1.i.i ] ; <float> [#uses=1]
+ %3 = fadd float %.tmp.0.i.i, undef ; <float> [#uses=2]
+ %4 = fadd float %.tmp5.0.i.i, undef ; <float> [#uses=2]
+ %5 = fadd float %.tmp6.0.i.i, undef ; <float> [#uses=2]
+ br i1 undef, label %bb2.return.loopexit_crit_edge.i.i, label %bb1.i.i
+
+bb2.return.loopexit_crit_edge.i.i: ; preds = %bb1.i.i
+ %tmp8.i = insertelement <4 x float> %1, float %3, i32 0 ; <<4 x float>> [#uses=1]
+ %tmp4.i = insertelement <4 x float> %tmp8.i, float %4, i32 1 ; <<4 x float>> [#uses=1]
+ %tmp.i = insertelement <4 x float> %tmp4.i, float %5, i32 2 ; <<4 x float>> [#uses=1]
+ br label %invcont65
+
+invcont65: ; preds = %bb2.return.loopexit_crit_edge.i.i, %bb.i.i, %invcont64
+ %.0.i = phi <4 x float> [ %tmp.i, %bb2.return.loopexit_crit_edge.i.i ], [ undef, %invcont64 ], [ %1, %bb.i.i ] ; <<4 x float>> [#uses=1]
+ %tmp15.i = extractelement <4 x float> %.0.i, i32 2 ; <float> [#uses=1]
+ %6 = fmul float %tmp15.i, undef ; <float> [#uses=1]
+ br label %bb.i265
+
+bb.i265: ; preds = %bb.i265, %invcont65
+ %7 = fsub float 0.000000e+00, %6 ; <float> [#uses=1]
+ store float %7, float* undef, align 4
+ br label %bb.i265
+
+bb92: ; preds = %entry
+ unreachable
+
+lpad159: ; preds = %bb58
+ unreachable
+}
diff --git a/test/CodeGen/X86/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/X86/2010-04-07-DbgValueOtherTargets.ll
new file mode 100644
index 000000000000..76cc1a497d3d
--- /dev/null
+++ b/test/CodeGen/X86/2010-04-07-DbgValueOtherTargets.ll
@@ -0,0 +1,34 @@
+; RUN: llc -O0 -march=x86 -asm-verbose < %s | FileCheck %s
+; RUN: llc -O0 -march=x86-64 -asm-verbose < %s | FileCheck %s
+; Check that DEBUG_VALUE comments come through on a variety of targets.
+
+%tart.reflect.ComplexType = type { double, double }
+
+@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
+
+define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+entry:
+; CHECK: DEBUG_VALUE
+ tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
+ tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
+ ret i32 3
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
+
+!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
+!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
+!3 = metadata !{metadata !4, metadata !6, metadata !7}
+!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
+!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
+!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
+!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
+!9 = metadata !{i32 458763, metadata !10} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
diff --git a/test/CodeGen/X86/2010-04-08-CoalescerBug.ll b/test/CodeGen/X86/2010-04-08-CoalescerBug.ll
new file mode 100644
index 000000000000..1c7c28c68e9f
--- /dev/null
+++ b/test/CodeGen/X86/2010-04-08-CoalescerBug.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; rdar://7842028
+
+; Do not delete partially dead copy instructions.
+; %RDI<def,dead> = MOV64rr %RAX<kill>, %EDI<imp-def>
+; REP_MOVSD %ECX<imp-def,dead>, %EDI<imp-def,dead>, %ESI<imp-def,dead>, %ECX<imp-use,kill>, %EDI<imp-use,kill>, %ESI<imp-use,kill>
+
+
+%struct.F = type { %struct.FC*, i32, i32, i8, i32, i32, i32 }
+%struct.FC = type { [10 x i8], [32 x i32], %struct.FC*, i32 }
+
+define void @t(%struct.F* %this) nounwind {
+entry:
+; CHECK: t:
+; CHECK: addq $12, %rsi
+ %BitValueArray = alloca [32 x i32], align 4
+ %tmp2 = getelementptr inbounds %struct.F* %this, i64 0, i32 0
+ %tmp3 = load %struct.FC** %tmp2, align 8
+ %tmp4 = getelementptr inbounds %struct.FC* %tmp3, i64 0, i32 1, i64 0
+ %tmp5 = bitcast [32 x i32]* %BitValueArray to i8*
+ %tmp6 = bitcast i32* %tmp4 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp5, i8* %tmp6, i64 128, i32 4, i1 false)
+ unreachable
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/test/CodeGen/X86/2010-04-13-AnalyzeBranchCrash.ll b/test/CodeGen/X86/2010-04-13-AnalyzeBranchCrash.ll
new file mode 100644
index 000000000000..fadbd2191989
--- /dev/null
+++ b/test/CodeGen/X86/2010-04-13-AnalyzeBranchCrash.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=core2
+; rdar://7857830
+
+%0 = type opaque
+%1 = type opaque
+
+define void @t(%0* %self, i8* nocapture %_cmd, %1* %scroller, i32 %hitPart, float %multiplier) nounwind optsize ssp {
+entry:
+ switch i32 %hitPart, label %if.else [
+ i32 7, label %if.then
+ i32 8, label %if.then
+ ]
+
+if.then: ; preds = %entry, %entry
+ %tmp69 = load float* null, align 4 ; <float> [#uses=1]
+ %cmp19 = icmp eq %1* null, %scroller ; <i1> [#uses=2]
+ %cond = select i1 %cmp19, float %tmp69, float 0.000000e+00 ; <float> [#uses=1]
+ %call36 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*)*)(i8* undef, i8* undef) nounwind optsize ; <i64> [#uses=2]
+ br i1 %cmp19, label %cond.true32, label %cond.false39
+
+cond.true32: ; preds = %if.then
+ %sroa.store.elt68 = lshr i64 %call36, 32 ; <i64> [#uses=1]
+ %0 = trunc i64 %sroa.store.elt68 to i32 ; <i32> [#uses=1]
+ br label %cond.end47
+
+cond.false39: ; preds = %if.then
+ %1 = trunc i64 %call36 to i32 ; <i32> [#uses=1]
+ br label %cond.end47
+
+cond.end47: ; preds = %cond.false39, %cond.true32
+ %cond48.in = phi i32 [ %0, %cond.true32 ], [ %1, %cond.false39 ] ; <i32> [#uses=1]
+ %cond48 = bitcast i32 %cond48.in to float ; <float> [#uses=1]
+ %div = fdiv float %cond, undef ; <float> [#uses=1]
+ %div58 = fdiv float %div, %cond48 ; <float> [#uses=1]
+ call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, float)*)(i8* undef, i8* undef, float %div58) nounwind optsize
+ ret void
+
+if.else: ; preds = %entry
+ ret void
+}
+
+declare i8* @objc_msgSend(i8*, i8*, ...)
diff --git a/test/CodeGen/X86/2010-04-21-CoalescerBug.ll b/test/CodeGen/X86/2010-04-21-CoalescerBug.ll
new file mode 100644
index 000000000000..d5987645cfca
--- /dev/null
+++ b/test/CodeGen/X86/2010-04-21-CoalescerBug.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin
+; rdar://7886733
+
+%struct.CMTime = type <{ i64, i32, i32, i64 }>
+%struct.CMTimeMapping = type { %struct.CMTimeRange, %struct.CMTimeRange }
+%struct.CMTimeRange = type { %struct.CMTime, %struct.CMTime }
+
+define void @t(%struct.CMTimeMapping* noalias nocapture sret %agg.result) nounwind optsize ssp {
+entry:
+ %agg.result1 = bitcast %struct.CMTimeMapping* %agg.result to i8* ; <i8*> [#uses=1]
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %agg.result1, i8* null, i64 96, i32 4, i1 false)
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll b/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll
new file mode 100644
index 000000000000..4cd3be35e820
--- /dev/null
+++ b/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll
@@ -0,0 +1,45 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | FileCheck %s
+
+define void @ti8(double %a, double %b) nounwind {
+entry:
+ %tmp1 = bitcast double %a to <8 x i8>
+; CHECK: movdq2q
+ %tmp2 = bitcast double %b to <8 x i8>
+; CHECK: movdq2q
+ %tmp3 = add <8 x i8> %tmp1, %tmp2
+ store <8 x i8> %tmp3, <8 x i8>* null
+ ret void
+}
+
+define void @ti16(double %a, double %b) nounwind {
+entry:
+ %tmp1 = bitcast double %a to <4 x i16>
+; CHECK: movdq2q
+ %tmp2 = bitcast double %b to <4 x i16>
+; CHECK: movdq2q
+ %tmp3 = add <4 x i16> %tmp1, %tmp2
+ store <4 x i16> %tmp3, <4 x i16>* null
+ ret void
+}
+
+define void @ti32(double %a, double %b) nounwind {
+entry:
+ %tmp1 = bitcast double %a to <2 x i32>
+; CHECK: movdq2q
+ %tmp2 = bitcast double %b to <2 x i32>
+; CHECK: movdq2q
+ %tmp3 = add <2 x i32> %tmp1, %tmp2
+ store <2 x i32> %tmp3, <2 x i32>* null
+ ret void
+}
+
+define void @ti64(double %a, double %b) nounwind {
+entry:
+ %tmp1 = bitcast double %a to <1 x i64>
+; CHECK: movdq2q
+ %tmp2 = bitcast double %b to <1 x i64>
+; CHECK: movdq2q
+ %tmp3 = add <1 x i64> %tmp1, %tmp2
+ store <1 x i64> %tmp3, <1 x i64>* null
+ ret void
+}
diff --git a/test/CodeGen/X86/2010-04-29-CoalescerCrash.ll b/test/CodeGen/X86/2010-04-29-CoalescerCrash.ll
new file mode 100644
index 000000000000..a22f38ae3baa
--- /dev/null
+++ b/test/CodeGen/X86/2010-04-29-CoalescerCrash.ll
@@ -0,0 +1,142 @@
+; RUN: llc < %s -relocation-model=pic -disable-fp-elim -verify-machineinstrs
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @_ZN12_GLOBAL__N_113SPUAsmPrinter15EmitInstructionEPKN4llvm12MachineInstrE(i8* %this, i8* %MI) nounwind inlinehint align 2 {
+entry:
+ br i1 undef, label %"3.i", label %"4.i"
+
+"3.i": ; preds = %entry
+ unreachable
+
+"4.i": ; preds = %entry
+ switch i32 undef, label %_ZN12_GLOBAL__N_113SPUAsmPrinter16printInstructionEPKN4llvm12MachineInstrERNS1_11raw_ostreamE.exit [
+ i32 1, label %"5.i"
+ i32 2, label %"6.i"
+ i32 3, label %"7.i"
+ i32 4, label %"8.i"
+ i32 5, label %"9.i"
+ ]
+
+"5.i": ; preds = %"4.i"
+ unreachable
+
+"6.i": ; preds = %"4.i"
+ switch i32 undef, label %"11.i" [
+ i32 1, label %"12.i"
+ i32 2, label %"13.i"
+ i32 3, label %_ZN12_GLOBAL__N_113SPUAsmPrinter16printInstructionEPKN4llvm12MachineInstrERNS1_11raw_ostreamE.exit
+ i32 4, label %"14.i"
+ ]
+
+"7.i": ; preds = %"4.i"
+ unreachable
+
+"8.i": ; preds = %"4.i"
+ unreachable
+
+"9.i": ; preds = %"4.i"
+ unreachable
+
+"11.i": ; preds = %"6.i"
+ switch i32 undef, label %"15.i" [
+ i32 1, label %"16.i"
+ i32 2, label %"17.i"
+ i32 3, label %"18.i"
+ i32 4, label %"19.i"
+ i32 5, label %"20.i"
+ i32 6, label %"21.i"
+ i32 7, label %"24.i"
+ i32 8, label %"27.i"
+ i32 9, label %"28.i"
+ i32 10, label %"29.i"
+ i32 11, label %"30.i"
+ i32 12, label %"31.i"
+ i32 13, label %"32.i"
+ i32 14, label %"39.i"
+ ]
+
+"12.i": ; preds = %"6.i"
+ unreachable
+
+"13.i": ; preds = %"6.i"
+ unreachable
+
+"14.i": ; preds = %"6.i"
+ unreachable
+
+"15.i": ; preds = %"11.i"
+ unreachable
+
+"16.i": ; preds = %"11.i"
+ unreachable
+
+"17.i": ; preds = %"11.i"
+ unreachable
+
+"18.i": ; preds = %"11.i"
+ unreachable
+
+"19.i": ; preds = %"11.i"
+ unreachable
+
+"20.i": ; preds = %"11.i"
+ unreachable
+
+"21.i": ; preds = %"11.i"
+ br i1 undef, label %"22.i", label %"23.i"
+
+"22.i": ; preds = %"21.i"
+ unreachable
+
+"23.i": ; preds = %"21.i"
+ unreachable
+
+"24.i": ; preds = %"11.i"
+ unreachable
+
+"27.i": ; preds = %"11.i"
+ unreachable
+
+"28.i": ; preds = %"11.i"
+ unreachable
+
+"29.i": ; preds = %"11.i"
+ unreachable
+
+"30.i": ; preds = %"11.i"
+ unreachable
+
+"31.i": ; preds = %"11.i"
+ unreachable
+
+"32.i": ; preds = %"11.i"
+ unreachable
+
+"39.i": ; preds = %"11.i"
+ br i1 undef, label %"41.i", label %"40.i"
+
+"40.i": ; preds = %"39.i"
+ unreachable
+
+"41.i": ; preds = %"39.i"
+ %0 = call i64 @_ZNK4llvm14MachineOperand6getImmEv(i8 undef) nounwind inlinehint ; <i64> [#uses=1]
+ %1 = trunc i64 %0 to i16 ; <i16> [#uses=1]
+ br i1 undef, label %"42.i", label %"43.i"
+
+"42.i": ; preds = %"41.i"
+ unreachable
+
+"43.i": ; preds = %"41.i"
+ %2 = and i16 %1, -16 ; <i16> [#uses=1]
+ %3 = sext i16 %2 to i64 ; <i64> [#uses=1]
+ %4 = call i8 @_ZN4llvm11raw_ostreamlsEl(i8 undef, i64 %3) nounwind ; <i8> [#uses=0]
+ unreachable
+
+_ZN12_GLOBAL__N_113SPUAsmPrinter16printInstructionEPKN4llvm12MachineInstrERNS1_11raw_ostreamE.exit: ; preds = %"6.i", %"4.i"
+ ret void
+}
+
+declare i64 @_ZNK4llvm14MachineOperand6getImmEv(i8) nounwind inlinehint align 2
+
+declare i8 @_ZN4llvm11raw_ostreamlsEl(i8, i64)
diff --git a/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll b/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll
new file mode 100644
index 000000000000..f5048afd7cc2
--- /dev/null
+++ b/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll
@@ -0,0 +1,143 @@
+; RUN: llc < %s -O0 -regalloc=local -relocation-model=pic -disable-fp-elim | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+target triple = "i386-apple-darwin10.0.0"
+
+%struct.S = type { [2 x i8*] }
+
+@_ZTIi = external constant i8* ; <i8**> [#uses=1]
+@.str = internal constant [4 x i8] c"%p\0A\00" ; <[4 x i8]*> [#uses=1]
+@llvm.used = appending global [1 x i8*] [i8* bitcast (i8* (%struct.S*, i32, %struct.S*)* @_Z4test1SiS_ to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
+
+; Verify that %esi gets spilled before the call.
+; CHECK: Z4test1SiS
+; CHECK: movl %esi,{{.*}}(%ebp)
+; CHECK: call __Z6throwsv
+
+define i8* @_Z4test1SiS_(%struct.S* byval %s1, i32 %n, %struct.S* byval %s2) ssp {
+entry:
+ %retval = alloca i8*, align 4 ; <i8**> [#uses=2]
+ %n.addr = alloca i32, align 4 ; <i32*> [#uses=1]
+ %_rethrow = alloca i8* ; <i8**> [#uses=4]
+ %0 = alloca i32, align 4 ; <i32*> [#uses=1]
+ %cleanup.dst = alloca i32 ; <i32*> [#uses=3]
+ %cleanup.dst7 = alloca i32 ; <i32*> [#uses=6]
+ store i32 %n, i32* %n.addr
+ invoke void @_Z6throwsv()
+ to label %invoke.cont unwind label %try.handler
+
+invoke.cont: ; preds = %entry
+ store i32 1, i32* %cleanup.dst7
+ br label %finally
+
+terminate.handler: ; preds = %match.end
+ %exc = call i8* @llvm.eh.exception() ; <i8*> [#uses=1]
+ %1 = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exc, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i32 1) ; <i32> [#uses=0]
+ call void @_ZSt9terminatev() noreturn nounwind
+ unreachable
+
+try.handler: ; preds = %entry
+ %exc1 = call i8* @llvm.eh.exception() ; <i8*> [#uses=3]
+ %selector = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exc1, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i8* bitcast (i8** @_ZTIi to i8*), i8* null) ; <i32> [#uses=1]
+ %2 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) ; <i32> [#uses=1]
+ %3 = icmp eq i32 %selector, %2 ; <i1> [#uses=1]
+ br i1 %3, label %match, label %catch.next
+
+match: ; preds = %try.handler
+ %4 = call i8* @__cxa_begin_catch(i8* %exc1) ; <i8*> [#uses=1]
+ %5 = bitcast i8* %4 to i32* ; <i32*> [#uses=1]
+ %6 = load i32* %5 ; <i32> [#uses=1]
+ store i32 %6, i32* %0
+ %call = invoke i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), %struct.S* %s2)
+ to label %invoke.cont2 unwind label %match.handler ; <i32> [#uses=0]
+
+invoke.cont2: ; preds = %match
+ store i32 1, i32* %cleanup.dst
+ br label %match.end
+
+match.handler: ; preds = %match
+ %exc3 = call i8* @llvm.eh.exception() ; <i8*> [#uses=2]
+ %7 = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exc3, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i32 0) ; <i32> [#uses=0]
+ store i8* %exc3, i8** %_rethrow
+ store i32 2, i32* %cleanup.dst
+ br label %match.end
+
+cleanup.pad: ; preds = %cleanup.switch
+ store i32 1, i32* %cleanup.dst7
+ br label %finally
+
+cleanup.pad4: ; preds = %cleanup.switch
+ store i32 2, i32* %cleanup.dst7
+ br label %finally
+
+match.end: ; preds = %match.handler, %invoke.cont2
+ invoke void @__cxa_end_catch()
+ to label %invoke.cont5 unwind label %terminate.handler
+
+invoke.cont5: ; preds = %match.end
+ br label %cleanup.switch
+
+cleanup.switch: ; preds = %invoke.cont5
+ %tmp = load i32* %cleanup.dst ; <i32> [#uses=1]
+ switch i32 %tmp, label %cleanup.end [
+ i32 1, label %cleanup.pad
+ i32 2, label %cleanup.pad4
+ ]
+
+cleanup.end: ; preds = %cleanup.switch
+ %exc6 = call i8* @llvm.eh.exception() ; <i8*> [#uses=1]
+ store i8* %exc6, i8** %_rethrow
+ store i32 2, i32* %cleanup.dst7
+ br label %finally
+
+catch.next: ; preds = %try.handler
+ store i8* %exc1, i8** %_rethrow
+ store i32 2, i32* %cleanup.dst7
+ br label %finally
+
+finally: ; preds = %catch.next, %cleanup.end, %cleanup.pad4, %cleanup.pad, %invoke.cont
+ br label %cleanup.switch9
+
+cleanup.switch9: ; preds = %finally
+ %tmp8 = load i32* %cleanup.dst7 ; <i32> [#uses=1]
+ switch i32 %tmp8, label %cleanup.end10 [
+ i32 1, label %finally.end
+ i32 2, label %finally.throw
+ ]
+
+cleanup.end10: ; preds = %cleanup.switch9
+ br label %finally.end
+
+finally.throw: ; preds = %cleanup.switch9
+ %8 = load i8** %_rethrow ; <i8*> [#uses=1]
+ call void @_Unwind_Resume_or_Rethrow(i8* %8)
+ unreachable
+
+finally.end: ; preds = %cleanup.end10, %cleanup.switch9
+ %tmp11 = getelementptr inbounds %struct.S* %s1, i32 0, i32 0 ; <[2 x i8*]*> [#uses=1]
+ %arraydecay = getelementptr inbounds [2 x i8*]* %tmp11, i32 0, i32 0 ; <i8**> [#uses=1]
+ %arrayidx = getelementptr inbounds i8** %arraydecay, i32 1 ; <i8**> [#uses=1]
+ %tmp12 = load i8** %arrayidx ; <i8*> [#uses=1]
+ store i8* %tmp12, i8** %retval
+ %9 = load i8** %retval ; <i8*> [#uses=1]
+ ret i8* %9
+}
+
+declare void @_Z6throwsv() ssp
+
+declare i32 @__gxx_personality_v0(...)
+
+declare i8* @llvm.eh.exception() nounwind readonly
+
+declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
+
+declare void @_ZSt9terminatev()
+
+declare void @_Unwind_Resume_or_Rethrow(i8*)
+
+declare i32 @llvm.eh.typeid.for(i8*) nounwind
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare i32 @printf(i8*, ...)
+
+declare void @__cxa_end_catch()
diff --git a/test/CodeGen/X86/2010-05-03-CoalescerSubRegClobber.ll b/test/CodeGen/X86/2010-05-03-CoalescerSubRegClobber.ll
new file mode 100644
index 000000000000..323925c7ff68
--- /dev/null
+++ b/test/CodeGen/X86/2010-05-03-CoalescerSubRegClobber.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s | FileCheck %s
+; PR6941
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+define i32 @snd_xbytes(i32 %v, i32 %from, i32 %to) nounwind readnone ssp {
+entry:
+ %cmp19 = icmp eq i32 %to, 0 ; <i1> [#uses=1]
+ br i1 %cmp19, label %while.end, label %while.cond
+
+while.cond: ; preds = %entry, %while.cond
+ %y.021 = phi i32 [ %rem, %while.cond ], [ %to, %entry ] ; <i32> [#uses=3]
+ %x.020 = phi i32 [ %y.021, %while.cond ], [ %from, %entry ] ; <i32> [#uses=1]
+ %rem = urem i32 %x.020, %y.021 ; <i32> [#uses=2]
+ %cmp = icmp eq i32 %rem, 0 ; <i1> [#uses=1]
+ br i1 %cmp, label %while.end, label %while.cond
+
+while.end: ; preds = %while.cond, %entry
+ %x.0.lcssa = phi i32 [ %from, %entry ], [ %y.021, %while.cond ] ; <i32> [#uses=2]
+ %div = udiv i32 %from, %x.0.lcssa ; <i32> [#uses=1]
+ %div11 = udiv i32 %to, %x.0.lcssa ; <i32> [#uses=1]
+ %conv = zext i32 %v to i64 ; <i64> [#uses=1]
+ %conv14 = zext i32 %div11 to i64 ; <i64> [#uses=1]
+; Verify that we don't clobber %eax after putting the imulq result in %rax
+; CHECK: imulq %r{{.}}x, %r[[RES:.]]x
+; CHECK-NOT: movl {{.*}}, %e[[RES]]x
+; CHECK: div
+ %mul = mul i64 %conv14, %conv ; <i64> [#uses=1]
+ %conv16 = zext i32 %div to i64 ; <i64> [#uses=1]
+ %div17 = udiv i64 %mul, %conv16 ; <i64> [#uses=1]
+ %conv18 = trunc i64 %div17 to i32 ; <i32> [#uses=1]
+ ret i32 %conv18
+}
diff --git a/test/CodeGen/X86/MachineSink-CritEdge.ll b/test/CodeGen/X86/MachineSink-CritEdge.ll
new file mode 100644
index 000000000000..74a1049772a0
--- /dev/null
+++ b/test/CodeGen/X86/MachineSink-CritEdge.ll
@@ -0,0 +1,58 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+define i32 @f(i32 %x) nounwind ssp {
+entry:
+ %shl.i = shl i32 %x, 12
+ %neg.i = xor i32 %shl.i, -1
+ %add.i = add nsw i32 %neg.i, %x
+ %shr.i = ashr i32 %add.i, 22
+ %xor.i = xor i32 %shr.i, %add.i
+ %shl5.i = shl i32 %xor.i, 13
+ %neg6.i = xor i32 %shl5.i, -1
+ %add8.i = add nsw i32 %xor.i, %neg6.i
+ %shr10.i = ashr i32 %add8.i, 8
+ %xor12.i = xor i32 %shr10.i, %add8.i
+ %add16.i = mul i32 %xor12.i, 9
+ %shr18.i = ashr i32 %add16.i, 15
+ %xor20.i = xor i32 %shr18.i, %add16.i
+ %shl22.i = shl i32 %xor20.i, 27
+ %neg23.i = xor i32 %shl22.i, -1
+ %add25.i = add nsw i32 %xor20.i, %neg23.i
+ %shr27.i = ashr i32 %add25.i, 31
+ %rem = srem i32 %x, 7
+ %cmp = icmp eq i32 %rem, 3
+ br i1 %cmp, label %land.lhs.true, label %do.body.preheader
+
+land.lhs.true:
+ %call3 = tail call i32 @g(i32 %x) nounwind
+ %cmp4 = icmp eq i32 %call3, 10
+ br i1 %cmp4, label %do.body.preheader, label %if.then
+
+; %shl.i should be sunk all the way down to do.body.preheader, but not into the loop.
+; CHECK: do.body.preheader
+; CHECK-NOT: do.body
+; CHECK: shll $12
+
+do.body.preheader:
+ %xor29.i = xor i32 %shr27.i, %add25.i
+ br label %do.body
+
+if.then:
+ %add = add nsw i32 %x, 11
+ ret i32 %add
+
+do.body:
+ %x.addr.1 = phi i32 [ %add9, %do.body ], [ %x, %do.body.preheader ]
+ %xor = xor i32 %xor29.i, %x.addr.1
+ %add9 = add nsw i32 %xor, %x.addr.1
+ %and = and i32 %add9, 13
+ %tobool = icmp eq i32 %and, 0
+ br i1 %tobool, label %if.end, label %do.body
+
+if.end:
+ ret i32 %add9
+}
+
+declare i32 @g(i32)
diff --git a/test/CodeGen/X86/abi-isel.ll b/test/CodeGen/X86/abi-isel.ll
index 920873813e45..23042b6eff3e 100644
--- a/test/CodeGen/X86/abi-isel.ll
+++ b/test/CodeGen/X86/abi-isel.ll
@@ -72,12 +72,12 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _foo00:
-; DARWIN-32-PIC: call L1$pb
-; DARWIN-32-PIC-NEXT: L1$pb:
+; DARWIN-32-PIC: call L0$pb
+; DARWIN-32-PIC-NEXT: L0$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L1$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L0$pb(%eax), %ecx
; DARWIN-32-PIC-NEXT: movl (%ecx), %ecx
-; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L1$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L0$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
; DARWIN-32-PIC-NEXT: ret
@@ -144,12 +144,12 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _fxo00:
-; DARWIN-32-PIC: call L2$pb
-; DARWIN-32-PIC-NEXT: L2$pb:
+; DARWIN-32-PIC: call L1$pb
+; DARWIN-32-PIC-NEXT: L1$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L2$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L1$pb(%eax), %ecx
; DARWIN-32-PIC-NEXT: movl (%ecx), %ecx
-; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L2$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L1$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
; DARWIN-32-PIC-NEXT: ret
@@ -208,11 +208,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _foo01:
-; DARWIN-32-PIC: call L3$pb
-; DARWIN-32-PIC-NEXT: L3$pb:
+; DARWIN-32-PIC: call L2$pb
+; DARWIN-32-PIC-NEXT: L2$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L3$pb(%eax), %ecx
-; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L3$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L2$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L2$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
; DARWIN-32-PIC-NEXT: ret
@@ -268,11 +268,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _fxo01:
-; DARWIN-32-PIC: call L4$pb
-; DARWIN-32-PIC-NEXT: L4$pb:
+; DARWIN-32-PIC: call L3$pb
+; DARWIN-32-PIC-NEXT: L3$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L4$pb(%eax), %ecx
-; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L4$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L3$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L3$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
; DARWIN-32-PIC-NEXT: ret
@@ -342,12 +342,12 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _foo02:
-; DARWIN-32-PIC: call L5$pb
-; DARWIN-32-PIC-NEXT: L5$pb:
+; DARWIN-32-PIC: call L4$pb
+; DARWIN-32-PIC-NEXT: L4$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L5$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L4$pb(%eax), %ecx
; DARWIN-32-PIC-NEXT: movl (%ecx), %ecx
-; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L5$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L4$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl (%eax), %eax
; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
; DARWIN-32-PIC-NEXT: ret
@@ -424,12 +424,12 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _fxo02:
-; DARWIN-32-PIC: call L6$pb
-; DARWIN-32-PIC-NEXT: L6$pb:
+; DARWIN-32-PIC: call L5$pb
+; DARWIN-32-PIC-NEXT: L5$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L6$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L5$pb(%eax), %ecx
; DARWIN-32-PIC-NEXT: movl (%ecx), %ecx
-; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L6$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L5$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl (%eax), %eax
; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
; DARWIN-32-PIC-NEXT: ret
@@ -497,11 +497,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _foo03:
-; DARWIN-32-PIC: call L7$pb
-; DARWIN-32-PIC-NEXT: L7$pb:
+; DARWIN-32-PIC: call L6$pb
+; DARWIN-32-PIC-NEXT: L6$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl _dsrc-L7$pb(%eax), %ecx
-; DARWIN-32-PIC-NEXT: movl %ecx, _ddst-L7$pb(%eax)
+; DARWIN-32-PIC-NEXT: movl _dsrc-L6$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _ddst-L6$pb(%eax)
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _foo03:
@@ -551,11 +551,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _foo04:
-; DARWIN-32-PIC: call L8$pb
-; DARWIN-32-PIC-NEXT: L8$pb:
+; DARWIN-32-PIC: call L7$pb
+; DARWIN-32-PIC-NEXT: L7$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: leal _ddst-L8$pb(%eax), %ecx
-; DARWIN-32-PIC-NEXT: movl %ecx, _dptr-L8$pb(%eax)
+; DARWIN-32-PIC-NEXT: leal _ddst-L7$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _dptr-L7$pb(%eax)
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _foo04:
@@ -619,11 +619,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _foo05:
-; DARWIN-32-PIC: call L9$pb
-; DARWIN-32-PIC-NEXT: L9$pb:
+; DARWIN-32-PIC: call L8$pb
+; DARWIN-32-PIC-NEXT: L8$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl _dsrc-L9$pb(%eax), %ecx
-; DARWIN-32-PIC-NEXT: movl _dptr-L9$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl _dsrc-L8$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl _dptr-L8$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
; DARWIN-32-PIC-NEXT: ret
@@ -682,11 +682,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _foo06:
-; DARWIN-32-PIC: call L10$pb
-; DARWIN-32-PIC-NEXT: L10$pb:
+; DARWIN-32-PIC: call L9$pb
+; DARWIN-32-PIC-NEXT: L9$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl _lsrc-L10$pb(%eax), %ecx
-; DARWIN-32-PIC-NEXT: movl %ecx, _ldst-L10$pb(%eax)
+; DARWIN-32-PIC-NEXT: movl _lsrc-L9$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _ldst-L9$pb(%eax)
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _foo06:
@@ -735,11 +735,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _foo07:
-; DARWIN-32-PIC: call L11$pb
-; DARWIN-32-PIC-NEXT: L11$pb:
+; DARWIN-32-PIC: call L10$pb
+; DARWIN-32-PIC-NEXT: L10$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: leal _ldst-L11$pb(%eax), %ecx
-; DARWIN-32-PIC-NEXT: movl %ecx, _lptr-L11$pb(%eax)
+; DARWIN-32-PIC-NEXT: leal _ldst-L10$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _lptr-L10$pb(%eax)
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _foo07:
@@ -801,11 +801,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _foo08:
-; DARWIN-32-PIC: call L12$pb
-; DARWIN-32-PIC-NEXT: L12$pb:
+; DARWIN-32-PIC: call L11$pb
+; DARWIN-32-PIC-NEXT: L11$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl _lsrc-L12$pb(%eax), %ecx
-; DARWIN-32-PIC-NEXT: movl _lptr-L12$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl _lsrc-L11$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl _lptr-L11$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
; DARWIN-32-PIC-NEXT: ret
@@ -868,12 +868,12 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qux00:
-; DARWIN-32-PIC: call L13$pb
-; DARWIN-32-PIC-NEXT: L13$pb:
+; DARWIN-32-PIC: call L12$pb
+; DARWIN-32-PIC-NEXT: L12$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L13$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L12$pb(%eax), %ecx
; DARWIN-32-PIC-NEXT: movl 64(%ecx), %ecx
-; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L13$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L12$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl %ecx, 64(%eax)
; DARWIN-32-PIC-NEXT: ret
@@ -939,12 +939,12 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qxx00:
-; DARWIN-32-PIC: call L14$pb
-; DARWIN-32-PIC-NEXT: L14$pb:
+; DARWIN-32-PIC: call L13$pb
+; DARWIN-32-PIC-NEXT: L13$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L14$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L13$pb(%eax), %ecx
; DARWIN-32-PIC-NEXT: movl 64(%ecx), %ecx
-; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L14$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L13$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl %ecx, 64(%eax)
; DARWIN-32-PIC-NEXT: ret
@@ -1005,12 +1005,12 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qux01:
-; DARWIN-32-PIC: call L15$pb
-; DARWIN-32-PIC-NEXT: L15$pb:
+; DARWIN-32-PIC: call L14$pb
+; DARWIN-32-PIC-NEXT: L14$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L15$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L14$pb(%eax), %ecx
; DARWIN-32-PIC-NEXT: addl $64, %ecx
-; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L15$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L14$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
; DARWIN-32-PIC-NEXT: ret
@@ -1071,12 +1071,12 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qxx01:
-; DARWIN-32-PIC: call L16$pb
-; DARWIN-32-PIC-NEXT: L16$pb:
+; DARWIN-32-PIC: call L15$pb
+; DARWIN-32-PIC-NEXT: L15$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L16$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L15$pb(%eax), %ecx
; DARWIN-32-PIC-NEXT: addl $64, %ecx
-; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L16$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L15$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
; DARWIN-32-PIC-NEXT: ret
@@ -1150,12 +1150,12 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qux02:
-; DARWIN-32-PIC: call L17$pb
-; DARWIN-32-PIC-NEXT: L17$pb:
+; DARWIN-32-PIC: call L16$pb
+; DARWIN-32-PIC-NEXT: L16$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L17$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L16$pb(%eax), %ecx
; DARWIN-32-PIC-NEXT: movl 64(%ecx), %ecx
-; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L17$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L16$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl (%eax), %eax
; DARWIN-32-PIC-NEXT: movl %ecx, 64(%eax)
; DARWIN-32-PIC-NEXT: ret
@@ -1233,12 +1233,12 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qxx02:
-; DARWIN-32-PIC: call L18$pb
-; DARWIN-32-PIC-NEXT: L18$pb:
+; DARWIN-32-PIC: call L17$pb
+; DARWIN-32-PIC-NEXT: L17$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L18$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L17$pb(%eax), %ecx
; DARWIN-32-PIC-NEXT: movl 64(%ecx), %ecx
-; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L18$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L17$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl (%eax), %eax
; DARWIN-32-PIC-NEXT: movl %ecx, 64(%eax)
; DARWIN-32-PIC-NEXT: ret
@@ -1306,11 +1306,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qux03:
-; DARWIN-32-PIC: call L19$pb
-; DARWIN-32-PIC-NEXT: L19$pb:
+; DARWIN-32-PIC: call L18$pb
+; DARWIN-32-PIC-NEXT: L18$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl (_dsrc-L19$pb)+64(%eax), %ecx
-; DARWIN-32-PIC-NEXT: movl %ecx, (_ddst-L19$pb)+64(%eax)
+; DARWIN-32-PIC-NEXT: movl (_dsrc-L18$pb)+64(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, (_ddst-L18$pb)+64(%eax)
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _qux03:
@@ -1361,11 +1361,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qux04:
-; DARWIN-32-PIC: call L20$pb
-; DARWIN-32-PIC-NEXT: L20$pb:
+; DARWIN-32-PIC: call L19$pb
+; DARWIN-32-PIC-NEXT: L19$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: leal (_ddst-L20$pb)+64(%eax), %ecx
-; DARWIN-32-PIC-NEXT: movl %ecx, _dptr-L20$pb(%eax)
+; DARWIN-32-PIC-NEXT: leal (_ddst-L19$pb)+64(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _dptr-L19$pb(%eax)
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _qux04:
@@ -1430,11 +1430,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qux05:
-; DARWIN-32-PIC: call L21$pb
-; DARWIN-32-PIC-NEXT: L21$pb:
+; DARWIN-32-PIC: call L20$pb
+; DARWIN-32-PIC-NEXT: L20$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl (_dsrc-L21$pb)+64(%eax), %ecx
-; DARWIN-32-PIC-NEXT: movl _dptr-L21$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl (_dsrc-L20$pb)+64(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl _dptr-L20$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl %ecx, 64(%eax)
; DARWIN-32-PIC-NEXT: ret
@@ -1493,11 +1493,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qux06:
-; DARWIN-32-PIC: call L22$pb
-; DARWIN-32-PIC-NEXT: L22$pb:
+; DARWIN-32-PIC: call L21$pb
+; DARWIN-32-PIC-NEXT: L21$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl (_lsrc-L22$pb)+64(%eax), %ecx
-; DARWIN-32-PIC-NEXT: movl %ecx, (_ldst-L22$pb)+64(%eax)
+; DARWIN-32-PIC-NEXT: movl (_lsrc-L21$pb)+64(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, (_ldst-L21$pb)+64(%eax)
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _qux06:
@@ -1546,11 +1546,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qux07:
-; DARWIN-32-PIC: call L23$pb
-; DARWIN-32-PIC-NEXT: L23$pb:
+; DARWIN-32-PIC: call L22$pb
+; DARWIN-32-PIC-NEXT: L22$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: leal (_ldst-L23$pb)+64(%eax), %ecx
-; DARWIN-32-PIC-NEXT: movl %ecx, _lptr-L23$pb(%eax)
+; DARWIN-32-PIC-NEXT: leal (_ldst-L22$pb)+64(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _lptr-L22$pb(%eax)
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _qux07:
@@ -1613,11 +1613,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qux08:
-; DARWIN-32-PIC: call L24$pb
-; DARWIN-32-PIC-NEXT: L24$pb:
+; DARWIN-32-PIC: call L23$pb
+; DARWIN-32-PIC-NEXT: L23$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl (_lsrc-L24$pb)+64(%eax), %ecx
-; DARWIN-32-PIC-NEXT: movl _lptr-L24$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl (_lsrc-L23$pb)+64(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl _lptr-L23$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl %ecx, 64(%eax)
; DARWIN-32-PIC-NEXT: ret
@@ -1686,13 +1686,13 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ind00:
-; DARWIN-32-PIC: call L25$pb
-; DARWIN-32-PIC-NEXT: L25$pb:
+; DARWIN-32-PIC: call L24$pb
+; DARWIN-32-PIC-NEXT: L24$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L25$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L24$pb(%eax), %edx
; DARWIN-32-PIC-NEXT: movl (%edx,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L25$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L24$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl %edx, (%eax,%ecx,4)
; DARWIN-32-PIC-NEXT: ret
@@ -1764,13 +1764,13 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ixd00:
-; DARWIN-32-PIC: call L26$pb
-; DARWIN-32-PIC-NEXT: L26$pb:
+; DARWIN-32-PIC: call L25$pb
+; DARWIN-32-PIC-NEXT: L25$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L26$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L25$pb(%eax), %edx
; DARWIN-32-PIC-NEXT: movl (%edx,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L26$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L25$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl %edx, (%eax,%ecx,4)
; DARWIN-32-PIC-NEXT: ret
@@ -1840,13 +1840,13 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ind01:
-; DARWIN-32-PIC: call L27$pb
-; DARWIN-32-PIC-NEXT: L27$pb:
+; DARWIN-32-PIC: call L26$pb
+; DARWIN-32-PIC-NEXT: L26$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
; DARWIN-32-PIC-NEXT: shll $2, %ecx
-; DARWIN-32-PIC-NEXT: addl L_dst$non_lazy_ptr-L27$pb(%eax), %ecx
-; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L27$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: addl L_dst$non_lazy_ptr-L26$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L26$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
; DARWIN-32-PIC-NEXT: ret
@@ -1916,13 +1916,13 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ixd01:
-; DARWIN-32-PIC: call L28$pb
-; DARWIN-32-PIC-NEXT: L28$pb:
+; DARWIN-32-PIC: call L27$pb
+; DARWIN-32-PIC-NEXT: L27$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
; DARWIN-32-PIC-NEXT: shll $2, %ecx
-; DARWIN-32-PIC-NEXT: addl L_xdst$non_lazy_ptr-L28$pb(%eax), %ecx
-; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L28$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: addl L_xdst$non_lazy_ptr-L27$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L27$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
; DARWIN-32-PIC-NEXT: ret
@@ -2001,13 +2001,13 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ind02:
-; DARWIN-32-PIC: call L29$pb
-; DARWIN-32-PIC-NEXT: L29$pb:
+; DARWIN-32-PIC: call L28$pb
+; DARWIN-32-PIC-NEXT: L28$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L29$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L28$pb(%eax), %edx
; DARWIN-32-PIC-NEXT: movl (%edx,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L29$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L28$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl (%eax), %eax
; DARWIN-32-PIC-NEXT: movl %edx, (%eax,%ecx,4)
; DARWIN-32-PIC-NEXT: ret
@@ -2090,13 +2090,13 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ixd02:
-; DARWIN-32-PIC: call L30$pb
-; DARWIN-32-PIC-NEXT: L30$pb:
+; DARWIN-32-PIC: call L29$pb
+; DARWIN-32-PIC-NEXT: L29$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L30$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L29$pb(%eax), %edx
; DARWIN-32-PIC-NEXT: movl (%edx,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L30$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L29$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl (%eax), %eax
; DARWIN-32-PIC-NEXT: movl %edx, (%eax,%ecx,4)
; DARWIN-32-PIC-NEXT: ret
@@ -2170,12 +2170,12 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ind03:
-; DARWIN-32-PIC: call L31$pb
-; DARWIN-32-PIC-NEXT: L31$pb:
+; DARWIN-32-PIC: call L30$pb
+; DARWIN-32-PIC-NEXT: L30$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: movl _dsrc-L31$pb(%eax,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: movl %edx, _ddst-L31$pb(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: movl _dsrc-L30$pb(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl %edx, _ddst-L30$pb(%eax,%ecx,4)
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _ind03:
@@ -2242,12 +2242,12 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ind04:
-; DARWIN-32-PIC: call L32$pb
-; DARWIN-32-PIC-NEXT: L32$pb:
+; DARWIN-32-PIC: call L31$pb
+; DARWIN-32-PIC-NEXT: L31$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: leal _ddst-L32$pb(%eax,%ecx,4), %ecx
-; DARWIN-32-PIC-NEXT: movl %ecx, _dptr-L32$pb(%eax)
+; DARWIN-32-PIC-NEXT: leal _ddst-L31$pb(%eax,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _dptr-L31$pb(%eax)
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _ind04:
@@ -2320,12 +2320,12 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ind05:
-; DARWIN-32-PIC: call L33$pb
-; DARWIN-32-PIC-NEXT: L33$pb:
+; DARWIN-32-PIC: call L32$pb
+; DARWIN-32-PIC-NEXT: L32$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: movl _dsrc-L33$pb(%eax,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: movl _dptr-L33$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl _dsrc-L32$pb(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl _dptr-L32$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl %edx, (%eax,%ecx,4)
; DARWIN-32-PIC-NEXT: ret
@@ -2395,12 +2395,12 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ind06:
-; DARWIN-32-PIC: call L34$pb
-; DARWIN-32-PIC-NEXT: L34$pb:
+; DARWIN-32-PIC: call L33$pb
+; DARWIN-32-PIC-NEXT: L33$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: movl _lsrc-L34$pb(%eax,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: movl %edx, _ldst-L34$pb(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: movl _lsrc-L33$pb(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl %edx, _ldst-L33$pb(%eax,%ecx,4)
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _ind06:
@@ -2466,12 +2466,12 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ind07:
-; DARWIN-32-PIC: call L35$pb
-; DARWIN-32-PIC-NEXT: L35$pb:
+; DARWIN-32-PIC: call L34$pb
+; DARWIN-32-PIC-NEXT: L34$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: leal _ldst-L35$pb(%eax,%ecx,4), %ecx
-; DARWIN-32-PIC-NEXT: movl %ecx, _lptr-L35$pb(%eax)
+; DARWIN-32-PIC-NEXT: leal _ldst-L34$pb(%eax,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _lptr-L34$pb(%eax)
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _ind07:
@@ -2543,12 +2543,12 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ind08:
-; DARWIN-32-PIC: call L36$pb
-; DARWIN-32-PIC-NEXT: L36$pb:
+; DARWIN-32-PIC: call L35$pb
+; DARWIN-32-PIC-NEXT: L35$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: movl _lsrc-L36$pb(%eax,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: movl _lptr-L36$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl _lsrc-L35$pb(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl _lptr-L35$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl %edx, (%eax,%ecx,4)
; DARWIN-32-PIC-NEXT: ret
@@ -2621,13 +2621,13 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _off00:
-; DARWIN-32-PIC: call L37$pb
-; DARWIN-32-PIC-NEXT: L37$pb:
+; DARWIN-32-PIC: call L36$pb
+; DARWIN-32-PIC-NEXT: L36$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L37$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L36$pb(%eax), %edx
; DARWIN-32-PIC-NEXT: movl 64(%edx,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L37$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L36$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl %edx, 64(%eax,%ecx,4)
; DARWIN-32-PIC-NEXT: ret
@@ -2700,13 +2700,13 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _oxf00:
-; DARWIN-32-PIC: call L38$pb
-; DARWIN-32-PIC-NEXT: L38$pb:
+; DARWIN-32-PIC: call L37$pb
+; DARWIN-32-PIC-NEXT: L37$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L38$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L37$pb(%eax), %edx
; DARWIN-32-PIC-NEXT: movl 64(%edx,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L38$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L37$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl %edx, 64(%eax,%ecx,4)
; DARWIN-32-PIC-NEXT: ret
@@ -2777,13 +2777,13 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _off01:
-; DARWIN-32-PIC: call L39$pb
-; DARWIN-32-PIC-NEXT: L39$pb:
+; DARWIN-32-PIC: call L38$pb
+; DARWIN-32-PIC-NEXT: L38$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L39$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L38$pb(%eax), %edx
; DARWIN-32-PIC-NEXT: leal 64(%edx,%ecx,4), %ecx
-; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L39$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L38$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
; DARWIN-32-PIC-NEXT: ret
@@ -2854,13 +2854,13 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _oxf01:
-; DARWIN-32-PIC: call L40$pb
-; DARWIN-32-PIC-NEXT: L40$pb:
+; DARWIN-32-PIC: call L39$pb
+; DARWIN-32-PIC-NEXT: L39$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L40$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L39$pb(%eax), %edx
; DARWIN-32-PIC-NEXT: leal 64(%edx,%ecx,4), %ecx
-; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L40$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L39$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
; DARWIN-32-PIC-NEXT: ret
@@ -2940,13 +2940,13 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _off02:
-; DARWIN-32-PIC: call L41$pb
-; DARWIN-32-PIC-NEXT: L41$pb:
+; DARWIN-32-PIC: call L40$pb
+; DARWIN-32-PIC-NEXT: L40$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L41$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L40$pb(%eax), %edx
; DARWIN-32-PIC-NEXT: movl 64(%edx,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L41$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L40$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl (%eax), %eax
; DARWIN-32-PIC-NEXT: movl %edx, 64(%eax,%ecx,4)
; DARWIN-32-PIC-NEXT: ret
@@ -3030,13 +3030,13 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _oxf02:
-; DARWIN-32-PIC: call L42$pb
-; DARWIN-32-PIC-NEXT: L42$pb:
+; DARWIN-32-PIC: call L41$pb
+; DARWIN-32-PIC-NEXT: L41$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L42$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L41$pb(%eax), %edx
; DARWIN-32-PIC-NEXT: movl 64(%edx,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L42$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L41$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl (%eax), %eax
; DARWIN-32-PIC-NEXT: movl %edx, 64(%eax,%ecx,4)
; DARWIN-32-PIC-NEXT: ret
@@ -3111,12 +3111,12 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _off03:
-; DARWIN-32-PIC: call L43$pb
-; DARWIN-32-PIC-NEXT: L43$pb:
+; DARWIN-32-PIC: call L42$pb
+; DARWIN-32-PIC-NEXT: L42$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: movl (_dsrc-L43$pb)+64(%eax,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: movl %edx, (_ddst-L43$pb)+64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: movl (_dsrc-L42$pb)+64(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl %edx, (_ddst-L42$pb)+64(%eax,%ecx,4)
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _off03:
@@ -3184,12 +3184,12 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _off04:
-; DARWIN-32-PIC: call L44$pb
-; DARWIN-32-PIC-NEXT: L44$pb:
+; DARWIN-32-PIC: call L43$pb
+; DARWIN-32-PIC-NEXT: L43$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: leal (_ddst-L44$pb)+64(%eax,%ecx,4), %ecx
-; DARWIN-32-PIC-NEXT: movl %ecx, _dptr-L44$pb(%eax)
+; DARWIN-32-PIC-NEXT: leal (_ddst-L43$pb)+64(%eax,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _dptr-L43$pb(%eax)
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _off04:
@@ -3263,12 +3263,12 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _off05:
-; DARWIN-32-PIC: call L45$pb
-; DARWIN-32-PIC-NEXT: L45$pb:
+; DARWIN-32-PIC: call L44$pb
+; DARWIN-32-PIC-NEXT: L44$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: movl (_dsrc-L45$pb)+64(%eax,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: movl _dptr-L45$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl (_dsrc-L44$pb)+64(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl _dptr-L44$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl %edx, 64(%eax,%ecx,4)
; DARWIN-32-PIC-NEXT: ret
@@ -3339,12 +3339,12 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _off06:
-; DARWIN-32-PIC: call L46$pb
-; DARWIN-32-PIC-NEXT: L46$pb:
+; DARWIN-32-PIC: call L45$pb
+; DARWIN-32-PIC-NEXT: L45$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: movl (_lsrc-L46$pb)+64(%eax,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: movl %edx, (_ldst-L46$pb)+64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: movl (_lsrc-L45$pb)+64(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl %edx, (_ldst-L45$pb)+64(%eax,%ecx,4)
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _off06:
@@ -3411,12 +3411,12 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _off07:
-; DARWIN-32-PIC: call L47$pb
-; DARWIN-32-PIC-NEXT: L47$pb:
+; DARWIN-32-PIC: call L46$pb
+; DARWIN-32-PIC-NEXT: L46$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: leal (_ldst-L47$pb)+64(%eax,%ecx,4), %ecx
-; DARWIN-32-PIC-NEXT: movl %ecx, _lptr-L47$pb(%eax)
+; DARWIN-32-PIC-NEXT: leal (_ldst-L46$pb)+64(%eax,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _lptr-L46$pb(%eax)
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _off07:
@@ -3489,12 +3489,12 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _off08:
-; DARWIN-32-PIC: call L48$pb
-; DARWIN-32-PIC-NEXT: L48$pb:
+; DARWIN-32-PIC: call L47$pb
+; DARWIN-32-PIC-NEXT: L47$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: movl (_lsrc-L48$pb)+64(%eax,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: movl _lptr-L48$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl (_lsrc-L47$pb)+64(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl _lptr-L47$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl %edx, 64(%eax,%ecx,4)
; DARWIN-32-PIC-NEXT: ret
@@ -3560,12 +3560,12 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _moo00:
-; DARWIN-32-PIC: call L49$pb
-; DARWIN-32-PIC-NEXT: L49$pb:
+; DARWIN-32-PIC: call L48$pb
+; DARWIN-32-PIC-NEXT: L48$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L49$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L48$pb(%eax), %ecx
; DARWIN-32-PIC-NEXT: movl 262144(%ecx), %ecx
-; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L49$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L48$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl %ecx, 262144(%eax)
; DARWIN-32-PIC-NEXT: ret
@@ -3626,12 +3626,12 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _moo01:
-; DARWIN-32-PIC: call L50$pb
-; DARWIN-32-PIC-NEXT: L50$pb:
+; DARWIN-32-PIC: call L49$pb
+; DARWIN-32-PIC-NEXT: L49$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl $262144, %ecx
-; DARWIN-32-PIC-NEXT: addl L_dst$non_lazy_ptr-L50$pb(%eax), %ecx
-; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L50$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: addl L_dst$non_lazy_ptr-L49$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L49$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
; DARWIN-32-PIC-NEXT: ret
@@ -3705,12 +3705,12 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _moo02:
-; DARWIN-32-PIC: call L51$pb
-; DARWIN-32-PIC-NEXT: L51$pb:
+; DARWIN-32-PIC: call L50$pb
+; DARWIN-32-PIC-NEXT: L50$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L51$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L50$pb(%eax), %ecx
; DARWIN-32-PIC-NEXT: movl 262144(%ecx), %ecx
-; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L51$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L50$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl (%eax), %eax
; DARWIN-32-PIC-NEXT: movl %ecx, 262144(%eax)
; DARWIN-32-PIC-NEXT: ret
@@ -3778,11 +3778,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _moo03:
-; DARWIN-32-PIC: call L52$pb
-; DARWIN-32-PIC-NEXT: L52$pb:
+; DARWIN-32-PIC: call L51$pb
+; DARWIN-32-PIC-NEXT: L51$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl (_dsrc-L52$pb)+262144(%eax), %ecx
-; DARWIN-32-PIC-NEXT: movl %ecx, (_ddst-L52$pb)+262144(%eax)
+; DARWIN-32-PIC-NEXT: movl (_dsrc-L51$pb)+262144(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, (_ddst-L51$pb)+262144(%eax)
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _moo03:
@@ -3833,11 +3833,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _moo04:
-; DARWIN-32-PIC: call L53$pb
-; DARWIN-32-PIC-NEXT: L53$pb:
+; DARWIN-32-PIC: call L52$pb
+; DARWIN-32-PIC-NEXT: L52$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: leal (_ddst-L53$pb)+262144(%eax), %ecx
-; DARWIN-32-PIC-NEXT: movl %ecx, _dptr-L53$pb(%eax)
+; DARWIN-32-PIC-NEXT: leal (_ddst-L52$pb)+262144(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _dptr-L52$pb(%eax)
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _moo04:
@@ -3902,11 +3902,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _moo05:
-; DARWIN-32-PIC: call L54$pb
-; DARWIN-32-PIC-NEXT: L54$pb:
+; DARWIN-32-PIC: call L53$pb
+; DARWIN-32-PIC-NEXT: L53$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl (_dsrc-L54$pb)+262144(%eax), %ecx
-; DARWIN-32-PIC-NEXT: movl _dptr-L54$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl (_dsrc-L53$pb)+262144(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl _dptr-L53$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl %ecx, 262144(%eax)
; DARWIN-32-PIC-NEXT: ret
@@ -3965,11 +3965,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _moo06:
-; DARWIN-32-PIC: call L55$pb
-; DARWIN-32-PIC-NEXT: L55$pb:
+; DARWIN-32-PIC: call L54$pb
+; DARWIN-32-PIC-NEXT: L54$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl (_lsrc-L55$pb)+262144(%eax), %ecx
-; DARWIN-32-PIC-NEXT: movl %ecx, (_ldst-L55$pb)+262144(%eax)
+; DARWIN-32-PIC-NEXT: movl (_lsrc-L54$pb)+262144(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, (_ldst-L54$pb)+262144(%eax)
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _moo06:
@@ -4018,11 +4018,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _moo07:
-; DARWIN-32-PIC: call L56$pb
-; DARWIN-32-PIC-NEXT: L56$pb:
+; DARWIN-32-PIC: call L55$pb
+; DARWIN-32-PIC-NEXT: L55$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: leal (_ldst-L56$pb)+262144(%eax), %ecx
-; DARWIN-32-PIC-NEXT: movl %ecx, _lptr-L56$pb(%eax)
+; DARWIN-32-PIC-NEXT: leal (_ldst-L55$pb)+262144(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _lptr-L55$pb(%eax)
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _moo07:
@@ -4085,11 +4085,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _moo08:
-; DARWIN-32-PIC: call L57$pb
-; DARWIN-32-PIC-NEXT: L57$pb:
+; DARWIN-32-PIC: call L56$pb
+; DARWIN-32-PIC-NEXT: L56$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl (_lsrc-L57$pb)+262144(%eax), %ecx
-; DARWIN-32-PIC-NEXT: movl _lptr-L57$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl (_lsrc-L56$pb)+262144(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl _lptr-L56$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl %ecx, 262144(%eax)
; DARWIN-32-PIC-NEXT: ret
@@ -4159,13 +4159,13 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _big00:
-; DARWIN-32-PIC: call L58$pb
-; DARWIN-32-PIC-NEXT: L58$pb:
+; DARWIN-32-PIC: call L57$pb
+; DARWIN-32-PIC-NEXT: L57$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L58$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L57$pb(%eax), %edx
; DARWIN-32-PIC-NEXT: movl 262144(%edx,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L58$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L57$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl %edx, 262144(%eax,%ecx,4)
; DARWIN-32-PIC-NEXT: ret
@@ -4236,13 +4236,13 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _big01:
-; DARWIN-32-PIC: call L59$pb
-; DARWIN-32-PIC-NEXT: L59$pb:
+; DARWIN-32-PIC: call L58$pb
+; DARWIN-32-PIC-NEXT: L58$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L59$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L58$pb(%eax), %edx
; DARWIN-32-PIC-NEXT: leal 262144(%edx,%ecx,4), %ecx
-; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L59$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L58$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
; DARWIN-32-PIC-NEXT: ret
@@ -4322,13 +4322,13 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _big02:
-; DARWIN-32-PIC: call L60$pb
-; DARWIN-32-PIC-NEXT: L60$pb:
+; DARWIN-32-PIC: call L59$pb
+; DARWIN-32-PIC-NEXT: L59$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L60$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L59$pb(%eax), %edx
; DARWIN-32-PIC-NEXT: movl 262144(%edx,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L60$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L59$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl (%eax), %eax
; DARWIN-32-PIC-NEXT: movl %edx, 262144(%eax,%ecx,4)
; DARWIN-32-PIC-NEXT: ret
@@ -4403,12 +4403,12 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _big03:
-; DARWIN-32-PIC: call L61$pb
-; DARWIN-32-PIC-NEXT: L61$pb:
+; DARWIN-32-PIC: call L60$pb
+; DARWIN-32-PIC-NEXT: L60$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: movl (_dsrc-L61$pb)+262144(%eax,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: movl %edx, (_ddst-L61$pb)+262144(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: movl (_dsrc-L60$pb)+262144(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl %edx, (_ddst-L60$pb)+262144(%eax,%ecx,4)
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _big03:
@@ -4476,12 +4476,12 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _big04:
-; DARWIN-32-PIC: call L62$pb
-; DARWIN-32-PIC-NEXT: L62$pb:
+; DARWIN-32-PIC: call L61$pb
+; DARWIN-32-PIC-NEXT: L61$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: leal (_ddst-L62$pb)+262144(%eax,%ecx,4), %ecx
-; DARWIN-32-PIC-NEXT: movl %ecx, _dptr-L62$pb(%eax)
+; DARWIN-32-PIC-NEXT: leal (_ddst-L61$pb)+262144(%eax,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _dptr-L61$pb(%eax)
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _big04:
@@ -4555,12 +4555,12 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _big05:
-; DARWIN-32-PIC: call L63$pb
-; DARWIN-32-PIC-NEXT: L63$pb:
+; DARWIN-32-PIC: call L62$pb
+; DARWIN-32-PIC-NEXT: L62$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: movl (_dsrc-L63$pb)+262144(%eax,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: movl _dptr-L63$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl (_dsrc-L62$pb)+262144(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl _dptr-L62$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl %edx, 262144(%eax,%ecx,4)
; DARWIN-32-PIC-NEXT: ret
@@ -4631,12 +4631,12 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _big06:
-; DARWIN-32-PIC: call L64$pb
-; DARWIN-32-PIC-NEXT: L64$pb:
+; DARWIN-32-PIC: call L63$pb
+; DARWIN-32-PIC-NEXT: L63$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: movl (_lsrc-L64$pb)+262144(%eax,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: movl %edx, (_ldst-L64$pb)+262144(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: movl (_lsrc-L63$pb)+262144(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl %edx, (_ldst-L63$pb)+262144(%eax,%ecx,4)
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _big06:
@@ -4703,12 +4703,12 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _big07:
-; DARWIN-32-PIC: call L65$pb
-; DARWIN-32-PIC-NEXT: L65$pb:
+; DARWIN-32-PIC: call L64$pb
+; DARWIN-32-PIC-NEXT: L64$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: leal (_ldst-L65$pb)+262144(%eax,%ecx,4), %ecx
-; DARWIN-32-PIC-NEXT: movl %ecx, _lptr-L65$pb(%eax)
+; DARWIN-32-PIC-NEXT: leal (_ldst-L64$pb)+262144(%eax,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _lptr-L64$pb(%eax)
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _big07:
@@ -4781,12 +4781,12 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _big08:
-; DARWIN-32-PIC: call L66$pb
-; DARWIN-32-PIC-NEXT: L66$pb:
+; DARWIN-32-PIC: call L65$pb
+; DARWIN-32-PIC-NEXT: L65$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: movl (_lsrc-L66$pb)+262144(%eax,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: movl _lptr-L66$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl (_lsrc-L65$pb)+262144(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl _lptr-L65$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl %edx, 262144(%eax,%ecx,4)
; DARWIN-32-PIC-NEXT: ret
@@ -4840,10 +4840,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bar00:
-; DARWIN-32-PIC: call L67$pb
-; DARWIN-32-PIC-NEXT: L67$pb:
+; DARWIN-32-PIC: call L66$pb
+; DARWIN-32-PIC-NEXT: L66$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L67$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L66$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _bar00:
@@ -4887,10 +4887,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bxr00:
-; DARWIN-32-PIC: call L68$pb
-; DARWIN-32-PIC-NEXT: L68$pb:
+; DARWIN-32-PIC: call L67$pb
+; DARWIN-32-PIC-NEXT: L67$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L68$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L67$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _bxr00:
@@ -4934,10 +4934,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bar01:
-; DARWIN-32-PIC: call L69$pb
-; DARWIN-32-PIC-NEXT: L69$pb:
+; DARWIN-32-PIC: call L68$pb
+; DARWIN-32-PIC-NEXT: L68$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L69$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L68$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _bar01:
@@ -4981,10 +4981,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bxr01:
-; DARWIN-32-PIC: call L70$pb
-; DARWIN-32-PIC-NEXT: L70$pb:
+; DARWIN-32-PIC: call L69$pb
+; DARWIN-32-PIC-NEXT: L69$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L70$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L69$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _bxr01:
@@ -5028,10 +5028,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bar02:
-; DARWIN-32-PIC: call L71$pb
-; DARWIN-32-PIC-NEXT: L71$pb:
+; DARWIN-32-PIC: call L70$pb
+; DARWIN-32-PIC-NEXT: L70$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L71$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L70$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _bar02:
@@ -5075,10 +5075,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bar03:
-; DARWIN-32-PIC: call L72$pb
-; DARWIN-32-PIC-NEXT: L72$pb:
+; DARWIN-32-PIC: call L71$pb
+; DARWIN-32-PIC-NEXT: L71$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: leal _dsrc-L72$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal _dsrc-L71$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _bar03:
@@ -5122,10 +5122,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bar04:
-; DARWIN-32-PIC: call L73$pb
-; DARWIN-32-PIC-NEXT: L73$pb:
+; DARWIN-32-PIC: call L72$pb
+; DARWIN-32-PIC-NEXT: L72$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: leal _ddst-L73$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal _ddst-L72$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _bar04:
@@ -5169,10 +5169,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bar05:
-; DARWIN-32-PIC: call L74$pb
-; DARWIN-32-PIC-NEXT: L74$pb:
+; DARWIN-32-PIC: call L73$pb
+; DARWIN-32-PIC-NEXT: L73$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: leal _dptr-L74$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal _dptr-L73$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _bar05:
@@ -5216,10 +5216,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bar06:
-; DARWIN-32-PIC: call L75$pb
-; DARWIN-32-PIC-NEXT: L75$pb:
+; DARWIN-32-PIC: call L74$pb
+; DARWIN-32-PIC-NEXT: L74$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: leal _lsrc-L75$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal _lsrc-L74$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _bar06:
@@ -5263,10 +5263,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bar07:
-; DARWIN-32-PIC: call L76$pb
-; DARWIN-32-PIC-NEXT: L76$pb:
+; DARWIN-32-PIC: call L75$pb
+; DARWIN-32-PIC-NEXT: L75$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: leal _ldst-L76$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal _ldst-L75$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _bar07:
@@ -5310,10 +5310,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bar08:
-; DARWIN-32-PIC: call L77$pb
-; DARWIN-32-PIC-NEXT: L77$pb:
+; DARWIN-32-PIC: call L76$pb
+; DARWIN-32-PIC-NEXT: L76$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: leal _lptr-L77$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal _lptr-L76$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _bar08:
@@ -5357,10 +5357,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _har00:
-; DARWIN-32-PIC: call L78$pb
-; DARWIN-32-PIC-NEXT: L78$pb:
+; DARWIN-32-PIC: call L77$pb
+; DARWIN-32-PIC-NEXT: L77$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L78$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L77$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _har00:
@@ -5404,10 +5404,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _hxr00:
-; DARWIN-32-PIC: call L79$pb
-; DARWIN-32-PIC-NEXT: L79$pb:
+; DARWIN-32-PIC: call L78$pb
+; DARWIN-32-PIC-NEXT: L78$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L79$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L78$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _hxr00:
@@ -5451,10 +5451,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _har01:
-; DARWIN-32-PIC: call L80$pb
-; DARWIN-32-PIC-NEXT: L80$pb:
+; DARWIN-32-PIC: call L79$pb
+; DARWIN-32-PIC-NEXT: L79$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L80$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L79$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _har01:
@@ -5498,10 +5498,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _hxr01:
-; DARWIN-32-PIC: call L81$pb
-; DARWIN-32-PIC-NEXT: L81$pb:
+; DARWIN-32-PIC: call L80$pb
+; DARWIN-32-PIC-NEXT: L80$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L81$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L80$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _hxr01:
@@ -5549,10 +5549,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _har02:
-; DARWIN-32-PIC: call L82$pb
-; DARWIN-32-PIC-NEXT: L82$pb:
+; DARWIN-32-PIC: call L81$pb
+; DARWIN-32-PIC-NEXT: L81$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L82$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L81$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl (%eax), %eax
; DARWIN-32-PIC-NEXT: ret
@@ -5600,10 +5600,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _har03:
-; DARWIN-32-PIC: call L83$pb
-; DARWIN-32-PIC-NEXT: L83$pb:
+; DARWIN-32-PIC: call L82$pb
+; DARWIN-32-PIC-NEXT: L82$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: leal _dsrc-L83$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal _dsrc-L82$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _har03:
@@ -5647,10 +5647,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _har04:
-; DARWIN-32-PIC: call L84$pb
-; DARWIN-32-PIC-NEXT: L84$pb:
+; DARWIN-32-PIC: call L83$pb
+; DARWIN-32-PIC-NEXT: L83$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: leal _ddst-L84$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal _ddst-L83$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _har04:
@@ -5697,10 +5697,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _har05:
-; DARWIN-32-PIC: call L85$pb
-; DARWIN-32-PIC-NEXT: L85$pb:
+; DARWIN-32-PIC: call L84$pb
+; DARWIN-32-PIC-NEXT: L84$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl _dptr-L85$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl _dptr-L84$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _har05:
@@ -5744,10 +5744,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _har06:
-; DARWIN-32-PIC: call L86$pb
-; DARWIN-32-PIC-NEXT: L86$pb:
+; DARWIN-32-PIC: call L85$pb
+; DARWIN-32-PIC-NEXT: L85$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: leal _lsrc-L86$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal _lsrc-L85$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _har06:
@@ -5791,10 +5791,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _har07:
-; DARWIN-32-PIC: call L87$pb
-; DARWIN-32-PIC-NEXT: L87$pb:
+; DARWIN-32-PIC: call L86$pb
+; DARWIN-32-PIC-NEXT: L86$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: leal _ldst-L87$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal _ldst-L86$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _har07:
@@ -5840,10 +5840,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _har08:
-; DARWIN-32-PIC: call L88$pb
-; DARWIN-32-PIC-NEXT: L88$pb:
+; DARWIN-32-PIC: call L87$pb
+; DARWIN-32-PIC-NEXT: L87$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl _lptr-L88$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl _lptr-L87$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _har08:
@@ -5889,10 +5889,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bat00:
-; DARWIN-32-PIC: call L89$pb
-; DARWIN-32-PIC-NEXT: L89$pb:
+; DARWIN-32-PIC: call L88$pb
+; DARWIN-32-PIC-NEXT: L88$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L89$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L88$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: addl $64, %eax
; DARWIN-32-PIC-NEXT: ret
@@ -5942,10 +5942,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bxt00:
-; DARWIN-32-PIC: call L90$pb
-; DARWIN-32-PIC-NEXT: L90$pb:
+; DARWIN-32-PIC: call L89$pb
+; DARWIN-32-PIC-NEXT: L89$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L90$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L89$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: addl $64, %eax
; DARWIN-32-PIC-NEXT: ret
@@ -5995,10 +5995,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bat01:
-; DARWIN-32-PIC: call L91$pb
-; DARWIN-32-PIC-NEXT: L91$pb:
+; DARWIN-32-PIC: call L90$pb
+; DARWIN-32-PIC-NEXT: L90$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L91$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L90$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: addl $64, %eax
; DARWIN-32-PIC-NEXT: ret
@@ -6048,10 +6048,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bxt01:
-; DARWIN-32-PIC: call L92$pb
-; DARWIN-32-PIC-NEXT: L92$pb:
+; DARWIN-32-PIC: call L91$pb
+; DARWIN-32-PIC-NEXT: L91$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L92$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L91$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: addl $64, %eax
; DARWIN-32-PIC-NEXT: ret
@@ -6110,10 +6110,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bat02:
-; DARWIN-32-PIC: call L93$pb
-; DARWIN-32-PIC-NEXT: L93$pb:
+; DARWIN-32-PIC: call L92$pb
+; DARWIN-32-PIC-NEXT: L92$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L93$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L92$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl (%eax), %eax
; DARWIN-32-PIC-NEXT: addl $64, %eax
; DARWIN-32-PIC-NEXT: ret
@@ -6166,10 +6166,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bat03:
-; DARWIN-32-PIC: call L94$pb
-; DARWIN-32-PIC-NEXT: L94$pb:
+; DARWIN-32-PIC: call L93$pb
+; DARWIN-32-PIC-NEXT: L93$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: leal (_dsrc-L94$pb)+64(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal (_dsrc-L93$pb)+64(%eax), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _bat03:
@@ -6214,10 +6214,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bat04:
-; DARWIN-32-PIC: call L95$pb
-; DARWIN-32-PIC-NEXT: L95$pb:
+; DARWIN-32-PIC: call L94$pb
+; DARWIN-32-PIC-NEXT: L94$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: leal (_ddst-L95$pb)+64(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal (_ddst-L94$pb)+64(%eax), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _bat04:
@@ -6271,10 +6271,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bat05:
-; DARWIN-32-PIC: call L96$pb
-; DARWIN-32-PIC-NEXT: L96$pb:
+; DARWIN-32-PIC: call L95$pb
+; DARWIN-32-PIC-NEXT: L95$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl _dptr-L96$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl _dptr-L95$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: addl $64, %eax
; DARWIN-32-PIC-NEXT: ret
@@ -6322,10 +6322,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bat06:
-; DARWIN-32-PIC: call L97$pb
-; DARWIN-32-PIC-NEXT: L97$pb:
+; DARWIN-32-PIC: call L96$pb
+; DARWIN-32-PIC-NEXT: L96$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: leal (_lsrc-L97$pb)+64(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal (_lsrc-L96$pb)+64(%eax), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _bat06:
@@ -6369,10 +6369,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bat07:
-; DARWIN-32-PIC: call L98$pb
-; DARWIN-32-PIC-NEXT: L98$pb:
+; DARWIN-32-PIC: call L97$pb
+; DARWIN-32-PIC-NEXT: L97$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: leal (_ldst-L98$pb)+64(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal (_ldst-L97$pb)+64(%eax), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _bat07:
@@ -6425,10 +6425,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bat08:
-; DARWIN-32-PIC: call L99$pb
-; DARWIN-32-PIC-NEXT: L99$pb:
+; DARWIN-32-PIC: call L98$pb
+; DARWIN-32-PIC-NEXT: L98$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl _lptr-L99$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl _lptr-L98$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: addl $64, %eax
; DARWIN-32-PIC-NEXT: ret
@@ -6478,11 +6478,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bam00:
-; DARWIN-32-PIC: call L100$pb
-; DARWIN-32-PIC-NEXT: L100$pb:
+; DARWIN-32-PIC: call L99$pb
+; DARWIN-32-PIC-NEXT: L99$pb:
; DARWIN-32-PIC-NEXT: popl %ecx
; DARWIN-32-PIC-NEXT: movl $262144, %eax
-; DARWIN-32-PIC-NEXT: addl L_src$non_lazy_ptr-L100$pb(%ecx), %eax
+; DARWIN-32-PIC-NEXT: addl L_src$non_lazy_ptr-L99$pb(%ecx), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _bam00:
@@ -6531,11 +6531,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bam01:
-; DARWIN-32-PIC: call L101$pb
-; DARWIN-32-PIC-NEXT: L101$pb:
+; DARWIN-32-PIC: call L100$pb
+; DARWIN-32-PIC-NEXT: L100$pb:
; DARWIN-32-PIC-NEXT: popl %ecx
; DARWIN-32-PIC-NEXT: movl $262144, %eax
-; DARWIN-32-PIC-NEXT: addl L_dst$non_lazy_ptr-L101$pb(%ecx), %eax
+; DARWIN-32-PIC-NEXT: addl L_dst$non_lazy_ptr-L100$pb(%ecx), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _bam01:
@@ -6584,11 +6584,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bxm01:
-; DARWIN-32-PIC: call L102$pb
-; DARWIN-32-PIC-NEXT: L102$pb:
+; DARWIN-32-PIC: call L101$pb
+; DARWIN-32-PIC-NEXT: L101$pb:
; DARWIN-32-PIC-NEXT: popl %ecx
; DARWIN-32-PIC-NEXT: movl $262144, %eax
-; DARWIN-32-PIC-NEXT: addl L_xdst$non_lazy_ptr-L102$pb(%ecx), %eax
+; DARWIN-32-PIC-NEXT: addl L_xdst$non_lazy_ptr-L101$pb(%ecx), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _bxm01:
@@ -6646,10 +6646,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bam02:
-; DARWIN-32-PIC: call L103$pb
-; DARWIN-32-PIC-NEXT: L103$pb:
+; DARWIN-32-PIC: call L102$pb
+; DARWIN-32-PIC-NEXT: L102$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L103$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L102$pb(%eax), %ecx
; DARWIN-32-PIC-NEXT: movl $262144, %eax
; DARWIN-32-PIC-NEXT: addl (%ecx), %eax
; DARWIN-32-PIC-NEXT: ret
@@ -6702,10 +6702,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bam03:
-; DARWIN-32-PIC: call L104$pb
-; DARWIN-32-PIC-NEXT: L104$pb:
+; DARWIN-32-PIC: call L103$pb
+; DARWIN-32-PIC-NEXT: L103$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: leal (_dsrc-L104$pb)+262144(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal (_dsrc-L103$pb)+262144(%eax), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _bam03:
@@ -6750,10 +6750,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bam04:
-; DARWIN-32-PIC: call L105$pb
-; DARWIN-32-PIC-NEXT: L105$pb:
+; DARWIN-32-PIC: call L104$pb
+; DARWIN-32-PIC-NEXT: L104$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: leal (_ddst-L105$pb)+262144(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal (_ddst-L104$pb)+262144(%eax), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _bam04:
@@ -6807,11 +6807,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bam05:
-; DARWIN-32-PIC: call L106$pb
-; DARWIN-32-PIC-NEXT: L106$pb:
+; DARWIN-32-PIC: call L105$pb
+; DARWIN-32-PIC-NEXT: L105$pb:
; DARWIN-32-PIC-NEXT: popl %ecx
; DARWIN-32-PIC-NEXT: movl $262144, %eax
-; DARWIN-32-PIC-NEXT: addl _dptr-L106$pb(%ecx), %eax
+; DARWIN-32-PIC-NEXT: addl _dptr-L105$pb(%ecx), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _bam05:
@@ -6858,10 +6858,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bam06:
-; DARWIN-32-PIC: call L107$pb
-; DARWIN-32-PIC-NEXT: L107$pb:
+; DARWIN-32-PIC: call L106$pb
+; DARWIN-32-PIC-NEXT: L106$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: leal (_lsrc-L107$pb)+262144(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal (_lsrc-L106$pb)+262144(%eax), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _bam06:
@@ -6905,10 +6905,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bam07:
-; DARWIN-32-PIC: call L108$pb
-; DARWIN-32-PIC-NEXT: L108$pb:
+; DARWIN-32-PIC: call L107$pb
+; DARWIN-32-PIC-NEXT: L107$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: leal (_ldst-L108$pb)+262144(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal (_ldst-L107$pb)+262144(%eax), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _bam07:
@@ -6961,11 +6961,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bam08:
-; DARWIN-32-PIC: call L109$pb
-; DARWIN-32-PIC-NEXT: L109$pb:
+; DARWIN-32-PIC: call L108$pb
+; DARWIN-32-PIC-NEXT: L108$pb:
; DARWIN-32-PIC-NEXT: popl %ecx
; DARWIN-32-PIC-NEXT: movl $262144, %eax
-; DARWIN-32-PIC-NEXT: addl _lptr-L109$pb(%ecx), %eax
+; DARWIN-32-PIC-NEXT: addl _lptr-L108$pb(%ecx), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _bam08:
@@ -7021,11 +7021,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cat00:
-; DARWIN-32-PIC: call L110$pb
-; DARWIN-32-PIC-NEXT: L110$pb:
+; DARWIN-32-PIC: call L109$pb
+; DARWIN-32-PIC-NEXT: L109$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L110$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L109$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: leal 64(%eax,%ecx,4), %eax
; DARWIN-32-PIC-NEXT: ret
@@ -7082,11 +7082,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cxt00:
-; DARWIN-32-PIC: call L111$pb
-; DARWIN-32-PIC-NEXT: L111$pb:
+; DARWIN-32-PIC: call L110$pb
+; DARWIN-32-PIC-NEXT: L110$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L111$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L110$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: leal 64(%eax,%ecx,4), %eax
; DARWIN-32-PIC-NEXT: ret
@@ -7143,11 +7143,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cat01:
-; DARWIN-32-PIC: call L112$pb
-; DARWIN-32-PIC-NEXT: L112$pb:
+; DARWIN-32-PIC: call L111$pb
+; DARWIN-32-PIC-NEXT: L111$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L112$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L111$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: leal 64(%eax,%ecx,4), %eax
; DARWIN-32-PIC-NEXT: ret
@@ -7204,11 +7204,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cxt01:
-; DARWIN-32-PIC: call L113$pb
-; DARWIN-32-PIC-NEXT: L113$pb:
+; DARWIN-32-PIC: call L112$pb
+; DARWIN-32-PIC-NEXT: L112$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L113$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L112$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: leal 64(%eax,%ecx,4), %eax
; DARWIN-32-PIC-NEXT: ret
@@ -7272,10 +7272,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cat02:
-; DARWIN-32-PIC: call L114$pb
-; DARWIN-32-PIC-NEXT: L114$pb:
+; DARWIN-32-PIC: call L113$pb
+; DARWIN-32-PIC-NEXT: L113$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L114$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L113$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl (%eax), %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
; DARWIN-32-PIC-NEXT: leal 64(%eax,%ecx,4), %eax
@@ -7336,11 +7336,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cat03:
-; DARWIN-32-PIC: call L115$pb
-; DARWIN-32-PIC-NEXT: L115$pb:
+; DARWIN-32-PIC: call L114$pb
+; DARWIN-32-PIC-NEXT: L114$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: leal (_dsrc-L115$pb)+64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: leal (_dsrc-L114$pb)+64(%eax,%ecx,4), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _cat03:
@@ -7395,11 +7395,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cat04:
-; DARWIN-32-PIC: call L116$pb
-; DARWIN-32-PIC-NEXT: L116$pb:
+; DARWIN-32-PIC: call L115$pb
+; DARWIN-32-PIC-NEXT: L115$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: leal (_ddst-L116$pb)+64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: leal (_ddst-L115$pb)+64(%eax,%ecx,4), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _cat04:
@@ -7461,11 +7461,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cat05:
-; DARWIN-32-PIC: call L117$pb
-; DARWIN-32-PIC-NEXT: L117$pb:
+; DARWIN-32-PIC: call L116$pb
+; DARWIN-32-PIC-NEXT: L116$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: movl _dptr-L117$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl _dptr-L116$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: leal 64(%eax,%ecx,4), %eax
; DARWIN-32-PIC-NEXT: ret
@@ -7521,11 +7521,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cat06:
-; DARWIN-32-PIC: call L118$pb
-; DARWIN-32-PIC-NEXT: L118$pb:
+; DARWIN-32-PIC: call L117$pb
+; DARWIN-32-PIC-NEXT: L117$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: leal (_lsrc-L118$pb)+64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: leal (_lsrc-L117$pb)+64(%eax,%ecx,4), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _cat06:
@@ -7580,11 +7580,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cat07:
-; DARWIN-32-PIC: call L119$pb
-; DARWIN-32-PIC-NEXT: L119$pb:
+; DARWIN-32-PIC: call L118$pb
+; DARWIN-32-PIC-NEXT: L118$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: leal (_ldst-L119$pb)+64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: leal (_ldst-L118$pb)+64(%eax,%ecx,4), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _cat07:
@@ -7645,11 +7645,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cat08:
-; DARWIN-32-PIC: call L120$pb
-; DARWIN-32-PIC-NEXT: L120$pb:
+; DARWIN-32-PIC: call L119$pb
+; DARWIN-32-PIC-NEXT: L119$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: movl _lptr-L120$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl _lptr-L119$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: leal 64(%eax,%ecx,4), %eax
; DARWIN-32-PIC-NEXT: ret
@@ -7706,11 +7706,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cam00:
-; DARWIN-32-PIC: call L121$pb
-; DARWIN-32-PIC-NEXT: L121$pb:
+; DARWIN-32-PIC: call L120$pb
+; DARWIN-32-PIC-NEXT: L120$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L121$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L120$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: leal 262144(%eax,%ecx,4), %eax
; DARWIN-32-PIC-NEXT: ret
@@ -7767,11 +7767,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cxm00:
-; DARWIN-32-PIC: call L122$pb
-; DARWIN-32-PIC-NEXT: L122$pb:
+; DARWIN-32-PIC: call L121$pb
+; DARWIN-32-PIC-NEXT: L121$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L122$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L121$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: leal 262144(%eax,%ecx,4), %eax
; DARWIN-32-PIC-NEXT: ret
@@ -7828,11 +7828,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cam01:
-; DARWIN-32-PIC: call L123$pb
-; DARWIN-32-PIC-NEXT: L123$pb:
+; DARWIN-32-PIC: call L122$pb
+; DARWIN-32-PIC-NEXT: L122$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L123$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L122$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: leal 262144(%eax,%ecx,4), %eax
; DARWIN-32-PIC-NEXT: ret
@@ -7889,11 +7889,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cxm01:
-; DARWIN-32-PIC: call L124$pb
-; DARWIN-32-PIC-NEXT: L124$pb:
+; DARWIN-32-PIC: call L123$pb
+; DARWIN-32-PIC-NEXT: L123$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L124$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L123$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: leal 262144(%eax,%ecx,4), %eax
; DARWIN-32-PIC-NEXT: ret
@@ -7957,10 +7957,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cam02:
-; DARWIN-32-PIC: call L125$pb
-; DARWIN-32-PIC-NEXT: L125$pb:
+; DARWIN-32-PIC: call L124$pb
+; DARWIN-32-PIC-NEXT: L124$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L125$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L124$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: movl (%eax), %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
; DARWIN-32-PIC-NEXT: leal 262144(%eax,%ecx,4), %eax
@@ -8021,11 +8021,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cam03:
-; DARWIN-32-PIC: call L126$pb
-; DARWIN-32-PIC-NEXT: L126$pb:
+; DARWIN-32-PIC: call L125$pb
+; DARWIN-32-PIC-NEXT: L125$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: leal (_dsrc-L126$pb)+262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: leal (_dsrc-L125$pb)+262144(%eax,%ecx,4), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _cam03:
@@ -8080,11 +8080,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cam04:
-; DARWIN-32-PIC: call L127$pb
-; DARWIN-32-PIC-NEXT: L127$pb:
+; DARWIN-32-PIC: call L126$pb
+; DARWIN-32-PIC-NEXT: L126$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: leal (_ddst-L127$pb)+262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: leal (_ddst-L126$pb)+262144(%eax,%ecx,4), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _cam04:
@@ -8146,11 +8146,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cam05:
-; DARWIN-32-PIC: call L128$pb
-; DARWIN-32-PIC-NEXT: L128$pb:
+; DARWIN-32-PIC: call L127$pb
+; DARWIN-32-PIC-NEXT: L127$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: movl _dptr-L128$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl _dptr-L127$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: leal 262144(%eax,%ecx,4), %eax
; DARWIN-32-PIC-NEXT: ret
@@ -8206,11 +8206,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cam06:
-; DARWIN-32-PIC: call L129$pb
-; DARWIN-32-PIC-NEXT: L129$pb:
+; DARWIN-32-PIC: call L128$pb
+; DARWIN-32-PIC-NEXT: L128$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: leal (_lsrc-L129$pb)+262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: leal (_lsrc-L128$pb)+262144(%eax,%ecx,4), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _cam06:
@@ -8265,11 +8265,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cam07:
-; DARWIN-32-PIC: call L130$pb
-; DARWIN-32-PIC-NEXT: L130$pb:
+; DARWIN-32-PIC: call L129$pb
+; DARWIN-32-PIC-NEXT: L129$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: leal (_ldst-L130$pb)+262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: leal (_ldst-L129$pb)+262144(%eax,%ecx,4), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _cam07:
@@ -8330,11 +8330,11 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cam08:
-; DARWIN-32-PIC: call L131$pb
-; DARWIN-32-PIC-NEXT: L131$pb:
+; DARWIN-32-PIC: call L130$pb
+; DARWIN-32-PIC-NEXT: L130$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: movl _lptr-L131$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl _lptr-L130$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: leal 262144(%eax,%ecx,4), %eax
; DARWIN-32-PIC-NEXT: ret
@@ -8644,10 +8644,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _address:
-; DARWIN-32-PIC: call L134$pb
-; DARWIN-32-PIC-NEXT: L134$pb:
+; DARWIN-32-PIC: call L133$pb
+; DARWIN-32-PIC-NEXT: L133$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl L_callee$non_lazy_ptr-L134$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl L_callee$non_lazy_ptr-L133$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _address:
@@ -8693,10 +8693,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _laddress:
-; DARWIN-32-PIC: call L135$pb
-; DARWIN-32-PIC-NEXT: L135$pb:
+; DARWIN-32-PIC: call L134$pb
+; DARWIN-32-PIC-NEXT: L134$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: leal _lcallee-L135$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal _lcallee-L134$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _laddress:
@@ -8740,10 +8740,10 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _daddress:
-; DARWIN-32-PIC: call L136$pb
-; DARWIN-32-PIC-NEXT: L136$pb:
+; DARWIN-32-PIC: call L135$pb
+; DARWIN-32-PIC-NEXT: L135$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: leal _dcallee-L136$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal _dcallee-L135$pb(%eax), %eax
; DARWIN-32-PIC-NEXT: ret
; DARWIN-64-STATIC: _daddress:
@@ -9224,10 +9224,10 @@ entry:
; DARWIN-32-PIC: _icaller:
; DARWIN-32-PIC: pushl %esi
; DARWIN-32-PIC-NEXT: subl $8, %esp
-; DARWIN-32-PIC-NEXT: call L143$pb
-; DARWIN-32-PIC-NEXT: L143$pb:
+; DARWIN-32-PIC-NEXT: call L142$pb
+; DARWIN-32-PIC-NEXT: L142$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl L_ifunc$non_lazy_ptr-L143$pb(%eax), %esi
+; DARWIN-32-PIC-NEXT: movl L_ifunc$non_lazy_ptr-L142$pb(%eax), %esi
; DARWIN-32-PIC-NEXT: call *(%esi)
; DARWIN-32-PIC-NEXT: call *(%esi)
; DARWIN-32-PIC-NEXT: addl $8, %esp
@@ -9310,11 +9310,11 @@ entry:
; DARWIN-32-PIC: _dicaller:
; DARWIN-32-PIC: pushl %esi
; DARWIN-32-PIC-NEXT: subl $8, %esp
-; DARWIN-32-PIC-NEXT: call L144$pb
-; DARWIN-32-PIC-NEXT: L144$pb:
+; DARWIN-32-PIC-NEXT: call L143$pb
+; DARWIN-32-PIC-NEXT: L143$pb:
; DARWIN-32-PIC-NEXT: popl %esi
-; DARWIN-32-PIC-NEXT: call *_difunc-L144$pb(%esi)
-; DARWIN-32-PIC-NEXT: call *_difunc-L144$pb(%esi)
+; DARWIN-32-PIC-NEXT: call *_difunc-L143$pb(%esi)
+; DARWIN-32-PIC-NEXT: call *_difunc-L143$pb(%esi)
; DARWIN-32-PIC-NEXT: addl $8, %esp
; DARWIN-32-PIC-NEXT: popl %esi
; DARWIN-32-PIC-NEXT: ret
@@ -9391,11 +9391,11 @@ entry:
; DARWIN-32-PIC: _licaller:
; DARWIN-32-PIC: pushl %esi
; DARWIN-32-PIC-NEXT: subl $8, %esp
-; DARWIN-32-PIC-NEXT: call L145$pb
-; DARWIN-32-PIC-NEXT: L145$pb:
+; DARWIN-32-PIC-NEXT: call L144$pb
+; DARWIN-32-PIC-NEXT: L144$pb:
; DARWIN-32-PIC-NEXT: popl %esi
-; DARWIN-32-PIC-NEXT: call *_lifunc-L145$pb(%esi)
-; DARWIN-32-PIC-NEXT: call *_lifunc-L145$pb(%esi)
+; DARWIN-32-PIC-NEXT: call *_lifunc-L144$pb(%esi)
+; DARWIN-32-PIC-NEXT: call *_lifunc-L144$pb(%esi)
; DARWIN-32-PIC-NEXT: addl $8, %esp
; DARWIN-32-PIC-NEXT: popl %esi
; DARWIN-32-PIC-NEXT: ret
@@ -9476,10 +9476,10 @@ entry:
; DARWIN-32-PIC: _itailcaller:
; DARWIN-32-PIC: pushl %esi
; DARWIN-32-PIC-NEXT: subl $8, %esp
-; DARWIN-32-PIC-NEXT: call L146$pb
-; DARWIN-32-PIC-NEXT: L146$pb:
+; DARWIN-32-PIC-NEXT: call L145$pb
+; DARWIN-32-PIC-NEXT: L145$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: movl L_ifunc$non_lazy_ptr-L146$pb(%eax), %esi
+; DARWIN-32-PIC-NEXT: movl L_ifunc$non_lazy_ptr-L145$pb(%eax), %esi
; DARWIN-32-PIC-NEXT: call *(%esi)
; DARWIN-32-PIC-NEXT: call *(%esi)
; DARWIN-32-PIC-NEXT: addl $8, %esp
@@ -9553,10 +9553,10 @@ entry:
; DARWIN-32-PIC: _ditailcaller:
; DARWIN-32-PIC: subl $12, %esp
-; DARWIN-32-PIC-NEXT: call L147$pb
-; DARWIN-32-PIC-NEXT: L147$pb:
+; DARWIN-32-PIC-NEXT: call L146$pb
+; DARWIN-32-PIC-NEXT: L146$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: call *_difunc-L147$pb(%eax)
+; DARWIN-32-PIC-NEXT: call *_difunc-L146$pb(%eax)
; DARWIN-32-PIC-NEXT: addl $12, %esp
; DARWIN-32-PIC-NEXT: ret
@@ -9619,10 +9619,10 @@ entry:
; DARWIN-32-PIC: _litailcaller:
; DARWIN-32-PIC: subl $12, %esp
-; DARWIN-32-PIC-NEXT: call L148$pb
-; DARWIN-32-PIC-NEXT: L148$pb:
+; DARWIN-32-PIC-NEXT: call L147$pb
+; DARWIN-32-PIC-NEXT: L147$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: call *_lifunc-L148$pb(%eax)
+; DARWIN-32-PIC-NEXT: call *_lifunc-L147$pb(%eax)
; DARWIN-32-PIC-NEXT: addl $12, %esp
; DARWIN-32-PIC-NEXT: ret
diff --git a/test/CodeGen/X86/alignment.ll b/test/CodeGen/X86/alignment.ll
new file mode 100644
index 000000000000..9678e6df740e
--- /dev/null
+++ b/test/CodeGen/X86/alignment.ll
@@ -0,0 +1,43 @@
+; RUN: llc %s -o - -mtriple=x86_64-linux-gnu | FileCheck %s
+
+; This can get rounded up to the preferred alignment (16) even though an
+; explicit alignment of 8 is specified.
+@GlobalA = global { [384 x i8] } zeroinitializer, align 8
+
+; CHECK: .bss
+; CHECK: .globl GlobalA
+; CHECK: .align 16
+; CHECK: GlobalA:
+; CHECK: .zero 384
+
+; Common variables should get rounded up to the preferred alignment (16)
+; even when an explicit alignment is specified.
+; PR6921
+@GlobalB = common global { [384 x i8] } zeroinitializer, align 8
+
+; CHECK: .comm GlobalB,384,16
+
+
+@GlobalC = common global { [384 x i8] } zeroinitializer, align 2
+
+; CHECK: .comm GlobalC,384,16
+
+
+
+; Globals cannot get rounded up to the preferred alignment (16) when they
+; have an explicit alignment specified *and* a section specified.
+@GlobalAS = global { [384 x i8] } zeroinitializer, align 8, section "foo"
+
+; CHECK: .globl GlobalAS
+; CHECK: .align 8
+; CHECK: GlobalAS:
+; CHECK: .zero 384
+
+; Common variables should not get rounded up to the preferred alignment (16) if
+; they have an explicit alignment specified and a section specified.
+; PR6921
+@GlobalBS = common global { [384 x i8] } zeroinitializer, align 8, section "foo"
+; CHECK: .comm GlobalBS,384,8
+
+@GlobalCS = common global { [384 x i8] } zeroinitializer, align 2, section "foo"
+; CHECK: .comm GlobalCS,384,2 \ No newline at end of file
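The rule the checks above pin down: without a section, llc may raise a global
to the target's preferred alignment, but an explicit section pins the global
to its explicit alignment. A minimal sketch of the two cases, mirroring
GlobalA/GlobalAS (hypothetical globals, x86_64-linux assumed):

@rounded = global { [384 x i8] } zeroinitializer, align 8                 ; no section: emitted as .align 16
@pinned  = global { [384 x i8] } zeroinitializer, align 8, section "bar"  ; section given: stays .align 8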
diff --git a/test/CodeGen/X86/atomic_add.ll b/test/CodeGen/X86/atomic_add.ll
index d00f8e861c21..26d25e24dfbc 100644
--- a/test/CodeGen/X86/atomic_add.ll
+++ b/test/CodeGen/X86/atomic_add.ll
@@ -192,7 +192,7 @@ entry:
define void @sub2(i16* nocapture %p, i32 %v) nounwind ssp {
entry:
; CHECK: sub2:
-; CHECK: subw
+; CHECK: negl
%0 = trunc i32 %v to i16 ; <i16> [#uses=1]
%1 = tail call i16 @llvm.atomic.load.sub.i16.p0i16(i16* %p, i16 %0) ; <i16> [#uses=0]
ret void
diff --git a/test/CodeGen/X86/avoid-loop-align.ll b/test/CodeGen/X86/avoid-loop-align.ll
index d4c5c6723243..7957db72fe6d 100644
--- a/test/CodeGen/X86/avoid-loop-align.ll
+++ b/test/CodeGen/X86/avoid-loop-align.ll
@@ -3,9 +3,9 @@
; CodeGen should align the top of the loop, which differs from the loop
; header in this case.
-; CHECK: jmp LBB1_2
+; CHECK: jmp LBB0_2
; CHECK: .align
-; CHECK: LBB1_1:
+; CHECK: LBB0_1:
@A = common global [100 x i32] zeroinitializer, align 32 ; <[100 x i32]*> [#uses=1]
diff --git a/test/CodeGen/X86/brcond.ll b/test/CodeGen/X86/brcond.ll
index 130483ad8410..5cdc1000f3cd 100644
--- a/test/CodeGen/X86/brcond.ll
+++ b/test/CodeGen/X86/brcond.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin10 | FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=core2 | FileCheck %s
; rdar://7475489
define i32 @test1(i32 %a, i32 %b) nounwind ssp {
@@ -46,7 +46,7 @@ return: ; preds = %entry
; CHECK: test2:
; CHECK: movl 4(%esp), %eax
; CHECK-NEXT: orl 8(%esp), %eax
-; CHECK-NEXT: jne LBB2_2
+; CHECK-NEXT: jne LBB1_2
}
; PR3351 - (P != 0) | (Q != 0) -> (P|Q) != 0
@@ -65,5 +65,44 @@ return: ; preds = %entry
; CHECK: test3:
; CHECK: movl 4(%esp), %eax
; CHECK-NEXT: orl 8(%esp), %eax
-; CHECK-NEXT: je LBB3_2
+; CHECK-NEXT: je LBB2_2
+}
+
+; <rdar://problem/7598384>:
+;
+; jCC L1
+; jmp L2
+; L1:
+; ...
+; L2:
+; ...
+;
+; to:
+;
+; jnCC L2
+; L1:
+; ...
+; L2:
+; ...
+define float @test4(float %x, float %y) nounwind readnone optsize ssp {
+entry:
+ %0 = fpext float %x to double ; <double> [#uses=1]
+ %1 = fpext float %y to double ; <double> [#uses=1]
+ %2 = fmul double %0, %1 ; <double> [#uses=3]
+ %3 = fcmp oeq double %2, 0.000000e+00 ; <i1> [#uses=1]
+ br i1 %3, label %bb2, label %bb1
+
+; CHECK: jne
+; CHECK-NEXT: jnp
+; CHECK-NOT: jmp
+; CHECK: LBB
+
+bb1: ; preds = %entry
+ %4 = fadd double %2, -1.000000e+00 ; <double> [#uses=1]
+ br label %bb2
+
+bb2: ; preds = %entry, %bb1
+ %.0.in = phi double [ %4, %bb1 ], [ %2, %entry ] ; <double> [#uses=1]
+ %.0 = fptrunc double %.0.in to float ; <float> [#uses=1]
+ ret float %.0
}
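For context on the new CHECK lines in @test4: fcmp oeq lowers to ucomisd,
which needs two flag tests (ZF for equality, PF for the unordered case), so
the compare becomes a jne/jnp pair; the rdar above is about letting the
second branch fall through instead of ending in an extra jmp. Roughly, as a
sketch with illustrative label names rather than the exact output:

; ucomisd %xmm1, %xmm0    ; compare; an unordered result sets ZF=PF=1
; jne  LBB3_1             ; ZF=0: definitely not oeq -> %bb1
; jnp  LBB3_2             ; ZF=1 and PF=0: ordered-equal -> %bb2
;                         ; fallthrough (unordered) continues into %bb1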
diff --git a/test/CodeGen/X86/const-select.ll b/test/CodeGen/X86/const-select.ll
index ca8cc1464c77..665984ce28ed 100644
--- a/test/CodeGen/X86/const-select.ll
+++ b/test/CodeGen/X86/const-select.ll
@@ -2,7 +2,7 @@
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin7"
-; RUN: llc < %s | grep {LCPI1_0(,%eax,4)}
+; RUN: llc < %s | grep {LCPI0_0(,%eax,4)}
define float @f(i32 %x) nounwind readnone {
entry:
%0 = icmp eq i32 %x, 0 ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/constant-pool-sharing.ll b/test/CodeGen/X86/constant-pool-sharing.ll
index c3e97adffb19..33de5767ad65 100644
--- a/test/CodeGen/X86/constant-pool-sharing.ll
+++ b/test/CodeGen/X86/constant-pool-sharing.ll
@@ -3,7 +3,7 @@
; llc should share constant pool entries between this integer vector
; and this floating-point vector since they have the same encoding.
-; CHECK: LCPI1_0(%rip), %xmm0
+; CHECK: LCPI0_0(%rip), %xmm0
; CHECK: movaps %xmm0, (%rdi)
; CHECK: movaps %xmm0, (%rsi)
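The sharing is purely byte-based: lanes with the same encoding collapse to
one pool entry regardless of IR type, so a single LCPI0_0 feeds both movaps
stores. An illustrative pair with one encoding per lane (1.0f and its bit
pattern 0x3F800000 = 1065353216; the hunk does not show the test body):

store <4 x i32> <i32 1065353216, i32 1065353216, i32 1065353216, i32 1065353216>, <4 x i32>* %pi
store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %pf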
diff --git a/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll b/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll
index 8e38fe309f7c..17cb2b305721 100644
--- a/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll
+++ b/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86-64 -o %t -stats -info-output-file - | \
-; RUN: grep {asm-printer} | grep {Number of machine instrs printed} | grep 10
+; RUN: grep {asm-printer} | grep {Number of machine instrs printed} | grep 9
; RUN: grep {leal 1(\%rsi),} %t
define fastcc zeroext i8 @fullGtU(i32 %i1, i32 %i2, i8* %ptr) nounwind optsize {
diff --git a/test/CodeGen/X86/crash.ll b/test/CodeGen/X86/crash.ll
index 4b7c85099389..2f27f35f0acd 100644
--- a/test/CodeGen/X86/crash.ll
+++ b/test/CodeGen/X86/crash.ll
@@ -92,3 +92,41 @@ foo:
}
+; Crash commoning identical asms.
+; PR6803
+define void @test6(i1 %C) nounwind optsize ssp {
+entry:
+ br i1 %C, label %do.body55, label %do.body92
+
+do.body55: ; preds = %if.else36
+ call void asm sideeffect "foo", "~{dirflag},~{fpsr},~{flags}"() nounwind, !srcloc !0
+ ret void
+
+do.body92: ; preds = %if.then66
+ call void asm sideeffect "foo", "~{dirflag},~{fpsr},~{flags}"() nounwind, !srcloc !1
+ ret void
+}
+
+!0 = metadata !{i32 633550}
+!1 = metadata !{i32 634261}
+
+
+; Crash during XOR optimization.
+; <rdar://problem/7869290>
+
+define void @test7() nounwind ssp {
+entry:
+ br i1 undef, label %bb14, label %bb67
+
+bb14:
+ %tmp0 = trunc i16 undef to i1
+ %tmp1 = load i8* undef, align 8
+ %tmp2 = shl i8 %tmp1, 4
+ %tmp3 = lshr i8 %tmp2, 7
+ %tmp4 = trunc i8 %tmp3 to i1
+ %tmp5 = icmp ne i1 %tmp0, %tmp4
+ br i1 %tmp5, label %bb14, label %bb67
+
+bb67:
+ ret void
+}
diff --git a/test/CodeGen/X86/dbg-byval-parameter.ll b/test/CodeGen/X86/dbg-byval-parameter.ll
new file mode 100644
index 000000000000..5e5577620d97
--- /dev/null
+++ b/test/CodeGen/X86/dbg-byval-parameter.ll
@@ -0,0 +1,45 @@
+; RUN: llc -march=x86 -asm-verbose < %s | grep DW_TAG_formal_parameter
+
+
+%struct.Pt = type { double, double }
+%struct.Rect = type { %struct.Pt, %struct.Pt }
+
+define double @foo(%struct.Rect* byval %my_r0) nounwind ssp {
+entry:
+ %retval = alloca double ; <double*> [#uses=2]
+ %0 = alloca double ; <double*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ call void @llvm.dbg.declare(metadata !{%struct.Rect* %my_r0}, metadata !0), !dbg !15
+ %1 = getelementptr inbounds %struct.Rect* %my_r0, i32 0, i32 0, !dbg !16 ; <%struct.Pt*> [#uses=1]
+ %2 = getelementptr inbounds %struct.Pt* %1, i32 0, i32 0, !dbg !16 ; <double*> [#uses=1]
+ %3 = load double* %2, align 8, !dbg !16 ; <double> [#uses=1]
+ store double %3, double* %0, align 8, !dbg !16
+ %4 = load double* %0, align 8, !dbg !16 ; <double> [#uses=1]
+ store double %4, double* %retval, align 8, !dbg !16
+ br label %return, !dbg !16
+
+return: ; preds = %entry
+ %retval1 = load double* %retval, !dbg !16 ; <double> [#uses=1]
+ ret double %retval1, !dbg !16
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+!0 = metadata !{i32 524545, metadata !1, metadata !"my_r0", metadata !2, i32 11, metadata !7} ; [ DW_TAG_arg_variable ]
+!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 11, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 524329, metadata !"b2.c", metadata !"/tmp/", metadata !3} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 524305, i32 0, i32 1, metadata !"b2.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{metadata !6, metadata !7}
+!6 = metadata !{i32 524324, metadata !2, metadata !"double", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!7 = metadata !{i32 524307, metadata !2, metadata !"Rect", metadata !2, i32 6, i64 256, i64 64, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_structure_type ]
+!8 = metadata !{metadata !9, metadata !14}
+!9 = metadata !{i32 524301, metadata !7, metadata !"P1", metadata !2, i32 7, i64 128, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ]
+!10 = metadata !{i32 524307, metadata !2, metadata !"Pt", metadata !2, i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ]
+!11 = metadata !{metadata !12, metadata !13}
+!12 = metadata !{i32 524301, metadata !10, metadata !"x", metadata !2, i32 2, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
+!13 = metadata !{i32 524301, metadata !10, metadata !"y", metadata !2, i32 3, i64 64, i64 64, i64 64, i32 0, metadata !6} ; [ DW_TAG_member ]
+!14 = metadata !{i32 524301, metadata !7, metadata !"P2", metadata !2, i32 8, i64 128, i64 64, i64 128, i32 0, metadata !10} ; [ DW_TAG_member ]
+!15 = metadata !{i32 11, i32 0, metadata !1, null}
+!16 = metadata !{i32 12, i32 0, metadata !17, null}
+!17 = metadata !{i32 524299, metadata !1, i32 11, i32 0} ; [ DW_TAG_lexical_block ]
diff --git a/test/CodeGen/X86/dllexport.ll b/test/CodeGen/X86/dllexport.ll
index 2c699bfb0db4..bdbaac05f118 100644
--- a/test/CodeGen/X86/dllexport.ll
+++ b/test/CodeGen/X86/dllexport.ll
@@ -9,4 +9,4 @@ entry:
}
; CHECK: .section .drectve
-; CHECK: -export:@foo@0 \ No newline at end of file
+; CHECK: -export:@foo@0
diff --git a/test/CodeGen/X86/fast-isel-constpool.ll b/test/CodeGen/X86/fast-isel-constpool.ll
index 84d10f32c294..323c8533cec2 100644
--- a/test/CodeGen/X86/fast-isel-constpool.ll
+++ b/test/CodeGen/X86/fast-isel-constpool.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -fast-isel | grep {LCPI1_0(%rip)}
+; RUN: llc < %s -fast-isel | grep {LCPI0_0(%rip)}
; Make sure fast isel uses rip-relative addressing when required.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin9.0"
diff --git a/test/CodeGen/X86/fast-isel-phys.ll b/test/CodeGen/X86/fast-isel-phys.ll
deleted file mode 100644
index 158ef551ce42..000000000000
--- a/test/CodeGen/X86/fast-isel-phys.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s -fast-isel -fast-isel-abort -march=x86
-
-define i8 @t2(i8 %a, i8 %c) nounwind {
- %tmp = shl i8 %a, %c
- ret i8 %tmp
-}
-
-define i8 @t1(i8 %a) nounwind {
- %tmp = mul i8 %a, 17
- ret i8 %tmp
-}
diff --git a/test/CodeGen/X86/fast-isel-trunc.ll b/test/CodeGen/X86/fast-isel-trunc.ll
deleted file mode 100644
index 69b26c5442e4..000000000000
--- a/test/CodeGen/X86/fast-isel-trunc.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=x86 -fast-isel -fast-isel-abort
-; RUN: llc < %s -march=x86-64 -fast-isel -fast-isel-abort
-
-define i8 @t1(i32 %x) signext nounwind {
- %tmp1 = trunc i32 %x to i8
- ret i8 %tmp1
-}
-
-define i8 @t2(i16 signext %x) signext nounwind {
- %tmp1 = trunc i16 %x to i8
- ret i8 %tmp1
-}
diff --git a/test/CodeGen/X86/fast-isel.ll b/test/CodeGen/X86/fast-isel.ll
index 84b3fd7caf3a..3d26ae7018b5 100644
--- a/test/CodeGen/X86/fast-isel.ll
+++ b/test/CodeGen/X86/fast-isel.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -fast-isel -fast-isel-abort -march=x86 -mattr=sse2
+; RUN: llc < %s -fast-isel -fast-isel-abort -march=x86-64
; This tests very minimal fast-isel functionality.
@@ -65,6 +66,26 @@ define i8* @inttoptr_i32(i32 %p) nounwind {
ret i8* %t
}
+define i8 @trunc_i32_i8(i32 %x) signext nounwind {
+ %tmp1 = trunc i32 %x to i8
+ ret i8 %tmp1
+}
+
+define i8 @trunc_i16_i8(i16 signext %x) signext nounwind {
+ %tmp1 = trunc i16 %x to i8
+ ret i8 %tmp1
+}
+
+define i8 @shl_i8(i8 %a, i8 %c) nounwind {
+ %tmp = shl i8 %a, %c
+ ret i8 %tmp
+}
+
+define i8 @mul_i8(i8 %a) nounwind {
+ %tmp = mul i8 %a, 17
+ ret i8 %tmp
+}
+
define void @store_i1(i1* %p, i1 %t) nounwind {
store i1 %t, i1* %p
ret void
diff --git a/test/CodeGen/X86/field-extract-use-trunc.ll b/test/CodeGen/X86/field-extract-use-trunc.ll
index 60205305a977..735e1341f65b 100644
--- a/test/CodeGen/X86/field-extract-use-trunc.ll
+++ b/test/CodeGen/X86/field-extract-use-trunc.ll
@@ -1,38 +1,38 @@
; RUN: llc < %s -march=x86 | grep sar | count 1
; RUN: llc < %s -march=x86-64 | not grep sar
-define i32 @test(i32 %f12) {
+define i32 @test(i32 %f12) nounwind {
%tmp7.25 = lshr i32 %f12, 16
%tmp7.26 = trunc i32 %tmp7.25 to i8
%tmp78.2 = sext i8 %tmp7.26 to i32
ret i32 %tmp78.2
}
-define i32 @test2(i32 %f12) {
+define i32 @test2(i32 %f12) nounwind {
%f11 = shl i32 %f12, 8
%tmp7.25 = ashr i32 %f11, 24
ret i32 %tmp7.25
}
-define i32 @test3(i32 %f12) {
+define i32 @test3(i32 %f12) nounwind {
%f11 = shl i32 %f12, 13
%tmp7.25 = ashr i32 %f11, 24
ret i32 %tmp7.25
}
-define i64 @test4(i64 %f12) {
+define i64 @test4(i64 %f12) nounwind {
%f11 = shl i64 %f12, 32
%tmp7.25 = ashr i64 %f11, 32
ret i64 %tmp7.25
}
-define i16 @test5(i16 %f12) {
+define i16 @test5(i16 %f12) nounwind {
%f11 = shl i16 %f12, 2
%tmp7.25 = ashr i16 %f11, 8
ret i16 %tmp7.25
}
-define i16 @test6(i16 %f12) {
+define i16 @test6(i16 %f12) nounwind {
%f11 = shl i16 %f12, 8
%tmp7.25 = ashr i16 %f11, 8
ret i16 %tmp7.25
diff --git a/test/CodeGen/X86/fold-pcmpeqd-0.ll b/test/CodeGen/X86/fold-pcmpeqd-0.ll
index ef5202f554c5..e5be58e1aaa3 100644
--- a/test/CodeGen/X86/fold-pcmpeqd-0.ll
+++ b/test/CodeGen/X86/fold-pcmpeqd-0.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah | not grep pcmpeqd
-; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah | grep orps | grep CPI1_2 | count 2
+; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah | grep orps | grep CPI0_2 | count 2
; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep pcmpeqd | count 1
; This testcase shouldn't need to spill the -1 value,
diff --git a/test/CodeGen/X86/fp-elim.ll b/test/CodeGen/X86/fp-elim.ll
new file mode 100644
index 000000000000..60892a2352fb
--- /dev/null
+++ b/test/CodeGen/X86/fp-elim.ll
@@ -0,0 +1,44 @@
+; RUN: llc < %s -march=x86 -asm-verbose=false | FileCheck %s -check-prefix=FP-ELIM
+; RUN: llc < %s -march=x86 -asm-verbose=false -disable-fp-elim | FileCheck %s -check-prefix=NO-ELIM
+; RUN: llc < %s -march=x86 -asm-verbose=false -disable-non-leaf-fp-elim | FileCheck %s -check-prefix=NON-LEAF
+
+; Implement -momit-leaf-frame-pointer
+; rdar://7886181
+
+define i32 @t1() nounwind readnone {
+entry:
+; FP-ELIM: t1:
+; FP-ELIM-NEXT: movl
+; FP-ELIM-NEXT: ret
+
+; NO-ELIM: t1:
+; NO-ELIM-NEXT: pushl %ebp
+; NO-ELIM: popl %ebp
+; NO-ELIM-NEXT: ret
+
+; NON-LEAF: t1:
+; NON-LEAF-NEXT: movl
+; NON-LEAF-NEXT: ret
+ ret i32 10
+}
+
+define void @t2() nounwind {
+entry:
+; FP-ELIM: t2:
+; FP-ELIM-NOT: pushl %ebp
+; FP-ELIM: ret
+
+; NO-ELIM: t2:
+; NO-ELIM-NEXT: pushl %ebp
+; NO-ELIM: popl %ebp
+; NO-ELIM-NEXT: ret
+
+; NON-LEAF: t2:
+; NON-LEAF-NEXT: pushl %ebp
+; NON-LEAF: popl %ebp
+; NON-LEAF-NEXT: ret
+ tail call void @foo(i32 0) nounwind
+ ret void
+}
+
+declare void @foo(i32)
diff --git a/test/CodeGen/X86/global-sections.ll b/test/CodeGen/X86/global-sections.ll
index d79c56bc4637..6d211913b015 100644
--- a/test/CodeGen/X86/global-sections.ll
+++ b/test/CodeGen/X86/global-sections.ll
@@ -1,5 +1,6 @@
; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | FileCheck %s -check-prefix=LINUX
; RUN: llc < %s -mtriple=i386-apple-darwin9.7 | FileCheck %s -check-prefix=DARWIN
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -fdata-sections | FileCheck %s -check-prefix=LINUX-SECTIONS
; int G1;
@@ -32,6 +33,12 @@
; DARWIN: _G3:
; DARWIN: .long _G1
+; LINUX: .section .rodata,"a",@progbits
+; LINUX: .globl G3
+
+; LINUX-SECTIONS: .section .rodata.G3,"a",@progbits
+; LINUX-SECTIONS: .globl G3
+
; _Complex long long const G4 = 34;
@G4 = constant {i64,i64} { i64 34, i64 0 }
@@ -97,6 +104,9 @@
; LINUX: G7:
; LINUX: .asciz "abcdefghi"
+; LINUX-SECTIONS: .section .rodata.G7,"aMS",@progbits,1
+; LINUX-SECTIONS: .globl G7
+
@G8 = constant [4 x i16] [ i16 1, i16 2, i16 3, i16 0 ]
@@ -134,3 +144,17 @@
; LINUX: G10:
; LINUX: .zero 400
+
+
+;; Zero-sized objects should be rounded up to 1 byte in zerofill directives.
+; rdar://7886017
+@G11 = global [0 x i32] zeroinitializer
+@G12 = global {} zeroinitializer
+@G13 = global { [0 x {}] } zeroinitializer
+
+; DARWIN: .globl _G11
+; DARWIN: .zerofill __DATA,__common,_G11,1,2
+; DARWIN: .globl _G12
+; DARWIN: .zerofill __DATA,__common,_G12,1,3
+; DARWIN: .globl _G13
+; DARWIN: .zerofill __DATA,__common,_G13,1,3
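The clamp exists because Darwin's .zerofill cannot express a zero-byte
symbol; its operands are segment,section,symbol,size,alignment (the last a
power-of-2 exponent, assuming the usual Mach-O directive syntax), so the
size is forced to at least 1 while the requested alignment survives:

; .zerofill __DATA,__common,_G11,1,2   ==>  1 byte reserved, 2^2 = 4-byte aligned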
diff --git a/test/CodeGen/X86/h-registers-0.ll b/test/CodeGen/X86/h-registers-0.ll
index 878fd93b737c..e84bb9a34a26 100644
--- a/test/CodeGen/X86/h-registers-0.ll
+++ b/test/CodeGen/X86/h-registers-0.ll
@@ -1,12 +1,16 @@
-; RUN: llc < %s -march=x86-64 | grep {movzbl %\[abcd\]h,} | count 4
-; RUN: llc < %s -march=x86 > %t
-; RUN: grep {incb %ah} %t | count 3
-; RUN: grep {movzbl %ah,} %t | count 3
+; RUN: llc < %s -march=x86-64 | FileCheck %s -check-prefix=X86-64
+; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X86-32
; Use h registers. On x86-64, codegen doesn't support general allocation
; of h registers yet, due to x86 encoding complications.
define void @bar64(i64 inreg %x, i8* inreg %p) nounwind {
+; X86-64: bar64:
+; X86-64: shrq $8, %rdi
+; X86-64: incb %dil
+
+; X86-32: bar64:
+; X86-32: incb %ah
%t0 = lshr i64 %x, 8
%t1 = trunc i64 %t0 to i8
%t2 = add i8 %t1, 1
@@ -15,6 +19,12 @@ define void @bar64(i64 inreg %x, i8* inreg %p) nounwind {
}
define void @bar32(i32 inreg %x, i8* inreg %p) nounwind {
+; X86-64: bar32:
+; X86-64: shrl $8, %edi
+; X86-64: incb %dil
+
+; X86-32: bar32:
+; X86-32: incb %ah
%t0 = lshr i32 %x, 8
%t1 = trunc i32 %t0 to i8
%t2 = add i8 %t1, 1
@@ -23,6 +33,12 @@ define void @bar32(i32 inreg %x, i8* inreg %p) nounwind {
}
define void @bar16(i16 inreg %x, i8* inreg %p) nounwind {
+; X86-64: bar16:
+; X86-64: shrl $8, %edi
+; X86-64: incb %dil
+
+; X86-32: bar16:
+; X86-32: incb %ah
%t0 = lshr i16 %x, 8
%t1 = trunc i16 %t0 to i8
%t2 = add i8 %t1, 1
@@ -31,18 +47,36 @@ define void @bar16(i16 inreg %x, i8* inreg %p) nounwind {
}
define i64 @qux64(i64 inreg %x) nounwind {
+; X86-64: qux64:
+; X86-64: movq %rdi, %rax
+; X86-64: movzbl %ah, %eax
+
+; X86-32: qux64:
+; X86-32: movzbl %ah, %eax
%t0 = lshr i64 %x, 8
%t1 = and i64 %t0, 255
ret i64 %t1
}
define i32 @qux32(i32 inreg %x) nounwind {
+; X86-64: qux32:
+; X86-64: movl %edi, %eax
+; X86-64: movzbl %ah, %eax
+
+; X86-32: qux32:
+; X86-32: movzbl %ah, %eax
%t0 = lshr i32 %x, 8
%t1 = and i32 %t0, 255
ret i32 %t1
}
define i16 @qux16(i16 inreg %x) nounwind {
+; X86-64: qux16:
+; X86-64: movl %edi, %eax
+; X86-64: movzbl %ah, %eax
+
+; X86-32: qux16:
+; X86-32: movzbl %ah, %eax
%t0 = lshr i16 %x, 8
ret i16 %t0
}
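What the new X86-64 checks encode: (x >> 8) & 255 is a single
movzbl %ah, %eax once the value sits in %rax, but inreg arguments arrive in
%rdi, so the qux functions must copy first; in the bar functions the result
is only stored, so the shift stays on %rdi and the add uses %dil. The qux
pattern, compressed (illustrative):

; movq   %rdi, %rax    ; move the argument so byte 1 is addressable as %ah
; movzbl %ah, %eax     ; %eax = (x >> 8) & 255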
diff --git a/test/CodeGen/X86/ins_subreg_coalesce-1.ll b/test/CodeGen/X86/ins_subreg_coalesce-1.ll
index 2243f93f3ddd..83674361a773 100644
--- a/test/CodeGen/X86/ins_subreg_coalesce-1.ll
+++ b/test/CodeGen/X86/ins_subreg_coalesce-1.ll
@@ -1,7 +1,13 @@
-; RUN: llc < %s -march=x86 | grep mov | count 3
+; RUN: llc < %s -march=x86 | FileCheck %s
-define fastcc i32 @sqlite3ExprResolveNames() nounwind {
+define fastcc i32 @t() nounwind {
entry:
+; CHECK: t:
+; CHECK: movzwl 0, %eax
+; CHECK: orl $2, %eax
+; CHECK: movw %ax, 0
+; CHECK: shrl $3, %eax
+; CHECK: andl $1, %eax
br i1 false, label %UnifiedReturnBlock, label %bb4
bb4: ; preds = %entry
br i1 false, label %bb17, label %bb22
diff --git a/test/CodeGen/X86/loop-blocks.ll b/test/CodeGen/X86/loop-blocks.ll
index a125e54050bb..354d08206972 100644
--- a/test/CodeGen/X86/loop-blocks.ll
+++ b/test/CodeGen/X86/loop-blocks.ll
@@ -7,11 +7,11 @@
; order to avoid a branch within the loop.
; CHECK: simple:
-; CHECK: jmp .LBB1_1
+; CHECK: jmp .LBB0_1
; CHECK-NEXT: align
-; CHECK-NEXT: .LBB1_2:
+; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: callq loop_latch
-; CHECK-NEXT: .LBB1_1:
+; CHECK-NEXT: .LBB0_1:
; CHECK-NEXT: callq loop_header
define void @simple() nounwind {
@@ -37,11 +37,11 @@ done:
; falls through into the loop, avoiding a branch within the loop.
; CHECK: slightly_more_involved:
-; CHECK: jmp .LBB2_1
+; CHECK: jmp .LBB1_1
; CHECK-NEXT: align
-; CHECK-NEXT: .LBB2_4:
+; CHECK-NEXT: .LBB1_4:
; CHECK-NEXT: callq bar99
-; CHECK-NEXT: .LBB2_1:
+; CHECK-NEXT: .LBB1_1:
; CHECK-NEXT: callq body
define void @slightly_more_involved() nounwind {
@@ -72,20 +72,20 @@ exit:
; fallthrough edges which should be preserved.
; CHECK: yet_more_involved:
-; CHECK: jmp .LBB3_1
+; CHECK: jmp .LBB2_1
; CHECK-NEXT: align
-; CHECK-NEXT: .LBB3_4:
+; CHECK-NEXT: .LBB2_4:
; CHECK-NEXT: callq bar99
; CHECK-NEXT: callq get
; CHECK-NEXT: cmpl $2999, %eax
-; CHECK-NEXT: jg .LBB3_6
+; CHECK-NEXT: jg .LBB2_6
; CHECK-NEXT: callq block_a_true_func
-; CHECK-NEXT: jmp .LBB3_7
-; CHECK-NEXT: .LBB3_6:
+; CHECK-NEXT: jmp .LBB2_7
+; CHECK-NEXT: .LBB2_6:
; CHECK-NEXT: callq block_a_false_func
-; CHECK-NEXT: .LBB3_7:
+; CHECK-NEXT: .LBB2_7:
; CHECK-NEXT: callq block_a_merge_func
-; CHECK-NEXT: .LBB3_1:
+; CHECK-NEXT: .LBB2_1:
; CHECK-NEXT: callq body
define void @yet_more_involved() nounwind {
@@ -131,20 +131,20 @@ exit:
; loop.
; CHECK: cfg_islands:
-; CHECK: jmp .LBB4_1
+; CHECK: jmp .LBB3_1
; CHECK-NEXT: align
-; CHECK-NEXT: .LBB4_7:
+; CHECK-NEXT: .LBB3_7:
; CHECK-NEXT: callq bar100
-; CHECK-NEXT: jmp .LBB4_1
-; CHECK-NEXT: .LBB4_8:
+; CHECK-NEXT: jmp .LBB3_1
+; CHECK-NEXT: .LBB3_8:
; CHECK-NEXT: callq bar101
-; CHECK-NEXT: jmp .LBB4_1
-; CHECK-NEXT: .LBB4_9:
+; CHECK-NEXT: jmp .LBB3_1
+; CHECK-NEXT: .LBB3_9:
; CHECK-NEXT: callq bar102
-; CHECK-NEXT: jmp .LBB4_1
-; CHECK-NEXT: .LBB4_5:
+; CHECK-NEXT: jmp .LBB3_1
+; CHECK-NEXT: .LBB3_5:
; CHECK-NEXT: callq loop_latch
-; CHECK-NEXT: .LBB4_1:
+; CHECK-NEXT: .LBB3_1:
; CHECK-NEXT: callq loop_header
define void @cfg_islands() nounwind {
diff --git a/test/CodeGen/X86/loop-hoist.ll b/test/CodeGen/X86/loop-hoist.ll
index b9008e5e3022..c103e29f3bf3 100644
--- a/test/CodeGen/X86/loop-hoist.ll
+++ b/test/CodeGen/X86/loop-hoist.ll
@@ -4,7 +4,7 @@
; CHECK: _foo:
; CHECK: L_Arr$non_lazy_ptr
-; CHECK: LBB1_1:
+; CHECK: LBB0_1:
@Arr = external global [0 x i32] ; <[0 x i32]*> [#uses=1]
diff --git a/test/CodeGen/X86/loop-strength-reduce8.ll b/test/CodeGen/X86/loop-strength-reduce8.ll
index 6b2247d1d619..1d042769b0ba 100644
--- a/test/CodeGen/X86/loop-strength-reduce8.ll
+++ b/test/CodeGen/X86/loop-strength-reduce8.ll
@@ -4,7 +4,7 @@
; CHECK: align
; CHECK: addl $4, %edx
; CHECK: decl %ecx
-; CHECK: jne LBB1_2
+; CHECK: jne LBB0_2
%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32 }
%struct.bitmap_element = type { %struct.bitmap_element*, %struct.bitmap_element*, i32, [2 x i64] }
diff --git a/test/CodeGen/X86/lsr-delayed-fold.ll b/test/CodeGen/X86/lsr-delayed-fold.ll
new file mode 100644
index 000000000000..17d6a4c0915f
--- /dev/null
+++ b/test/CodeGen/X86/lsr-delayed-fold.ll
@@ -0,0 +1,51 @@
+; RUN: llc -march=x86-64 < %s > /dev/null
+
+; ScalarEvolution misses an opportunity to fold ((trunc x) + (trunc -x) + y),
+; but LSR should tolerate this.
+; rdar://7886751
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin11.0"
+
+define fastcc void @formatValue(i64 %arg5) nounwind {
+bb12: ; preds = %bb11
+ %t = trunc i64 %arg5 to i32 ; <i32> [#uses=1]
+ %t13 = sub i64 0, %arg5 ; <i64> [#uses=1]
+ %t14 = and i64 %t13, 4294967295 ; <i64> [#uses=1]
+ br label %bb15
+
+bb15: ; preds = %bb21, %bb12
+ %t16 = phi i64 [ 0, %bb12 ], [ %t23, %bb15 ] ; <i64> [#uses=2]
+ %t17 = mul i64 %t14, %t16 ; <i64> [#uses=1]
+ %t18 = add i64 undef, %t17 ; <i64> [#uses=1]
+ %t19 = trunc i64 %t18 to i32 ; <i32> [#uses=1]
+ %t22 = icmp eq i32 %t19, %t ; <i1> [#uses=1]
+ %t23 = add i64 %t16, 1 ; <i64> [#uses=1]
+ br i1 %t22, label %bb24, label %bb15
+
+bb24: ; preds = %bb21, %bb11
+ unreachable
+}
+
+; ScalarEvolution should be able to correctly expand the crazy addrec here.
+; PR6914
+
+define void @int323() nounwind {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %lbl_264, %for.inc, %entry
+ %g_263.tmp.1 = phi i8 [ undef, %entry ], [ %g_263.tmp.1, %for.cond ]
+ %p_95.addr.0 = phi i8 [ 0, %entry ], [ %add, %for.cond ]
+ %add = add i8 %p_95.addr.0, 1 ; <i8> [#uses=1]
+ br i1 undef, label %for.cond, label %lbl_264
+
+lbl_264: ; preds = %if.end, %lbl_264.preheader
+ %g_263.tmp.0 = phi i8 [ %g_263.tmp.1, %for.cond ] ; <i8> [#uses=1]
+ %tmp7 = load i16* undef ; <i16> [#uses=1]
+ %conv8 = trunc i16 %tmp7 to i8 ; <i8> [#uses=1]
+ %mul.i = mul i8 %p_95.addr.0, %p_95.addr.0 ; <i8> [#uses=1]
+ %mul.i18 = mul i8 %mul.i, %conv8 ; <i8> [#uses=1]
+ %tobool12 = icmp eq i8 %mul.i18, 0 ; <i1> [#uses=1]
+ unreachable
+}
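The fold mentioned at the top of @formatValue is sound because truncation is
a ring homomorphism: modulo 2^32, trunc(x) + trunc(-x) = trunc(x - x) = 0,
so ((trunc x) + (trunc -x) + y) is just y (here the -x side appears as the
masked %t14). Stated directly, as an identity rather than test IR:

; %a = trunc i64 %x to i32
; %b = trunc i64 %negx to i32      ; %negx = sub i64 0, %x
; add i32 %a, %b  ==  0            ; wraps in i32 arithmetic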
diff --git a/test/CodeGen/X86/lsr-reuse.ll b/test/CodeGen/X86/lsr-reuse.ll
index ab71555950b2..b80ee0897d89 100644
--- a/test/CodeGen/X86/lsr-reuse.ll
+++ b/test/CodeGen/X86/lsr-reuse.ll
@@ -260,7 +260,7 @@ return:
; CHECK: count_me_2:
; CHECK: movl $10, %eax
; CHECK: align
-; CHECK: BB7_1:
+; CHECK: BB6_1:
; CHECK: movsd -40(%rdi,%rax,8), %xmm0
; CHECK: addsd -40(%rsi,%rax,8), %xmm0
; CHECK: movsd %xmm0, -40(%rdx,%rax,8)
@@ -305,7 +305,7 @@ return:
; CHECK: full_me_1:
; CHECK: align
-; CHECK: BB8_1:
+; CHECK: BB7_1:
; CHECK: movsd (%rdi), %xmm0
; CHECK: addsd (%rsi), %xmm0
; CHECK: movsd %xmm0, (%rdx)
@@ -389,7 +389,7 @@ return:
; rdar://7657764
; CHECK: asd:
-; CHECK: BB10_5:
+; CHECK: BB9_5:
; CHECK-NEXT: addl (%r{{[^,]*}},%rdi,4), %e
; CHECK-NEXT: incq %rdi
; CHECK-NEXT: cmpq %rdi, %r{{[^,]*}}
diff --git a/test/CodeGen/X86/memcpy-2.ll b/test/CodeGen/X86/memcpy-2.ll
index 4fe32d974d05..17cd8e868a25 100644
--- a/test/CodeGen/X86/memcpy-2.ll
+++ b/test/CodeGen/X86/memcpy-2.ll
@@ -3,20 +3,20 @@
; RUN: llc < %s -mattr=-sse -mtriple=i686-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=NOSSE
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=X86-64
- %struct.ParmT = type { [25 x i8], i8, i8* }
-@.str12 = internal constant [25 x i8] c"image\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00" ; <[25 x i8]*> [#uses=1]
+@.str = internal constant [25 x i8] c"image\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00"
+@.str2 = internal constant [30 x i8] c"xxxxxxxxxxxxxxxxxxxxxxxxxxxxx\00", align 4
define void @t1(i32 %argc, i8** %argv) nounwind {
entry:
; SSE2: t1:
-; SSE2: movaps _.str12, %xmm0
+; SSE2: movaps _.str, %xmm0
; SSE2: movaps %xmm0
; SSE2: movb $0
; SSE2: movl $0
; SSE2: movl $0
; SSE1: t1:
-; SSE1: movaps _.str12, %xmm0
+; SSE1: movaps _.str, %xmm0
; SSE1: movaps %xmm0
; SSE1: movb $0
; SSE1: movl $0
@@ -32,14 +32,14 @@ entry:
; NOSSE: movl $1734438249
; X86-64: t1:
-; X86-64: movaps _.str12(%rip), %xmm0
+; X86-64: movaps _.str(%rip), %xmm0
; X86-64: movaps %xmm0
; X86-64: movb $0
; X86-64: movq $0
- %parms.i = alloca [13 x %struct.ParmT] ; <[13 x %struct.ParmT]*> [#uses=1]
- %parms1.i = getelementptr [13 x %struct.ParmT]* %parms.i, i32 0, i32 0, i32 0, i32 0 ; <i8*> [#uses=1]
- call void @llvm.memcpy.i32( i8* %parms1.i, i8* getelementptr ([25 x i8]* @.str12, i32 0, i32 0), i32 25, i32 1 ) nounwind
- unreachable
+ %tmp1 = alloca [25 x i8]
+ %tmp2 = bitcast [25 x i8]* %tmp1 to i8*
+ call void @llvm.memcpy.i32( i8* %tmp2, i8* getelementptr ([25 x i8]* @.str, i32 0, i32 0), i32 25, i32 1 ) nounwind
+ unreachable
}
;rdar://7774704
@@ -119,4 +119,49 @@ entry:
ret void
}
+define void @t4() nounwind {
+entry:
+; SSE2: t4:
+; SSE2: movw $120
+; SSE2: movl $2021161080
+; SSE2: movl $2021161080
+; SSE2: movl $2021161080
+; SSE2: movl $2021161080
+; SSE2: movl $2021161080
+; SSE2: movl $2021161080
+; SSE2: movl $2021161080
+
+; SSE1: t4:
+; SSE1: movw $120
+; SSE1: movl $2021161080
+; SSE1: movl $2021161080
+; SSE1: movl $2021161080
+; SSE1: movl $2021161080
+; SSE1: movl $2021161080
+; SSE1: movl $2021161080
+; SSE1: movl $2021161080
+
+; NOSSE: t4:
+; NOSSE: movw $120
+; NOSSE: movl $2021161080
+; NOSSE: movl $2021161080
+; NOSSE: movl $2021161080
+; NOSSE: movl $2021161080
+; NOSSE: movl $2021161080
+; NOSSE: movl $2021161080
+; NOSSE: movl $2021161080
+
+; X86-64: t4:
+; X86-64: movabsq $8680820740569200760, %rax
+; X86-64: movq %rax
+; X86-64: movq %rax
+; X86-64: movq %rax
+; X86-64: movw $120
+; X86-64: movl $2021161080
+ %tmp1 = alloca [30 x i8]
+ %tmp2 = bitcast [30 x i8]* %tmp1 to i8*
+ call void @llvm.memcpy.i32(i8* %tmp2, i8* getelementptr inbounds ([30 x i8]* @.str2, i32 0, i32 0), i32 30, i32 1)
+ unreachable
+}
+
declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
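A decoding note for the @t4 constants: the source is 29 'x' bytes plus a
NUL, and 'x' is 0x78 (decimal 120), so the inlined memcpy materializes the
string bytes directly as immediates:

  0x78787878         = 120*(2^24 + 2^16 + 2^8 + 1) = 2021161080   (movl chunks)
  0x7878787878787878 = 8680820740569200760                        (movq chunks)
  0x0078             = 120, the trailing "x\0" pair               (movw $120)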
diff --git a/test/CodeGen/X86/multiple-loop-post-inc.ll b/test/CodeGen/X86/multiple-loop-post-inc.ll
new file mode 100644
index 000000000000..51a06112aada
--- /dev/null
+++ b/test/CodeGen/X86/multiple-loop-post-inc.ll
@@ -0,0 +1,304 @@
+; RUN: llc -asm-verbose=false -disable-branch-fold -disable-code-place -disable-tail-duplicate -march=x86-64 < %s | FileCheck %s
+; rdar://7236213
+
+; CodeGen shouldn't require any lea instructions inside the marked loop.
+; It should properly set up post-increment uses and do coalescing for
+; the induction variables.
+
+; CHECK: # Start
+; CHECK-NOT: lea
+; CHECK: # Stop
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+define void @foo(float* %I, i64 %IS, float* nocapture %Start, float* nocapture %Step, float* %O, i64 %OS, i64 %N) nounwind {
+entry:
+ %times4 = alloca float, align 4 ; <float*> [#uses=3]
+ %timesN = alloca float, align 4 ; <float*> [#uses=2]
+ %0 = load float* %Step, align 4 ; <float> [#uses=8]
+ %1 = ptrtoint float* %I to i64 ; <i64> [#uses=1]
+ %2 = ptrtoint float* %O to i64 ; <i64> [#uses=1]
+ %tmp = xor i64 %2, %1 ; <i64> [#uses=1]
+ %tmp16 = and i64 %tmp, 15 ; <i64> [#uses=1]
+ %3 = icmp eq i64 %tmp16, 0 ; <i1> [#uses=1]
+ %4 = trunc i64 %IS to i32 ; <i32> [#uses=1]
+ %5 = xor i32 %4, 1 ; <i32> [#uses=1]
+ %6 = trunc i64 %OS to i32 ; <i32> [#uses=1]
+ %7 = xor i32 %6, 1 ; <i32> [#uses=1]
+ %8 = or i32 %7, %5 ; <i32> [#uses=1]
+ %9 = icmp eq i32 %8, 0 ; <i1> [#uses=1]
+ br i1 %9, label %bb, label %return
+
+bb: ; preds = %entry
+ %10 = load float* %Start, align 4 ; <float> [#uses=1]
+ br label %bb2
+
+bb1: ; preds = %bb3
+ %11 = load float* %I_addr.0, align 4 ; <float> [#uses=1]
+ %12 = fmul float %11, %x.0 ; <float> [#uses=1]
+ store float %12, float* %O_addr.0, align 4
+ %13 = fadd float %x.0, %0 ; <float> [#uses=1]
+ %indvar.next53 = add i64 %14, 1 ; <i64> [#uses=1]
+ br label %bb2
+
+bb2: ; preds = %bb1, %bb
+ %14 = phi i64 [ %indvar.next53, %bb1 ], [ 0, %bb ] ; <i64> [#uses=21]
+ %x.0 = phi float [ %13, %bb1 ], [ %10, %bb ] ; <float> [#uses=6]
+ %N_addr.0 = sub i64 %N, %14 ; <i64> [#uses=4]
+ %O_addr.0 = getelementptr float* %O, i64 %14 ; <float*> [#uses=4]
+ %I_addr.0 = getelementptr float* %I, i64 %14 ; <float*> [#uses=3]
+ %15 = icmp slt i64 %N_addr.0, 1 ; <i1> [#uses=1]
+ br i1 %15, label %bb4, label %bb3
+
+bb3: ; preds = %bb2
+ %16 = ptrtoint float* %O_addr.0 to i64 ; <i64> [#uses=1]
+ %17 = and i64 %16, 15 ; <i64> [#uses=1]
+ %18 = icmp eq i64 %17, 0 ; <i1> [#uses=1]
+ br i1 %18, label %bb4, label %bb1
+
+bb4: ; preds = %bb3, %bb2
+ %19 = fmul float %0, 4.000000e+00 ; <float> [#uses=1]
+ store float %19, float* %times4, align 4
+ %20 = fmul float %0, 1.600000e+01 ; <float> [#uses=1]
+ store float %20, float* %timesN, align 4
+ %21 = fmul float %0, 0.000000e+00 ; <float> [#uses=1]
+ %22 = fadd float %21, %x.0 ; <float> [#uses=1]
+ %23 = fadd float %x.0, %0 ; <float> [#uses=1]
+ %24 = fmul float %0, 2.000000e+00 ; <float> [#uses=1]
+ %25 = fadd float %24, %x.0 ; <float> [#uses=1]
+ %26 = fmul float %0, 3.000000e+00 ; <float> [#uses=1]
+ %27 = fadd float %26, %x.0 ; <float> [#uses=1]
+ %28 = insertelement <4 x float> undef, float %22, i32 0 ; <<4 x float>> [#uses=1]
+ %29 = insertelement <4 x float> %28, float %23, i32 1 ; <<4 x float>> [#uses=1]
+ %30 = insertelement <4 x float> %29, float %25, i32 2 ; <<4 x float>> [#uses=1]
+ %31 = insertelement <4 x float> %30, float %27, i32 3 ; <<4 x float>> [#uses=5]
+ %asmtmp.i = call <4 x float> asm "movss $1, $0\09\0Apshufd $$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %times4) nounwind ; <<4 x float>> [#uses=3]
+ %32 = fadd <4 x float> %31, %asmtmp.i ; <<4 x float>> [#uses=3]
+ %33 = fadd <4 x float> %32, %asmtmp.i ; <<4 x float>> [#uses=3]
+ %34 = fadd <4 x float> %33, %asmtmp.i ; <<4 x float>> [#uses=2]
+ %asmtmp.i18 = call <4 x float> asm "movss $1, $0\09\0Apshufd $$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %timesN) nounwind ; <<4 x float>> [#uses=8]
+ %35 = icmp sgt i64 %N_addr.0, 15 ; <i1> [#uses=2]
+ br i1 %3, label %bb6.preheader, label %bb8
+
+bb6.preheader: ; preds = %bb4
+ br i1 %35, label %bb.nph43, label %bb7
+
+bb.nph43: ; preds = %bb6.preheader
+ %tmp108 = add i64 %14, 16 ; <i64> [#uses=1]
+ %tmp111 = add i64 %14, 4 ; <i64> [#uses=1]
+ %tmp115 = add i64 %14, 8 ; <i64> [#uses=1]
+ %tmp119 = add i64 %14, 12 ; <i64> [#uses=1]
+ %tmp134 = add i64 %N, -16 ; <i64> [#uses=1]
+ %tmp135 = sub i64 %tmp134, %14 ; <i64> [#uses=1]
+ call void asm sideeffect "# Start.", "~{dirflag},~{fpsr},~{flags}"() nounwind
+ br label %bb5
+
+bb5: ; preds = %bb.nph43, %bb5
+ %indvar102 = phi i64 [ 0, %bb.nph43 ], [ %indvar.next103, %bb5 ] ; <i64> [#uses=3]
+ %vX3.041 = phi <4 x float> [ %34, %bb.nph43 ], [ %45, %bb5 ] ; <<4 x float>> [#uses=2]
+ %vX0.039 = phi <4 x float> [ %31, %bb.nph43 ], [ %41, %bb5 ] ; <<4 x float>> [#uses=2]
+ %vX2.037 = phi <4 x float> [ %33, %bb.nph43 ], [ %46, %bb5 ] ; <<4 x float>> [#uses=2]
+ %vX1.036 = phi <4 x float> [ %32, %bb.nph43 ], [ %47, %bb5 ] ; <<4 x float>> [#uses=2]
+ %tmp104 = shl i64 %indvar102, 4 ; <i64> [#uses=5]
+ %tmp105 = add i64 %14, %tmp104 ; <i64> [#uses=2]
+ %scevgep106 = getelementptr float* %I, i64 %tmp105 ; <float*> [#uses=1]
+ %scevgep106107 = bitcast float* %scevgep106 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %tmp109 = add i64 %tmp108, %tmp104 ; <i64> [#uses=2]
+ %tmp112 = add i64 %tmp111, %tmp104 ; <i64> [#uses=2]
+ %scevgep113 = getelementptr float* %I, i64 %tmp112 ; <float*> [#uses=1]
+ %scevgep113114 = bitcast float* %scevgep113 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %tmp116 = add i64 %tmp115, %tmp104 ; <i64> [#uses=2]
+ %scevgep117 = getelementptr float* %I, i64 %tmp116 ; <float*> [#uses=1]
+ %scevgep117118 = bitcast float* %scevgep117 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %tmp120 = add i64 %tmp119, %tmp104 ; <i64> [#uses=2]
+ %scevgep121 = getelementptr float* %I, i64 %tmp120 ; <float*> [#uses=1]
+ %scevgep121122 = bitcast float* %scevgep121 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %scevgep123 = getelementptr float* %O, i64 %tmp105 ; <float*> [#uses=1]
+ %scevgep123124 = bitcast float* %scevgep123 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %scevgep126 = getelementptr float* %O, i64 %tmp112 ; <float*> [#uses=1]
+ %scevgep126127 = bitcast float* %scevgep126 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %scevgep128 = getelementptr float* %O, i64 %tmp116 ; <float*> [#uses=1]
+ %scevgep128129 = bitcast float* %scevgep128 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %scevgep130 = getelementptr float* %O, i64 %tmp120 ; <float*> [#uses=1]
+ %scevgep130131 = bitcast float* %scevgep130 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %tmp132 = mul i64 %indvar102, -16 ; <i64> [#uses=1]
+ %tmp136 = add i64 %tmp135, %tmp132 ; <i64> [#uses=2]
+ %36 = load <4 x float>* %scevgep106107, align 16 ; <<4 x float>> [#uses=1]
+ %37 = load <4 x float>* %scevgep113114, align 16 ; <<4 x float>> [#uses=1]
+ %38 = load <4 x float>* %scevgep117118, align 16 ; <<4 x float>> [#uses=1]
+ %39 = load <4 x float>* %scevgep121122, align 16 ; <<4 x float>> [#uses=1]
+ %40 = fmul <4 x float> %36, %vX0.039 ; <<4 x float>> [#uses=1]
+ %41 = fadd <4 x float> %vX0.039, %asmtmp.i18 ; <<4 x float>> [#uses=2]
+ %42 = fmul <4 x float> %37, %vX1.036 ; <<4 x float>> [#uses=1]
+ %43 = fmul <4 x float> %38, %vX2.037 ; <<4 x float>> [#uses=1]
+ %44 = fmul <4 x float> %39, %vX3.041 ; <<4 x float>> [#uses=1]
+ store <4 x float> %40, <4 x float>* %scevgep123124, align 16
+ store <4 x float> %42, <4 x float>* %scevgep126127, align 16
+ store <4 x float> %43, <4 x float>* %scevgep128129, align 16
+ store <4 x float> %44, <4 x float>* %scevgep130131, align 16
+ %45 = fadd <4 x float> %vX3.041, %asmtmp.i18 ; <<4 x float>> [#uses=1]
+ %46 = fadd <4 x float> %vX2.037, %asmtmp.i18 ; <<4 x float>> [#uses=1]
+ %47 = fadd <4 x float> %vX1.036, %asmtmp.i18 ; <<4 x float>> [#uses=1]
+ %48 = icmp sgt i64 %tmp136, 15 ; <i1> [#uses=1]
+ %indvar.next103 = add i64 %indvar102, 1 ; <i64> [#uses=1]
+ br i1 %48, label %bb5, label %bb6.bb7_crit_edge
+
+bb6.bb7_crit_edge: ; preds = %bb5
+ call void asm sideeffect "# Stop.", "~{dirflag},~{fpsr},~{flags}"() nounwind
+ %scevgep110 = getelementptr float* %I, i64 %tmp109 ; <float*> [#uses=1]
+ %scevgep125 = getelementptr float* %O, i64 %tmp109 ; <float*> [#uses=1]
+ br label %bb7
+
+bb7: ; preds = %bb6.bb7_crit_edge, %bb6.preheader
+ %I_addr.1.lcssa = phi float* [ %scevgep110, %bb6.bb7_crit_edge ], [ %I_addr.0, %bb6.preheader ] ; <float*> [#uses=1]
+ %O_addr.1.lcssa = phi float* [ %scevgep125, %bb6.bb7_crit_edge ], [ %O_addr.0, %bb6.preheader ] ; <float*> [#uses=1]
+ %vX0.0.lcssa = phi <4 x float> [ %41, %bb6.bb7_crit_edge ], [ %31, %bb6.preheader ] ; <<4 x float>> [#uses=1]
+ %N_addr.1.lcssa = phi i64 [ %tmp136, %bb6.bb7_crit_edge ], [ %N_addr.0, %bb6.preheader ] ; <i64> [#uses=1]
+ %asmtmp.i17 = call <4 x float> asm "movss $1, $0\09\0Apshufd $$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %times4) nounwind ; <<4 x float>> [#uses=0]
+ br label %bb11
+
+bb8: ; preds = %bb4
+ br i1 %35, label %bb.nph, label %bb11
+
+bb.nph: ; preds = %bb8
+ %I_addr.0.sum = add i64 %14, -1 ; <i64> [#uses=1]
+ %49 = getelementptr inbounds float* %I, i64 %I_addr.0.sum ; <float*> [#uses=1]
+ %50 = bitcast float* %49 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %51 = load <4 x float>* %50, align 16 ; <<4 x float>> [#uses=1]
+ %tmp54 = add i64 %14, 16 ; <i64> [#uses=1]
+ %tmp56 = add i64 %14, 3 ; <i64> [#uses=1]
+ %tmp60 = add i64 %14, 7 ; <i64> [#uses=1]
+ %tmp64 = add i64 %14, 11 ; <i64> [#uses=1]
+ %tmp68 = add i64 %14, 15 ; <i64> [#uses=1]
+ %tmp76 = add i64 %14, 4 ; <i64> [#uses=1]
+ %tmp80 = add i64 %14, 8 ; <i64> [#uses=1]
+ %tmp84 = add i64 %14, 12 ; <i64> [#uses=1]
+ %tmp90 = add i64 %N, -16 ; <i64> [#uses=1]
+ %tmp91 = sub i64 %tmp90, %14 ; <i64> [#uses=1]
+ br label %bb9
+
+bb9: ; preds = %bb.nph, %bb9
+ %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %bb9 ] ; <i64> [#uses=3]
+ %vX3.125 = phi <4 x float> [ %34, %bb.nph ], [ %69, %bb9 ] ; <<4 x float>> [#uses=2]
+ %vX0.223 = phi <4 x float> [ %31, %bb.nph ], [ %65, %bb9 ] ; <<4 x float>> [#uses=2]
+ %vX2.121 = phi <4 x float> [ %33, %bb.nph ], [ %70, %bb9 ] ; <<4 x float>> [#uses=2]
+ %vX1.120 = phi <4 x float> [ %32, %bb.nph ], [ %71, %bb9 ] ; <<4 x float>> [#uses=2]
+ %vI0.019 = phi <4 x float> [ %51, %bb.nph ], [ %55, %bb9 ] ; <<4 x float>> [#uses=1]
+ %tmp51 = shl i64 %indvar, 4 ; <i64> [#uses=9]
+ %tmp55 = add i64 %tmp54, %tmp51 ; <i64> [#uses=2]
+ %tmp57 = add i64 %tmp56, %tmp51 ; <i64> [#uses=1]
+ %scevgep58 = getelementptr float* %I, i64 %tmp57 ; <float*> [#uses=1]
+ %scevgep5859 = bitcast float* %scevgep58 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %tmp61 = add i64 %tmp60, %tmp51 ; <i64> [#uses=1]
+ %scevgep62 = getelementptr float* %I, i64 %tmp61 ; <float*> [#uses=1]
+ %scevgep6263 = bitcast float* %scevgep62 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %tmp65 = add i64 %tmp64, %tmp51 ; <i64> [#uses=1]
+ %scevgep66 = getelementptr float* %I, i64 %tmp65 ; <float*> [#uses=1]
+ %scevgep6667 = bitcast float* %scevgep66 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %tmp69 = add i64 %tmp68, %tmp51 ; <i64> [#uses=1]
+ %scevgep70 = getelementptr float* %I, i64 %tmp69 ; <float*> [#uses=1]
+ %scevgep7071 = bitcast float* %scevgep70 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %tmp72 = add i64 %14, %tmp51 ; <i64> [#uses=1]
+ %scevgep73 = getelementptr float* %O, i64 %tmp72 ; <float*> [#uses=1]
+ %scevgep7374 = bitcast float* %scevgep73 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %tmp77 = add i64 %tmp76, %tmp51 ; <i64> [#uses=1]
+ %scevgep78 = getelementptr float* %O, i64 %tmp77 ; <float*> [#uses=1]
+ %scevgep7879 = bitcast float* %scevgep78 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %tmp81 = add i64 %tmp80, %tmp51 ; <i64> [#uses=1]
+ %scevgep82 = getelementptr float* %O, i64 %tmp81 ; <float*> [#uses=1]
+ %scevgep8283 = bitcast float* %scevgep82 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %tmp85 = add i64 %tmp84, %tmp51 ; <i64> [#uses=1]
+ %scevgep86 = getelementptr float* %O, i64 %tmp85 ; <float*> [#uses=1]
+ %scevgep8687 = bitcast float* %scevgep86 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %tmp88 = mul i64 %indvar, -16 ; <i64> [#uses=1]
+ %tmp92 = add i64 %tmp91, %tmp88 ; <i64> [#uses=2]
+ %52 = load <4 x float>* %scevgep5859, align 16 ; <<4 x float>> [#uses=2]
+ %53 = load <4 x float>* %scevgep6263, align 16 ; <<4 x float>> [#uses=2]
+ %54 = load <4 x float>* %scevgep6667, align 16 ; <<4 x float>> [#uses=2]
+ %55 = load <4 x float>* %scevgep7071, align 16 ; <<4 x float>> [#uses=2]
+ %56 = shufflevector <4 x float> %vI0.019, <4 x float> %52, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
+ %57 = shufflevector <4 x float> %56, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1]
+ %58 = shufflevector <4 x float> %52, <4 x float> %53, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
+ %59 = shufflevector <4 x float> %58, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1]
+ %60 = shufflevector <4 x float> %53, <4 x float> %54, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
+ %61 = shufflevector <4 x float> %60, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1]
+ %62 = shufflevector <4 x float> %54, <4 x float> %55, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
+ %63 = shufflevector <4 x float> %62, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1]
+ %64 = fmul <4 x float> %57, %vX0.223 ; <<4 x float>> [#uses=1]
+ %65 = fadd <4 x float> %vX0.223, %asmtmp.i18 ; <<4 x float>> [#uses=2]
+ %66 = fmul <4 x float> %59, %vX1.120 ; <<4 x float>> [#uses=1]
+ %67 = fmul <4 x float> %61, %vX2.121 ; <<4 x float>> [#uses=1]
+ %68 = fmul <4 x float> %63, %vX3.125 ; <<4 x float>> [#uses=1]
+ store <4 x float> %64, <4 x float>* %scevgep7374, align 16
+ store <4 x float> %66, <4 x float>* %scevgep7879, align 16
+ store <4 x float> %67, <4 x float>* %scevgep8283, align 16
+ store <4 x float> %68, <4 x float>* %scevgep8687, align 16
+ %69 = fadd <4 x float> %vX3.125, %asmtmp.i18 ; <<4 x float>> [#uses=1]
+ %70 = fadd <4 x float> %vX2.121, %asmtmp.i18 ; <<4 x float>> [#uses=1]
+ %71 = fadd <4 x float> %vX1.120, %asmtmp.i18 ; <<4 x float>> [#uses=1]
+ %72 = icmp sgt i64 %tmp92, 15 ; <i1> [#uses=1]
+ %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1]
+ br i1 %72, label %bb9, label %bb10.bb11.loopexit_crit_edge
+
+bb10.bb11.loopexit_crit_edge: ; preds = %bb9
+ %scevgep = getelementptr float* %I, i64 %tmp55 ; <float*> [#uses=1]
+ %scevgep75 = getelementptr float* %O, i64 %tmp55 ; <float*> [#uses=1]
+ br label %bb11
+
+bb11: ; preds = %bb8, %bb10.bb11.loopexit_crit_edge, %bb7
+ %N_addr.2 = phi i64 [ %N_addr.1.lcssa, %bb7 ], [ %tmp92, %bb10.bb11.loopexit_crit_edge ], [ %N_addr.0, %bb8 ] ; <i64> [#uses=2]
+ %vX0.1 = phi <4 x float> [ %vX0.0.lcssa, %bb7 ], [ %65, %bb10.bb11.loopexit_crit_edge ], [ %31, %bb8 ] ; <<4 x float>> [#uses=1]
+ %O_addr.2 = phi float* [ %O_addr.1.lcssa, %bb7 ], [ %scevgep75, %bb10.bb11.loopexit_crit_edge ], [ %O_addr.0, %bb8 ] ; <float*> [#uses=1]
+ %I_addr.2 = phi float* [ %I_addr.1.lcssa, %bb7 ], [ %scevgep, %bb10.bb11.loopexit_crit_edge ], [ %I_addr.0, %bb8 ] ; <float*> [#uses=1]
+ %73 = extractelement <4 x float> %vX0.1, i32 0 ; <float> [#uses=2]
+ %74 = icmp sgt i64 %N_addr.2, 0 ; <i1> [#uses=1]
+ br i1 %74, label %bb12, label %bb14
+
+bb12: ; preds = %bb11, %bb12
+ %indvar94 = phi i64 [ %indvar.next95, %bb12 ], [ 0, %bb11 ] ; <i64> [#uses=3]
+ %x.130 = phi float [ %77, %bb12 ], [ %73, %bb11 ] ; <float> [#uses=2]
+ %I_addr.433 = getelementptr float* %I_addr.2, i64 %indvar94 ; <float*> [#uses=1]
+ %O_addr.432 = getelementptr float* %O_addr.2, i64 %indvar94 ; <float*> [#uses=1]
+ %75 = load float* %I_addr.433, align 4 ; <float> [#uses=1]
+ %76 = fmul float %75, %x.130 ; <float> [#uses=1]
+ store float %76, float* %O_addr.432, align 4
+ %77 = fadd float %x.130, %0 ; <float> [#uses=2]
+ %indvar.next95 = add i64 %indvar94, 1 ; <i64> [#uses=2]
+ %exitcond = icmp eq i64 %indvar.next95, %N_addr.2 ; <i1> [#uses=1]
+ br i1 %exitcond, label %bb14, label %bb12
+
+bb14: ; preds = %bb12, %bb11
+ %x.1.lcssa = phi float [ %73, %bb11 ], [ %77, %bb12 ] ; <float> [#uses=1]
+ store float %x.1.lcssa, float* %Start, align 4
+ ret void
+
+return: ; preds = %entry
+ ret void
+}
+
+; Codegen shouldn't crash on this testcase.
+
+define void @bar(i32 %a, i32 %b) nounwind {
+entry: ; preds = %bb1, %entry, %for.end204
+ br label %outer
+
+outer: ; preds = %bb1, %entry
+ %i6 = phi i32 [ %storemerge171, %bb1 ], [ %a, %entry ] ; <i32> [#uses=2]
+ %storemerge171 = add i32 %i6, 1 ; <i32> [#uses=1]
+ br label %inner
+
+inner: ; preds = %bb0, %if.end275
+ %i8 = phi i32 [ %a, %outer ], [ %indvar.next159, %bb0 ] ; <i32> [#uses=2]
+ %t338 = load i32* undef ; <i32> [#uses=1]
+ %t191 = mul i32 %i8, %t338 ; <i32> [#uses=1]
+ %t179 = add i32 %i6, %t191 ; <i32> [#uses=1]
+ br label %bb0
+
+bb0: ; preds = %for.body332
+ %indvar.next159 = add i32 %i8, 1 ; <i32> [#uses=1]
+ br i1 undef, label %bb1, label %inner
+
+bb1: ; preds = %bb0, %outer
+ %midx.4 = phi i32 [ %t179, %bb0 ] ; <i32> [#uses=0]
+ br label %outer
+}
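Post-increment here means LSR arranges for each address to be bumped as the
loop iterates instead of being recomputed from the induction variable, so
the window between the # Start and # Stop markers needs no lea. A schematic
of the rewrite (illustrative, not this test's exact IR):

; before: %addr = getelementptr float* %I, i64 %iv        ; iv*4 math -> lea
; after:  %addr.next = getelementptr float* %addr, i64 16 ; plain add
;         (use %addr this trip; feed %addr.next back through the phi)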
diff --git a/test/CodeGen/X86/optimize-max-3.ll b/test/CodeGen/X86/optimize-max-3.ll
new file mode 100644
index 000000000000..bf8bfa28dafd
--- /dev/null
+++ b/test/CodeGen/X86/optimize-max-3.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+; LSR's OptimizeMax should eliminate the select (max).
+
+; CHECK: foo:
+; CHECK-NOT: cmov
+; CHECK: jle
+
+define void @foo(i64 %n, double* nocapture %p) nounwind {
+entry:
+ %cmp6 = icmp slt i64 %n, 0 ; <i1> [#uses=1]
+ br i1 %cmp6, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ %tmp = icmp sgt i64 %n, 0 ; <i1> [#uses=1]
+ %n.op = add i64 %n, 1 ; <i64> [#uses=1]
+ %tmp1 = select i1 %tmp, i64 %n.op, i64 1 ; <i64> [#uses=1]
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i = phi i64 [ %i.next, %for.body ], [ 0, %for.body.preheader ] ; <i64> [#uses=2]
+ %arrayidx = getelementptr double* %p, i64 %i ; <double*> [#uses=2]
+ %t4 = load double* %arrayidx ; <double> [#uses=1]
+ %mul = fmul double %t4, 2.200000e+00 ; <double> [#uses=1]
+ store double %mul, double* %arrayidx
+ %i.next = add nsw i64 %i, 1 ; <i64> [#uses=2]
+ %exitcond = icmp eq i64 %i.next, %tmp1 ; <i1> [#uses=1]
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
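The select in %for.body.preheader computes max(n + 1, 1) as the trip count,
but the guard in %entry already excluded n < 0, so n + 1 >= 1 and the max
always yields its left operand; OptimizeMax proves this and drops the
select, which is why the checks allow jle but forbid cmov. As an equation:

  n >= 0  ==>  max(n + 1, 1) = n + 1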
diff --git a/test/CodeGen/X86/or-address.ll b/test/CodeGen/X86/or-address.ll
new file mode 100644
index 000000000000..6447680e623b
--- /dev/null
+++ b/test/CodeGen/X86/or-address.ll
@@ -0,0 +1,90 @@
+; PR1135
+; RUN: llc %s -o - | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.3"
+
+
+; CHECK: movl %{{.*}}, (%rdi,%rdx,4)
+; CHECK: movl %{{.*}}, 8(%rdi,%rdx,4)
+; CHECK: movl %{{.*}}, 4(%rdi,%rdx,4)
+; CHECK: movl %{{.*}}, 12(%rdi,%rdx,4)
+
+define void @test(i32* nocapture %array, i32 %r0) nounwind ssp noredzone {
+bb.nph:
+ br label %bb
+
+bb: ; preds = %bb, %bb.nph
+ %j.010 = phi i8 [ 0, %bb.nph ], [ %14, %bb ] ; <i8> [#uses=1]
+ %k.19 = phi i8 [ 0, %bb.nph ], [ %.k.1, %bb ] ; <i8> [#uses=1]
+ %i0.08 = phi i8 [ 0, %bb.nph ], [ %15, %bb ] ; <i8> [#uses=3]
+ %0 = icmp slt i8 %i0.08, 4 ; <i1> [#uses=1]
+ %iftmp.0.0 = select i1 %0, i8 %i0.08, i8 0 ; <i8> [#uses=2]
+ %1 = icmp eq i8 %i0.08, 4 ; <i1> [#uses=1]
+ %2 = zext i1 %1 to i8 ; <i8> [#uses=1]
+ %.k.1 = add i8 %2, %k.19 ; <i8> [#uses=2]
+ %3 = shl i8 %.k.1, 2 ; <i8> [#uses=1]
+ %4 = add i8 %3, %iftmp.0.0 ; <i8> [#uses=1]
+ %5 = shl i8 %4, 2 ; <i8> [#uses=1]
+ %6 = zext i8 %5 to i64 ; <i64> [#uses=4]
+ %7 = getelementptr inbounds i32* %array, i64 %6 ; <i32*> [#uses=1]
+ store i32 %r0, i32* %7, align 4
+ %8 = or i64 %6, 2 ; <i64> [#uses=1]
+ %9 = getelementptr inbounds i32* %array, i64 %8 ; <i32*> [#uses=1]
+ store i32 %r0, i32* %9, align 4
+ %10 = or i64 %6, 1 ; <i64> [#uses=1]
+ %11 = getelementptr inbounds i32* %array, i64 %10 ; <i32*> [#uses=1]
+ store i32 %r0, i32* %11, align 4
+ %12 = or i64 %6, 3 ; <i64> [#uses=1]
+ %13 = getelementptr inbounds i32* %array, i64 %12 ; <i32*> [#uses=1]
+ store i32 %r0, i32* %13, align 4
+ %14 = add nsw i8 %j.010, 1 ; <i8> [#uses=2]
+ %15 = add i8 %iftmp.0.0, 1 ; <i8> [#uses=1]
+ %exitcond = icmp eq i8 %14, 32 ; <i1> [#uses=1]
+ br i1 %exitcond, label %return, label %bb
+
+return: ; preds = %bb
+ ret void
+}
+
+; CHECK: test1:
+; CHECK: movl %{{.*}}, (%rdi,%rcx,4)
+; CHECK: movl %{{.*}}, 8(%rdi,%rcx,4)
+; CHECK: movl %{{.*}}, 4(%rdi,%rcx,4)
+; CHECK: movl %{{.*}}, 12(%rdi,%rcx,4)
+
+define void @test1(i32* nocapture %array, i32 %r0, i8 signext %k, i8 signext %i0) nounwind {
+bb.nph:
+ br label %for.body
+
+for.body: ; preds = %for.body, %bb.nph
+ %j.065 = phi i8 [ 0, %bb.nph ], [ %inc52, %for.body ] ; <i8> [#uses=1]
+ %i0.addr.064 = phi i8 [ %i0, %bb.nph ], [ %add, %for.body ] ; <i8> [#uses=3]
+ %k.addr.163 = phi i8 [ %k, %bb.nph ], [ %inc.k.addr.1, %for.body ] ; <i8> [#uses=1]
+ %cmp5 = icmp slt i8 %i0.addr.064, 4 ; <i1> [#uses=1]
+ %cond = select i1 %cmp5, i8 %i0.addr.064, i8 0 ; <i8> [#uses=2]
+ %cmp12 = icmp eq i8 %i0.addr.064, 4 ; <i1> [#uses=1]
+ %inc = zext i1 %cmp12 to i8 ; <i8> [#uses=1]
+ %inc.k.addr.1 = add i8 %inc, %k.addr.163 ; <i8> [#uses=2]
+ %mul = shl i8 %cond, 2 ; <i8> [#uses=1]
+ %mul22 = shl i8 %inc.k.addr.1, 4 ; <i8> [#uses=1]
+ %add23 = add i8 %mul22, %mul ; <i8> [#uses=1]
+ %idxprom = zext i8 %add23 to i64 ; <i64> [#uses=4]
+ %arrayidx = getelementptr inbounds i32* %array, i64 %idxprom ; <i32*> [#uses=1]
+ store i32 %r0, i32* %arrayidx
+ %add3356 = or i64 %idxprom, 2 ; <i64> [#uses=1]
+ %arrayidx36 = getelementptr inbounds i32* %array, i64 %add3356 ; <i32*> [#uses=1]
+ store i32 %r0, i32* %arrayidx36
+ %add4058 = or i64 %idxprom, 1 ; <i64> [#uses=1]
+ %arrayidx43 = getelementptr inbounds i32* %array, i64 %add4058 ; <i32*> [#uses=1]
+ store i32 %r0, i32* %arrayidx43
+ %add4760 = or i64 %idxprom, 3 ; <i64> [#uses=1]
+ %arrayidx50 = getelementptr inbounds i32* %array, i64 %add4760 ; <i32*> [#uses=1]
+ store i32 %r0, i32* %arrayidx50
+ %inc52 = add nsw i8 %j.065, 1 ; <i8> [#uses=2]
+ %add = add i8 %cond, 1 ; <i8> [#uses=1]
+ %exitcond = icmp eq i8 %inc52, 32 ; <i1> [#uses=1]
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
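Why or shows up in the address math: both functions build the index with a
shl by 2 first, so bits 0-1 are known zero and or i64 %idx, k (k = 1, 2, 3)
equals %idx + k; codegen can then fold the constant into the scaled x86
addressing mode rather than emitting separate adds. For the i32 array
(register names illustrative):

; addr(%idx | 2) = %array + (%idx + 2)*4 = %array + %idx*4 + 8
;   -> movl %esi, 8(%rdi,%rdx,4)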
diff --git a/test/CodeGen/X86/pic.ll b/test/CodeGen/X86/pic.ll
index 35b172509c95..9506c9b5db11 100644
--- a/test/CodeGen/X86/pic.ll
+++ b/test/CodeGen/X86/pic.ll
@@ -4,18 +4,18 @@
@dst = external global i32
@src = external global i32
-define void @test1() nounwind {
+define void @test0() nounwind {
entry:
store i32* @dst, i32** @ptr
%tmp.s = load i32* @src
store i32 %tmp.s, i32* @dst
ret void
-; LINUX: test1:
-; LINUX: call .L1$pb
-; LINUX-NEXT: .L1$pb:
+; LINUX: test0:
+; LINUX: call .L0$pb
+; LINUX-NEXT: .L0$pb:
; LINUX-NEXT: popl
-; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L1$pb),
+; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L0$pb),
; LINUX: movl dst@GOT(%eax),
; LINUX: movl ptr@GOT(%eax),
; LINUX: movl src@GOT(%eax),
@@ -26,18 +26,18 @@ entry:
@dst2 = global i32 0
@src2 = global i32 0
-define void @test2() nounwind {
+define void @test1() nounwind {
entry:
store i32* @dst2, i32** @ptr2
%tmp.s = load i32* @src2
store i32 %tmp.s, i32* @dst2
ret void
-; LINUX: test2:
-; LINUX: call .L2$pb
-; LINUX-NEXT: .L2$pb:
+; LINUX: test1:
+; LINUX: call .L1$pb
+; LINUX-NEXT: .L1$pb:
; LINUX-NEXT: popl
-; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L2$pb), %eax
+; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L1$pb), %eax
; LINUX: movl dst2@GOT(%eax),
; LINUX: movl ptr2@GOT(%eax),
; LINUX: movl src2@GOT(%eax),
@@ -47,17 +47,17 @@ entry:
declare i8* @malloc(i32)
-define void @test3() nounwind {
+define void @test2() nounwind {
entry:
%ptr = call i8* @malloc(i32 40)
ret void
-; LINUX: test3:
+; LINUX: test2:
; LINUX: pushl %ebx
; LINUX-NEXT: subl $8, %esp
-; LINUX-NEXT: call .L3$pb
-; LINUX-NEXT: .L3$pb:
+; LINUX-NEXT: call .L2$pb
+; LINUX-NEXT: .L2$pb:
; LINUX-NEXT: popl %ebx
-; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L3$pb), %ebx
+; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L2$pb), %ebx
; LINUX: movl $40, (%esp)
; LINUX: call malloc@PLT
; LINUX: addl $8, %esp
@@ -67,18 +67,18 @@ entry:
@pfoo = external global void(...)*
-define void @test4() nounwind {
+define void @test3() nounwind {
entry:
%tmp = call void(...)*(...)* @afoo()
store void(...)* %tmp, void(...)** @pfoo
%tmp1 = load void(...)** @pfoo
call void(...)* %tmp1()
ret void
-; LINUX: test4:
-; LINUX: call .L4$pb
-; LINUX-NEXT: .L4$pb:
+; LINUX: test3:
+; LINUX: call .L3$pb
+; LINUX-NEXT: .L3$pb:
; LINUX: popl
-; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L4$pb),
+; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L3$pb),
; LINUX: movl pfoo@GOT(%esi),
; LINUX: call afoo@PLT
; LINUX: call *
@@ -86,14 +86,14 @@ entry:
declare void(...)* @afoo(...)
-define void @test5() nounwind {
+define void @test4() nounwind {
entry:
call void(...)* @foo()
ret void
-; LINUX: test5:
-; LINUX: call .L5$pb
+; LINUX: test4:
+; LINUX: call .L4$pb
; LINUX: popl %ebx
-; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L5$pb), %ebx
+; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L4$pb), %ebx
; LINUX: call foo@PLT
}
@@ -104,18 +104,18 @@ declare void @foo(...)
@dst6 = internal global i32 0
@src6 = internal global i32 0
-define void @test6() nounwind {
+define void @test5() nounwind {
entry:
store i32* @dst6, i32** @ptr6
%tmp.s = load i32* @src6
store i32 %tmp.s, i32* @dst6
ret void
-; LINUX: test6:
-; LINUX: call .L6$pb
-; LINUX-NEXT: .L6$pb:
+; LINUX: test5:
+; LINUX: call .L5$pb
+; LINUX-NEXT: .L5$pb:
; LINUX-NEXT: popl %eax
-; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L6$pb), %eax
+; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L5$pb), %eax
; LINUX: leal dst6@GOTOFF(%eax), %ecx
; LINUX: movl %ecx, ptr6@GOTOFF(%eax)
; LINUX: movl src6@GOTOFF(%eax), %ecx
@@ -125,24 +125,24 @@ entry:
;; Test constant pool references.
-define double @test7(i32 %a.u) nounwind {
+define double @test6(i32 %a.u) nounwind {
entry:
%tmp = icmp eq i32 %a.u,0
%retval = select i1 %tmp, double 4.561230e+02, double 1.234560e+02
ret double %retval
-; LINUX: .LCPI7_0:
+; LINUX: .LCPI6_0:
-; LINUX: test7:
-; LINUX: call .L7$pb
-; LINUX: .L7$pb:
-; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L7$pb),
-; LINUX: fldl .LCPI7_0@GOTOFF(
+; LINUX: test6:
+; LINUX: call .L6$pb
+; LINUX: .L6$pb:
+; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L6$pb),
+; LINUX: fldl .LCPI6_0@GOTOFF(
}
;; Test jump table references.
-define void @test8(i32 %n.u) nounwind {
+define void @test7(i32 %n.u) nounwind {
entry:
switch i32 %n.u, label %bb12 [i32 1, label %bb i32 2, label %bb6 i32 4, label %bb7 i32 5, label %bb8 i32 6, label %bb10 i32 7, label %bb1 i32 8, label %bb3 i32 9, label %bb4 i32 10, label %bb9 i32 11, label %bb2 i32 12, label %bb5 i32 13, label %bb11 ]
bb:
@@ -185,19 +185,19 @@ bb12:
tail call void(...)* @foo6()
ret void
-; LINUX: test8:
-; LINUX: call .L8$pb
-; LINUX: .L8$pb:
-; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L8$pb),
-; LINUX: addl .LJTI8_0@GOTOFF(
+; LINUX: test7:
+; LINUX: call .L7$pb
+; LINUX: .L7$pb:
+; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L7$pb),
+; LINUX: addl .LJTI7_0@GOTOFF(
; LINUX: jmpl *
-; LINUX: .LJTI8_0:
-; LINUX: .long .LBB8_2@GOTOFF
-; LINUX: .long .LBB8_8@GOTOFF
-; LINUX: .long .LBB8_14@GOTOFF
-; LINUX: .long .LBB8_9@GOTOFF
-; LINUX: .long .LBB8_10@GOTOFF
+; LINUX: .LJTI7_0:
+; LINUX: .long .LBB7_2@GOTOFF
+; LINUX: .long .LBB7_8@GOTOFF
+; LINUX: .long .LBB7_14@GOTOFF
+; LINUX: .long .LBB7_9@GOTOFF
+; LINUX: .long .LBB7_10@GOTOFF
}
declare void @foo1(...)
diff --git a/test/CodeGen/X86/pic_jumptable.ll b/test/CodeGen/X86/pic_jumptable.ll
index b3750c1e8e67..31071bc74a78 100644
--- a/test/CodeGen/X86/pic_jumptable.ll
+++ b/test/CodeGen/X86/pic_jumptable.ll
@@ -1,13 +1,18 @@
; RUN: llc < %s -relocation-model=pic -mtriple=i386-linux-gnu -asm-verbose=false | not grep -F .text
-; RUN: llc < %s -relocation-model=pic -mtriple=i686-apple-darwin -asm-verbose=false | not grep lea
-; RUN: llc < %s -relocation-model=pic -mtriple=i686-apple-darwin -asm-verbose=false | grep add | count 2
+; RUN: llc < %s -relocation-model=pic -mtriple=i686-apple-darwin -asm-verbose=false | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin | not grep 'lJTI'
; rdar://6971437
+; rdar://7738756
declare void @_Z3bari(i32)
define linkonce void @_Z3fooILi1EEvi(i32 %Y) nounwind {
entry:
+; CHECK: L0$pb
+; CHECK-NOT: leal
+; CHECK: Ltmp0 = LJTI0_0-L0$pb
+; CHECK-NEXT: addl Ltmp0(%eax,%ecx,4)
+; CHECK-NEXT: jmpl *%eax
%Y_addr = alloca i32 ; <i32*> [#uses=2]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
store i32 %Y, i32* %Y_addr
diff --git a/test/CodeGen/X86/postra-licm.ll b/test/CodeGen/X86/postra-licm.ll
new file mode 100644
index 000000000000..97cc7b4977cf
--- /dev/null
+++ b/test/CodeGen/X86/postra-licm.ll
@@ -0,0 +1,185 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic -disable-fp-elim | FileCheck %s -check-prefix=X86-32
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -relocation-model=pic -disable-fp-elim | FileCheck %s -check-prefix=X86-64
+
+; MachineLICM should be able to hoist a loop-invariant reload out of the loop.
+; rdar://7233099
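+;
+; As the CHECK lines below read: in @t1 the X86-32 run expects the reload
+;   movl -16(%ebp),
+; to land in %bb26.preheader.bb28_crit_edge, before the .align that opens
+; the %bb28 loop, and in @t2 the X86-64 run expects the loop-invariant
+;   movq _map_4_to_16@GOTPCREL(%rip)
+; to sit in %bb.nph rather than inside %bb.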
+
+%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
+%struct.__sFILEX = type opaque
+%struct.__sbuf = type { i8*, i32 }
+%struct.epoch_t = type { %struct.trans_t*, %struct.trans_t*, i32, i32, i32, i32, i32 }
+%struct.trans_t = type { i32, i32, i32, i8* }
+
+@.str12 = external constant [2 x i8], align 1 ; <[2 x i8]*> [#uses=1]
+@.str19 = external constant [7 x i8], align 1 ; <[7 x i8]*> [#uses=1]
+@.str24 = external constant [4 x i8], align 1 ; <[4 x i8]*> [#uses=1]
+
+define i32 @t1(i32 %c, i8** nocapture %v) nounwind ssp {
+; X86-32: t1:
+entry:
+ br i1 undef, label %bb, label %bb3
+
+bb: ; preds = %entry
+ unreachable
+
+bb3: ; preds = %entry
+ br i1 undef, label %bb.i, label %bb.nph41
+
+bb.i: ; preds = %bb3
+ unreachable
+
+bb.nph41: ; preds = %bb3
+ %0 = call %struct.FILE* @"\01_fopen$UNIX2003"(i8* undef, i8* getelementptr inbounds ([2 x i8]* @.str12, i32 0, i32 0)) nounwind ; <%struct.FILE*> [#uses=3]
+ br i1 undef, label %bb4, label %bb5.preheader
+
+bb5.preheader: ; preds = %bb.nph41
+ br label %bb5
+
+bb4: ; preds = %bb.nph41
+ unreachable
+
+bb5: ; preds = %bb5, %bb5.preheader
+ br i1 undef, label %bb7, label %bb5
+
+bb7: ; preds = %bb5
+ br i1 undef, label %bb9, label %bb12
+
+bb9: ; preds = %bb7
+ unreachable
+
+bb12: ; preds = %bb7
+ br i1 undef, label %bb16, label %bb22
+
+bb16: ; preds = %bb12
+ unreachable
+
+bb22: ; preds = %bb12
+ br label %bb.i1
+
+bb.i1: ; preds = %bb.i1, %bb22
+ %1 = icmp eq i8 undef, 69 ; <i1> [#uses=1]
+ br i1 %1, label %imix_test.exit, label %bb.i1
+
+imix_test.exit: ; preds = %bb.i1
+ br i1 undef, label %bb23, label %bb26.preheader
+
+bb26.preheader: ; preds = %imix_test.exit
+ br i1 undef, label %bb28, label %bb30
+
+bb23: ; preds = %imix_test.exit
+ unreachable
+; X86-32: %bb26.preheader.bb28_crit_edge
+; X86-32: movl -16(%ebp),
+; X86-32-NEXT: .align 4
+; X86-32-NEXT: %bb28
+
+bb28: ; preds = %bb28, %bb26.preheader
+ %counter.035 = phi i32 [ %3, %bb28 ], [ 0, %bb26.preheader ] ; <i32> [#uses=2]
+ %tmp56 = shl i32 %counter.035, 2 ; <i32> [#uses=0]
+ %2 = call i8* @fgets(i8* undef, i32 50, %struct.FILE* %0) nounwind ; <i8*> [#uses=0]
+ %3 = add nsw i32 %counter.035, 1 ; <i32> [#uses=1]
+ %4 = call i32 @feof(%struct.FILE* %0) nounwind ; <i32> [#uses=0]
+ br label %bb28
+
+bb30: ; preds = %bb26.preheader
+ %5 = call i32 @strcmp(i8* undef, i8* getelementptr inbounds ([7 x i8]* @.str19, i32 0, i32 0)) nounwind readonly ; <i32> [#uses=0]
+ br i1 undef, label %bb34, label %bb70
+
+bb32.loopexit: ; preds = %bb45
+ %6 = icmp eq i32 undef, 0 ; <i1> [#uses=1]
+ %indvar.next55 = add i32 %indvar54, 1 ; <i32> [#uses=1]
+ br i1 %6, label %bb34, label %bb70
+
+bb34: ; preds = %bb32.loopexit, %bb30
+ %indvar54 = phi i32 [ %indvar.next55, %bb32.loopexit ], [ 0, %bb30 ] ; <i32> [#uses=3]
+ br i1 false, label %bb35, label %bb39.preheader
+
+bb35: ; preds = %bb34
+ unreachable
+
+bb39.preheader: ; preds = %bb34
+ %7 = getelementptr inbounds %struct.epoch_t* undef, i32 %indvar54, i32 3 ; <i32*> [#uses=1]
+ %8 = getelementptr inbounds %struct.epoch_t* undef, i32 %indvar54, i32 2 ; <i32*> [#uses=0]
+ br i1 false, label %bb42, label %bb45
+
+bb42: ; preds = %bb39.preheader
+ unreachable
+
+bb45: ; preds = %bb39.preheader
+ %9 = call i32 @strcmp(i8* undef, i8* getelementptr inbounds ([4 x i8]* @.str24, i32 0, i32 0)) nounwind readonly ; <i32> [#uses=0]
+ br i1 false, label %bb47, label %bb32.loopexit
+
+bb47: ; preds = %bb45
+ %10 = load i32* %7, align 4 ; <i32> [#uses=0]
+ unreachable
+
+bb70: ; preds = %bb32.loopexit, %bb30
+ br i1 undef, label %bb78, label %bb76
+
+bb76: ; preds = %bb70
+ unreachable
+
+bb78: ; preds = %bb70
+ br i1 undef, label %bb83, label %bb79
+
+bb79: ; preds = %bb78
+ unreachable
+
+bb83: ; preds = %bb78
+ call void @rewind(%struct.FILE* %0) nounwind
+ unreachable
+}
+
+declare %struct.FILE* @"\01_fopen$UNIX2003"(i8*, i8*)
+
+declare i8* @fgets(i8*, i32, %struct.FILE* nocapture) nounwind
+
+declare void @rewind(%struct.FILE* nocapture) nounwind
+
+declare i32 @feof(%struct.FILE* nocapture) nounwind
+
+declare i32 @strcmp(i8* nocapture, i8* nocapture) nounwind readonly
+
+@map_4_to_16 = external constant [16 x i16], align 32 ; <[16 x i16]*> [#uses=2]
+
+define void @t2(i8* nocapture %bufp, i8* nocapture %data, i32 %dsize) nounwind ssp {
+; X86-64: t2:
+entry:
+ br i1 undef, label %return, label %bb.nph
+
+bb.nph: ; preds = %entry
+; X86-64: movq _map_4_to_16@GOTPCREL(%rip)
+; X86-64: .align 4
+ %tmp5 = zext i32 undef to i64 ; <i64> [#uses=1]
+ %tmp6 = add i64 %tmp5, 1 ; <i64> [#uses=1]
+ %tmp11 = shl i64 undef, 1 ; <i64> [#uses=1]
+ %tmp14 = mul i64 undef, 3 ; <i64> [#uses=1]
+ br label %bb
+
+bb: ; preds = %bb, %bb.nph
+ %tmp9 = mul i64 undef, undef ; <i64> [#uses=2]
+ %tmp12 = add i64 %tmp11, %tmp9 ; <i64> [#uses=1]
+ %scevgep13 = getelementptr i8* %bufp, i64 %tmp12 ; <i8*> [#uses=1]
+ %tmp15 = add i64 %tmp14, %tmp9 ; <i64> [#uses=1]
+ %scevgep16 = getelementptr i8* %bufp, i64 %tmp15 ; <i8*> [#uses=1]
+ %0 = load i8* undef, align 1 ; <i8> [#uses=1]
+ %1 = zext i8 %0 to i32 ; <i32> [#uses=1]
+ %2 = getelementptr inbounds [16 x i16]* @map_4_to_16, i64 0, i64 0 ; <i16*> [#uses=1]
+ %3 = load i16* %2, align 2 ; <i16> [#uses=1]
+ %4 = trunc i16 %3 to i8 ; <i8> [#uses=1]
+ store i8 %4, i8* undef, align 1
+ %5 = and i32 %1, 15 ; <i32> [#uses=1]
+ %6 = zext i32 %5 to i64 ; <i64> [#uses=1]
+ %7 = getelementptr inbounds [16 x i16]* @map_4_to_16, i64 0, i64 %6 ; <i16*> [#uses=1]
+ %8 = load i16* %7, align 2 ; <i16> [#uses=2]
+ %9 = lshr i16 %8, 8 ; <i16> [#uses=1]
+ %10 = trunc i16 %9 to i8 ; <i8> [#uses=1]
+ store i8 %10, i8* %scevgep13, align 1
+ %11 = trunc i16 %8 to i8 ; <i8> [#uses=1]
+ store i8 %11, i8* %scevgep16, align 1
+ %exitcond = icmp eq i64 undef, %tmp6 ; <i1> [#uses=1]
+ br i1 %exitcond, label %return, label %bb
+
+return: ; preds = %bb, %entry
+ ret void
+}
diff --git a/test/CodeGen/X86/promote-i16.ll b/test/CodeGen/X86/promote-i16.ll
new file mode 100644
index 000000000000..101bb29593cc
--- /dev/null
+++ b/test/CodeGen/X86/promote-i16.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
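+; The i16 xor is expected to be promoted to 32-bit operations, per the
+; CHECK lines: a movzwl load of the argument, a 32-bit xorl with the
+; immediate, and a movswl to sign-extend the result back for the return.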
+
+define signext i16 @foo(i16 signext %x) nounwind {
+entry:
+; CHECK: foo:
+; CHECK: movzwl 4(%esp), %eax
+; CHECK: xorl $21998, %eax
+; CHECK: movswl %ax, %eax
+ %0 = xor i16 %x, 21998
+ ret i16 %0
+}
diff --git a/test/CodeGen/X86/rot16.ll b/test/CodeGen/X86/rot16.ll
index 42ece47b0300..de23dcb78f10 100644
--- a/test/CodeGen/X86/rot16.ll
+++ b/test/CodeGen/X86/rot16.ll
@@ -1,11 +1,9 @@
-; RUN: llc < %s -march=x86 > %t
-; RUN: grep rol %t | count 3
-; RUN: grep ror %t | count 1
-; RUN: grep shld %t | count 2
-; RUN: grep shrd %t | count 2
+; RUN: llc < %s -march=x86 | FileCheck %s
define i16 @foo(i16 %x, i16 %y, i16 %z) nounwind readnone {
entry:
+; CHECK: foo:
+; CHECK: rolw %cl
%0 = shl i16 %x, %z
%1 = sub i16 16, %z
%2 = lshr i16 %x, %1
@@ -15,6 +13,8 @@ entry:
define i16 @bar(i16 %x, i16 %y, i16 %z) nounwind readnone {
entry:
+; CHECK: bar:
+; CHECK: shldw %cl
%0 = shl i16 %y, %z
%1 = sub i16 16, %z
%2 = lshr i16 %x, %1
@@ -24,6 +24,8 @@ entry:
define i16 @un(i16 %x, i16 %y, i16 %z) nounwind readnone {
entry:
+; CHECK: un:
+; CHECK: rorw %cl
%0 = lshr i16 %x, %z
%1 = sub i16 16, %z
%2 = shl i16 %x, %1
@@ -33,6 +35,8 @@ entry:
define i16 @bu(i16 %x, i16 %y, i16 %z) nounwind readnone {
entry:
+; CHECK: bu:
+; CHECK: shrdw
%0 = lshr i16 %y, %z
%1 = sub i16 16, %z
%2 = shl i16 %x, %1
@@ -42,6 +46,8 @@ entry:
define i16 @xfoo(i16 %x, i16 %y, i16 %z) nounwind readnone {
entry:
+; CHECK: xfoo:
+; CHECK: rolw $5
%0 = lshr i16 %x, 11
%1 = shl i16 %x, 5
%2 = or i16 %0, %1
@@ -50,6 +56,8 @@ entry:
define i16 @xbar(i16 %x, i16 %y, i16 %z) nounwind readnone {
entry:
+; CHECK: xbar:
+; CHECK: shldw $5
%0 = shl i16 %y, 5
%1 = lshr i16 %x, 11
%2 = or i16 %0, %1
@@ -58,6 +66,8 @@ entry:
define i16 @xun(i16 %x, i16 %y, i16 %z) nounwind readnone {
entry:
+; CHECK: xun:
+; CHECK: rolw $11
%0 = lshr i16 %x, 5
%1 = shl i16 %x, 11
%2 = or i16 %0, %1
@@ -66,6 +76,8 @@ entry:
define i16 @xbu(i16 %x, i16 %y, i16 %z) nounwind readnone {
entry:
+; CHECK: xbu:
+; CHECK: shldw $11
%0 = lshr i16 %y, 5
%1 = shl i16 %x, 11
%2 = or i16 %0, %1
diff --git a/test/CodeGen/X86/rot32.ll b/test/CodeGen/X86/rot32.ll
index 655ed272837a..99602fd64ff5 100644
--- a/test/CodeGen/X86/rot32.ll
+++ b/test/CodeGen/X86/rot32.ll
@@ -1,11 +1,9 @@
-; RUN: llc < %s -march=x86 > %t
-; RUN: grep rol %t | count 3
-; RUN: grep ror %t | count 1
-; RUN: grep shld %t | count 2
-; RUN: grep shrd %t | count 2
+; RUN: llc < %s -march=x86 | FileCheck %s
define i32 @foo(i32 %x, i32 %y, i32 %z) nounwind readnone {
entry:
+; CHECK: foo:
+; CHECK: roll %cl
%0 = shl i32 %x, %z
%1 = sub i32 32, %z
%2 = lshr i32 %x, %1
@@ -15,6 +13,8 @@ entry:
define i32 @bar(i32 %x, i32 %y, i32 %z) nounwind readnone {
entry:
+; CHECK: bar:
+; CHECK: shldl %cl
%0 = shl i32 %y, %z
%1 = sub i32 32, %z
%2 = lshr i32 %x, %1
@@ -24,6 +24,8 @@ entry:
define i32 @un(i32 %x, i32 %y, i32 %z) nounwind readnone {
entry:
+; CHECK: un:
+; CHECK: rorl %cl
%0 = lshr i32 %x, %z
%1 = sub i32 32, %z
%2 = shl i32 %x, %1
@@ -33,6 +35,8 @@ entry:
define i32 @bu(i32 %x, i32 %y, i32 %z) nounwind readnone {
entry:
+; CHECK: bu:
+; CHECK: shrdl %cl
%0 = lshr i32 %y, %z
%1 = sub i32 32, %z
%2 = shl i32 %x, %1
@@ -42,6 +46,8 @@ entry:
define i32 @xfoo(i32 %x, i32 %y, i32 %z) nounwind readnone {
entry:
+; CHECK: xfoo:
+; CHECK: roll $7
%0 = lshr i32 %x, 25
%1 = shl i32 %x, 7
%2 = or i32 %0, %1
@@ -50,6 +56,8 @@ entry:
define i32 @xbar(i32 %x, i32 %y, i32 %z) nounwind readnone {
entry:
+; CHECK: xbar:
+; CHECK: shldl $7
%0 = shl i32 %y, 7
%1 = lshr i32 %x, 25
%2 = or i32 %0, %1
@@ -58,6 +66,8 @@ entry:
define i32 @xun(i32 %x, i32 %y, i32 %z) nounwind readnone {
entry:
+; CHECK: xun:
+; CHECK: roll $25
%0 = lshr i32 %x, 7
%1 = shl i32 %x, 25
%2 = or i32 %0, %1
@@ -66,6 +76,8 @@ entry:
define i32 @xbu(i32 %x, i32 %y, i32 %z) nounwind readnone {
entry:
+; CHECK: xbu:
+; CHECK: shldl
%0 = lshr i32 %y, 7
%1 = shl i32 %x, 25
%2 = or i32 %0, %1
diff --git a/test/CodeGen/X86/shl_elim.ll b/test/CodeGen/X86/shl_elim.ll
index 445889166bd5..0827221875b1 100644
--- a/test/CodeGen/X86/shl_elim.ll
+++ b/test/CodeGen/X86/shl_elim.ll
@@ -2,7 +2,7 @@
; RUN: llc < %s -march=x86 | grep {shrl .eax}
; RUN: llc < %s -march=x86 | grep {movswl .ax, .eax}
-define i32 @test1(i64 %a) {
+define i32 @test1(i64 %a) nounwind {
%tmp29 = lshr i64 %a, 24 ; <i64> [#uses=1]
%tmp23 = trunc i64 %tmp29 to i32 ; <i32> [#uses=1]
%tmp410 = lshr i32 %tmp23, 9 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/sibcall-2.ll b/test/CodeGen/X86/sibcall-2.ll
new file mode 100644
index 000000000000..f8a746563b51
--- /dev/null
+++ b/test/CodeGen/X86/sibcall-2.ll
@@ -0,0 +1,52 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim | FileCheck %s -check-prefix=32
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -disable-fp-elim | FileCheck %s -check-prefix=64
+
+; Tail calls should not use ebp / rbp after the frame pointer has been
+; popped; use esp / rsp instead.
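+;
+; Sketch of the hazard (illustrative; only the esp / rsp forms below are
+; actually checked): after the epilogue pops the frame pointer, e.g.
+;   popl %ebp
+;   jmpl *8(%ebp)
+; %ebp already holds the caller's value, so the indirect target would be
+; loaded from the wrong frame. Hence the expected jmpl *4(%esp) style forms.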
+
+define void @t1(i8* nocapture %value) nounwind {
+entry:
+; 32: t1:
+; 32: jmpl *4(%esp)
+
+; 64: t1:
+; 64: jmpq *%rdi
+ %0 = bitcast i8* %value to void ()*
+ tail call void %0() nounwind
+ ret void
+}
+
+define void @t2(i32 %a, i8* nocapture %value) nounwind {
+entry:
+; 32: t2:
+; 32: jmpl *8(%esp)
+
+; 64: t2:
+; 64: jmpq *%rsi
+ %0 = bitcast i8* %value to void ()*
+ tail call void %0() nounwind
+ ret void
+}
+
+define void @t3(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i8* nocapture %value) nounwind {
+entry:
+; 32: t3:
+; 32: jmpl *28(%esp)
+
+; 64: t3:
+; 64: jmpq *8(%rsp)
+ %0 = bitcast i8* %value to void ()*
+ tail call void %0() nounwind
+ ret void
+}
+
+define void @t4(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i8* nocapture %value) nounwind {
+entry:
+; 32: t4:
+; 32: jmpl *32(%esp)
+
+; 64: t4:
+; 64: jmpq *16(%rsp)
+ %0 = bitcast i8* %value to void ()*
+ tail call void %0() nounwind
+ ret void
+}
diff --git a/test/CodeGen/X86/sibcall.ll b/test/CodeGen/X86/sibcall.ll
index 8e52a7cbfe78..4b27f2edb759 100644
--- a/test/CodeGen/X86/sibcall.ll
+++ b/test/CodeGen/X86/sibcall.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86 -asm-verbose=false | FileCheck %s -check-prefix=32
-; RUN: llc < %s -march=x86-64 -asm-verbose=false | FileCheck %s -check-prefix=64
+; RUN: llc < %s -march=x86 -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=32
+; RUN: llc < %s -march=x86-64 -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=64
define void @t1(i32 %x) nounwind ssp {
entry:
@@ -313,3 +313,21 @@ entry:
}
declare void @foo()
+
+; If the caller and callee calling conventions mismatch, check whether the
+; return values are returned in the same registers.
+; rdar://7874780
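+; A reading of the CHECK lines below: on x86-32 the conventions disagree on
+; where the double comes back (presumably an SSE register vs. the x87 stack),
+; so a real call plus an fldl (%esp) reload is kept; on x86-64 both sides
+; return the double in %xmm0, so the call can become a sibcall jmp.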
+
+define double @t20(double %x) nounwind {
+entry:
+; 32: t20:
+; 32: call {{_?}}foo20
+; 32: fldl (%esp)
+
+; 64: t20:
+; 64: jmp {{_?}}foo20
+ %0 = tail call fastcc double @foo20(double %x) nounwind
+ ret double %0
+}
+
+declare fastcc double @foo20(double) nounwind
diff --git a/test/CodeGen/X86/sink-hoist.ll b/test/CodeGen/X86/sink-hoist.ll
index 01d73736d6c2..031c01e9af7d 100644
--- a/test/CodeGen/X86/sink-hoist.ll
+++ b/test/CodeGen/X86/sink-hoist.ll
@@ -61,9 +61,9 @@ entry:
; Codegen should hoist and CSE these constants.
; CHECK: vv:
-; CHECK: LCPI4_0(%rip), %xmm0
-; CHECK: LCPI4_1(%rip), %xmm1
-; CHECK: LCPI4_2(%rip), %xmm2
+; CHECK: LCPI3_0(%rip), %xmm0
+; CHECK: LCPI3_1(%rip), %xmm1
+; CHECK: LCPI3_2(%rip), %xmm2
; CHECK: align
; CHECK-NOT: LCPI
; CHECK: ret
diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll
index a734c05b8686..ef66d1a44a18 100644
--- a/test/CodeGen/X86/sse41.ll
+++ b/test/CodeGen/X86/sse41.ll
@@ -123,11 +123,11 @@ define float @ext_1(<4 x float> %v) nounwind {
; X32: _ext_1:
; X32: pshufd $3, %xmm0, %xmm0
-; X32: addss LCPI8_0, %xmm0
+; X32: addss LCPI7_0, %xmm0
; X64: _ext_1:
; X64: pshufd $3, %xmm0, %xmm0
-; X64: addss LCPI8_0(%rip), %xmm0
+; X64: addss LCPI7_0(%rip), %xmm0
}
define float @ext_2(<4 x float> %v) nounwind {
%s = extractelement <4 x float> %v, i32 3
diff --git a/test/CodeGen/X86/stack-align.ll b/test/CodeGen/X86/stack-align.ll
index e971ef70dbd4..271ad1aad0ba 100644
--- a/test/CodeGen/X86/stack-align.ll
+++ b/test/CodeGen/X86/stack-align.ll
@@ -31,7 +31,7 @@ define <2 x double> @test3(<2 x double> %x, <2 x double> %y) alignstack(32) {
entry:
; CHECK: andl{{.*}}$-32, %esp
call void @test2()
- %A = mul <2 x double> %x, %y
+ %A = fmul <2 x double> %x, %y
ret <2 x double> %A
}
diff --git a/test/CodeGen/X86/stack-color-with-reg.ll b/test/CodeGen/X86/stack-color-with-reg.ll
index 83f56c10bb75..001a54096408 100644
--- a/test/CodeGen/X86/stack-color-with-reg.ll
+++ b/test/CodeGen/X86/stack-color-with-reg.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t
-; RUN: grep asm-printer %t | grep 156
-; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 4
+; RUN: grep asm-printer %t | grep 166
+; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 5
type { [62 x %struct.Bitvec*] } ; type %0
type { i8* } ; type %1
diff --git a/test/CodeGen/X86/store-narrow.ll b/test/CodeGen/X86/store-narrow.ll
new file mode 100644
index 000000000000..b1100fa960c0
--- /dev/null
+++ b/test/CodeGen/X86/store-narrow.ll
@@ -0,0 +1,127 @@
+; rdar://7860110
+; RUN: llc < %s | FileCheck %s -check-prefix=X64
+; RUN: llc -march=x86 < %s | FileCheck %s -check-prefix=X32
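+
+; The common shape of these tests: a load, a mask that clears exactly the
+; bits being replaced, an insertion such as
+;   %D = or (and (load %p), MASK), (shl (zext %v), N)
+; and a store of %D back to %p. Each should narrow to a single movb / movw
+; at the matching byte offset instead of a full-width read-modify-write.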
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.2"
+
+define void @test1(i32* nocapture %a0, i8 zeroext %a1) nounwind ssp {
+entry:
+ %A = load i32* %a0, align 4
+ %B = and i32 %A, -256 ; 0xFFFFFF00
+ %C = zext i8 %a1 to i32
+ %D = or i32 %C, %B
+ store i32 %D, i32* %a0, align 4
+ ret void
+
+; X64: test1:
+; X64: movb %sil, (%rdi)
+
+; X32: test1:
+; X32: movb 8(%esp), %al
+; X32: movb %al, (%{{.*}})
+}
+
+define void @test2(i32* nocapture %a0, i8 zeroext %a1) nounwind ssp {
+entry:
+ %A = load i32* %a0, align 4
+ %B = and i32 %A, -65281 ; 0xFFFF00FF
+ %C = zext i8 %a1 to i32
+ %CS = shl i32 %C, 8
+ %D = or i32 %B, %CS
+ store i32 %D, i32* %a0, align 4
+ ret void
+; X64: test2:
+; X64: movb %sil, 1(%rdi)
+
+; X32: test2:
+; X32: movb 8(%esp), %al
+; X32: movb %al, 1(%{{.*}})
+}
+
+define void @test3(i32* nocapture %a0, i16 zeroext %a1) nounwind ssp {
+entry:
+ %A = load i32* %a0, align 4
+ %B = and i32 %A, -65536 ; 0xFFFF0000
+ %C = zext i16 %a1 to i32
+ %D = or i32 %B, %C
+ store i32 %D, i32* %a0, align 4
+ ret void
+; X64: test3:
+; X64: movw %si, (%rdi)
+
+; X32: test3:
+; X32: movw 8(%esp), %ax
+; X32: movw %ax, (%{{.*}})
+}
+
+define void @test4(i32* nocapture %a0, i16 zeroext %a1) nounwind ssp {
+entry:
+ %A = load i32* %a0, align 4
+ %B = and i32 %A, 65535 ; 0x0000FFFF
+ %C = zext i16 %a1 to i32
+ %CS = shl i32 %C, 16
+ %D = or i32 %B, %CS
+ store i32 %D, i32* %a0, align 4
+ ret void
+; X64: test4:
+; X64: movw %si, 2(%rdi)
+
+; X32: test4:
+; X32: movzwl 8(%esp), %eax
+; X32: movw %ax, 2(%{{.*}})
+}
+
+define void @test5(i64* nocapture %a0, i16 zeroext %a1) nounwind ssp {
+entry:
+ %A = load i64* %a0, align 4
+ %B = and i64 %A, -4294901761 ; 0xFFFFFFFF0000FFFF
+ %C = zext i16 %a1 to i64
+ %CS = shl i64 %C, 16
+ %D = or i64 %B, %CS
+ store i64 %D, i64* %a0, align 4
+ ret void
+; X64: test5:
+; X64: movw %si, 2(%rdi)
+
+; X32: test5:
+; X32: movzwl 8(%esp), %eax
+; X32: movw %ax, 2(%{{.*}})
+}
+
+define void @test6(i64* nocapture %a0, i8 zeroext %a1) nounwind ssp {
+entry:
+ %A = load i64* %a0, align 4
+ %B = and i64 %A, -280375465082881 ; 0xFFFF00FFFFFFFFFF
+ %C = zext i8 %a1 to i64
+ %CS = shl i64 %C, 40
+ %D = or i64 %B, %CS
+ store i64 %D, i64* %a0, align 4
+ ret void
+; X64: test6:
+; X64: movb %sil, 5(%rdi)
+
+
+; X32: test6:
+; X32: movb 8(%esp), %al
+; X32: movb %al, 5(%{{.*}})
+}
+
+define i32 @test7(i64* nocapture %a0, i8 zeroext %a1, i32* %P2) nounwind {
+entry:
+ %OtherLoad = load i32 *%P2
+ %A = load i64* %a0, align 4
+ %B = and i64 %A, -280375465082881 ; 0xFFFF00FFFFFFFFFF
+ %C = zext i8 %a1 to i64
+ %CS = shl i64 %C, 40
+ %D = or i64 %B, %CS
+ store i64 %D, i64* %a0, align 4
+ ret i32 %OtherLoad
+; X64: test7:
+; X64: movb %sil, 5(%rdi)
+
+
+; X32: test7:
+; X32: movb 8(%esp), %cl
+; X32: movb %cl, 5(%{{.*}})
+}
+
diff --git a/test/CodeGen/X86/tail-opts.ll b/test/CodeGen/X86/tail-opts.ll
index 7b21e1bd7209..9662ad6cd740 100644
--- a/test/CodeGen/X86/tail-opts.ll
+++ b/test/CodeGen/X86/tail-opts.ll
@@ -110,16 +110,16 @@ altret:
; CHECK: dont_merge_oddly:
; CHECK-NOT: ret
; CHECK: ucomiss %xmm1, %xmm2
-; CHECK-NEXT: jbe .LBB3_3
+; CHECK-NEXT: jbe .LBB2_3
; CHECK-NEXT: ucomiss %xmm0, %xmm1
-; CHECK-NEXT: ja .LBB3_4
-; CHECK-NEXT: .LBB3_2:
+; CHECK-NEXT: ja .LBB2_4
+; CHECK-NEXT: .LBB2_2:
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB3_3:
+; CHECK-NEXT: .LBB2_3:
; CHECK-NEXT: ucomiss %xmm0, %xmm2
-; CHECK-NEXT: jbe .LBB3_2
-; CHECK-NEXT: .LBB3_4:
+; CHECK-NEXT: jbe .LBB2_2
+; CHECK-NEXT: .LBB2_4:
; CHECK-NEXT: xorb %al, %al
; CHECK-NEXT: ret
@@ -153,19 +153,19 @@ bb30:
; an unconditional jump to complete a two-way conditional branch.
; CHECK: c_expand_expr_stmt:
-; CHECK: jmp .LBB4_7
-; CHECK-NEXT: .LBB4_12:
+; CHECK: jmp .LBB3_7
+; CHECK-NEXT: .LBB3_12:
; CHECK-NEXT: movq 8(%rax), %rax
; CHECK-NEXT: movb 16(%rax), %al
; CHECK-NEXT: cmpb $16, %al
-; CHECK-NEXT: je .LBB4_6
+; CHECK-NEXT: je .LBB3_6
; CHECK-NEXT: cmpb $23, %al
-; CHECK-NEXT: je .LBB4_6
-; CHECK-NEXT: jmp .LBB4_15
-; CHECK-NEXT: .LBB4_14:
+; CHECK-NEXT: je .LBB3_6
+; CHECK-NEXT: jmp .LBB3_15
+; CHECK-NEXT: .LBB3_14:
; CHECK-NEXT: cmpb $23, %bl
-; CHECK-NEXT: jne .LBB4_15
-; CHECK-NEXT: .LBB4_15:
+; CHECK-NEXT: jne .LBB3_15
+; CHECK-NEXT: .LBB3_15:
%0 = type { %struct.rtx_def* }
%struct.lang_decl = type opaque
@@ -275,7 +275,7 @@ declare fastcc %union.tree_node* @default_conversion(%union.tree_node*) nounwind
; CHECK: foo:
; CHECK: callq func
-; CHECK-NEXT: .LBB5_2:
+; CHECK-NEXT: .LBB4_2:
; CHECK-NEXT: addq $8, %rsp
; CHECK-NEXT: ret
@@ -406,3 +406,26 @@ bb12:
return:
ret void
}
+
+; Tail-merging should merge the two ret instructions, since one side can
+; fall through into the ret and the other side has to branch anyway.
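+; Sketch of the expected layout, per the CHECK lines that follow: %bb.nph
+; computes the imulq and falls through into LBB8_2, which holds the single
+; shared ret, while the %for.end path branches to that same LBB8_2.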
+
+; CHECK: TESTE:
+; CHECK: imulq
+; CHECK-NEXT: LBB8_2:
+; CHECK-NEXT: ret
+
+define i64 @TESTE(i64 %parami, i64 %paraml) nounwind readnone {
+entry:
+ %cmp = icmp slt i64 %parami, 1 ; <i1> [#uses=1]
+ %varx.0 = select i1 %cmp, i64 1, i64 %parami ; <i64> [#uses=1]
+ %cmp410 = icmp slt i64 %paraml, 1 ; <i1> [#uses=1]
+ br i1 %cmp410, label %for.end, label %bb.nph
+
+bb.nph: ; preds = %entry
+ %tmp15 = mul i64 %paraml, %parami ; <i64> [#uses=1]
+ ret i64 %tmp15
+
+for.end: ; preds = %entry
+ ret i64 %varx.0
+}
diff --git a/test/CodeGen/X86/tls11.ll b/test/CodeGen/X86/tls11.ll
index a2c1a1f75deb..514a168c5387 100644
--- a/test/CodeGen/X86/tls11.ll
+++ b/test/CodeGen/X86/tls11.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movw %gs:i@NTPOFF, %ax} %t
+; RUN: grep {movzwl %gs:i@NTPOFF, %eax} %t
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movw %fs:i@TPOFF, %ax} %t2
+; RUN: grep {movzwl %fs:i@TPOFF, %eax} %t2
@i = thread_local global i16 15
diff --git a/test/CodeGen/X86/unaligned-load.ll b/test/CodeGen/X86/unaligned-load.ll
index 47b7896bb8f1..a99af0605b12 100644
--- a/test/CodeGen/X86/unaligned-load.ll
+++ b/test/CodeGen/X86/unaligned-load.ll
@@ -13,9 +13,7 @@ entry:
bb:
%String2Loc9 = getelementptr inbounds [31 x i8]* %String2Loc, i64 0, i64 0
call void @llvm.memcpy.i64(i8* %String2Loc9, i8* getelementptr inbounds ([31 x i8]* @.str3, i64 0, i64 0), i64 31, i32 1)
-; I386: movsd _.str3+16
-; I386: movsd _.str3+8
-; I386: movsd _.str3
+; I386: call {{_?}}memcpy
; CORE2: movabsq
; CORE2: movabsq
@@ -30,8 +28,9 @@ return:
declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
-; CORE2: .align 3
+; CORE2: .section
+; CORE2: .align 4
; CORE2-NEXT: _.str1:
; CORE2-NEXT: .asciz "DHRYSTONE PROGRAM, SOME STRING"
-; CORE2: .align 3
+; CORE2: .align 4
; CORE2-NEXT: _.str3:
diff --git a/test/CodeGen/X86/vec_shuffle-36.ll b/test/CodeGen/X86/vec_shuffle-36.ll
index 1ea37c881e71..8090afc7434d 100644
--- a/test/CodeGen/X86/vec_shuffle-36.ll
+++ b/test/CodeGen/X86/vec_shuffle-36.ll
@@ -13,4 +13,4 @@ define <8 x i16> @shuf7(<8 x i16> %t0) {
; CHECK: pshufd
%tmp10 = shufflevector <8 x i16> %t0, <8 x i16> undef, <8 x i32> < i32 undef, i32 2, i32 2, i32 2, i32 2, i32 2, i32 undef, i32 undef >
ret <8 x i16> %tmp10
-}
\ No newline at end of file
+}
diff --git a/test/CodeGen/X86/vec_ss_load_fold.ll b/test/CodeGen/X86/vec_ss_load_fold.ll
index c8b2927b71f3..3bd3f7b60b3b 100644
--- a/test/CodeGen/X86/vec_ss_load_fold.ll
+++ b/test/CodeGen/X86/vec_ss_load_fold.ll
@@ -16,9 +16,9 @@ define i16 @test1(float %f) nounwind {
%tmp69 = trunc i32 %tmp.upgrd.1 to i16 ; <i16> [#uses=1]
ret i16 %tmp69
; CHECK: test1:
-; CHECK: subss LCPI1_
-; CHECK: mulss LCPI1_
-; CHECK: minss LCPI1_
+; CHECK: subss LCPI0_
+; CHECK: mulss LCPI0_
+; CHECK: minss LCPI0_
}
define i16 @test2(float %f) nounwind {
@@ -31,9 +31,9 @@ define i16 @test2(float %f) nounwind {
%tmp69 = trunc i32 %tmp to i16 ; <i16> [#uses=1]
ret i16 %tmp69
; CHECK: test2:
-; CHECK: addss LCPI2_
-; CHECK: mulss LCPI2_
-; CHECK: minss LCPI2_
+; CHECK: addss LCPI1_
+; CHECK: mulss LCPI1_
+; CHECK: minss LCPI1_
}
declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>)
diff --git a/test/CodeGen/X86/widen_load-2.ll b/test/CodeGen/X86/widen_load-2.ll
index 658c05143e84..551704c498fa 100644
--- a/test/CodeGen/X86/widen_load-2.ll
+++ b/test/CodeGen/X86/widen_load-2.ll
@@ -24,10 +24,10 @@ define void @add3i32_2(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) {
; CHECK: paddd
; CHECK: pextrd
; CHECK: movq
- %a = load %i32vec3* %ap
- %b = load %i32vec3* %bp
+ %a = load %i32vec3* %ap, align 8
+ %b = load %i32vec3* %bp, align 8
%x = add %i32vec3 %a, %b
- store %i32vec3 %x, %i32vec3* %ret
+ store %i32vec3 %x, %i32vec3* %ret, align 8
ret void
}
diff --git a/test/CodeGen/X86/xor.ll b/test/CodeGen/X86/xor.ll
index f270d9d56e21..6c623cb15538 100644
--- a/test/CodeGen/X86/xor.ll
+++ b/test/CodeGen/X86/xor.ll
@@ -80,11 +80,11 @@ bb:
bb12:
ret i16 %tmp3
; X64: test5:
-; X64: notw [[REG:%[a-z]+]]
-; X64: andw {{.*}}[[REG]]
+; X64: notl [[REG:%[a-z]+]]
+; X64: andl {{.*}}[[REG]]
; X32: test5:
-; X32: notw [[REG:%[a-z]+]]
-; X32: andw {{.*}}[[REG]]
+; X32: notl [[REG:%[a-z]+]]
+; X32: andl {{.*}}[[REG]]
}
define i8 @test6(i8 %a, i8 %b) nounwind {
diff --git a/test/CodeGen/XCore/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/XCore/2010-04-07-DbgValueOtherTargets.ll
new file mode 100644
index 000000000000..f24e1d1851b4
--- /dev/null
+++ b/test/CodeGen/XCore/2010-04-07-DbgValueOtherTargets.ll
@@ -0,0 +1,33 @@
+; RUN: llc -O0 -march=xcore -asm-verbose < %s | FileCheck %s
+; Check that DEBUG_VALUE comments come through on a variety of targets.
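+; Note: the single CHECK only requires that the verbose assembly contain a
+; DEBUG_VALUE comment somewhere for the llvm.dbg.value call below; it does
+; not constrain the variable name or the reported location.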
+
+%tart.reflect.ComplexType = type { double, double }
+
+@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
+
+define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+entry:
+; CHECK: DEBUG_VALUE
+ tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
+ tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
+ ret i32 3
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
+
+!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
+!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
+!3 = metadata !{metadata !4, metadata !6, metadata !7}
+!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
+!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
+!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
+!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
+!9 = metadata !{i32 458763, metadata !10} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
diff --git a/test/CodeGen/XCore/constants.ll b/test/CodeGen/XCore/constants.ll
index 95fa11e77470..cad1a2153f4f 100644
--- a/test/CodeGen/XCore/constants.ll
+++ b/test/CodeGen/XCore/constants.ll
@@ -1,10 +1,10 @@
; RUN: llc < %s -march=xcore -mcpu=xs1b-generic | FileCheck %s
; CHECK: .section .cp.rodata.cst4,"aMc",@progbits,4
-; CHECK: .LCPI1_0:
+; CHECK: .LCPI0_0:
; CHECK: .long 12345678
; CHECK: f:
-; CHECK: ldw r0, cp[.LCPI1_0]
+; CHECK: ldw r0, cp[.LCPI0_0]
define i32 @f() {
entry:
ret i32 12345678
diff --git a/test/DebugInfo/2009-10-16-Phi.ll b/test/DebugInfo/2009-10-16-Phi.ll
index fc0375186df9..0f799e3a7892 100644
--- a/test/DebugInfo/2009-10-16-Phi.ll
+++ b/test/DebugInfo/2009-10-16-Phi.ll
@@ -10,4 +10,4 @@ B2:
ret i32 %0
}
-!0 = metadata !{i32 42}
\ No newline at end of file
+!0 = metadata !{i32 42}
diff --git a/test/DebugInfo/2010-01-18-DbgValue.ll b/test/DebugInfo/2010-01-18-DbgValue.ll
index ff97b189440e..001f853dd236 100644
--- a/test/DebugInfo/2010-01-18-DbgValue.ll
+++ b/test/DebugInfo/2010-01-18-DbgValue.ll
@@ -5,51 +5,47 @@ target triple = "i386-apple-darwin9.8"
; Currently, dbg.declare generates a DEBUG_VALUE comment. Eventually it will
; generate DWARF and this test will need to be modified or removed.
-@Y = common global i32 0 ; <i32*> [#uses=1]
-define i32 @test() nounwind {
+%struct.Pt = type { double, double }
+%struct.Rect = type { %struct.Pt, %struct.Pt }
+
+define double @foo(%struct.Rect* byval %my_r0) nounwind ssp {
entry:
-; CHECK: DEBUG_VALUE:
- %retval = alloca i32 ; <i32*> [#uses=2]
- %X = alloca i32 ; <i32*> [#uses=5]
- %0 = alloca i32 ; <i32*> [#uses=2]
+;CHECK: DEBUG_VALUE
+ %retval = alloca double ; <double*> [#uses=2]
+ %0 = alloca double ; <double*> [#uses=2]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- call void @llvm.dbg.declare(metadata !{i32* %X}, metadata !3), !dbg !7
- store i32 4, i32* %X, align 4, !dbg !8
- %1 = load i32* %X, align 4, !dbg !9 ; <i32> [#uses=1]
- call void @use(i32 %1) nounwind, !dbg !9
- %2 = load i32* @Y, align 4, !dbg !10 ; <i32> [#uses=1]
- %3 = add nsw i32 %2, 2, !dbg !10 ; <i32> [#uses=1]
- store i32 %3, i32* %X, align 4, !dbg !10
- %4 = load i32* %X, align 4, !dbg !11 ; <i32> [#uses=1]
- call void @use(i32 %4) nounwind, !dbg !11
- %5 = load i32* %X, align 4, !dbg !12 ; <i32> [#uses=1]
- store i32 %5, i32* %0, align 4, !dbg !12
- %6 = load i32* %0, align 4, !dbg !12 ; <i32> [#uses=1]
- store i32 %6, i32* %retval, align 4, !dbg !12
- br label %return, !dbg !12
+ call void @llvm.dbg.declare(metadata !{%struct.Rect* %my_r0}, metadata !0), !dbg !15
+ %1 = getelementptr inbounds %struct.Rect* %my_r0, i32 0, i32 0, !dbg !16 ; <%struct.Pt*> [#uses=1]
+ %2 = getelementptr inbounds %struct.Pt* %1, i32 0, i32 0, !dbg !16 ; <double*> [#uses=1]
+ %3 = load double* %2, align 8, !dbg !16 ; <double> [#uses=1]
+ store double %3, double* %0, align 8, !dbg !16
+ %4 = load double* %0, align 8, !dbg !16 ; <double> [#uses=1]
+ store double %4, double* %retval, align 8, !dbg !16
+ br label %return, !dbg !16
return: ; preds = %entry
- %retval1 = load i32* %retval, !dbg !12 ; <i32> [#uses=1]
- ret i32 %retval1, !dbg !12
+ %retval1 = load double* %retval, !dbg !16 ; <double> [#uses=1]
+ ret double %retval1, !dbg !16
}
declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-declare void @use(i32)
-
-!llvm.dbg.gv = !{!0}
-
-!0 = metadata !{i32 458804, i32 0, metadata !1, metadata !"Y", metadata !"Y", metadata !"Y", metadata !1, i32 2, metadata !2, i1 false, i1 true, i32* @Y} ; [ DW_TAG_variable ]
-!1 = metadata !{i32 458769, i32 0, i32 1, metadata !"try.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!2 = metadata !{i32 458788, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!3 = metadata !{i32 459008, metadata !4, metadata !"X", metadata !1, i32 4, metadata !2} ; [ DW_TAG_auto_variable ]
-!4 = metadata !{i32 458798, i32 0, metadata !1, metadata !"", metadata !"", metadata !"test", metadata !1, i32 3, metadata !5, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
-!5 = metadata !{i32 458773, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0} ; [ DW_TAG_subroutine_type ]
-!6 = metadata !{metadata !2}
-!7 = metadata !{i32 3, i32 0, metadata !4, null}
-!8 = metadata !{i32 4, i32 0, metadata !4, null}
-!9 = metadata !{i32 5, i32 0, metadata !4, null}
-!10 = metadata !{i32 6, i32 0, metadata !4, null}
-!11 = metadata !{i32 7, i32 0, metadata !4, null}
-!12 = metadata !{i32 8, i32 0, metadata !4, null}
+!0 = metadata !{i32 524545, metadata !1, metadata !"my_r0", metadata !2, i32 11, metadata !7} ; [ DW_TAG_arg_variable ]
+!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 11, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 524329, metadata !"b2.c", metadata !"/tmp/", metadata !3} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 524305, i32 0, i32 1, metadata !"b2.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{metadata !6, metadata !7}
+!6 = metadata !{i32 524324, metadata !2, metadata !"double", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!7 = metadata !{i32 524307, metadata !2, metadata !"Rect", metadata !2, i32 6, i64 256, i64 64, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_structure_type ]
+!8 = metadata !{metadata !9, metadata !14}
+!9 = metadata !{i32 524301, metadata !7, metadata !"P1", metadata !2, i32 7, i64 128, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ]
+!10 = metadata !{i32 524307, metadata !2, metadata !"Pt", metadata !2, i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ]
+!11 = metadata !{metadata !12, metadata !13}
+!12 = metadata !{i32 524301, metadata !10, metadata !"x", metadata !2, i32 2, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
+!13 = metadata !{i32 524301, metadata !10, metadata !"y", metadata !2, i32 3, i64 64, i64 64, i64 64, i32 0, metadata !6} ; [ DW_TAG_member ]
+!14 = metadata !{i32 524301, metadata !7, metadata !"P2", metadata !2, i32 8, i64 128, i64 64, i64 128, i32 0, metadata !10} ; [ DW_TAG_member ]
+!15 = metadata !{i32 11, i32 0, metadata !1, null}
+!16 = metadata !{i32 12, i32 0, metadata !17, null}
+!17 = metadata !{i32 524299, metadata !1, i32 11, i32 0} ; [ DW_TAG_lexical_block ]
diff --git a/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll b/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll
new file mode 100644
index 000000000000..dd6c5a965eb6
--- /dev/null
+++ b/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll
@@ -0,0 +1,89 @@
+; RUN: llvm-as < %s | llc -asm-verbose -O0 | grep AT_specification | count 2
+; Radar 7833483
+; Do not emit AT_specification for nested function foo.
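+; Note on the expected count: the RUN line requires exactly two
+; AT_specification references in the output; the point of the test is that
+; the definition of the function-local foo must not add another.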
+
+%class.A = type { i8 }
+%class.B = type { i8 }
+
+define i32 @main() ssp {
+entry:
+ %retval = alloca i32, align 4 ; <i32*> [#uses=3]
+ %b = alloca %class.A, align 1 ; <%class.A*> [#uses=1]
+ store i32 0, i32* %retval
+ call void @llvm.dbg.declare(metadata !{%class.A* %b}, metadata !0), !dbg !14
+ %call = call i32 @_ZN1B2fnEv(%class.A* %b), !dbg !15 ; <i32> [#uses=1]
+ store i32 %call, i32* %retval, !dbg !15
+ %0 = load i32* %retval, !dbg !16 ; <i32> [#uses=1]
+ ret i32 %0, !dbg !16
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+define linkonce_odr i32 @_ZN1B2fnEv(%class.A* %this) ssp align 2 {
+entry:
+ %retval = alloca i32, align 4 ; <i32*> [#uses=2]
+ %this.addr = alloca %class.A*, align 8 ; <%class.A**> [#uses=2]
+ %a = alloca %class.A, align 1 ; <%class.A*> [#uses=1]
+ %i = alloca i32, align 4 ; <i32*> [#uses=2]
+ store %class.A* %this, %class.A** %this.addr
+ call void @llvm.dbg.declare(metadata !{%class.A** %this.addr}, metadata !17), !dbg !18
+ %this1 = load %class.A** %this.addr ; <%class.A*> [#uses=0]
+ call void @llvm.dbg.declare(metadata !{%class.A* %a}, metadata !19), !dbg !27
+ call void @llvm.dbg.declare(metadata !{i32* %i}, metadata !28), !dbg !29
+ %call = call i32 @_ZZN1B2fnEvEN1A3fooEv(%class.A* %a), !dbg !30 ; <i32> [#uses=1]
+ store i32 %call, i32* %i, !dbg !30
+ %tmp = load i32* %i, !dbg !31 ; <i32> [#uses=1]
+ store i32 %tmp, i32* %retval, !dbg !31
+ %0 = load i32* %retval, !dbg !32 ; <i32> [#uses=1]
+ ret i32 %0, !dbg !32
+}
+
+define internal i32 @_ZZN1B2fnEvEN1A3fooEv(%class.A* %this) ssp align 2 {
+entry:
+ %retval = alloca i32, align 4 ; <i32*> [#uses=2]
+ %this.addr = alloca %class.A*, align 8 ; <%class.A**> [#uses=2]
+ store %class.A* %this, %class.A** %this.addr
+ call void @llvm.dbg.declare(metadata !{%class.A** %this.addr}, metadata !33), !dbg !34
+ %this1 = load %class.A** %this.addr ; <%class.A*> [#uses=0]
+ store i32 42, i32* %retval, !dbg !35
+ %0 = load i32* %retval, !dbg !35 ; <i32> [#uses=1]
+ ret i32 %0, !dbg !35
+}
+
+!0 = metadata !{i32 524544, metadata !1, metadata !"b", metadata !3, i32 16, metadata !8} ; [ DW_TAG_auto_variable ]
+!1 = metadata !{i32 524299, metadata !2, i32 15, i32 12} ; [ DW_TAG_lexical_block ]
+!2 = metadata !{i32 524334, i32 0, metadata !3, metadata !"main", metadata !"main", metadata !"main", metadata !3, i32 15, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
+!3 = metadata !{i32 524329, metadata !"one.cc", metadata !"/tmp", metadata !4} ; [ DW_TAG_file_type ]
+!4 = metadata !{i32 524305, i32 0, i32 4, metadata !"one.cc", metadata !"/tmp", metadata !"clang 1.5", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!5 = metadata !{i32 524309, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!6 = metadata !{metadata !7}
+!7 = metadata !{i32 524324, metadata !3, metadata !"int", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!8 = metadata !{i32 524290, metadata !3, metadata !"B", metadata !3, i32 2, i64 8, i64 8, i64 0, i32 0, null, metadata !9, i32 0, null} ; [ DW_TAG_class_type ]
+!9 = metadata !{metadata !10}
+!10 = metadata !{i32 524334, i32 0, metadata !8, metadata !"fn", metadata !"fn", metadata !"_ZN1B2fnEv", metadata !3, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 524309, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{metadata !7, metadata !13}
+!13 = metadata !{i32 524303, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !8} ; [ DW_TAG_pointer_type ]
+!14 = metadata !{i32 16, i32 5, metadata !1, null}
+!15 = metadata !{i32 17, i32 3, metadata !1, null}
+!16 = metadata !{i32 18, i32 1, metadata !2, null}
+!17 = metadata !{i32 524545, metadata !10, metadata !"this", metadata !3, i32 4, metadata !13} ; [ DW_TAG_arg_variable ]
+!18 = metadata !{i32 4, i32 7, metadata !10, null}
+!19 = metadata !{i32 524544, metadata !20, metadata !"a", metadata !3, i32 9, metadata !21} ; [ DW_TAG_auto_variable ]
+!20 = metadata !{i32 524299, metadata !10, i32 4, i32 12} ; [ DW_TAG_lexical_block ]
+!21 = metadata !{i32 524290, metadata !10, metadata !"A", metadata !3, i32 5, i64 8, i64 8, i64 0, i32 0, null, metadata !22, i32 0, null} ; [ DW_TAG_class_type ]
+!22 = metadata !{metadata !23}
+!23 = metadata !{i32 524334, i32 0, metadata !21, metadata !"foo", metadata !"foo", metadata !"_ZZN1B2fnEvEN1A3fooEv", metadata !3, i32 7, metadata !24, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
+!24 = metadata !{i32 524309, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !25, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!25 = metadata !{metadata !7, metadata !26}
+!26 = metadata !{i32 524303, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !21} ; [ DW_TAG_pointer_type ]
+!27 = metadata !{i32 9, i32 7, metadata !20, null}
+!28 = metadata !{i32 524544, metadata !20, metadata !"i", metadata !3, i32 10, metadata !7} ; [ DW_TAG_auto_variable ]
+!29 = metadata !{i32 10, i32 9, metadata !20, null}
+!30 = metadata !{i32 10, i32 5, metadata !20, null}
+!31 = metadata !{i32 11, i32 5, metadata !20, null}
+!32 = metadata !{i32 12, i32 3, metadata !10, null}
+!33 = metadata !{i32 524545, metadata !23, metadata !"this", metadata !3, i32 7, metadata !26} ; [ DW_TAG_arg_variable ]
+!34 = metadata !{i32 7, i32 11, metadata !23, null}
+!35 = metadata !{i32 7, i32 19, metadata !36, null}
+!36 = metadata !{i32 524299, metadata !23, i32 7, i32 17} ; [ DW_TAG_lexical_block ]
diff --git a/test/DebugInfo/2010-04-13-PubType.ll b/test/DebugInfo/2010-04-13-PubType.ll
new file mode 100644
index 000000000000..371169fe1839
--- /dev/null
+++ b/test/DebugInfo/2010-04-13-PubType.ll
@@ -0,0 +1,47 @@
+; RUN: llc -O0 -asm-verbose < %s > %t
+; RUN: grep "External Name" %t | grep -v X
+; RUN: grep "External Name" %t | grep Y | count 1
+; Test to check that a type with no definition is listed in the pubtypes section.
+%struct.X = type opaque
+%struct.Y = type { i32 }
+
+define i32 @foo(%struct.X* %x, %struct.Y* %y) nounwind ssp {
+entry:
+ %x_addr = alloca %struct.X* ; <%struct.X**> [#uses=1]
+ %y_addr = alloca %struct.Y* ; <%struct.Y**> [#uses=1]
+ %retval = alloca i32 ; <i32*> [#uses=2]
+ %0 = alloca i32 ; <i32*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ call void @llvm.dbg.declare(metadata !{%struct.X** %x_addr}, metadata !0), !dbg !13
+ store %struct.X* %x, %struct.X** %x_addr
+ call void @llvm.dbg.declare(metadata !{%struct.Y** %y_addr}, metadata !14), !dbg !13
+ store %struct.Y* %y, %struct.Y** %y_addr
+ store i32 0, i32* %0, align 4, !dbg !13
+ %1 = load i32* %0, align 4, !dbg !13 ; <i32> [#uses=1]
+ store i32 %1, i32* %retval, align 4, !dbg !13
+ br label %return, !dbg !13
+
+return: ; preds = %entry
+ %retval1 = load i32* %retval, !dbg !13 ; <i32> [#uses=1]
+ ret i32 %retval1, !dbg !15
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+!0 = metadata !{i32 524545, metadata !1, metadata !"x", metadata !2, i32 7, metadata !7} ; [ DW_TAG_arg_variable ]
+!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 7, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 524329, metadata !"a.c", metadata !"/tmp/", metadata !3} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 524305, i32 0, i32 1, metadata !"a.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{metadata !6, metadata !7, metadata !9}
+!6 = metadata !{i32 524324, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!7 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
+!8 = metadata !{i32 524307, metadata !2, metadata !"X", metadata !2, i32 3, i64 0, i64 0, i64 0, i32 4, null, null, i32 0, null} ; [ DW_TAG_structure_type ]
+!9 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ]
+!10 = metadata !{i32 524307, metadata !2, metadata !"Y", metadata !2, i32 4, i64 32, i64 32, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ]
+!11 = metadata !{metadata !12}
+!12 = metadata !{i32 524301, metadata !10, metadata !"x", metadata !2, i32 5, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
+!13 = metadata !{i32 7, i32 0, metadata !1, null}
+!14 = metadata !{i32 524545, metadata !1, metadata !"y", metadata !2, i32 7, metadata !9} ; [ DW_TAG_arg_variable ]
+!15 = metadata !{i32 7, i32 0, metadata !16, null}
+!16 = metadata !{i32 524299, metadata !1, i32 7, i32 0} ; [ DW_TAG_lexical_block ]
diff --git a/test/DebugInfo/2010-04-19-FramePtr.ll b/test/DebugInfo/2010-04-19-FramePtr.ll
new file mode 100644
index 000000000000..30031219d4ea
--- /dev/null
+++ b/test/DebugInfo/2010-04-19-FramePtr.ll
@@ -0,0 +1,30 @@
+; RUN: llc -asm-verbose -O0 -o %t < %s
+; RUN: grep DW_AT_APPLE_omit_frame_ptr %t
+; RUN: llc -disable-fp-elim -asm-verbose -O0 -o %t < %s
+; RUN: grep -v DW_AT_APPLE_omit_frame_ptr %t
+
+
+define i32 @foo() nounwind ssp {
+entry:
+ %retval = alloca i32 ; <i32*> [#uses=2]
+ %0 = alloca i32 ; <i32*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store i32 42, i32* %0, align 4, !dbg !0
+ %1 = load i32* %0, align 4, !dbg !0 ; <i32> [#uses=1]
+ store i32 %1, i32* %retval, align 4, !dbg !0
+ br label %return, !dbg !0
+
+return: ; preds = %entry
+ %retval1 = load i32* %retval, !dbg !0 ; <i32> [#uses=1]
+ ret i32 %retval1, !dbg !7
+}
+
+!0 = metadata !{i32 2, i32 0, metadata !1, null}
+!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 524329, metadata !"a.c", metadata !"/tmp", metadata !3} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 524305, i32 0, i32 1, metadata !"a.c", metadata !"/tmp", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{metadata !6}
+!6 = metadata !{i32 524324, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!7 = metadata !{i32 2, i32 0, metadata !8, null}
+!8 = metadata !{i32 524299, metadata !1, i32 2, i32 0} ; [ DW_TAG_lexical_block ]
diff --git a/test/DebugInfo/2010-03-22-CU-HighLow.ll b/test/DebugInfo/2010-04-25-CU-entry_pc.ll
index 42c25e4711cc..de099b6b9c50 100644
--- a/test/DebugInfo/2010-03-22-CU-HighLow.ll
+++ b/test/DebugInfo/2010-04-25-CU-entry_pc.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | grep low_pc | count 2
+; RUN: llc < %s | grep entry_pc | count 2
@i = global i32 1 ; <i32*> [#uses=0]
!llvm.dbg.gv = !{!0}
diff --git a/test/DebugInfo/2010-05-03-DisableFramePtr.ll b/test/DebugInfo/2010-05-03-DisableFramePtr.ll
new file mode 100644
index 000000000000..4061bdc834d7
--- /dev/null
+++ b/test/DebugInfo/2010-05-03-DisableFramePtr.ll
@@ -0,0 +1,34 @@
+; RUN: llc -o /dev/null -disable-non-leaf-fp-elim < %s
+; Radar 7937664
+%struct.AppleEvent = type opaque
+
+define void @DisposeDMNotificationUPP(void (%struct.AppleEvent*)* %userUPP) nounwind ssp {
+entry:
+ %userUPP_addr = alloca void (%struct.AppleEvent*)* ; <void (%struct.AppleEvent*)**> [#uses=1]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ call void @llvm.dbg.declare(metadata !{void (%struct.AppleEvent*)** %userUPP_addr}, metadata !0), !dbg !13
+ store void (%struct.AppleEvent*)* %userUPP, void (%struct.AppleEvent*)** %userUPP_addr
+ br label %return, !dbg !14
+
+return: ; preds = %entry
+ ret void, !dbg !14
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+!0 = metadata !{i32 524545, metadata !1, metadata !"userUPP", metadata !2, i32 7, metadata !6} ; [ DW_TAG_arg_variable ]
+!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"DisposeDMNotificationUPP", metadata !"DisposeDMNotificationUPP", metadata !"DisposeDMNotificationUPP", metadata !2, i32 7, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 524329, metadata !"t.c", metadata !"/Users/echeng/LLVM/radars/r7937664/", metadata !3} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 524305, i32 0, i32 1, metadata !"t.c", metadata !"/Users/echeng/LLVM/radars/r7937664/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 9999)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{null, metadata !6}
+!6 = metadata !{i32 524310, metadata !2, metadata !"DMNotificationUPP", metadata !2, i32 6, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ]
+!7 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
+!8 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !9, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!9 = metadata !{null, metadata !10}
+!10 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
+!11 = metadata !{i32 524310, metadata !2, metadata !"AppleEvent", metadata !2, i32 4, i64 0, i64 0, i64 0, i32 0, metadata !12} ; [ DW_TAG_typedef ]
+!12 = metadata !{i32 524307, metadata !2, metadata !"AEDesc", metadata !2, i32 1, i64 0, i64 0, i64 0, i32 4, null, null, i32 0, null} ; [ DW_TAG_structure_type ]
+!13 = metadata !{i32 7, i32 0, metadata !1, null}
+!14 = metadata !{i32 8, i32 0, metadata !15, null}
+!15 = metadata !{i32 524299, metadata !1, i32 7, i32 0} ; [ DW_TAG_lexical_block ]
diff --git a/test/DebugInfo/2010-05-03-OriginDIE.ll b/test/DebugInfo/2010-05-03-OriginDIE.ll
new file mode 100644
index 000000000000..0e1d1fddc412
--- /dev/null
+++ b/test/DebugInfo/2010-05-03-OriginDIE.ll
@@ -0,0 +1,86 @@
+
+; RUN: llc < %s -o /dev/null
+; Radar 7937109
+
+%struct.anon = type { i64, i32, i32, i32, [1 x i32] }
+%struct.gpm_t = type { i32, i8*, [16 x i8], i32, i64, i64, i64, i64, i64, i64, i32, i16, i16, [8 x %struct.gpmr_t] }
+%struct.gpmr_t = type { [48 x i8], [48 x i8], [16 x i8], i64, i64, i64, i64, i16 }
+%struct.gpt_t = type { [8 x i8], i32, i32, i32, i32, i64, i64, i64, i64, [16 x i8], %struct.anon }
+
+@llvm.used = appending global [1 x i8*] [i8* bitcast (void (%struct.gpm_t*, %struct.gpt_t*)* @gpt2gpm to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
+
+define fastcc void @gpt2gpm(%struct.gpm_t* %gpm, %struct.gpt_t* %gpt) nounwind optsize ssp {
+entry:
+ %data_addr.i18 = alloca i64, align 8 ; <i64*> [#uses=1]
+ %data_addr.i17 = alloca i64, align 8 ; <i64*> [#uses=2]
+ %data_addr.i16 = alloca i64, align 8 ; <i64*> [#uses=0]
+ %data_addr.i15 = alloca i32, align 4 ; <i32*> [#uses=0]
+ %data_addr.i = alloca i64, align 8 ; <i64*> [#uses=0]
+ %0 = getelementptr inbounds %struct.gpm_t* %gpm, i32 0, i32 2, i32 0 ; <i8*> [#uses=1]
+ %1 = getelementptr inbounds %struct.gpt_t* %gpt, i32 0, i32 9, i32 0 ; <i8*> [#uses=1]
+ call void @uuid_LtoB(i8* %0, i8* %1) nounwind, !dbg !0
+ %a9 = volatile load i64* %data_addr.i18, align 8 ; <i64> [#uses=1]
+ %a10 = call i64 @llvm.bswap.i64(i64 %a9) nounwind ; <i64> [#uses=1]
+ %a11 = getelementptr inbounds %struct.gpt_t* %gpt, i32 0, i32 8, !dbg !7 ; <i64*> [#uses=1]
+ %a12 = load i64* %a11, align 4, !dbg !7 ; <i64> [#uses=1]
+ call void @llvm.dbg.declare(metadata !{i64* %data_addr.i17}, metadata !8) nounwind, !dbg !14
+ store i64 %a12, i64* %data_addr.i17, align 8
+ call void @llvm.dbg.value(metadata !6, i64 0, metadata !15) nounwind
+ call void @llvm.dbg.value(metadata !18, i64 0, metadata !19) nounwind
+ call void @llvm.dbg.declare(metadata !6, metadata !23) nounwind
+ call void @llvm.dbg.value(metadata !{i64* %data_addr.i17}, i64 0, metadata !34) nounwind
+ %a13 = volatile load i64* %data_addr.i17, align 8 ; <i64> [#uses=1]
+ %a14 = call i64 @llvm.bswap.i64(i64 %a13) nounwind ; <i64> [#uses=2]
+ %a15 = add i64 %a10, %a14, !dbg !7 ; <i64> [#uses=1]
+ %a16 = sub i64 %a15, %a14 ; <i64> [#uses=1]
+ %a17 = getelementptr inbounds %struct.gpm_t* %gpm, i32 0, i32 5, !dbg !7 ; <i64*> [#uses=1]
+ store i64 %a16, i64* %a17, align 4, !dbg !7
+ ret void, !dbg !7
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+declare i32 @llvm.bswap.i32(i32) nounwind readnone
+
+declare i64 @llvm.bswap.i64(i64) nounwind readnone
+
+declare void @uuid_LtoB(i8*, i8*)
+
+!0 = metadata !{i32 808, i32 0, metadata !1, null}
+!1 = metadata !{i32 524299, metadata !2, i32 807, i32 0} ; [ DW_TAG_lexical_block ]
+!2 = metadata !{i32 524334, i32 0, metadata !3, metadata !"gpt2gpm", metadata !"gpt2gpm", metadata !"gpt2gpm", metadata !3, i32 807, metadata !5, i1 true, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
+!3 = metadata !{i32 524329, metadata !"G.c", metadata !"/tmp", metadata !4} ; [ DW_TAG_file_type ]
+!4 = metadata !{i32 524305, i32 0, i32 1, metadata !"G.c", metadata !"/tmp", metadata !"llvm-gcc", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!5 = metadata !{i32 524309, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!6 = metadata !{null}
+!7 = metadata !{i32 810, i32 0, metadata !1, null}
+!8 = metadata !{i32 524545, metadata !9, metadata !"data", metadata !10, i32 201, metadata !11} ; [ DW_TAG_arg_variable ]
+!9 = metadata !{i32 524334, i32 0, metadata !3, metadata !"_OSSwapInt64", metadata !"_OSSwapInt64", metadata !"_OSSwapInt64", metadata !10, i32 202, metadata !5, i1 true, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
+!10 = metadata !{i32 524329, metadata !"OSByteOrder.h", metadata !"/usr/include/libkern/ppc", metadata !4} ; [ DW_TAG_file_type ]
+!11 = metadata !{i32 524310, metadata !3, metadata !"uint64_t", metadata !12, i32 59, i64 0, i64 0, i64 0, i32 0, metadata !13} ; [ DW_TAG_typedef ]
+!12 = metadata !{i32 524329, metadata !"stdint.h", metadata !"/usr/4.2.1/include", metadata !4} ; [ DW_TAG_file_type ]
+!13 = metadata !{i32 524324, metadata !3, metadata !"long long unsigned int", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!14 = metadata !{i32 202, i32 0, metadata !9, metadata !7}
+!15 = metadata !{i32 524545, metadata !16, metadata !"base", metadata !10, i32 92, metadata !17} ; [ DW_TAG_arg_variable ]
+!16 = metadata !{i32 524334, i32 0, metadata !3, metadata !"OSReadSwapInt64", metadata !"OSReadSwapInt64", metadata !"OSReadSwapInt64", metadata !10, i32 95, metadata !5, i1 true, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
+!17 = metadata !{i32 524303, metadata !3, metadata !"", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
+!18 = metadata !{i32 0}
+!19 = metadata !{i32 524545, metadata !16, metadata !"byteOffset", metadata !10, i32 94, metadata !20} ; [ DW_TAG_arg_variable ]
+!20 = metadata !{i32 524310, metadata !3, metadata !"uintptr_t", metadata !21, i32 114, i64 0, i64 0, i64 0, i32 0, metadata !22} ; [ DW_TAG_typedef ]
+!21 = metadata !{i32 524329, metadata !"types.h", metadata !"/usr/include/ppc", metadata !4} ; [ DW_TAG_file_type ]
+!22 = metadata !{i32 524324, metadata !3, metadata !"long unsigned int", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!23 = metadata !{i32 524544, metadata !24, metadata !"u", metadata !10, i32 100, metadata !25} ; [ DW_TAG_auto_variable ]
+!24 = metadata !{i32 524299, metadata !16, i32 95, i32 0} ; [ DW_TAG_lexical_block ]
+!25 = metadata !{i32 524311, metadata !16, metadata !"", metadata !10, i32 97, i64 64, i64 64, i64 0, i32 0, null, metadata !26, i32 0, null} ; [ DW_TAG_union_type ]
+!26 = metadata !{metadata !27, metadata !28}
+!27 = metadata !{i32 524301, metadata !25, metadata !"u64", metadata !10, i32 98, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_member ]
+!28 = metadata !{i32 524301, metadata !25, metadata !"u32", metadata !10, i32 99, i64 64, i64 32, i64 0, i32 0, metadata !29} ; [ DW_TAG_member ]
+!29 = metadata !{i32 524289, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 32, i64 0, i32 0, metadata !30, metadata !32, i32 0, null} ; [ DW_TAG_array_type ]
+!30 = metadata !{i32 524310, metadata !3, metadata !"uint32_t", metadata !12, i32 55, i64 0, i64 0, i64 0, i32 0, metadata !31} ; [ DW_TAG_typedef ]
+!31 = metadata !{i32 524324, metadata !3, metadata !"unsigned int", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!32 = metadata !{metadata !33}
+!33 = metadata !{i32 524321, i64 0, i64 1} ; [ DW_TAG_subrange_type ]
+!34 = metadata !{i32 524544, metadata !24, metadata !"addr", metadata !10, i32 96, metadata !35} ; [ DW_TAG_auto_variable ]
+!35 = metadata !{i32 524303, metadata !3, metadata !"", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
diff --git a/test/FrontendC++/2009-07-16-Using.cpp b/test/FrontendC++/2009-07-16-Using.cpp
index 1acadf642122..c0e031424acb 100644
--- a/test/FrontendC++/2009-07-16-Using.cpp
+++ b/test/FrontendC++/2009-07-16-Using.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx %s -S
+// RUN: %llvmgxx %s -S -o /dev/null
namespace A {
typedef int B;
diff --git a/test/FrontendC++/2009-10-27-crash.cpp b/test/FrontendC++/2009-10-27-crash.cpp
index 5641aa420507..21d0064c687b 100644
--- a/test/FrontendC++/2009-10-27-crash.cpp
+++ b/test/FrontendC++/2009-10-27-crash.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -emit-llvm -S %s
+// RUN: %llvmgxx -emit-llvm -S %s -o /dev/null
// Radar 7328944
typedef struct
diff --git a/test/FrontendC++/2010-04-30-OptimizedMethod-Dbg.cpp b/test/FrontendC++/2010-04-30-OptimizedMethod-Dbg.cpp
new file mode 100644
index 000000000000..dc9b16c28227
--- /dev/null
+++ b/test/FrontendC++/2010-04-30-OptimizedMethod-Dbg.cpp
@@ -0,0 +1,17 @@
+// RUN: %llvmgcc -g -S -O2 %s -o %t
+// RUN: grep "i1 false, i1 true. . . DW_TAG_subprogram" %t | count 2
+
+class foo {
+public:
+ int bar(int x);
+ static int baz(int x);
+};
+
+int foo::bar(int x) {
+ return x*4 + 1;
+}
+
+int foo::baz(int x) {
+ return x*4 + 1;
+}
+
diff --git a/test/FrontendC/2007-05-16-EmptyStruct.c b/test/FrontendC/2007-05-16-EmptyStruct.c
index 748aa98351d5..23c0b1d6a3f6 100644
--- a/test/FrontendC/2007-05-16-EmptyStruct.c
+++ b/test/FrontendC/2007-05-16-EmptyStruct.c
@@ -1,5 +1,5 @@
// PR 1417
-// RUN: %llvmgcc -xc %s -c -o - | llvm-dis | grep "struct.anon = type \{ \}"
+// RUN: %llvmgcc -xc %s -c -o - | llvm-dis | grep "struct.anon = type \{\}"
struct { } *X;
diff --git a/test/FrontendC/2008-11-02-WeakAlias.c b/test/FrontendC/2008-11-02-WeakAlias.c
index 4bdc5c7bec93..befafe455149 100644
--- a/test/FrontendC/2008-11-02-WeakAlias.c
+++ b/test/FrontendC/2008-11-02-WeakAlias.c
@@ -2,4 +2,4 @@
// PR2691
void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
-void native_init_IRQ(void) {}
\ No newline at end of file
+void native_init_IRQ(void) {}
diff --git a/test/FrontendC/2009-01-20-k8.c b/test/FrontendC/2009-01-20-k8.c
index d28302b4ce80..2cd15387390c 100644
--- a/test/FrontendC/2009-01-20-k8.c
+++ b/test/FrontendC/2009-01-20-k8.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -S -march=k8
+// RUN: %llvmgcc %s -S -march=k8 -o /dev/null
// XFAIL: *
// XTARGET: x86,i386,i686
long double x;
diff --git a/test/FrontendC/2009-01-21-InvalidIterator.c b/test/FrontendC/2009-01-21-InvalidIterator.c
index 310ea3bfa17b..6ac61f8a748b 100644
--- a/test/FrontendC/2009-01-21-InvalidIterator.c
+++ b/test/FrontendC/2009-01-21-InvalidIterator.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -S -g
+// RUN: %llvmgcc %s -S -g -o /dev/null
typedef long unsigned int size_t;
typedef unsigned short int uint16_t;
diff --git a/test/FrontendC/2009-04-22-UnknownSize.c b/test/FrontendC/2009-04-22-UnknownSize.c
index 2b90c91fe22c..7db9c0730c80 100644
--- a/test/FrontendC/2009-04-22-UnknownSize.c
+++ b/test/FrontendC/2009-04-22-UnknownSize.c
@@ -1,4 +1,4 @@
-// RUN: not %llvmgcc -O1 %s -S |& grep {error: storage size}
+// RUN: not %llvmgcc -O1 %s -S -o /dev/null |& grep {error: storage size}
// PR2958
static struct foo s;
struct foo *p = &s;
diff --git a/test/FrontendC/2009-06-14-HighlyAligned.c b/test/FrontendC/2009-06-14-HighlyAligned.c
index 4678b75b6daf..227db74f47a0 100644
--- a/test/FrontendC/2009-06-14-HighlyAligned.c
+++ b/test/FrontendC/2009-06-14-HighlyAligned.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -S
+// RUN: %llvmgcc %s -S -o /dev/null
// PR4332
static int highly_aligned __attribute__((aligned(4096)));
diff --git a/test/FrontendC/2009-12-07-BitFieldAlignment.c b/test/FrontendC/2009-12-07-BitFieldAlignment.c
index a8312a5fd8a9..02ff8bce1821 100644
--- a/test/FrontendC/2009-12-07-BitFieldAlignment.c
+++ b/test/FrontendC/2009-12-07-BitFieldAlignment.c
@@ -9,7 +9,7 @@ struct S {
};
void f0(struct S *a) {
-// CHECK: %3 = load i32* %2, align 4
-// CHECK: store i32 %4, i32* %2, align 4
+// CHECK: load {{.*}}, align 4
+// CHECK: store {{.*}}, align 4
a->e = 0;
}
diff --git a/test/FrontendC/crash-invalid-array.c b/test/FrontendC/crash-invalid-array.c
new file mode 100644
index 000000000000..d602f7854585
--- /dev/null
+++ b/test/FrontendC/crash-invalid-array.c
@@ -0,0 +1,17 @@
+// RUN: not %llvmgcc -O1 %s -S |& grep {error: invalid use of array with unspecified bounds}
+// PR6913
+
+#include <stdio.h>
+
+int main()
+{
+ int x[10][10];
+ int (*p)[] = x; // <-- this line is what triggered it
+
+ int i;
+
+ for(i = 0; i < 10; ++i)
+ {
+ p[i][i] = i;
+ }
+}
diff --git a/test/FrontendC/cstring-align.c b/test/FrontendC/cstring-align.c
index 715d0f312690..b9ec281f5677 100644
--- a/test/FrontendC/cstring-align.c
+++ b/test/FrontendC/cstring-align.c
@@ -1,6 +1,5 @@
// RUN: %llvmgcc %s -c -Os -m32 -emit-llvm -o - | llc -march=x86 -mtriple=i386-apple-darwin10 | FileCheck %s -check-prefix=DARWIN32
// RUN: %llvmgcc %s -c -Os -m64 -emit-llvm -o - | llc -march=x86-64 -mtriple=x86_64-apple-darwin10 | FileCheck %s -check-prefix=DARWIN64
-// XFAIL: *
// XTARGET: darwin
extern void func(const char *, const char *);
@@ -9,10 +8,10 @@ void long_function_name() {
func("%s: the function name", __func__);
}
-// DARWIN64: .align 3
+// DARWIN64: .align 4
// DARWIN64: ___func__.
// DARWIN64: .asciz "long_function_name"
-// DARWIN32: .align 2
+// DARWIN32: .align 4
// DARWIN32: ___func__.
// DARWIN32: .asciz "long_function_name"
diff --git a/test/FrontendC/inline-asm-function.c b/test/FrontendC/inline-asm-function.c
new file mode 100644
index 000000000000..e5848409865b
--- /dev/null
+++ b/test/FrontendC/inline-asm-function.c
@@ -0,0 +1,6 @@
+// RUN: %llvmgcc -S %s -fasm-blocks -o - -O | grep naked
+// 7533078 (partial).
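+// The asm-block definition of f below should be lowered to a naked
+// function, which the grep above checks for.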
+
+asm int f() {
+ xyz
+}
diff --git a/test/MC/AsmParser/X86/x86_32-encoding.s b/test/MC/AsmParser/X86/x86_32-encoding.s
index 22627585220f..f973cb23b57a 100644
--- a/test/MC/AsmParser/X86/x86_32-encoding.s
+++ b/test/MC/AsmParser/X86/x86_32-encoding.s
@@ -9961,3 +9961,89 @@
// CHECK: aeskeygenassist $125, (%edx,%eax,4), %xmm2
// CHECK: encoding: [0x66,0x0f,0x3a,0xdf,0x14,0x82,0x7d]
aeskeygenassist $125, (%edx,%eax,4), %xmm2
+
+// rdar://7840289
+// CHECK: pshufb CPI1_0(%rip), %xmm1
+// CHECK: encoding: [0x66,0x0f,0x38,0x00,0x0d,A,A,A,A]
+// CHECK: fixup A - offset: 5, value: CPI1_0-4
+pshufb CPI1_0(%rip), %xmm1
+
+// rdar://7910087
+// CHECK: bsfw %bx, %bx
+// CHECK: encoding: [0x66,0x0f,0xbc,0xdb]
+ bsfw %bx, %bx
+
+// CHECK: bsfw 3735928559(%ebx,%ecx,8), %bx
+// CHECK: encoding: [0x66,0x0f,0xbc,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+ bsfw 3735928559(%ebx,%ecx,8), %bx
+
+// CHECK: bsrw %bx, %bx
+// CHECK: encoding: [0x66,0x0f,0xbd,0xdb]
+ bsrw %bx, %bx
+
+// CHECK: bsrw 305419896, %bx
+// CHECK: encoding: [0x66,0x0f,0xbd,0x1d,0x78,0x56,0x34,0x12]
+ bsrw 305419896, %bx
+
+// rdar://7901779
+// CHECK: pushl $127
+// CHECK: encoding: [0x6a,0x7f]
+ pushl $127
+
+// CHECK: pushw $254
+// CHECK: encoding: [0x66,0x68,0xfe,0x00]
+ pushw $254
+
+// CHECK: pushl $254
+// CHECK: encoding: [0x68,0xfe,0x00,0x00,0x00]
+ pushl $254
+
+// rdar://7928400
+// CHECK: movq %mm3, 3735928559(%ebx,%ecx,8)
+// CHECK: encoding: [0x0f,0x7f,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+ movq %mm3, 3735928559(%ebx,%ecx,8)
+
+// CHECK: movd %mm3, 3735928559(%ebx,%ecx,8)
+// CHECK: encoding: [0x0f,0x7e,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+ movd %mm3, 3735928559(%ebx,%ecx,8)
+
+// CHECK: movq 3735928559(%ebx,%ecx,8), %xmm5
+// CHECK: encoding: [0xf3,0x0f,0x7e,0xac,0xcb,0xef,0xbe,0xad,0xde]
+ movq 3735928559(%ebx,%ecx,8), %xmm5
+
+// CHECK: movd 3735928559(%ebx,%ecx,8), %xmm5
+// CHECK: encoding: [0x66,0x0f,0x6e,0xac,0xcb,0xef,0xbe,0xad,0xde]
+ movd 3735928559(%ebx,%ecx,8), %xmm5
+
+// rdar://7914715
+// CHECK: fcoml 3735928559(%ebx,%ecx,8)
+// CHECK: encoding: [0xdc,0x94,0xcb,0xef,0xbe,0xad,0xde]
+ fcoml 3735928559(%ebx,%ecx,8)
+
+// CHECK: fcoms 32493
+// CHECK: encoding: [0xd8,0x15,0xed,0x7e,0x00,0x00]
+ fcoms 32493
+
+// CHECK: fcompl 3735928559(%ebx,%ecx,8)
+// CHECK: encoding: [0xdc,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+ fcompl 3735928559(%ebx,%ecx,8)
+
+// CHECK: fcomps 32493
+// CHECK: encoding: [0xd8,0x1d,0xed,0x7e,0x00,0x00]
+ fcomps 32493
+
+// CHECK: ficoml 3735928559(%ebx,%ecx,8)
+// CHECK: encoding: [0xda,0x94,0xcb,0xef,0xbe,0xad,0xde]
+ ficoml 3735928559(%ebx,%ecx,8)
+
+// CHECK: ficoms 32493
+// CHECK: encoding: [0xde,0x15,0xed,0x7e,0x00,0x00]
+ ficoms 32493
+
+// CHECK: ficompl 3735928559(%ebx,%ecx,8)
+// CHECK: encoding: [0xda,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+ ficompl 3735928559(%ebx,%ecx,8)
+
+// CHECK: ficomps 32493
+// CHECK: encoding: [0xde,0x1d,0xed,0x7e,0x00,0x00]
+ ficomps 32493
diff --git a/test/MC/AsmParser/X86/x86_operands.s b/test/MC/AsmParser/X86/x86_operands.s
index edddd1fc4492..bf958d8478ca 100644
--- a/test/MC/AsmParser/X86/x86_operands.s
+++ b/test/MC/AsmParser/X86/x86_operands.s
@@ -55,4 +55,6 @@
# CHECK: call *4(%eax)
call *4(%eax)
-
+# CHECK: movl %gs:8, %eax
+movl %gs:8, %eax
+
diff --git a/test/MC/AsmParser/exprs.s b/test/MC/AsmParser/exprs.s
index 62b11c2d4e4e..d9a248cd94ff 100644
--- a/test/MC/AsmParser/exprs.s
+++ b/test/MC/AsmParser/exprs.s
@@ -61,3 +61,14 @@ n:
movw $8, (42)+66(%eax)
+
+// "." support:
+_f0:
+L0:
+ jmp L1
+ .long . - L0
+L1:
+ jmp A
+ .long . - L1
+
+ .zerofill __DATA,_bss,A,0
diff --git a/test/MC/Disassembler/arm-tests.txt b/test/MC/Disassembler/arm-tests.txt
index 81261c59028d..a1e229caebf8 100644
--- a/test/MC/Disassembler/arm-tests.txt
+++ b/test/MC/Disassembler/arm-tests.txt
@@ -1,5 +1,4 @@
# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 | FileCheck %s
-# XFAIL: *
# CHECK: b #0
0xfe 0xff 0xff 0xea
@@ -16,9 +15,21 @@
# CHECK: dmb nshst
0x56 0xf0 0x7f 0xf5
+# CHECK: ldclvc p5, cr15, [r8], #-0
+0x00 0xf5 0x78 0x7c
+
# CHECK: ldr r0, [r2], #15
0x0f 0x00 0x92 0xe4
+# CHECK: ldrh r0, [r2], #0
+0xb0 0x00 0xd2 0xe0
+
+# CHECK: ldrht r0, [r2], #15
+0xbf 0x00 0xf2 0xe0
+
+# CHECK: ldrsbtvs lr, [r2], -r9
+0xd9 0xe9 0x32 0x60
+
# CHECK: lsls r0, r2, #31
0x82 0x0f 0xb0 0xe1
@@ -28,6 +39,9 @@
# CHECK: movt r8, #65535
0xff 0x8f 0x4f 0xe3
+# CHECK: mvnpls r7, #245, 2
+0xf5 0x71 0xf0 0x53
+
# CHECK: pkhbt r8, r9, r10, lsl #4
0x1a 0x82 0x89 0xe6
diff --git a/test/MC/Disassembler/dg.exp b/test/MC/Disassembler/dg.exp
index 68d5f1de395b..fc2f17a6fbaa 100644
--- a/test/MC/Disassembler/dg.exp
+++ b/test/MC/Disassembler/dg.exp
@@ -1,4 +1,6 @@
load_lib llvm.exp
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{txt}]]
+if { [llvm_supports_target ARM] } {
+ RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{txt}]]
+}
diff --git a/test/MC/Disassembler/neon-tests.txt b/test/MC/Disassembler/neon-tests.txt
index 25099c0a39b8..51b31e7c1a6e 100644
--- a/test/MC/Disassembler/neon-tests.txt
+++ b/test/MC/Disassembler/neon-tests.txt
@@ -1,5 +1,4 @@
# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 | FileCheck %s
-# XFAIL: *
# CHECK: vbif q15, q7, q0
0x50 0xe1 0x7e 0xf3
@@ -10,6 +9,10 @@
# CHECK: vdup.32 q3, d1[0]
0x41 0x6c 0xb4 0xf3
+# VLD1q8_UPD (with ${dst:dregpair} operand)
+# CHECK: vld1.8 {d17, d18}, [r6], r5
+0x05 0x1a 0x66 0xf4
+
# CHECK: vld4.8 {d0, d1, d2, d3}, [r2], r7
0x07 0x00 0x22 0xf4
@@ -19,6 +22,9 @@
# CHECK: vmov d0, d15
0x1f 0x01 0x2f 0xf2
+# CHECK: vmov.i64 q6, #0xFF00FF00FF
+0x75 0xce 0x81 0xf2
+
# CHECK: vmul.f32 d0, d0, d6
0x16 0x0d 0x00 0xf3
diff --git a/test/MC/Disassembler/simple-tests.txt b/test/MC/Disassembler/simple-tests.txt
index b6bb35dc56a1..41552612cf58 100644
--- a/test/MC/Disassembler/simple-tests.txt
+++ b/test/MC/Disassembler/simple-tests.txt
@@ -51,3 +51,6 @@
# CHECK: vmptrst
0x0f 0xc7 0x38
+
+# CHECK: movl $0, -4(%rbp)
+0xc7 0x45 0xfc 0x00 0x00 0x00 0x00
diff --git a/test/MC/Disassembler/thumb-tests.txt b/test/MC/Disassembler/thumb-tests.txt
index 343e6c94b19f..14e91295276b 100644
--- a/test/MC/Disassembler/thumb-tests.txt
+++ b/test/MC/Disassembler/thumb-tests.txt
@@ -1,5 +1,4 @@
# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 | FileCheck %s
-# XFAIL: *
# CHECK: add r5, sp, #68
0x11 0xad
@@ -10,6 +9,9 @@
# CHECK: b #34
0x0f 0xe0
+# CHECK: b.w #-12
+0xff 0xf7 0xf8 0xaf
+
# CHECK: bfi r2, r10, #0, #1
0x6a 0xf3 0x00 0x02
@@ -25,6 +27,9 @@
# CHECK: ldmia r0!, {r1}
0x02 0xc8
+# CHECK: ldrb.w r8, #-24
+0x1f 0xf8 0x18 0x80
+
# CHECK: ldrd r0, r1, [r7, #64]!
0xf7 0xe9 0x10 0x01
@@ -55,6 +60,9 @@
# CHECK: subw r0, pc, #1
0xaf 0xf2 0x01 0x00
+# CHECK: subw r0, sp, #835
+0xad 0xf2 0x43 0x30
+
# CHECK: uqadd16 r3, r4, r5
0x94 0xfa 0x55 0xf3
@@ -77,6 +85,9 @@
# CHECK: lsleq r1, r0, #28
0x01 0x07
-# CHECK: rsbne r1, r2, #0
-0x51 0x42
+# CHECK: stmiane r0!, {r1, r2, r3}
+0x0e 0xc0
+
# IT block end
+# CHECK: rsbs r1, r2, #0
+0x51 0x42
diff --git a/test/Other/lint.ll b/test/Other/lint.ll
new file mode 100644
index 000000000000..d0db5e46c668
--- /dev/null
+++ b/test/Other/lint.ll
@@ -0,0 +1,84 @@
+; RUN: opt -lint -disable-output < %s |& FileCheck %s
+target datalayout = "e-p:64:64:64"
+
+declare fastcc void @bar()
+
+@CG = constant i32 7
+
+define i32 @foo() noreturn {
+; CHECK: Caller and callee calling convention differ
+ call void @bar()
+; CHECK: Null pointer dereference
+ store i32 0, i32* null
+; CHECK: Null pointer dereference
+ %t = load i32* null
+; CHECK: Undef pointer dereference
+ store i32 0, i32* undef
+; CHECK: Undef pointer dereference
+ %u = load i32* undef
+; CHECK: Memory reference address is misaligned
+ %x = inttoptr i32 1 to i32*
+ load i32* %x, align 4
+; CHECK: Division by zero
+ %sd = sdiv i32 2, 0
+; CHECK: Division by zero
+ %ud = udiv i32 2, 0
+; CHECK: Division by zero
+ %sr = srem i32 2, 0
+; CHECK: Division by zero
+ %ur = urem i32 2, 0
+; CHECK: extractelement index out of range
+ %ee = extractelement <4 x i32> zeroinitializer, i32 4
+; CHECK: insertelement index out of range
+ %ie = insertelement <4 x i32> zeroinitializer, i32 0, i32 4
+; CHECK: Shift count out of range
+ %r = lshr i32 0, 32
+; CHECK: Shift count out of range
+ %q = ashr i32 0, 32
+; CHECK: Shift count out of range
+ %l = shl i32 0, 32
+; CHECK: xor(undef, undef)
+ %xx = xor i32 undef, undef
+; CHECK: sub(undef, undef)
+ %xs = sub i32 undef, undef
+
+; CHECK: Write to read-only memory
+ store i32 8, i32* @CG
+; CHECK: Write to text section
+ store i32 8, i32* bitcast (i32()* @foo to i32*)
+; CHECK: Load from block address
+ %lb = load i32* bitcast (i8* blockaddress(@foo, %next) to i32*)
+; CHECK: Call to block address
+ call void()* bitcast (i8* blockaddress(@foo, %next) to void()*)()
+
+ br label %next
+
+next:
+; CHECK: Static alloca outside of entry block
+ %a = alloca i32
+; CHECK: Return statement in function with noreturn attribute
+ ret i32 0
+
+foo:
+ %z = add i32 0, 0
+; CHECK: unreachable immediately preceded by instruction without side effects
+ unreachable
+}
+
+; CHECK: Unnamed function with non-local linkage
+define void @0() nounwind {
+ ret void
+}
+
+; CHECK: va_start called in a non-varargs function
+declare void @llvm.va_start(i8*)
+define void @not_vararg(i8* %p) nounwind {
+ call void @llvm.va_start(i8* %p)
+ ret void
+}
+
+define void @use_indbr() {
+ indirectbr i8* bitcast (i32()* @foo to i8*), [label %block]
+block:
+ unreachable
+}
diff --git a/test/Transforms/ArgumentPromotion/crash.ll b/test/Transforms/ArgumentPromotion/crash.ll
new file mode 100644
index 000000000000..e2d3d4de9edb
--- /dev/null
+++ b/test/Transforms/ArgumentPromotion/crash.ll
@@ -0,0 +1,38 @@
+; rdar://7879828
+; RUN: opt -inline -argpromotion %s
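+; Reduced testcase; running -inline followed by -argpromotion over this
+; module should not crash.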
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+define void @foo() {
+ invoke void @foo2()
+ to label %if.end432 unwind label %for.end520
+
+if.end432:
+ unreachable
+
+for.end520:
+ unreachable
+}
+
+define internal void @foo2() ssp {
+ %call7 = call fastcc i8* @foo3(i1 (i8*)* @foo4)
+ %call58 = call fastcc i8* @foo3(i1 (i8*)* @foo5)
+ unreachable
+}
+
+define internal fastcc i8* @foo3(i1 (i8*)* %Pred) {
+entry:
+ unreachable
+}
+
+define internal i1 @foo4(i8* %O) nounwind {
+entry:
+ %call = call zeroext i1 @foo5(i8* %O) ; <i1> [#uses=0]
+ unreachable
+}
+
+define internal i1 @foo5(i8* %O) nounwind {
+entry:
+ ret i1 undef
+}
+
diff --git a/test/Transforms/DeadArgElim/2009-03-17-MRE-Invoke.ll b/test/Transforms/DeadArgElim/2009-03-17-MRE-Invoke.ll
index f251d6ce882c..161821f3f8f7 100644
--- a/test/Transforms/DeadArgElim/2009-03-17-MRE-Invoke.ll
+++ b/test/Transforms/DeadArgElim/2009-03-17-MRE-Invoke.ll
@@ -23,4 +23,4 @@ T:
ret i32 %y
T2:
unreachable
-}
\ No newline at end of file
+}
diff --git a/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll b/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
new file mode 100644
index 000000000000..2f820bad8474
--- /dev/null
+++ b/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
@@ -0,0 +1,68 @@
+; RUN: opt -S -deadargelim < %s | FileCheck %s
+
+@.str = private constant [1 x i8] zeroinitializer, align 1 ; <[1 x i8]*> [#uses=1]
+
+define i8* @vfs_addname(i8* %name, i32 %len, i32 %hash, i32 %flags) nounwind ssp {
+entry:
+ call void @llvm.dbg.value(metadata !{i8* %name}, i64 0, metadata !0)
+ call void @llvm.dbg.value(metadata !{i32 %len}, i64 0, metadata !10)
+ call void @llvm.dbg.value(metadata !{i32 %hash}, i64 0, metadata !11)
+ call void @llvm.dbg.value(metadata !{i32 %flags}, i64 0, metadata !12)
+; CHECK: call fastcc i8* @add_name_internal(i8* %name, i32 %hash) nounwind, !dbg !13
+ %0 = call fastcc i8* @add_name_internal(i8* %name, i32 %len, i32 %hash, i8 zeroext 0, i32 %flags) nounwind, !dbg !13 ; <i8*> [#uses=1]
+ ret i8* %0, !dbg !13
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+define internal fastcc i8* @add_name_internal(i8* %name, i32 %len, i32 %hash, i8 zeroext %extra, i32 %flags) nounwind noinline ssp {
+entry:
+ call void @llvm.dbg.value(metadata !{i8* %name}, i64 0, metadata !15)
+ call void @llvm.dbg.value(metadata !{i32 %len}, i64 0, metadata !20)
+ call void @llvm.dbg.value(metadata !{i32 %hash}, i64 0, metadata !21)
+ call void @llvm.dbg.value(metadata !{i8 %extra}, i64 0, metadata !22)
+ call void @llvm.dbg.value(metadata !{i32 %flags}, i64 0, metadata !23)
+ %0 = icmp eq i32 %hash, 0, !dbg !24 ; <i1> [#uses=1]
+ br i1 %0, label %bb, label %bb1, !dbg !24
+
+bb: ; preds = %entry
+ br label %bb2, !dbg !26
+
+bb1: ; preds = %entry
+ br label %bb2, !dbg !27
+
+bb2: ; preds = %bb1, %bb
+ %.0 = phi i8* [ getelementptr inbounds ([1 x i8]* @.str, i64 0, i64 0), %bb ], [ %name, %bb1 ] ; <i8*> [#uses=1]
+ ret i8* %.0, !dbg !27
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!0 = metadata !{i32 524545, metadata !1, metadata !"name", metadata !2, i32 8, metadata !6} ; [ DW_TAG_arg_variable ]
+!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"vfs_addname", metadata !"vfs_addname", metadata !"vfs_addname", metadata !2, i32 12, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 524329, metadata !"tail.c", metadata !"/Users/echeng/LLVM/radars/r7927803/", metadata !3} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 524305, i32 0, i32 1, metadata !"tail.c", metadata !"/Users/echeng/LLVM/radars/r7927803/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 9999)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{metadata !6, metadata !6, metadata !9, metadata !9, metadata !9}
+!6 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !7} ; [ DW_TAG_pointer_type ]
+!7 = metadata !{i32 524326, metadata !2, metadata !"", metadata !2, i32 0, i64 8, i64 8, i64 0, i32 0, metadata !8} ; [ DW_TAG_const_type ]
+!8 = metadata !{i32 524324, metadata !2, metadata !"char", metadata !2, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 524324, metadata !2, metadata !"unsigned int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 524545, metadata !1, metadata !"len", metadata !2, i32 9, metadata !9} ; [ DW_TAG_arg_variable ]
+!11 = metadata !{i32 524545, metadata !1, metadata !"hash", metadata !2, i32 10, metadata !9} ; [ DW_TAG_arg_variable ]
+!12 = metadata !{i32 524545, metadata !1, metadata !"flags", metadata !2, i32 11, metadata !9} ; [ DW_TAG_arg_variable ]
+!13 = metadata !{i32 13, i32 0, metadata !14, null}
+!14 = metadata !{i32 524299, metadata !1, i32 12, i32 0} ; [ DW_TAG_lexical_block ]
+!15 = metadata !{i32 524545, metadata !16, metadata !"name", metadata !2, i32 17, metadata !6} ; [ DW_TAG_arg_variable ]
+!16 = metadata !{i32 524334, i32 0, metadata !2, metadata !"add_name_internal", metadata !"add_name_internal", metadata !"add_name_internal", metadata !2, i32 22, metadata !17, i1 true, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
+!17 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!18 = metadata !{metadata !6, metadata !6, metadata !9, metadata !9, metadata !19, metadata !9}
+!19 = metadata !{i32 524324, metadata !2, metadata !"unsigned char", metadata !2, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
+!20 = metadata !{i32 524545, metadata !16, metadata !"len", metadata !2, i32 18, metadata !9} ; [ DW_TAG_arg_variable ]
+!21 = metadata !{i32 524545, metadata !16, metadata !"hash", metadata !2, i32 19, metadata !9} ; [ DW_TAG_arg_variable ]
+!22 = metadata !{i32 524545, metadata !16, metadata !"extra", metadata !2, i32 20, metadata !19} ; [ DW_TAG_arg_variable ]
+!23 = metadata !{i32 524545, metadata !16, metadata !"flags", metadata !2, i32 21, metadata !9} ; [ DW_TAG_arg_variable ]
+!24 = metadata !{i32 23, i32 0, metadata !25, null}
+!25 = metadata !{i32 524299, metadata !16, i32 22, i32 0} ; [ DW_TAG_lexical_block ]
+!26 = metadata !{i32 24, i32 0, metadata !25, null}
+!27 = metadata !{i32 26, i32 0, metadata !25, null}
diff --git a/test/Transforms/DeadArgElim/deadexternal.ll b/test/Transforms/DeadArgElim/deadexternal.ll
new file mode 100644
index 000000000000..7256b93af1a2
--- /dev/null
+++ b/test/Transforms/DeadArgElim/deadexternal.ll
@@ -0,0 +1,13 @@
+; RUN: opt -deadargelim -S %s | FileCheck %s
+; XFAIL: *
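+; The call to @test should have its dead argument replaced with undef, as
+; the CHECK lines below require; this does not happen yet, hence the XFAIL.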
+
+define void @test(i32) {
+ ret void
+}
+
+define void @foo() {
+ call void @test(i32 0)
+ ret void
+; CHECK: @foo
+; CHECK: i32 undef
+}
diff --git a/test/Transforms/DeadStoreElimination/crash.ll b/test/Transforms/DeadStoreElimination/crash.ll
index 6d8ba71b2094..5aac877a9ecf 100644
--- a/test/Transforms/DeadStoreElimination/crash.ll
+++ b/test/Transforms/DeadStoreElimination/crash.ll
@@ -54,4 +54,4 @@ dead:
store i32 4, i32* %P2
store i32 4, i32* %Q2
br label %dead
-}
\ No newline at end of file
+}
diff --git a/test/Transforms/GVN/2010-03-31-RedundantPHIs.ll b/test/Transforms/GVN/2010-03-31-RedundantPHIs.ll
new file mode 100644
index 000000000000..066e3038b087
--- /dev/null
+++ b/test/Transforms/GVN/2010-03-31-RedundantPHIs.ll
@@ -0,0 +1,46 @@
+; RUN: opt < %s -gvn -enable-full-load-pre -S | FileCheck %s
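+; Full load PRE should leave exactly one phi in each of bb11 and bb12; the
+; CHECK-NOT lines below reject any redundant phis.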
+
+define i8* @cat(i8* %s1, ...) nounwind {
+entry:
+ br i1 undef, label %bb, label %bb3
+
+bb: ; preds = %entry
+ unreachable
+
+bb3: ; preds = %entry
+ store i8* undef, i8** undef, align 4
+ br i1 undef, label %bb5, label %bb6
+
+bb5: ; preds = %bb3
+ unreachable
+
+bb6: ; preds = %bb3
+ br label %bb12
+
+bb8: ; preds = %bb12
+ br i1 undef, label %bb9, label %bb10
+
+bb9: ; preds = %bb8
+ %0 = load i8** undef, align 4 ; <i8*> [#uses=0]
+ %1 = load i8** undef, align 4 ; <i8*> [#uses=0]
+ br label %bb11
+
+bb10: ; preds = %bb8
+ br label %bb11
+
+bb11: ; preds = %bb10, %bb9
+; CHECK: bb11:
+; CHECK: phi
+; CHECK-NOT: phi
+ br label %bb12
+
+bb12: ; preds = %bb11, %bb6
+; CHECK: bb12:
+; CHECK: phi
+; CHECK-NOT: phi
+ br i1 undef, label %bb8, label %bb13
+
+bb13: ; preds = %bb12
+; CHECK: bb13:
+ ret i8* undef
+}
diff --git a/test/Transforms/GVN/invariant-simple.ll b/test/Transforms/GVN/invariant-simple.ll
index 6de75f14350a..0a4182c410ae 100644
--- a/test/Transforms/GVN/invariant-simple.ll
+++ b/test/Transforms/GVN/invariant-simple.ll
@@ -33,4 +33,4 @@ entry:
declare i32 @foo(i8*) nounwind
declare i32 @bar(i8*) nounwind readonly
declare {}* @llvm.invariant.start(i64 %S, i8* nocapture %P) readonly
-declare void @llvm.invariant.end({}* %S, i64 %SS, i8* nocapture %P)
\ No newline at end of file
+declare void @llvm.invariant.end({}* %S, i64 %SS, i8* nocapture %P)
diff --git a/test/Transforms/GVN/lifetime-simple.ll b/test/Transforms/GVN/lifetime-simple.ll
index 8139246dcf3c..48e5bc8bb63f 100644
--- a/test/Transforms/GVN/lifetime-simple.ll
+++ b/test/Transforms/GVN/lifetime-simple.ll
@@ -16,5 +16,5 @@ entry:
ret i8 %1
}
-declare {}* @llvm.lifetime.start(i64 %S, i8* nocapture %P) readonly
-declare void @llvm.lifetime.end(i64 %S, i8* nocapture %P)
\ No newline at end of file
+declare void @llvm.lifetime.start(i64 %S, i8* nocapture %P) readonly
+declare void @llvm.lifetime.end(i64 %S, i8* nocapture %P)
diff --git a/test/Transforms/GlobalOpt/crash.ll b/test/Transforms/GlobalOpt/crash.ll
index a45cbe9c0f6f..701472c059a8 100644
--- a/test/Transforms/GlobalOpt/crash.ll
+++ b/test/Transforms/GlobalOpt/crash.ll
@@ -9,8 +9,34 @@ target triple = "i386-apple-darwin9.8"
@_ZL6vTwist = global %struct.btSimdScalar zeroinitializer ; <%struct.btSimdScalar*> [#uses=1]
@llvm.global_ctors = appending global [1 x %0] [%0 { i32 65535, void ()* @_GLOBAL__I__ZN21btConeTwistConstraintC2Ev }] ; <[12 x %0]*> [#uses=0]
-define internal void @_GLOBAL__I__ZN21btConeTwistConstraintC2Ev() nounwind section "__TEXT,__StaticInit,regular,pure_instructions" {
+define internal void @_GLOBAL__I__ZN21btConeTwistConstraintC2Ev() nounwind section "__TEXT,__StaticInit,regular,pure_instructions" {
entry:
store float 1.0, float* getelementptr inbounds (%struct.btSimdScalar* @_ZL6vTwist, i32 0, i32 0, i32 0, i32 3), align 4
ret void
}
+
+
+; PR6760
+%T = type { [5 x i32] }
+
+@switch_inf = internal global %T* null
+
+define void @test(i8* %arch_file, i32 %route_type) {
+entry:
+ %A = sext i32 1 to i64
+ %B = mul i64 %A, 20
+ %C = call noalias i8* @malloc(i64 %B) nounwind
+ %D = bitcast i8* %C to %T*
+ store %T* %D, %T** @switch_inf, align 8
+ unreachable
+
+bb.nph.i:
+ %scevgep.i539 = getelementptr i8* %C, i64 4
+ unreachable
+
+xx:
+ %E = load %T** @switch_inf, align 8
+ unreachable
+}
+
+declare noalias i8* @malloc(i64) nounwind
diff --git a/test/Transforms/GlobalOpt/malloc-promote-2.ll b/test/Transforms/GlobalOpt/malloc-promote-2.ll
index f989b798b45f..6cb44812d2b6 100644
--- a/test/Transforms/GlobalOpt/malloc-promote-2.ll
+++ b/test/Transforms/GlobalOpt/malloc-promote-2.ll
@@ -1,24 +1,19 @@
-; RUN: opt < %s -globalopt -globaldce -S | not grep malloc
+; RUN: opt < %s -globalopt -S | FileCheck %s
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
-@G = internal global i32* null ; <i32**> [#uses=3]
+@G = internal global i32* null
-define void @init() {
- %malloccall = tail call i8* @malloc(i64 mul (i64 100, i64 4)) ; <i8*> [#uses=1]
- %P = bitcast i8* %malloccall to i32* ; <i32*> [#uses=1]
- store i32* %P, i32** @G
- %GV = load i32** @G ; <i32*> [#uses=1]
- %GVe = getelementptr i32* %GV, i32 40 ; <i32*> [#uses=1]
- store i32 20, i32* %GVe
- ret void
+define void @t() {
+; CHECK: @t()
+; CHECK-NOT: call i8* @malloc
+; CHECK-NEXT: ret void
+ %malloccall = tail call i8* @malloc(i64 mul (i64 100, i64 4))
+ %P = bitcast i8* %malloccall to i32*
+ store i32* %P, i32** @G
+ %GV = load i32** @G
+ %GVe = getelementptr i32* %GV, i32 40
+ store i32 20, i32* %GVe
+ ret void
}
declare noalias i8* @malloc(i64)
-
-define i32 @get() {
- %GV = load i32** @G ; <i32*> [#uses=1]
- %GVe = getelementptr i32* %GV, i32 40 ; <i32*> [#uses=1]
- %V = load i32* %GVe ; <i32> [#uses=1]
- ret i32 %V
-}
-
diff --git a/test/Transforms/GlobalOpt/malloc-promote-3.ll b/test/Transforms/GlobalOpt/malloc-promote-3.ll
deleted file mode 100644
index 57f937d8c929..000000000000
--- a/test/Transforms/GlobalOpt/malloc-promote-3.ll
+++ /dev/null
@@ -1,30 +0,0 @@
-; RUN: opt < %s -globalopt -globaldce -S | not grep malloc
-target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
-
-@G = internal global i32* null ; <i32**> [#uses=4]
-
-define void @init() {
- %malloccall = tail call i8* @malloc(i64 mul (i64 100, i64 4)) ; <i8*> [#uses=1]
- %P = bitcast i8* %malloccall to i32* ; <i32*> [#uses=1]
- store i32* %P, i32** @G
- %GV = load i32** @G ; <i32*> [#uses=1]
- %GVe = getelementptr i32* %GV, i32 40 ; <i32*> [#uses=1]
- store i32 20, i32* %GVe
- ret void
-}
-
-declare noalias i8* @malloc(i64)
-
-define i32 @get() {
- %GV = load i32** @G ; <i32*> [#uses=1]
- %GVe = getelementptr i32* %GV, i32 40 ; <i32*> [#uses=1]
- %V = load i32* %GVe ; <i32> [#uses=1]
- ret i32 %V
-}
-
-define i1 @check() {
- %GV = load i32** @G ; <i32*> [#uses=1]
- %V = icmp eq i32* %GV, null ; <i1> [#uses=1]
- ret i1 %V
-}
-
diff --git a/test/Transforms/GlobalOpt/metadata.ll b/test/Transforms/GlobalOpt/metadata.ll
new file mode 100644
index 000000000000..a09ba72439fc
--- /dev/null
+++ b/test/Transforms/GlobalOpt/metadata.ll
@@ -0,0 +1,19 @@
+; RUN: opt -S -globalopt < %s | FileCheck %s
+
+; PR6112 - When globalopt does RAUW(@G, %G), the metadata reference should drop
+; to null.
+@G = internal global i8** null
+
+define i32 @main(i32 %argc, i8** %argv) {
+; CHECK: @main
+; CHECK: %G = alloca
+ store i8** %argv, i8*** @G
+ ret i32 0
+}
+
+!named = !{!0}
+
+; CHECK: !0 = metadata !{null}
+!0 = metadata !{i8*** @G}
+
+
diff --git a/test/Transforms/IndVarSimplify/casted-argument.ll b/test/Transforms/IndVarSimplify/casted-argument.ll
index dfefe1dc5bbe..a5e002b4578f 100644
--- a/test/Transforms/IndVarSimplify/casted-argument.ll
+++ b/test/Transforms/IndVarSimplify/casted-argument.ll
@@ -47,4 +47,4 @@ if.end54: ; preds = %if.end54, %if.else
declare void @bcopy(i8* nocapture) nounwind
-declare void @bcopy_4038(i8*, i32) nounwind
+declare void @bcopy_4038(i8*, i8*, i32) nounwind
diff --git a/test/Transforms/IndVarSimplify/crash.ll b/test/Transforms/IndVarSimplify/crash.ll
index 14f79fefb180..ab438334c660 100644
--- a/test/Transforms/IndVarSimplify/crash.ll
+++ b/test/Transforms/IndVarSimplify/crash.ll
@@ -16,4 +16,4 @@ define void @t2(i1* %P) nounwind {
; <label>:6 ; preds = %1
ret void
-}
\ No newline at end of file
+}
diff --git a/test/Transforms/IndVarSimplify/eliminate-comparison.ll b/test/Transforms/IndVarSimplify/eliminate-comparison.ll
new file mode 100644
index 000000000000..953bbdff5c62
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/eliminate-comparison.ll
@@ -0,0 +1,108 @@
+; RUN: opt -indvars -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+@X = external global [0 x double]
+
+; Indvars should be able to simplify simple comparisons involving
+; induction variables.
+
+; CHECK: @foo
+; CHECK: %cond = and i1 %tobool.not, true
+
+define void @foo(i64 %n, i32* nocapture %p) nounwind {
+entry:
+ %cmp9 = icmp sgt i64 %n, 0
+ br i1 %cmp9, label %pre, label %return
+
+pre:
+ %t3 = load i32* %p
+ %tobool.not = icmp ne i32 %t3, 0
+ br label %loop
+
+loop:
+ %i = phi i64 [ 0, %pre ], [ %inc, %for.inc ]
+ %cmp6 = icmp slt i64 %i, %n
+ %cond = and i1 %tobool.not, %cmp6
+ br i1 %cond, label %if.then, label %for.inc
+
+if.then:
+ %arrayidx = getelementptr [0 x double]* @X, i64 0, i64 %i
+ store double 3.200000e+00, double* %arrayidx
+ br label %for.inc
+
+for.inc:
+ %inc = add nsw i64 %i, 1
+ %exitcond = icmp sge i64 %inc, %n
+ br i1 %exitcond, label %return, label %loop
+
+return:
+ ret void
+}
+
+; Don't eliminate an icmp that's contributing to the loop exit test though.
+
+; CHECK: @_ZNK4llvm5APInt3ultERKS0_
+; CHECK: %tmp99 = icmp sgt i32 %i, -1
+
+define i32 @_ZNK4llvm5APInt3ultERKS0_(i32 %tmp2.i1, i64** %tmp65, i64** %tmp73, i64** %tmp82, i64** %tmp90) {
+entry:
+ br label %bb18
+
+bb13:
+ %tmp66 = load i64** %tmp65, align 4
+ %tmp68 = getelementptr inbounds i64* %tmp66, i32 %i
+ %tmp69 = load i64* %tmp68, align 4
+ %tmp74 = load i64** %tmp73, align 4
+ %tmp76 = getelementptr inbounds i64* %tmp74, i32 %i
+ %tmp77 = load i64* %tmp76, align 4
+ %tmp78 = icmp ugt i64 %tmp69, %tmp77
+ br i1 %tmp78, label %bb20.loopexit, label %bb15
+
+bb15:
+ %tmp83 = load i64** %tmp82, align 4
+ %tmp85 = getelementptr inbounds i64* %tmp83, i32 %i
+ %tmp86 = load i64* %tmp85, align 4
+ %tmp91 = load i64** %tmp90, align 4
+ %tmp93 = getelementptr inbounds i64* %tmp91, i32 %i
+ %tmp94 = load i64* %tmp93, align 4
+ %tmp95 = icmp ult i64 %tmp86, %tmp94
+ br i1 %tmp95, label %bb20.loopexit, label %bb17
+
+bb17:
+ %tmp97 = add nsw i32 %i, -1
+ br label %bb18
+
+bb18:
+ %i = phi i32 [ %tmp2.i1, %entry ], [ %tmp97, %bb17 ]
+ %tmp99 = icmp sgt i32 %i, -1
+ br i1 %tmp99, label %bb13, label %bb20.loopexit
+
+bb20.loopexit:
+ %tmp.0.ph = phi i32 [ 0, %bb18 ], [ 1, %bb15 ], [ 0, %bb13 ]
+ ret i32 %tmp.0.ph
+}
+
+; Indvars should eliminate the icmp here.
+
+; CHECK: @func_10
+; CHECK-NOT: icmp
+; CHECK: ret void
+
+define void @func_10() nounwind {
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ %i.next, %loop ], [ 0, %entry ]
+ %t0 = icmp slt i32 %i, 0
+ %t1 = zext i1 %t0 to i32
+ %t2 = add i32 %t1, %i
+ %u3 = zext i32 %t2 to i64
+ store i64 %u3, i64* null
+ %i.next = add i32 %i, 1
+ br i1 undef, label %loop, label %return
+
+return:
+ ret void
+}
diff --git a/test/Transforms/IndVarSimplify/eliminate-max.ll b/test/Transforms/IndVarSimplify/eliminate-max.ll
new file mode 100644
index 000000000000..c25bd0e35418
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/eliminate-max.ll
@@ -0,0 +1,52 @@
+; RUN: opt < %s -S -indvars | grep {= icmp} | count 3
+; PR4914.ll
+
+; Indvars should be able to do range analysis and eliminate icmps.
+; There are two here which cannot be eliminated.
+; There's one icmp which could be eliminated but which indvars currently
+; cannot eliminate, because doing so requires analyzing more than just the
+; range of the induction variable.
+
+@0 = private constant [4 x i8] c"%d\0A\00", align 1 ; <[4 x i8]*> [#uses=1]
+
+define i32 @main() nounwind {
+bb:
+ br label %bb1
+
+bb1: ; preds = %bb14, %bb
+ %t = phi i32 [ 0, %bb ], [ %t19, %bb14 ] ; <i32> [#uses=5]
+ %t2 = phi i32 [ 0, %bb ], [ %t18, %bb14 ] ; <i32> [#uses=1]
+ %t3 = icmp slt i32 %t, 0 ; <i1> [#uses=1]
+ br i1 %t3, label %bb7, label %bb4
+
+bb4: ; preds = %bb1
+ %t5 = icmp sgt i32 %t, 255 ; <i1> [#uses=1]
+ %t6 = select i1 %t5, i32 255, i32 %t ; <i32> [#uses=1]
+ br label %bb7
+
+bb7: ; preds = %bb4, %bb1
+ %t8 = phi i32 [ %t6, %bb4 ], [ 0, %bb1 ] ; <i32> [#uses=1]
+ %t9 = sub i32 0, %t ; <i32> [#uses=3]
+ %t10 = icmp slt i32 %t9, 0 ; <i1> [#uses=1]
+ br i1 %t10, label %bb14, label %bb11
+
+bb11: ; preds = %bb7
+ %t12 = icmp sgt i32 %t9, 255 ; <i1> [#uses=1]
+ %t13 = select i1 %t12, i32 255, i32 %t9 ; <i32> [#uses=1]
+ br label %bb14
+
+bb14: ; preds = %bb11, %bb7
+ %t15 = phi i32 [ %t13, %bb11 ], [ 0, %bb7 ] ; <i32> [#uses=1]
+ %t16 = add nsw i32 %t2, 255 ; <i32> [#uses=1]
+ %t17 = add nsw i32 %t16, %t8 ; <i32> [#uses=1]
+ %t18 = add nsw i32 %t17, %t15 ; <i32> [#uses=2]
+ %t19 = add nsw i32 %t, 1 ; <i32> [#uses=2]
+ %t20 = icmp slt i32 %t19, 1000000000 ; <i1> [#uses=1]
+ br i1 %t20, label %bb1, label %bb21
+
+bb21: ; preds = %bb14
+ %t22 = call i32 (i8*, ...)* @printf(i8* noalias getelementptr inbounds ([4 x i8]* @0, i32 0, i32 0), i32 %t18) nounwind
+ ret i32 0
+}
+
+declare i32 @printf(i8* noalias nocapture, ...) nounwind
diff --git a/test/Transforms/IndVarSimplify/eliminate-rem.ll b/test/Transforms/IndVarSimplify/eliminate-rem.ll
new file mode 100644
index 000000000000..f756389398fb
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/eliminate-rem.ll
@@ -0,0 +1,121 @@
+; RUN: opt -indvars -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+; Indvars should be able to eliminate this srem.
+; CHECK: @simple
+; CHECK-NOT: rem
+; CHECK: ret
+
+define void @simple(i64 %arg, double* %arg3) nounwind {
+bb:
+ %t = icmp slt i64 0, %arg ; <i1> [#uses=1]
+ br i1 %t, label %bb4, label %bb12
+
+bb4: ; preds = %bb
+ br label %bb5
+
+bb5: ; preds = %bb4, %bb5
+ %t6 = phi i64 [ %t9, %bb5 ], [ 0, %bb4 ] ; <i64> [#uses=2]
+ %t7 = srem i64 %t6, %arg ; <i64> [#uses=1]
+ %t8 = getelementptr inbounds double* %arg3, i64 %t7 ; <double*> [#uses=1]
+ store double 0.000000e+00, double* %t8
+ %t9 = add nsw i64 %t6, 1 ; <i64> [#uses=2]
+ %t10 = icmp slt i64 %t9, %arg ; <i1> [#uses=1]
+ br i1 %t10, label %bb5, label %bb11
+
+bb11: ; preds = %bb5
+ br label %bb12
+
+bb12: ; preds = %bb11, %bb
+ ret void
+}
+
+; Indvars should be able to eliminate the (i+1)%n.
+; CHECK: @f
+; CHECK-NOT: rem
+; CHECK: rem
+; CHECK-NOT: rem
+; CHECK: ret
+
+define i32 @f(i64* %arg, i64 %arg1, i64 %arg2, i64 %arg3) nounwind {
+bb:
+ %t = icmp sgt i64 %arg1, 0 ; <i1> [#uses=1]
+ br i1 %t, label %bb4, label %bb54
+
+bb4: ; preds = %bb
+ br label %bb5
+
+bb5: ; preds = %bb49, %bb4
+ %t6 = phi i64 [ %t51, %bb49 ], [ 0, %bb4 ] ; <i64> [#uses=4]
+ %t7 = phi i32 [ %t50, %bb49 ], [ 0, %bb4 ] ; <i32> [#uses=2]
+ %t8 = add nsw i64 %t6, %arg1 ; <i64> [#uses=1]
+ %t9 = add nsw i64 %t8, -2 ; <i64> [#uses=1]
+ %t10 = srem i64 %t9, %arg1 ; <i64> [#uses=1]
+ %t11 = add nsw i64 %t10, 1 ; <i64> [#uses=1]
+ %t12 = add nsw i64 %t6, 1 ; <i64> [#uses=1]
+ %t13 = srem i64 %t12, %arg1 ; <i64> [#uses=1]
+ %t14 = icmp sgt i64 %arg1, 0 ; <i1> [#uses=1]
+ br i1 %t14, label %bb15, label %bb49
+
+bb15: ; preds = %bb5
+ br label %bb16
+
+bb16: ; preds = %bb44, %bb15
+ %t17 = phi i64 [ %t46, %bb44 ], [ 0, %bb15 ] ; <i64> [#uses=1]
+ %t18 = phi i32 [ %t45, %bb44 ], [ %t7, %bb15 ] ; <i32> [#uses=2]
+ %t19 = icmp sgt i64 %arg1, 0 ; <i1> [#uses=1]
+ br i1 %t19, label %bb20, label %bb44
+
+bb20: ; preds = %bb16
+ br label %bb21
+
+bb21: ; preds = %bb21, %bb20
+ %t22 = phi i64 [ %t41, %bb21 ], [ 0, %bb20 ] ; <i64> [#uses=4]
+ %t23 = phi i32 [ %t40, %bb21 ], [ %t18, %bb20 ] ; <i32> [#uses=1]
+ %t24 = mul i64 %t6, %arg1 ; <i64> [#uses=1]
+ %t25 = mul i64 %t13, %arg1 ; <i64> [#uses=1]
+ %t26 = add nsw i64 %t24, %t22 ; <i64> [#uses=1]
+ %t27 = mul i64 %t11, %arg1 ; <i64> [#uses=1]
+ %t28 = add nsw i64 %t25, %t22 ; <i64> [#uses=1]
+ %t29 = getelementptr inbounds i64* %arg, i64 %t26 ; <i64*> [#uses=1]
+ %t30 = add nsw i64 %t27, %t22 ; <i64> [#uses=1]
+ %t31 = getelementptr inbounds i64* %arg, i64 %t28 ; <i64*> [#uses=1]
+ %t32 = zext i32 %t23 to i64 ; <i64> [#uses=1]
+ %t33 = load i64* %t29 ; <i64> [#uses=1]
+ %t34 = getelementptr inbounds i64* %arg, i64 %t30 ; <i64*> [#uses=1]
+ %t35 = load i64* %t31 ; <i64> [#uses=1]
+ %t36 = add nsw i64 %t32, %t33 ; <i64> [#uses=1]
+ %t37 = add nsw i64 %t36, %t35 ; <i64> [#uses=1]
+ %t38 = load i64* %t34 ; <i64> [#uses=1]
+ %t39 = add nsw i64 %t37, %t38 ; <i64> [#uses=1]
+ %t40 = trunc i64 %t39 to i32 ; <i32> [#uses=2]
+ %t41 = add nsw i64 %t22, 1 ; <i64> [#uses=2]
+ %t42 = icmp slt i64 %t41, %arg1 ; <i1> [#uses=1]
+ br i1 %t42, label %bb21, label %bb43
+
+bb43: ; preds = %bb21
+ br label %bb44
+
+bb44: ; preds = %bb43, %bb16
+ %t45 = phi i32 [ %t18, %bb16 ], [ %t40, %bb43 ] ; <i32> [#uses=2]
+ %t46 = add nsw i64 %t17, 1 ; <i64> [#uses=2]
+ %t47 = icmp slt i64 %t46, %arg1 ; <i1> [#uses=1]
+ br i1 %t47, label %bb16, label %bb48
+
+bb48: ; preds = %bb44
+ br label %bb49
+
+bb49: ; preds = %bb48, %bb5
+ %t50 = phi i32 [ %t7, %bb5 ], [ %t45, %bb48 ] ; <i32> [#uses=2]
+ %t51 = add nsw i64 %t6, 1 ; <i64> [#uses=2]
+ %t52 = icmp slt i64 %t51, %arg1 ; <i1> [#uses=1]
+ br i1 %t52, label %bb5, label %bb53
+
+bb53: ; preds = %bb49
+ br label %bb54
+
+bb54: ; preds = %bb53, %bb
+ %t55 = phi i32 [ 0, %bb ], [ %t50, %bb53 ] ; <i32> [#uses=1]
+ ret i32 %t55
+}
diff --git a/test/Transforms/IndVarSimplify/udiv.ll b/test/Transforms/IndVarSimplify/udiv.ll
new file mode 100644
index 000000000000..8260093d1c37
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/udiv.ll
@@ -0,0 +1,162 @@
+; RUN: opt -indvars -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+@main.flags = internal global [8193 x i8] zeroinitializer, align 1 ; <[8193 x i8]*> [#uses=5]
+@.str = private constant [11 x i8] c"Count: %d\0A\00" ; <[11 x i8]*> [#uses=1]
+
+; Indvars shouldn't emit a udiv here, because there's no udiv in the
+; original code. This comes from SingleSource/Benchmarks/Shootout/sieve.c.
+
+; CHECK: @main
+; CHECK-NOT: div
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind {
+entry:
+ %cmp = icmp eq i32 %argc, 2 ; <i1> [#uses=1]
+ br i1 %cmp, label %cond.true, label %while.cond.preheader
+
+cond.true: ; preds = %entry
+ %arrayidx = getelementptr inbounds i8** %argv, i64 1 ; <i8**> [#uses=1]
+ %tmp2 = load i8** %arrayidx ; <i8*> [#uses=1]
+ %call = tail call i32 @atoi(i8* %tmp2) nounwind readonly ; <i32> [#uses=1]
+ br label %while.cond.preheader
+
+while.cond.preheader: ; preds = %entry, %cond.true
+ %NUM.0.ph = phi i32 [ %call, %cond.true ], [ 170000, %entry ] ; <i32> [#uses=2]
+ %tobool18 = icmp eq i32 %NUM.0.ph, 0 ; <i1> [#uses=1]
+ br i1 %tobool18, label %while.end, label %bb.nph30
+
+while.cond.loopexit: ; preds = %for.cond12.while.cond.loopexit_crit_edge, %for.cond12.loopexit
+ %count.2.lcssa = phi i32 [ %count.1.lcssa, %for.cond12.while.cond.loopexit_crit_edge ], [ 0, %for.cond12.loopexit ] ; <i32> [#uses=1]
+ br label %while.cond
+
+while.cond: ; preds = %while.cond.loopexit
+ %tobool = icmp eq i32 %dec19, 0 ; <i1> [#uses=1]
+ br i1 %tobool, label %while.cond.while.end_crit_edge, label %for.cond.preheader
+
+while.cond.while.end_crit_edge: ; preds = %while.cond
+ %count.2.lcssa.lcssa = phi i32 [ %count.2.lcssa, %while.cond ] ; <i32> [#uses=1]
+ br label %while.end
+
+bb.nph30: ; preds = %while.cond.preheader
+ br label %for.cond.preheader
+
+for.cond.preheader: ; preds = %bb.nph30, %while.cond
+ %dec19.in = phi i32 [ %NUM.0.ph, %bb.nph30 ], [ %dec19, %while.cond ] ; <i32> [#uses=1]
+ %dec19 = add i32 %dec19.in, -1 ; <i32> [#uses=2]
+ br i1 true, label %bb.nph, label %for.cond12.loopexit
+
+for.cond: ; preds = %for.body
+ %cmp8 = icmp slt i64 %inc, 8193 ; <i1> [#uses=1]
+ br i1 %cmp8, label %for.body, label %for.cond.for.cond12.loopexit_crit_edge
+
+for.cond.for.cond12.loopexit_crit_edge: ; preds = %for.cond
+ br label %for.cond12.loopexit
+
+bb.nph: ; preds = %for.cond.preheader
+ br label %for.body
+
+for.body: ; preds = %bb.nph, %for.cond
+ %i.02 = phi i64 [ 2, %bb.nph ], [ %inc, %for.cond ] ; <i64> [#uses=2]
+ %arrayidx10 = getelementptr inbounds [8193 x i8]* @main.flags, i64 0, i64 %i.02 ; <i8*> [#uses=1]
+ store i8 1, i8* %arrayidx10
+ %inc = add nsw i64 %i.02, 1 ; <i64> [#uses=2]
+ br label %for.cond
+
+for.cond12.loopexit: ; preds = %for.cond.for.cond12.loopexit_crit_edge, %for.cond.preheader
+ br i1 true, label %bb.nph16, label %while.cond.loopexit
+
+for.cond12: ; preds = %for.inc35
+ %cmp14 = icmp slt i64 %inc37, 8193 ; <i1> [#uses=1]
+ br i1 %cmp14, label %for.body15, label %for.cond12.while.cond.loopexit_crit_edge
+
+for.cond12.while.cond.loopexit_crit_edge: ; preds = %for.cond12
+ %count.1.lcssa = phi i32 [ %count.1, %for.cond12 ] ; <i32> [#uses=1]
+ br label %while.cond.loopexit
+
+bb.nph16: ; preds = %for.cond12.loopexit
+ br label %for.body15
+
+for.body15: ; preds = %bb.nph16, %for.cond12
+ %count.212 = phi i32 [ 0, %bb.nph16 ], [ %count.1, %for.cond12 ] ; <i32> [#uses=2]
+ %i.17 = phi i64 [ 2, %bb.nph16 ], [ %inc37, %for.cond12 ] ; <i64> [#uses=4]
+ %arrayidx17 = getelementptr inbounds [8193 x i8]* @main.flags, i64 0, i64 %i.17 ; <i8*> [#uses=1]
+ %tmp18 = load i8* %arrayidx17 ; <i8> [#uses=1]
+ %tobool19 = icmp eq i8 %tmp18, 0 ; <i1> [#uses=1]
+ br i1 %tobool19, label %for.inc35, label %if.then
+
+if.then: ; preds = %for.body15
+ %add = shl i64 %i.17, 1 ; <i64> [#uses=2]
+ %cmp243 = icmp slt i64 %add, 8193 ; <i1> [#uses=1]
+ br i1 %cmp243, label %bb.nph5, label %for.end32
+
+for.cond22: ; preds = %for.body25
+ %cmp24 = icmp slt i64 %add31, 8193 ; <i1> [#uses=1]
+ br i1 %cmp24, label %for.body25, label %for.cond22.for.end32_crit_edge
+
+for.cond22.for.end32_crit_edge: ; preds = %for.cond22
+ br label %for.end32
+
+bb.nph5: ; preds = %if.then
+ br label %for.body25
+
+for.body25: ; preds = %bb.nph5, %for.cond22
+ %k.04 = phi i64 [ %add, %bb.nph5 ], [ %add31, %for.cond22 ] ; <i64> [#uses=2]
+ %arrayidx27 = getelementptr inbounds [8193 x i8]* @main.flags, i64 0, i64 %k.04 ; <i8*> [#uses=1]
+ store i8 0, i8* %arrayidx27
+ %add31 = add nsw i64 %k.04, %i.17 ; <i64> [#uses=2]
+ br label %for.cond22
+
+for.end32: ; preds = %for.cond22.for.end32_crit_edge, %if.then
+ %inc34 = add nsw i32 %count.212, 1 ; <i32> [#uses=1]
+ br label %for.inc35
+
+for.inc35: ; preds = %for.body15, %for.end32
+ %count.1 = phi i32 [ %inc34, %for.end32 ], [ %count.212, %for.body15 ] ; <i32> [#uses=2]
+ %inc37 = add nsw i64 %i.17, 1 ; <i64> [#uses=2]
+ br label %for.cond12
+
+while.end: ; preds = %while.cond.while.end_crit_edge, %while.cond.preheader
+ %count.0.lcssa = phi i32 [ %count.2.lcssa.lcssa, %while.cond.while.end_crit_edge ], [ 0, %while.cond.preheader ] ; <i32> [#uses=1]
+ %call40 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i64 0, i64 0), i32 %count.0.lcssa) nounwind ; <i32> [#uses=0]
+ ret i32 0
+}
+
+declare i32 @atoi(i8* nocapture) nounwind readonly
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
+; IndVars shouldn't be afraid to emit a udiv here, since there's a udiv in
+; the original code.
+
+; CHECK: @foo
+; CHECK: for.body.preheader:
+; CHECK-NEXT: udiv
+
+define void @foo(double* %p, i64 %n) nounwind {
+entry:
+ %div0 = udiv i64 %n, 7 ; <i64> [#uses=1]
+ %div1 = add i64 %div0, 1
+ %cmp2 = icmp ult i64 0, %div1 ; <i1> [#uses=1]
+ br i1 %cmp2, label %for.body.preheader, label %for.end
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ] ; <i64> [#uses=2]
+ %arrayidx = getelementptr inbounds double* %p, i64 %i.03 ; <double*> [#uses=1]
+ store double 0.000000e+00, double* %arrayidx
+ %inc = add i64 %i.03, 1 ; <i64> [#uses=2]
+ %divx = udiv i64 %n, 7 ; <i64> [#uses=1]
+ %div = add i64 %divx, 1
+ %cmp = icmp ult i64 %inc, %div ; <i1> [#uses=1]
+ br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+}
diff --git a/test/Transforms/Inline/2009-01-12-RecursiveInline.ll b/test/Transforms/Inline/2009-01-12-RecursiveInline.ll
deleted file mode 100644
index 1a3325a68b61..000000000000
--- a/test/Transforms/Inline/2009-01-12-RecursiveInline.ll
+++ /dev/null
@@ -1,92 +0,0 @@
-; RUN: opt < %s -inline -S | grep {call.*fib} | count 4
-; First call to fib from fib is inlined, producing 2 instead of 1, total 3.
-; Second call to fib from fib is not inlined because new body of fib exceeds
-; inlining limit of 200. Plus call in main = 4 total.
-
-; ModuleID = '<stdin>'
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i386-apple-darwin9.6"
-@"\01LC" = internal constant [5 x i8] c"%ld\0A\00" ; <[5 x i8]*> [#uses=1]
-
-define i32 @fib(i32 %n) nounwind {
-entry:
- %n_addr = alloca i32 ; <i32*> [#uses=4]
- %retval = alloca i32 ; <i32*> [#uses=2]
- %0 = alloca i32 ; <i32*> [#uses=3]
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- store i32 %n, i32* %n_addr
- %1 = load i32* %n_addr, align 4 ; <i32> [#uses=1]
- %2 = icmp ule i32 %1, 1 ; <i1> [#uses=1]
- br i1 %2, label %bb, label %bb1
-
-bb: ; preds = %entry
- store i32 1, i32* %0, align 4
- br label %bb2
-
-bb1: ; preds = %entry
- %3 = load i32* %n_addr, align 4 ; <i32> [#uses=1]
- %4 = sub i32 %3, 2 ; <i32> [#uses=1]
- %5 = call i32 @fib(i32 %4) nounwind ; <i32> [#uses=1]
- %6 = load i32* %n_addr, align 4 ; <i32> [#uses=1]
- %7 = sub i32 %6, 1 ; <i32> [#uses=1]
- %8 = call i32 @fib(i32 %7) nounwind ; <i32> [#uses=1]
- %9 = add i32 %5, %8 ; <i32> [#uses=1]
- store i32 %9, i32* %0, align 4
- br label %bb2
-
-bb2: ; preds = %bb1, %bb
- %10 = load i32* %0, align 4 ; <i32> [#uses=1]
- store i32 %10, i32* %retval, align 4
- br label %return
-
-return: ; preds = %bb2
- %retval3 = load i32* %retval ; <i32> [#uses=1]
- ret i32 %retval3
-}
-
-define i32 @main(i32 %argc, i8** %argv) nounwind {
-entry:
- %argc_addr = alloca i32 ; <i32*> [#uses=2]
- %argv_addr = alloca i8** ; <i8***> [#uses=2]
- %retval = alloca i32 ; <i32*> [#uses=2]
- %N = alloca i32 ; <i32*> [#uses=2]
- %0 = alloca i32 ; <i32*> [#uses=2]
- %iftmp.0 = alloca i32 ; <i32*> [#uses=3]
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- store i32 %argc, i32* %argc_addr
- store i8** %argv, i8*** %argv_addr
- %1 = load i32* %argc_addr, align 4 ; <i32> [#uses=1]
- %2 = icmp eq i32 %1, 2 ; <i1> [#uses=1]
- br i1 %2, label %bb, label %bb1
-
-bb: ; preds = %entry
- %3 = load i8*** %argv_addr, align 4 ; <i8**> [#uses=1]
- %4 = getelementptr i8** %3, i32 1 ; <i8**> [#uses=1]
- %5 = load i8** %4, align 4 ; <i8*> [#uses=1]
- %6 = call i32 @atoi(i8* %5) nounwind ; <i32> [#uses=1]
- store i32 %6, i32* %iftmp.0, align 4
- br label %bb2
-
-bb1: ; preds = %entry
- store i32 43, i32* %iftmp.0, align 4
- br label %bb2
-
-bb2: ; preds = %bb1, %bb
- %7 = load i32* %iftmp.0, align 4 ; <i32> [#uses=1]
- store i32 %7, i32* %N, align 4
- %8 = load i32* %N, align 4 ; <i32> [#uses=1]
- %9 = call i32 @fib(i32 %8) nounwind ; <i32> [#uses=1]
- %10 = call i32 (i8*, ...)* @printf(i8* getelementptr ([5 x i8]* @"\01LC", i32 0, i32 0), i32 %9) nounwind ; <i32> [#uses=0]
- store i32 0, i32* %0, align 4
- %11 = load i32* %0, align 4 ; <i32> [#uses=1]
- store i32 %11, i32* %retval, align 4
- br label %return
-
-return: ; preds = %bb2
- %retval3 = load i32* %retval ; <i32> [#uses=1]
- ret i32 %retval3
-}
-
-declare i32 @atoi(i8*)
-
-declare i32 @printf(i8*, ...) nounwind
diff --git a/test/Transforms/Inline/crash.ll b/test/Transforms/Inline/crash.ll
index f34b44c2aa08..1df4d6063e84 100644
--- a/test/Transforms/Inline/crash.ll
+++ b/test/Transforms/Inline/crash.ll
@@ -86,3 +86,34 @@ bb260:
lpad:
unwind
}
+
+
+
+;; This exposed a crash handling devirtualized calls.
+define void @f1(void ()* %f) ssp {
+entry:
+ call void %f()
+ ret void
+}
+
+define void @f4(i32 %size) ssp {
+entry:
+ invoke void @f1(void ()* @f3)
+ to label %invcont3 unwind label %lpad18
+
+invcont3: ; preds = %bb1
+ ret void
+
+lpad18: ; preds = %invcont3, %bb1
+ unreachable
+}
+
+define void @f3() ssp {
+entry:
+ unreachable
+}
+
+declare void @f5() ssp
+
+
+
diff --git a/test/Transforms/Inline/crash2.ll b/test/Transforms/Inline/crash2.ll
new file mode 100644
index 000000000000..cb1f44d5cca7
--- /dev/null
+++ b/test/Transforms/Inline/crash2.ll
@@ -0,0 +1,29 @@
+; RUN: opt -inline -scalarrepl -max-cg-scc-iterations=1 %s -disable-output
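+; Reduced testcase; opt should survive -inline and -scalarrepl with the
+; CGSCC iteration limit capped at one.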
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.3"
+
+declare i8* @f1(i8*) ssp align 2
+
+define linkonce_odr void @f2(i8* %t) inlinehint ssp {
+entry:
+ unreachable
+}
+
+define linkonce_odr void @f3(void (i8*)* %__f) ssp {
+entry:
+ %__f_addr = alloca void (i8*)*, align 8
+ store void (i8*)* %__f, void (i8*)** %__f_addr
+
+ %0 = load void (i8*)** %__f_addr, align 8
+ call void %0(i8* undef)
+ call i8* @f1(i8* undef) ssp
+ unreachable
+}
+
+define linkonce_odr void @f4(i8* %this) ssp align 2 {
+entry:
+ %0 = alloca i32
+ call void @f3(void (i8*)* @f2) ssp
+ ret void
+}
+
diff --git a/test/Transforms/Inline/devirtualize-2.ll b/test/Transforms/Inline/devirtualize-2.ll
new file mode 100644
index 000000000000..02ff7679148d
--- /dev/null
+++ b/test/Transforms/Inline/devirtualize-2.ll
@@ -0,0 +1,44 @@
+; RUN: opt < %s -inline -S | FileCheck %s
+; PR4834
+
+define i32 @test1() {
+ %funcall1_ = call fastcc i32 ()* ()* @f1()
+ %executecommandptr1_ = call i32 %funcall1_()
+ ret i32 %executecommandptr1_
+}
+
+define internal fastcc i32 ()* @f1() nounwind readnone {
+ ret i32 ()* @f2
+}
+
+define internal i32 @f2() nounwind readnone {
+ ret i32 1
+}
+
+; CHECK: @test1()
+; CHECK-NEXT: ret i32 1
+
+
+
+
+
+declare i8* @f1a(i8*) ssp align 2
+
+define internal i32 @f2a(i8* %t) inlinehint ssp {
+entry:
+ ret i32 41
+}
+
+define internal i32 @f3a(i32 (i8*)* %__f) ssp {
+entry:
+ %A = call i32 %__f(i8* undef)
+ ret i32 %A
+}
+
+define i32 @test2(i8* %this) ssp align 2 {
+ %X = call i32 @f3a(i32 (i8*)* @f2a) ssp
+ ret i32 %X
+}
+
+; CHECK: @test2
+; CHECK-NEXT: ret i32 41
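
A rough C++ analogue of @test1 above (illustrative only, not part of the patch): inlining f1 makes the returned pointer the constant f2, the indirect call becomes direct, and inlining f2 leaves the constant the CHECK lines expect.

    typedef int (*Fn)();

    static int f2() { return 1; }
    static Fn f1() { return f2; }

    int test1() {
      Fn fp = f1();   // after inlining f1, fp is known to be f2
      return fp();    // now a direct call; inlining f2 yields 1
    }

@test2 is the same idea with the function pointer passed as an argument instead of returned.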
diff --git a/test/Transforms/Inline/devirtualize-3.ll b/test/Transforms/Inline/devirtualize-3.ll
new file mode 100644
index 000000000000..0a50786498df
--- /dev/null
+++ b/test/Transforms/Inline/devirtualize-3.ll
@@ -0,0 +1,79 @@
+; RUN: opt -inline -S -scalarrepl -gvn -instcombine %s | FileCheck %s
+; PR5009
+
+; CHECK: define i32 @main()
+; CHECK-NEXT: entry:
+; CHECK-NEXT: call void @exit(i32 38)
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+%struct.cont_t = type { void (i8*, i32)*, i8* }
+%struct.foo_sf_t = type { %struct.cont_t*, i32 }
+
+define i32 @main() nounwind ssp {
+entry:
+ %cont = alloca %struct.cont_t, align 8 ; <%struct.cont_t*> [#uses=4]
+ %tmp = getelementptr inbounds %struct.cont_t* %cont, i32 0, i32 0 ; <void (i8*, i32)**> [#uses=1]
+ %tmp1 = getelementptr inbounds %struct.cont_t* %cont, i32 0, i32 0 ; <void (i8*, i32)**> [#uses=2]
+ store void (i8*, i32)* bitcast (void (%struct.cont_t*, i32)* @quit to void (i8*, i32)*), void (i8*, i32)** %tmp1
+ %tmp2 = load void (i8*, i32)** %tmp1 ; <void (i8*, i32)*> [#uses=1]
+ store void (i8*, i32)* %tmp2, void (i8*, i32)** %tmp
+ %tmp3 = getelementptr inbounds %struct.cont_t* %cont, i32 0, i32 1 ; <i8**> [#uses=1]
+ store i8* null, i8** %tmp3
+ call void @foo(%struct.cont_t* %cont)
+ ret i32 0
+}
+
+define internal void @quit(%struct.cont_t* %cont, i32 %rcode) nounwind ssp {
+entry:
+ call void @exit(i32 %rcode) noreturn
+ unreachable
+}
+
+define internal void @foo(%struct.cont_t* %c) nounwind ssp {
+entry:
+ %sf = alloca %struct.foo_sf_t, align 8 ; <%struct.foo_sf_t*> [#uses=3]
+ %next = alloca %struct.cont_t, align 8 ; <%struct.cont_t*> [#uses=3]
+ %tmp = getelementptr inbounds %struct.foo_sf_t* %sf, i32 0, i32 0 ; <%struct.cont_t**> [#uses=1]
+ store %struct.cont_t* %c, %struct.cont_t** %tmp
+ %tmp2 = getelementptr inbounds %struct.foo_sf_t* %sf, i32 0, i32 1 ; <i32*> [#uses=1]
+ store i32 2, i32* %tmp2
+ %tmp4 = getelementptr inbounds %struct.cont_t* %next, i32 0, i32 0 ; <void (i8*, i32)**> [#uses=1]
+ store void (i8*, i32)* bitcast (void (%struct.foo_sf_t*, i32)* @foo2 to void (i8*, i32)*), void (i8*, i32)** %tmp4
+ %tmp5 = getelementptr inbounds %struct.cont_t* %next, i32 0, i32 1 ; <i8**> [#uses=1]
+ %conv = bitcast %struct.foo_sf_t* %sf to i8* ; <i8*> [#uses=1]
+ store i8* %conv, i8** %tmp5
+ call void @bar(%struct.cont_t* %next, i32 14)
+ ret void
+}
+
+define internal void @foo2(%struct.foo_sf_t* %sf, i32 %y) nounwind ssp {
+entry:
+ %tmp1 = getelementptr inbounds %struct.foo_sf_t* %sf, i32 0, i32 0 ; <%struct.cont_t**> [#uses=1]
+ %tmp2 = load %struct.cont_t** %tmp1 ; <%struct.cont_t*> [#uses=1]
+ %tmp3 = getelementptr inbounds %struct.cont_t* %tmp2, i32 0, i32 0 ; <void (i8*, i32)**> [#uses=1]
+ %tmp4 = load void (i8*, i32)** %tmp3 ; <void (i8*, i32)*> [#uses=1]
+ %tmp6 = getelementptr inbounds %struct.foo_sf_t* %sf, i32 0, i32 0 ; <%struct.cont_t**> [#uses=1]
+ %tmp7 = load %struct.cont_t** %tmp6 ; <%struct.cont_t*> [#uses=1]
+ %conv = bitcast %struct.cont_t* %tmp7 to i8* ; <i8*> [#uses=1]
+ %tmp9 = getelementptr inbounds %struct.foo_sf_t* %sf, i32 0, i32 1 ; <i32*> [#uses=1]
+ %tmp10 = load i32* %tmp9 ; <i32> [#uses=1]
+ %mul = mul i32 %tmp10, %y ; <i32> [#uses=1]
+ call void %tmp4(i8* %conv, i32 %mul)
+ ret void
+}
+
+define internal void @bar(%struct.cont_t* %c, i32 %y) nounwind ssp {
+entry:
+ %tmp1 = getelementptr inbounds %struct.cont_t* %c, i32 0, i32 0 ; <void (i8*, i32)**> [#uses=1]
+ %tmp2 = load void (i8*, i32)** %tmp1 ; <void (i8*, i32)*> [#uses=1]
+ %tmp4 = getelementptr inbounds %struct.cont_t* %c, i32 0, i32 1 ; <i8**> [#uses=1]
+ %tmp5 = load i8** %tmp4 ; <i8*> [#uses=1]
+ %add = add nsw i32 %y, 5 ; <i32> [#uses=1]
+ call void %tmp2(i8* %tmp5, i32 %add)
+ ret void
+}
+
+declare void @exit(i32) noreturn
+
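
The test is continuation-passing style built from function pointers stored in structs. A rough C++ rendering (illustrative only, not part of the patch) shows where the CHECK value comes from: bar computes 14 + 5 = 19, foo2 computes 2 * 19 = 38, and the continuation finally reached is quit.

    #include <cstdlib>

    struct cont_t { void (*fn)(void *, int); void *data; };
    struct foo_sf_t { cont_t *c; int val; };

    static void quit(void *, int rc) { std::exit(rc); }

    static void foo2(void *p, int y) {
      foo_sf_t *sf = static_cast<foo_sf_t *>(p);
      sf->c->fn(sf->c->data, sf->val * y);   // 2 * 19 == 38
    }

    static void bar(cont_t *c, int y) {
      c->fn(c->data, y + 5);                 // 14 + 5 == 19
    }

    static void foo(cont_t *c) {
      foo_sf_t sf = { c, 2 };
      cont_t next = { foo2, &sf };
      bar(&next, 14);
    }

    int main() {
      cont_t cont = { quit, 0 };
      foo(&cont);                            // folds to exit(38)
    }
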
diff --git a/test/Transforms/Inline/devirtualize.ll b/test/Transforms/Inline/devirtualize.ll
new file mode 100644
index 000000000000..9ed4b6958c38
--- /dev/null
+++ b/test/Transforms/Inline/devirtualize.ll
@@ -0,0 +1,182 @@
+; RUN: opt -S -inline -scalarrepl -instcombine -simplifycfg -instcombine -gvn -globaldce %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+
+; Simple devirt testcase, requires iteration between inliner and GVN.
+; rdar://6295824
+define i32 @foo(i32 ()** noalias %p, i64* noalias %q) nounwind ssp {
+entry:
+ store i32 ()* @bar, i32 ()** %p
+ store i64 0, i64* %q
+ %tmp3 = load i32 ()** %p ; <i32 ()*> [#uses=1]
+ %call = call i32 %tmp3() ; <i32> [#uses=1]
+ %X = add i32 %call, 4
+ ret i32 %X
+
+; CHECK: @foo
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store
+; CHECK-NEXT: store
+; CHECK-NEXT: ret i32 11
+}
+
+define internal i32 @bar() nounwind ssp {
+entry:
+ ret i32 7
+}
+
+
+;; More complex devirt case, from PR6724
+; CHECK: @_Z1gv()
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret i32 7
+
+%0 = type { i8*, i8* }
+%1 = type { i8*, i8*, i32, i32, i8*, i64, i8*, i64 }
+%2 = type { i8*, i8*, i8* }
+%struct.A = type { i8** }
+%struct.B = type { i8** }
+%struct.C = type { [16 x i8] }
+%struct.D = type { [16 x i8] }
+
+@_ZTV1D = linkonce_odr constant [6 x i8*] [i8* null, i8* bitcast (%2* @_ZTI1D to i8*), i8* bitcast (i32 (%struct.C*)* @_ZN1D1fEv to i8*), i8* inttoptr (i64 -8 to i8*), i8* bitcast (%2* @_ZTI1D to i8*), i8* bitcast (i32 (%struct.C*)* @_ZThn8_N1D1fEv to i8*)] ; <[6 x i8*]*> [#uses=2]
+@_ZTVN10__cxxabiv120__si_class_type_infoE = external global i8* ; <i8**> [#uses=1]
+@_ZTS1D = linkonce_odr constant [3 x i8] c"1D\00" ; <[3 x i8]*> [#uses=1]
+@_ZTVN10__cxxabiv121__vmi_class_type_infoE = external global i8* ; <i8**> [#uses=1]
+@_ZTS1C = linkonce_odr constant [3 x i8] c"1C\00" ; <[3 x i8]*> [#uses=1]
+@_ZTVN10__cxxabiv117__class_type_infoE = external global i8* ; <i8**> [#uses=1]
+@_ZTS1A = linkonce_odr constant [3 x i8] c"1A\00" ; <[3 x i8]*> [#uses=1]
+@_ZTI1A = linkonce_odr constant %0 { i8* bitcast (i8** getelementptr inbounds (i8** @_ZTVN10__cxxabiv117__class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([3 x i8]* @_ZTS1A, i32 0, i32 0) } ; <%0*> [#uses=1]
+@_ZTS1B = linkonce_odr constant [3 x i8] c"1B\00" ; <[3 x i8]*> [#uses=1]
+@_ZTI1B = linkonce_odr constant %0 { i8* bitcast (i8** getelementptr inbounds (i8** @_ZTVN10__cxxabiv117__class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([3 x i8]* @_ZTS1B, i32 0, i32 0) } ; <%0*> [#uses=1]
+@_ZTI1C = linkonce_odr constant %1 { i8* bitcast (i8** getelementptr inbounds (i8** @_ZTVN10__cxxabiv121__vmi_class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([3 x i8]* @_ZTS1C, i32 0, i32 0), i32 0, i32 2, i8* bitcast (%0* @_ZTI1A to i8*), i64 2, i8* bitcast (%0* @_ZTI1B to i8*), i64 2050 } ; <%1*> [#uses=1]
+@_ZTI1D = linkonce_odr constant %2 { i8* bitcast (i8** getelementptr inbounds (i8** @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([3 x i8]* @_ZTS1D, i32 0, i32 0), i8* bitcast (%1* @_ZTI1C to i8*) } ; <%2*> [#uses=1]
+@_ZTV1C = linkonce_odr constant [6 x i8*] [i8* null, i8* bitcast (%1* @_ZTI1C to i8*), i8* bitcast (i32 (%struct.C*)* @_ZN1C1fEv to i8*), i8* inttoptr (i64 -8 to i8*), i8* bitcast (%1* @_ZTI1C to i8*), i8* bitcast (i32 (%struct.C*)* @_ZThn8_N1C1fEv to i8*)] ; <[6 x i8*]*> [#uses=2]
+@_ZTV1B = linkonce_odr constant [3 x i8*] [i8* null, i8* bitcast (%0* @_ZTI1B to i8*), i8* bitcast (i32 (%struct.A*)* @_ZN1B1fEv to i8*)] ; <[3 x i8*]*> [#uses=1]
+@_ZTV1A = linkonce_odr constant [3 x i8*] [i8* null, i8* bitcast (%0* @_ZTI1A to i8*), i8* bitcast (i32 (%struct.A*)* @_ZN1A1fEv to i8*)] ; <[3 x i8*]*> [#uses=1]
+
+define i32 @_Z1gv() ssp {
+entry:
+ %d = alloca %struct.C, align 8 ; <%struct.C*> [#uses=2]
+ call void @_ZN1DC1Ev(%struct.C* %d)
+ %call = call i32 @_Z1fP1D(%struct.C* %d) ; <i32> [#uses=1]
+ %X = add i32 %call, 3
+ ret i32 %X
+}
+
+define linkonce_odr void @_ZN1DC1Ev(%struct.C* %this) inlinehint ssp align 2 {
+entry:
+ call void @_ZN1DC2Ev(%struct.C* %this)
+ ret void
+}
+
+define internal i32 @_Z1fP1D(%struct.C* %d) ssp {
+entry:
+ %0 = icmp eq %struct.C* %d, null ; <i1> [#uses=1]
+ br i1 %0, label %cast.end, label %cast.notnull
+
+cast.notnull: ; preds = %entry
+ %1 = bitcast %struct.C* %d to i8* ; <i8*> [#uses=1]
+ %add.ptr = getelementptr i8* %1, i64 8 ; <i8*> [#uses=1]
+ %2 = bitcast i8* %add.ptr to %struct.A* ; <%struct.A*> [#uses=1]
+ br label %cast.end
+
+cast.end: ; preds = %entry, %cast.notnull
+ %3 = phi %struct.A* [ %2, %cast.notnull ], [ null, %entry ] ; <%struct.A*> [#uses=2]
+ %4 = bitcast %struct.A* %3 to i32 (%struct.A*)*** ; <i32 (%struct.A*)***> [#uses=1]
+ %5 = load i32 (%struct.A*)*** %4 ; <i32 (%struct.A*)**> [#uses=1]
+ %vfn = getelementptr inbounds i32 (%struct.A*)** %5, i64 0 ; <i32 (%struct.A*)**> [#uses=1]
+ %6 = load i32 (%struct.A*)** %vfn ; <i32 (%struct.A*)*> [#uses=1]
+ %call = call i32 %6(%struct.A* %3) ; <i32> [#uses=1]
+ ret i32 %call
+}
+
+define linkonce_odr i32 @_ZN1D1fEv(%struct.C* %this) ssp align 2 {
+entry:
+ ret i32 4
+}
+
+define linkonce_odr i32 @_ZThn8_N1D1fEv(%struct.C* %this) {
+entry:
+ %0 = bitcast %struct.C* %this to i8* ; <i8*> [#uses=1]
+ %1 = getelementptr inbounds i8* %0, i64 -8 ; <i8*> [#uses=1]
+ %2 = bitcast i8* %1 to %struct.C* ; <%struct.C*> [#uses=1]
+ %call = call i32 @_ZN1D1fEv(%struct.C* %2) ; <i32> [#uses=1]
+ ret i32 %call
+}
+
+define linkonce_odr void @_ZN1DC2Ev(%struct.C* %this) inlinehint ssp align 2 {
+entry:
+ call void @_ZN1CC2Ev(%struct.C* %this)
+ %0 = bitcast %struct.C* %this to i8* ; <i8*> [#uses=1]
+ %1 = getelementptr inbounds i8* %0, i64 0 ; <i8*> [#uses=1]
+ %2 = bitcast i8* %1 to i8*** ; <i8***> [#uses=1]
+ store i8** getelementptr inbounds ([6 x i8*]* @_ZTV1D, i64 0, i64 2), i8*** %2
+ %3 = bitcast %struct.C* %this to i8* ; <i8*> [#uses=1]
+ %4 = getelementptr inbounds i8* %3, i64 8 ; <i8*> [#uses=1]
+ %5 = bitcast i8* %4 to i8*** ; <i8***> [#uses=1]
+ store i8** getelementptr inbounds ([6 x i8*]* @_ZTV1D, i64 0, i64 5), i8*** %5
+ ret void
+}
+
+define linkonce_odr void @_ZN1CC2Ev(%struct.C* %this) inlinehint ssp align 2 {
+entry:
+ %0 = bitcast %struct.C* %this to %struct.A* ; <%struct.A*> [#uses=1]
+ call void @_ZN1AC2Ev(%struct.A* %0)
+ %1 = bitcast %struct.C* %this to i8* ; <i8*> [#uses=1]
+ %2 = getelementptr inbounds i8* %1, i64 8 ; <i8*> [#uses=1]
+ %3 = bitcast i8* %2 to %struct.A* ; <%struct.A*> [#uses=1]
+ call void @_ZN1BC2Ev(%struct.A* %3)
+ %4 = bitcast %struct.C* %this to i8* ; <i8*> [#uses=1]
+ %5 = getelementptr inbounds i8* %4, i64 0 ; <i8*> [#uses=1]
+ %6 = bitcast i8* %5 to i8*** ; <i8***> [#uses=1]
+ store i8** getelementptr inbounds ([6 x i8*]* @_ZTV1C, i64 0, i64 2), i8*** %6
+ %7 = bitcast %struct.C* %this to i8* ; <i8*> [#uses=1]
+ %8 = getelementptr inbounds i8* %7, i64 8 ; <i8*> [#uses=1]
+ %9 = bitcast i8* %8 to i8*** ; <i8***> [#uses=1]
+ store i8** getelementptr inbounds ([6 x i8*]* @_ZTV1C, i64 0, i64 5), i8*** %9
+ ret void
+}
+
+define linkonce_odr i32 @_ZN1C1fEv(%struct.C* %this) ssp align 2 {
+entry:
+ ret i32 3
+}
+
+define linkonce_odr i32 @_ZThn8_N1C1fEv(%struct.C* %this) {
+entry:
+ %0 = bitcast %struct.C* %this to i8* ; <i8*> [#uses=1]
+ %1 = getelementptr inbounds i8* %0, i64 -8 ; <i8*> [#uses=1]
+ %2 = bitcast i8* %1 to %struct.C* ; <%struct.C*> [#uses=1]
+ %call = call i32 @_ZN1C1fEv(%struct.C* %2) ; <i32> [#uses=1]
+ ret i32 %call
+}
+
+define linkonce_odr void @_ZN1AC2Ev(%struct.A* %this) inlinehint ssp align 2 {
+entry:
+ %0 = bitcast %struct.A* %this to i8* ; <i8*> [#uses=1]
+ %1 = getelementptr inbounds i8* %0, i64 0 ; <i8*> [#uses=1]
+ %2 = bitcast i8* %1 to i8*** ; <i8***> [#uses=1]
+ store i8** getelementptr inbounds ([3 x i8*]* @_ZTV1A, i64 0, i64 2), i8*** %2
+ ret void
+}
+
+define linkonce_odr void @_ZN1BC2Ev(%struct.A* %this) inlinehint ssp align 2 {
+entry:
+ %0 = bitcast %struct.A* %this to i8* ; <i8*> [#uses=1]
+ %1 = getelementptr inbounds i8* %0, i64 0 ; <i8*> [#uses=1]
+ %2 = bitcast i8* %1 to i8*** ; <i8***> [#uses=1]
+ store i8** getelementptr inbounds ([3 x i8*]* @_ZTV1B, i64 0, i64 2), i8*** %2
+ ret void
+}
+
+define linkonce_odr i32 @_ZN1B1fEv(%struct.A* %this) ssp align 2 {
+entry:
+ ret i32 2
+}
+
+define linkonce_odr i32 @_ZN1A1fEv(%struct.A* %this) ssp align 2 {
+entry:
+ ret i32 1
+}
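
The mangled names suggest roughly the following C++ source (a reconstruction, not part of the patch): D derives from C, which derives from A and B, and the dispatch goes through the B-in-D subobject (hence the _ZThn8_ thunks adjusting by -8), so g() folds to D::f() + 3 == 7 as the CHECK lines above require.

    struct A { virtual int f() { return 1; } };
    struct B { virtual int f() { return 2; } };
    struct C : A, B { virtual int f() { return 3; } };
    struct D : C { virtual int f() { return 4; } };

    static int f(D *d) {
      return static_cast<B *>(d)->f();  // virtual dispatch via the B base
    }

    int g() {
      D d;
      return f(&d) + 3;                 // devirtualizes to D::f() + 3 == 7
    }
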
diff --git a/test/Transforms/Inline/externally_available.ll b/test/Transforms/Inline/externally_available.ll
index 43fe5d37f9e8..08b56385ac07 100644
--- a/test/Transforms/Inline/externally_available.ll
+++ b/test/Transforms/Inline/externally_available.ll
@@ -13,4 +13,4 @@ define i32 @result() {
%A = call i32 @test_function()
%B = add i32 %A, 1
ret i32 %B
-} \ No newline at end of file
+}
diff --git a/test/Transforms/Inline/gvn-inline-iteration.ll b/test/Transforms/Inline/gvn-inline-iteration.ll
new file mode 100644
index 000000000000..32144d4ebba5
--- /dev/null
+++ b/test/Transforms/Inline/gvn-inline-iteration.ll
@@ -0,0 +1,23 @@
+; RUN: opt -inline -gvn %s -S -max-cg-scc-iterations=1 | FileCheck %s
+; rdar://6295824 and PR6724
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+define i32 @foo(i32 ()** noalias nocapture %p, i64* noalias nocapture %q) nounwind ssp {
+entry:
+ store i32 ()* @bar, i32 ()** %p
+ store i64 0, i64* %q
+ %tmp3 = load i32 ()** %p ; <i32 ()*> [#uses=1]
+ %call = tail call i32 %tmp3() nounwind ; <i32> [#uses=1]
+ ret i32 %call
+}
+; CHECK: @foo
+; CHECK: ret i32 7
+; CHECK: @bar
+; CHECK: ret i32 7
+
+define internal i32 @bar() nounwind readnone ssp {
+entry:
+ ret i32 7
+}
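
A rough C++ analogue (illustrative only, not part of the patch): because the two pointers are noalias, GVN can forward the store of @bar past the store to *q, so even with the inliner capped at a single SCC iteration both @foo and @bar are expected to end in ret i32 7.

    typedef int (*Fn)();

    static int bar() { return 7; }

    // __restrict stands in for the IR's noalias; i64 is long on this target
    int foo(Fn *__restrict p, long *__restrict q) {
      *p = bar;       // store the function pointer
      *q = 0;         // cannot clobber *p
      return (*p)();  // load forwarded, call resolves to bar() == 7
    }
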
diff --git a/test/Transforms/Inline/indirect_resolve.ll b/test/Transforms/Inline/indirect_resolve.ll
deleted file mode 100644
index 76182e2fe4f6..000000000000
--- a/test/Transforms/Inline/indirect_resolve.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: opt < %s -inline | llvm-dis
-; PR4834
-
-define i32 @main() {
- %funcall1_ = call fastcc i32 ()* ()* @f1()
- %executecommandptr1_ = call i32 %funcall1_()
- ret i32 %executecommandptr1_
-}
-
-define internal fastcc i32 ()* @f1() nounwind readnone {
- ret i32 ()* @f2
-}
-
-define internal i32 @f2() nounwind readnone {
- ret i32 1
-}
diff --git a/test/Transforms/Inline/noinline-recursive-fn.ll b/test/Transforms/Inline/noinline-recursive-fn.ll
new file mode 100644
index 000000000000..1d5ebbbf0fa9
--- /dev/null
+++ b/test/Transforms/Inline/noinline-recursive-fn.ll
@@ -0,0 +1,73 @@
+; The inliner should never inline recursive functions into other functions.
+; Doing so effectively just peels off the first iteration of a loop, and the
+; inliner heuristics are not set up for this.
+
+; RUN: opt -inline %s -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.3"
+
+@g = common global i32 0 ; <i32*> [#uses=1]
+
+define internal void @foo(i32 %x) nounwind ssp {
+entry:
+ %0 = icmp slt i32 %x, 0 ; <i1> [#uses=1]
+ br i1 %0, label %return, label %bb
+
+bb: ; preds = %entry
+ %1 = sub nsw i32 %x, 1 ; <i32> [#uses=1]
+ call void @foo(i32 %1) nounwind ssp
+ volatile store i32 1, i32* @g, align 4
+ ret void
+
+return: ; preds = %entry
+ ret void
+}
+
+
+;; CHECK: @bonk
+;; CHECK: call void @foo(i32 42)
+define void @bonk() nounwind ssp {
+entry:
+ call void @foo(i32 42) nounwind ssp
+ ret void
+}
+
+
+
+;; Here is an indirect case that should not be infinitely inlined.
+
+define internal void @f1(i32 %x, i8* %Foo, i8* %Bar) nounwind ssp {
+entry:
+ %0 = bitcast i8* %Bar to void (i32, i8*, i8*)*
+ %1 = sub nsw i32 %x, 1
+ call void %0(i32 %1, i8* %Foo, i8* %Bar) nounwind
+ volatile store i32 42, i32* @g, align 4
+ ret void
+}
+
+define internal void @f2(i32 %x, i8* %Foo, i8* %Bar) nounwind ssp {
+entry:
+ %0 = icmp slt i32 %x, 0 ; <i1> [#uses=1]
+ br i1 %0, label %return, label %bb
+
+bb: ; preds = %entry
+ %1 = bitcast i8* %Foo to void (i32, i8*, i8*)* ; <void (i32, i8*, i8*)*> [#uses=1]
+ call void %1(i32 %x, i8* %Foo, i8* %Bar) nounwind
+ volatile store i32 13, i32* @g, align 4
+ ret void
+
+return: ; preds = %entry
+ ret void
+}
+
+
+; CHECK: @top_level
+; CHECK: call void @f2(i32 122
+; Here we inline one instance of the cycle, but we don't want to completely
+; unroll it.
+define void @top_level() nounwind ssp {
+entry:
+ call void @f2(i32 123, i8* bitcast (void (i32, i8*, i8*)* @f1 to i8*), i8* bitcast (void (i32, i8*, i8*)* @f2 to i8*)) nounwind ssp
+ ret void
+}
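
A rough C++ analogue of the first case (illustrative only, not part of the patch): inlining foo into bonk would merely peel one level of the recursion, so the CHECK lines insist the call survives.

    volatile int g;

    static void foo(int x) {
      if (x < 0)
        return;
      foo(x - 1);   // self-recursive: do not inline foo into its callers
      g = 1;        // volatile store, as in the IR
    }

    void bonk() {
      foo(42);      // must remain a call to foo
    }
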
diff --git a/test/Transforms/InstCombine/2007-12-28-IcmpSub2.ll b/test/Transforms/InstCombine/2007-12-28-IcmpSub2.ll
index b59548fd8e6f..8721c83521bf 100644
--- a/test/Transforms/InstCombine/2007-12-28-IcmpSub2.ll
+++ b/test/Transforms/InstCombine/2007-12-28-IcmpSub2.ll
@@ -86,4 +86,4 @@ entry:
%cmp = icmp ne i32 %sub, 0
%retval = select i1 %cmp, i32 1, i32 0
ret i32 %retval
-} \ No newline at end of file
+}
diff --git a/test/Transforms/InstCombine/invariant.ll b/test/Transforms/InstCombine/invariant.ll
index c67ad3319593..383238022692 100644
--- a/test/Transforms/InstCombine/invariant.ll
+++ b/test/Transforms/InstCombine/invariant.ll
@@ -3,13 +3,13 @@
declare void @g(i8*)
-declare { }* @llvm.invariant.start(i64, i8* nocapture) nounwind readonly
+declare {}* @llvm.invariant.start(i64, i8* nocapture) nounwind readonly
define i8 @f() {
%a = alloca i8 ; <i8*> [#uses=4]
store i8 0, i8* %a
- %i = call { }* @llvm.invariant.start(i64 1, i8* %a) ; <{ }*> [#uses=0]
- ; CHECK: call { }* @llvm.invariant.start
+ %i = call {}* @llvm.invariant.start(i64 1, i8* %a) ; <{}*> [#uses=0]
+ ; CHECK: call {}* @llvm.invariant.start
call void @g(i8* %a)
%r = load i8* %a ; <i8> [#uses=1]
ret i8 %r
diff --git a/test/Transforms/InstCombine/objsize.ll b/test/Transforms/InstCombine/objsize.ll
index d74312d2e7e9..664701bf2114 100644
--- a/test/Transforms/InstCombine/objsize.ll
+++ b/test/Transforms/InstCombine/objsize.ll
@@ -111,10 +111,10 @@ define i32 @test4() nounwind ssp {
entry:
%0 = alloca %struct.data, align 8
%1 = bitcast %struct.data* %0 to i8*
- %2 = call i64 @llvm.objectsize.i64(i8* %1, i1 false) nounwind
+ %2 = call i32 @llvm.objectsize.i32(i8* %1, i1 false) nounwind
; CHECK-NOT: @llvm.objectsize
-; CHECK: @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 1824, i32 8, i1 false)
- %3 = call i8* @__memset_chk(i8* %1, i32 0, i64 1824, i64 %2) nounwind
+; CHECK: @llvm.memset.p0i8.i32(i8* %1, i8 0, i32 1824, i32 8, i1 false)
+ %3 = call i8* @__memset_chk(i8* %1, i32 0, i32 1824, i32 %2) nounwind
ret i32 0
}
@@ -145,10 +145,8 @@ entry:
ret void
}
-declare i8* @__memset_chk(i8*, i32, i64, i64) nounwind
+declare i8* @__memset_chk(i8*, i32, i32, i32) nounwind
declare noalias i8* @malloc(i32) nounwind
declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readonly
-
-declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readonly
diff --git a/test/Transforms/InstCombine/odr-linkage.ll b/test/Transforms/InstCombine/odr-linkage.ll
index a64ef289a4b6..61365b4848ab 100644
--- a/test/Transforms/InstCombine/odr-linkage.ll
+++ b/test/Transforms/InstCombine/odr-linkage.ll
@@ -16,4 +16,4 @@ define i32 @test() {
%c = add i32 %b, %D
ret i32 %c
}
- \ No newline at end of file
+
diff --git a/test/Transforms/InstCombine/strcpy_chk.ll b/test/Transforms/InstCombine/strcpy_chk.ll
index a20a13c78b23..8835a0ba467c 100644
--- a/test/Transforms/InstCombine/strcpy_chk.ll
+++ b/test/Transforms/InstCombine/strcpy_chk.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
@a = common global [60 x i8] zeroinitializer, align 1 ; <[60 x i8]*> [#uses=1]
@.str = private constant [8 x i8] c"abcdefg\00" ; <[8 x i8]*> [#uses=1]
diff --git a/test/Transforms/InstCombine/vec_shuffle.ll b/test/Transforms/InstCombine/vec_shuffle.ll
index 29adc1e20890..5132a8ff9bd1 100644
--- a/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/test/Transforms/InstCombine/vec_shuffle.ll
@@ -86,4 +86,4 @@ define <4 x i8> @test9(<16 x i8> %tmp6) nounwind {
%tmp7 = shufflevector <16 x i8> %tmp6, <16 x i8> undef, <4 x i32> < i32 13, i32 9, i32 4, i32 13 > ; <<4 x i8>> [#uses=1]
%tmp9 = shufflevector <4 x i8> %tmp7, <4 x i8> undef, <4 x i32> < i32 3, i32 1, i32 2, i32 0 > ; <<4 x i8>> [#uses=1]
ret <4 x i8> %tmp9
-} \ No newline at end of file
+}
diff --git a/test/Transforms/JumpThreading/crash.ll b/test/Transforms/JumpThreading/crash.ll
index c65fd1014be4..21620bef9ccc 100644
--- a/test/Transforms/JumpThreading/crash.ll
+++ b/test/Transforms/JumpThreading/crash.ll
@@ -324,3 +324,20 @@ A: ; preds = %entry
call void undef(i64 ptrtoint (i8* blockaddress(@test11, %A) to i64)) nounwind
unreachable
}
+
+; PR6743
+define void @test12() nounwind ssp {
+entry:
+ br label %lbl_51
+
+lbl_51: ; preds = %if.then, %entry
+ %tmp3 = phi i1 [ false, %if.then ], [ undef, %entry ] ; <i1> [#uses=2]
+ br i1 %tmp3, label %if.end12, label %if.then
+
+if.then: ; preds = %lbl_51
+ br i1 %tmp3, label %lbl_51, label %if.end12
+
+if.end12: ; preds = %if.then, %lbl_51
+ ret void
+}
+
diff --git a/test/Transforms/LoopIndexSplit/2008-02-08-Crash.ll b/test/Transforms/LoopIndexSplit/2008-02-08-Crash.ll
index 084746494357..3cfd6c96b7b2 100644
--- a/test/Transforms/LoopIndexSplit/2008-02-08-Crash.ll
+++ b/test/Transforms/LoopIndexSplit/2008-02-08-Crash.ll
@@ -45,4 +45,4 @@ bb39: ; preds = %bb12, %bb3, %bb17
return: ; preds = %bb39
ret void
-} \ No newline at end of file
+}
diff --git a/test/Transforms/LoopIndexSplit/PR4174-2.ll b/test/Transforms/LoopIndexSplit/PR4174-2.ll
new file mode 100644
index 000000000000..cc17bc0a9337
--- /dev/null
+++ b/test/Transforms/LoopIndexSplit/PR4174-2.ll
@@ -0,0 +1,38 @@
+; RUN: llvm-as < %s | opt -loop-index-split | llvm-dis | not grep clone
+
+declare void @f()
+
+define fastcc i32 @main() nounwind {
+entry:
+ br label %bb1552
+
+bb1552:
+ %j295.0.reg2mem.0 = phi i32 [ %storemerge110, %bb1669 ], [ 0, %entry ]
+ br label %bb1553
+
+bb1553:
+ call void @f()
+ %tmp1628 = icmp sgt i32 %j295.0.reg2mem.0, -3
+ br i1 %tmp1628, label %bb1588, label %bb1616
+
+bb1588:
+ br label %bb1616
+
+bb1616:
+ %tmp1629 = icmp sgt i32 %j295.0.reg2mem.0, -3
+ br i1 %tmp1629, label %bb1649, label %bb1632
+
+bb1632:
+ br label %bb1669
+
+bb1649:
+ br label %bb1669
+
+bb1669:
+ %storemerge110 = add i32 %j295.0.reg2mem.0, 1
+ %tmp1672 = icmp sgt i32 %storemerge110, 3
+ br i1 %tmp1672, label %bb1678, label %bb1552
+
+bb1678:
+ ret i32 0
+}
diff --git a/test/Transforms/LoopIndexSplit/PR4174.ll b/test/Transforms/LoopIndexSplit/PR4174.ll
new file mode 100644
index 000000000000..e8f5a737f05b
--- /dev/null
+++ b/test/Transforms/LoopIndexSplit/PR4174.ll
@@ -0,0 +1,23 @@
+; RUN: llvm-as < %s | opt -loop-index-split | llvm-dis | not grep clone
+
+declare void @f()
+
+define i32 @main() {
+entry:
+ br label %head
+head:
+ %i = phi i32 [0, %entry], [%i1, %tail]
+ call void @f()
+ %splitcond = icmp slt i32 %i, 2
+ br i1 %splitcond, label %yes, label %no
+yes:
+ br label %tail
+no:
+ br label %tail
+tail:
+ %i1 = add i32 %i, 1
+ %exitcond = icmp slt i32 %i1, 4
+ br i1 %exitcond, label %head, label %exit
+exit:
+ ret i32 0
+}
diff --git a/test/Transforms/LoopStrengthReduce/insert-positions.ll b/test/Transforms/LoopStrengthReduce/insert-positions.ll
new file mode 100644
index 000000000000..1a695f35e3b0
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/insert-positions.ll
@@ -0,0 +1,69 @@
+; RUN: llc < %s -march=x86-64 >/dev/null
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+define void @test0() nounwind {
+if.end90.i.i:
+ br label %while.body.i.i221.i
+
+while.body.i.i221.i: ; preds = %while.cond.backedge.i.i.i, %if.end90.i.i
+ br i1 undef, label %if.then.i.i224.i, label %while.cond.backedge.i.i.i
+
+while.cond.backedge.i.i.i: ; preds = %for.end.i.i.i, %while.body.i.i221.i
+ br label %while.body.i.i221.i
+
+if.then.i.i224.i: ; preds = %while.body.i.i221.i
+ switch i32 undef, label %for.cond.i.i226.i [
+ i32 92, label %sw.bb.i.i225.i
+ i32 34, label %sw.bb.i.i225.i
+ i32 110, label %sw.bb21.i.i.i
+ ]
+
+sw.bb.i.i225.i: ; preds = %if.then.i.i224.i, %if.then.i.i224.i
+ unreachable
+
+sw.bb21.i.i.i: ; preds = %if.then.i.i224.i
+ unreachable
+
+for.cond.i.i226.i: ; preds = %for.body.i.i.i, %if.then.i.i224.i
+ %0 = phi i64 [ %tmp154.i.i.i, %for.body.i.i.i ], [ 0, %if.then.i.i224.i ] ; <i64> [#uses=2]
+ %tmp154.i.i.i = add i64 %0, 1 ; <i64> [#uses=2]
+ %i.0.i.i.i = trunc i64 %0 to i32 ; <i32> [#uses=1]
+ br i1 undef, label %land.rhs.i.i.i, label %for.end.i.i.i
+
+land.rhs.i.i.i: ; preds = %for.cond.i.i226.i
+ br i1 undef, label %for.body.i.i.i, label %for.end.i.i.i
+
+for.body.i.i.i: ; preds = %land.rhs.i.i.i
+ br label %for.cond.i.i226.i
+
+for.end.i.i.i: ; preds = %land.rhs.i.i.i, %for.cond.i.i226.i
+ %idx.ext.i.i.i = sext i32 %i.0.i.i.i to i64 ; <i64> [#uses=1]
+ %sub.ptr72.sum.i.i.i = xor i64 %idx.ext.i.i.i, -1 ; <i64> [#uses=1]
+ %pos.addr.1.sum155.i.i.i = add i64 %tmp154.i.i.i, %sub.ptr72.sum.i.i.i ; <i64> [#uses=1]
+ %arrayidx76.i.i.i = getelementptr inbounds i8* undef, i64 %pos.addr.1.sum155.i.i.i ; <i8*> [#uses=0]
+ br label %while.cond.backedge.i.i.i
+}
+
+define void @test1() nounwind {
+entry:
+ %t = shl i32 undef, undef ; <i32> [#uses=1]
+ %t9 = sub nsw i32 0, %t ; <i32> [#uses=1]
+ br label %outer
+
+outer: ; preds = %bb18, %bb
+ %i12 = phi i32 [ %t21, %bb18 ], [ 0, %entry ] ; <i32> [#uses=2]
+ %i13 = phi i32 [ %t20, %bb18 ], [ 0, %entry ] ; <i32> [#uses=2]
+ br label %inner
+
+inner: ; preds = %bb16, %bb11
+ %t17 = phi i32 [ %i13, %outer ], [ undef, %inner ] ; <i32> [#uses=1]
+ store i32 %t17, i32* undef
+ br i1 undef, label %bb18, label %inner
+
+bb18: ; preds = %bb16
+ %t19 = add i32 %i13, %t9 ; <i32> [#uses=1]
+ %t20 = add i32 %t19, %i12 ; <i32> [#uses=1]
+ %t21 = add i32 %i12, 1 ; <i32> [#uses=1]
+ br label %outer
+}
diff --git a/test/Transforms/LoopStrengthReduce/pr2537.ll b/test/Transforms/LoopStrengthReduce/pr2537.ll
index 73c3152d30e7..46ad70e736d8 100644
--- a/test/Transforms/LoopStrengthReduce/pr2537.ll
+++ b/test/Transforms/LoopStrengthReduce/pr2537.ll
@@ -18,4 +18,4 @@ afterdo: ; preds = %dobody
ret void
}
-declare void @b(i128 %add) \ No newline at end of file
+declare void @b(i128 %add)
diff --git a/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll b/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll
index 8959c1774099..59f14fcd1ce5 100644
--- a/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll
+++ b/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -analyze -iv-users | grep {\{1,+,3,+,2\}<%loop> (post-inc)}
+; RUN: opt < %s -analyze -iv-users | grep {\{1,+,3,+,2\}<%loop> (post-inc with loop %loop)}
; The value of %r is dependent on a polynomial iteration expression.
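
For reference, the chrec {1,+,3,+,2}<%loop> starts at 1 with a first difference that starts at 3 and itself grows by 2 each iteration: 1, 4, 9, 16, ..., i.e. (n+1)^2 at iteration n, which is the polynomial mentioned above. The updated pattern only tracks IVUsers now printing which loop the post-increment value belongs to.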
diff --git a/test/Transforms/LoopStrengthReduce/uglygep.ll b/test/Transforms/LoopStrengthReduce/uglygep.ll
new file mode 100644
index 000000000000..dca97e9ad187
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/uglygep.ll
@@ -0,0 +1,67 @@
+; RUN: opt < %s -loop-reduce -S | not grep uglygep
+
+; LSR shouldn't consider %t8 to be an interesting user of %t6, and it
+; should be able to form pretty GEPs.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @Z4() nounwind {
+bb:
+ br label %bb3
+
+bb1: ; preds = %bb3
+ br i1 undef, label %bb10, label %bb2
+
+bb2: ; preds = %bb1
+ %t = add i64 %t4, 1 ; <i64> [#uses=1]
+ br label %bb3
+
+bb3: ; preds = %bb2, %bb
+ %t4 = phi i64 [ %t, %bb2 ], [ 0, %bb ] ; <i64> [#uses=3]
+ br label %bb1
+
+bb10: ; preds = %bb9
+ %t7 = icmp eq i64 %t4, 0 ; <i1> [#uses=1]
+ %t3 = add i64 %t4, 16 ; <i64> [#uses=1]
+ br label %bb14
+
+bb14: ; preds = %bb14, %bb10
+ %t2 = getelementptr inbounds i8* undef, i64 %t4 ; <i8*> [#uses=1]
+ store i8 undef, i8* %t2
+ %t6 = load float** undef
+ %t8 = bitcast float* %t6 to i8* ; <i8*> [#uses=1]
+ %t9 = getelementptr inbounds i8* %t8, i64 %t3 ; <i8*> [#uses=1]
+ store i8 undef, i8* %t9
+ br label %bb14
+}
+
+define fastcc void @TransformLine() nounwind {
+bb:
+ br label %loop0
+
+loop0: ; preds = %loop0, %bb
+ %i0 = phi i32 [ %i0.next, %loop0 ], [ 0, %bb ] ; <i32> [#uses=2]
+ %i0.next = add i32 %i0, 1 ; <i32> [#uses=1]
+ br i1 false, label %loop0, label %bb0
+
+bb0: ; preds = %loop0
+ br label %loop1
+
+loop1: ; preds = %bb5, %bb0
+ %i1 = phi i32 [ 0, %bb0 ], [ %i1.next, %bb5 ] ; <i32> [#uses=4]
+ %t0 = add i32 %i0, %i1 ; <i32> [#uses=1]
+ br i1 false, label %bb2, label %bb6
+
+bb2: ; preds = %loop1
+ br i1 true, label %bb6, label %bb5
+
+bb5: ; preds = %bb2
+ %i1.next = add i32 %i1, 1 ; <i32> [#uses=1]
+ br i1 true, label %bb6, label %loop1
+
+bb6: ; preds = %bb5, %bb2, %loop1
+ %p8 = phi i32 [ %t0, %bb5 ], [ undef, %loop1 ], [ undef, %bb2 ] ; <i32> [#uses=0]
+ %p9 = phi i32 [ undef, %bb5 ], [ %i1, %loop1 ], [ %i1, %bb2 ] ; <i32> [#uses=0]
+ unreachable
+}
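
A rough C++ shape of @Z4 (illustrative only, not part of the patch): the base of the second store is reloaded on every iteration of the second loop, so per the comment at the top of the test it should not be folded into raw byte arithmetic on the induction variable; the address should remain a plain GEP off the loaded base.

    void Z4(char *a, float **pp, volatile bool *c) {
      long i = 0;
      while (*c)                    // first loop: i is the induction variable
        ++i;
      long j = i + 16;              // %t3 = %t4 + 16
      for (;;) {                    // second loop: i and j are invariant here
        a[i] = 0;
        char *base = (char *)*pp;   // %t6/%t8: base reloaded each iteration
        base[j] = 0;                // should stay base[j], not an "uglygep"
      }
    }
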
diff --git a/test/Transforms/LoopUnswitch/crash.ll b/test/Transforms/LoopUnswitch/crash.ll
index fac55a6bb1a7..101fb7a2c2ce 100644
--- a/test/Transforms/LoopUnswitch/crash.ll
+++ b/test/Transforms/LoopUnswitch/crash.ll
@@ -45,3 +45,22 @@ for.body: ; preds = %for.body, %bb.nph
for.end: ; preds = %for.body, %entry
ret void
}
+
+; PR6879
+define i32* @test3(i32** %p_45, i16 zeroext %p_46, i64 %p_47, i64 %p_48, i16 signext %p_49) nounwind {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.cond4, %entry
+ br i1 false, label %for.cond4, label %for.end88
+
+for.cond4: ; preds = %for.cond
+ %conv46 = trunc i32 0 to i8 ; <i8> [#uses=2]
+ %cmp60 = icmp sgt i8 %conv46, 124 ; <i1> [#uses=1]
+ %or.cond = and i1 undef, %cmp60 ; <i1> [#uses=1]
+ %cond = select i1 %or.cond, i8 %conv46, i8 undef ; <i8> [#uses=0]
+ br label %for.cond
+
+for.end88: ; preds = %for.cond
+ ret i32* undef
+}
diff --git a/test/Transforms/PruneEH/2008-09-05-CGUpdate.ll b/test/Transforms/PruneEH/2008-09-05-CGUpdate.ll
index 347af8f8463c..33e0cfa3ce53 100644
--- a/test/Transforms/PruneEH/2008-09-05-CGUpdate.ll
+++ b/test/Transforms/PruneEH/2008-09-05-CGUpdate.ll
@@ -1,6 +1,6 @@
; RUN: opt < %s -prune-eh -inline -print-callgraph \
; RUN: -disable-output |& \
-; RUN: grep {Calls.*ce3806g__fxio__put__put_int64__4.1339} | count 2
+; RUN: grep {calls.*ce3806g__fxio__put__put_int64__4.1339} | count 2
%struct.FRAME.ce3806g = type { %struct.string___XUB, %struct.string___XUB, %struct.string___XUB, %struct.string___XUB }
%struct.FRAME.ce3806g__fxio__put__4 = type { i32, i32, i32, %struct.system__file_control_block__pstring*, i32, i32, i8 }
%struct.RETURN = type { i8, i32 }
diff --git a/test/Transforms/SCCP/ipsccp-basic.ll b/test/Transforms/SCCP/ipsccp-basic.ll
index e3699209a35b..a3c7637f986b 100644
--- a/test/Transforms/SCCP/ipsccp-basic.ll
+++ b/test/Transforms/SCCP/ipsccp-basic.ll
@@ -188,7 +188,7 @@ define void @test8b(i32* %P) {
%X = call {} @test8a(i32 5, i32* %P)
ret void
; CHECK: define void @test8b
-; CHECK-NEXT: call { } @test8a
+; CHECK-NEXT: call {} @test8a
; CHECK-NEXT: ret void
}
diff --git a/test/Transforms/SCCP/undef-resolve.ll b/test/Transforms/SCCP/undef-resolve.ll
new file mode 100644
index 000000000000..bed561c8e4f2
--- /dev/null
+++ b/test/Transforms/SCCP/undef-resolve.ll
@@ -0,0 +1,106 @@
+; RUN: opt %s -sccp -S | FileCheck %s
+
+
+; PR6940
+define double @test1() {
+ %t = sitofp i32 undef to double
+ ret double %t
+; CHECK: @test1
+; CHECK: ret double 0.0
+}
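+; For reference: SCCP has to commit the undef operand to one concrete value,
+; and it resolves the sitofp to +0.0 rather than propagating undef; a double
+; produced by sitofp can never be, say, a NaN, so plain undef would claim
+; more freedom than the instruction really has.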
+
+
+; rdar://7832370
+; Check that lots of stuff doesn't get turned into undef.
+define i32 @test2() nounwind readnone ssp {
+; CHECK: @test2
+init:
+ br label %control.outer.outer
+
+control.outer.loopexit.us-lcssa: ; preds = %control
+ br label %control.outer.loopexit
+
+control.outer.loopexit: ; preds = %control.outer.loopexit.us-lcssa.us, %control.outer.loopexit.us-lcssa
+ br label %control.outer.outer.backedge
+
+control.outer.outer: ; preds = %control.outer.outer.backedge, %init
+ %switchCond.0.ph.ph = phi i32 [ 2, %init ], [ 3, %control.outer.outer.backedge ] ; <i32> [#uses=2]
+ %i.0.ph.ph = phi i32 [ undef, %init ], [ %i.0.ph.ph.be, %control.outer.outer.backedge ] ; <i32> [#uses=1]
+ %tmp4 = icmp eq i32 %i.0.ph.ph, 0 ; <i1> [#uses=1]
+ br i1 %tmp4, label %control.outer.outer.split.us, label %control.outer.outer.control.outer.outer.split_crit_edge
+
+control.outer.outer.control.outer.outer.split_crit_edge: ; preds = %control.outer.outer
+ br label %control.outer
+
+control.outer.outer.split.us: ; preds = %control.outer.outer
+ br label %control.outer.us
+
+control.outer.us: ; preds = %bb3.us, %control.outer.outer.split.us
+ %A.0.ph.us = phi i32 [ %switchCond.0.us, %bb3.us ], [ 4, %control.outer.outer.split.us ] ; <i32> [#uses=2]
+ %switchCond.0.ph.us = phi i32 [ %A.0.ph.us, %bb3.us ], [ %switchCond.0.ph.ph, %control.outer.outer.split.us ] ; <i32> [#uses=1]
+ br label %control.us
+
+bb3.us: ; preds = %control.us
+ br label %control.outer.us
+
+bb0.us: ; preds = %control.us
+ br label %control.us
+
+; CHECK: control.us: ; preds = %bb0.us, %control.outer.us
+; CHECK-NEXT: %switchCond.0.us = phi i32
+; CHECK-NEXT: switch i32 %switchCond.0.us
+control.us: ; preds = %bb0.us, %control.outer.us
+ %switchCond.0.us = phi i32 [ %A.0.ph.us, %bb0.us ], [ %switchCond.0.ph.us, %control.outer.us ] ; <i32> [#uses=2]
+ switch i32 %switchCond.0.us, label %control.outer.loopexit.us-lcssa.us [
+ i32 0, label %bb0.us
+ i32 1, label %bb1.us-lcssa.us
+ i32 3, label %bb3.us
+ i32 4, label %bb4.us-lcssa.us
+ ]
+
+control.outer.loopexit.us-lcssa.us: ; preds = %control.us
+ br label %control.outer.loopexit
+
+bb1.us-lcssa.us: ; preds = %control.us
+ br label %bb1
+
+bb4.us-lcssa.us: ; preds = %control.us
+ br label %bb4
+
+control.outer: ; preds = %bb3, %control.outer.outer.control.outer.outer.split_crit_edge
+ %A.0.ph = phi i32 [ %nextId17, %bb3 ], [ 4, %control.outer.outer.control.outer.outer.split_crit_edge ] ; <i32> [#uses=1]
+ %switchCond.0.ph = phi i32 [ 0, %bb3 ], [ %switchCond.0.ph.ph, %control.outer.outer.control.outer.outer.split_crit_edge ] ; <i32> [#uses=1]
+ br label %control
+
+control: ; preds = %bb0, %control.outer
+ %switchCond.0 = phi i32 [ %A.0.ph, %bb0 ], [ %switchCond.0.ph, %control.outer ] ; <i32> [#uses=2]
+ switch i32 %switchCond.0, label %control.outer.loopexit.us-lcssa [
+ i32 0, label %bb0
+ i32 1, label %bb1.us-lcssa
+ i32 3, label %bb3
+ i32 4, label %bb4.us-lcssa
+ ]
+
+bb4.us-lcssa: ; preds = %control
+ br label %bb4
+
+bb4: ; preds = %bb4.us-lcssa, %bb4.us-lcssa.us
+ br label %control.outer.outer.backedge
+
+control.outer.outer.backedge: ; preds = %bb4, %control.outer.loopexit
+ %i.0.ph.ph.be = phi i32 [ 1, %bb4 ], [ 0, %control.outer.loopexit ] ; <i32> [#uses=1]
+ br label %control.outer.outer
+
+bb3: ; preds = %control
+ %nextId17 = add i32 %switchCond.0, -2 ; <i32> [#uses=1]
+ br label %control.outer
+
+bb0: ; preds = %control
+ br label %control
+
+bb1.us-lcssa: ; preds = %control
+ br label %bb1
+
+bb1: ; preds = %bb1.us-lcssa, %bb1.us-lcssa.us
+ ret i32 0
+}
diff --git a/test/Transforms/ScalarRepl/memcpy-align.ll b/test/Transforms/ScalarRepl/memcpy-align.ll
new file mode 100644
index 000000000000..91d354d31a3b
--- /dev/null
+++ b/test/Transforms/ScalarRepl/memcpy-align.ll
@@ -0,0 +1,32 @@
+; RUN: opt %s -scalarrepl -S | FileCheck %s
+; PR6832
+target datalayout =
+"e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32"
+target triple = "arm-u-u"
+
+%0 = type { %struct.anon, %struct.anon }
+%struct.anon = type { [4 x i8] }
+
+@c = external global %0 ; <%0*> [#uses=1]
+
+define arm_aapcscc void @good() nounwind {
+entry:
+ %x0 = alloca %struct.anon, align 4 ; <%struct.anon*> [#uses=2]
+ %tmp = bitcast %struct.anon* %x0 to i8* ; <i8*> [#uses=1]
+ call void @llvm.memset.p0i8.i32(i8* %tmp, i8 0, i32 4, i32 4, i1 false)
+ %tmp1 = bitcast %struct.anon* %x0 to i8* ; <i8*> [#uses=1]
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds (%0* @c, i32
+0, i32 0, i32 0, i32 0), i8* %tmp1, i32 4, i32 4, i1 false)
+ ret void
+
+; CHECK: store i8 0, i8*{{.*}}, align 4
+; CHECK: store i8 0, i8*{{.*}}, align 1
+; CHECK: store i8 0, i8*{{.*}}, align 2
+; CHECK: store i8 0, i8*{{.*}}, align 1
+}
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32,
+i1) nounwind
+
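
For reference, the CHECK alignments follow from byte offsets against a 4-byte-aligned base: offset 0 can be stored align 4, offset 1 only align 1, offset 2 align 2, and offset 3 align 1, i.e. the largest power of two dividing the offset, capped at the base alignment. Getting that wrong matters on a strict-alignment target like the ARM triple above.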
diff --git a/test/Transforms/TailCallElim/inf-recursion.ll b/test/Transforms/TailCallElim/inf-recursion.ll
index a5f246d36ce1..e4ac9283aec5 100644
--- a/test/Transforms/TailCallElim/inf-recursion.ll
+++ b/test/Transforms/TailCallElim/inf-recursion.ll
@@ -1,6 +1,10 @@
-; RUN: opt < %s -tailcallelim -S | grep call
+; RUN: opt < %s -tailcallelim -S | FileCheck %s
+
; Don't turn this into an infinite loop; this is probably the implementation
; of fabs, and we expect the codegen to lower fabs.
+; CHECK: @fabs(double %f)
+; CHECK: call
+; CHECK: ret
define double @fabs(double %f) {
entry:
@@ -8,3 +12,23 @@ entry:
ret double %tmp2
}
+; Do turn other calls into infinite loops though.
+
+; CHECK: define double @foo
+; CHECK-NOT: call
+; CHECK: }
+define double @foo(double %f) {
+ %t= call double @foo(double %f)
+ ret double %t
+}
+
+; CHECK: define float @fabsf
+; CHECK-NOT: call
+; CHECK: }
+define float @fabsf(float %f) {
+ %t= call float @fabsf(float 2.0)
+ ret float %t
+}
+
+declare float @fabsf(float %f)
+declare x86_fp80 @fabsl(x86_fp80 %f)
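
A rough C++ picture of the two behaviors (illustrative only, not part of the patch): a genuine self tail call can be rewritten as a branch back to the entry block, but for @fabs the recursive-looking call is really a reference to the libm function that codegen is expected to lower, so eliminating it would wrongly produce an infinite loop.

    double foo(double f) {   // before -tailcallelim: return foo(f);
      (void)f;               // parameter kept to mirror @foo's signature
      for (;;) {
        // after: the self call is gone and only the back-edge remains,
        // which is why CHECK-NOT rejects any 'call' left in @foo
      }
    }
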
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index 8b5d77e2c42d..b9c77b1c8a38 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -31,6 +31,7 @@ add_subdirectory(llvm-extract)
add_subdirectory(bugpoint)
add_subdirectory(llvm-bcanalyzer)
add_subdirectory(llvm-stub)
+add_subdirectory(edis)
add_subdirectory(llvmc)
if( EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/clang/CMakeLists.txt )
diff --git a/tools/Makefile b/tools/Makefile
index e124422cd6de..9d2e576dfbdc 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -15,13 +15,16 @@ OPTIONAL_PARALLEL_DIRS := clang
# NOTE: The tools are organized into five groups of four consisting of one
# large and three small executables. This is done to minimize memory load
# in parallel builds. Please retain this ordering.
-DIRS := llvm-config
+
+# libEnhancedDisassembly must be built ahead of llvm-mc
+# because llvm-mc links against libEnhancedDisassembly
+DIRS := llvm-config edis llvm-mc
PARALLEL_DIRS := opt llvm-as llvm-dis \
llc llvm-ranlib llvm-ar llvm-nm \
llvm-ld llvm-prof llvm-link \
lli llvm-extract \
bugpoint llvm-bcanalyzer llvm-stub \
- llvm-mc llvmc
+ llvmc
# Let users override the set of tools to build from the command line.
ifdef ONLY_TOOLS
@@ -36,8 +39,6 @@ include $(LEVEL)/Makefile.config
ifeq ($(ENABLE_PIC),1)
# No support for dynamic libraries on windows targets.
ifneq ($(TARGET_OS), $(filter $(TARGET_OS), Cygwin MingW))
- PARALLEL_DIRS += edis
-
# gold only builds if binutils is around. It requires "lto" to build before
# it so it is added to DIRS.
ifdef BINUTILS_INCDIR
@@ -48,11 +49,6 @@ ifeq ($(ENABLE_PIC),1)
endif
endif
-# Only build edis if X86 target support is enabled.
-ifeq ($(filter $(TARGETS_TO_BUILD), X86),)
- PARALLEL_DIRS := $(filter-out edis, $(PARALLEL_DIRS))
-endif
-
# Don't build edis if we explicitly disabled it.
ifeq ($(DISABLE_EDIS),1)
PARALLEL_DIRS := $(filter-out edis, $(PARALLEL_DIRS))
diff --git a/tools/bugpoint/BugDriver.cpp b/tools/bugpoint/BugDriver.cpp
index d676a4374ac6..45a0d4dd17a3 100644
--- a/tools/bugpoint/BugDriver.cpp
+++ b/tools/bugpoint/BugDriver.cpp
@@ -115,30 +115,25 @@ bool BugDriver::addSources(const std::vector<std::string> &Filenames) {
assert(Program == 0 && "Cannot call addSources multiple times!");
assert(!Filenames.empty() && "Must specify at least one input filename!");
- try {
- // Load the first input file.
- Program = ParseInputFile(Filenames[0], Context);
- if (Program == 0) return true;
+ // Load the first input file.
+ Program = ParseInputFile(Filenames[0], Context);
+ if (Program == 0) return true;
- if (!run_as_child)
- outs() << "Read input file : '" << Filenames[0] << "'\n";
+ if (!run_as_child)
+ outs() << "Read input file : '" << Filenames[0] << "'\n";
- for (unsigned i = 1, e = Filenames.size(); i != e; ++i) {
- std::auto_ptr<Module> M(ParseInputFile(Filenames[i], Context));
- if (M.get() == 0) return true;
+ for (unsigned i = 1, e = Filenames.size(); i != e; ++i) {
+ std::auto_ptr<Module> M(ParseInputFile(Filenames[i], Context));
+ if (M.get() == 0) return true;
- if (!run_as_child)
- outs() << "Linking in input file: '" << Filenames[i] << "'\n";
- std::string ErrorMessage;
- if (Linker::LinkModules(Program, M.get(), &ErrorMessage)) {
- errs() << ToolName << ": error linking in '" << Filenames[i] << "': "
- << ErrorMessage << '\n';
- return true;
- }
+ if (!run_as_child)
+ outs() << "Linking in input file: '" << Filenames[i] << "'\n";
+ std::string ErrorMessage;
+ if (Linker::LinkModules(Program, M.get(), &ErrorMessage)) {
+ errs() << ToolName << ": error linking in '" << Filenames[i] << "': "
+ << ErrorMessage << '\n';
+ return true;
}
- } catch (const std::string &Error) {
- errs() << ToolName << ": error reading input '" << Error << "'\n";
- return true;
}
if (!run_as_child)
@@ -153,7 +148,7 @@ bool BugDriver::addSources(const std::vector<std::string> &Filenames) {
/// run - The top level method that is invoked after all of the instance
/// variables are set up from command line arguments.
///
-bool BugDriver::run() {
+bool BugDriver::run(std::string &ErrMsg) {
// The first thing to do is determine if we're running as a child. If we are,
// then what to do is very narrow. This form of invocation is only called
// from the runPasses method to actually run those passes in a child process.
@@ -165,7 +160,7 @@ bool BugDriver::run() {
if (run_find_bugs) {
// Rearrange the passes and apply them to the program. Repeat this process
// until the user kills the program or we find a bug.
- return runManyPasses(PassesToRun);
+ return runManyPasses(PassesToRun, ErrMsg);
}
// If we're not running as a child, the first thing that we must do is
@@ -186,14 +181,13 @@ bool BugDriver::run() {
// Test to see if we have a code generator crash.
outs() << "Running the code generator to test for a crash: ";
- try {
- compileProgram(Program);
- outs() << '\n';
- } catch (ToolExecutionError &TEE) {
- outs() << TEE.what();
- return debugCodeGeneratorCrash();
+ std::string Error;
+ compileProgram(Program, &Error);
+ if (!Error.empty()) {
+ outs() << Error;
+ return debugCodeGeneratorCrash(ErrMsg);
}
-
+ outs() << '\n';
// Run the raw input to see where we are coming from. If a reference output
// was specified, make sure that the raw output matches it. If not, it's a
@@ -202,8 +196,8 @@ bool BugDriver::run() {
bool CreatedOutput = false;
if (ReferenceOutputFile.empty()) {
outs() << "Generating reference output from raw program: ";
- if(!createReferenceFile(Program)){
- return debugCodeGeneratorCrash();
+ if (!createReferenceFile(Program)) {
+ return debugCodeGeneratorCrash(ErrMsg);
}
CreatedOutput = true;
}
@@ -217,24 +211,29 @@ bool BugDriver::run() {
// matches, then we assume there is a miscompilation bug and try to
// diagnose it.
outs() << "*** Checking the code generator...\n";
- try {
- if (!diffProgram()) {
- outs() << "\n*** Output matches: Debugging miscompilation!\n";
- return debugMiscompilation();
+ bool Diff = diffProgram("", "", false, &Error);
+ if (!Error.empty()) {
+ errs() << Error;
+ return debugCodeGeneratorCrash(ErrMsg);
+ }
+ if (!Diff) {
+ outs() << "\n*** Output matches: Debugging miscompilation!\n";
+ debugMiscompilation(&Error);
+ if (!Error.empty()) {
+ errs() << Error;
+ return debugCodeGeneratorCrash(ErrMsg);
}
- } catch (ToolExecutionError &TEE) {
- errs() << TEE.what();
- return debugCodeGeneratorCrash();
+ return false;
}
outs() << "\n*** Input program does not match reference diff!\n";
outs() << "Debugging code generator problem!\n";
- try {
- return debugCodeGenerator();
- } catch (ToolExecutionError &TEE) {
- errs() << TEE.what();
- return debugCodeGeneratorCrash();
+ bool Failure = debugCodeGenerator(&Error);
+ if (!Error.empty()) {
+ errs() << Error;
+ return debugCodeGeneratorCrash(ErrMsg);
}
+ return Failure;
}
void llvm::PrintFunctionList(const std::vector<Function*> &Funcs) {
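
A minimal standalone sketch of the error-handling style this file is migrated to (names abbreviated; not the actual bugpoint code): operations that used to throw ToolExecutionError now take a std::string out-parameter, callers test it for non-empty, and run() forwards the message through its own ErrMsg parameter.

    #include <iostream>
    #include <string>

    static void compileProgram(std::string *Error) {
      // on failure, describe it instead of throwing, e.g.:
      // *Error = "llc crashed";
    }

    static bool run(std::string &ErrMsg) {
      std::string Error;
      compileProgram(&Error);
      if (!Error.empty()) {        // replaces catch (ToolExecutionError&)
        std::cerr << Error;
        ErrMsg = Error;
        return true;               // true signals failure, as in BugDriver
      }
      return false;
    }

    int main() {
      std::string ErrMsg;
      return run(ErrMsg) ? 1 : 0;
    }
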
diff --git a/tools/bugpoint/BugDriver.h b/tools/bugpoint/BugDriver.h
index abc249868de2..e5b7373f04c0 100644
--- a/tools/bugpoint/BugDriver.h
+++ b/tools/bugpoint/BugDriver.h
@@ -88,7 +88,7 @@ public:
/// variables are set up from command line arguments. The \p as_child argument
/// indicates whether the driver is to run in parent mode or child mode.
///
- bool run();
+ bool run(std::string &ErrMsg);
/// debugOptimizerCrash - This method is called when some optimizer pass
/// crashes on input. It attempts to prune down the testcase to something
@@ -99,12 +99,12 @@ public:
/// debugCodeGeneratorCrash - This method is called when the code generator
/// crashes on an input. It attempts to reduce the input as much as possible
/// while still causing the code generator to crash.
- bool debugCodeGeneratorCrash();
+ bool debugCodeGeneratorCrash(std::string &Error);
/// debugMiscompilation - This method is used when the passes selected are not
/// crashing, but the generated output is semantically different from the
/// input.
- bool debugMiscompilation();
+ void debugMiscompilation(std::string *Error);
/// debugPassMiscompilation - This method is called when the specified pass
/// miscompiles Program as input. It tries to reduce the testcase to
@@ -118,12 +118,13 @@ public:
/// compileSharedObject - This method creates a SharedObject from a given
/// BitcodeFile for debugging a code generator.
///
- std::string compileSharedObject(const std::string &BitcodeFile);
+ std::string compileSharedObject(const std::string &BitcodeFile,
+ std::string &Error);
/// debugCodeGenerator - This method narrows down a module to a function or
/// set of functions, using the CBE as a ``safe'' code generator for other
/// functions that are not under consideration.
- bool debugCodeGenerator();
+ bool debugCodeGenerator(std::string *Error);
/// isExecutingJIT - Returns true if bugpoint is currently testing the JIT
///
@@ -164,28 +165,27 @@ public:
/// the specified one as the current program.
void setNewProgram(Module *M);
- /// compileProgram - Try to compile the specified module, throwing an
- /// exception if an error occurs, or returning normally if not. This is used
- /// for code generation crash testing.
+ /// compileProgram - Try to compile the specified module, setting Error
+ /// if an error occurs. This is used for code generation
+ /// crash testing.
///
- void compileProgram(Module *M);
+ void compileProgram(Module *M, std::string *Error);
/// executeProgram - This method runs "Program", capturing the output of the
- /// program to a file, returning the filename of the file. A recommended
- /// filename may be optionally specified. If there is a problem with the code
- /// generator (e.g., llc crashes), this will throw an exception.
+ /// program to a file. A recommended filename may be optionally specified.
///
- std::string executeProgram(std::string RequestedOutputFilename = "",
- std::string Bitcode = "",
- const std::string &SharedObjects = "",
- AbstractInterpreter *AI = 0,
- bool *ProgramExitedNonzero = 0);
+ std::string executeProgram(std::string OutputFilename,
+ std::string Bitcode,
+ const std::string &SharedObjects,
+ AbstractInterpreter *AI,
+ std::string *Error);
/// executeProgramSafely - Used to create reference output with the "safe"
/// backend, if reference output is not provided. If there is a problem with
- /// the code generator (e.g., llc crashes), this will throw an exception.
+ /// the code generator (e.g., llc crashes), this will set
+ /// Error.
///
- std::string executeProgramSafely(std::string OutputFile = "");
+ std::string executeProgramSafely(std::string OutputFile, std::string *Error);
/// createReferenceFile - calls compileProgram and then records the output
/// into ReferenceOutputFile. Returns true if reference file created, false
@@ -197,13 +197,14 @@ public:
/// diffProgram - This method executes the specified module and diffs the
/// output against the file specified by ReferenceOutputFile. If the output
- /// is different, true is returned. If there is a problem with the code
- /// generator (e.g., llc crashes), this will throw an exception.
+ /// is different, true is returned. If there is a problem with the code
+ /// generator (e.g., llc crashes), this will set Error.
///
bool diffProgram(const std::string &BitcodeFile = "",
const std::string &SharedObj = "",
- bool RemoveBitcode = false);
-
+ bool RemoveBitcode = false,
+ std::string *Error = 0);
+
/// EmitProgressBitcode - This function is used to output the current Program
/// to a file named "bugpoint-ID.bc".
///
@@ -267,7 +268,8 @@ public:
/// If the passes did not compile correctly, output the command required to
/// recreate the failure. This returns true if a compiler error is found.
///
- bool runManyPasses(const std::vector<const PassInfo*> &AllPasses);
+ bool runManyPasses(const std::vector<const PassInfo*> &AllPasses,
+ std::string &ErrMsg);
/// writeProgramToFile - This writes the current "Program" to the named
/// bitcode file. If an error occurs, true is returned.
diff --git a/tools/bugpoint/CMakeLists.txt b/tools/bugpoint/CMakeLists.txt
index 90f24bac5826..34b759fd8794 100644
--- a/tools/bugpoint/CMakeLists.txt
+++ b/tools/bugpoint/CMakeLists.txt
@@ -1,6 +1,5 @@
set(LLVM_LINK_COMPONENTS asmparser instrumentation scalaropts ipo
linker bitreader bitwriter)
-set(LLVM_REQUIRES_EH 1)
add_llvm_tool(bugpoint
BugDriver.cpp
diff --git a/tools/bugpoint/CrashDebugger.cpp b/tools/bugpoint/CrashDebugger.cpp
index b51bdb450938..46b33d2a7268 100644
--- a/tools/bugpoint/CrashDebugger.cpp
+++ b/tools/bugpoint/CrashDebugger.cpp
@@ -53,13 +53,15 @@ namespace llvm {
// passes. If we return true, we update the current module of bugpoint.
//
virtual TestResult doTest(std::vector<const PassInfo*> &Removed,
- std::vector<const PassInfo*> &Kept);
+ std::vector<const PassInfo*> &Kept,
+ std::string &Error);
};
}
ReducePassList::TestResult
ReducePassList::doTest(std::vector<const PassInfo*> &Prefix,
- std::vector<const PassInfo*> &Suffix) {
+ std::vector<const PassInfo*> &Suffix,
+ std::string &Error) {
sys::Path PrefixOutput;
Module *OrigProgram = 0;
if (!Prefix.empty()) {
@@ -107,27 +109,26 @@ namespace {
bool (*TestFn)(BugDriver &, Module *);
public:
ReduceCrashingGlobalVariables(BugDriver &bd,
- bool (*testFn)(BugDriver&, Module*))
+ bool (*testFn)(BugDriver &, Module *))
: BD(bd), TestFn(testFn) {}
- virtual TestResult doTest(std::vector<GlobalVariable*>& Prefix,
- std::vector<GlobalVariable*>& Kept) {
+ virtual TestResult doTest(std::vector<GlobalVariable*> &Prefix,
+ std::vector<GlobalVariable*> &Kept,
+ std::string &Error) {
if (!Kept.empty() && TestGlobalVariables(Kept))
return KeepSuffix;
-
if (!Prefix.empty() && TestGlobalVariables(Prefix))
return KeepPrefix;
-
return NoFailure;
}
- bool TestGlobalVariables(std::vector<GlobalVariable*>& GVs);
+ bool TestGlobalVariables(std::vector<GlobalVariable*> &GVs);
};
}
bool
ReduceCrashingGlobalVariables::TestGlobalVariables(
- std::vector<GlobalVariable*>& GVs) {
+ std::vector<GlobalVariable*> &GVs) {
// Clone the program to try hacking it apart...
DenseMap<const Value*, Value*> ValueMap;
Module *M = CloneModule(BD.getProgram(), ValueMap);
@@ -182,7 +183,8 @@ namespace llvm {
: BD(bd), TestFn(testFn) {}
virtual TestResult doTest(std::vector<Function*> &Prefix,
- std::vector<Function*> &Kept) {
+ std::vector<Function*> &Kept,
+ std::string &Error) {
if (!Kept.empty() && TestFuncs(Kept))
return KeepSuffix;
if (!Prefix.empty() && TestFuncs(Prefix))
@@ -253,7 +255,8 @@ namespace {
: BD(bd), TestFn(testFn) {}
virtual TestResult doTest(std::vector<const BasicBlock*> &Prefix,
- std::vector<const BasicBlock*> &Kept) {
+ std::vector<const BasicBlock*> &Kept,
+ std::string &Error) {
if (!Kept.empty() && TestBlocks(Kept))
return KeepSuffix;
if (!Prefix.empty() && TestBlocks(Prefix))
@@ -355,7 +358,8 @@ namespace {
: BD(bd), TestFn(testFn) {}
virtual TestResult doTest(std::vector<const Instruction*> &Prefix,
- std::vector<const Instruction*> &Kept) {
+ std::vector<const Instruction*> &Kept,
+ std::string &Error) {
if (!Kept.empty() && TestInsts(Kept))
return KeepSuffix;
if (!Prefix.empty() && TestInsts(Prefix))
@@ -421,7 +425,8 @@ bool ReduceCrashingInstructions::TestInsts(std::vector<const Instruction*>
/// DebugACrash - Given a predicate that determines whether a component crashes
/// on a program, try to destructively reduce the program while still keeping
/// the predicate true.
-static bool DebugACrash(BugDriver &BD, bool (*TestFn)(BugDriver &, Module *)) {
+static bool DebugACrash(BugDriver &BD, bool (*TestFn)(BugDriver &, Module *),
+ std::string &Error) {
// See if we can get away with nuking some of the global variable initializers
// in the program...
if (!NoGlobalRM &&
@@ -464,7 +469,9 @@ static bool DebugACrash(BugDriver &BD, bool (*TestFn)(BugDriver &, Module *)) {
<< "variables in the testcase\n";
unsigned OldSize = GVs.size();
- ReduceCrashingGlobalVariables(BD, TestFn).reduceList(GVs);
+ ReduceCrashingGlobalVariables(BD, TestFn).reduceList(GVs, Error);
+ if (!Error.empty())
+ return true;
if (GVs.size() < OldSize)
BD.EmitProgressBitcode("reduced-global-variables");
@@ -485,7 +492,7 @@ static bool DebugACrash(BugDriver &BD, bool (*TestFn)(BugDriver &, Module *)) {
"in the testcase\n";
unsigned OldSize = Functions.size();
- ReduceCrashingFunctions(BD, TestFn).reduceList(Functions);
+ ReduceCrashingFunctions(BD, TestFn).reduceList(Functions, Error);
if (Functions.size() < OldSize)
BD.EmitProgressBitcode("reduced-function");
@@ -503,7 +510,7 @@ static bool DebugACrash(BugDriver &BD, bool (*TestFn)(BugDriver &, Module *)) {
for (Function::const_iterator FI = I->begin(), E = I->end(); FI !=E; ++FI)
Blocks.push_back(FI);
unsigned OldSize = Blocks.size();
- ReduceCrashingBlocks(BD, TestFn).reduceList(Blocks);
+ ReduceCrashingBlocks(BD, TestFn).reduceList(Blocks, Error);
if (Blocks.size() < OldSize)
BD.EmitProgressBitcode("reduced-blocks");
}
@@ -521,7 +528,7 @@ static bool DebugACrash(BugDriver &BD, bool (*TestFn)(BugDriver &, Module *)) {
if (!isa<TerminatorInst>(I))
Insts.push_back(I);
- ReduceCrashingInstructions(BD, TestFn).reduceList(Insts);
+ ReduceCrashingInstructions(BD, TestFn).reduceList(Insts, Error);
}
// FIXME: This should use the list reducer to converge faster by deleting
@@ -614,9 +621,11 @@ static bool TestForOptimizerCrash(BugDriver &BD, Module *M) {
bool BugDriver::debugOptimizerCrash(const std::string &ID) {
outs() << "\n*** Debugging optimizer crash!\n";
+ std::string Error;
// Reduce the list of passes which causes the optimizer to crash...
if (!BugpointIsInterrupted)
- ReducePassList(*this).reduceList(PassesToRun);
+ ReducePassList(*this).reduceList(PassesToRun, Error);
+ assert(Error.empty());
outs() << "\n*** Found crashing pass"
<< (PassesToRun.size() == 1 ? ": " : "es: ")
@@ -624,25 +633,27 @@ bool BugDriver::debugOptimizerCrash(const std::string &ID) {
EmitProgressBitcode(ID);
- return DebugACrash(*this, TestForOptimizerCrash);
+ bool Success = DebugACrash(*this, TestForOptimizerCrash, Error);
+ assert(Error.empty());
+ return Success;
}
static bool TestForCodeGenCrash(BugDriver &BD, Module *M) {
- try {
- BD.compileProgram(M);
- errs() << '\n';
- return false;
- } catch (ToolExecutionError &) {
+ std::string Error;
+ BD.compileProgram(M, &Error);
+ if (!Error.empty()) {
errs() << "<crash>\n";
return true; // Tool is still crashing.
}
+ errs() << '\n';
+ return false;
}
/// debugCodeGeneratorCrash - This method is called when the code generator
/// crashes on an input. It attempts to reduce the input as much as possible
/// while still causing the code generator to crash.
-bool BugDriver::debugCodeGeneratorCrash() {
+bool BugDriver::debugCodeGeneratorCrash(std::string &Error) {
errs() << "*** Debugging code generator crash!\n";
- return DebugACrash(*this, TestForCodeGenCrash);
+ return DebugACrash(*this, TestForCodeGenCrash, Error);
}
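
The hunks above replace bugpoint's exception-based failure reporting with an explicit std::string out-parameter: callees fill Error, and callers test it before trusting any return value. A minimal self-contained sketch of that convention (hypothetical names, not code from the tree):

#include <iostream>
#include <string>

// Callee: report failure by filling Error instead of throwing.
static bool RunTool(const std::string &Input, std::string &Error) {
  if (Input.empty()) {
    Error = "tool failed: no input file";
    return false;  // the return value is secondary once Error is set
  }
  return true;
}

int main() {
  std::string Error;
  RunTool("", Error);
  if (!Error.empty()) {  // callers test Error, not the return value
    std::cerr << Error << '\n';
    return 1;
  }
  return 0;
}
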
diff --git a/tools/bugpoint/ExecutionDriver.cpp b/tools/bugpoint/ExecutionDriver.cpp
index 7228c01567a9..9eb33149692e 100644
--- a/tools/bugpoint/ExecutionDriver.cpp
+++ b/tools/bugpoint/ExecutionDriver.cpp
@@ -278,15 +278,15 @@ bool BugDriver::initializeExecutionEnvironment() {
return Interpreter == 0;
}
-/// compileProgram - Try to compile the specified module, throwing an exception
-/// if an error occurs, or returning normally if not. This is used for code
-/// generation crash testing.
+/// compileProgram - Try to compile the specified module, setting Error to a
+/// message describing the failure if one occurs. This is used for code
+/// generation crash testing.
///
-void BugDriver::compileProgram(Module *M) {
+void BugDriver::compileProgram(Module *M, std::string *Error) {
// Emit the program to a bitcode file...
sys::Path BitcodeFile (OutputPrefix + "-test-program.bc");
std::string ErrMsg;
- if (BitcodeFile.makeUnique(true,&ErrMsg)) {
+ if (BitcodeFile.makeUnique(true, &ErrMsg)) {
errs() << ToolName << ": Error making unique filename: " << ErrMsg
<< "\n";
exit(1);
@@ -297,11 +297,11 @@ void BugDriver::compileProgram(Module *M) {
exit(1);
}
- // Remove the temporary bitcode file when we are done.
+ // Remove the temporary bitcode file when we are done.
FileRemover BitcodeFileRemover(BitcodeFile, !SaveTemps);
// Actually compile the program!
- Interpreter->compileProgram(BitcodeFile.str());
+ Interpreter->compileProgram(BitcodeFile.str(), Error);
}
@@ -313,7 +313,7 @@ std::string BugDriver::executeProgram(std::string OutputFile,
std::string BitcodeFile,
const std::string &SharedObj,
AbstractInterpreter *AI,
- bool *ProgramExitedNonzero) {
+ std::string *Error) {
if (AI == 0) AI = Interpreter;
assert(AI && "Interpreter should have been created already!");
bool CreatedBitcode = false;
@@ -337,7 +337,7 @@ std::string BugDriver::executeProgram(std::string OutputFile,
}
// Remove the temporary bitcode file when we are done.
- sys::Path BitcodePath (BitcodeFile);
+ sys::Path BitcodePath(BitcodeFile);
FileRemover BitcodeFileRemover(BitcodePath, CreatedBitcode && !SaveTemps);
if (OutputFile.empty()) OutputFile = OutputPrefix + "-execution-output";
@@ -356,9 +356,11 @@ std::string BugDriver::executeProgram(std::string OutputFile,
if (!SharedObj.empty())
SharedObjs.push_back(SharedObj);
- int RetVal = AI->ExecuteProgram(BitcodeFile, InputArgv, InputFile,
- OutputFile, AdditionalLinkerArgs, SharedObjs,
+ int RetVal = AI->ExecuteProgram(BitcodeFile, InputArgv, InputFile, OutputFile,
+ Error, AdditionalLinkerArgs, SharedObjs,
Timeout, MemoryLimit);
+ if (!Error->empty())
+ return OutputFile;
if (RetVal == -1) {
errs() << "<timeout>";
@@ -379,9 +381,6 @@ std::string BugDriver::executeProgram(std::string OutputFile,
outFile.close();
}
- if (ProgramExitedNonzero != 0)
- *ProgramExitedNonzero = (RetVal != 0);
-
// Return the filename we captured the output to.
return OutputFile;
}
@@ -389,23 +388,28 @@ std::string BugDriver::executeProgram(std::string OutputFile,
/// executeProgramSafely - Used to create reference output with the "safe"
/// backend, if reference output is not provided.
///
-std::string BugDriver::executeProgramSafely(std::string OutputFile) {
- bool ProgramExitedNonzero;
- std::string outFN = executeProgram(OutputFile, "", "", SafeInterpreter,
- &ProgramExitedNonzero);
- return outFN;
+std::string BugDriver::executeProgramSafely(std::string OutputFile,
+ std::string *Error) {
+ return executeProgram(OutputFile, "", "", SafeInterpreter, Error);
}
-std::string BugDriver::compileSharedObject(const std::string &BitcodeFile) {
+std::string BugDriver::compileSharedObject(const std::string &BitcodeFile,
+ std::string &Error) {
assert(Interpreter && "Interpreter should have been created already!");
sys::Path OutputFile;
// Using the known-good backend.
- GCC::FileType FT = SafeInterpreter->OutputCode(BitcodeFile, OutputFile);
+ GCC::FileType FT = SafeInterpreter->OutputCode(BitcodeFile, OutputFile,
+ Error);
+ if (!Error.empty())
+ return "";
std::string SharedObjectFile;
- if (gcc->MakeSharedObject(OutputFile.str(), FT,
- SharedObjectFile, AdditionalLinkerArgs))
+ bool Failure = gcc->MakeSharedObject(OutputFile.str(), FT, SharedObjectFile,
+ AdditionalLinkerArgs, Error);
+ if (!Error.empty())
+ return "";
+ if (Failure)
exit(1);
// Remove the intermediate C file
@@ -420,16 +424,14 @@ std::string BugDriver::compileSharedObject(const std::string &BitcodeFile) {
/// this function.
///
bool BugDriver::createReferenceFile(Module *M, const std::string &Filename) {
- try {
- compileProgram(Program);
- } catch (ToolExecutionError &) {
+ std::string Error;
+ compileProgram(Program, &Error);
+ if (!Error.empty())
return false;
- }
- try {
- ReferenceOutputFile = executeProgramSafely(Filename);
- outs() << "\nReference output is: " << ReferenceOutputFile << "\n\n";
- } catch (ToolExecutionError &TEE) {
- errs() << TEE.what();
+
+ ReferenceOutputFile = executeProgramSafely(Filename, &Error);
+ if (!Error.empty()) {
+ errs() << Error;
if (Interpreter != SafeInterpreter) {
errs() << "*** There is a bug running the \"safe\" backend. Either"
<< " debug it (for example with the -run-cbe bugpoint option,"
@@ -438,22 +440,23 @@ bool BugDriver::createReferenceFile(Module *M, const std::string &Filename) {
}
return false;
}
+ outs() << "\nReference output is: " << ReferenceOutputFile << "\n\n";
return true;
}
/// diffProgram - This method executes the specified module and diffs the
/// output against the file specified by ReferenceOutputFile. If the output
-/// is different, true is returned. If there is a problem with the code
-/// generator (e.g., llc crashes), this will throw an exception.
+/// is different, true is returned. If there is a problem with the code
+/// generator (e.g., llc crashes), this will set ErrMsg and return false.
///
bool BugDriver::diffProgram(const std::string &BitcodeFile,
const std::string &SharedObject,
- bool RemoveBitcode) {
- bool ProgramExitedNonzero;
-
+ bool RemoveBitcode,
+ std::string *ErrMsg) {
// Execute the program, generating an output file...
- sys::Path Output(executeProgram("", BitcodeFile, SharedObject, 0,
- &ProgramExitedNonzero));
+ sys::Path Output(executeProgram("", BitcodeFile, SharedObject, 0, ErrMsg));
+ if (!ErrMsg->empty())
+ return false;
std::string Error;
bool FilesDifferent = false;
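
With this change executeProgram hands back the output filename unconditionally and signals tool failure only through *Error, so the result is meaningful only when Error stays empty. A hedged sketch of that contract, with stand-in logic in place of the real tool invocation:

#include <string>

// Sketch only: stand-in for BugDriver::executeProgram's error contract.
static std::string ExecuteProgram(const std::string &Bitcode,
                                  std::string *Error) {
  std::string OutputFile = Bitcode + ".out";
  bool ToolCrashed = Bitcode.empty();  // placeholder for a real tool run
  if (ToolCrashed) {
    *Error = "<crash> while executing " + Bitcode;
    return OutputFile;  // still returned; check Error before using it
  }
  return OutputFile;
}

int main() {
  std::string Error;
  std::string Out = ExecuteProgram("prog.bc", &Error);
  return Error.empty() && !Out.empty() ? 0 : 1;
}
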
diff --git a/tools/bugpoint/FindBugs.cpp b/tools/bugpoint/FindBugs.cpp
index 2c11d29f60d5..224c71747a6f 100644
--- a/tools/bugpoint/FindBugs.cpp
+++ b/tools/bugpoint/FindBugs.cpp
@@ -29,7 +29,8 @@ using namespace llvm;
/// If the passes did not compile correctly, output the command required to
/// recreate the failure. This returns true if a compiler error is found.
///
-bool BugDriver::runManyPasses(const std::vector<const PassInfo*> &AllPasses) {
+bool BugDriver::runManyPasses(const std::vector<const PassInfo*> &AllPasses,
+ std::string &ErrMsg) {
setPassesToRun(AllPasses);
outs() << "Starting bug finding procedure...\n\n";
@@ -57,7 +58,7 @@ bool BugDriver::runManyPasses(const std::vector<const PassInfo*> &AllPasses) {
//
outs() << "Running selected passes on program to test for crash: ";
for(int i = 0, e = PassesToRun.size(); i != e; i++) {
- outs() << "-" << PassesToRun[i]->getPassArgument( )<< " ";
+ outs() << "-" << PassesToRun[i]->getPassArgument() << " ";
}
std::string Filename;
@@ -74,33 +75,33 @@ bool BugDriver::runManyPasses(const std::vector<const PassInfo*> &AllPasses) {
// Step 3: Compile the optimized code.
//
outs() << "Running the code generator to test for a crash: ";
- try {
- compileProgram(Program);
- outs() << '\n';
- } catch (ToolExecutionError &TEE) {
+ std::string Error;
+ compileProgram(Program, &Error);
+ if (!Error.empty()) {
outs() << "\n*** compileProgram threw an exception: ";
- outs() << TEE.what();
- return debugCodeGeneratorCrash();
+ outs() << Error;
+ return debugCodeGeneratorCrash(ErrMsg);
}
+ outs() << '\n';
//
// Step 4: Run the program and compare its output to the reference
// output (created above).
//
outs() << "*** Checking if passes caused miscompliation:\n";
- try {
- if (diffProgram(Filename, "", false)) {
- outs() << "\n*** diffProgram returned true!\n";
- debugMiscompilation();
+ bool Diff = diffProgram(Filename, "", false, &Error);
+ if (Error.empty() && Diff) {
+ outs() << "\n*** diffProgram returned true!\n";
+ debugMiscompilation(&Error);
+ if (Error.empty())
return true;
- } else {
- outs() << "\n*** diff'd output matches!\n";
- }
- } catch (ToolExecutionError &TEE) {
- errs() << TEE.what();
- debugCodeGeneratorCrash();
+ }
+ if (!Error.empty()) {
+ errs() << Error;
+ debugCodeGeneratorCrash(ErrMsg);
return true;
}
+ outs() << "\n*** diff'd output matches!\n";
sys::Path(Filename).eraseFromDisk();
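
runManyPasses now distinguishes a genuine miscompilation (Diff true, Error empty) from broken tooling (Error non-empty), and the two travel on separate channels. A small sketch of the two-channel check, with hypothetical helpers:

#include <string>

static bool DiffProgram(std::string *Error) {
  // Placeholder: a real implementation would run and compare outputs.
  (void)Error;
  return true;
}

static bool PassesCausedMiscompilation() {
  std::string Error;
  bool Diff = DiffProgram(&Error);
  if (!Error.empty())
    return false;  // infrastructure failure, not a miscompilation
  return Diff;     // true only when the outputs really differ
}

int main() { return PassesCausedMiscompilation() ? 1 : 0; }
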
diff --git a/tools/bugpoint/ListReducer.h b/tools/bugpoint/ListReducer.h
index 8036d1f54499..5e9cff0efa82 100644
--- a/tools/bugpoint/ListReducer.h
+++ b/tools/bugpoint/ListReducer.h
@@ -16,6 +16,7 @@
#define BUGPOINT_LIST_REDUCER_H
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ErrorHandling.h"
#include <vector>
#include <cstdlib>
#include <algorithm>
@@ -29,7 +30,8 @@ struct ListReducer {
enum TestResult {
NoFailure, // No failure of the predicate was detected
KeepSuffix, // The suffix alone satisfies the predicate
- KeepPrefix // The prefix alone satisfies the predicate
+ KeepPrefix, // The prefix alone satisfies the predicate
+ InternalError // Encountered an error trying to run the predicate
};
virtual ~ListReducer() {}
@@ -40,16 +42,17 @@ struct ListReducer {
// the prefix anyway, it can.
//
virtual TestResult doTest(std::vector<ElTy> &Prefix,
- std::vector<ElTy> &Kept) = 0;
+ std::vector<ElTy> &Kept,
+ std::string &Error) = 0;
// reduceList - This function attempts to reduce the length of the specified
// list while still maintaining the "test" property. This is the core of the
// "work" that bugpoint does.
//
- bool reduceList(std::vector<ElTy> &TheList) {
+ bool reduceList(std::vector<ElTy> &TheList, std::string &Error) {
std::vector<ElTy> empty;
std::srand(0x6e5ea738); // Seed the random number generator
- switch (doTest(TheList, empty)) {
+ switch (doTest(TheList, empty, Error)) {
case KeepPrefix:
if (TheList.size() == 1) // we are done, it's the base case and it fails
return true;
@@ -58,11 +61,15 @@ struct ListReducer {
case KeepSuffix:
// cannot be reached!
- errs() << "bugpoint ListReducer internal error: selected empty set.\n";
- abort();
+ llvm_unreachable("bugpoint ListReducer internal error: "
+ "selected empty set.");
case NoFailure:
return false; // there is no failure with the full set of passes/funcs!
+
+ case InternalError:
+ assert(!Error.empty());
+ return true;
}
// Maximal number of allowed splitting iterations,
@@ -90,7 +97,7 @@ Backjump:
std::random_shuffle(ShuffledList.begin(), ShuffledList.end());
errs() << "\n\n*** Testing shuffled set...\n\n";
      // Check that random shuffle doesn't lose the bug
- if (doTest(ShuffledList, empty) == KeepPrefix) {
+ if (doTest(ShuffledList, empty, Error) == KeepPrefix) {
// If the bug is still here, use the shuffled list.
TheList.swap(ShuffledList);
MidTop = TheList.size();
@@ -109,7 +116,7 @@ Backjump:
std::vector<ElTy> Prefix(TheList.begin(), TheList.begin()+Mid);
std::vector<ElTy> Suffix(TheList.begin()+Mid, TheList.end());
- switch (doTest(Prefix, Suffix)) {
+ switch (doTest(Prefix, Suffix, Error)) {
case KeepSuffix:
// The property still holds. We can just drop the prefix elements, and
// shorten the list to the "kept" elements.
@@ -133,7 +140,10 @@ Backjump:
MidTop = Mid;
NumOfIterationsWithoutProgress++;
break;
+ case InternalError:
+ return true; // Error was set by doTest.
}
+    assert(Error.empty() && "doTest set Error but did not return InternalError");
}
// Probability of backjumping from the trimming loop back to the binary
@@ -167,12 +177,14 @@ Backjump:
std::vector<ElTy> TestList(TheList);
TestList.erase(TestList.begin()+i);
- if (doTest(EmptyList, TestList) == KeepSuffix) {
+ if (doTest(EmptyList, TestList, Error) == KeepSuffix) {
// We can trim down the list!
TheList.swap(TestList);
--i; // Don't skip an element of the list
Changed = true;
}
+ if (!Error.empty())
+ return true;
}
// This can take a long time if left uncontrolled. For now, don't
// iterate.
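
The new InternalError result lets a doTest implementation abort the reduction loop cleanly instead of throwing. A simplified re-creation of the contract (not the actual ListReducer template):

#include <string>
#include <vector>

enum TestResult { NoFailure, KeepSuffix, KeepPrefix, InternalError };

template <typename ElTy>
static TestResult doTest(std::vector<ElTy> &Prefix, std::vector<ElTy> &Kept,
                         std::string &Error) {
  bool PredicateBroke = false;  // stand-in for a failed tool invocation
  if (PredicateBroke) {
    Error = "predicate could not be evaluated";
    return InternalError;  // the reducer returns immediately with Error set
  }
  if (!Kept.empty())
    return KeepSuffix;
  if (!Prefix.empty())
    return KeepPrefix;
  return NoFailure;
}

int main() {
  std::vector<int> P(1, 42), K;
  std::string Error;
  return doTest(P, K, Error) == KeepPrefix ? 0 : 1;
}
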
diff --git a/tools/bugpoint/Makefile b/tools/bugpoint/Makefile
index b821b6c7bd23..5d287ef188ae 100644
--- a/tools/bugpoint/Makefile
+++ b/tools/bugpoint/Makefile
@@ -12,6 +12,5 @@ TOOLNAME = bugpoint
LINK_COMPONENTS := asmparser instrumentation scalaropts ipo \
linker bitreader bitwriter
-REQUIRES_EH := 1
include $(LEVEL)/Makefile.common
diff --git a/tools/bugpoint/Miscompilation.cpp b/tools/bugpoint/Miscompilation.cpp
index c2b002f9dd5c..45bb745a9acc 100644
--- a/tools/bugpoint/Miscompilation.cpp
+++ b/tools/bugpoint/Miscompilation.cpp
@@ -49,7 +49,8 @@ namespace {
ReduceMiscompilingPasses(BugDriver &bd) : BD(bd) {}
virtual TestResult doTest(std::vector<const PassInfo*> &Prefix,
- std::vector<const PassInfo*> &Suffix);
+ std::vector<const PassInfo*> &Suffix,
+ std::string &Error);
};
}
@@ -58,7 +59,8 @@ namespace {
///
ReduceMiscompilingPasses::TestResult
ReduceMiscompilingPasses::doTest(std::vector<const PassInfo*> &Prefix,
- std::vector<const PassInfo*> &Suffix) {
+ std::vector<const PassInfo*> &Suffix,
+ std::string &Error) {
// First, run the program with just the Suffix passes. If it is still broken
// with JUST the kept passes, discard the prefix passes.
outs() << "Checking to see if '" << getPassesString(Suffix)
@@ -74,7 +76,11 @@ ReduceMiscompilingPasses::doTest(std::vector<const PassInfo*> &Prefix,
}
// Check to see if the finished program matches the reference output...
- if (BD.diffProgram(BitcodeResult, "", true /*delete bitcode*/)) {
+ bool Diff = BD.diffProgram(BitcodeResult, "", true /*delete bitcode*/,
+ &Error);
+ if (!Error.empty())
+ return InternalError;
+ if (Diff) {
outs() << " nope.\n";
if (Suffix.empty()) {
errs() << BD.getToolName() << ": I'm confused: the test fails when "
@@ -107,7 +113,10 @@ ReduceMiscompilingPasses::doTest(std::vector<const PassInfo*> &Prefix,
}
// If the prefix maintains the predicate by itself, only keep the prefix!
- if (BD.diffProgram(BitcodeResult)) {
+ Diff = BD.diffProgram(BitcodeResult, "", false, &Error);
+ if (!Error.empty())
+ return InternalError;
+ if (Diff) {
outs() << " nope.\n";
sys::Path(BitcodeResult).eraseFromDisk();
return KeepPrefix;
@@ -143,7 +152,10 @@ ReduceMiscompilingPasses::doTest(std::vector<const PassInfo*> &Prefix,
}
// Run the result...
- if (BD.diffProgram(BitcodeResult, "", true/*delete bitcode*/)) {
+ Diff = BD.diffProgram(BitcodeResult, "", true /*delete bitcode*/, &Error);
+ if (!Error.empty())
+ return InternalError;
+ if (Diff) {
outs() << " nope.\n";
delete OriginalInput; // We pruned down the original input...
return KeepSuffix;
@@ -158,22 +170,34 @@ ReduceMiscompilingPasses::doTest(std::vector<const PassInfo*> &Prefix,
namespace {
class ReduceMiscompilingFunctions : public ListReducer<Function*> {
BugDriver &BD;
- bool (*TestFn)(BugDriver &, Module *, Module *);
+ bool (*TestFn)(BugDriver &, Module *, Module *, std::string &);
public:
ReduceMiscompilingFunctions(BugDriver &bd,
- bool (*F)(BugDriver &, Module *, Module *))
+ bool (*F)(BugDriver &, Module *, Module *,
+ std::string &))
: BD(bd), TestFn(F) {}
virtual TestResult doTest(std::vector<Function*> &Prefix,
- std::vector<Function*> &Suffix) {
- if (!Suffix.empty() && TestFuncs(Suffix))
- return KeepSuffix;
- if (!Prefix.empty() && TestFuncs(Prefix))
- return KeepPrefix;
+ std::vector<Function*> &Suffix,
+ std::string &Error) {
+ if (!Suffix.empty()) {
+ bool Ret = TestFuncs(Suffix, Error);
+ if (!Error.empty())
+ return InternalError;
+ if (Ret)
+ return KeepSuffix;
+ }
+ if (!Prefix.empty()) {
+ bool Ret = TestFuncs(Prefix, Error);
+ if (!Error.empty())
+ return InternalError;
+ if (Ret)
+ return KeepPrefix;
+ }
return NoFailure;
}
- bool TestFuncs(const std::vector<Function*> &Prefix);
+ int TestFuncs(const std::vector<Function*> &Prefix, std::string &Error);
};
}
@@ -184,7 +208,7 @@ namespace {
/// returns.
///
static bool TestMergedProgram(BugDriver &BD, Module *M1, Module *M2,
- bool DeleteInputs) {
+ bool DeleteInputs, std::string &Error) {
// Link the two portions of the program back to together.
std::string ErrorMsg;
if (!DeleteInputs) {
@@ -202,11 +226,12 @@ static bool TestMergedProgram(BugDriver &BD, Module *M1, Module *M2,
// Execute the program. If it does not match the expected output, we must
// return true.
- bool Broken = BD.diffProgram();
-
- // Delete the linked module & restore the original
- BD.swapProgramIn(OldProgram);
- delete M1;
+ bool Broken = BD.diffProgram("", "", false, &Error);
+ if (!Error.empty()) {
+ // Delete the linked module & restore the original
+ BD.swapProgramIn(OldProgram);
+ delete M1;
+ }
return Broken;
}
@@ -214,7 +239,8 @@ static bool TestMergedProgram(BugDriver &BD, Module *M1, Module *M2,
/// under consideration for miscompilation vs. those that are not, and test
/// accordingly. Each group of functions becomes a separate Module.
///
-bool ReduceMiscompilingFunctions::TestFuncs(const std::vector<Function*>&Funcs){
+int ReduceMiscompilingFunctions::TestFuncs(const std::vector<Function*> &Funcs,
+ std::string &Error) {
// Test to see if the function is misoptimized if we ONLY run it on the
// functions listed in Funcs.
outs() << "Checking to see if the program is misoptimized when "
@@ -231,7 +257,7 @@ bool ReduceMiscompilingFunctions::TestFuncs(const std::vector<Function*>&Funcs){
ValueMap);
// Run the predicate, note that the predicate will delete both input modules.
- return TestFn(BD, ToOptimize, ToNotOptimize);
+ return TestFn(BD, ToOptimize, ToNotOptimize, Error);
}
/// DisambiguateGlobalSymbols - Give anonymous global values names.
@@ -251,8 +277,10 @@ static void DisambiguateGlobalSymbols(Module *M) {
/// bug. If so, it reduces the amount of code identified.
///
static bool ExtractLoops(BugDriver &BD,
- bool (*TestFn)(BugDriver &, Module *, Module *),
- std::vector<Function*> &MiscompiledFunctions) {
+ bool (*TestFn)(BugDriver &, Module *, Module *,
+ std::string &),
+ std::vector<Function*> &MiscompiledFunctions,
+ std::string &Error) {
bool MadeChange = false;
while (1) {
if (BugpointIsInterrupted) return MadeChange;
@@ -279,7 +307,11 @@ static bool ExtractLoops(BugDriver &BD,
// has broken. If something broke, then we'll inform the user and stop
// extraction.
AbstractInterpreter *AI = BD.switchToSafeInterpreter();
- if (TestMergedProgram(BD, ToOptimizeLoopExtracted, ToNotOptimize, false)) {
+ bool Failure = TestMergedProgram(BD, ToOptimizeLoopExtracted, ToNotOptimize,
+ false, Error);
+ if (!Error.empty())
+ return false;
+ if (Failure) {
BD.switchToInterpreter(AI);
// Merged program doesn't work anymore!
@@ -308,7 +340,10 @@ static bool ExtractLoops(BugDriver &BD,
// Clone modules, the tester function will free them.
Module *TOLEBackup = CloneModule(ToOptimizeLoopExtracted);
Module *TNOBackup = CloneModule(ToNotOptimize);
- if (!TestFn(BD, ToOptimizeLoopExtracted, ToNotOptimize)) {
+ Failure = TestFn(BD, ToOptimizeLoopExtracted, ToNotOptimize, Error);
+ if (!Error.empty())
+ return false;
+ if (!Failure) {
outs() << "*** Loop extraction masked the problem. Undoing.\n";
// If the program is not still broken, then loop extraction did something
// that masked the error. Stop loop extraction now.
@@ -361,31 +396,44 @@ static bool ExtractLoops(BugDriver &BD,
namespace {
class ReduceMiscompiledBlocks : public ListReducer<BasicBlock*> {
BugDriver &BD;
- bool (*TestFn)(BugDriver &, Module *, Module *);
+ bool (*TestFn)(BugDriver &, Module *, Module *, std::string &);
std::vector<Function*> FunctionsBeingTested;
public:
ReduceMiscompiledBlocks(BugDriver &bd,
- bool (*F)(BugDriver &, Module *, Module *),
+ bool (*F)(BugDriver &, Module *, Module *,
+ std::string &),
const std::vector<Function*> &Fns)
: BD(bd), TestFn(F), FunctionsBeingTested(Fns) {}
virtual TestResult doTest(std::vector<BasicBlock*> &Prefix,
- std::vector<BasicBlock*> &Suffix) {
- if (!Suffix.empty() && TestFuncs(Suffix))
- return KeepSuffix;
- if (TestFuncs(Prefix))
- return KeepPrefix;
+ std::vector<BasicBlock*> &Suffix,
+ std::string &Error) {
+ if (!Suffix.empty()) {
+ bool Ret = TestFuncs(Suffix, Error);
+ if (!Error.empty())
+ return InternalError;
+ if (Ret)
+ return KeepSuffix;
+ }
+ if (!Prefix.empty()) {
+ bool Ret = TestFuncs(Prefix, Error);
+ if (!Error.empty())
+ return InternalError;
+ if (Ret)
+ return KeepPrefix;
+ }
return NoFailure;
}
- bool TestFuncs(const std::vector<BasicBlock*> &Prefix);
+ bool TestFuncs(const std::vector<BasicBlock*> &BBs, std::string &Error);
};
}
/// TestFuncs - Extract all blocks for the miscompiled functions except for the
/// specified blocks. If the problem still exists, return true.
///
-bool ReduceMiscompiledBlocks::TestFuncs(const std::vector<BasicBlock*> &BBs) {
+bool ReduceMiscompiledBlocks::TestFuncs(const std::vector<BasicBlock*> &BBs,
+ std::string &Error) {
// Test to see if the function is misoptimized if we ONLY run it on the
// functions listed in Funcs.
outs() << "Checking to see if the program is misoptimized when all ";
@@ -411,7 +459,7 @@ bool ReduceMiscompiledBlocks::TestFuncs(const std::vector<BasicBlock*> &BBs) {
if (Module *New = BD.ExtractMappedBlocksFromModule(BBs, ToOptimize)) {
delete ToOptimize;
  // Run the predicate, note that the predicate will delete both input modules.
- return TestFn(BD, New, ToNotOptimize);
+ return TestFn(BD, New, ToNotOptimize, Error);
}
delete ToOptimize;
delete ToNotOptimize;
@@ -424,8 +472,10 @@ bool ReduceMiscompiledBlocks::TestFuncs(const std::vector<BasicBlock*> &BBs) {
/// the bug.
///
static bool ExtractBlocks(BugDriver &BD,
- bool (*TestFn)(BugDriver &, Module *, Module *),
- std::vector<Function*> &MiscompiledFunctions) {
+ bool (*TestFn)(BugDriver &, Module *, Module *,
+ std::string &),
+ std::vector<Function*> &MiscompiledFunctions,
+ std::string &Error) {
if (BugpointIsInterrupted) return false;
std::vector<BasicBlock*> Blocks;
@@ -440,11 +490,17 @@ static bool ExtractBlocks(BugDriver &BD,
unsigned OldSize = Blocks.size();
// Check to see if all blocks are extractible first.
- if (ReduceMiscompiledBlocks(BD, TestFn,
- MiscompiledFunctions).TestFuncs(std::vector<BasicBlock*>())) {
+ bool Ret = ReduceMiscompiledBlocks(BD, TestFn, MiscompiledFunctions)
+ .TestFuncs(std::vector<BasicBlock*>(), Error);
+ if (!Error.empty())
+ return false;
+ if (Ret) {
Blocks.clear();
} else {
- ReduceMiscompiledBlocks(BD, TestFn,MiscompiledFunctions).reduceList(Blocks);
+ ReduceMiscompiledBlocks(BD, TestFn,
+ MiscompiledFunctions).reduceList(Blocks, Error);
+ if (!Error.empty())
+ return false;
if (Blocks.size() == OldSize)
return false;
}
@@ -505,7 +561,9 @@ static bool ExtractBlocks(BugDriver &BD,
///
static std::vector<Function*>
DebugAMiscompilation(BugDriver &BD,
- bool (*TestFn)(BugDriver &, Module *, Module *)) {
+ bool (*TestFn)(BugDriver &, Module *, Module *,
+ std::string &),
+ std::string &Error) {
// Okay, now that we have reduced the list of passes which are causing the
// failure, see if we can pin down which functions are being
// miscompiled... first build a list of all of the non-external functions in
@@ -518,7 +576,10 @@ DebugAMiscompilation(BugDriver &BD,
// Do the reduction...
if (!BugpointIsInterrupted)
- ReduceMiscompilingFunctions(BD, TestFn).reduceList(MiscompiledFunctions);
+ ReduceMiscompilingFunctions(BD, TestFn).reduceList(MiscompiledFunctions,
+ Error);
+ if (!Error.empty())
+ return MiscompiledFunctions;
outs() << "\n*** The following function"
<< (MiscompiledFunctions.size() == 1 ? " is" : "s are")
@@ -529,37 +590,51 @@ DebugAMiscompilation(BugDriver &BD,
// See if we can rip any loops out of the miscompiled functions and still
// trigger the problem.
- if (!BugpointIsInterrupted && !DisableLoopExtraction &&
- ExtractLoops(BD, TestFn, MiscompiledFunctions)) {
- // Okay, we extracted some loops and the problem still appears. See if we
- // can eliminate some of the created functions from being candidates.
- DisambiguateGlobalSymbols(BD.getProgram());
-
- // Do the reduction...
- if (!BugpointIsInterrupted)
- ReduceMiscompilingFunctions(BD, TestFn).reduceList(MiscompiledFunctions);
-
- outs() << "\n*** The following function"
- << (MiscompiledFunctions.size() == 1 ? " is" : "s are")
- << " being miscompiled: ";
- PrintFunctionList(MiscompiledFunctions);
- outs() << '\n';
+ if (!BugpointIsInterrupted && !DisableLoopExtraction) {
+ bool Ret = ExtractLoops(BD, TestFn, MiscompiledFunctions, Error);
+ if (!Error.empty())
+ return MiscompiledFunctions;
+ if (Ret) {
+ // Okay, we extracted some loops and the problem still appears. See if
+ // we can eliminate some of the created functions from being candidates.
+ DisambiguateGlobalSymbols(BD.getProgram());
+
+ // Do the reduction...
+ if (!BugpointIsInterrupted)
+ ReduceMiscompilingFunctions(BD, TestFn).reduceList(MiscompiledFunctions,
+ Error);
+ if (!Error.empty())
+ return MiscompiledFunctions;
+
+ outs() << "\n*** The following function"
+ << (MiscompiledFunctions.size() == 1 ? " is" : "s are")
+ << " being miscompiled: ";
+ PrintFunctionList(MiscompiledFunctions);
+ outs() << '\n';
+ }
}
- if (!BugpointIsInterrupted && !DisableBlockExtraction &&
- ExtractBlocks(BD, TestFn, MiscompiledFunctions)) {
- // Okay, we extracted some blocks and the problem still appears. See if we
- // can eliminate some of the created functions from being candidates.
- DisambiguateGlobalSymbols(BD.getProgram());
-
- // Do the reduction...
- ReduceMiscompilingFunctions(BD, TestFn).reduceList(MiscompiledFunctions);
-
- outs() << "\n*** The following function"
- << (MiscompiledFunctions.size() == 1 ? " is" : "s are")
- << " being miscompiled: ";
- PrintFunctionList(MiscompiledFunctions);
- outs() << '\n';
+ if (!BugpointIsInterrupted && !DisableBlockExtraction) {
+ bool Ret = ExtractBlocks(BD, TestFn, MiscompiledFunctions, Error);
+ if (!Error.empty())
+ return MiscompiledFunctions;
+ if (Ret) {
+ // Okay, we extracted some blocks and the problem still appears. See if
+ // we can eliminate some of the created functions from being candidates.
+ DisambiguateGlobalSymbols(BD.getProgram());
+
+ // Do the reduction...
+ ReduceMiscompilingFunctions(BD, TestFn).reduceList(MiscompiledFunctions,
+ Error);
+ if (!Error.empty())
+ return MiscompiledFunctions;
+
+ outs() << "\n*** The following function"
+ << (MiscompiledFunctions.size() == 1 ? " is" : "s are")
+ << " being miscompiled: ";
+ PrintFunctionList(MiscompiledFunctions);
+ outs() << '\n';
+ }
}
return MiscompiledFunctions;
@@ -569,7 +644,8 @@ DebugAMiscompilation(BugDriver &BD,
/// "Test" portion of the program is misoptimized. If so, return true. In any
/// case, both module arguments are deleted.
///
-static bool TestOptimizer(BugDriver &BD, Module *Test, Module *Safe) {
+static bool TestOptimizer(BugDriver &BD, Module *Test, Module *Safe,
+ std::string &Error) {
// Run the optimization passes on ToOptimize, producing a transformed version
// of the functions being tested.
outs() << " Optimizing functions being tested: ";
@@ -579,8 +655,8 @@ static bool TestOptimizer(BugDriver &BD, Module *Test, Module *Safe) {
delete Test;
outs() << " Checking to see if the merged program executes correctly: ";
- bool Broken = TestMergedProgram(BD, Optimized, Safe, true);
- outs() << (Broken ? " nope.\n" : " yup.\n");
+ bool Broken = TestMergedProgram(BD, Optimized, Safe, true, Error);
+ if (Error.empty()) outs() << (Broken ? " nope.\n" : " yup.\n");
return Broken;
}
@@ -589,13 +665,14 @@ static bool TestOptimizer(BugDriver &BD, Module *Test, Module *Safe) {
/// crashing, but the generated output is semantically different from the
/// input.
///
-bool BugDriver::debugMiscompilation() {
+void BugDriver::debugMiscompilation(std::string *Error) {
// Make sure something was miscompiled...
if (!BugpointIsInterrupted)
- if (!ReduceMiscompilingPasses(*this).reduceList(PassesToRun)) {
- errs() << "*** Optimized program matches reference output! No problem"
- << " detected...\nbugpoint can't help you with your problem!\n";
- return false;
+ if (!ReduceMiscompilingPasses(*this).reduceList(PassesToRun, *Error)) {
+ if (Error->empty())
+ errs() << "*** Optimized program matches reference output! No problem"
+ << " detected...\nbugpoint can't help you with your problem!\n";
+ return;
}
outs() << "\n*** Found miscompiling pass"
@@ -603,8 +680,10 @@ bool BugDriver::debugMiscompilation() {
<< getPassesString(getPassesToRun()) << '\n';
EmitProgressBitcode("passinput");
- std::vector<Function*> MiscompiledFunctions =
- DebugAMiscompilation(*this, TestOptimizer);
+ std::vector<Function *> MiscompiledFunctions =
+ DebugAMiscompilation(*this, TestOptimizer, *Error);
+ if (!Error->empty())
+ return;
// Output a bunch of bitcode files for the user...
outs() << "Outputting reduced bitcode files which expose the problem:\n";
@@ -624,7 +703,7 @@ bool BugDriver::debugMiscompilation() {
EmitProgressBitcode("tooptimize");
setNewProgram(ToOptimize); // Delete hacked module.
- return false;
+ return;
}
/// CleanupAndPrepareModules - Get the specified modules ready for code
@@ -797,7 +876,8 @@ static void CleanupAndPrepareModules(BugDriver &BD, Module *&Test,
/// the "Test" portion of the program is miscompiled by the code generator under
/// test. If so, return true. In any case, both module arguments are deleted.
///
-static bool TestCodeGenerator(BugDriver &BD, Module *Test, Module *Safe) {
+static bool TestCodeGenerator(BugDriver &BD, Module *Test, Module *Safe,
+ std::string &Error) {
CleanupAndPrepareModules(BD, Test, Safe);
sys::Path TestModuleBC("bugpoint.test.bc");
@@ -827,12 +907,16 @@ static bool TestCodeGenerator(BugDriver &BD, Module *Test, Module *Safe) {
<< "'\nExiting.";
exit(1);
}
- std::string SharedObject = BD.compileSharedObject(SafeModuleBC.str());
+ std::string SharedObject = BD.compileSharedObject(SafeModuleBC.str(), Error);
+ if (!Error.empty())
+ return false;
delete Safe;
// Run the code generator on the `Test' code, loading the shared library.
// The function returns whether or not the new output differs from reference.
- int Result = BD.diffProgram(TestModuleBC.str(), SharedObject, false);
+ bool Result = BD.diffProgram(TestModuleBC.str(), SharedObject, false, &Error);
+ if (!Error.empty())
+ return false;
if (Result)
errs() << ": still failing!\n";
@@ -848,23 +932,28 @@ static bool TestCodeGenerator(BugDriver &BD, Module *Test, Module *Safe) {
/// debugCodeGenerator - debug errors in LLC, LLI, or CBE.
///
-bool BugDriver::debugCodeGenerator() {
+bool BugDriver::debugCodeGenerator(std::string *Error) {
if ((void*)SafeInterpreter == (void*)Interpreter) {
- std::string Result = executeProgramSafely("bugpoint.safe.out");
- outs() << "\n*** The \"safe\" i.e. 'known good' backend cannot match "
- << "the reference diff. This may be due to a\n front-end "
- << "bug or a bug in the original program, but this can also "
- << "happen if bugpoint isn't running the program with the "
- << "right flags or input.\n I left the result of executing "
- << "the program with the \"safe\" backend in this file for "
- << "you: '"
- << Result << "'.\n";
+ std::string Result = executeProgramSafely("bugpoint.safe.out", Error);
+ if (Error->empty()) {
+ outs() << "\n*** The \"safe\" i.e. 'known good' backend cannot match "
+ << "the reference diff. This may be due to a\n front-end "
+ << "bug or a bug in the original program, but this can also "
+ << "happen if bugpoint isn't running the program with the "
+ << "right flags or input.\n I left the result of executing "
+ << "the program with the \"safe\" backend in this file for "
+ << "you: '"
+ << Result << "'.\n";
+ }
return true;
}
DisambiguateGlobalSymbols(Program);
- std::vector<Function*> Funcs = DebugAMiscompilation(*this, TestCodeGenerator);
+ std::vector<Function*> Funcs = DebugAMiscompilation(*this, TestCodeGenerator,
+ *Error);
+ if (!Error->empty())
+ return true;
// Split the module into the two halves of the program we want.
DenseMap<const Value*, Value*> ValueMap;
@@ -902,14 +991,16 @@ bool BugDriver::debugCodeGenerator() {
<< "'\nExiting.";
exit(1);
}
- std::string SharedObject = compileSharedObject(SafeModuleBC.str());
+ std::string SharedObject = compileSharedObject(SafeModuleBC.str(), *Error);
+ if (!Error->empty())
+ return true;
delete ToNotCodeGen;
outs() << "You can reproduce the problem with the command line: \n";
if (isExecutingJIT()) {
outs() << " lli -load " << SharedObject << " " << TestModuleBC.str();
} else {
- outs() << " llc -f " << TestModuleBC.str() << " -o " << TestModuleBC.str()
+ outs() << " llc " << TestModuleBC.str() << " -o " << TestModuleBC.str()
<< ".s\n";
outs() << " gcc " << SharedObject << " " << TestModuleBC.str()
<< ".s -o " << TestModuleBC.str() << ".exe";
@@ -919,7 +1010,7 @@ bool BugDriver::debugCodeGenerator() {
outs() << "\n";
outs() << " " << TestModuleBC.str() << ".exe";
}
- for (unsigned i=0, e = InputArgv.size(); i != e; ++i)
+ for (unsigned i = 0, e = InputArgv.size(); i != e; ++i)
outs() << " " << InputArgv[i];
outs() << '\n';
outs() << "The shared object was created with:\n llc -march=c "
diff --git a/tools/bugpoint/ToolRunner.cpp b/tools/bugpoint/ToolRunner.cpp
index 62df0f1dff07..7ade77840237 100644
--- a/tools/bugpoint/ToolRunner.cpp
+++ b/tools/bugpoint/ToolRunner.cpp
@@ -50,11 +50,9 @@ namespace {
cl::desc("Remote execution (rsh/ssh) extra options"));
}
-ToolExecutionError::~ToolExecutionError() throw() { }
-
/// RunProgramWithTimeout - This function provides an alternate interface
/// to the sys::Program::ExecuteAndWait interface.
-/// @see sys:Program::ExecuteAndWait
+/// @see sys::Program::ExecuteAndWait
static int RunProgramWithTimeout(const sys::Path &ProgramPath,
const char **Args,
const sys::Path &StdInFile,
@@ -86,7 +84,7 @@ static int RunProgramWithTimeout(const sys::Path &ProgramPath,
/// Returns the remote program exit code or reports a remote client error if it
/// fails. Remote client is required to return 255 if it failed or program exit
/// code otherwise.
-/// @see sys:Program::ExecuteAndWait
+/// @see sys::Program::ExecuteAndWait
static int RunProgramRemotelyWithTimeout(const sys::Path &RemoteClientPath,
const char **Args,
const sys::Path &StdInFile,
@@ -129,13 +127,13 @@ static int RunProgramRemotelyWithTimeout(const sys::Path &RemoteClientPath,
ErrorFile.close();
}
- throw ToolExecutionError(OS.str());
+    errs() << OS.str();
}
return ReturnCode;
}
-static void ProcessFailure(sys::Path ProgPath, const char** Args) {
+static std::string ProcessFailure(sys::Path ProgPath, const char** Args) {
std::ostringstream OS;
OS << "\nError running tool:\n ";
for (const char **Arg = Args; *Arg; ++Arg)
@@ -162,7 +160,7 @@ static void ProcessFailure(sys::Path ProgPath, const char** Args) {
}
ErrorFilename.eraseFromDisk();
- throw ToolExecutionError(OS.str());
+ return OS.str();
}
//===---------------------------------------------------------------------===//
@@ -183,6 +181,7 @@ namespace {
const std::vector<std::string> &Args,
const std::string &InputFile,
const std::string &OutputFile,
+ std::string *Error,
const std::vector<std::string> &GCCArgs,
const std::vector<std::string> &SharedLibs =
std::vector<std::string>(),
@@ -195,6 +194,7 @@ int LLI::ExecuteProgram(const std::string &Bitcode,
const std::vector<std::string> &Args,
const std::string &InputFile,
const std::string &OutputFile,
+ std::string *Error,
const std::vector<std::string> &GCCArgs,
const std::vector<std::string> &SharedLibs,
unsigned Timeout,
@@ -263,9 +263,10 @@ namespace {
const std::vector<std::string> &Args,
const std::string &InputFile,
const std::string &OutputFile,
+ std::string *Error,
const std::vector<std::string> &GCCArgs,
const std::vector<std::string> &SharedLibs =
- std::vector<std::string>(),
+ std::vector<std::string>(),
unsigned Timeout = 0,
unsigned MemoryLimit = 0);
};
@@ -275,6 +276,7 @@ int CustomExecutor::ExecuteProgram(const std::string &Bitcode,
const std::vector<std::string> &Args,
const std::string &InputFile,
const std::string &OutputFile,
+ std::string *Error,
const std::vector<std::string> &GCCArgs,
const std::vector<std::string> &SharedLibs,
unsigned Timeout,
@@ -289,7 +291,7 @@ int CustomExecutor::ExecuteProgram(const std::string &Bitcode,
ProgramArgs.push_back(0);
// Add optional parameters to the running program from Argv
- for (unsigned i=0, e = Args.size(); i != e; ++i)
+ for (unsigned i = 0, e = Args.size(); i != e; ++i)
ProgramArgs.push_back(Args[i].c_str());
return RunProgramWithTimeout(
@@ -351,7 +353,7 @@ AbstractInterpreter *AbstractInterpreter::createCustom(
// LLC Implementation of AbstractIntepreter interface
//
GCC::FileType LLC::OutputCode(const std::string &Bitcode,
- sys::Path &OutputAsmFile) {
+ sys::Path &OutputAsmFile, std::string &Error) {
const char *Suffix = (UseIntegratedAssembler ? ".llc.o" : ".llc.s");
sys::Path uniqueFile(Bitcode + Suffix);
std::string ErrMsg;
@@ -379,20 +381,19 @@ GCC::FileType LLC::OutputCode(const std::string &Bitcode,
outs() << (UseIntegratedAssembler ? "<llc-ia>" : "<llc>");
outs().flush();
DEBUG(errs() << "\nAbout to run:\t";
- for (unsigned i=0, e = LLCArgs.size()-1; i != e; ++i)
+ for (unsigned i = 0, e = LLCArgs.size()-1; i != e; ++i)
errs() << " " << LLCArgs[i];
errs() << "\n";
);
if (RunProgramWithTimeout(sys::Path(LLCPath), &LLCArgs[0],
sys::Path(), sys::Path(), sys::Path()))
- ProcessFailure(sys::Path(LLCPath), &LLCArgs[0]);
-
- return UseIntegratedAssembler ? GCC::ObjectFile : GCC::AsmFile;
+ Error = ProcessFailure(sys::Path(LLCPath), &LLCArgs[0]);
+ return UseIntegratedAssembler ? GCC::ObjectFile : GCC::AsmFile;
}
-void LLC::compileProgram(const std::string &Bitcode) {
+void LLC::compileProgram(const std::string &Bitcode, std::string *Error) {
sys::Path OutputAsmFile;
- OutputCode(Bitcode, OutputAsmFile);
+ OutputCode(Bitcode, OutputAsmFile, *Error);
OutputAsmFile.eraseFromDisk();
}
@@ -400,13 +401,14 @@ int LLC::ExecuteProgram(const std::string &Bitcode,
const std::vector<std::string> &Args,
const std::string &InputFile,
const std::string &OutputFile,
+ std::string *Error,
const std::vector<std::string> &ArgsForGCC,
const std::vector<std::string> &SharedLibs,
unsigned Timeout,
unsigned MemoryLimit) {
sys::Path OutputAsmFile;
- GCC::FileType FileKind = OutputCode(Bitcode, OutputAsmFile);
+ GCC::FileType FileKind = OutputCode(Bitcode, OutputAsmFile, *Error);
FileRemover OutFileRemover(OutputAsmFile, !SaveTemps);
std::vector<std::string> GCCArgs(ArgsForGCC);
@@ -415,7 +417,7 @@ int LLC::ExecuteProgram(const std::string &Bitcode,
// Assuming LLC worked, compile the result with GCC and run it.
return gcc->ExecuteProgram(OutputAsmFile.str(), Args, FileKind,
- InputFile, OutputFile, GCCArgs,
+ InputFile, OutputFile, Error, GCCArgs,
Timeout, MemoryLimit);
}
@@ -460,12 +462,13 @@ namespace {
const std::vector<std::string> &Args,
const std::string &InputFile,
const std::string &OutputFile,
+ std::string *Error,
const std::vector<std::string> &GCCArgs =
std::vector<std::string>(),
const std::vector<std::string> &SharedLibs =
std::vector<std::string>(),
- unsigned Timeout =0,
- unsigned MemoryLimit =0);
+ unsigned Timeout = 0,
+ unsigned MemoryLimit = 0);
};
}
@@ -473,6 +476,7 @@ int JIT::ExecuteProgram(const std::string &Bitcode,
const std::vector<std::string> &Args,
const std::string &InputFile,
const std::string &OutputFile,
+ std::string *Error,
const std::vector<std::string> &GCCArgs,
const std::vector<std::string> &SharedLibs,
unsigned Timeout,
@@ -524,7 +528,7 @@ AbstractInterpreter *AbstractInterpreter::createJIT(const char *Argv0,
}
GCC::FileType CBE::OutputCode(const std::string &Bitcode,
- sys::Path &OutputCFile) {
+ sys::Path &OutputCFile, std::string &Error) {
sys::Path uniqueFile(Bitcode+".cbe.c");
std::string ErrMsg;
if (uniqueFile.makeUnique(true, &ErrMsg)) {
@@ -533,34 +537,33 @@ GCC::FileType CBE::OutputCode(const std::string &Bitcode,
}
OutputCFile = uniqueFile;
std::vector<const char *> LLCArgs;
- LLCArgs.push_back (LLCPath.c_str());
+ LLCArgs.push_back(LLCPath.c_str());
// Add any extra LLC args.
for (unsigned i = 0, e = ToolArgs.size(); i != e; ++i)
LLCArgs.push_back(ToolArgs[i].c_str());
- LLCArgs.push_back ("-o");
- LLCArgs.push_back (OutputCFile.c_str()); // Output to the C file
- LLCArgs.push_back ("-march=c"); // Output C language
- LLCArgs.push_back ("-f"); // Overwrite as necessary...
- LLCArgs.push_back (Bitcode.c_str()); // This is the input bitcode
- LLCArgs.push_back (0);
+ LLCArgs.push_back("-o");
+ LLCArgs.push_back(OutputCFile.c_str()); // Output to the C file
+ LLCArgs.push_back("-march=c"); // Output C language
+ LLCArgs.push_back(Bitcode.c_str()); // This is the input bitcode
+ LLCArgs.push_back(0);
outs() << "<cbe>"; outs().flush();
DEBUG(errs() << "\nAbout to run:\t";
- for (unsigned i=0, e = LLCArgs.size()-1; i != e; ++i)
+ for (unsigned i = 0, e = LLCArgs.size()-1; i != e; ++i)
errs() << " " << LLCArgs[i];
errs() << "\n";
);
if (RunProgramWithTimeout(LLCPath, &LLCArgs[0], sys::Path(), sys::Path(),
sys::Path()))
- ProcessFailure(LLCPath, &LLCArgs[0]);
+ Error = ProcessFailure(LLCPath, &LLCArgs[0]);
return GCC::CFile;
}
-void CBE::compileProgram(const std::string &Bitcode) {
+void CBE::compileProgram(const std::string &Bitcode, std::string *Error) {
sys::Path OutputCFile;
- OutputCode(Bitcode, OutputCFile);
+ OutputCode(Bitcode, OutputCFile, *Error);
OutputCFile.eraseFromDisk();
}
@@ -568,12 +571,13 @@ int CBE::ExecuteProgram(const std::string &Bitcode,
const std::vector<std::string> &Args,
const std::string &InputFile,
const std::string &OutputFile,
+ std::string *Error,
const std::vector<std::string> &ArgsForGCC,
const std::vector<std::string> &SharedLibs,
unsigned Timeout,
unsigned MemoryLimit) {
sys::Path OutputCFile;
- OutputCode(Bitcode, OutputCFile);
+ OutputCode(Bitcode, OutputCFile, *Error);
FileRemover CFileRemove(OutputCFile, !SaveTemps);
@@ -581,7 +585,7 @@ int CBE::ExecuteProgram(const std::string &Bitcode,
GCCArgs.insert(GCCArgs.end(), SharedLibs.begin(), SharedLibs.end());
return gcc->ExecuteProgram(OutputCFile.str(), Args, GCC::CFile,
- InputFile, OutputFile, GCCArgs,
+ InputFile, OutputFile, Error, GCCArgs,
Timeout, MemoryLimit);
}
@@ -631,6 +635,7 @@ int GCC::ExecuteProgram(const std::string &ProgramFile,
FileType fileType,
const std::string &InputFile,
const std::string &OutputFile,
+ std::string *Error,
const std::vector<std::string> &ArgsForGCC,
unsigned Timeout,
unsigned MemoryLimit) {
@@ -694,14 +699,14 @@ int GCC::ExecuteProgram(const std::string &ProgramFile,
outs() << "<gcc>"; outs().flush();
DEBUG(errs() << "\nAbout to run:\t";
- for (unsigned i=0, e = GCCArgs.size()-1; i != e; ++i)
+ for (unsigned i = 0, e = GCCArgs.size()-1; i != e; ++i)
errs() << " " << GCCArgs[i];
errs() << "\n";
);
if (RunProgramWithTimeout(GCCPath, &GCCArgs[0], sys::Path(), sys::Path(),
sys::Path())) {
- ProcessFailure(GCCPath, &GCCArgs[0]);
- exit(1);
+ *Error = ProcessFailure(GCCPath, &GCCArgs[0]);
+ return -1;
}
std::vector<const char*> ProgramArgs;
@@ -734,14 +739,14 @@ int GCC::ExecuteProgram(const std::string &ProgramFile,
}
// Add optional parameters to the running program from Argv
- for (unsigned i=0, e = Args.size(); i != e; ++i)
+ for (unsigned i = 0, e = Args.size(); i != e; ++i)
ProgramArgs.push_back(Args[i].c_str());
ProgramArgs.push_back(0); // NULL terminator
// Now that we have a binary, run it!
outs() << "<program>"; outs().flush();
DEBUG(errs() << "\nAbout to run:\t";
- for (unsigned i=0, e = ProgramArgs.size()-1; i != e; ++i)
+ for (unsigned i = 0, e = ProgramArgs.size()-1; i != e; ++i)
errs() << " " << ProgramArgs[i];
errs() << "\n";
);
@@ -749,7 +754,7 @@ int GCC::ExecuteProgram(const std::string &ProgramFile,
FileRemover OutputBinaryRemover(OutputBinary, !SaveTemps);
if (RemoteClientPath.isEmpty()) {
- DEBUG(errs() << "<run locally>";);
+ DEBUG(errs() << "<run locally>");
return RunProgramWithTimeout(OutputBinary, &ProgramArgs[0],
sys::Path(InputFile), sys::Path(OutputFile), sys::Path(OutputFile),
Timeout, MemoryLimit);
@@ -763,7 +768,8 @@ int GCC::ExecuteProgram(const std::string &ProgramFile,
int GCC::MakeSharedObject(const std::string &InputFile, FileType fileType,
std::string &OutputFile,
- const std::vector<std::string> &ArgsForGCC) {
+ const std::vector<std::string> &ArgsForGCC,
+ std::string &Error) {
sys::Path uniqueFilename(InputFile+LTDL_SHLIB_EXT);
std::string ErrMsg;
if (uniqueFilename.makeUnique(true, &ErrMsg)) {
@@ -831,13 +837,13 @@ int GCC::MakeSharedObject(const std::string &InputFile, FileType fileType,
outs() << "<gcc>"; outs().flush();
DEBUG(errs() << "\nAbout to run:\t";
- for (unsigned i=0, e = GCCArgs.size()-1; i != e; ++i)
+ for (unsigned i = 0, e = GCCArgs.size()-1; i != e; ++i)
errs() << " " << GCCArgs[i];
errs() << "\n";
);
if (RunProgramWithTimeout(GCCPath, &GCCArgs[0], sys::Path(), sys::Path(),
sys::Path())) {
- ProcessFailure(GCCPath, &GCCArgs[0]);
+ Error = ProcessFailure(GCCPath, &GCCArgs[0]);
return 1;
}
return 0;
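
ProcessFailure now builds the diagnostic and returns it for the caller to stash in its Error slot, instead of throwing ToolExecutionError. Approximately (simplified, and without the stderr-capture step of the real function):

#include <sstream>
#include <string>

static std::string ProcessFailure(const std::string &Prog,
                                  const char **Args) {
  std::ostringstream OS;
  OS << "\nError running tool:\n  " << Prog;
  for (const char **Arg = Args; *Arg; ++Arg)
    OS << ' ' << *Arg;
  return OS.str();  // caller assigns this to its Error out-parameter
}

int main() {
  const char *Args[] = { "-o", "out.s", 0 };
  return ProcessFailure("llc", Args).empty() ? 1 : 0;
}
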
diff --git a/tools/bugpoint/ToolRunner.h b/tools/bugpoint/ToolRunner.h
index 0f75e99db4be..cba10f214a90 100644
--- a/tools/bugpoint/ToolRunner.h
+++ b/tools/bugpoint/ToolRunner.h
@@ -19,6 +19,7 @@
#include "llvm/ADT/Triple.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SystemUtils.h"
#include "llvm/System/Path.h"
#include <exception>
@@ -32,19 +33,6 @@ extern Triple TargetTriple;
class CBE;
class LLC;
-/// ToolExecutionError - An instance of this class is thrown by the
-/// AbstractInterpreter instances if there is an error running a tool (e.g., LLC
-/// crashes) which prevents execution of the program.
-///
-class ToolExecutionError : std::exception {
- std::string Message;
-public:
- explicit ToolExecutionError(const std::string &M) : Message(M) {}
- virtual ~ToolExecutionError() throw();
- virtual const char* what() const throw() { return Message.c_str(); }
-};
-
-
//===---------------------------------------------------------------------===//
// GCC abstraction
//
@@ -75,6 +63,7 @@ public:
FileType fileType,
const std::string &InputFile,
const std::string &OutputFile,
+ std::string *Error = 0,
const std::vector<std::string> &GCCArgs =
std::vector<std::string>(),
unsigned Timeout = 0,
@@ -85,7 +74,8 @@ public:
///
int MakeSharedObject(const std::string &InputFile, FileType fileType,
std::string &OutputFile,
- const std::vector<std::string> &ArgsForGCC);
+ const std::vector<std::string> &ArgsForGCC,
+ std::string &Error);
};
@@ -118,26 +108,29 @@ public:
/// compileProgram - Compile the specified program from bitcode to executable
/// code. This does not produce any output, it is only used when debugging
- /// the code generator. If the code generator fails, an exception should be
- /// thrown, otherwise, this function will just return.
- virtual void compileProgram(const std::string &Bitcode) {}
+  /// the code generator. If the code generator fails, it sets *Error.
+ virtual void compileProgram(const std::string &Bitcode, std::string *Error) {}
/// OutputCode - Compile the specified program from bitcode to code
/// understood by the GCC driver (either C or asm). If the code generator
- /// fails, an exception should be thrown, otherwise, this function returns the
- /// type of code emitted.
+  /// fails, it sets Error; otherwise, this function returns the type of code
+ /// emitted.
virtual GCC::FileType OutputCode(const std::string &Bitcode,
- sys::Path &OutFile) {
- throw std::string("OutputCode not supported by this AbstractInterpreter!");
+ sys::Path &OutFile, std::string &Error) {
+ Error = "OutputCode not supported by this AbstractInterpreter!";
+ return GCC::AsmFile;
}
-
+
/// ExecuteProgram - Run the specified bitcode file, emitting output to the
- /// specified filename. This returns the exit code of the program.
+  /// specified filename. This returns the exit code of the program, or sets
+  /// *Error if a problem was encountered that prevented execution of the
+  /// program.
///
virtual int ExecuteProgram(const std::string &Bitcode,
const std::vector<std::string> &Args,
const std::string &InputFile,
const std::string &OutputFile,
+ std::string *Error,
const std::vector<std::string> &GCCArgs =
std::vector<std::string>(),
const std::vector<std::string> &SharedLibs =
@@ -164,14 +157,14 @@ public:
/// compileProgram - Compile the specified program from bitcode to executable
/// code. This does not produce any output, it is only used when debugging
- /// the code generator. If the code generator fails, an exception should be
- /// thrown, otherwise, this function will just return.
- virtual void compileProgram(const std::string &Bitcode);
+  /// the code generator. Sets *Error if the code generator fails.
+ virtual void compileProgram(const std::string &Bitcode, std::string *Error);
virtual int ExecuteProgram(const std::string &Bitcode,
const std::vector<std::string> &Args,
const std::string &InputFile,
const std::string &OutputFile,
+ std::string *Error,
const std::vector<std::string> &GCCArgs =
std::vector<std::string>(),
const std::vector<std::string> &SharedLibs =
@@ -181,10 +174,10 @@ public:
/// OutputCode - Compile the specified program from bitcode to code
/// understood by the GCC driver (either C or asm). If the code generator
- /// fails, an exception should be thrown, otherwise, this function returns the
- /// type of code emitted.
+  /// fails, it sets Error; otherwise, this function returns the type of code
+ /// emitted.
virtual GCC::FileType OutputCode(const std::string &Bitcode,
- sys::Path &OutFile);
+ sys::Path &OutFile, std::string &Error);
};
@@ -212,14 +205,14 @@ public:
/// compileProgram - Compile the specified program from bitcode to executable
/// code. This does not produce any output, it is only used when debugging
- /// the code generator. If the code generator fails, an exception should be
- /// thrown, otherwise, this function will just return.
- virtual void compileProgram(const std::string &Bitcode);
+  /// the code generator. Sets *Error if the code generator fails.
+ virtual void compileProgram(const std::string &Bitcode, std::string *Error);
virtual int ExecuteProgram(const std::string &Bitcode,
const std::vector<std::string> &Args,
const std::string &InputFile,
const std::string &OutputFile,
+ std::string *Error,
const std::vector<std::string> &GCCArgs =
std::vector<std::string>(),
const std::vector<std::string> &SharedLibs =
@@ -227,9 +220,12 @@ public:
unsigned Timeout = 0,
unsigned MemoryLimit = 0);
+ /// OutputCode - Compile the specified program from bitcode to code
+ /// understood by the GCC driver (either C or asm). If the code generator
+  /// fails, it sets Error; otherwise, this function returns the type of code
+ /// emitted.
virtual GCC::FileType OutputCode(const std::string &Bitcode,
- sys::Path &OutFile);
-
+ sys::Path &OutFile, std::string &Error);
};
} // End llvm namespace
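
After the rewrite, every AbstractInterpreter virtual threads Error through, and unsupported operations fill it and return a placeholder value. A stand-in interface illustrating the shape (the real signatures carry many more parameters):

#include <string>

struct Interpreter {
  virtual ~Interpreter() {}

  // Failure lands in *Error instead of an exception; default is a no-op.
  virtual void compileProgram(const std::string &Bitcode,
                              std::string *Error) {}

  // Unsupported operations set Error and return a dummy value.
  virtual int outputCode(const std::string &Bitcode, std::string &Error) {
    Error = "OutputCode not supported by this interpreter";
    return 0;
  }
};

int main() {
  Interpreter I;
  std::string Error;
  I.outputCode("prog.bc", Error);
  return Error.empty() ? 1 : 0;
}
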
diff --git a/tools/bugpoint/bugpoint.cpp b/tools/bugpoint/bugpoint.cpp
index e14f31e67d85..ba5234bdc89d 100644
--- a/tools/bugpoint/bugpoint.cpp
+++ b/tools/bugpoint/bugpoint.cpp
@@ -149,23 +149,11 @@ int main(int argc, char **argv) {
// avoid filling up the disk, we prevent it
sys::Process::PreventCoreFiles();
- try {
- return D.run();
- } catch (ToolExecutionError &TEE) {
- errs() << "Tool execution error: " << TEE.what() << '\n';
- } catch (const std::string& msg) {
- errs() << argv[0] << ": " << msg << "\n";
- } catch (const std::bad_alloc&) {
- errs() << "Oh no, a bugpoint process ran out of memory!\n"
- "To increase the allocation limits for bugpoint child\n"
- "processes, use the -mlimit option.\n";
- } catch (const std::exception &e) {
- errs() << "Whoops, a std::exception leaked out of bugpoint: "
- << e.what() << "\n"
- << "This is a bug in bugpoint!\n";
- } catch (...) {
- errs() << "Whoops, an exception leaked out of bugpoint. "
- << "This is a bug in bugpoint!\n";
+ std::string Error;
+ bool Failure = D.run(Error);
+ if (!Error.empty()) {
+ errs() << Error;
+ return 1;
}
- return 1;
+ return Failure;
}
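
The driver's top level follows the same recipe: run, inspect Error, and turn the pair into an exit code. A compilable miniature with an assumed Driver type:

#include <iostream>
#include <string>

struct Driver {
  bool run(std::string &Error) {
    (void)Error;  // a real driver would set this on internal failure
    return false;
  }
};

int main() {
  Driver D;
  std::string Error;
  bool Failure = D.run(Error);
  if (!Error.empty()) {  // internal error: report it and fail hard
    std::cerr << Error;
    return 1;
  }
  return Failure;  // otherwise propagate the driver's own verdict
}
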
diff --git a/tools/edis/CMakeLists.txt b/tools/edis/CMakeLists.txt
new file mode 100644
index 000000000000..f7a199d597b4
--- /dev/null
+++ b/tools/edis/CMakeLists.txt
@@ -0,0 +1,25 @@
+set(LLVM_NO_RTTI 1)
+
+add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/EDInfo.inc
+ COMMAND ${LLVM_TABLEGEN_EXE} -o ${CMAKE_CURRENT_BINARY_DIR}/EDInfo.inc
+ -gen-enhanced-disassembly-header ${CMAKE_CURRENT_SOURCE_DIR}/EDInfo.td
+ DEPENDS tblgen
+ COMMENT "Building enhanced disassembly semantic information header (EDInfo.inc)")
+set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/EDInfo.inc PROPERTIES GENERATED 1)
+
+include_directories(${CMAKE_CURRENT_BINARY_DIR})
+
+add_llvm_library(EnhancedDisassembly
+ EDDisassembler.cpp
+ EDInst.cpp
+ EDMain.cpp
+ EDOperand.cpp
+ EDToken.cpp
+ ../../include/llvm-c/EnhancedDisassembly.h
+ ${CMAKE_CURRENT_BINARY_DIR}/EDInfo.inc
+)
+
+set_target_properties(EnhancedDisassembly
+ PROPERTIES
+ LINKER_LANGUAGE CXX)
+
diff --git a/tools/edis/EDDisassembler.cpp b/tools/edis/EDDisassembler.cpp
index 8729b4998fda..00b5d8d33a00 100644
--- a/tools/edis/EDDisassembler.cpp
+++ b/tools/edis/EDDisassembler.cpp
@@ -13,8 +13,12 @@
//
//===----------------------------------------------------------------------===//
+#include "EDDisassembler.h"
+#include "EDInst.h"
+
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/EDInstInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler.h"
@@ -36,38 +40,34 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSelect.h"
-#include "EDDisassembler.h"
-#include "EDInst.h"
-
-#include "../../lib/Target/X86/X86GenEDInfo.inc"
-
using namespace llvm;
bool EDDisassembler::sInitialized = false;
EDDisassembler::DisassemblerMap_t EDDisassembler::sDisassemblers;
-struct InfoMap {
+struct TripleMap {
Triple::ArchType Arch;
const char *String;
- const InstInfo *Info;
};
-static struct InfoMap infomap[] = {
- { Triple::x86, "i386-unknown-unknown", instInfoX86 },
- { Triple::x86_64, "x86_64-unknown-unknown", instInfoX86 },
- { Triple::InvalidArch, NULL, NULL }
+static struct TripleMap triplemap[] = {
+ { Triple::x86, "i386-unknown-unknown" },
+ { Triple::x86_64, "x86_64-unknown-unknown" },
+ { Triple::arm, "arm-unknown-unknown" },
+ { Triple::thumb, "thumb-unknown-unknown" },
+  { Triple::InvalidArch, NULL }
};
-/// infoFromArch - Returns the InfoMap corresponding to a given architecture,
+/// tripleFromArch - Returns the triple string corresponding to a given architecture,
/// or NULL if there is an error
///
/// @arg arch - The Triple::ArchType for the desired architecture
-static const InfoMap *infoFromArch(Triple::ArchType arch) {
+static const char *tripleFromArch(Triple::ArchType arch) {
unsigned int infoIndex;
- for (infoIndex = 0; infomap[infoIndex].String != NULL; ++infoIndex) {
- if(arch == infomap[infoIndex].Arch)
- return &infomap[infoIndex];
+ for (infoIndex = 0; triplemap[infoIndex].String != NULL; ++infoIndex) {
+ if (arch == triplemap[infoIndex].Arch)
+ return triplemap[infoIndex].String;
}
return NULL;
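
The InfoMap-to-TripleMap rewrite keeps the sentinel-terminated linear scan; only the payload shrank to a triple string. Re-created in miniature:

#include <cstddef>

enum ArchType { x86, arm, InvalidArch };

struct TripleMap {
  ArchType Arch;
  const char *String;
};

static const TripleMap triplemap[] = {
  { x86, "i386-unknown-unknown" },
  { arm, "arm-unknown-unknown" },
  { InvalidArch, NULL }  // sentinel terminates the scan
};

static const char *tripleFromArch(ArchType arch) {
  for (unsigned i = 0; triplemap[i].String != NULL; ++i)
    if (arch == triplemap[i].Arch)
      return triplemap[i].String;
  return NULL;  // unknown architecture
}

int main() { return tripleFromArch(arm) != NULL ? 0 : 1; }
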
@@ -95,23 +95,25 @@ static int getLLVMSyntaxVariant(Triple::ArchType arch,
return 1;
else
return -1;
+ case kEDAssemblySyntaxARMUAL:
+ if (arch == Triple::arm || arch == Triple::thumb)
+ return 0;
+ else
+ return -1;
}
}
-#define BRINGUP_TARGET(tgt) \
- LLVMInitialize##tgt##TargetInfo(); \
- LLVMInitialize##tgt##Target(); \
- LLVMInitialize##tgt##AsmPrinter(); \
- LLVMInitialize##tgt##AsmParser(); \
- LLVMInitialize##tgt##Disassembler();
-
void EDDisassembler::initialize() {
if (sInitialized)
return;
sInitialized = true;
- BRINGUP_TARGET(X86)
+ InitializeAllTargetInfos();
+ InitializeAllTargets();
+ InitializeAllAsmPrinters();
+ InitializeAllAsmParsers();
+ InitializeAllDisassemblers();
}
#undef BRINGUP_TARGET
@@ -126,10 +128,9 @@ EDDisassembler *EDDisassembler::getDisassembler(Triple::ArchType arch,
if (i != sDisassemblers.end()) {
return i->second;
- }
- else {
+ } else {
EDDisassembler* sdd = new EDDisassembler(key);
- if(!sdd->valid()) {
+ if (!sdd->valid()) {
delete sdd;
return NULL;
}
@@ -150,17 +151,18 @@ EDDisassembler *EDDisassembler::getDisassembler(StringRef str,
}
EDDisassembler::EDDisassembler(CPUKey &key) :
- Valid(false), ErrorString(), ErrorStream(ErrorString), Key(key) {
- const InfoMap *infoMap = infoFromArch(key.Arch);
-
- if (!infoMap)
+ Valid(false),
+ HasSemantics(false),
+ ErrorStream(nulls()),
+ Key(key) {
+ const char *triple = tripleFromArch(key.Arch);
+
+ if (!triple)
return;
- const char *triple = infoMap->String;
-
- int syntaxVariant = getLLVMSyntaxVariant(key.Arch, key.Syntax);
+ LLVMSyntaxVariant = getLLVMSyntaxVariant(key.Arch, key.Syntax);
- if (syntaxVariant < 0)
+ if (LLVMSyntaxVariant < 0)
return;
std::string tripleString(triple);
@@ -182,6 +184,8 @@ EDDisassembler::EDDisassembler(CPUKey &key) :
if (!registerInfo)
return;
+
+ initMaps(*registerInfo);
AsmInfo.reset(Tgt->createAsmInfo(tripleString));
@@ -192,10 +196,12 @@ EDDisassembler::EDDisassembler(CPUKey &key) :
if (!Disassembler)
return;
+
+ InstInfos = Disassembler->getEDInfo();
InstString.reset(new std::string);
InstStream.reset(new raw_string_ostream(*InstString));
- InstPrinter.reset(Tgt->createMCInstPrinter(syntaxVariant, *AsmInfo));
+ InstPrinter.reset(Tgt->createMCInstPrinter(LLVMSyntaxVariant, *AsmInfo));
if (!InstPrinter)
return;
@@ -203,8 +209,6 @@ EDDisassembler::EDDisassembler(CPUKey &key) :
GenericAsmLexer.reset(new AsmLexer(*AsmInfo));
SpecificAsmLexer.reset(Tgt->createAsmLexer(*AsmInfo));
SpecificAsmLexer->InstallLexer(*GenericAsmLexer);
-
- InstInfos = infoMap->Info;
initMaps(*targetMachine->getRegisterInfo());
@@ -212,7 +216,7 @@ EDDisassembler::EDDisassembler(CPUKey &key) :
}
EDDisassembler::~EDDisassembler() {
- if(!valid())
+ if (!valid())
return;
}
@@ -230,10 +234,10 @@ namespace {
uint64_t getBase() const { return 0x0; }
uint64_t getExtent() const { return (uint64_t)-1; }
int readByte(uint64_t address, uint8_t *ptr) const {
- if(!Callback)
+ if (!Callback)
return -1;
- if(Callback(ptr, address, Arg))
+ if (Callback(ptr, address, Arg))
return -1;
return 0;
@@ -256,9 +260,10 @@ EDInst *EDDisassembler::createInst(EDByteReaderCallback byteReader,
ErrorStream)) {
delete inst;
return NULL;
- }
- else {
- const InstInfo *thisInstInfo = &InstInfos[inst->getOpcode()];
+ } else {
+ const llvm::EDInstInfo *thisInstInfo;
+
+ thisInstInfo = &InstInfos[inst->getOpcode()];
EDInst* sdInst = new EDInst(inst, byteSize, *this, thisInstInfo);
return sdInst;
@@ -276,8 +281,11 @@ void EDDisassembler::initMaps(const TargetRegisterInfo &registerInfo) {
RegRMap[registerName] = registerIndex;
}
- if (Key.Arch == Triple::x86 ||
- Key.Arch == Triple::x86_64) {
+ switch (Key.Arch) {
+ default:
+ break;
+ case Triple::x86:
+ case Triple::x86_64:
stackPointers.insert(registerIDWithName("SP"));
stackPointers.insert(registerIDWithName("ESP"));
stackPointers.insert(registerIDWithName("RSP"));
@@ -285,6 +293,13 @@ void EDDisassembler::initMaps(const TargetRegisterInfo &registerInfo) {
programCounters.insert(registerIDWithName("IP"));
programCounters.insert(registerIDWithName("EIP"));
programCounters.insert(registerIDWithName("RIP"));
+ break;
+ case Triple::arm:
+ case Triple::thumb:
+ stackPointers.insert(registerIDWithName("SP"));
+
+ programCounters.insert(registerIDWithName("PC"));
+ break;
}
}
@@ -329,6 +344,16 @@ int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands,
const std::string &str) {
int ret = 0;
+ switch (Key.Arch) {
+ default:
+ return -1;
+ case Triple::x86:
+ case Triple::x86_64:
+ case Triple::arm:
+ case Triple::thumb:
+ break;
+ }
+
const char *cStr = str.c_str();
MemoryBuffer *buf = MemoryBuffer::getMemBuffer(cStr, cStr + strlen(cStr));
@@ -343,14 +368,16 @@ int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands,
OwningPtr<TargetAsmParser> TargetParser(Tgt->createAsmParser(genericParser));
AsmToken OpcodeToken = genericParser.Lex();
-
- if(OpcodeToken.is(AsmToken::Identifier)) {
+  AsmToken NextToken = genericParser.Lex(); // consume the next token, since the target-specific parser expects it
+
+ if (OpcodeToken.is(AsmToken::Identifier)) {
instName = OpcodeToken.getString();
instLoc = OpcodeToken.getLoc();
- if (TargetParser->ParseInstruction(instName, instLoc, operands))
+
+ if (NextToken.isNot(AsmToken::Eof) &&
+ TargetParser->ParseInstruction(instName, instLoc, operands))
ret = -1;
- }
- else {
+ } else {
ret = -1;
}
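For reference, the table-plus-sentinel lookup introduced above reduces to the following self-contained sketch; the local ArchType enum and the main() driver are illustrative stand-ins for llvm::Triple and the real callers, not LLVM API.

#include <cstdio>
#include <cstddef>

// Stand-in for Triple::ArchType; the real enum lives in llvm/ADT/Triple.h.
enum ArchType { x86, x86_64, arm, thumb, InvalidArch };

struct TripleMap {
  ArchType Arch;
  const char *String;
};

static const TripleMap triplemap[] = {
  { x86,         "i386-unknown-unknown"   },
  { x86_64,      "x86_64-unknown-unknown" },
  { arm,         "arm-unknown-unknown"    },
  { thumb,       "thumb-unknown-unknown"  },
  { InvalidArch, NULL }
};

// Same contract as tripleFromArch() in the patch: scan to the NULL sentinel
// and return the matching triple string, or NULL on failure.
static const char *tripleFromArch(ArchType arch) {
  for (unsigned i = 0; triplemap[i].String != NULL; ++i)
    if (arch == triplemap[i].Arch)
      return triplemap[i].String;
  return NULL;
}

int main() {
  std::printf("%s\n", tripleFromArch(thumb)); // thumb-unknown-unknown
  return 0;
}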
diff --git a/tools/edis/EDDisassembler.h b/tools/edis/EDDisassembler.h
index 6be9152facb6..74a260e82532 100644
--- a/tools/edis/EDDisassembler.h
+++ b/tools/edis/EDDisassembler.h
@@ -48,6 +48,8 @@ template <typename T> class SmallVectorImpl;
class SourceMgr;
class Target;
class TargetRegisterInfo;
+
+struct EDInstInfo;
}
/// EDDisassembler - Encapsulates a disassembler for a single architecture and
@@ -113,13 +115,13 @@ struct EDDisassembler {
// Per-object members //
////////////////////////
- /// True only if the object has been fully and successfully initialized
+ /// True only if the object has been successfully initialized
bool Valid;
+ /// True if the disassembler can provide semantic information
+ bool HasSemantics;
- /// The string that stores disassembler errors from the backend
- std::string ErrorString;
- /// The stream that wraps the ErrorString
- llvm::raw_string_ostream ErrorStream;
+ /// The stream to write errors to
+ llvm::raw_ostream &ErrorStream;
/// The architecture/syntax pair for the current architecture
CPUKey Key;
@@ -143,7 +145,7 @@ struct EDDisassembler {
llvm::sys::Mutex PrinterMutex;
/// The array of instruction information provided by the TableGen backend for
/// the target architecture
- const InstInfo *InstInfos;
+ const llvm::EDInstInfo *InstInfos;
/// The target-specific lexer for use in tokenizing strings, in
/// target-independent and target-specific portions
llvm::OwningPtr<llvm::AsmLexer> GenericAsmLexer;
@@ -180,6 +182,12 @@ struct EDDisassembler {
return Valid;
}
+ /// hasSemantics - reports whether the disassembler can provide operands and
+ /// tokens.
+ bool hasSemantics() {
+ return HasSemantics;
+ }
+
~EDDisassembler();
/// createInst - creates and returns an instruction given a callback and
diff --git a/tools/edis/EDInfo.td b/tools/edis/EDInfo.td
new file mode 100644
index 000000000000..bd9ec079d80d
--- /dev/null
+++ b/tools/edis/EDInfo.td
@@ -0,0 +1 @@
+// Intentionally empty.
diff --git a/tools/edis/EDInst.cpp b/tools/edis/EDInst.cpp
index 9ed27002ad57..af3a54abbcf0 100644
--- a/tools/edis/EDInst.cpp
+++ b/tools/edis/EDInst.cpp
@@ -18,6 +18,7 @@
#include "EDOperand.h"
#include "EDToken.h"
+#include "llvm/MC/EDInstInfo.h"
#include "llvm/MC/MCInst.h"
using namespace llvm;
@@ -25,7 +26,7 @@ using namespace llvm;
EDInst::EDInst(llvm::MCInst *inst,
uint64_t byteSize,
EDDisassembler &disassembler,
- const InstInfo *info) :
+ const llvm::EDInstInfo *info) :
Disassembler(disassembler),
Inst(inst),
ThisInstInfo(info),
@@ -33,6 +34,7 @@ EDInst::EDInst(llvm::MCInst *inst,
BranchTarget(-1),
MoveSource(-1),
MoveTarget(-1) {
+ OperandOrder = ThisInstInfo->operandOrders[Disassembler.llvmSyntaxVariant()];
}
EDInst::~EDInst() {
@@ -60,8 +62,6 @@ int EDInst::stringify() {
if (Disassembler.printInst(String, *Inst))
return StringifyResult.setResult(-1);
-
- OperandOrder = ThisInstInfo->operandOrders[Disassembler.llvmSyntaxVariant()];
return StringifyResult.setResult(0);
}
@@ -81,21 +81,21 @@ unsigned EDInst::instID() {
bool EDInst::isBranch() {
if (ThisInstInfo)
- return ThisInstInfo->instructionFlags & kInstructionFlagBranch;
+ return ThisInstInfo->instructionType == kInstructionTypeBranch;
else
return false;
}
bool EDInst::isMove() {
if (ThisInstInfo)
- return ThisInstInfo->instructionFlags & kInstructionFlagMove;
+ return ThisInstInfo->instructionType == kInstructionTypeMove;
else
return false;
}
int EDInst::parseOperands() {
if (ParseResult.valid())
- return ParseResult.result();
+ return ParseResult.result();
if (!ThisInstInfo)
return ParseResult.setResult(-1);
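The switch from kInstructionFlag* bit masks to a single instructionType field changes the classification tests above from masking to equality. A minimal sketch of that shape; the enumerator values and the InfoSketch struct are illustrative, not the real generated EDInfo encoding.

#include <cassert>

// Illustrative enumerators; the real values come from the EDInfo TableGen
// backend, where an instruction has exactly one type rather than a flag set.
enum { kInstructionTypeNone, kInstructionTypeBranch, kInstructionTypeMove };

struct InfoSketch { unsigned instructionType; };

// Equality, not (flags & kInstructionFlagBranch): types are mutually
// exclusive under the new scheme.
static bool isBranch(const InfoSketch *info) {
  return info && info->instructionType == kInstructionTypeBranch;
}

int main() {
  InfoSketch branch = { kInstructionTypeBranch };
  InfoSketch move = { kInstructionTypeMove };
  assert(isBranch(&branch) && !isBranch(&move) && !isBranch(0));
  return 0;
}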
diff --git a/tools/edis/EDInst.h b/tools/edis/EDInst.h
index db03a7852eef..c8a747ff99f9 100644
--- a/tools/edis/EDInst.h
+++ b/tools/edis/EDInst.h
@@ -23,6 +23,10 @@
#include <string>
#include <vector>
+namespace llvm {
+ struct EDInstInfo;
+}
+
/// CachedResult - Encapsulates the result of a function along with the validity
/// of that result, so that slow functions don't need to run twice
struct CachedResult {
@@ -54,7 +58,7 @@ struct EDInst {
/// The containing MCInst
llvm::MCInst *Inst;
/// The instruction information provided by TableGen for this instruction
- const InstInfo *ThisInstInfo;
+ const llvm::EDInstInfo *ThisInstInfo;
/// The number of bytes for the machine code representation of the instruction
uint64_t ByteSize;
@@ -95,7 +99,7 @@ struct EDInst {
EDInst(llvm::MCInst *inst,
uint64_t byteSize,
EDDisassembler &disassembler,
- const InstInfo *instInfo);
+ const llvm::EDInstInfo *instInfo);
~EDInst();
/// byteSize - returns the number of bytes consumed by the machine code
diff --git a/tools/edis/EDMain.cpp b/tools/edis/EDMain.cpp
index 3585657ca62d..b6ca32f2db86 100644
--- a/tools/edis/EDMain.cpp
+++ b/tools/edis/EDMain.cpp
@@ -29,8 +29,7 @@ int EDGetDisassembler(EDDisassemblerRef *disassembler,
if (ret) {
*disassembler = ret;
return 0;
- }
- else {
+ } else {
return -1;
}
}
@@ -39,7 +38,7 @@ int EDGetRegisterName(const char** regName,
EDDisassemblerRef disassembler,
unsigned regID) {
const char* name = disassembler->nameWithRegisterID(regID);
- if(!name)
+ if (!name)
return -1;
*regName = name;
return 0;
@@ -63,10 +62,10 @@ unsigned int EDCreateInsts(EDInstRef *insts,
void *arg) {
unsigned int index;
- for (index = 0; index < count; index++) {
+ for (index = 0; index < count; ++index) {
EDInst *inst = disassembler->createInst(byteReader, address, arg);
- if(!inst)
+ if (!inst)
return index;
insts[index] = inst;
@@ -134,42 +133,42 @@ int EDOperandIndexForToken(EDTokenRef token) {
}
int EDTokenIsWhitespace(EDTokenRef token) {
- if(token->type() == EDToken::kTokenWhitespace)
+ if (token->type() == EDToken::kTokenWhitespace)
return 1;
else
return 0;
}
int EDTokenIsPunctuation(EDTokenRef token) {
- if(token->type() == EDToken::kTokenPunctuation)
+ if (token->type() == EDToken::kTokenPunctuation)
return 1;
else
return 0;
}
int EDTokenIsOpcode(EDTokenRef token) {
- if(token->type() == EDToken::kTokenOpcode)
+ if (token->type() == EDToken::kTokenOpcode)
return 1;
else
return 0;
}
int EDTokenIsLiteral(EDTokenRef token) {
- if(token->type() == EDToken::kTokenLiteral)
+ if (token->type() == EDToken::kTokenLiteral)
return 1;
else
return 0;
}
int EDTokenIsRegister(EDTokenRef token) {
- if(token->type() == EDToken::kTokenRegister)
+ if (token->type() == EDToken::kTokenRegister)
return 1;
else
return 0;
}
int EDTokenIsNegativeLiteral(EDTokenRef token) {
- if(token->type() != EDToken::kTokenLiteral)
+ if (token->type() != EDToken::kTokenLiteral)
return -1;
return token->literalSign();
@@ -177,7 +176,7 @@ int EDTokenIsNegativeLiteral(EDTokenRef token) {
int EDLiteralTokenAbsoluteValue(uint64_t *value,
EDTokenRef token) {
- if(token->type() != EDToken::kTokenLiteral)
+ if (token->type() != EDToken::kTokenLiteral)
return -1;
return token->literalAbsoluteValue(*value);
@@ -185,7 +184,7 @@ int EDLiteralTokenAbsoluteValue(uint64_t *value,
int EDRegisterTokenValue(unsigned *registerID,
EDTokenRef token) {
- if(token->type() != EDToken::kTokenRegister)
+ if (token->type() != EDToken::kTokenRegister)
return -1;
return token->registerID(*registerID);
@@ -215,7 +214,7 @@ int EDOperandIsMemory(EDOperandRef operand) {
int EDRegisterOperandValue(unsigned *value,
EDOperandRef operand) {
- if(!operand->isRegister())
+ if (!operand->isRegister())
return -1;
*value = operand->regVal();
return 0;
@@ -223,7 +222,7 @@ int EDRegisterOperandValue(unsigned *value,
int EDImmediateOperandValue(uint64_t *value,
EDOperandRef operand) {
- if(!operand->isImmediate())
+ if (!operand->isImmediate())
return -1;
*value = operand->immediateVal();
return 0;
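Taken together, the C entry points above support a small standalone client along these lines. This is a sketch, assuming llvm-c/EnhancedDisassembly.h and the EnhancedDisassembly library are available at build time; the EDGetInstString signature is inferred from the exports list rather than shown in this patch, and error handling is abbreviated.

#include "llvm-c/EnhancedDisassembly.h"
#include <cstdio>
#include <stdint.h>

static const uint8_t Bytes[] = { 0xc3 }; // x86 'ret'

// Byte reader callback: 0 on success, nonzero once the address runs off
// the end of the buffer.
static int reader(uint8_t *byte, uint64_t address, void *arg) {
  (void)arg;
  if (address >= sizeof(Bytes))
    return -1;
  *byte = Bytes[address];
  return 0;
}

int main() {
  EDDisassemblerRef disassembler;
  if (EDGetDisassembler(&disassembler, "i386-unknown-unknown",
                        kEDAssemblySyntaxX86ATT))
    return 1;

  EDInstRef inst;
  if (EDCreateInsts(&inst, 1, disassembler, reader, 0, NULL) != 1)
    return 1;

  const char *str;
  if (!EDGetInstString(&str, inst)) // 0 indicates success
    std::printf("%s", str);

  EDReleaseInst(inst);
  return 0;
}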
diff --git a/tools/edis/EDOperand.cpp b/tools/edis/EDOperand.cpp
index da6797e035d8..d63c1c6bfbf9 100644
--- a/tools/edis/EDOperand.cpp
+++ b/tools/edis/EDOperand.cpp
@@ -17,6 +17,7 @@
#include "EDInst.h"
#include "EDOperand.h"
+#include "llvm/MC/EDInstInfo.h"
#include "llvm/MC/MCInst.h"
using namespace llvm;
@@ -31,26 +32,77 @@ EDOperand::EDOperand(const EDDisassembler &disassembler,
MCOpIndex(mcOpIndex) {
unsigned int numMCOperands = 0;
- if(Disassembler.Key.Arch == Triple::x86 ||
- Disassembler.Key.Arch == Triple::x86_64) {
- uint8_t operandFlags = inst.ThisInstInfo->operandFlags[opIndex];
+ if (Disassembler.Key.Arch == Triple::x86 ||
+ Disassembler.Key.Arch == Triple::x86_64) {
+ uint8_t operandType = inst.ThisInstInfo->operandTypes[opIndex];
- if (operandFlags & kOperandFlagImmediate) {
+ switch (operandType) {
+ default:
+ break;
+ case kOperandTypeImmediate:
numMCOperands = 1;
- }
- else if (operandFlags & kOperandFlagRegister) {
+ break;
+ case kOperandTypeRegister:
numMCOperands = 1;
+ break;
+ case kOperandTypeX86Memory:
+ numMCOperands = 5;
+ break;
+ case kOperandTypeX86EffectiveAddress:
+ numMCOperands = 4;
+ break;
+ case kOperandTypeX86PCRelative:
+ numMCOperands = 1;
+ break;
}
- else if (operandFlags & kOperandFlagMemory) {
- if (operandFlags & kOperandFlagPCRelative) {
- numMCOperands = 1;
- }
- else {
- numMCOperands = 5;
- }
- }
- else if (operandFlags & kOperandFlagEffectiveAddress) {
+ }
+ else if (Disassembler.Key.Arch == Triple::arm ||
+ Disassembler.Key.Arch == Triple::thumb) {
+ uint8_t operandType = inst.ThisInstInfo->operandTypes[opIndex];
+
+ switch (operandType) {
+ default:
+ case kOperandTypeARMRegisterList:
+ break;
+ case kOperandTypeImmediate:
+ case kOperandTypeRegister:
+ case kOperandTypeARMBranchTarget:
+ case kOperandTypeARMSoImm:
+ case kOperandTypeThumb2SoImm:
+ case kOperandTypeARMSoImm2Part:
+ case kOperandTypeARMPredicate:
+ case kOperandTypeThumbITMask:
+ case kOperandTypeThumb2AddrModeImm8Offset:
+ case kOperandTypeARMTBAddrMode:
+ case kOperandTypeThumb2AddrModeImm8s4Offset:
+ numMCOperands = 1;
+ break;
+ case kOperandTypeThumb2SoReg:
+ case kOperandTypeARMAddrMode2Offset:
+ case kOperandTypeARMAddrMode3Offset:
+ case kOperandTypeARMAddrMode4:
+ case kOperandTypeARMAddrMode5:
+ case kOperandTypeARMAddrModePC:
+ case kOperandTypeThumb2AddrModeImm8:
+ case kOperandTypeThumb2AddrModeImm12:
+ case kOperandTypeThumb2AddrModeImm8s4:
+ case kOperandTypeThumbAddrModeRR:
+ case kOperandTypeThumbAddrModeSP:
+ numMCOperands = 2;
+ break;
+ case kOperandTypeARMSoReg:
+ case kOperandTypeARMAddrMode2:
+ case kOperandTypeARMAddrMode3:
+ case kOperandTypeThumb2AddrModeSoReg:
+ case kOperandTypeThumbAddrModeS1:
+ case kOperandTypeThumbAddrModeS2:
+ case kOperandTypeThumbAddrModeS4:
+ case kOperandTypeARMAddrMode6Offset:
+ numMCOperands = 3;
+ break;
+ case kOperandTypeARMAddrMode6:
numMCOperands = 4;
+ break;
}
}
@@ -63,70 +115,103 @@ EDOperand::~EDOperand() {
int EDOperand::evaluate(uint64_t &result,
EDRegisterReaderCallback callback,
void *arg) {
- if (Disassembler.Key.Arch == Triple::x86 ||
- Disassembler.Key.Arch == Triple::x86_64) {
- uint8_t operandFlags = Inst.ThisInstInfo->operandFlags[OpIndex];
-
- if (operandFlags & kOperandFlagImmediate) {
+ uint8_t operandType = Inst.ThisInstInfo->operandTypes[OpIndex];
+
+ switch (Disassembler.Key.Arch) {
+ default:
+ return -1;
+ case Triple::x86:
+ case Triple::x86_64:
+ switch (operandType) {
+ default:
+ return -1;
+ case kOperandTypeImmediate:
result = Inst.Inst->getOperand(MCOpIndex).getImm();
return 0;
- }
- if (operandFlags & kOperandFlagRegister) {
+ case kOperandTypeRegister:
+ {
unsigned reg = Inst.Inst->getOperand(MCOpIndex).getReg();
return callback(&result, reg, arg);
}
- if (operandFlags & kOperandFlagMemory ||
- operandFlags & kOperandFlagEffectiveAddress){
- if(operandFlags & kOperandFlagPCRelative) {
- int64_t displacement = Inst.Inst->getOperand(MCOpIndex).getImm();
+ case kOperandTypeX86PCRelative:
+ {
+ int64_t displacement = Inst.Inst->getOperand(MCOpIndex).getImm();
- uint64_t ripVal;
+ uint64_t ripVal;
- // TODO fix how we do this
+ // TODO fix how we do this
- if (callback(&ripVal, Disassembler.registerIDWithName("RIP"), arg))
- return -1;
+ if (callback(&ripVal, Disassembler.registerIDWithName("RIP"), arg))
+ return -1;
- result = ripVal + displacement;
- return 0;
- }
- else {
- unsigned baseReg = Inst.Inst->getOperand(MCOpIndex).getReg();
- uint64_t scaleAmount = Inst.Inst->getOperand(MCOpIndex+1).getImm();
- unsigned indexReg = Inst.Inst->getOperand(MCOpIndex+2).getReg();
- int64_t displacement = Inst.Inst->getOperand(MCOpIndex+3).getImm();
- //unsigned segmentReg = Inst.Inst->getOperand(MCOpIndex+4).getReg();
+ result = ripVal + displacement;
+ return 0;
+ }
+ case kOperandTypeX86Memory:
+ case kOperandTypeX86EffectiveAddress:
+ {
+ unsigned baseReg = Inst.Inst->getOperand(MCOpIndex).getReg();
+ uint64_t scaleAmount = Inst.Inst->getOperand(MCOpIndex+1).getImm();
+ unsigned indexReg = Inst.Inst->getOperand(MCOpIndex+2).getReg();
+ int64_t displacement = Inst.Inst->getOperand(MCOpIndex+3).getImm();
+ //unsigned segmentReg = Inst.Inst->getOperand(MCOpIndex+4).getReg();
- uint64_t addr = 0;
+ uint64_t addr = 0;
- if(baseReg) {
- uint64_t baseVal;
- if (callback(&baseVal, baseReg, arg))
- return -1;
- addr += baseVal;
- }
-
- if(indexReg) {
- uint64_t indexVal;
- if (callback(&indexVal, indexReg, arg))
- return -1;
- addr += (scaleAmount * indexVal);
- }
-
- addr += displacement;
+ if (baseReg) {
+ uint64_t baseVal;
+ if (callback(&baseVal, baseReg, arg))
+ return -1;
+ addr += baseVal;
+ }
- result = addr;
- return 0;
+ if (indexReg) {
+ uint64_t indexVal;
+ if (callback(&indexVal, indexReg, arg))
+ return -1;
+ addr += (scaleAmount * indexVal);
}
+
+ addr += displacement;
+
+ result = addr;
+ return 0;
+ }
+ }
+ break;
+ case Triple::arm:
+ case Triple::thumb:
+ switch (operandType) {
+ default:
+ return -1;
+ case kOperandTypeImmediate:
+ result = Inst.Inst->getOperand(MCOpIndex).getImm();
+ return 0;
+ case kOperandTypeRegister:
+ {
+ unsigned reg = Inst.Inst->getOperand(MCOpIndex).getReg();
+ return callback(&result, reg, arg);
+ }
+ case kOperandTypeARMBranchTarget:
+ {
+ int64_t displacement = Inst.Inst->getOperand(MCOpIndex).getImm();
+
+ uint64_t pcVal;
+
+ if (callback(&pcVal, Disassembler.registerIDWithName("PC"), arg))
+ return -1;
+
+ result = pcVal + displacement;
+ return 0;
+ }
}
- return -1;
}
return -1;
}
int EDOperand::isRegister() {
- return(Inst.ThisInstInfo->operandFlags[OpIndex] & kOperandFlagRegister);
+  return(Inst.ThisInstInfo->operandTypes[OpIndex] == kOperandTypeRegister);
}
unsigned EDOperand::regVal() {
@@ -134,7 +219,7 @@ unsigned EDOperand::regVal() {
}
int EDOperand::isImmediate() {
- return(Inst.ThisInstInfo->operandFlags[OpIndex] & kOperandFlagImmediate);
+  return(Inst.ThisInstInfo->operandTypes[OpIndex] == kOperandTypeImmediate);
}
uint64_t EDOperand::immediateVal() {
@@ -142,7 +227,38 @@ uint64_t EDOperand::immediateVal() {
}
int EDOperand::isMemory() {
- return(Inst.ThisInstInfo->operandFlags[OpIndex] & kOperandFlagMemory);
+ uint8_t operandType = Inst.ThisInstInfo->operandTypes[OpIndex];
+
+ switch (operandType) {
+ default:
+ return 0;
+ case kOperandTypeX86Memory:
+ case kOperandTypeX86PCRelative:
+ case kOperandTypeX86EffectiveAddress:
+ case kOperandTypeARMSoReg:
+ case kOperandTypeARMSoImm:
+ case kOperandTypeARMAddrMode2:
+ case kOperandTypeARMAddrMode2Offset:
+ case kOperandTypeARMAddrMode3:
+ case kOperandTypeARMAddrMode3Offset:
+ case kOperandTypeARMAddrMode4:
+ case kOperandTypeARMAddrMode5:
+ case kOperandTypeARMAddrMode6:
+ case kOperandTypeARMAddrModePC:
+ case kOperandTypeARMBranchTarget:
+ case kOperandTypeThumbAddrModeS1:
+ case kOperandTypeThumbAddrModeS2:
+ case kOperandTypeThumbAddrModeS4:
+ case kOperandTypeThumbAddrModeRR:
+ case kOperandTypeThumbAddrModeSP:
+ case kOperandTypeThumb2SoImm:
+ case kOperandTypeThumb2AddrModeImm8:
+ case kOperandTypeThumb2AddrModeImm8Offset:
+ case kOperandTypeThumb2AddrModeImm12:
+ case kOperandTypeThumb2AddrModeSoReg:
+ case kOperandTypeThumb2AddrModeImm8s4:
+ return 1;
+ }
}
#ifdef __BLOCKS__
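Every evaluate() path above funnels register reads through the caller's callback. Its contract, sketched with a fabricated in-memory register file standing in for real CPU state (regReader and the 16-entry array are hypothetical, not part of the API):

#include <stdint.h>

// Contract mirrored from the register-reader callbacks used above: write
// the register's value and return 0, or return nonzero so evaluation
// fails with -1.
static int regReader(uint64_t *value, unsigned regID, void *arg) {
  const uint64_t *regFile = (const uint64_t *)arg; // hypothetical state
  if (regID >= 16) // reject unknown register IDs rather than guessing
    return -1;
  *value = regFile[regID];
  return 0;
}

int main() {
  uint64_t regs[16] = { 0 };
  regs[4] = 0x1000; // pretend register 4 holds a stack pointer
  uint64_t v;
  return (regReader(&v, 4, regs) == 0 && v == 0x1000) ? 0 : 1;
}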
diff --git a/tools/edis/EDToken.cpp b/tools/edis/EDToken.cpp
index cd79152e3594..3bcb0a14b8c4 100644
--- a/tools/edis/EDToken.cpp
+++ b/tools/edis/EDToken.cpp
@@ -68,20 +68,20 @@ int EDToken::operandID() const {
}
int EDToken::literalSign() const {
- if(Type != kTokenLiteral)
+ if (Type != kTokenLiteral)
return -1;
return (LiteralSign ? 1 : 0);
}
int EDToken::literalAbsoluteValue(uint64_t &value) const {
- if(Type != kTokenLiteral)
+ if (Type != kTokenLiteral)
return -1;
value = LiteralAbsoluteValue;
return 0;
}
int EDToken::registerID(unsigned &registerID) const {
- if(Type != kTokenRegister)
+ if (Type != kTokenRegister)
return -1;
registerID = RegisterID;
return 0;
@@ -94,7 +94,7 @@ int EDToken::tokenize(std::vector<EDToken*> &tokens,
SmallVector<MCParsedAsmOperand*, 5> parsedOperands;
SmallVector<AsmToken, 10> asmTokens;
- if(disassembler.parseInst(parsedOperands, asmTokens, str))
+ if (disassembler.parseInst(parsedOperands, asmTokens, str))
return -1;
SmallVectorImpl<MCParsedAsmOperand*>::iterator operandIterator;
@@ -115,7 +115,7 @@ int EDToken::tokenize(std::vector<EDToken*> &tokens,
const char *tokenPointer = tokenLoc.getPointer();
- if(tokenPointer > wsPointer) {
+ if (tokenPointer > wsPointer) {
unsigned long wsLength = tokenPointer - wsPointer;
EDToken *whitespaceToken = new EDToken(StringRef(wsPointer, wsLength),
@@ -164,7 +164,7 @@ int EDToken::tokenize(std::vector<EDToken*> &tokens,
int64_t intVal = tokenIterator->getIntVal();
- if(intVal < 0)
+ if (intVal < 0)
token->makeLiteral(true, -intVal);
else
token->makeLiteral(false, intVal);
@@ -182,14 +182,14 @@ int EDToken::tokenize(std::vector<EDToken*> &tokens,
}
}
- if(operandIterator != parsedOperands.end() &&
+ if (operandIterator != parsedOperands.end() &&
tokenLoc.getPointer() >=
(*operandIterator)->getStartLoc().getPointer()) {
/// operandIndex == 0 means the operand is the instruction (which the
/// AsmParser treats as an operand but edis does not). We therefore skip
/// operandIndex == 0 and subtract 1 from all other operand indices.
- if(operandIndex > 0)
+ if (operandIndex > 0)
token->setOperandID(operandOrder[operandIndex - 1]);
}
@@ -200,7 +200,7 @@ int EDToken::tokenize(std::vector<EDToken*> &tokens,
}
int EDToken::getString(const char*& buf) {
- if(PermStr.length() == 0) {
+ if (PermStr.length() == 0) {
PermStr = Str.str();
}
buf = PermStr.c_str();
diff --git a/tools/edis/EnhancedDisassembly.exports b/tools/edis/EnhancedDisassembly.exports
index d3f87435cc3d..7050f7f32948 100644
--- a/tools/edis/EnhancedDisassembly.exports
+++ b/tools/edis/EnhancedDisassembly.exports
@@ -1,36 +1,36 @@
-_EDGetDisassembler
-_EDGetRegisterName
-_EDRegisterIsStackPointer
-_EDRegisterIsProgramCounter
-_EDCreateInsts
-_EDReleaseInst
-_EDInstByteSize
-_EDGetInstString
-_EDInstIsBranch
-_EDInstIsMove
-_EDBranchTargetID
-_EDMoveSourceID
-_EDMoveTargetID
-_EDNumTokens
-_EDGetToken
-_EDGetTokenString
-_EDOperandIndexForToken
-_EDTokenIsWhitespace
-_EDTokenIsPunctuation
-_EDTokenIsOpcode
-_EDTokenIsLiteral
-_EDTokenIsRegister
-_EDTokenIsNegativeLiteral
-_EDLiteralTokenAbsoluteValue
-_EDRegisterTokenValue
-_EDNumOperands
-_EDGetOperand
-_EDOperandIsRegister
-_EDOperandIsImmediate
-_EDOperandIsMemory
-_EDRegisterOperandValue
-_EDImmediateOperandValue
-_EDEvaluateOperand
-_EDBlockCreateInsts
-_EDBlockEvaluateOperand
-_EDBlockVisitTokens
+EDGetDisassembler
+EDGetRegisterName
+EDRegisterIsStackPointer
+EDRegisterIsProgramCounter
+EDCreateInsts
+EDReleaseInst
+EDInstByteSize
+EDGetInstString
+EDInstIsBranch
+EDInstIsMove
+EDBranchTargetID
+EDMoveSourceID
+EDMoveTargetID
+EDNumTokens
+EDGetToken
+EDGetTokenString
+EDOperandIndexForToken
+EDTokenIsWhitespace
+EDTokenIsPunctuation
+EDTokenIsOpcode
+EDTokenIsLiteral
+EDTokenIsRegister
+EDTokenIsNegativeLiteral
+EDLiteralTokenAbsoluteValue
+EDRegisterTokenValue
+EDNumOperands
+EDGetOperand
+EDOperandIsRegister
+EDOperandIsImmediate
+EDOperandIsMemory
+EDRegisterOperandValue
+EDImmediateOperandValue
+EDEvaluateOperand
+EDBlockCreateInsts
+EDBlockEvaluateOperand
+EDBlockVisitTokens
diff --git a/tools/edis/Makefile b/tools/edis/Makefile
index 7f7b097e2ce3..9151f627bb0c 100644
--- a/tools/edis/Makefile
+++ b/tools/edis/Makefile
@@ -12,13 +12,19 @@ LIBRARYNAME = EnhancedDisassembly
BUILT_SOURCES = EDInfo.inc
+EXPORTED_SYMBOL_FILE = $(PROJ_SRC_DIR)/EnhancedDisassembly.exports
+
# Include this here so we can get the configuration of the targets
# that have been configured for construction. We have to do this
# early so we can set up LINK_COMPONENTS before including Makefile.rules
include $(LEVEL)/Makefile.config
-LINK_LIBS_IN_SHARED = 1
-SHARED_LIBRARY = 1
+ifeq ($(ENABLE_PIC),1)
+ ifneq ($(TARGET_OS), $(filter $(TARGET_OS), Cygwin MingW))
+ LINK_LIBS_IN_SHARED = 1
+ SHARED_LIBRARY = 1
+ endif
+endif
LINK_COMPONENTS := $(TARGETS_TO_BUILD) x86asmprinter x86disassembler
@@ -28,11 +34,10 @@ ifeq ($(HOST_OS),Darwin)
# extra options to override libtool defaults
LLVMLibsOptions := $(LLVMLibsOptions) \
-avoid-version \
- -Wl,-exported_symbols_list -Wl,$(PROJ_SRC_DIR)/EnhancedDisassembly.exports \
-Wl,-dead_strip
ifdef EDIS_VERSION
- LLVMLibsOptions := -Wl,-current_version -Wl,$(EDIS_VERSION) \
+ LLVMLibsOptions := $(LLVMLibsOptions) -Wl,-current_version -Wl,$(EDIS_VERSION) \
-Wl,-compatibility_version -Wl,1
endif
diff --git a/tools/gold/Makefile b/tools/gold/Makefile
index 7bac4ec62dea..77eacb7ee5bf 100644
--- a/tools/gold/Makefile
+++ b/tools/gold/Makefile
@@ -10,6 +10,8 @@
LEVEL = ../..
LIBRARYNAME = libLLVMgold
+EXPORTED_SYMBOL_FILE = $(PROJ_SRC_DIR)/gold.exports
+
# Include this here so we can get the configuration of the targets
# that have been configured for construction. We have to do this
# early so we can set up LINK_COMPONENTS before including Makefile.rules
diff --git a/tools/gold/gold-plugin.cpp b/tools/gold/gold-plugin.cpp
index 0e1db1bf2eaa..16e645f8daaa 100644
--- a/tools/gold/gold-plugin.cpp
+++ b/tools/gold/gold-plugin.cpp
@@ -58,16 +58,16 @@ namespace {
}
namespace options {
- bool generate_api_file = false;
- const char *as_path = NULL;
+ static bool generate_api_file = false;
+ static const char *as_path = NULL;
// Additional options to pass into the code generator.
// Note: This array will contain all plugin options which are not claimed
// as plugin exclusive to pass to the code generator.
// For example, "generate-api-file" and "as" options are for the plugin
// use only and will not be passed.
- std::vector<std::string> extra;
+ static std::vector<std::string> extra;
- void process_plugin_option(const char* opt)
+ static void process_plugin_option(const char* opt)
{
if (opt == NULL)
return;
@@ -88,10 +88,10 @@ namespace options {
}
}
-ld_plugin_status claim_file_hook(const ld_plugin_input_file *file,
- int *claimed);
-ld_plugin_status all_symbols_read_hook(void);
-ld_plugin_status cleanup_hook(void);
+static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file,
+ int *claimed);
+static ld_plugin_status all_symbols_read_hook(void);
+static ld_plugin_status cleanup_hook(void);
extern "C" ld_plugin_status onload(ld_plugin_tv *tv);
ld_plugin_status onload(ld_plugin_tv *tv) {
@@ -192,8 +192,8 @@ ld_plugin_status onload(ld_plugin_tv *tv) {
/// claim_file_hook - called by gold to see whether this file is one that
/// our plugin can handle. We'll try to open it and register all the symbols
/// with add_symbol if possible.
-ld_plugin_status claim_file_hook(const ld_plugin_input_file *file,
- int *claimed) {
+static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file,
+ int *claimed) {
void *buf = NULL;
if (file->offset) {
// Gold has found what might be IR part-way inside of a file, such as
@@ -316,7 +316,7 @@ ld_plugin_status claim_file_hook(const ld_plugin_input_file *file,
/// At this point, we use get_symbols to see if any of our definitions have
/// been overridden by a native object file. Then, perform optimization and
/// codegen.
-ld_plugin_status all_symbols_read_hook(void) {
+static ld_plugin_status all_symbols_read_hook(void) {
lto_code_gen_t cg = lto_codegen_create();
for (std::list<claimed_file>::iterator I = Modules.begin(),
@@ -340,9 +340,7 @@ ld_plugin_status all_symbols_read_hook(void) {
E = Modules.end(); I != E; ++I) {
(*get_symbols)(I->handle, I->syms.size(), &I->syms[0]);
for (unsigned i = 0, e = I->syms.size(); i != e; i++) {
- if (I->syms[i].resolution == LDPR_PREVAILING_DEF ||
- (I->syms[i].def == LDPK_COMMON &&
- I->syms[i].resolution == LDPR_RESOLVED_IR)) {
+ if (I->syms[i].resolution == LDPR_PREVAILING_DEF) {
lto_codegen_add_must_preserve_symbol(cg, I->syms[i].name);
anySymbolsPreserved = true;
@@ -387,7 +385,7 @@ ld_plugin_status all_symbols_read_hook(void) {
(*message)(LDPL_ERROR, "%s", ErrMsg.c_str());
return LDPS_ERR;
}
- raw_fd_ostream *objFile =
+ raw_fd_ostream *objFile =
new raw_fd_ostream(uniqueObjPath.c_str(), ErrMsg,
raw_fd_ostream::F_Binary);
if (!ErrMsg.empty()) {
@@ -412,7 +410,7 @@ ld_plugin_status all_symbols_read_hook(void) {
return LDPS_OK;
}
-ld_plugin_status cleanup_hook(void) {
+static ld_plugin_status cleanup_hook(void) {
std::string ErrMsg;
for (int i = 0, e = Cleanup.size(); i != e; ++i)
diff --git a/tools/gold/gold.exports b/tools/gold/gold.exports
new file mode 100644
index 000000000000..277a33a1ec34
--- /dev/null
+++ b/tools/gold/gold.exports
@@ -0,0 +1 @@
+onload
diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp
index dccfb26e7455..378cc63201cd 100644
--- a/tools/llc/llc.cpp
+++ b/tools/llc/llc.cpp
@@ -49,9 +49,6 @@ InputFilename(cl::Positional, cl::desc("<input bitcode>"), cl::init("-"));
static cl::opt<std::string>
OutputFilename("o", cl::desc("Output filename"), cl::value_desc("filename"));
-static cl::opt<bool>
-Force("f", cl::desc("Enable binary output on terminals"));
-
// Determine optimization level.
static cl::opt<char>
OptLevel("O",
diff --git a/tools/lli/lli.cpp b/tools/lli/lli.cpp
index 81c17cd8fc16..4e3e07ffd22c 100644
--- a/tools/lli/lli.cpp
+++ b/tools/lli/lli.cpp
@@ -192,7 +192,7 @@ int main(int argc, char **argv, char * const *envp) {
} else {
// Otherwise, if there is a .bc suffix on the executable strip it off, it
// might confuse the program.
- if (InputFile.rfind(".bc") == InputFile.length() - 3)
+ if (StringRef(InputFile).endswith(".bc"))
InputFile.erase(InputFile.length() - 3);
}
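The lli change above is more than style: for a filename shorter than three characters, rfind(".bc") returns npos and InputFile.length() - 3 wraps around to npos as well, so the old test matched spuriously and the subsequent erase() could throw. A minimal standalone reproduction of the safe form (endsWithBc is an illustrative helper, not LLVM API):

#include <cassert>
#include <string>

static bool endsWithBc(const std::string &s) {
  // Length check first so the position arithmetic cannot wrap.
  return s.size() >= 3 && s.compare(s.size() - 3, 3, ".bc") == 0;
}

int main() {
  assert(endsWithBc("prog.bc"));
  assert(!endsWithBc("bc")); // the rfind-based test wrongly matched this
  return 0;
}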
diff --git a/tools/llvm-ld/llvm-ld.cpp b/tools/llvm-ld/llvm-ld.cpp
index 29b74b8a72bf..ce52b59f0d26 100644
--- a/tools/llvm-ld/llvm-ld.cpp
+++ b/tools/llvm-ld/llvm-ld.cpp
@@ -399,7 +399,7 @@ static int GenerateNative(const std::string &OutputFilename,
// Run the compiler to assembly and link together the program.
int R = sys::Program::ExecuteAndWait(
- gcc, &Args[0], (const char**)clean_env, 0, 0, 0, &ErrMsg);
+ gcc, &Args[0], const_cast<const char **>(clean_env), 0, 0, 0, &ErrMsg);
delete [] clean_env;
return R;
}
diff --git a/tools/llvm-mc/CMakeLists.txt b/tools/llvm-mc/CMakeLists.txt
index 49c29324d4b3..8b61a4e8db15 100644
--- a/tools/llvm-mc/CMakeLists.txt
+++ b/tools/llvm-mc/CMakeLists.txt
@@ -1,3 +1,4 @@
+set(LLVM_USED_LIBS EnhancedDisassembly)
set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} support MC MCParser)
add_llvm_tool(llvm-mc
diff --git a/tools/llvm-mc/Disassembler.cpp b/tools/llvm-mc/Disassembler.cpp
index 9fe07909284c..9234a14bc1bf 100644
--- a/tools/llvm-mc/Disassembler.cpp
+++ b/tools/llvm-mc/Disassembler.cpp
@@ -15,6 +15,7 @@
#include "Disassembler.h"
#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
@@ -24,6 +25,9 @@
#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/SourceMgr.h"
+
+#include "llvm-c/EnhancedDisassembly.h"
+
using namespace llvm;
typedef std::vector<std::pair<unsigned char, const char*> > ByteArrayTy;
@@ -64,8 +68,7 @@ static bool PrintInsts(const MCDisassembler &DisAsm,
/*REMOVE*/ nulls())) {
Printer.printInst(&Inst, outs());
outs() << "\n";
- }
- else {
+ } else {
SM.PrintMessage(SMLoc::getFromPointer(Bytes[Index].second),
"invalid instruction encoding", "warning");
if (Size == 0)
@@ -76,37 +79,9 @@ static bool PrintInsts(const MCDisassembler &DisAsm,
return false;
}
-int Disassembler::disassemble(const Target &T, const std::string &Triple,
- MemoryBuffer &Buffer) {
- // Set up disassembler.
- OwningPtr<const MCAsmInfo> AsmInfo(T.createAsmInfo(Triple));
-
- if (!AsmInfo) {
- errs() << "error: no assembly info for target " << Triple << "\n";
- return -1;
- }
-
- OwningPtr<const MCDisassembler> DisAsm(T.createMCDisassembler());
- if (!DisAsm) {
- errs() << "error: no disassembler for target " << Triple << "\n";
- return -1;
- }
-
- OwningPtr<MCInstPrinter> IP(T.createMCInstPrinter(0, *AsmInfo));
- if (!IP) {
- errs() << "error: no instruction printer for target " << Triple << '\n';
- return -1;
- }
-
- bool ErrorOccurred = false;
-
- SourceMgr SM;
- SM.AddNewSourceBuffer(&Buffer, SMLoc());
-
- // Convert the input to a vector for disassembly.
- ByteArrayTy ByteArray;
-
- StringRef Str = Buffer.getBuffer();
+static bool ByteArrayFromString(ByteArrayTy &ByteArray,
+ StringRef &Str,
+ SourceMgr &SM) {
while (!Str.empty()) {
// Strip horizontal whitespace.
if (size_t Pos = Str.find_first_not_of(" \t\r")) {
@@ -119,9 +94,9 @@ int Disassembler::disassemble(const Target &T, const std::string &Triple,
if (Str[0] == '\n' || Str[0] == '#') {
// Strip to the end of line if we already processed any bytes on this
// line. This strips the comment and/or the \n.
- if (Str[0] == '\n')
+ if (Str[0] == '\n') {
Str = Str.substr(1);
- else {
+ } else {
Str = Str.substr(Str.find_first_of('\n'));
if (!Str.empty())
Str = Str.substr(1);
@@ -138,8 +113,7 @@ int Disassembler::disassemble(const Target &T, const std::string &Triple,
if (Value.getAsInteger(0, ByteVal) || ByteVal > 255) {
// If we have an error, print it and skip to the end of line.
SM.PrintMessage(SMLoc::getFromPointer(Value.data()),
- "invalid input token", "error");
- ErrorOccurred = true;
+ "invalid input token", "error");
Str = Str.substr(Str.find('\n'));
ByteArray.clear();
continue;
@@ -149,8 +123,231 @@ int Disassembler::disassemble(const Target &T, const std::string &Triple,
Str = Str.substr(Next);
}
+ return false;
+}
+
+int Disassembler::disassemble(const Target &T, const std::string &Triple,
+ MemoryBuffer &Buffer) {
+ // Set up disassembler.
+ OwningPtr<const MCAsmInfo> AsmInfo(T.createAsmInfo(Triple));
+
+ if (!AsmInfo) {
+ errs() << "error: no assembly info for target " << Triple << "\n";
+ return -1;
+ }
+
+ OwningPtr<const MCDisassembler> DisAsm(T.createMCDisassembler());
+ if (!DisAsm) {
+ errs() << "error: no disassembler for target " << Triple << "\n";
+ return -1;
+ }
+
+ int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
+ OwningPtr<MCInstPrinter> IP(T.createMCInstPrinter(AsmPrinterVariant,
+ *AsmInfo));
+ if (!IP) {
+ errs() << "error: no instruction printer for target " << Triple << '\n';
+ return -1;
+ }
+
+ bool ErrorOccurred = false;
+
+ SourceMgr SM;
+ SM.AddNewSourceBuffer(&Buffer, SMLoc());
+
+ // Convert the input to a vector for disassembly.
+ ByteArrayTy ByteArray;
+ StringRef Str = Buffer.getBuffer();
+
+ ErrorOccurred |= ByteArrayFromString(ByteArray, Str, SM);
+
if (!ByteArray.empty())
ErrorOccurred |= PrintInsts(*DisAsm, *IP, ByteArray, SM);
return ErrorOccurred;
}
+
+static int byteArrayReader(uint8_t *B, uint64_t A, void *Arg) {
+ ByteArrayTy &ByteArray = *((ByteArrayTy*)Arg);
+
+ if (A >= ByteArray.size())
+ return -1;
+
+ *B = ByteArray[A].first;
+
+ return 0;
+}
+
+static int verboseEvaluator(uint64_t *V, unsigned R, void *Arg) {
+ EDDisassemblerRef &disassembler = *((EDDisassemblerRef*)Arg);
+
+ const char *regName;
+
+ if (!EDGetRegisterName(&regName,
+ disassembler,
+ R))
+ outs() << "[" << regName << "/" << R << "]";
+ if (EDRegisterIsStackPointer(disassembler, R))
+ outs() << "(sp)";
+ if (EDRegisterIsProgramCounter(disassembler, R))
+ outs() << "(pc)";
+
+ *V = 0;
+
+ return 0;
+}
+
+int Disassembler::disassembleEnhanced(const std::string &TS,
+ MemoryBuffer &Buffer) {
+ ByteArrayTy ByteArray;
+ StringRef Str = Buffer.getBuffer();
+ SourceMgr SM;
+
+ SM.AddNewSourceBuffer(&Buffer, SMLoc());
+
+ if (ByteArrayFromString(ByteArray, Str, SM)) {
+ return -1;
+ }
+
+ EDDisassemblerRef disassembler;
+
+ Triple T(TS);
+ EDAssemblySyntax_t AS;
+
+ switch (T.getArch()) {
+ default:
+ errs() << "error: no default assembly syntax for " << TS.c_str() << "\n";
+ return -1;
+ case Triple::arm:
+ case Triple::thumb:
+ AS = kEDAssemblySyntaxARMUAL;
+ break;
+ case Triple::x86:
+ case Triple::x86_64:
+ AS = kEDAssemblySyntaxX86ATT;
+ break;
+ }
+
+ if (EDGetDisassembler(&disassembler,
+ TS.c_str(),
+ AS)) {
+ errs() << "error: couldn't get disassembler for " << TS.c_str() << "\n";
+ return -1;
+ }
+
+ EDInstRef inst;
+
+  if (EDCreateInsts(&inst, 1, disassembler, byteArrayReader, 0, &ByteArray)
+ != 1) {
+ errs() << "error: Didn't get an instruction\n";
+ return -1;
+ }
+
+ int numTokens = EDNumTokens(inst);
+
+ if (numTokens < 0) {
+ errs() << "error: Couldn't count the instruction's tokens\n";
+ return -1;
+ }
+
+ int tokenIndex;
+
+ for (tokenIndex = 0; tokenIndex < numTokens; ++tokenIndex) {
+ EDTokenRef token;
+
+ if (EDGetToken(&token, inst, tokenIndex)) {
+ errs() << "error: Couldn't get token\n";
+ return -1;
+ }
+
+ const char *buf;
+
+ if (EDGetTokenString(&buf, token)) {
+ errs() << "error: Couldn't get string for token\n";
+ return -1;
+ }
+
+ outs() << "[";
+
+ int operandIndex = EDOperandIndexForToken(token);
+
+ if (operandIndex >= 0)
+ outs() << operandIndex << "-";
+
+ if (EDTokenIsWhitespace(token)) {
+ outs() << "w";
+ } else if (EDTokenIsPunctuation(token)) {
+ outs() << "p";
+ } else if (EDTokenIsOpcode(token)) {
+ outs() << "o";
+ } else if (EDTokenIsLiteral(token)) {
+ outs() << "l";
+ } else if (EDTokenIsRegister(token)) {
+ outs() << "r";
+ } else {
+ outs() << "?";
+ }
+
+ outs() << ":" << buf;
+
+ if (EDTokenIsLiteral(token)) {
+ outs() << "=";
+ if (EDTokenIsNegativeLiteral(token))
+ outs() << "-";
+ uint64_t absoluteValue;
+ if (EDLiteralTokenAbsoluteValue(&absoluteValue, token)) {
+ errs() << "error: Couldn't get the value of a literal token\n";
+ return -1;
+ }
+ outs() << absoluteValue;
+ } else if (EDTokenIsRegister(token)) {
+ outs() << "=";
+ unsigned regID;
+ if (EDRegisterTokenValue(&regID, token)) {
+ errs() << "error: Couldn't get the ID of a register token\n";
+ return -1;
+ }
+ outs() << "r" << regID;
+ }
+
+ outs() << "]";
+ }
+
+ outs() << " ";
+
+ int numOperands = EDNumOperands(inst);
+
+ if (numOperands < 0) {
+ errs() << "error: Couldn't count operands\n";
+ return -1;
+ }
+
+ int operandIndex;
+
+ for (operandIndex = 0; operandIndex < numOperands; ++operandIndex) {
+ outs() << operandIndex << ":";
+
+ EDOperandRef operand;
+
+ if (EDGetOperand(&operand,
+ inst,
+ operandIndex)) {
+ errs() << "error: Couldn't get operand\n";
+ return -1;
+ }
+
+ uint64_t evaluatedResult;
+
+ EDEvaluateOperand(&evaluatedResult,
+ operand,
+ verboseEvaluator,
+ &disassembler);
+
+ outs() << " ";
+ }
+
+ outs() << "\n";
+
+ return 0;
+}
+
diff --git a/tools/llvm-mc/Disassembler.h b/tools/llvm-mc/Disassembler.h
index 78c2f854946d..3da23965bdb1 100644
--- a/tools/llvm-mc/Disassembler.h
+++ b/tools/llvm-mc/Disassembler.h
@@ -27,6 +27,9 @@ public:
static int disassemble(const Target &target,
const std::string &tripleString,
MemoryBuffer &buffer);
+
+ static int disassembleEnhanced(const std::string &tripleString,
+ MemoryBuffer &buffer);
};
} // namespace llvm
diff --git a/tools/llvm-mc/Makefile b/tools/llvm-mc/Makefile
index 5b0fe3f54462..f92e643c3323 100644
--- a/tools/llvm-mc/Makefile
+++ b/tools/llvm-mc/Makefile
@@ -22,3 +22,6 @@ include $(LEVEL)/Makefile.config
LINK_COMPONENTS := $(TARGETS_TO_BUILD) MCParser MC support
include $(LLVM_SRC_ROOT)/Makefile.rules
+
+# Using LIBS instead of USEDLIBS to force static linking
+LIBS += $(LLVMLibDir)/libEnhancedDisassembly.a
diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp
index c6657f5d3e74..47f67c5e1bbe 100644
--- a/tools/llvm-mc/llvm-mc.cpp
+++ b/tools/llvm-mc/llvm-mc.cpp
@@ -97,7 +97,8 @@ NoInitialTextSection("n", cl::desc(
enum ActionType {
AC_AsLex,
AC_Assemble,
- AC_Disassemble
+ AC_Disassemble,
+ AC_EDisassemble
};
static cl::opt<ActionType>
@@ -109,6 +110,8 @@ Action(cl::desc("Action to perform:"),
"Assemble a .s file (default)"),
clEnumValN(AC_Disassemble, "disassemble",
"Disassemble strings of hex bytes"),
+ clEnumValN(AC_EDisassemble, "edis",
+ "Enhanced disassembly of strings of hex bytes"),
clEnumValEnd));
static const Target *GetTarget(const char *ProgName) {
@@ -325,7 +328,7 @@ static int AssembleInput(const char *ProgName) {
return Res;
}
-static int DisassembleInput(const char *ProgName) {
+static int DisassembleInput(const char *ProgName, bool Enhanced) {
const Target *TheTarget = GetTarget(ProgName);
if (!TheTarget)
return 0;
@@ -344,7 +347,10 @@ static int DisassembleInput(const char *ProgName) {
return 1;
}
- return Disassembler::disassemble(*TheTarget, TripleName, *Buffer);
+ if (Enhanced)
+ return Disassembler::disassembleEnhanced(TripleName, *Buffer);
+ else
+ return Disassembler::disassemble(*TheTarget, TripleName, *Buffer);
}
@@ -371,7 +377,9 @@ int main(int argc, char **argv) {
case AC_Assemble:
return AssembleInput(argv[0]);
case AC_Disassemble:
- return DisassembleInput(argv[0]);
+ return DisassembleInput(argv[0], false);
+ case AC_EDisassemble:
+ return DisassembleInput(argv[0], true);
}
return 0;
diff --git a/tools/llvmc/plugins/Base/Base.td.in b/tools/llvmc/plugins/Base/Base.td.in
index 3c4f4e959539..23f46b7e57d0 100644
--- a/tools/llvmc/plugins/Base/Base.td.in
+++ b/tools/llvmc/plugins/Base/Base.td.in
@@ -244,7 +244,7 @@ def llc : Tool<
[(in_language ["llvm-bitcode", "llvm-assembler"]),
(out_language "assembler"),
(output_suffix "s"),
- (command "llc -f"),
+ (command "llc"),
(actions (case
(switch_on "S"), (stop_compilation),
(switch_on "O0"), (forward "O0"),
@@ -263,7 +263,7 @@ def llc : Tool<
// Base class for linkers
class llvm_gcc_based_linker <string cmd_prefix> : Tool<
-[(in_language "object-code"),
+[(in_language ["object-code", "static-library"]),
(out_language "executable"),
(output_suffix "out"),
(command cmd_prefix),
@@ -317,6 +317,7 @@ def LanguageMap : LanguageMap<
LangToSuffixes<"llvm-assembler", ["ll"]>,
LangToSuffixes<"llvm-bitcode", ["bc"]>,
LangToSuffixes<"object-code", ["o", "*empty*"]>,
+ LangToSuffixes<"static-library", ["a", "lib"]>,
LangToSuffixes<"executable", ["out"]>
]>;
diff --git a/tools/lto/LTOCodeGenerator.cpp b/tools/lto/LTOCodeGenerator.cpp
index bc65b3afc28a..10105921c4eb 100644
--- a/tools/lto/LTOCodeGenerator.cpp
+++ b/tools/lto/LTOCodeGenerator.cpp
@@ -328,7 +328,7 @@ void LTOCodeGenerator::applyScopeRestrictions() {
}
for (Module::global_iterator v = mergedModule->global_begin(),
e = mergedModule->global_end(); v != e; ++v) {
- if (v->isDeclaration() &&
+ if (!v->isDeclaration() &&
_mustPreserveSymbols.count(mangler.getNameWithPrefix(v)))
mustPreserveList.push_back(::strdup(v->getNameStr().c_str()));
}
@@ -353,24 +353,10 @@ bool LTOCodeGenerator::generateAssemblyCode(formatted_raw_ostream& out,
Module* mergedModule = _linker.getModule();
- // If target supports exception handling then enable it now.
- switch (_target->getMCAsmInfo()->getExceptionHandlingType()) {
- case ExceptionHandling::Dwarf:
- llvm::DwarfExceptionHandling = true;
- break;
- case ExceptionHandling::SjLj:
- llvm::SjLjExceptionHandling = true;
- break;
- case ExceptionHandling::None:
- break;
- default:
- assert (0 && "Unknown exception handling model!");
- }
-
// if options were requested, set them
if ( !_codegenOptions.empty() )
cl::ParseCommandLineOptions(_codegenOptions.size(),
- (char**)&_codegenOptions[0]);
+ const_cast<char **>(&_codegenOptions[0]));
// Instantiate the pass manager to organize the passes.
PassManager passes;
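The flipped condition in applyScopeRestrictions() is the substantive fix in this file: must-preserve only makes sense for symbols the merged module actually defines, since declarations are resolved by some other object file. A schematic sketch of the corrected filter; GlobalValueSketch and shouldPreserve are illustrative reductions, not the LLVM types.

#include <cassert>

// GlobalValue reduced to the one predicate the fix depends on.
struct GlobalValueSketch {
  bool DeclarationOnly;
  bool isDeclaration() const { return DeclarationOnly; }
};

// Corrected filter: only definitions owned by the merged module can be
// preserved; the real loop also consults _mustPreserveSymbols by name.
static bool shouldPreserve(const GlobalValueSketch &gv, bool listed) {
  return !gv.isDeclaration() && listed;
}

int main() {
  GlobalValueSketch def = { false }, decl = { true };
  assert(shouldPreserve(def, true));
  assert(!shouldPreserve(decl, true)); // the old code kept these instead
  return 0;
}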
diff --git a/tools/lto/Makefile b/tools/lto/Makefile
index 3120aa52c953..8d57333f9c0c 100644
--- a/tools/lto/Makefile
+++ b/tools/lto/Makefile
@@ -10,6 +10,8 @@
LEVEL = ../..
LIBRARYNAME = LTO
+EXPORTED_SYMBOL_FILE = $(PROJ_SRC_DIR)/lto.exports
+
# Include this here so we can get the configuration of the targets
# that have been configured for construction. We have to do this
# early so we can set up LINK_COMPONENTS before including Makefile.rules
@@ -32,7 +34,6 @@ ifeq ($(HOST_OS),Darwin)
# extra options to override libtool defaults
LLVMLibsOptions := $(LLVMLibsOptions) \
-avoid-version \
- -Wl,-exported_symbols_list -Wl,$(PROJ_SRC_DIR)/lto.exports \
-Wl,-dead_strip \
-Wl,-seg1addr -Wl,0xE0000000
diff --git a/tools/lto/lto.exports b/tools/lto/lto.exports
index 01f43d1c36da..9011cf602b1a 100644
--- a/tools/lto/lto.exports
+++ b/tools/lto/lto.exports
@@ -1,24 +1,23 @@
-_lto_get_error_message
-_lto_get_version
-_lto_module_create
-_lto_module_create_from_memory
-_lto_module_get_num_symbols
-_lto_module_get_symbol_attribute
-_lto_module_get_symbol_name
-_lto_module_get_target_triple
-_lto_module_is_object_file
-_lto_module_is_object_file_for_target
-_lto_module_is_object_file_in_memory
-_lto_module_is_object_file_in_memory_for_target
-_lto_module_dispose
-_lto_codegen_add_module
-_lto_codegen_add_must_preserve_symbol
-_lto_codegen_compile
-_lto_codegen_create
-_lto_codegen_dispose
-_lto_codegen_set_debug_model
-_lto_codegen_set_pic_model
-_lto_codegen_write_merged_modules
-_lto_codegen_debug_options
-_lto_codegen_set_assembler_path
-
+lto_get_error_message
+lto_get_version
+lto_module_create
+lto_module_create_from_memory
+lto_module_get_num_symbols
+lto_module_get_symbol_attribute
+lto_module_get_symbol_name
+lto_module_get_target_triple
+lto_module_is_object_file
+lto_module_is_object_file_for_target
+lto_module_is_object_file_in_memory
+lto_module_is_object_file_in_memory_for_target
+lto_module_dispose
+lto_codegen_add_module
+lto_codegen_add_must_preserve_symbol
+lto_codegen_compile
+lto_codegen_create
+lto_codegen_dispose
+lto_codegen_set_debug_model
+lto_codegen_set_pic_model
+lto_codegen_write_merged_modules
+lto_codegen_debug_options
+lto_codegen_set_assembler_path
diff --git a/tools/opt/AnalysisWrappers.cpp b/tools/opt/AnalysisWrappers.cpp
index 18360f837e93..f548d007d78e 100644
--- a/tools/opt/AnalysisWrappers.cpp
+++ b/tools/opt/AnalysisWrappers.cpp
@@ -22,7 +22,6 @@
#include "llvm/Support/CallSite.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Support/raw_ostream.h"
-#include <iostream>
using namespace llvm;
namespace {
diff --git a/tools/opt/GraphPrinters.cpp b/tools/opt/GraphPrinters.cpp
index bbf8d122e7c7..86f99324b953 100644
--- a/tools/opt/GraphPrinters.cpp
+++ b/tools/opt/GraphPrinters.cpp
@@ -19,12 +19,11 @@
#include "llvm/Value.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/Dominators.h"
-#include <iostream>
-#include <fstream>
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
template<typename GraphType>
-static void WriteGraphToFile(std::ostream &O, const std::string &GraphName,
+static void WriteGraphToFile(raw_ostream &O, const std::string &GraphName,
const GraphType &GT) {
std::string Filename = GraphName + ".dot";
O << "Writing '" << Filename << "'...";
@@ -69,7 +68,7 @@ namespace {
CallGraphPrinter() : ModulePass(&ID) {}
virtual bool runOnModule(Module &M) {
- WriteGraphToFile(std::cerr, "callgraph", &getAnalysis<CallGraph>());
+ WriteGraphToFile(llvm::errs(), "callgraph", &getAnalysis<CallGraph>());
return false;
}
diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp
index 311f67173f37..ff19942ead89 100644
--- a/tools/opt/opt.cpp
+++ b/tools/opt/opt.cpp
@@ -141,15 +141,14 @@ struct CallGraphSCCPassPrinter : public CallGraphSCCPass {
CallGraphSCCPassPrinter(const PassInfo *PI) :
CallGraphSCCPass(&ID), PassToPrint(PI) {}
- virtual bool runOnSCC(std::vector<CallGraphNode *>&SCC) {
+ virtual bool runOnSCC(CallGraphSCC &SCC) {
if (!Quiet) {
outs() << "Printing analysis '" << PassToPrint->getPassName() << "':\n";
- for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
- Function *F = SCC[i]->getFunction();
- if (F) {
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+ if (F)
getAnalysisID<Pass>(PassToPrint).print(outs(), F->getParent());
- }
}
}
// Get and print pass...
diff --git a/unittests/ADT/BitVectorTest.cpp b/unittests/ADT/BitVectorTest.cpp
index 4fe11c1d17ab..a9fc133c72e2 100644
--- a/unittests/ADT/BitVectorTest.cpp
+++ b/unittests/ADT/BitVectorTest.cpp
@@ -7,7 +7,9 @@
//
//===----------------------------------------------------------------------===//
-#ifndef XFAIL
+// Some of these tests fail on PowerPC for unknown reasons.
+#ifndef __ppc__
+
#include "llvm/ADT/BitVector.h"
#include "gtest/gtest.h"
@@ -56,7 +58,7 @@ TEST(BitVectorTest, TrivialOperation) {
Vec.resize(26, true);
Vec.resize(29, false);
Vec.resize(33, true);
- Vec.resize(61, false);
+ Vec.resize(57, false);
unsigned Count = 0;
for (unsigned i = Vec.find_first(); i != -1u; i = Vec.find_next(i)) {
++Count;
@@ -67,7 +69,8 @@ TEST(BitVectorTest, TrivialOperation) {
EXPECT_EQ(Count, 23u);
EXPECT_FALSE(Vec[0]);
EXPECT_TRUE(Vec[32]);
- EXPECT_FALSE(Vec[60]);
+ EXPECT_FALSE(Vec[56]);
+ Vec.resize(61, false);
BitVector Copy = Vec;
BitVector Alt(3, false);
@@ -177,6 +180,15 @@ TEST(BitVectorTest, CompoundAssignment) {
EXPECT_EQ(100U, A.size());
}
+TEST(BitVectorTest, ProxyIndex) {
+ BitVector Vec(3);
+ EXPECT_TRUE(Vec.none());
+ Vec[0] = Vec[1] = Vec[2] = true;
+ EXPECT_EQ(Vec.size(), Vec.count());
+ Vec[2] = Vec[1] = Vec[0] = false;
+ EXPECT_TRUE(Vec.none());
+}
+
}
#endif
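The new ProxyIndex test pins down a subtle requirement: BitVector::operator[] returns a proxy object, and chained assignment only compiles if the proxy's operator= yields something assignable. std::vector<bool> has the same proxy shape, which makes a compact standalone stand-in:

#include <cassert>
#include <vector>

int main() {
  std::vector<bool> vec(3, false); // operator[] yields a proxy reference
  vec[0] = vec[1] = vec[2] = true; // proxy-to-proxy assignment must chain
  assert(vec[0] && vec[1] && vec[2]);
  vec[2] = vec[1] = vec[0] = false;
  assert(!vec[0] && !vec[1] && !vec[2]);
  return 0;
}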
diff --git a/unittests/ADT/SmallBitVectorTest.cpp b/unittests/ADT/SmallBitVectorTest.cpp
index a2cc652ca1d4..9c69aad5a973 100644
--- a/unittests/ADT/SmallBitVectorTest.cpp
+++ b/unittests/ADT/SmallBitVectorTest.cpp
@@ -55,7 +55,7 @@ TEST(SmallBitVectorTest, TrivialOperation) {
Vec.resize(26, true);
Vec.resize(29, false);
Vec.resize(33, true);
- Vec.resize(61, false);
+ Vec.resize(57, false);
unsigned Count = 0;
for (unsigned i = Vec.find_first(); i != -1u; i = Vec.find_next(i)) {
++Count;
@@ -66,7 +66,8 @@ TEST(SmallBitVectorTest, TrivialOperation) {
EXPECT_EQ(Count, 23u);
EXPECT_FALSE(Vec[0]);
EXPECT_TRUE(Vec[32]);
- EXPECT_FALSE(Vec[60]);
+ EXPECT_FALSE(Vec[56]);
+ Vec.resize(61, false);
SmallBitVector Copy = Vec;
SmallBitVector Alt(3, false);
@@ -176,4 +177,13 @@ TEST(SmallBitVectorTest, CompoundAssignment) {
EXPECT_EQ(100U, A.size());
}
+TEST(SmallBitVectorTest, ProxyIndex) {
+ SmallBitVector Vec(3);
+ EXPECT_TRUE(Vec.none());
+ Vec[0] = Vec[1] = Vec[2] = true;
+ EXPECT_EQ(Vec.size(), Vec.count());
+ Vec[2] = Vec[1] = Vec[0] = false;
+ EXPECT_TRUE(Vec.none());
+}
+
}
diff --git a/unittests/ADT/ValueMapTest.cpp b/unittests/ADT/ValueMapTest.cpp
index 451e30a7434b..2fc093860102 100644
--- a/unittests/ADT/ValueMapTest.cpp
+++ b/unittests/ADT/ValueMapTest.cpp
@@ -61,9 +61,9 @@ TYPED_TEST(ValueMapTest, FollowsValue) {
TYPED_TEST(ValueMapTest, OperationsWork) {
ValueMap<TypeParam*, int> VM;
- ValueMap<TypeParam*, int> VM2(16);
+ ValueMap<TypeParam*, int> VM2(16); (void)VM2;
typename ValueMapConfig<TypeParam*>::ExtraData Data;
- ValueMap<TypeParam*, int> VM3(Data, 16);
+ ValueMap<TypeParam*, int> VM3(Data, 16); (void)VM3;
EXPECT_TRUE(VM.empty());
VM[this->BitcastV.get()] = 7;
@@ -128,7 +128,7 @@ TYPED_TEST(ValueMapTest, Iteration) {
for (typename ValueMap<TypeParam*, int>::iterator I = VM.begin(), E = VM.end();
I != E; ++I) {
++size;
- std::pair<TypeParam*, int> value = *I;
+ std::pair<TypeParam*, int> value = *I; (void)value;
CompileAssertHasType<TypeParam*>(I->first);
if (I->second == 2) {
EXPECT_EQ(this->BitcastV.get(), I->first);
@@ -150,7 +150,7 @@ TYPED_TEST(ValueMapTest, Iteration) {
for (typename ValueMap<TypeParam*, int>::const_iterator I = CVM.begin(),
E = CVM.end(); I != E; ++I) {
++size;
- std::pair<TypeParam*, int> value = *I;
+ std::pair<TypeParam*, int> value = *I; (void)value;
CompileAssertHasType<TypeParam*>(I->first);
if (I->second == 5) {
EXPECT_EQ(this->BitcastV.get(), I->first);
diff --git a/unittests/ExecutionEngine/JIT/JITMemoryManagerTest.cpp b/unittests/ExecutionEngine/JIT/JITMemoryManagerTest.cpp
index aa0c41d3ed2a..ff5af3b72d4a 100644
--- a/unittests/ExecutionEngine/JIT/JITMemoryManagerTest.cpp
+++ b/unittests/ExecutionEngine/JIT/JITMemoryManagerTest.cpp
@@ -239,8 +239,8 @@ TEST(JITMemoryManagerTest, TestManyGlobals) {
size_t Size = 128;
int Iters = (SlabSize / Size) + 1;
- // We should start with one slab.
- EXPECT_EQ(1U, MemMgr->GetNumDataSlabs());
+ // We should start with no slabs.
+ EXPECT_EQ(0U, MemMgr->GetNumDataSlabs());
// After allocating a bunch of globals, we should have two.
for (int I = 0; I < Iters; ++I)
@@ -262,8 +262,8 @@ TEST(JITMemoryManagerTest, TestManyStubs) {
size_t Size = 128;
int Iters = (SlabSize / Size) + 1;
- // We should start with one slab.
- EXPECT_EQ(1U, MemMgr->GetNumStubSlabs());
+ // We should start with no slabs.
+  EXPECT_EQ(0U, MemMgr->GetNumStubSlabs());
// After allocating a bunch of stubs, we should have two.
for (int I = 0; I < Iters; ++I)
diff --git a/unittests/VMCore/PassManagerTest.cpp b/unittests/VMCore/PassManagerTest.cpp
index bc21298ef200..4b38910b11af 100644
--- a/unittests/VMCore/PassManagerTest.cpp
+++ b/unittests/VMCore/PassManagerTest.cpp
@@ -154,7 +154,7 @@ namespace llvm {
struct CGPass : public PassTest<CallGraph, CallGraphSCCPass> {
public:
- virtual bool runOnSCC(std::vector<CallGraphNode*> &SCMM) {
+ virtual bool runOnSCC(CallGraphSCC &SCMM) {
EXPECT_TRUE(getAnalysisIfAvailable<TargetData>());
run();
return false;
diff --git a/utils/TableGen/ARMDecoderEmitter.cpp b/utils/TableGen/ARMDecoderEmitter.cpp
index 5fb8b6bfb232..c879a54b3554 100644
--- a/utils/TableGen/ARMDecoderEmitter.cpp
+++ b/utils/TableGen/ARMDecoderEmitter.cpp
@@ -623,7 +623,7 @@ void Filter::recurse() {
assert(FilterChooserMap.size() == 1);
return;
}
-
+
// Otherwise, create sub choosers.
for (mapIterator = FilteredInstructions.begin();
mapIterator != FilteredInstructions.end();
@@ -631,7 +631,7 @@ void Filter::recurse() {
// Marks all the segment positions with either BIT_TRUE or BIT_FALSE.
for (bitIndex = 0; bitIndex < NumBits; bitIndex++) {
- if (mapIterator->first & (1 << bitIndex))
+ if (mapIterator->first & (1ULL << bitIndex))
BitValueArray[StartBit + bitIndex] = BIT_TRUE;
else
BitValueArray[StartBit + bitIndex] = BIT_FALSE;
@@ -853,7 +853,7 @@ bool FilterChooser::fieldFromInsn(uint64_t &Field, insn_t &Insn,
return false;
if (Insn[StartBit + i] == BIT_TRUE)
- Field = Field | (1 << i);
+ Field = Field | (1ULL << i);
}
return true;
@@ -1344,23 +1344,8 @@ void FilterChooser::doFilter() {
return;
// If we come to here, the instruction decoding has failed.
- // Print out the instructions in the conflict set...
+ // Set the BestIndex to -1 to indicate so.
BestIndex = -1;
-
- DEBUG({
- errs() << "Conflict:\n";
-
- dumpStack(errs(), "\t\t");
-
- for (unsigned i = 0; i < Num; i++) {
- const std::string &Name = nameWithID(Opcodes[i]);
-
- errs() << '\t' << Name << " ";
- dumpBits(errs(),
- getBitsField(*AllInstructions[Opcodes[i]]->TheDef, "Inst"));
- errs() << '\n';
- }
- });
}
// Emits code to decode our share of instructions. Returns true if the
@@ -1465,7 +1450,9 @@ bool FilterChooser::emit(raw_ostream &o, unsigned &Indentation) {
// Otherwise, it does not belong to the known conflict sets.
}
- // We don't know how to decode these instructions! Dump the conflict set!
+
+ // We don't know how to decode these instructions! Return 0 and dump the
+ // conflict set!
o.indent(Indentation) << "return 0;" << " // Conflict set: ";
for (int i = 0, N = Opcodes.size(); i < N; ++i) {
o << nameWithID(Opcodes[i]);
@@ -1474,6 +1461,21 @@ bool FilterChooser::emit(raw_ostream &o, unsigned &Indentation) {
else
o << '\n';
}
+
+ // Print out useful conflict information for postmortem analysis.
+ errs() << "Decoding Conflict:\n";
+
+ dumpStack(errs(), "\t\t");
+
+ for (unsigned i = 0; i < Opcodes.size(); i++) {
+ const std::string &Name = nameWithID(Opcodes[i]);
+
+ errs() << '\t' << Name << " ";
+ dumpBits(errs(),
+ getBitsField(*AllInstructions[Opcodes[i]]->TheDef, "Inst"));
+ errs() << '\n';
+ }
+
return true;
}
@@ -1548,6 +1550,16 @@ bool ARMDecoderEmitter::ARMDEBackend::populateInstruction(
const StringRef Name = Def.getName();
uint8_t Form = getByteField(Def, "Form");
+ BitsInit &Bits = getBitsField(Def, "Inst");
+
+  // If all the bit positions are unspecified, do not decode this instruction;
+  // we are bound to fail. For proper disassembly, the well-known encoding bits
+ // of the instruction must be fully specified.
+ //
+  // This also removes pseudo instructions from consideration for disassembly,
+  // which is a better design and less fragile than matching by name.
+ if (Bits.allInComplete()) return false;
+
if (TN == TARGET_ARM) {
// FIXME: what about Int_MemBarrierV6 and Int_SyncBarrierV6?
if ((Name != "Int_MemBarrierV7" && Name != "Int_SyncBarrierV7") &&
@@ -1668,11 +1680,6 @@ bool ARMDecoderEmitter::ARMDEBackend::populateInstruction(
if (!thumbInstruction(Form))
return false;
- // Ignore pseudo instructions.
- if (Name == "tInt_eh_sjlj_setjmp" || Name == "t2Int_eh_sjlj_setjmp" ||
- Name == "t2MOVi32imm" || Name == "tBX" || Name == "tBXr9")
- return false;
-
// On Darwin R9 is call-clobbered. Ignore the non-Darwin counterparts.
if (Name == "tBL" || Name == "tBLXi" || Name == "tBLXr")
return false;
@@ -1692,8 +1699,12 @@ bool ARMDecoderEmitter::ARMDEBackend::populateInstruction(
// Ignore tADDrSP, tADDspr, and tPICADD, prefer the generic tADDhirr.
// Ignore t2SUBrSPs, prefer the t2SUB[S]r[r|s].
// Ignore t2ADDrSPs, prefer the t2ADD[S]r[r|s].
+    // Ignore t2ADDrSPi/t2SUBrSPi, which have more generic counterparts.
+    // Ignore t2ADDrSPi12/t2SUBrSPi12, which have more generic counterparts.
if (Name == "tADDrSP" || Name == "tADDspr" || Name == "tPICADD" ||
- Name == "t2SUBrSPs" || Name == "t2ADDrSPs")
+ Name == "t2SUBrSPs" || Name == "t2ADDrSPs" ||
+ Name == "t2ADDrSPi" || Name == "t2SUBrSPi" ||
+ Name == "t2ADDrSPi12" || Name == "t2SUBrSPi12")
return false;
// Ignore t2LDRDpci, prefer the generic t2LDRDi8, t2LDRD_PRE, t2LDRD_POST.
@@ -1716,7 +1727,6 @@ bool ARMDecoderEmitter::ARMDEBackend::populateInstruction(
// tLDRcp conflicts with tLDRspi
// tRestore conflicts with tLDRspi
// t2LEApcrelJT conflicts with t2LEApcrel
- // t2ADDrSPi/t2SUBrSPi have more generic couterparts
if (Name == "tBfar" ||
/* Name == "tCMNz" || */ Name == "tCMPzi8" || Name == "tCMPzr" ||
Name == "tCMPzhir" || /* Name == "t2CMNzrr" || Name == "t2CMNzrs" ||
@@ -1724,7 +1734,7 @@ bool ARMDecoderEmitter::ARMDEBackend::populateInstruction(
Name == "t2CMPzri" || Name == "tPOP_RET" || Name == "t2LDM_RET" ||
Name == "tMOVCCi" || Name == "tMOVCCr" || Name == "tBR_JTr" ||
Name == "tSpill" || Name == "tLDRcp" || Name == "tRestore" ||
- Name == "t2LEApcrelJT" || Name == "t2ADDrSPi" || Name == "t2SUBrSPi")
+ Name == "t2LEApcrelJT")
return false;
}
@@ -1737,8 +1747,6 @@ bool ARMDecoderEmitter::ARMDEBackend::populateInstruction(
}
DEBUG({
- BitsInit &Bits = getBitsField(Def, "Inst");
-
errs() << " ";
// Dumps the instruction encoding bits.
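The two 1ULL fixes above are not cosmetic: the literal 1 is a plain 32-bit int, so shifting it by 31 yields a negative value that sign-extends when widened, and shifting it by 32 or more is undefined behavior. Either way the old code could corrupt the high bits of a 64-bit field. A minimal standalone sketch of the failure mode and the fix (not the emitter's code):

#include <cstdint>
#include <cstdio>

int main() {
  unsigned Bit = 40; // bit positions >= 32 occur in 64-bit "Inst" fields

  // uint64_t Bad = 1 << Bit;   // undefined behavior: 32-bit int shifted by 40

  uint64_t Good = 1ULL << Bit;  // widen to 64 bits *before* shifting
  std::printf("0x%016llx\n", (unsigned long long)Good); // 0x0000010000000000
  return 0;
}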
diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp
index e5c068bcdf63..1947824cbf88 100644
--- a/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/utils/TableGen/AsmMatcherEmitter.cpp
@@ -1564,10 +1564,14 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
Info.Instructions.begin(), ie = Info.Instructions.end();
it != ie; ++it)
MaxNumOperands = std::max(MaxNumOperands, (*it)->Operands.size());
-
- OS << "bool " << Target.getName() << ClassName
- << "::\nMatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> "
- "&Operands,\n MCInst &Inst) {\n";
+
+ const std::string &MatchName =
+ AsmParser->getValueAsString("MatchInstructionName");
+ OS << "bool " << Target.getName() << ClassName << "::\n"
+ << MatchName
+ << "(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,\n";
+ OS.indent(MatchName.size() + 1);
+ OS << "MCInst &Inst) {\n";
  // Emit the static match table; unused classes get initialized to 0 which is
// guaranteed to be InvalidMatchClass.
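The hunk above reads the emitted function's name from the AsmParser record's MatchInstructionName field and aligns the continuation line under the opening parenthesis by indenting it by the name's length plus one. A standalone sketch of that alignment trick (the FooAsmParser class name is a placeholder):

#include <cstdio>
#include <string>

// Emit a two-line signature whose second parameter lines up under the
// '(' no matter how long the configurable function name is.
static void emitSignature(const std::string &MatchName) {
  std::printf("bool FooAsmParser::\n%s(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,\n",
              MatchName.c_str());
  // Print MatchName.size() + 1 spaces, mirroring OS.indent().
  std::printf("%*sMCInst &Inst) {\n", (int)(MatchName.size() + 1), "");
}

int main() {
  emitSignature("MatchInstruction");
  return 0;
}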
diff --git a/utils/TableGen/CodeEmitterGen.cpp b/utils/TableGen/CodeEmitterGen.cpp
index 641c22433a19..2a2a4ef63e16 100644
--- a/utils/TableGen/CodeEmitterGen.cpp
+++ b/utils/TableGen/CodeEmitterGen.cpp
@@ -35,7 +35,8 @@ void CodeEmitterGen::reverseBits(std::vector<Record*> &Insts) {
R->getName() == "IMPLICIT_DEF" ||
R->getName() == "SUBREG_TO_REG" ||
R->getName() == "COPY_TO_REGCLASS" ||
- R->getName() == "DBG_VALUE") continue;
+ R->getName() == "DBG_VALUE" ||
+ R->getName() == "REG_SEQUENCE") continue;
BitsInit *BI = R->getValueAsBitsInit("Inst");
@@ -113,7 +114,8 @@ void CodeEmitterGen::run(raw_ostream &o) {
R->getName() == "IMPLICIT_DEF" ||
R->getName() == "SUBREG_TO_REG" ||
R->getName() == "COPY_TO_REGCLASS" ||
- R->getName() == "DBG_VALUE") {
+ R->getName() == "DBG_VALUE" ||
+ R->getName() == "REG_SEQUENCE") {
o << " 0U,\n";
continue;
}
@@ -152,7 +154,8 @@ void CodeEmitterGen::run(raw_ostream &o) {
InstName == "IMPLICIT_DEF" ||
InstName == "SUBREG_TO_REG" ||
InstName == "COPY_TO_REGCLASS" ||
- InstName == "DBG_VALUE") continue;
+ InstName == "DBG_VALUE" ||
+ InstName == "REG_SEQUENCE") continue;
BitsInit *BI = R->getValueAsBitsInit("Inst");
const std::vector<RecordVal> &Vals = R->getValues();
@@ -249,7 +252,7 @@ void CodeEmitterGen::run(raw_ostream &o) {
<< " std::string msg;\n"
<< " raw_string_ostream Msg(msg);\n"
<< " Msg << \"Not supported instr: \" << MI;\n"
- << " llvm_report_error(Msg.str());\n"
+ << " report_fatal_error(Msg.str());\n"
<< " }\n"
<< " return Value;\n"
<< "}\n\n";
diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp
index d2c0195c6dcf..a0bccfc5e57b 100644
--- a/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -1992,16 +1992,13 @@ void CodeGenDAGPatterns::ParseDefaultOperands() {
/// HandleUse - Given "Pat" a leaf in the pattern, check to see if it is an
/// instruction input. Return true if this is a real use.
static bool HandleUse(TreePattern *I, TreePatternNode *Pat,
- std::map<std::string, TreePatternNode*> &InstInputs,
- std::vector<Record*> &InstImpInputs) {
+ std::map<std::string, TreePatternNode*> &InstInputs) {
// No name -> not interesting.
if (Pat->getName().empty()) {
if (Pat->isLeaf()) {
DefInit *DI = dynamic_cast<DefInit*>(Pat->getLeafValue());
if (DI && DI->getDef()->isSubClassOf("RegisterClass"))
I->error("Input " + DI->getDef()->getName() + " must be named!");
- else if (DI && DI->getDef()->isSubClassOf("Register"))
- InstImpInputs.push_back(DI->getDef());
}
return false;
}
@@ -2047,10 +2044,9 @@ void CodeGenDAGPatterns::
FindPatternInputsAndOutputs(TreePattern *I, TreePatternNode *Pat,
std::map<std::string, TreePatternNode*> &InstInputs,
std::map<std::string, TreePatternNode*>&InstResults,
- std::vector<Record*> &InstImpInputs,
std::vector<Record*> &InstImpResults) {
if (Pat->isLeaf()) {
- bool isUse = HandleUse(I, Pat, InstInputs, InstImpInputs);
+ bool isUse = HandleUse(I, Pat, InstInputs);
if (!isUse && Pat->getTransformFn())
I->error("Cannot specify a transform function for a non-input value!");
return;
@@ -2077,12 +2073,12 @@ FindPatternInputsAndOutputs(TreePattern *I, TreePatternNode *Pat,
if (Pat->getChild(i)->getNumTypes() == 0)
I->error("Cannot have void nodes inside of patterns!");
FindPatternInputsAndOutputs(I, Pat->getChild(i), InstInputs, InstResults,
- InstImpInputs, InstImpResults);
+ InstImpResults);
}
// If this is a non-leaf node with no children, treat it basically as if
// it were a leaf. This handles nodes like (imm).
- bool isUse = HandleUse(I, Pat, InstInputs, InstImpInputs);
+ bool isUse = HandleUse(I, Pat, InstInputs);
if (!isUse && Pat->getTransformFn())
I->error("Cannot specify a transform function for a non-input value!");
@@ -2123,8 +2119,7 @@ FindPatternInputsAndOutputs(TreePattern *I, TreePatternNode *Pat,
// Verify and collect info from the computation.
FindPatternInputsAndOutputs(I, Pat->getChild(NumDests),
- InstInputs, InstResults,
- InstImpInputs, InstImpResults);
+ InstInputs, InstResults, InstImpResults);
}
//===----------------------------------------------------------------------===//
@@ -2297,10 +2292,8 @@ void CodeGenDAGPatterns::ParseInstructions() {
// Create and insert the instruction.
std::vector<Record*> ImpResults;
- std::vector<Record*> ImpOperands;
Instructions.insert(std::make_pair(Instrs[i],
- DAGInstruction(0, Results, Operands, ImpResults,
- ImpOperands)));
+ DAGInstruction(0, Results, Operands, ImpResults)));
continue; // no pattern.
}
@@ -2322,7 +2315,6 @@ void CodeGenDAGPatterns::ParseInstructions() {
// in the instruction, including what reg class they are.
std::map<std::string, TreePatternNode*> InstResults;
- std::vector<Record*> InstImpInputs;
std::vector<Record*> InstImpResults;
// Verify that the top-level forms in the instruction are of void type, and
@@ -2335,7 +2327,7 @@ void CodeGenDAGPatterns::ParseInstructions() {
// Find inputs and outputs, and verify the structure of the uses/defs.
FindPatternInputsAndOutputs(I, Pat, InstInputs, InstResults,
- InstImpInputs, InstImpResults);
+ InstImpResults);
}
// Now that we have inputs and outputs of the pattern, inspect the operands
@@ -2445,9 +2437,8 @@ void CodeGenDAGPatterns::ParseInstructions() {
ResultPattern->setType(i, Res0Node->getExtType(i));
// Create and insert the instruction.
- // FIXME: InstImpResults and InstImpInputs should not be part of
- // DAGInstruction.
- DAGInstruction TheInst(I, Results, Operands, InstImpResults, InstImpInputs);
+ // FIXME: InstImpResults should not be part of DAGInstruction.
+ DAGInstruction TheInst(I, Results, Operands, InstImpResults);
Instructions.insert(std::make_pair(I->getRecord(), TheInst));
// Use a temporary tree pattern to infer all types and make sure that the
@@ -2682,12 +2673,11 @@ void CodeGenDAGPatterns::ParsePatterns() {
// Validate that the input pattern is correct.
std::map<std::string, TreePatternNode*> InstInputs;
std::map<std::string, TreePatternNode*> InstResults;
- std::vector<Record*> InstImpInputs;
std::vector<Record*> InstImpResults;
for (unsigned j = 0, ee = Pattern->getNumTrees(); j != ee; ++j)
FindPatternInputsAndOutputs(Pattern, Pattern->getTree(j),
InstInputs, InstResults,
- InstImpInputs, InstImpResults);
+ InstImpResults);
// Promote the xform function to be an explicit node if set.
TreePatternNode *DstPattern = Result->getOnlyTree();
diff --git a/utils/TableGen/CodeGenDAGPatterns.h b/utils/TableGen/CodeGenDAGPatterns.h
index 548051300dc6..0a1362ab2494 100644
--- a/utils/TableGen/CodeGenDAGPatterns.h
+++ b/utils/TableGen/CodeGenDAGPatterns.h
@@ -529,23 +529,19 @@ class DAGInstruction {
std::vector<Record*> Results;
std::vector<Record*> Operands;
std::vector<Record*> ImpResults;
- std::vector<Record*> ImpOperands;
TreePatternNode *ResultPattern;
public:
DAGInstruction(TreePattern *TP,
const std::vector<Record*> &results,
const std::vector<Record*> &operands,
- const std::vector<Record*> &impresults,
- const std::vector<Record*> &impoperands)
+ const std::vector<Record*> &impresults)
: Pattern(TP), Results(results), Operands(operands),
- ImpResults(impresults), ImpOperands(impoperands),
- ResultPattern(0) {}
+ ImpResults(impresults), ResultPattern(0) {}
const TreePattern *getPattern() const { return Pattern; }
unsigned getNumResults() const { return Results.size(); }
unsigned getNumOperands() const { return Operands.size(); }
unsigned getNumImpResults() const { return ImpResults.size(); }
- unsigned getNumImpOperands() const { return ImpOperands.size(); }
const std::vector<Record*>& getImpResults() const { return ImpResults; }
void setResultPattern(TreePatternNode *R) { ResultPattern = R; }
@@ -565,11 +561,6 @@ public:
return ImpResults[RN];
}
- Record *getImpOperand(unsigned ON) const {
- assert(ON < ImpOperands.size());
- return ImpOperands[ON];
- }
-
TreePatternNode *getResultPattern() const { return ResultPattern; }
};
@@ -750,7 +741,6 @@ private:
TreePatternNode*> &InstInputs,
std::map<std::string,
TreePatternNode*> &InstResults,
- std::vector<Record*> &InstImpInputs,
std::vector<Record*> &InstImpResults);
};
} // end namespace llvm
diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp
index 0392895ba489..1efe2ff080d6 100644
--- a/utils/TableGen/CodeGenTarget.cpp
+++ b/utils/TableGen/CodeGenTarget.cpp
@@ -326,6 +326,7 @@ void CodeGenTarget::ComputeInstrsByEnum() const {
const CodeGenInstruction *COPY_TO_REGCLASS =
GetInstByName("COPY_TO_REGCLASS", Insts);
const CodeGenInstruction *DBG_VALUE = GetInstByName("DBG_VALUE", Insts);
+ const CodeGenInstruction *REG_SEQUENCE = GetInstByName("REG_SEQUENCE", Insts);
// Print out the rest of the instructions now.
InstrsByEnum.push_back(PHI);
@@ -340,6 +341,7 @@ void CodeGenTarget::ComputeInstrsByEnum() const {
InstrsByEnum.push_back(SUBREG_TO_REG);
InstrsByEnum.push_back(COPY_TO_REGCLASS);
InstrsByEnum.push_back(DBG_VALUE);
+ InstrsByEnum.push_back(REG_SEQUENCE);
unsigned EndOfPredefines = InstrsByEnum.size();
@@ -357,7 +359,8 @@ void CodeGenTarget::ComputeInstrsByEnum() const {
CGI != IMPLICIT_DEF &&
CGI != SUBREG_TO_REG &&
CGI != COPY_TO_REGCLASS &&
- CGI != DBG_VALUE)
+ CGI != DBG_VALUE &&
+ CGI != REG_SEQUENCE)
InstrsByEnum.push_back(CGI);
}
diff --git a/utils/TableGen/EDEmitter.cpp b/utils/TableGen/EDEmitter.cpp
index f83ab4868259..d3bf60e1c8c3 100644
--- a/utils/TableGen/EDEmitter.cpp
+++ b/utils/TableGen/EDEmitter.cpp
@@ -19,15 +19,14 @@
#include "CodeGenTarget.h"
#include "Record.h"
+#include "llvm/MC/EDInstInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
-#include <vector>
+#include <cstring>
+#include <map>
#include <string>
-
-#define MAX_OPERANDS 5
-#define MAX_SYNTAXES 2
+#include <vector>
using namespace llvm;
@@ -54,9 +53,9 @@ namespace {
unsigned int index = 0;
unsigned int numEntries = Entries.size();
- for(index = 0; index < numEntries; ++index) {
+ for (index = 0; index < numEntries; ++index) {
o.indent(i) << Entries[index];
- if(index < (numEntries - 1))
+ if (index < (numEntries - 1))
o << ",";
o << "\n";
}
@@ -88,24 +87,24 @@ namespace {
class StructEmitter {
private:
std::string Name;
- std::vector<std::string> MemberTypes;
- std::vector<std::string> MemberNames;
+ typedef std::pair<const char*, const char*> member;
+ std::vector< member > Members;
public:
StructEmitter(const char *N) : Name(N) {
}
void addMember(const char *t, const char *n) {
- MemberTypes.push_back(std::string(t));
- MemberNames.push_back(std::string(n));
+ member m(t, n);
+ Members.push_back(m);
}
void emit(raw_ostream &o, unsigned int &i) {
o.indent(i) << "struct " << Name.c_str() << " {" << "\n";
i += 2;
unsigned int index = 0;
- unsigned int numMembers = MemberTypes.size();
+ unsigned int numMembers = Members.size();
for (index = 0; index < numMembers; ++index) {
- o.indent(i) << MemberTypes[index] << " " << MemberNames[index] << ";";
- o << "\n";
+      o.indent(i) << Members[index].first << " "
+                  << Members[index].second << ";" << "\n";
}
i -= 2;
@@ -121,47 +120,87 @@ namespace {
class LiteralConstantEmitter : public ConstantEmitter {
private:
- std::string Literal;
+ bool IsNumber;
+ union {
+ int Number;
+ const char* String;
+ };
public:
- LiteralConstantEmitter(const char *literal) : Literal(literal) {
+ LiteralConstantEmitter(const char *string) :
+ IsNumber(false),
+ String(string) {
}
- LiteralConstantEmitter(int literal) {
- char buf[256];
- snprintf(buf, 256, "%d", literal);
- Literal = buf;
+ LiteralConstantEmitter(int number = 0) :
+ IsNumber(true),
+ Number(number) {
+ }
+ void set(const char *string) {
+ IsNumber = false;
+ Number = 0;
+ String = string;
+ }
+ void set(int number) {
+ IsNumber = true;
+ String = NULL;
+ Number = number;
+ }
+ bool is(const char *string) {
+ return !strcmp(String, string);
}
void emit(raw_ostream &o, unsigned int &i) {
- o << Literal;
+ if (IsNumber)
+ o << Number;
+ else
+ o << String;
}
};
class CompoundConstantEmitter : public ConstantEmitter {
private:
- std::vector<ConstantEmitter*> Entries;
+ unsigned int Padding;
+ std::vector<ConstantEmitter *> Entries;
public:
- CompoundConstantEmitter() {
- }
- ~CompoundConstantEmitter() {
- unsigned int index;
- unsigned int numEntries = Entries.size();
- for (index = 0; index < numEntries; ++index) {
- delete Entries[index];
- }
+ CompoundConstantEmitter(unsigned int padding = 0) : Padding(padding) {
}
CompoundConstantEmitter &addEntry(ConstantEmitter *e) {
Entries.push_back(e);
+
return *this;
}
+ ~CompoundConstantEmitter() {
+ while (Entries.size()) {
+ ConstantEmitter *entry = Entries.back();
+ Entries.pop_back();
+ delete entry;
+ }
+ }
void emit(raw_ostream &o, unsigned int &i) {
o << "{" << "\n";
i += 2;
unsigned int index;
unsigned int numEntries = Entries.size();
- for (index = 0; index < numEntries; ++index) {
+
+ unsigned int numToPrint;
+
+ if (Padding) {
+ if (numEntries > Padding) {
+ fprintf(stderr, "%u entries but %u padding\n", numEntries, Padding);
+ llvm_unreachable("More entries than padding");
+ }
+ numToPrint = Padding;
+ } else {
+ numToPrint = numEntries;
+ }
+
+ for (index = 0; index < numToPrint; ++index) {
o.indent(i);
- Entries[index]->emit(o, i);
- if (index < (numEntries - 1))
+ if (index < numEntries)
+ Entries[index]->emit(o, i);
+ else
+ o << "-1";
+
+ if (index < (numToPrint - 1))
o << ",";
o << "\n";
}
@@ -226,40 +265,31 @@ void populateOperandOrder(CompoundConstantEmitter *operandOrder,
++operandIterator) {
if (operandIterator->OperandType ==
AsmWriterOperand::isMachineInstrOperand) {
- char buf[2];
- snprintf(buf, sizeof(buf), "%u", operandIterator->CGIOpNo);
- operandOrder->addEntry(new LiteralConstantEmitter(buf));
+ operandOrder->addEntry(
+ new LiteralConstantEmitter(operandIterator->CGIOpNo));
numArgs++;
}
}
-
- for(; numArgs < MAX_OPERANDS; numArgs++) {
- operandOrder->addEntry(new LiteralConstantEmitter("-1"));
- }
}
/////////////////////////////////////////////////////
// Support functions for handling X86 instructions //
/////////////////////////////////////////////////////
-#define ADDFLAG(flag) flags->addEntry(flag)
+#define SET(flag) { type->set(flag); return 0; }
-#define REG(str) if (name == str) { ADDFLAG("kOperandFlagRegister"); return 0; }
-#define MEM(str) if (name == str) { ADDFLAG("kOperandFlagMemory"); return 0; }
-#define LEA(str) if (name == str) { ADDFLAG("kOperandFlagEffectiveAddress"); \
- return 0; }
-#define IMM(str) if (name == str) { ADDFLAG("kOperandFlagImmediate"); \
- return 0; }
-#define PCR(str) if (name == str) { ADDFLAG("kOperandFlagMemory"); \
- ADDFLAG("kOperandFlagPCRelative"); \
- return 0; }
+#define REG(str) if (name == str) SET("kOperandTypeRegister");
+#define MEM(str) if (name == str) SET("kOperandTypeX86Memory");
+#define LEA(str) if (name == str) SET("kOperandTypeX86EffectiveAddress");
+#define IMM(str) if (name == str) SET("kOperandTypeImmediate");
+#define PCR(str) if (name == str) SET("kOperandTypeX86PCRelative");
-/// X86FlagFromOpName - Processes the name of a single X86 operand (which is
-/// actually its type) and translates it into an operand flag
+/// X86TypeFromOpName - Processes the name of a single X86 operand (which is
+/// actually its type) and translates it into an operand type
///
-/// @arg flags - The flags object to add the flag to
+/// @arg type - The type object to set
/// @arg name - The name of the operand
-static int X86FlagFromOpName(FlagsConstantEmitter *flags,
+static int X86TypeFromOpName(LiteralConstantEmitter *type,
const std::string &name) {
REG("GR8");
REG("GR8_NOREX");
@@ -282,6 +312,17 @@ static int X86FlagFromOpName(FlagsConstantEmitter *flags,
REG("CONTROL_REG_32");
REG("CONTROL_REG_64");
+ IMM("i8imm");
+ IMM("i16imm");
+ IMM("i16i8imm");
+ IMM("i32imm");
+ IMM("i32i8imm");
+ IMM("i64imm");
+ IMM("i64i8imm");
+ IMM("i64i32imm");
+ IMM("SSECC");
+
+ // all R, I, R, I, R
MEM("i8mem");
MEM("i8mem_NOREX");
MEM("i16mem");
@@ -301,22 +342,14 @@ static int X86FlagFromOpName(FlagsConstantEmitter *flags,
MEM("f128mem");
MEM("opaque512mem");
+ // all R, I, R, I
LEA("lea32mem");
LEA("lea64_32mem");
LEA("lea64mem");
- IMM("i8imm");
- IMM("i16imm");
- IMM("i16i8imm");
- IMM("i32imm");
- IMM("i32imm_pcrel");
- IMM("i32i8imm");
- IMM("i64imm");
- IMM("i64i8imm");
- IMM("i64i32imm");
- IMM("i64i32imm_pcrel");
- IMM("SSECC");
-
+ // all I
+ PCR("i32imm_pcrel");
+ PCR("i64i32imm_pcrel");
PCR("brtarget8");
PCR("offset8");
PCR("offset16");
@@ -332,7 +365,8 @@ static int X86FlagFromOpName(FlagsConstantEmitter *flags,
#undef LEA
#undef IMM
#undef PCR
-#undef ADDFLAG
+
+#undef SET
/// X86PopulateOperands - Handles all the operands in an X86 instruction, adding
/// the appropriate flags to their descriptors
@@ -340,7 +374,7 @@ static int X86FlagFromOpName(FlagsConstantEmitter *flags,
 /// @operandTypes - A reference to the array of operand type objects
/// @inst - The instruction to use as a source of information
static void X86PopulateOperands(
- FlagsConstantEmitter *(&operandFlags)[MAX_OPERANDS],
+ LiteralConstantEmitter *(&operandTypes)[EDIS_MAX_OPERANDS],
const CodeGenInstruction &inst) {
if (!inst.TheDef->isSubClassOf("X86Inst"))
return;
@@ -353,7 +387,7 @@ static void X86PopulateOperands(
inst.OperandList[index];
Record &rec = *operandInfo.Rec;
- if (X86FlagFromOpName(operandFlags[index], rec.getName())) {
+ if (X86TypeFromOpName(operandTypes[index], rec.getName())) {
errs() << "Operand type: " << rec.getName().c_str() << "\n";
errs() << "Operand name: " << operandInfo.Name.c_str() << "\n";
errs() << "Instruction mame: " << inst.TheDef->getName().c_str() << "\n";
@@ -369,10 +403,11 @@ static void X86PopulateOperands(
/// between names and operand indices
/// @opName - The name of the operand
/// @flag - The name of the flag to add
-static inline void decorate1(FlagsConstantEmitter *(&operandFlags)[MAX_OPERANDS],
- const CodeGenInstruction &inst,
- const char *opName,
- const char *opFlag) {
+static inline void decorate1(
+ FlagsConstantEmitter *(&operandFlags)[EDIS_MAX_OPERANDS],
+ const CodeGenInstruction &inst,
+ const char *opName,
+ const char *opFlag) {
unsigned opIndex;
opIndex = inst.getOperandNamed(std::string(opName));
@@ -382,78 +417,70 @@ static inline void decorate1(FlagsConstantEmitter *(&operandFlags)[MAX_OPERANDS]
#define DECORATE1(opName, opFlag) decorate1(operandFlags, inst, opName, opFlag)
-#define MOV(source, target) { \
- instFlags.addEntry("kInstructionFlagMove"); \
- DECORATE1(source, "kOperandFlagSource"); \
- DECORATE1(target, "kOperandFlagTarget"); \
+#define MOV(source, target) { \
+ instType.set("kInstructionTypeMove"); \
+ DECORATE1(source, "kOperandFlagSource"); \
+ DECORATE1(target, "kOperandFlagTarget"); \
}
-#define BRANCH(target) { \
- instFlags.addEntry("kInstructionFlagBranch"); \
- DECORATE1(target, "kOperandFlagTarget"); \
+#define BRANCH(target) { \
+ instType.set("kInstructionTypeBranch"); \
+ DECORATE1(target, "kOperandFlagTarget"); \
}
-#define PUSH(source) { \
- instFlags.addEntry("kInstructionFlagPush"); \
- DECORATE1(source, "kOperandFlagSource"); \
+#define PUSH(source) { \
+ instType.set("kInstructionTypePush"); \
+ DECORATE1(source, "kOperandFlagSource"); \
}
-#define POP(target) { \
- instFlags.addEntry("kInstructionFlagPop"); \
- DECORATE1(target, "kOperandFlagTarget"); \
+#define POP(target) { \
+ instType.set("kInstructionTypePop"); \
+ DECORATE1(target, "kOperandFlagTarget"); \
}
-#define CALL(target) { \
- instFlags.addEntry("kInstructionFlagCall"); \
- DECORATE1(target, "kOperandFlagTarget"); \
+#define CALL(target) { \
+ instType.set("kInstructionTypeCall"); \
+ DECORATE1(target, "kOperandFlagTarget"); \
}
-#define RETURN() { \
- instFlags.addEntry("kInstructionFlagReturn"); \
+#define RETURN() { \
+ instType.set("kInstructionTypeReturn"); \
}
/// X86ExtractSemantics - Performs various checks on the name of an X86
/// instruction to determine what sort of an instruction it is and then adds
/// the appropriate flags to the instruction and its operands
///
-/// @arg instFlags - A reference to the flags for the instruction as a whole
+/// @arg instType - A reference to the type for the instruction as a whole
/// @arg operandFlags - A reference to the array of operand flag object pointers
/// @arg inst - A reference to the original instruction
-static void X86ExtractSemantics(FlagsConstantEmitter &instFlags,
- FlagsConstantEmitter *(&operandFlags)[MAX_OPERANDS],
- const CodeGenInstruction &inst) {
+static void X86ExtractSemantics(
+ LiteralConstantEmitter &instType,
+ FlagsConstantEmitter *(&operandFlags)[EDIS_MAX_OPERANDS],
+ const CodeGenInstruction &inst) {
const std::string &name = inst.TheDef->getName();
if (name.find("MOV") != name.npos) {
if (name.find("MOV_V") != name.npos) {
// ignore (this is a pseudoinstruction)
- }
- else if (name.find("MASK") != name.npos) {
+ } else if (name.find("MASK") != name.npos) {
// ignore (this is a masking move)
- }
- else if (name.find("r0") != name.npos) {
+ } else if (name.find("r0") != name.npos) {
// ignore (this is a pseudoinstruction)
- }
- else if (name.find("PS") != name.npos ||
+ } else if (name.find("PS") != name.npos ||
name.find("PD") != name.npos) {
// ignore (this is a shuffling move)
- }
- else if (name.find("MOVS") != name.npos) {
+ } else if (name.find("MOVS") != name.npos) {
// ignore (this is a string move)
- }
- else if (name.find("_F") != name.npos) {
+ } else if (name.find("_F") != name.npos) {
// TODO handle _F moves to ST(0)
- }
- else if (name.find("a") != name.npos) {
+ } else if (name.find("a") != name.npos) {
// TODO handle moves to/from %ax
- }
- else if (name.find("CMOV") != name.npos) {
+ } else if (name.find("CMOV") != name.npos) {
MOV("src2", "dst");
- }
- else if (name.find("PC") != name.npos) {
+ } else if (name.find("PC") != name.npos) {
MOV("label", "reg")
- }
- else {
+ } else {
MOV("src", "dst");
}
}
@@ -462,8 +489,7 @@ static void X86ExtractSemantics(FlagsConstantEmitter &instFlags,
name.find("J") == 0) {
if (name.find("FAR") != name.npos && name.find("i") != name.npos) {
BRANCH("off");
- }
- else {
+ } else {
BRANCH("dst");
}
}
@@ -471,19 +497,15 @@ static void X86ExtractSemantics(FlagsConstantEmitter &instFlags,
if (name.find("PUSH") != name.npos) {
if (name.find("FS") != name.npos ||
name.find("GS") != name.npos) {
- instFlags.addEntry("kInstructionFlagPush");
+ instType.set("kInstructionTypePush");
// TODO add support for fixed operands
- }
- else if (name.find("F") != name.npos) {
+ } else if (name.find("F") != name.npos) {
// ignore (this pushes onto the FP stack)
- }
- else if (name[name.length() - 1] == 'm') {
+ } else if (name[name.length() - 1] == 'm') {
PUSH("src");
- }
- else if (name.find("i") != name.npos) {
+ } else if (name.find("i") != name.npos) {
PUSH("imm");
- }
- else {
+ } else {
PUSH("reg");
}
}
@@ -491,19 +513,15 @@ static void X86ExtractSemantics(FlagsConstantEmitter &instFlags,
if (name.find("POP") != name.npos) {
if (name.find("POPCNT") != name.npos) {
// ignore (not a real pop)
- }
- else if (name.find("FS") != name.npos ||
+ } else if (name.find("FS") != name.npos ||
name.find("GS") != name.npos) {
- instFlags.addEntry("kInstructionFlagPop");
+ instType.set("kInstructionTypePop");
// TODO add support for fixed operands
- }
- else if (name.find("F") != name.npos) {
+ } else if (name.find("F") != name.npos) {
// ignore (this pops from the FP stack)
- }
- else if (name[name.length() - 1] == 'm') {
+ } else if (name[name.length() - 1] == 'm') {
POP("dst");
- }
- else {
+ } else {
POP("reg");
}
}
@@ -511,17 +529,13 @@ static void X86ExtractSemantics(FlagsConstantEmitter &instFlags,
if (name.find("CALL") != name.npos) {
if (name.find("ADJ") != name.npos) {
// ignore (not a call)
- }
- else if (name.find("SYSCALL") != name.npos) {
+ } else if (name.find("SYSCALL") != name.npos) {
// ignore (doesn't go anywhere we know about)
- }
- else if (name.find("VMCALL") != name.npos) {
+ } else if (name.find("VMCALL") != name.npos) {
// ignore (rather different semantics than a regular call)
- }
- else if (name.find("FAR") != name.npos && name.find("i") != name.npos) {
+ } else if (name.find("FAR") != name.npos && name.find("i") != name.npos) {
CALL("off");
- }
- else {
+ } else {
CALL("dst");
}
}
@@ -538,9 +552,182 @@ static void X86ExtractSemantics(FlagsConstantEmitter &instFlags,
#undef CALL
#undef RETURN
-#undef COND_DECORATE_2
-#undef COND_DECORATE_1
-#undef DECORATE1
+/////////////////////////////////////////////////////
+// Support functions for handling ARM instructions //
+/////////////////////////////////////////////////////
+
+#define SET(flag) { type->set(flag); return 0; }
+
+#define REG(str) if (name == str) SET("kOperandTypeRegister");
+#define IMM(str) if (name == str) SET("kOperandTypeImmediate");
+
+#define MISC(str, type) if (name == str) SET(type);
+
+/// ARMFlagFromOpName - Processes the name of a single ARM operand (which is
+/// actually its type) and translates it into an operand type
+///
+/// @arg type - The type object to set
+/// @arg name - The name of the operand
+static int ARMFlagFromOpName(LiteralConstantEmitter *type,
+ const std::string &name) {
+ REG("GPR");
+ REG("cc_out");
+ REG("s_cc_out");
+ REG("tGPR");
+ REG("DPR");
+ REG("SPR");
+ REG("QPR");
+ REG("DPR_VFP2");
+ REG("DPR_8");
+
+ IMM("i32imm");
+ IMM("bf_inv_mask_imm");
+ IMM("jtblock_operand");
+ IMM("nohash_imm");
+ IMM("cpinst_operand");
+ IMM("cps_opt");
+ IMM("vfp_f64imm");
+ IMM("vfp_f32imm");
+ IMM("msr_mask");
+ IMM("neg_zero");
+ IMM("imm0_31");
+ IMM("h8imm");
+ IMM("h16imm");
+ IMM("h32imm");
+ IMM("h64imm");
+ IMM("imm0_4095");
+ IMM("jt2block_operand");
+ IMM("t_imm_s4");
+ IMM("pclabel");
+
+ MISC("brtarget", "kOperandTypeARMBranchTarget"); // ?
+ MISC("so_reg", "kOperandTypeARMSoReg"); // R, R, I
+ MISC("t2_so_reg", "kOperandTypeThumb2SoReg"); // R, I
+ MISC("so_imm", "kOperandTypeARMSoImm"); // I
+ MISC("t2_so_imm", "kOperandTypeThumb2SoImm"); // I
+ MISC("so_imm2part", "kOperandTypeARMSoImm2Part"); // I
+ MISC("pred", "kOperandTypeARMPredicate"); // I, R
+ MISC("it_pred", "kOperandTypeARMPredicate"); // I
+ MISC("addrmode2", "kOperandTypeARMAddrMode2"); // R, R, I
+ MISC("am2offset", "kOperandTypeARMAddrMode2Offset"); // R, I
+ MISC("addrmode3", "kOperandTypeARMAddrMode3"); // R, R, I
+ MISC("am3offset", "kOperandTypeARMAddrMode3Offset"); // R, I
+ MISC("addrmode4", "kOperandTypeARMAddrMode4"); // R, I
+ MISC("addrmode5", "kOperandTypeARMAddrMode5"); // R, I
+ MISC("addrmode6", "kOperandTypeARMAddrMode6"); // R, R, I, I
+ MISC("am6offset", "kOperandTypeARMAddrMode6Offset"); // R, I, I
+ MISC("addrmodepc", "kOperandTypeARMAddrModePC"); // R, I
+ MISC("reglist", "kOperandTypeARMRegisterList"); // I, R, ...
+ MISC("it_mask", "kOperandTypeThumbITMask"); // I
+ MISC("t2addrmode_imm8", "kOperandTypeThumb2AddrModeImm8"); // R, I
+  MISC("t2am_imm8_offset", "kOperandTypeThumb2AddrModeImm8Offset"); // I
+ MISC("t2addrmode_imm12", "kOperandTypeThumb2AddrModeImm12"); // R, I
+ MISC("t2addrmode_so_reg", "kOperandTypeThumb2AddrModeSoReg"); // R, R, I
+ MISC("t2addrmode_imm8s4", "kOperandTypeThumb2AddrModeImm8s4"); // R, I
+ MISC("t2am_imm8s4_offset", "kOperandTypeThumb2AddrModeImm8s4Offset");
+ // R, I
+ MISC("tb_addrmode", "kOperandTypeARMTBAddrMode"); // I
+ MISC("t_addrmode_s1", "kOperandTypeThumbAddrModeS1"); // R, I, R
+ MISC("t_addrmode_s2", "kOperandTypeThumbAddrModeS2"); // R, I, R
+ MISC("t_addrmode_s4", "kOperandTypeThumbAddrModeS4"); // R, I, R
+ MISC("t_addrmode_rr", "kOperandTypeThumbAddrModeRR"); // R, R
+ MISC("t_addrmode_sp", "kOperandTypeThumbAddrModeSP"); // R, I
+
+ return 1;
+}
+
+#undef SOREG
+#undef SOIMM
+#undef PRED
+#undef REG
+#undef MEM
+#undef LEA
+#undef IMM
+#undef PCR
+
+#undef SET
+
+/// ARMPopulateOperands - Handles all the operands in an ARM instruction, adding
+/// the appropriate flags to their descriptors
+///
+/// @operandTypes - A reference to the array of operand type objects
+/// @inst - The instruction to use as a source of information
+static void ARMPopulateOperands(
+ LiteralConstantEmitter *(&operandTypes)[EDIS_MAX_OPERANDS],
+ const CodeGenInstruction &inst) {
+ if (!inst.TheDef->isSubClassOf("InstARM") &&
+ !inst.TheDef->isSubClassOf("InstThumb"))
+ return;
+
+ unsigned int index;
+ unsigned int numOperands = inst.OperandList.size();
+
+ if (numOperands > EDIS_MAX_OPERANDS) {
+ errs() << "numOperands == " << numOperands << " > " <<
+ EDIS_MAX_OPERANDS << '\n';
+ llvm_unreachable("Too many operands");
+ }
+
+ for (index = 0; index < numOperands; ++index) {
+ const CodeGenInstruction::OperandInfo &operandInfo =
+ inst.OperandList[index];
+ Record &rec = *operandInfo.Rec;
+
+ if (ARMFlagFromOpName(operandTypes[index], rec.getName())) {
+ errs() << "Operand type: " << rec.getName() << '\n';
+ errs() << "Operand name: " << operandInfo.Name << '\n';
+      errs() << "Instruction name: " << inst.TheDef->getName() << '\n';
+ llvm_unreachable("Unhandled type");
+ }
+ }
+}
+
+#define BRANCH(target) { \
+ instType.set("kInstructionTypeBranch"); \
+ DECORATE1(target, "kOperandFlagTarget"); \
+}
+
+/// ARMExtractSemantics - Performs various checks on the name of an ARM
+/// instruction to determine what sort of an instruction it is and then adds
+/// the appropriate flags to the instruction and its operands
+///
+/// @arg instType - A reference to the type for the instruction as a whole
+/// @arg operandTypes - A reference to the array of operand type object pointers
+/// @arg operandFlags - A reference to the array of operand flag object pointers
+/// @arg inst - A reference to the original instruction
+static void ARMExtractSemantics(
+ LiteralConstantEmitter &instType,
+ LiteralConstantEmitter *(&operandTypes)[EDIS_MAX_OPERANDS],
+ FlagsConstantEmitter *(&operandFlags)[EDIS_MAX_OPERANDS],
+ const CodeGenInstruction &inst) {
+ const std::string &name = inst.TheDef->getName();
+
+ if (name == "tBcc" ||
+ name == "tB" ||
+ name == "t2Bcc" ||
+ name == "Bcc" ||
+ name == "tCBZ" ||
+ name == "tCBNZ") {
+ BRANCH("target");
+ }
+
+ if (name == "tBLr9" ||
+ name == "BLr9_pred" ||
+ name == "tBLXi_r9" ||
+ name == "tBLXr_r9" ||
+ name == "BLXr9" ||
+ name == "t2BXJ" ||
+ name == "BXJ") {
+ BRANCH("func");
+
+ unsigned opIndex;
+ opIndex = inst.getOperandNamed("func");
+ if (operandTypes[opIndex]->is("kOperandTypeImmediate"))
+ operandTypes[opIndex]->set("kOperandTypeARMBranchTarget");
+ }
+}
+
+#undef BRANCH
/// populateInstInfo - Fills an array of InstInfos with information about each
/// instruction in a target
@@ -561,19 +748,29 @@ static void populateInstInfo(CompoundConstantEmitter &infoArray,
CompoundConstantEmitter *infoStruct = new CompoundConstantEmitter;
infoArray.addEntry(infoStruct);
- FlagsConstantEmitter *instFlags = new FlagsConstantEmitter;
- infoStruct->addEntry(instFlags);
+ LiteralConstantEmitter *instType = new LiteralConstantEmitter;
+ infoStruct->addEntry(instType);
LiteralConstantEmitter *numOperandsEmitter =
new LiteralConstantEmitter(inst.OperandList.size());
infoStruct->addEntry(numOperandsEmitter);
+
+ CompoundConstantEmitter *operandTypeArray = new CompoundConstantEmitter;
+ infoStruct->addEntry(operandTypeArray);
+
+ LiteralConstantEmitter *operandTypes[EDIS_MAX_OPERANDS];
CompoundConstantEmitter *operandFlagArray = new CompoundConstantEmitter;
infoStruct->addEntry(operandFlagArray);
- FlagsConstantEmitter *operandFlags[MAX_OPERANDS];
+ FlagsConstantEmitter *operandFlags[EDIS_MAX_OPERANDS];
- for (unsigned operandIndex = 0; operandIndex < MAX_OPERANDS; ++operandIndex) {
+ for (unsigned operandIndex = 0;
+ operandIndex < EDIS_MAX_OPERANDS;
+ ++operandIndex) {
+ operandTypes[operandIndex] = new LiteralConstantEmitter;
+ operandTypeArray->addEntry(operandTypes[operandIndex]);
+
operandFlags[operandIndex] = new FlagsConstantEmitter;
operandFlagArray->addEntry(operandFlags[operandIndex]);
}
@@ -581,32 +778,99 @@ static void populateInstInfo(CompoundConstantEmitter &infoArray,
unsigned numSyntaxes = 0;
if (target.getName() == "X86") {
- X86PopulateOperands(operandFlags, inst);
- X86ExtractSemantics(*instFlags, operandFlags, inst);
+ X86PopulateOperands(operandTypes, inst);
+ X86ExtractSemantics(*instType, operandFlags, inst);
numSyntaxes = 2;
}
+ else if (target.getName() == "ARM") {
+ ARMPopulateOperands(operandTypes, inst);
+ ARMExtractSemantics(*instType, operandTypes, operandFlags, inst);
+ numSyntaxes = 1;
+ }
+
+ CompoundConstantEmitter *operandOrderArray = new CompoundConstantEmitter;
- CompoundConstantEmitter *operandOrderArray = new CompoundConstantEmitter;
infoStruct->addEntry(operandOrderArray);
- for (unsigned syntaxIndex = 0; syntaxIndex < MAX_SYNTAXES; ++syntaxIndex) {
- CompoundConstantEmitter *operandOrder = new CompoundConstantEmitter;
+ for (unsigned syntaxIndex = 0;
+ syntaxIndex < EDIS_MAX_SYNTAXES;
+ ++syntaxIndex) {
+ CompoundConstantEmitter *operandOrder =
+ new CompoundConstantEmitter(EDIS_MAX_OPERANDS);
+
operandOrderArray->addEntry(operandOrder);
if (syntaxIndex < numSyntaxes) {
populateOperandOrder(operandOrder, inst, syntaxIndex);
}
- else {
- for (unsigned operandIndex = 0;
- operandIndex < MAX_OPERANDS;
- ++operandIndex) {
- operandOrder->addEntry(new LiteralConstantEmitter("-1"));
- }
- }
}
+
+ infoStruct = NULL;
}
}
+static void emitCommonEnums(raw_ostream &o, unsigned int &i) {
+ EnumEmitter operandTypes("OperandTypes");
+ operandTypes.addEntry("kOperandTypeNone");
+ operandTypes.addEntry("kOperandTypeImmediate");
+ operandTypes.addEntry("kOperandTypeRegister");
+ operandTypes.addEntry("kOperandTypeX86Memory");
+ operandTypes.addEntry("kOperandTypeX86EffectiveAddress");
+ operandTypes.addEntry("kOperandTypeX86PCRelative");
+ operandTypes.addEntry("kOperandTypeARMBranchTarget");
+ operandTypes.addEntry("kOperandTypeARMSoReg");
+ operandTypes.addEntry("kOperandTypeARMSoImm");
+ operandTypes.addEntry("kOperandTypeARMSoImm2Part");
+ operandTypes.addEntry("kOperandTypeARMPredicate");
+ operandTypes.addEntry("kOperandTypeARMAddrMode2");
+ operandTypes.addEntry("kOperandTypeARMAddrMode2Offset");
+ operandTypes.addEntry("kOperandTypeARMAddrMode3");
+ operandTypes.addEntry("kOperandTypeARMAddrMode3Offset");
+ operandTypes.addEntry("kOperandTypeARMAddrMode4");
+ operandTypes.addEntry("kOperandTypeARMAddrMode5");
+ operandTypes.addEntry("kOperandTypeARMAddrMode6");
+ operandTypes.addEntry("kOperandTypeARMAddrMode6Offset");
+ operandTypes.addEntry("kOperandTypeARMAddrModePC");
+ operandTypes.addEntry("kOperandTypeARMRegisterList");
+ operandTypes.addEntry("kOperandTypeARMTBAddrMode");
+ operandTypes.addEntry("kOperandTypeThumbITMask");
+ operandTypes.addEntry("kOperandTypeThumbAddrModeS1");
+ operandTypes.addEntry("kOperandTypeThumbAddrModeS2");
+ operandTypes.addEntry("kOperandTypeThumbAddrModeS4");
+ operandTypes.addEntry("kOperandTypeThumbAddrModeRR");
+ operandTypes.addEntry("kOperandTypeThumbAddrModeSP");
+ operandTypes.addEntry("kOperandTypeThumb2SoReg");
+ operandTypes.addEntry("kOperandTypeThumb2SoImm");
+ operandTypes.addEntry("kOperandTypeThumb2AddrModeImm8");
+ operandTypes.addEntry("kOperandTypeThumb2AddrModeImm8Offset");
+ operandTypes.addEntry("kOperandTypeThumb2AddrModeImm12");
+ operandTypes.addEntry("kOperandTypeThumb2AddrModeSoReg");
+ operandTypes.addEntry("kOperandTypeThumb2AddrModeImm8s4");
+ operandTypes.addEntry("kOperandTypeThumb2AddrModeImm8s4Offset");
+ operandTypes.emit(o, i);
+
+ o << "\n";
+
+ EnumEmitter operandFlags("OperandFlags");
+ operandFlags.addEntry("kOperandFlagSource");
+ operandFlags.addEntry("kOperandFlagTarget");
+ operandFlags.emitAsFlags(o, i);
+
+ o << "\n";
+
+ EnumEmitter instructionTypes("InstructionTypes");
+ instructionTypes.addEntry("kInstructionTypeNone");
+ instructionTypes.addEntry("kInstructionTypeMove");
+ instructionTypes.addEntry("kInstructionTypeBranch");
+ instructionTypes.addEntry("kInstructionTypePush");
+ instructionTypes.addEntry("kInstructionTypePop");
+ instructionTypes.addEntry("kInstructionTypeCall");
+ instructionTypes.addEntry("kInstructionTypeReturn");
+ instructionTypes.emit(o, i);
+
+ o << "\n";
+}
+
void EDEmitter::run(raw_ostream &o) {
unsigned int i = 0;
@@ -615,9 +879,15 @@ void EDEmitter::run(raw_ostream &o) {
populateInstInfo(infoArray, target);
- o << "InstInfo instInfo" << target.getName().c_str() << "[] = ";
+ emitCommonEnums(o, i);
+
+ o << "namespace {\n";
+
+ o << "llvm::EDInstInfo instInfo" << target.getName().c_str() << "[] = ";
infoArray.emit(o, i);
o << ";" << "\n";
+
+ o << "}\n";
}
void EDEmitter::runHeader(raw_ostream &o) {
@@ -626,43 +896,13 @@ void EDEmitter::runHeader(raw_ostream &o) {
o << "#ifndef EDInfo_" << "\n";
o << "#define EDInfo_" << "\n";
o << "\n";
- o << "#include <inttypes.h>" << "\n";
- o << "\n";
- o << "#define MAX_OPERANDS " << format("%d", MAX_OPERANDS) << "\n";
- o << "#define MAX_SYNTAXES " << format("%d", MAX_SYNTAXES) << "\n";
+ o << "#define EDIS_MAX_OPERANDS " << format("%d", EDIS_MAX_OPERANDS) << "\n";
+ o << "#define EDIS_MAX_SYNTAXES " << format("%d", EDIS_MAX_SYNTAXES) << "\n";
o << "\n";
unsigned int i = 0;
- EnumEmitter operandFlags("OperandFlags");
- operandFlags.addEntry("kOperandFlagImmediate");
- operandFlags.addEntry("kOperandFlagRegister");
- operandFlags.addEntry("kOperandFlagMemory");
- operandFlags.addEntry("kOperandFlagEffectiveAddress");
- operandFlags.addEntry("kOperandFlagPCRelative");
- operandFlags.addEntry("kOperandFlagSource");
- operandFlags.addEntry("kOperandFlagTarget");
- operandFlags.emitAsFlags(o, i);
-
- o << "\n";
-
- EnumEmitter instructionFlags("InstructionFlags");
- instructionFlags.addEntry("kInstructionFlagMove");
- instructionFlags.addEntry("kInstructionFlagBranch");
- instructionFlags.addEntry("kInstructionFlagPush");
- instructionFlags.addEntry("kInstructionFlagPop");
- instructionFlags.addEntry("kInstructionFlagCall");
- instructionFlags.addEntry("kInstructionFlagReturn");
- instructionFlags.emitAsFlags(o, i);
-
- o << "\n";
-
- StructEmitter instInfo("InstInfo");
- instInfo.addMember("uint32_t", "instructionFlags");
- instInfo.addMember("uint8_t", "numOperands");
- instInfo.addMember("uint8_t", "operandFlags[MAX_OPERANDS]");
- instInfo.addMember("const char", "operandOrders[MAX_SYNTAXES][MAX_OPERANDS]");
- instInfo.emit(o, i);
+ emitCommonEnums(o, i);
o << "\n";
o << "#endif" << "\n";
diff --git a/utils/TableGen/Record.h b/utils/TableGen/Record.h
index 55c1a80f9b6a..576d626e069c 100644
--- a/utils/TableGen/Record.h
+++ b/utils/TableGen/Record.h
@@ -609,6 +609,11 @@ public:
if (!getBit(i)->isComplete()) return false;
return true;
}
+ bool allInComplete() const {
+ for (unsigned i = 0; i != getNumBits(); ++i)
+ if (getBit(i)->isComplete()) return false;
+ return true;
+ }
virtual std::string getAsString() const;
virtual Init *resolveReferences(Record &R, const RecordVal *RV);
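allInComplete() is the dual of the existing allComplete() just above it: it returns true only when no bit of the record is specified, which is exactly how the decoder emitter now recognizes pseudo-instructions. The two predicates over a plain "is bit i specified" mask, as a sketch; note that a partially specified encoding fails both tests:

#include <vector>

// Known[i] is true when bit i of the encoding is specified.
static bool allComplete(const std::vector<bool> &Known) {
  for (bool K : Known)
    if (!K) return false;  // some bit unspecified
  return true;             // fully-encoded instruction
}

static bool allInComplete(const std::vector<bool> &Known) {
  for (bool K : Known)
    if (K) return false;   // some bit specified
  return true;             // nothing specified: a pseudo instruction
}

int main() {
  std::vector<bool> Pseudo(32, false); // no bits specified
  return (allInComplete(Pseudo) && !allComplete(Pseudo)) ? 0 : 1;
}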
diff --git a/utils/TableGen/SubtargetEmitter.cpp b/utils/TableGen/SubtargetEmitter.cpp
index 9ac652f79935..b04eaf88f73a 100644
--- a/utils/TableGen/SubtargetEmitter.cpp
+++ b/utils/TableGen/SubtargetEmitter.cpp
@@ -203,7 +203,8 @@ unsigned SubtargetEmitter::CollectAllItinClasses(raw_ostream &OS,
// data initialization for the specified itinerary. N is the number
// of stages.
//
-void SubtargetEmitter::FormItineraryStageString(Record *ItinData,
+void SubtargetEmitter::FormItineraryStageString(const std::string &Name,
+ Record *ItinData,
std::string &ItinString,
unsigned &NStages) {
  // Get stages list
@@ -216,7 +217,7 @@ void SubtargetEmitter::FormItineraryStageString(Record *ItinData,
// Next stage
const Record *Stage = StageList[i];
- // Form string as ,{ cycles, u1 | u2 | ... | un, timeinc }
+ // Form string as ,{ cycles, u1 | u2 | ... | un, timeinc, kind }
int Cycles = Stage->getValueAsInt("Cycles");
ItinString += " { " + itostr(Cycles) + ", ";
@@ -226,13 +227,16 @@ void SubtargetEmitter::FormItineraryStageString(Record *ItinData,
// For each unit
for (unsigned j = 0, M = UnitList.size(); j < M;) {
// Add name and bitwise or
- ItinString += UnitList[j]->getName();
+ ItinString += Name + "FU::" + UnitList[j]->getName();
if (++j < M) ItinString += " | ";
}
int TimeInc = Stage->getValueAsInt("TimeInc");
ItinString += ", " + itostr(TimeInc);
+ int Kind = Stage->getValueAsInt("Kind");
+ ItinString += ", (llvm::InstrStage::ReservationKinds)" + itostr(Kind);
+
// Close off stage
ItinString += " }";
if (++i < N) ItinString += ", ";
@@ -276,9 +280,29 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(raw_ostream &OS,
// If just no itinerary then don't bother
if (ProcItinList.size() < 2) return;
+ // Emit functional units for all the itineraries.
+ for (unsigned i = 0, N = ProcItinList.size(); i < N; ++i) {
+ // Next record
+ Record *Proc = ProcItinList[i];
+
+ std::vector<Record*> FUs = Proc->getValueAsListOfDefs("FU");
+ if (FUs.empty())
+ continue;
+
+ const std::string &Name = Proc->getName();
+ OS << "\n// Functional units for itineraries \"" << Name << "\"\n"
+ << "namespace " << Name << "FU {\n";
+
+ for (unsigned j = 0, FUN = FUs.size(); j < FUN; ++j)
+ OS << " const unsigned " << FUs[j]->getName()
+ << " = 1 << " << j << ";\n";
+
+ OS << "}\n";
+ }
+
// Begin stages table
- std::string StageTable = "static const llvm::InstrStage Stages[] = {\n";
- StageTable += " { 0, 0, 0 }, // No itinerary\n";
+ std::string StageTable = "\nstatic const llvm::InstrStage Stages[] = {\n";
+ StageTable += " { 0, 0, 0, llvm::InstrStage::Required }, // No itinerary\n";
// Begin operand cycle table
std::string OperandCycleTable = "static const unsigned OperandCycles[] = {\n";
@@ -312,7 +336,7 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(raw_ostream &OS,
// Get string and stage count
std::string ItinStageString;
unsigned NStages;
- FormItineraryStageString(ItinData, ItinStageString, NStages);
+ FormItineraryStageString(Name, ItinData, ItinStageString, NStages);
// Get string and operand cycle count
std::string ItinOperandCycleString;
@@ -367,7 +391,7 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(raw_ostream &OS,
}
// Closing stage
- StageTable += " { 0, 0, 0 } // End itinerary\n";
+ StageTable += " { 0, 0, 0, llvm::InstrStage::Required } // End itinerary\n";
StageTable += "};\n";
// Closing operand cycles
@@ -564,9 +588,9 @@ void SubtargetEmitter::run(raw_ostream &OS) {
OS << "#include \"llvm/Support/raw_ostream.h\"\n";
OS << "#include \"llvm/Target/SubtargetFeature.h\"\n";
OS << "#include \"llvm/Target/TargetInstrItineraries.h\"\n\n";
-
- Enumeration(OS, "FuncUnit", true);
- OS<<"\n";
+
+// Enumeration(OS, "FuncUnit", true);
+// OS<<"\n";
// Enumeration(OS, "InstrItinClass", false);
// OS<<"\n";
Enumeration(OS, "SubtargetFeature", true);
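The net effect of the itinerary changes above: functional units move out of the single global FuncUnit enumeration (now commented out) into one namespace of one-hot constants per itinerary record, and every stage entry gains a fourth ReservationKinds field. Reduced to plain types so it compiles on its own, the emitted tables now have roughly this shape (the FooItineraries processor and its unit names are hypothetical):

// Functional units for itineraries "FooItineraries"
namespace FooItinerariesFU {
  const unsigned ALU = 1 << 0; // one-hot, as emitted by the j-indexed loop
  const unsigned MUL = 1 << 1;
}

// Stand-in for llvm::InstrStage with the new fourth field.
struct InstrStage { int Cycles; unsigned Units; int TimeInc; int Kind; };

static const InstrStage Stages[] = {
  { 0, 0, 0, 0 },                                             // No itinerary
  { 1, FooItinerariesFU::ALU | FooItinerariesFU::MUL, 0, 0 }, // one stage
  { 0, 0, 0, 0 },                                             // End itinerary
};

int main() { return Stages[1].Units & 0; }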
diff --git a/utils/TableGen/SubtargetEmitter.h b/utils/TableGen/SubtargetEmitter.h
index 1d7088fd3902..f43a4431d61e 100644
--- a/utils/TableGen/SubtargetEmitter.h
+++ b/utils/TableGen/SubtargetEmitter.h
@@ -34,7 +34,8 @@ class SubtargetEmitter : public TableGenBackend {
void CPUKeyValues(raw_ostream &OS);
unsigned CollectAllItinClasses(raw_ostream &OS,
std::map<std::string, unsigned> &ItinClassesMap);
- void FormItineraryStageString(Record *ItinData, std::string &ItinString,
+  void FormItineraryStageString(const std::string &Name,
+ Record *ItinData, std::string &ItinString,
unsigned &NStages);
void FormItineraryOperandCycleString(Record *ItinData, std::string &ItinString,
unsigned &NOperandCycles);
diff --git a/utils/TableGen/X86DisassemblerTables.cpp b/utils/TableGen/X86DisassemblerTables.cpp
index be070317fc60..2176224523a3 100644
--- a/utils/TableGen/X86DisassemblerTables.cpp
+++ b/utils/TableGen/X86DisassemblerTables.cpp
@@ -161,7 +161,7 @@ void DisassemblerTables::emitOneID(raw_ostream &o,
/// @param i - The indentation level for that output stream.
static void emitEmptyTable(raw_ostream &o, uint32_t &i)
{
- o.indent(i * 2) << "InstrUID modRMEmptyTable[1] = { 0 };" << "\n";
+ o.indent(i * 2) << "static InstrUID modRMEmptyTable[1] = { 0 };" << "\n";
o << "\n";
}
@@ -275,7 +275,7 @@ void DisassemblerTables::emitModRMDecision(raw_ostream &o1,
return;
}
- o1.indent(i1) << "InstrUID modRMTable" << thisTableNumber;
+ o1.indent(i1) << "static InstrUID modRMTable" << thisTableNumber;
switch (dt) {
default:
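Making the emitted modRM tables static gives them internal linkage: the tables are only referenced from within the generated file, so there is no reason to export the symbols, and identically named tables in other translation units can no longer clash at link time. A tiny sketch of the effect (the typedef is a stand-in for the disassembler's own):

#include <cstdint>

typedef uint16_t InstrUID; // stand-in for the real typedef

// With static, this definition is private to its translation unit, so
// every generated file may safely define a table with the same name.
static InstrUID modRMEmptyTable[1] = { 0 };

int main() { return modRMEmptyTable[0]; }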
diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp
index d71252caf274..b15db2fab0ba 100644
--- a/utils/TableGen/X86RecognizableInstr.cpp
+++ b/utils/TableGen/X86RecognizableInstr.cpp
@@ -820,7 +820,7 @@ OperandType RecognizableInstr::typeFromString(const std::string &s,
TYPE("i128mem", TYPE_M128)
TYPE("i64i32imm_pcrel", TYPE_REL64)
TYPE("i32imm_pcrel", TYPE_REL32)
- TYPE("SSECC", TYPE_IMM8)
+ TYPE("SSECC", TYPE_IMM3)
TYPE("brtarget", TYPE_RELv)
TYPE("brtarget8", TYPE_REL8)
TYPE("f80mem", TYPE_M80FP)
diff --git a/utils/buildit/GNUmakefile b/utils/buildit/GNUmakefile
index 8d8504c39b5c..e0568d2f145f 100644
--- a/utils/buildit/GNUmakefile
+++ b/utils/buildit/GNUmakefile
@@ -46,6 +46,9 @@ else
LLVM_OPTIMIZED := yes
endif
+# Default to not install libLTO.dylib.
+INSTALL_LIBLTO := no
+
ifndef RC_ProjectSourceVersion
RC_ProjectSourceVersion = 9999
endif
@@ -59,10 +62,9 @@ install: $(OBJROOT) $(SYMROOT) $(DSTROOT)
cd $(OBJROOT) && \
$(SRC)/utils/buildit/build_llvm "$(RC_ARCHS)" "$(TARGETS)" \
$(SRC) $(PREFIX) $(DSTROOT) $(SYMROOT) \
- $(ENABLE_ASSERTIONS) $(LLVM_OPTIMIZED) \
+ $(ENABLE_ASSERTIONS) $(LLVM_OPTIMIZED) $(INSTALL_LIBLTO) \
$(RC_ProjectSourceVersion) $(RC_ProjectSourceSubversion)
-
# installhdrs does nothing, because the headers aren't useful until
# the compiler is installed.
installhdrs:
diff --git a/utils/buildit/build_llvm b/utils/buildit/build_llvm
index 33f471db4fc3..78cc6557e32c 100755
--- a/utils/buildit/build_llvm
+++ b/utils/buildit/build_llvm
@@ -9,12 +9,12 @@ set -x
# The first parameter is a space-separated list of the architectures the
# compilers will run on. For instance, "ppc i386". If the current machine
# isn't in the list, it will (effectively) be added.
-# FIXME: HOSTS is not used in this script. Use it or Remove it.
HOSTS="$1"
# The second parameter is a space-separated list of the architectures the
# compilers will generate code for. If the current machine isn't in the list, a
# compiler for it will get built anyway, but won't be installed.
+# FIXME: The list of targets is currently hard-coded and TARGETS is not used.
TARGETS="$2"
# The third parameter is the path to the compiler sources. There should be a
@@ -42,11 +42,15 @@ LLVM_ASSERTIONS="$7"
# build.
LLVM_OPTIMIZED="$8"
-# The nineth parameter is the version number of the submission, e.g. 1007.
-LLVM_SUBMIT_VERSION="$9"
+# The ninth parameter is a yes/no that indicates whether libLTO.dylib
+# should be installed.
+INSTALL_LIBLTO="$9"
-# The tenth parameter is the subversion number of the submission, e.g. 03.
-LLVM_SUBMIT_SUBVERSION="${10}"
+# The tenth parameter is the version number of the submission, e.g. 1007.
+LLVM_SUBMIT_VERSION="${10}"
+
+# The eleventh parameter is the subversion number of the submission, e.g. 03.
+LLVM_SUBMIT_SUBVERSION="${11}"
# The current working directory is where the build will happen. It may already
# contain a partial result of an interrupted build, in which case this script
@@ -56,11 +60,6 @@ DIR=`pwd`
DARWIN_VERS=`uname -r | sed 's/\..*//'`
echo DARWIN_VERS = $DARWIN_VERS
-if [ "x$RC_ProjectName" = "xllvmCore_Embedded" ]; then
- DEST_DIR="$DEST_DIR/Developer/Platforms/iPhoneOS.platform"
- mkdir -p "$DEST_DIR"
-fi
-
DEVELOPER_DIR="${DEVELOPER_DIR-Developer}"
if [ "x$RC_ProjectName" = "xllvmCore_EmbeddedHosted" ]; then
DT_HOME="$DEST_DIR/usr"
@@ -200,7 +199,7 @@ if [ "x$MAJ_VER" != "x4" -o "x$MIN_VER" != "x0" ]; then
JOBS_FLAG="-j $SYSCTL"
fi
-make $JOBS_FLAG $OPTIMIZE_OPTS UNIVERSAL=1 UNIVERSAL_ARCH="$TARGETS" \
+make $JOBS_FLAG $OPTIMIZE_OPTS UNIVERSAL=1 UNIVERSAL_ARCH="$HOSTS" \
UNIVERSAL_SDK_PATH=$HOST_SDKROOT \
NO_RUNTIME_LIBS=1 \
DISABLE_EDIS=1 \
@@ -226,7 +225,7 @@ rm -rf * || exit 1
cd $DIR/obj-llvm || exit 1
# Install the tree into the destination directory.
-make $LOCAL_MAKEFLAGS $OPTIMIZE_OPTS UNIVERSAL=1 UNIVERSAL_ARCH="$TARGETS" \
+make $LOCAL_MAKEFLAGS $OPTIMIZE_OPTS UNIVERSAL=1 UNIVERSAL_ARCH="$HOSTS" \
NO_RUNTIME_LIBS=1 \
DISABLE_EDIS=1 \
LLVM_SUBMIT_VERSION=$LLVM_SUBMIT_VERSION \
@@ -276,9 +275,11 @@ else
fi
cd $DEST_DIR$DEST_ROOT
-mkdir -p $DT_HOME/lib
-mv lib/libLTO.dylib $DT_HOME/lib/libLTO.dylib
-strip -S $DT_HOME/lib/libLTO.dylib
+if [ "$INSTALL_LIBLTO" == yes ]; then
+ mkdir -p $DT_HOME/lib
+ mv lib/libLTO.dylib $DT_HOME/lib/libLTO.dylib
+ strip -S $DT_HOME/lib/libLTO.dylib
+fi
rm -f lib/libLTO.a lib/libLTO.la
# The Hello dylib is an example of how to build a pass. No need to install it.
@@ -343,10 +344,11 @@ find $DEST_DIR -name html.tar.gz -exec rm {} \;
################################################################################
# symlinks so that B&I can find things
-cd $DEST_DIR
-mkdir -p ./usr/lib/
-cd usr/lib
-ln -s ../../$DEVELOPER_DIR/usr/lib/libLTO.dylib ./libLTO.dylib
+if [ "$INSTALL_LIBLTO" == yes ]; then
+ mkdir -p $DEST_DIR/usr/lib/
+ cd $DEST_DIR/usr/lib && \
+ ln -s ../../$DEVELOPER_DIR/usr/lib/libLTO.dylib ./libLTO.dylib
+fi
################################################################################
# w00t! Done!
diff --git a/utils/fpcmp/fpcmp.cpp b/utils/fpcmp/fpcmp.cpp
index 66d8ab159b93..5f6b5e8d6e59 100644
--- a/utils/fpcmp/fpcmp.cpp
+++ b/utils/fpcmp/fpcmp.cpp
@@ -14,7 +14,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileUtilities.h"
-#include <iostream>
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
namespace {
@@ -37,7 +37,7 @@ int main(int argc, char **argv) {
sys::PathWithStatus(File2),
AbsTolerance, RelTolerance, &ErrorMsg);
if (!ErrorMsg.empty())
- std::cerr << argv[0] << ": " << ErrorMsg << "\n";
+ errs() << argv[0] << ": " << ErrorMsg << "\n";
return DF;
}
diff --git a/utils/lit/lit/ExampleTests/LLVM.OutOfTree/src/test/Foo/pct-S.ll b/utils/lit/lit/ExampleTests/LLVM.OutOfTree/src/test/Foo/pct-S.ll
index 4e8a58205569..3ff363315a32 100644
--- a/utils/lit/lit/ExampleTests/LLVM.OutOfTree/src/test/Foo/pct-S.ll
+++ b/utils/lit/lit/ExampleTests/LLVM.OutOfTree/src/test/Foo/pct-S.ll
@@ -1 +1 @@
-; RUN: grep "hi" %S/data.txt \ No newline at end of file
+; RUN: grep "hi" %S/data.txt
diff --git a/utils/lit/lit/TestFormats.py b/utils/lit/lit/TestFormats.py
index 7ab9bb6e452e..5e1a811e6c77 100644
--- a/utils/lit/lit/TestFormats.py
+++ b/utils/lit/lit/TestFormats.py
@@ -129,14 +129,20 @@ class OneCommandPerFileTest:
self.command = [command]
else:
self.command = list(command)
- self.dir = str(dir)
+ if dir is not None:
+ dir = str(dir)
+ self.dir = dir
self.recursive = bool(recursive)
self.pattern = re.compile(pattern)
self.useTempInput = useTempInput
def getTestsInDirectory(self, testSuite, path_in_suite,
litConfig, localConfig):
- for dirname,subdirs,filenames in os.walk(self.dir):
+ dir = self.dir
+ if dir is None:
+ dir = testSuite.getSourcePath(path_in_suite)
+
+ for dirname,subdirs,filenames in os.walk(dir):
if not self.recursive:
subdirs[:] = []
@@ -151,7 +157,7 @@ class OneCommandPerFileTest:
continue
path = os.path.join(dirname,filename)
- suffix = path[len(self.dir):]
+ suffix = path[len(dir):]
if suffix.startswith(os.sep):
suffix = suffix[1:]
test = Test.Test(testSuite,
diff --git a/utils/llvm.grm b/utils/llvm.grm
index d391e2aeab9e..fa0dcd1e1520 100644
--- a/utils/llvm.grm
+++ b/utils/llvm.grm
@@ -398,7 +398,7 @@ OptVolatile ::= - volatile | _ ;
OptExact ::= - exact | _ ;
OptNSW ::= - nsw | _ ;
OptNUW ::= - nuw | _ ;
-OptNW ::= OptNUW OptNSW ;
+OptNW ::= OptNUW OptNSW | OptNSW OptNUW ;
OptInBounds ::= - inbounds | _ ;
MemoryInst ::= malloc Types OptCAlign
diff --git a/utils/unittest/googletest/gtest-filepath.cc b/utils/unittest/googletest/gtest-filepath.cc
index 640c27c313c3..493ba0bfa7d0 100644
--- a/utils/unittest/googletest/gtest-filepath.cc
+++ b/utils/unittest/googletest/gtest-filepath.cc
@@ -167,7 +167,7 @@ bool FilePath::FileOrDirectoryExists() const {
return _stat(pathname_.c_str(), &file_stat) == 0;
#endif // _WIN32_WCE
#else
- struct stat file_stat = {};
+ struct stat file_stat;
return stat(pathname_.c_str(), &file_stat) == 0;
#endif // GTEST_OS_WINDOWS
}
@@ -195,7 +195,7 @@ bool FilePath::DirectoryExists() const {
(_S_IFDIR & file_stat.st_mode) != 0;
#endif // _WIN32_WCE
#else
- struct stat file_stat = {};
+ struct stat file_stat;
result = stat(pathname_.c_str(), &file_stat) == 0 &&
S_ISDIR(file_stat.st_mode);
#endif // GTEST_OS_WINDOWS
diff --git a/utils/unittest/googletest/include/gtest/internal/gtest-linked_ptr.h b/utils/unittest/googletest/include/gtest/internal/gtest-linked_ptr.h
index f98af0b123bd..d4c7a39b0ec1 100644
--- a/utils/unittest/googletest/include/gtest/internal/gtest-linked_ptr.h
+++ b/utils/unittest/googletest/include/gtest/internal/gtest-linked_ptr.h
@@ -176,6 +176,7 @@ class linked_ptr {
// Sole ownership by this linked_ptr object is required.
T* release() {
bool last = link_.depart();
+ (void) last;
assert(last);
T* v = value_;
value_ = NULL;
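The added (void) last; is the standard idiom for a variable whose only use is inside an assert: with NDEBUG defined the assert expands to nothing, and the otherwise-unused variable would trigger -Wunused-variable. A minimal sketch:

#include <cassert>

static bool depart() { return true; }

int main() {
  bool last = depart();
  (void)last;   // keeps release (NDEBUG) builds warning-free
  assert(last); // still checked when assertions are enabled
  return 0;
}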
diff --git a/utils/unittest/googletest/include/gtest/internal/gtest-port.h b/utils/unittest/googletest/include/gtest/internal/gtest-port.h
index 3e49993bff4c..20a95c9b2ece 100644
--- a/utils/unittest/googletest/include/gtest/internal/gtest-port.h
+++ b/utils/unittest/googletest/include/gtest/internal/gtest-port.h
@@ -227,9 +227,10 @@
// TODO(wan@google.com): uses autoconf to detect whether ::std::wstring
// is available.
-#if defined(GTEST_OS_CYGWIN) || defined(GTEST_OS_SOLARIS) || defined(GTEST_OS_HAIKU)
+#if defined(GTEST_OS_CYGWIN) || defined(GTEST_OS_SOLARIS) || defined(GTEST_OS_HAIKU) || defined(_MINIX)
// At least some versions of cygwin don't support ::std::wstring.
// Solaris' libc++ doesn't support it either.
+// Minix currently doesn't support it either.
#define GTEST_HAS_STD_WSTRING 0
#else
#define GTEST_HAS_STD_WSTRING GTEST_HAS_STD_STRING