author    | Dimitry Andric <dim@FreeBSD.org> | 2014-05-11 18:24:26 +0000
committer | Dimitry Andric <dim@FreeBSD.org> | 2014-05-11 18:24:26 +0000
commit    | 68bcb7db193e4bc81430063148253d30a791023e (patch)
tree      | 9f9245264c66971905eab3af40b7fc82e38fc2ad
parent    | 512b84fc6c12bc496cef739e69bfaaf27e7ccc8e (diff)
Vendor import of llvm RELEASE_34/dot1-final tag r208032 (effectively, 3.4.1 release)
Tag: vendor/llvm/llvm-release_34-r208032
Notes:
svn path=/vendor/llvm/dist/; revision=265889
svn path=/vendor/llvm/llvm-release_34-r208032/; revision=265890; tag=vendor/llvm/llvm-release_34-r208032
119 files changed, 2510 insertions, 370 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index a68e7e163c52..bd0f846f04b3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,9 +12,10 @@ set(CMAKE_MODULE_PATH set(LLVM_VERSION_MAJOR 3) set(LLVM_VERSION_MINOR 4) +set(LLVM_VERSION_PATCH 1) if (NOT PACKAGE_VERSION) - set(PACKAGE_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}svn") + set(PACKAGE_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}") endif() option(LLVM_INSTALL_TOOLCHAIN_ONLY "Only include toolchain files in the 'install' target." OFF) @@ -42,6 +43,7 @@ set(CPACK_PACKAGE_INSTALL_DIRECTORY "LLVM") set(CPACK_PACKAGE_VENDOR "LLVM") set(CPACK_PACKAGE_VERSION_MAJOR ${LLVM_VERSION_MAJOR}) set(CPACK_PACKAGE_VERSION_MINOR ${LLVM_VERSION_MINOR}) +set(CPACK_PACKAGE_VERSION_PATCH ${LLVM_VERSION_PATCH}) set(CPACK_PACKAGE_VERSION ${PACKAGE_VERSION}) set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.TXT") if(WIN32 AND NOT UNIX) diff --git a/Makefile.config.in b/Makefile.config.in index dcca45f36cd8..00c6d965170a 100644 --- a/Makefile.config.in +++ b/Makefile.config.in @@ -15,6 +15,10 @@ # Define LLVM specific info and directories based on the autoconf variables LLVMPackageName := @PACKAGE_TARNAME@ LLVMVersion := @PACKAGE_VERSION@ +LLVM_VERSION_MAJOR := @LLVM_VERSION_MAJOR@ +LLVM_VERSION_MINOR := @LLVM_VERSION_MINOR@ +LLVM_VERSION_PATCH := @LLVM_VERSION_PATCH@ +LLVM_VERSION_SUFFIX := @LLVM_VERSION_SUFFIX@ LLVM_CONFIGTIME := @LLVM_CONFIGTIME@ ########################################################################### diff --git a/Makefile.rules b/Makefile.rules index 68f6cf8ec5d1..210abdafc8e3 100644 --- a/Makefile.rules +++ b/Makefile.rules @@ -757,7 +757,7 @@ else Ranlib = ranlib endif -AliasTool = ln -s +AliasTool = ln -sf #---------------------------------------------------------- # Get the list of source files and compute object file @@ -1121,15 +1121,19 @@ ifdef LIBRARYNAME # Make sure there isn't any extraneous whitespace on the LIBRARYNAME option LIBRARYNAME := $(strip $(LIBRARYNAME)) +LIBRARYALIASNAME := $(strip $(LIBRARYALIASNAME)) ifdef LOADABLE_MODULE BaseLibName.A := $(LIBRARYNAME).a BaseLibName.SO := $(LIBRARYNAME)$(SHLIBEXT) +BaseAliasName.SO := $(LIBRARYALIASNAME)$(SHLIBEXT) else BaseLibName.A := lib$(LIBRARYNAME).a BaseLibName.SO := $(SharedPrefix)$(LIBRARYNAME)$(SHLIBEXT) +BaseAliasName.SO := $(SharedPrefix)$(LIBRARYALIASNAME)$(SHLIBEXT) endif LibName.A := $(LibDir)/$(BaseLibName.A) LibName.SO := $(SharedLibDir)/$(BaseLibName.SO) +AliasName.SO := $(SharedLibDir)/$(BaseAliasName.SO) LibName.O := $(LibDir)/$(LIBRARYNAME).o #--------------------------------------------------------- @@ -1183,12 +1187,17 @@ else DestSharedLibDir := $(DESTDIR)$(PROJ_libdir) endif DestSharedLib := $(DestSharedLibDir)/$(BaseLibName.SO) +DestSharedAlias := $(DestSharedLibDir)/$(BaseAliasName.SO) install-local:: $(DestSharedLib) $(DestSharedLib): $(LibName.SO) $(DestSharedLibDir) $(Echo) Installing $(BuildMode) Shared Library $(DestSharedLib) $(Verb) $(INSTALL) $(LibName.SO) $(DestSharedLib) +ifdef SHARED_ALIAS + $(Echo) Creating alias from $(DestSharedLib) to $(DestSharedAlias) + $(Verb) $(AliasTool) $(DestSharedLib) $(DestSharedAlias) +endif uninstall-local:: $(Echo) Uninstalling $(BuildMode) Shared Library $(DestSharedLib) diff --git a/autoconf/configure.ac b/autoconf/configure.ac index a9d491548f11..7b4bae7e71a5 100644 --- a/autoconf/configure.ac +++ b/autoconf/configure.ac @@ -31,9 +31,22 @@ dnl=== dnl===-----------------------------------------------------------------------=== dnl Initialize 
autoconf and define the package name, version number and dnl address for reporting bugs. -AC_INIT([LLVM],[3.4],[http://llvm.org/bugs/]) -AC_DEFINE([LLVM_VERSION_MAJOR], [3], [Major version of the LLVM API]) -AC_DEFINE([LLVM_VERSION_MINOR], [4], [Minor version of the LLVM API]) + +AC_INIT([LLVM],[3.4.1],[http://llvm.org/bugs/]) + +LLVM_VERSION_MAJOR=3 +LLVM_VERSION_MINOR=4 +LLVM_VERSION_PATCH=1 +LLVM_VERSION_SUFFIX= + +AC_DEFINE_UNQUOTED([LLVM_VERSION_MAJOR], $LLVM_VERSION_MAJOR, [Major version of the LLVM API]) +AC_DEFINE_UNQUOTED([LLVM_VERSION_MINOR], $LLVM_VERSION_MINOR, [Minor version of the LLVM API]) +AC_DEFINE_UNQUOTED([LLVM_VERSION_PATCH], $LLVM_VERSION_PATCH, [Patch version of the LLVM API]) + +AC_SUBST([LLVM_VERSION_MAJOR]) +AC_SUBST([LLVM_VERSION_MINOR]) +AC_SUBST([LLVM_VERSION_PATCH]) +AC_SUBST([LLVM_VERSION_SUFFIX]) dnl Provide a copyright substitution and ensure the copyright notice is included dnl in the output of --version option of the generated configure script. diff --git a/configure b/configure index 72e9b1330443..cbda923cc700 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.60 for LLVM 3.4. +# Generated by GNU Autoconf 2.60 for LLVM 3.4.1. # # Report bugs to <http://llvm.org/bugs/>. # @@ -561,8 +561,8 @@ SHELL=${CONFIG_SHELL-/bin/sh} # Identity of this package. PACKAGE_NAME='LLVM' PACKAGE_TARNAME='llvm' -PACKAGE_VERSION='3.4' -PACKAGE_STRING='LLVM 3.4' +PACKAGE_VERSION='3.4.1' +PACKAGE_STRING='LLVM 3.4.1' PACKAGE_BUGREPORT='http://llvm.org/bugs/' ac_unique_file="lib/IR/Module.cpp" @@ -639,6 +639,10 @@ LIBS build_alias host_alias target_alias +LLVM_VERSION_MAJOR +LLVM_VERSION_MINOR +LLVM_VERSION_PATCH +LLVM_VERSION_SUFFIX LLVM_COPYRIGHT CC CFLAGS @@ -1330,7 +1334,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures LLVM 3.4 to adapt to many kinds of systems. +\`configure' configures LLVM 3.4.1 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1396,7 +1400,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of LLVM 3.4:";; + short | recursive ) echo "Configuration of LLVM 3.4.1:";; esac cat <<\_ACEOF @@ -1564,7 +1568,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -LLVM configure 3.4 +LLVM configure 3.4.1 generated by GNU Autoconf 2.60 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, @@ -1580,7 +1584,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by LLVM $as_me 3.4, which was +It was created by LLVM $as_me 3.4.1, which was generated by GNU Autoconf 2.60. 
Invocation command line was $ $0 $@ @@ -1934,16 +1938,32 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu -cat >>confdefs.h <<\_ACEOF -#define LLVM_VERSION_MAJOR 3 +LLVM_VERSION_MAJOR=3 +LLVM_VERSION_MINOR=4 +LLVM_VERSION_PATCH=1 +LLVM_VERSION_SUFFIX= + + +cat >>confdefs.h <<_ACEOF +#define LLVM_VERSION_MAJOR $LLVM_VERSION_MAJOR _ACEOF -cat >>confdefs.h <<\_ACEOF -#define LLVM_VERSION_MINOR 4 +cat >>confdefs.h <<_ACEOF +#define LLVM_VERSION_MINOR $LLVM_VERSION_MINOR +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define LLVM_VERSION_PATCH $LLVM_VERSION_PATCH _ACEOF + + + + + LLVM_COPYRIGHT="Copyright (c) 2003-2013 University of Illinois at Urbana-Champaign." @@ -8825,7 +8845,9 @@ if test "${enable_ltdl_install+set}" = set; then fi - if test x"${enable_ltdl_install-no}" != xno; then + + +if test x"${enable_ltdl_install-no}" != xno; then INSTALL_LTDL_TRUE= INSTALL_LTDL_FALSE='#' else @@ -8833,7 +8855,9 @@ else INSTALL_LTDL_FALSE= fi - if test x"${enable_ltdl_convenience-no}" != xno; then + + +if test x"${enable_ltdl_convenience-no}" != xno; then CONVENIENCE_LTDL_TRUE= CONVENIENCE_LTDL_FALSE='#' else @@ -10582,7 +10606,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<EOF -#line 10585 "configure" +#line 10609 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -22745,7 +22769,7 @@ exec 6>&1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by LLVM $as_me 3.4, which was +This file was extended by LLVM $as_me 3.4.1, which was generated by GNU Autoconf 2.60. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -22798,7 +22822,7 @@ Report bugs to <bug-autoconf@gnu.org>." _ACEOF cat >>$CONFIG_STATUS <<_ACEOF ac_cs_version="\\ -LLVM config.status 3.4 +LLVM config.status 3.4.1 configured by $0, generated by GNU Autoconf 2.60, with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\" @@ -23036,6 +23060,10 @@ LIBS!$LIBS$ac_delim build_alias!$build_alias$ac_delim host_alias!$host_alias$ac_delim target_alias!$target_alias$ac_delim +LLVM_VERSION_MAJOR!$LLVM_VERSION_MAJOR$ac_delim +LLVM_VERSION_MINOR!$LLVM_VERSION_MINOR$ac_delim +LLVM_VERSION_PATCH!$LLVM_VERSION_PATCH$ac_delim +LLVM_VERSION_SUFFIX!$LLVM_VERSION_SUFFIX$ac_delim LLVM_COPYRIGHT!$LLVM_COPYRIGHT$ac_delim CC!$CC$ac_delim CFLAGS!$CFLAGS$ac_delim @@ -23092,10 +23120,6 @@ DISABLE_ASSERTIONS!$DISABLE_ASSERTIONS$ac_delim ENABLE_WERROR!$ENABLE_WERROR$ac_delim ENABLE_EXPENSIVE_CHECKS!$ENABLE_EXPENSIVE_CHECKS$ac_delim EXPENSIVE_CHECKS!$EXPENSIVE_CHECKS$ac_delim -DEBUG_RUNTIME!$DEBUG_RUNTIME$ac_delim -DEBUG_SYMBOLS!$DEBUG_SYMBOLS$ac_delim -KEEP_SYMBOLS!$KEEP_SYMBOLS$ac_delim -JIT!$JIT$ac_delim _ACEOF if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then @@ -23137,6 +23161,10 @@ _ACEOF ac_delim='%!_!# ' for ac_last_try in false false false false false :; do cat >conf$$subs.sed <<_ACEOF +DEBUG_RUNTIME!$DEBUG_RUNTIME$ac_delim +DEBUG_SYMBOLS!$DEBUG_SYMBOLS$ac_delim +KEEP_SYMBOLS!$KEEP_SYMBOLS$ac_delim +JIT!$JIT$ac_delim TARGET_HAS_JIT!$TARGET_HAS_JIT$ac_delim ENABLE_DOCS!$ENABLE_DOCS$ac_delim ENABLE_DOXYGEN!$ENABLE_DOXYGEN$ac_delim @@ -23230,10 +23258,6 @@ LLVM_ETCDIR!$LLVM_ETCDIR$ac_delim LLVM_INCLUDEDIR!$LLVM_INCLUDEDIR$ac_delim LLVM_INFODIR!$LLVM_INFODIR$ac_delim LLVM_MANDIR!$LLVM_MANDIR$ac_delim -LLVM_CONFIGTIME!$LLVM_CONFIGTIME$ac_delim -BINDINGS_TO_BUILD!$BINDINGS_TO_BUILD$ac_delim -ALL_BINDINGS!$ALL_BINDINGS$ac_delim -OCAML_LIBDIR!$OCAML_LIBDIR$ac_delim _ACEOF if 
test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then @@ -23275,6 +23299,10 @@ _ACEOF ac_delim='%!_!# ' for ac_last_try in false false false false false :; do cat >conf$$subs.sed <<_ACEOF +LLVM_CONFIGTIME!$LLVM_CONFIGTIME$ac_delim +BINDINGS_TO_BUILD!$BINDINGS_TO_BUILD$ac_delim +ALL_BINDINGS!$ALL_BINDINGS$ac_delim +OCAML_LIBDIR!$OCAML_LIBDIR$ac_delim ENABLE_VISIBILITY_INLINES_HIDDEN!$ENABLE_VISIBILITY_INLINES_HIDDEN$ac_delim RPATH!$RPATH$ac_delim RDYNAMIC!$RDYNAMIC$ac_delim @@ -23283,7 +23311,7 @@ LIBOBJS!$LIBOBJS$ac_delim LTLIBOBJS!$LTLIBOBJS$ac_delim _ACEOF - if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 6; then + if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 10; then break elif $ac_last_try; then { { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5 diff --git a/include/llvm/Config/config.h.in b/include/llvm/Config/config.h.in index dcec8f8cd1ea..2317823349ee 100644 --- a/include/llvm/Config/config.h.in +++ b/include/llvm/Config/config.h.in @@ -3,9 +3,6 @@ #ifndef CONFIG_H #define CONFIG_H -/* Define if building universal (internal helper macro) */ -#undef AC_APPLE_UNIVERSAL_BUILD - /* Bug report URL. */ #undef BUG_REPORT_URL @@ -641,6 +638,9 @@ /* Minor version of the LLVM API */ #undef LLVM_VERSION_MINOR +/* Patch version of the LLVM API */ +#undef LLVM_VERSION_PATCH + /* Define if the OS needs help to load dependent libraries for dlopen(). */ #undef LTDL_DLOPEN_DEPLIBS @@ -673,9 +673,6 @@ /* Define to the one symbol short name of this package. */ #undef PACKAGE_TARNAME -/* Define to the home page for this package. */ -#undef PACKAGE_URL - /* Define to the version of this package. */ #undef PACKAGE_VERSION @@ -700,18 +697,6 @@ /* Type of 1st arg on ELM Callback */ #undef WIN32_ELMCB_PCSTR -/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most - significant byte first (like Motorola and SPARC, unlike Intel). */ -#if defined AC_APPLE_UNIVERSAL_BUILD -# if defined __BIG_ENDIAN__ -# define WORDS_BIGENDIAN 1 -# endif -#else -# ifndef WORDS_BIGENDIAN -# undef WORDS_BIGENDIAN -# endif -#endif - /* Define to empty if `const' does not conform to ANSI C. */ #undef const diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index 4c5718f8c8b5..46c32ae2fd00 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -1758,68 +1758,68 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
def int_x86_avx2_gather_d_pd : GCCBuiltin<"__builtin_ia32_gatherd_pd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2f64_ty, llvm_i8_ty], - [IntrReadMem]>; + [IntrReadArgMem]>; def int_x86_avx2_gather_d_pd_256 : GCCBuiltin<"__builtin_ia32_gatherd_pd256">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4f64_ty, llvm_i8_ty], - [IntrReadMem]>; + [IntrReadArgMem]>; def int_x86_avx2_gather_q_pd : GCCBuiltin<"__builtin_ia32_gatherq_pd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i8_ty], - [IntrReadMem]>; + [IntrReadArgMem]>; def int_x86_avx2_gather_q_pd_256 : GCCBuiltin<"__builtin_ia32_gatherq_pd256">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty], - [IntrReadMem]>; + [IntrReadArgMem]>; def int_x86_avx2_gather_d_ps : GCCBuiltin<"__builtin_ia32_gatherd_ps">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i8_ty], - [IntrReadMem]>; + [IntrReadArgMem]>; def int_x86_avx2_gather_d_ps_256 : GCCBuiltin<"__builtin_ia32_gatherd_ps256">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty], - [IntrReadMem]>; + [IntrReadArgMem]>; def int_x86_avx2_gather_q_ps : GCCBuiltin<"__builtin_ia32_gatherq_ps">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v4f32_ty, llvm_i8_ty], - [IntrReadMem]>; + [IntrReadArgMem]>; def int_x86_avx2_gather_q_ps_256 : GCCBuiltin<"__builtin_ia32_gatherq_ps256">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4f32_ty, llvm_i8_ty], - [IntrReadMem]>; + [IntrReadArgMem]>; def int_x86_avx2_gather_d_q : GCCBuiltin<"__builtin_ia32_gatherd_q">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2i64_ty, llvm_i8_ty], - [IntrReadMem]>; + [IntrReadArgMem]>; def int_x86_avx2_gather_d_q_256 : GCCBuiltin<"__builtin_ia32_gatherd_q256">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrReadMem]>; + [IntrReadArgMem]>; def int_x86_avx2_gather_q_q : GCCBuiltin<"__builtin_ia32_gatherq_q">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], - [IntrReadMem]>; + [IntrReadArgMem]>; def int_x86_avx2_gather_q_q_256 : GCCBuiltin<"__builtin_ia32_gatherq_q256">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrReadMem]>; + [IntrReadArgMem]>; def int_x86_avx2_gather_d_d : GCCBuiltin<"__builtin_ia32_gatherd_d">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrReadMem]>; + [IntrReadArgMem]>; def int_x86_avx2_gather_d_d_256 : GCCBuiltin<"__builtin_ia32_gatherd_d256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], - [IntrReadMem]>; + [IntrReadArgMem]>; def int_x86_avx2_gather_q_d : GCCBuiltin<"__builtin_ia32_gatherq_d">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrReadMem]>; + [IntrReadArgMem]>; def int_x86_avx2_gather_q_d_256 : GCCBuiltin<"__builtin_ia32_gatherq_d256">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrReadMem]>; + [IntrReadArgMem]>; } // Misc. 
@@ -2909,28 +2909,28 @@ let TargetPrefix = "x86" in { def int_x86_avx512_gather_dpd_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherdpd512">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i8_ty, llvm_v8i32_ty, llvm_ptr_ty, llvm_i32_ty], - [IntrReadMem]>; + [IntrReadArgMem]>; def int_x86_avx512_gather_dps_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherdps512">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i16_ty, llvm_v16i32_ty, llvm_ptr_ty, llvm_i32_ty], - [IntrReadMem]>; + [IntrReadArgMem]>; def int_x86_avx512_gather_qpd_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherqpd512">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty, llvm_i32_ty], - [IntrReadMem]>; + [IntrReadArgMem]>; def int_x86_avx512_gather_qps_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherqps512">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty, llvm_i32_ty], - [IntrReadMem]>; + [IntrReadArgMem]>; def int_x86_avx512_gather_dpd_512 : GCCBuiltin<"__builtin_ia32_gatherdpd512">, Intrinsic<[llvm_v8f64_ty], [llvm_v8i32_ty, llvm_ptr_ty, llvm_i32_ty], - [IntrReadMem]>; + [IntrReadArgMem]>; def int_x86_avx512_gather_dps_512 : GCCBuiltin<"__builtin_ia32_gatherdps512">, Intrinsic<[llvm_v16f32_ty], [llvm_v16i32_ty, llvm_ptr_ty, llvm_i32_ty], - [IntrReadMem]>; + [IntrReadArgMem]>; def int_x86_avx512_gather_qpd_512 : GCCBuiltin<"__builtin_ia32_gatherqpd512">, Intrinsic<[llvm_v8f64_ty], [llvm_v8i64_ty, llvm_ptr_ty, llvm_i32_ty], @@ -2938,12 +2938,12 @@ let TargetPrefix = "x86" in { def int_x86_avx512_gather_qps_512 : GCCBuiltin<"__builtin_ia32_gatherqps512">, Intrinsic<[llvm_v8f32_ty], [llvm_v8i64_ty, llvm_ptr_ty, llvm_i32_ty], - [IntrReadMem]>; + [IntrReadArgMem]>; def int_x86_avx512_gather_dpq_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherdpq512">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_i8_ty, llvm_v8i32_ty, llvm_ptr_ty, llvm_i32_ty], - [IntrReadMem]>; + [IntrReadArgMem]>; def int_x86_avx512_gather_dpi_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherdpi512">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_i16_ty, llvm_v16i32_ty, llvm_ptr_ty, llvm_i32_ty], @@ -2955,7 +2955,7 @@ let TargetPrefix = "x86" in { def int_x86_avx512_gather_qpi_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherqpi512">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty, llvm_i32_ty], - [IntrReadMem]>; + [IntrReadArgMem]>; def int_x86_avx512_gather_dpq_512 : GCCBuiltin<"__builtin_ia32_gatherdpq512">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i32_ty, llvm_ptr_ty, diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h index 7a9939462147..9845623d4716 100644 --- a/include/llvm/MC/MCAsmInfo.h +++ b/include/llvm/MC/MCAsmInfo.h @@ -266,13 +266,16 @@ namespace llvm { /// global as being a weak undefined symbol. const char *WeakRefDirective; // Defaults to NULL. - /// WeakDefDirective - This directive, if non-null, is used to declare a - /// global as being a weak defined symbol. - const char *WeakDefDirective; // Defaults to NULL. + /// True if we have a directive to declare a global as being a weak + /// defined symbol. + bool HasWeakDefDirective; // Defaults to false. - /// LinkOnceDirective - This directive, if non-null is used to declare a - /// global as being a weak defined symbol. This is used on cygwin/mingw. - const char *LinkOnceDirective; // Defaults to NULL. + /// True if we have a directive to declare a global as being a weak + /// defined symbol that can be hidden (unexported). 
+ bool HasWeakDefCanBeHiddenDirective; // Defaults to false. + + /// True if we have a .linkonce directive. This is used on cygwin/mingw. + bool HasLinkOnceDirective; // Defaults to false. /// HiddenVisibilityAttr - This attribute, if not MCSA_Invalid, is used to /// declare a symbol as having hidden visibility. @@ -303,6 +306,10 @@ namespace llvm { /// uses relocations for references to other .debug_* sections. bool DwarfUsesRelocationsAcrossSections; + /// DwarfFDESymbolsUseAbsDiff - true if DWARF FDE symbol reference + /// relocations should be replaced by an absolute difference. + bool DwarfFDESymbolsUseAbsDiff; + /// DwarfRegNumForCFI - True if dwarf register numbers are printed /// instead of symbolic register names in .cfi_* directives. bool DwarfRegNumForCFI; // Defaults to false; @@ -497,8 +504,11 @@ namespace llvm { bool hasIdentDirective() const { return HasIdentDirective; } bool hasNoDeadStrip() const { return HasNoDeadStrip; } const char *getWeakRefDirective() const { return WeakRefDirective; } - const char *getWeakDefDirective() const { return WeakDefDirective; } - const char *getLinkOnceDirective() const { return LinkOnceDirective; } + bool hasWeakDefDirective() const { return HasWeakDefDirective; } + bool hasWeakDefCanBeHiddenDirective() const { + return HasWeakDefCanBeHiddenDirective; + } + bool hasLinkOnceDirective() const { return HasLinkOnceDirective; } MCSymbolAttr getHiddenVisibilityAttr() const { return HiddenVisibilityAttr;} MCSymbolAttr getHiddenDeclarationVisibilityAttr() const { @@ -528,6 +538,9 @@ namespace llvm { bool doesDwarfUseRelocationsAcrossSections() const { return DwarfUsesRelocationsAcrossSections; } + bool doDwarfFDESymbolsUseAbsDiff() const { + return DwarfFDESymbolsUseAbsDiff; + } bool useDwarfRegNumForCFI() const { return DwarfRegNumForCFI; } diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index b2c20110e90e..d0e186c5f23f 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -18,7 +18,10 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Constants.h" @@ -38,6 +41,12 @@ #include <algorithm> using namespace llvm; +/// Cutoff after which to stop analysing a set of phi nodes potentially involved +/// in a cycle. Because we are analysing 'through' phi nodes we need to be +/// careful with value equivalence. We use reachability to make sure a value +/// cannot be involved in a cycle. +const unsigned MaxNumPhiBBsValueReachabilityCheck = 20; + //===----------------------------------------------------------------------===// // Useful predicates //===----------------------------------------------------------------------===// @@ -403,42 +412,6 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, return V; } -/// GetIndexDifference - Dest and Src are the variable indices from two -/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base -/// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic -/// difference between the two pointers. 
-static void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest, - const SmallVectorImpl<VariableGEPIndex> &Src) { - if (Src.empty()) return; - - for (unsigned i = 0, e = Src.size(); i != e; ++i) { - const Value *V = Src[i].V; - ExtensionKind Extension = Src[i].Extension; - int64_t Scale = Src[i].Scale; - - // Find V in Dest. This is N^2, but pointer indices almost never have more - // than a few variable indexes. - for (unsigned j = 0, e = Dest.size(); j != e; ++j) { - if (Dest[j].V != V || Dest[j].Extension != Extension) continue; - - // If we found it, subtract off Scale V's from the entry in Dest. If it - // goes to zero, remove the entry. - if (Dest[j].Scale != Scale) - Dest[j].Scale -= Scale; - else - Dest.erase(Dest.begin()+j); - Scale = 0; - break; - } - - // If we didn't consume this entry, add it to the end of the Dest list. - if (Scale) { - VariableGEPIndex Entry = { V, Extension, -Scale }; - Dest.push_back(Entry); - } - } -} - //===----------------------------------------------------------------------===// // BasicAliasAnalysis Pass //===----------------------------------------------------------------------===// @@ -492,6 +465,7 @@ namespace { // SmallDenseMap if it ever grows larger. // FIXME: This should really be shrink_to_inline_capacity_and_clear(). AliasCache.shrink_and_clear(); + VisitedPhiBBs.clear(); return Alias; } @@ -532,9 +506,39 @@ namespace { typedef SmallDenseMap<LocPair, AliasResult, 8> AliasCacheTy; AliasCacheTy AliasCache; + /// \brief Track phi nodes we have visited. When interpret "Value" pointer + /// equality as value equality we need to make sure that the "Value" is not + /// part of a cycle. Otherwise, two uses could come from different + /// "iterations" of a cycle and see different values for the same "Value" + /// pointer. + /// The following example shows the problem: + /// %p = phi(%alloca1, %addr2) + /// %l = load %ptr + /// %addr1 = gep, %alloca2, 0, %l + /// %addr2 = gep %alloca2, 0, (%l + 1) + /// alias(%p, %addr1) -> MayAlias ! + /// store %l, ... + SmallPtrSet<const BasicBlock*, 8> VisitedPhiBBs; + // Visited - Track instructions visited by pointsToConstantMemory. SmallPtrSet<const Value*, 16> Visited; + /// \brief Check whether two Values can be considered equivalent. + /// + /// In addition to pointer equivalence of \p V1 and \p V2 this checks + /// whether they can not be part of a cycle in the value graph by looking at + /// all visited phi nodes an making sure that the phis cannot reach the + /// value. We have to do this because we are looking through phi nodes (That + /// is we say noalias(V, phi(VA, VB)) if noalias(V, VA) and noalias(V, VB). + bool isValueEqualInPotentialCycles(const Value *V1, const Value *V2); + + /// \brief Dest and Src are the variable indices from two decomposed + /// GetElementPtr instructions GEP1 and GEP2 which have common base + /// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic + /// difference between the two pointers. + void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest, + const SmallVectorImpl<VariableGEPIndex> &Src); + // aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP // instruction against another. 
AliasResult aliasGEP(const GEPOperator *V1, uint64_t V1Size, @@ -1005,7 +1009,15 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, return NoAlias; } } else { - if (V1Size != UnknownSize) { + // We have the situation where: + // + + + // | BaseOffset | + // ---------------->| + // |-->V1Size |-------> V2Size + // GEP1 V2 + // We need to know that V2Size is not unknown, otherwise we might have + // stripped a gep with negative index ('gep <ptr>, -1, ...). + if (V1Size != UnknownSize && V2Size != UnknownSize) { if (-(uint64_t)GEP1BaseOffset < V1Size) return PartialAlias; return NoAlias; @@ -1094,6 +1106,10 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize, const MDNode *PNTBAAInfo, const Value *V2, uint64_t V2Size, const MDNode *V2TBAAInfo) { + // Track phi nodes we have visited. We use this information when we determine + // value equivalence. + VisitedPhiBBs.insert(PN->getParent()); + // If the values are PHIs in the same block, we can do a more precise // as well as efficient check: just check for aliases between the values // on corresponding edges. @@ -1187,7 +1203,13 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size, V2 = V2->stripPointerCasts(); // Are we checking for alias of the same value? - if (V1 == V2) return MustAlias; + // Because we look 'through' phi nodes we could look at "Value" pointers from + // different iterations. We must therefore make sure that this is not the + // case. The function isValueEqualInPotentialCycles ensures that this cannot + // happen by looking at the visited phi nodes and making sure they cannot + // reach the value. + if (isValueEqualInPotentialCycles(V1, V2)) + return MustAlias; if (!V1->getType()->isPointerTy() || !V2->getType()->isPointerTy()) return NoAlias; // Scalars cannot alias each other @@ -1307,3 +1329,71 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size, Location(V2, V2Size, V2TBAAInfo)); return AliasCache[Locs] = Result; } + +bool BasicAliasAnalysis::isValueEqualInPotentialCycles(const Value *V, + const Value *V2) { + if (V != V2) + return false; + + const Instruction *Inst = dyn_cast<Instruction>(V); + if (!Inst) + return true; + + if (VisitedPhiBBs.size() > MaxNumPhiBBsValueReachabilityCheck) + return false; + + // Use dominance or loop info if available. + DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>(); + LoopInfo *LI = getAnalysisIfAvailable<LoopInfo>(); + + // Make sure that the visited phis cannot reach the Value. This ensures that + // the Values cannot come from different iterations of a potential cycle the + // phi nodes could be involved in. + for (SmallPtrSet<const BasicBlock *, 8>::iterator PI = VisitedPhiBBs.begin(), + PE = VisitedPhiBBs.end(); + PI != PE; ++PI) + if (isPotentiallyReachable((*PI)->begin(), Inst, DT, LI)) + return false; + + return true; +} + +/// GetIndexDifference - Dest and Src are the variable indices from two +/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base +/// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic +/// difference between the two pointers. +void BasicAliasAnalysis::GetIndexDifference( + SmallVectorImpl<VariableGEPIndex> &Dest, + const SmallVectorImpl<VariableGEPIndex> &Src) { + if (Src.empty()) + return; + + for (unsigned i = 0, e = Src.size(); i != e; ++i) { + const Value *V = Src[i].V; + ExtensionKind Extension = Src[i].Extension; + int64_t Scale = Src[i].Scale; + + // Find V in Dest. 
This is N^2, but pointer indices almost never have more + // than a few variable indexes. + for (unsigned j = 0, e = Dest.size(); j != e; ++j) { + if (!isValueEqualInPotentialCycles(Dest[j].V, V) || + Dest[j].Extension != Extension) + continue; + + // If we found it, subtract off Scale V's from the entry in Dest. If it + // goes to zero, remove the entry. + if (Dest[j].Scale != Scale) + Dest[j].Scale -= Scale; + else + Dest.erase(Dest.begin() + j); + Scale = 0; + break; + } + + // If we didn't consume this entry, add it to the end of the Dest list. + if (Scale) { + VariableGEPIndex Entry = { V, Extension, -Scale }; + Dest.push_back(Entry); + } + } +} diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index b33e2cb9999e..5a06cdce3085 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -187,15 +187,34 @@ bool IVUsers::AddUsersImpl(Instruction *I, if (AddUserToIVUsers) { // Okay, we found a user that we cannot reduce. - IVUses.push_back(new IVStrideUse(this, User, I)); - IVStrideUse &NewUse = IVUses.back(); + IVStrideUse &NewUse = AddUser(User, I); // Autodetect the post-inc loop set, populating NewUse.PostIncLoops. // The regular return value here is discarded; instead of recording // it, we just recompute it when we need it. + const SCEV *OriginalISE = ISE; ISE = TransformForPostIncUse(NormalizeAutodetect, ISE, User, I, NewUse.PostIncLoops, *SE, *DT); + + // PostIncNormalization effectively simplifies the expression under + // pre-increment assumptions. Those assumptions (no wrapping) might not + // hold for the post-inc value. Catch such cases by making sure the + // transformation is invertible. + if (OriginalISE != ISE) { + const SCEV *DenormalizedISE = + TransformForPostIncUse(Denormalize, ISE, User, I, + NewUse.PostIncLoops, *SE, *DT); + + // If we normalized the expression, but denormalization doesn't give the + // original one, discard this user. + if (OriginalISE != DenormalizedISE) { + DEBUG(dbgs() << " DISCARDING (NORMALIZATION ISN'T INVERTIBLE): " + << *ISE << '\n'); + IVUses.pop_back(); + return false; + } + } DEBUG(if (SE->getSCEV(I) != ISE) dbgs() << " NORMALIZED TO: " << *ISE << '\n'); } diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 0a02f4e9d747..d9b696e0798f 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -6218,7 +6218,7 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, // LHS' type is checked for above. 
if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(FoundLHS->getType())) { - if (CmpInst::isSigned(Pred)) { + if (CmpInst::isSigned(FoundPred)) { FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType()); FoundRHS = getSignExtendExpr(FoundRHS, LHS->getType()); } else { diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 308b0e091ac5..8be4dcba3689 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -222,13 +222,14 @@ void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const { case GlobalValue::WeakAnyLinkage: case GlobalValue::WeakODRLinkage: case GlobalValue::LinkerPrivateWeakLinkage: - if (MAI->getWeakDefDirective() != 0) { + if (MAI->hasWeakDefDirective()) { // .globl _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); bool CanBeHidden = false; - if (Linkage == GlobalValue::LinkOnceODRLinkage) { + if (Linkage == GlobalValue::LinkOnceODRLinkage && + MAI->hasWeakDefCanBeHiddenDirective()) { if (GV->hasUnnamedAddr()) { CanBeHidden = true; } else { @@ -243,7 +244,7 @@ void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const { OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition); else OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefAutoPrivate); - } else if (MAI->getLinkOnceDirective() != 0) { + } else if (MAI->hasLinkOnceDirective()) { // .globl _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); //NOTE: linkonce is handled by the section the symbol was assigned to. diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 43f72c5ef9b4..69cf8d9a909a 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -8547,7 +8547,10 @@ struct MemOpLink { // base ptr. struct ConsecutiveMemoryChainSorter { bool operator()(MemOpLink LHS, MemOpLink RHS) { - return LHS.OffsetFromBase < RHS.OffsetFromBase; + return + LHS.OffsetFromBase < RHS.OffsetFromBase || + (LHS.OffsetFromBase == RHS.OffsetFromBase && + LHS.SequenceNum > RHS.SequenceNum); } }; diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 2c3cdccb56e8..3fb2d9bdf55e 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -210,6 +210,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::SRL: case ISD::ROTL: case ISD::ROTR: + case ISD::BSWAP: case ISD::CTLZ: case ISD::CTTZ: case ISD::CTLZ_ZERO_UNDEF: diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 054e3dd840b5..c1893c9231f0 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -219,8 +219,11 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { DenseMap<long long, SDNode*> O2SMap; // Map from offset to SDNode. bool Cluster = false; SDNode *Base = Node; + // This algorithm requires a reasonably low use count before finding a match + // to avoid uselessly blowing up compile time in large blocks. 
+ unsigned UseCount = 0; for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end(); - I != E; ++I) { + I != E && UseCount < 100; ++I, ++UseCount) { SDNode *User = *I; if (User == Node || !Visited.insert(User)) continue; @@ -237,6 +240,8 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { if (Offset2 < Offset1) Base = User; Cluster = true; + // Reset UseCount to allow more matches. + UseCount = 0; } if (!Cluster) diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp index 28f1c951641c..daf19e927c7a 100644 --- a/lib/MC/MCAsmInfo.cpp +++ b/lib/MC/MCAsmInfo.cpp @@ -76,8 +76,9 @@ MCAsmInfo::MCAsmInfo() { HasIdentDirective = false; HasNoDeadStrip = false; WeakRefDirective = 0; - WeakDefDirective = 0; - LinkOnceDirective = 0; + HasWeakDefDirective = false; + HasWeakDefCanBeHiddenDirective = false; + HasLinkOnceDirective = false; HiddenVisibilityAttr = MCSA_Hidden; HiddenDeclarationVisibilityAttr = MCSA_Hidden; ProtectedVisibilityAttr = MCSA_Protected; @@ -85,6 +86,7 @@ MCAsmInfo::MCAsmInfo() { SupportsDebugInformation = false; ExceptionsType = ExceptionHandling::None; DwarfUsesRelocationsAcrossSections = true; + DwarfFDESymbolsUseAbsDiff = false; DwarfRegNumForCFI = false; HasMicrosoftFastStdCallMangling = false; NeedsDwarfSectionOffsetDirective = false; diff --git a/lib/MC/MCAsmInfoCOFF.cpp b/lib/MC/MCAsmInfoCOFF.cpp index 9d9f98e72b96..1cac71f1eda1 100644 --- a/lib/MC/MCAsmInfoCOFF.cpp +++ b/lib/MC/MCAsmInfoCOFF.cpp @@ -27,7 +27,7 @@ MCAsmInfoCOFF::MCAsmInfoCOFF() { HasSingleParameterDotFile = false; PrivateGlobalPrefix = "L"; // Prefix for private global symbols WeakRefDirective = "\t.weak\t"; - LinkOnceDirective = "\t.linkonce discard\n"; + HasLinkOnceDirective = true; // Doesn't support visibility: HiddenVisibilityAttr = HiddenDeclarationVisibilityAttr = MCSA_Invalid; diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp index 704c8161f880..351ec56352a6 100644 --- a/lib/MC/MCAsmInfoDarwin.cpp +++ b/lib/MC/MCAsmInfoDarwin.cpp @@ -36,7 +36,8 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() { InlineAsmEnd = " InlineAsm End"; // Directives: - WeakDefDirective = "\t.weak_definition "; + HasWeakDefDirective = true; + HasWeakDefCanBeHiddenDirective = true; WeakRefDirective = "\t.weak_reference "; ZeroDirective = "\t.space\t"; // ".space N" emits N zeros. 
HasMachoZeroFillDirective = true; // Uses .zerofill diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index 1e5c2e34c488..9fd13ab3af61 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -836,8 +836,9 @@ static unsigned getSizeForEncoding(MCStreamer &streamer, } } -static void EmitSymbol(MCStreamer &streamer, const MCSymbol &symbol, - unsigned symbolEncoding, const char *comment = 0) { +static void EmitFDESymbol(MCStreamer &streamer, const MCSymbol &symbol, + unsigned symbolEncoding, bool isEH, + const char *comment = 0) { MCContext &context = streamer.getContext(); const MCAsmInfo *asmInfo = context.getAsmInfo(); const MCExpr *v = asmInfo->getExprForFDESymbol(&symbol, @@ -845,7 +846,10 @@ static void EmitSymbol(MCStreamer &streamer, const MCSymbol &symbol, streamer); unsigned size = getSizeForEncoding(streamer, symbolEncoding); if (streamer.isVerboseAsm() && comment) streamer.AddComment(comment); - streamer.EmitAbsValue(v, size); + if (asmInfo->doDwarfFDESymbolsUseAbsDiff() && isEH) + streamer.EmitAbsValue(v, size); + else + streamer.EmitValue(v, size); } static void EmitPersonality(MCStreamer &streamer, const MCSymbol &symbol, @@ -1344,7 +1348,7 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer, unsigned PCEncoding = IsEH ? MOFI->getFDEEncoding(UsingCFI) : (unsigned)dwarf::DW_EH_PE_absptr; unsigned PCSize = getSizeForEncoding(streamer, PCEncoding); - EmitSymbol(streamer, *frame.Begin, PCEncoding, "FDE initial location"); + EmitFDESymbol(streamer, *frame.Begin, PCEncoding, IsEH, "FDE initial location"); // PC Range const MCExpr *Range = MakeStartMinusEndExpr(streamer, *frame.Begin, @@ -1364,8 +1368,8 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer, // Augmentation Data if (frame.Lsda) - EmitSymbol(streamer, *frame.Lsda, frame.LsdaEncoding, - "Language Specific Data Area"); + EmitFDESymbol(streamer, *frame.Lsda, frame.LsdaEncoding, true, + "Language Specific Data Area"); } // Call Frame Instructions diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index a91bd93105b6..6f7729639e42 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -4292,6 +4292,10 @@ bool AsmParser::parseMSInlineAsm( break; } case AOK_DotOperator: + // Insert the dot if the user omitted it. 
+ OS.flush(); + if (AsmStringIR.at(AsmStringIR.size() - 1) != '.') + OS << '.'; OS << (*I).Val; break; } diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 4fdb667b9539..cf7aec3b4538 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -31,12 +31,8 @@ using namespace llvm; static TargetLoweringObjectFile *createTLOF(AArch64TargetMachine &TM) { const AArch64Subtarget *Subtarget = &TM.getSubtarget<AArch64Subtarget>(); - - if (Subtarget->isTargetLinux()) - return new AArch64LinuxTargetObjectFile(); - if (Subtarget->isTargetELF()) - return new TargetLoweringObjectFileELF(); - llvm_unreachable("unknown subtarget type"); + assert (Subtarget->isTargetELF() && "unknown subtarget type"); + return new AArch64ElfTargetObjectFile(); } AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) @@ -2782,7 +2778,7 @@ AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); - const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); + const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); // We have to make sure we copy the entire structure: 8+8+8+4+4 = 32 bytes // rather than just 8. diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index 23d81fc478e8..8e5a4d30396c 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -2587,6 +2587,7 @@ class A64I_SRexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs, pat, itin> { let mayStore = 1; let PostEncoderMethod = "fixLoadStoreExclusive<1,0>"; + let Constraints = "@earlyclobber $Rs"; } multiclass A64I_SRex<string asmstr, bits<3> opcode, string prefix> { diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/lib/Target/AArch64/AArch64TargetObjectFile.cpp index b4452f514590..f8f21198a4f9 100644 --- a/lib/Target/AArch64/AArch64TargetObjectFile.cpp +++ b/lib/Target/AArch64/AArch64TargetObjectFile.cpp @@ -22,3 +22,10 @@ AArch64LinuxTargetObjectFile::Initialize(MCContext &Ctx, TargetLoweringObjectFileELF::Initialize(Ctx, TM); InitializeELF(TM.Options.UseInitArray); } + +void +AArch64ElfTargetObjectFile::Initialize(MCContext &Ctx, + const TargetMachine &TM) { + TargetLoweringObjectFileELF::Initialize(Ctx, TM); + InitializeELF(TM.Options.UseInitArray); +} diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.h b/lib/Target/AArch64/AArch64TargetObjectFile.h index bf0565a79ec8..f782285d1c02 100644 --- a/lib/Target/AArch64/AArch64TargetObjectFile.h +++ b/lib/Target/AArch64/AArch64TargetObjectFile.h @@ -20,8 +20,12 @@ namespace llvm { - /// AArch64LinuxTargetObjectFile - This implementation is used for linux - /// AArch64. + /// AArch64ElfTargetObjectFile - This implementation is used for ELF + /// AArch64 targets. 
+ class AArch64ElfTargetObjectFile : public TargetLoweringObjectFileELF { + virtual void Initialize(MCContext &Ctx, const TargetMachine &TM); + }; + class AArch64LinuxTargetObjectFile : public TargetLoweringObjectFileELF { virtual void Initialize(MCContext &Ctx, const TargetMachine &TM); }; diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp index ff585b41a2aa..3e805a2b68bc 100644 --- a/lib/Target/ARM/A15SDOptimizer.cpp +++ b/lib/Target/ARM/A15SDOptimizer.cpp @@ -418,7 +418,8 @@ SmallVector<unsigned, 8> A15SDOptimizer::getReadDPRs(MachineInstr *MI) { if (!MO.isReg() || !MO.isUse()) continue; if (!usesRegClass(MO, &ARM::DPRRegClass) && - !usesRegClass(MO, &ARM::QPRRegClass)) + !usesRegClass(MO, &ARM::QPRRegClass) && + !usesRegClass(MO, &ARM::DPairRegClass)) // Treat DPair as QPR continue; Defs.push_back(MO.getReg()); @@ -538,7 +539,10 @@ A15SDOptimizer::optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg) { InsertPt++; unsigned Out; - if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::QPRRegClass)) { + // DPair has the same length as QPR and also has two DPRs as subreg. + // Treat DPair as QPR. + if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::QPRRegClass) || + MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPairRegClass)) { unsigned DSub0 = createExtractSubreg(MBB, InsertPt, DL, Reg, ARM::dsub_0, &ARM::DPRRegClass); unsigned DSub1 = createExtractSubreg(MBB, InsertPt, DL, Reg, @@ -571,7 +575,9 @@ A15SDOptimizer::optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg) { default: llvm_unreachable("Unknown preferred lane!"); } - bool UsesQPR = usesRegClass(MI->getOperand(0), &ARM::QPRRegClass); + // Treat DPair as QPR + bool UsesQPR = usesRegClass(MI->getOperand(0), &ARM::QPRRegClass) || + usesRegClass(MI->getOperand(0), &ARM::DPairRegClass); Out = createImplicitDef(MBB, InsertPt, DL); Out = createInsertSubreg(MBB, InsertPt, DL, Out, PrefLane, Reg); diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index f835a4e5b5fe..658af8380d8d 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -3684,6 +3684,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::VLD3d16Pseudo: case ARM::VLD3d32Pseudo: case ARM::VLD1d64TPseudo: + case ARM::VLD1d64TPseudoWB_fixed: case ARM::VLD3d8Pseudo_UPD: case ARM::VLD3d16Pseudo_UPD: case ARM::VLD3d32Pseudo_UPD: @@ -3700,6 +3701,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::VLD4d16Pseudo: case ARM::VLD4d32Pseudo: case ARM::VLD1d64QPseudo: + case ARM::VLD1d64QPseudoWB_fixed: case ARM::VLD4d8Pseudo_UPD: case ARM::VLD4d16Pseudo_UPD: case ARM::VLD4d32Pseudo_UPD: diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index e6f7f86c5587..3e62b649e0dc 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -136,7 +136,9 @@ static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, true, EvenDblSpc, 1, 8 ,true}, { ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, false, SingleSpc, 4, 1 ,false}, +{ ARM::VLD1d64QPseudoWB_fixed, ARM::VLD1d64Qwb_fixed, true, true, false, SingleSpc, 4, 1 ,false}, { ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, false, SingleSpc, 3, 1 ,false}, +{ ARM::VLD1d64TPseudoWB_fixed, ARM::VLD1d64Twb_fixed, true, true, false, SingleSpc, 3, 1 ,false}, { ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, false, SingleSpc, 2, 4 ,true}, { 
ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, true, SingleSpc, 2, 4 ,true}, @@ -1071,6 +1073,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::VLD3d16Pseudo: case ARM::VLD3d32Pseudo: case ARM::VLD1d64TPseudo: + case ARM::VLD1d64TPseudoWB_fixed: case ARM::VLD3d8Pseudo_UPD: case ARM::VLD3d16Pseudo_UPD: case ARM::VLD3d32Pseudo_UPD: @@ -1087,6 +1090,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::VLD4d16Pseudo: case ARM::VLD4d32Pseudo: case ARM::VLD1d64QPseudo: + case ARM::VLD1d64QPseudoWB_fixed: case ARM::VLD4d8Pseudo_UPD: case ARM::VLD4d16Pseudo_UPD: case ARM::VLD4d32Pseudo_UPD: diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 87d15226947a..6d9b18877f72 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -1673,9 +1673,61 @@ SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, unsigned NumVecs, return CurDAG->getTargetConstant(Alignment, MVT::i32); } +static bool isVLDfixed(unsigned Opc) +{ + switch (Opc) { + default: return false; + case ARM::VLD1d8wb_fixed : return true; + case ARM::VLD1d16wb_fixed : return true; + case ARM::VLD1d64Qwb_fixed : return true; + case ARM::VLD1d32wb_fixed : return true; + case ARM::VLD1d64wb_fixed : return true; + case ARM::VLD1d64TPseudoWB_fixed : return true; + case ARM::VLD1d64QPseudoWB_fixed : return true; + case ARM::VLD1q8wb_fixed : return true; + case ARM::VLD1q16wb_fixed : return true; + case ARM::VLD1q32wb_fixed : return true; + case ARM::VLD1q64wb_fixed : return true; + case ARM::VLD2d8wb_fixed : return true; + case ARM::VLD2d16wb_fixed : return true; + case ARM::VLD2d32wb_fixed : return true; + case ARM::VLD2q8PseudoWB_fixed : return true; + case ARM::VLD2q16PseudoWB_fixed : return true; + case ARM::VLD2q32PseudoWB_fixed : return true; + case ARM::VLD2DUPd8wb_fixed : return true; + case ARM::VLD2DUPd16wb_fixed : return true; + case ARM::VLD2DUPd32wb_fixed : return true; + } +} + +static bool isVSTfixed(unsigned Opc) +{ + switch (Opc) { + default: return false; + case ARM::VST1d8wb_fixed : return true; + case ARM::VST1d16wb_fixed : return true; + case ARM::VST1d32wb_fixed : return true; + case ARM::VST1d64wb_fixed : return true; + case ARM::VST1q8wb_fixed : return true; + case ARM::VST1q16wb_fixed : return true; + case ARM::VST1q32wb_fixed : return true; + case ARM::VST1q64wb_fixed : return true; + case ARM::VST1d64TPseudoWB_fixed : return true; + case ARM::VST1d64QPseudoWB_fixed : return true; + case ARM::VST2d8wb_fixed : return true; + case ARM::VST2d16wb_fixed : return true; + case ARM::VST2d32wb_fixed : return true; + case ARM::VST2q8PseudoWB_fixed : return true; + case ARM::VST2q16PseudoWB_fixed : return true; + case ARM::VST2q32PseudoWB_fixed : return true; + } +} + // Get the register stride update opcode of a VLD/VST instruction that // is otherwise equivalent to the given fixed stride updating instruction. 
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { + assert((isVLDfixed(Opc) || isVSTfixed(Opc)) + && "Incorrect fixed stride updating instruction."); switch (Opc) { default: break; case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register; @@ -1686,6 +1738,10 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register; case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register; case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register; + case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register; + case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register; + case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register; + case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register; case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register; case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register; @@ -1785,11 +1841,11 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, SDValue Inc = N->getOperand(AddrOpIdx + 1); // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0 // case entirely when the rest are updated to that form, too. - if ((NumVecs == 1 || NumVecs == 2) && !isa<ConstantSDNode>(Inc.getNode())) + if ((NumVecs <= 2) && !isa<ConstantSDNode>(Inc.getNode())) Opc = getVLDSTRegisterUpdateOpcode(Opc); - // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so + // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so // check for that explicitly too. Horribly hacky, but temporary. - if ((NumVecs != 1 && NumVecs != 2 && Opc != ARM::VLD1q64wb_fixed) || + if ((NumVecs > 2 && !isVLDfixed(Opc)) || !isa<ConstantSDNode>(Inc.getNode())) Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc); } @@ -1937,11 +1993,12 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, // case entirely when the rest are updated to that form, too. if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode())) Opc = getVLDSTRegisterUpdateOpcode(Opc); - // We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so + // FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so // check for that explicitly too. Horribly hacky, but temporary. - if ((NumVecs > 2 && Opc != ARM::VST1q64wb_fixed) || - !isa<ConstantSDNode>(Inc.getNode())) - Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? 
Reg0 : Inc); + if (!isa<ConstantSDNode>(Inc.getNode())) + Ops.push_back(Inc); + else if (NumVecs > 2 && !isVSTfixed(Opc)) + Ops.push_back(Reg0); } Ops.push_back(SrcReg); Ops.push_back(Pred); @@ -2834,7 +2891,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, ARM::VLD3d16Pseudo_UPD, ARM::VLD3d32Pseudo_UPD, - ARM::VLD1q64wb_fixed}; + ARM::VLD1d64TPseudoWB_fixed}; static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, ARM::VLD3q16Pseudo_UPD, ARM::VLD3q32Pseudo_UPD }; @@ -2848,7 +2905,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD, - ARM::VLD1q64wb_fixed}; + ARM::VLD1d64QPseudoWB_fixed}; static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, ARM::VLD4q16Pseudo_UPD, ARM::VLD4q32Pseudo_UPD }; diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 43bd4c21dc39..0b05c08ed948 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -730,6 +730,8 @@ defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32">; defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64">; def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>; +def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>; +def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>; // ...with 4 registers class VLD1D4<bits<4> op7_4, string Dt> @@ -769,6 +771,8 @@ defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32">; defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64">; def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>; +def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>; +def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>; // VLD2 : Vector Load (multiple 2-element structures) class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, @@ -1671,7 +1675,7 @@ defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32">; defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64">; def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>; -def VST1d64TPseudoWB_fixed : VSTQQWBPseudo<IIC_VST1x3u>; +def VST1d64TPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x3u>; def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>; // ...with 4 registers @@ -1714,7 +1718,7 @@ defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32">; defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64">; def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>; -def VST1d64QPseudoWB_fixed : VSTQQWBPseudo<IIC_VST1x4u>; +def VST1d64QPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x4u>; def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>; // VST2 : Vector Store (multiple 2-element structures) diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index f3dddce30120..1d9c06406a4b 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -12,10 +12,14 @@ //===----------------------------------------------------------------------===// #include "PPCMCAsmInfo.h" +#include "llvm/ADT/Triple.h" + using namespace llvm; void PPCMCAsmInfoDarwin::anchor() { } +/// This version of the constructor is here to maintain ABI compatibility with +/// LLVM 3.4.0 PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) { if (is64Bit) { PointerSize = CalleeSaveStackSlotSize = 8; @@ -32,6 +36,28 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) { SupportsDebugInformation= true; // Debug information. 
} +PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit, const Triple& T) { + if (is64Bit) { + PointerSize = CalleeSaveStackSlotSize = 8; + } + IsLittleEndian = false; + + CommentString = ";"; + ExceptionsType = ExceptionHandling::DwarfCFI; + + if (!is64Bit) + Data64bitsDirective = 0; // We can't emit a 64-bit unit in PPC32 mode. + + AssemblerDialect = 1; // New-Style mnemonics. + SupportsDebugInformation= true; // Debug information. + + // old assembler lacks some directives + // FIXME: this should really be a check on the assembler characteristics + // rather than OS version + if (T.isMacOSX() && T.isMacOSXVersionLT(10, 6)) + HasWeakDefCanBeHiddenDirective = false; +} + void PPCLinuxMCAsmInfo::anchor() { } PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h index 1530e774cfc7..633970ccc289 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h @@ -18,11 +18,15 @@ #include "llvm/MC/MCAsmInfoELF.h" namespace llvm { +class Triple; class PPCMCAsmInfoDarwin : public MCAsmInfoDarwin { virtual void anchor(); public: + /// This version of the constructor is here to maintain ABI compatibility + /// with LLVM 3.4.0. explicit PPCMCAsmInfoDarwin(bool is64Bit); + explicit PPCMCAsmInfoDarwin(bool is64Bit, const Triple&); }; class PPCLinuxMCAsmInfo : public MCAsmInfoELF { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index f18d095c6d02..6a5051840181 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -72,7 +72,7 @@ static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { MCAsmInfo *MAI; if (TheTriple.isOSDarwin()) - MAI = new PPCMCAsmInfoDarwin(isPPC64); + MAI = new PPCMCAsmInfoDarwin(isPPC64, TheTriple); else MAI = new PPCLinuxMCAsmInfo(isPPC64); diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index ada34ed9e18a..2d92a112d5ef 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -701,13 +701,6 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } break; - case PPC::SYNC: - // In Book E sync is called msync, handle this special case here... - if (Subtarget.isBookE()) { - OutStreamer.EmitRawText(StringRef("\tmsync")); - return; - } - break; case PPC::LD: case PPC::STD: case PPC::LWA_32: diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index 4224ae2d273c..e419b9b40d8e 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -186,6 +186,13 @@ bool PPCCTRLoops::runOnFunction(Function &F) { return MadeChange; } +static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) { + if (IntegerType *ITy = dyn_cast<IntegerType>(Ty)) + return ITy->getBitWidth() > (Is32Bit ? 
32 : 64); + + return false; +} + bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J) { @@ -352,13 +359,11 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { CastInst *CI = cast<CastInst>(J); if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() || CI->getDestTy()->getScalarType()->isPPC_FP128Ty() || - (TT.isArch32Bit() && - (CI->getSrcTy()->getScalarType()->isIntegerTy(64) || - CI->getDestTy()->getScalarType()->isIntegerTy(64)) - )) + isLargeIntegerTy(TT.isArch32Bit(), CI->getSrcTy()->getScalarType()) || + isLargeIntegerTy(TT.isArch32Bit(), CI->getDestTy()->getScalarType())) return true; - } else if (TT.isArch32Bit() && - J->getType()->getScalarType()->isIntegerTy(64) && + } else if (isLargeIntegerTy(TT.isArch32Bit(), + J->getType()->getScalarType()) && (J->getOpcode() == Instruction::UDiv || J->getOpcode() == Instruction::SDiv || J->getOpcode() == Instruction::URem || diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index 09117e7ded49..4e3b0b83244a 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -892,11 +892,13 @@ unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg, unsigned LoadOpc = PPC::LFD; if (SrcVT == MVT::i32) { - Addr.Offset = 4; - if (!IsSigned) + if (!IsSigned) { LoadOpc = PPC::LFIWZX; - else if (PPCSubTarget.hasLFIWAX()) + Addr.Offset = 4; + } else if (PPCSubTarget.hasLFIWAX()) { LoadOpc = PPC::LFIWAX; + Addr.Offset = 4; + } } const TargetRegisterClass *RC = &PPC::F8RCRegClass; diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 6ba6af6446e5..d25762a5bbca 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -261,11 +261,11 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { DebugLoc dl; if (PPCLowering.getPointerTy() == MVT::i32) { - GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); + GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::GPRC_NOR0RegClass); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR)); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); } else { - GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RCRegClass); + GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_NOX0RegClass); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8)); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg); } diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 8da5f0563c6a..25a7ca7f59a7 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -2333,7 +2333,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( EVT ObjType = (ObjSize == 1 ? MVT::i8 : (ObjSize == 2 ? 
MVT::i16 : MVT::i32)); Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(FuncArg, CurArgOffset), + MachinePointerInfo(FuncArg), ObjType, false, false, 0); } else { // For sizes that don't fit a truncating store (3, 5, 6, 7), @@ -2345,7 +2345,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(FuncArg, ArgOffset), + MachinePointerInfo(FuncArg), false, false, 0); } @@ -2369,7 +2369,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( SDValue FIN = DAG.getFrameIndex(FI, PtrVT); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(FuncArg, ArgOffset), + MachinePointerInfo(FuncArg, j), false, false, 0); MemOps.push_back(Store); ++GPR_idx; @@ -2665,8 +2665,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16; SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(FuncArg, - CurArgOffset), + MachinePointerInfo(FuncArg), ObjType, false, false, 0); MemOps.push_back(Store); ++GPR_idx; @@ -2690,7 +2689,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( SDValue FIN = DAG.getFrameIndex(FI, PtrVT); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(FuncArg, ArgOffset), + MachinePointerInfo(FuncArg, j), false, false, 0); MemOps.push_back(Store); ++GPR_idx; diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 315ad04ebe3e..80bc27a95765 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -570,12 +570,14 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, // update isStoreToStackSlot. DebugLoc DL; - if (PPC::GPRCRegClass.hasSubClassEq(RC)) { + if (PPC::GPRCRegClass.hasSubClassEq(RC) || + PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW)) .addReg(SrcReg, getKillRegState(isKill)), FrameIdx)); - } else if (PPC::G8RCRegClass.hasSubClassEq(RC)) { + } else if (PPC::G8RCRegClass.hasSubClassEq(RC) || + PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD)) .addReg(SrcReg, getKillRegState(isKill)), @@ -695,10 +697,12 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, // Note: If additional load instructions are added here, // update isLoadFromStackSlot. 
- if (PPC::GPRCRegClass.hasSubClassEq(RC)) { + if (PPC::GPRCRegClass.hasSubClassEq(RC) || + PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), DestReg), FrameIdx)); - } else if (PPC::G8RCRegClass.hasSubClassEq(RC)) { + } else if (PPC::G8RCRegClass.hasSubClassEq(RC) || + PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg), FrameIdx)); } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) { diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 2bd3aadc798d..fc29c69642bf 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -580,6 +580,7 @@ def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>; def In32BitMode : Predicate<"!PPCSubTarget.isPPC64()">; def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">; def IsBookE : Predicate<"PPCSubTarget.isBookE()">; +def IsNotBookE : Predicate<"!PPCSubTarget.isBookE()">; //===----------------------------------------------------------------------===// // PowerPC Multiclass Definitions. @@ -1541,8 +1542,17 @@ def STMW : DForm_1<47, (outs), (ins gprc:$rS, memri:$dst), "stmw $rS, $dst", LdStLMW, []>; def SYNC : XForm_24_sync<31, 598, (outs), (ins i32imm:$L), - "sync $L", LdStSync, []>; -def : Pat<(int_ppc_sync), (SYNC 0)>; + "sync $L", LdStSync, []>, Requires<[IsNotBookE]>; + +let isCodeGenOnly = 1 in { + def MSYNC : XForm_24_sync<31, 598, (outs), (ins), + "msync", LdStSync, []>, Requires<[IsBookE]> { + let L = 0; + } +} + +def : Pat<(int_ppc_sync), (SYNC 0)>, Requires<[IsNotBookE]>; +def : Pat<(int_ppc_sync), (MSYNC)>, Requires<[IsBookE]>; //===----------------------------------------------------------------------===// // PPC32 Arithmetic Instructions. @@ -2284,7 +2294,8 @@ def : Pat<(f64 (extloadf32 xaddr:$src)), def : Pat<(f64 (fextend f32:$src)), (COPY_TO_REGCLASS $src, F8RC)>; -def : Pat<(atomic_fence (imm), (imm)), (SYNC 0)>; +def : Pat<(atomic_fence (imm), (imm)), (SYNC 0)>, Requires<[IsNotBookE]>; +def : Pat<(atomic_fence (imm), (imm)), (MSYNC)>, Requires<[IsBookE]>; // Additional FNMSUB patterns: -a*c + b == -(a*c - b) def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B), @@ -2373,10 +2384,10 @@ class PPCAsmPseudo<string asm, dag iops> def : InstAlias<"sc", (SC 0)>; -def : InstAlias<"sync", (SYNC 0)>; -def : InstAlias<"msync", (SYNC 0)>; -def : InstAlias<"lwsync", (SYNC 1)>; -def : InstAlias<"ptesync", (SYNC 2)>; +def : InstAlias<"sync", (SYNC 0)>, Requires<[IsNotBookE]>; +def : InstAlias<"msync", (SYNC 0)>, Requires<[IsNotBookE]>; +def : InstAlias<"lwsync", (SYNC 1)>, Requires<[IsNotBookE]>; +def : InstAlias<"ptesync", (SYNC 2)>, Requires<[IsNotBookE]>; def : InstAlias<"wait", (WAIT 0)>; def : InstAlias<"waitrsv", (WAIT 1)>; diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index d566e2c3e52d..43663ce013e9 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -144,6 +144,13 @@ def CR6 : CR<6, "cr6", [CR6LT, CR6GT, CR6EQ, CR6UN]>, DwarfRegNum<[74, 74]>; def CR7 : CR<7, "cr7", [CR7LT, CR7GT, CR7EQ, CR7UN]>, DwarfRegNum<[75, 75]>; } +// The full condition-code register. This is not modeled fully, but defined +// here primarily, for compatibility with gcc, to allow the inline asm "cc" +// clobber specification to work. 
+def CC : PPCReg<"cc">, DwarfRegAlias<CR0> { + let Aliases = [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7]; +} + // Link register def LR : SPR<8, "lr">, DwarfRegNum<[-2, 65]>; //let Aliases = [LR] in @@ -234,3 +241,8 @@ def VRSAVERC : RegisterClass<"PPC", [i32], 32, (add VRSAVE)>; def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY)> { let CopyCost = -1; } + +def CCRC : RegisterClass<"PPC", [i32], 32, (add CC)> { + let isAllocatable = 0; +} + diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index c863a6ecc777..ec8c82ad521c 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -126,22 +126,6 @@ public: /// selection. const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } - /// getDataLayoutString - Return the pointer size and type alignment - /// properties of this subtarget. - const char *getDataLayoutString() const { - // Note, the alignment values for f64 and i64 on ppc64 in Darwin - // documentation are wrong; these are correct (i.e. "what gcc does"). - if (isPPC64() && isSVR4ABI()) { - if (TargetTriple.getOS() == llvm::Triple::FreeBSD) - return "E-p:64:64-f64:64:64-i64:64:64-v128:128:128-n32:64"; - else - return "E-p:64:64-f64:64:64-i64:64:64-f128:128:128-v128:128:128-n32:64"; - } - - return isPPC64() ? "E-p:64:64-f64:64:64-i64:64:64-f128:64:128-n32:64" - : "E-p:32:32-f64:64:64-i64:64:64-f128:64:128-n32"; - } - /// \brief Reset the features for the PowerPC target. virtual void resetSubtargetFeatures(const MachineFunction *MF); private: diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index 9acefe53ce4a..d6767d51f2cc 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -33,6 +33,43 @@ extern "C" void LLVMInitializePowerPCTarget() { RegisterTargetMachine<PPC64TargetMachine> C(ThePPC64LETarget); } +/// Return the datalayout string of a subtarget. +static std::string getDataLayoutString(const PPCSubtarget &ST) { + const Triple &T = ST.getTargetTriple(); + + // PPC is big endian + std::string Ret = "E"; + + // PPC64 has 64 bit pointers, PPC32 has 32 bit pointers. + if (ST.isPPC64()) + Ret += "-p:64:64"; + else + Ret += "-p:32:32"; + + // Note, the alignment values for f64 and i64 on ppc64 in Darwin + // documentation are wrong; these are correct (i.e. "what gcc does"). + if (ST.isPPC64() || ST.isSVR4ABI()) + Ret += "-f64:64:64-i64:64:64"; + else + Ret += "-f64:32:64"; + + // Set support for 128 floats depending on the ABI. + if (!ST.isPPC64() && ST.isSVR4ABI()) + Ret += "-f128:64:128"; + + // Some ABIs support 128 bit vectors. + if (ST.isPPC64() && ST.isSVR4ABI()) + Ret += "-v128:128:128"; + + // PPC64 has 32 and 64 bit register, PPC32 has only 32 bit ones. 
+ if (ST.isPPC64()) + Ret += "-n32:64"; + else + Ret += "-n32"; + + return Ret; +} + PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -41,7 +78,7 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, bool is64Bit) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), Subtarget(TT, CPU, FS, is64Bit), - DL(Subtarget.getDataLayoutString()), InstrInfo(*this), + DL(getDataLayoutString(Subtarget)), InstrInfo(*this), FrameLowering(Subtarget), JITInfo(*this, is64Bit), TLInfo(*this), TSInfo(*this), InstrItins(Subtarget.getInstrItineraryData()) { diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index c4d75ffa0d06..1029f306d632 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -133,6 +133,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, Expand); setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, Expand); + setOperationAction(ISD::BR_CC, MVT::i1, Expand); + setOperationAction(ISD::FNEG, MVT::v2f32, Expand); setOperationAction(ISD::FNEG, MVT::v4f32, Expand); diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index 3c5375d84ecf..7acd67313eea 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -388,6 +388,11 @@ class SHA256MaPattern <Instruction BFI_INT, Instruction XOR> : Pat < // Bitfield extract patterns +/* + +XXX: The BFE pattern is not working correctly because the XForm is not being +applied. + def legalshift32 : ImmLeaf <i32, [{return Imm >=0 && Imm < 32;}]>; def bfemask : PatLeaf <(imm), [{return isMask_32(N->getZExtValue());}], SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(CountTrailingOnes_32(N->getZExtValue()), MVT::i32);}]>>; @@ -397,6 +402,8 @@ class BFEPattern <Instruction BFE> : Pat < (BFE $x, $y, $z) >; +*/ + // rotr pattern class ROTRPattern <Instruction BIT_ALIGN> : Pat < (rotr i32:$src0, i32:$src1), diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp index 4a8e1b0b2d86..9b26af7bc736 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp +++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp @@ -13,7 +13,6 @@ using namespace llvm; AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(StringRef &TT) : MCAsmInfo() { HasSingleParameterDotFile = false; - WeakDefDirective = 0; //===------------------------------------------------------------------===// HasSubsectionsViaSymbols = true; HasMachoZeroFillDirective = false; @@ -58,7 +57,6 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(StringRef &TT) : MCAsmInfo() { HasDotTypeDotSizeDirective = false; HasNoDeadStrip = true; WeakRefDirective = ".weakref\t"; - LinkOnceDirective = 0; //===--- Dwarf Emission Directives -----------------------------------===// HasLEB128 = true; SupportsDebugInformation = true; diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp index ac3d8f63d57f..2a8276b2214f 100644 --- a/lib/Target/R600/R600ControlFlowFinalizer.cpp +++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp @@ -356,6 +356,7 @@ public: DEBUG(dbgs() << CfCount << ":"; I->dump();); FetchClauses.push_back(MakeFetchClause(MBB, I)); CfCount++; + LastAlu.back() = 0; continue; } diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index c0827fc1ca40..2eca6cf43271 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ 
b/lib/Target/R600/R600InstrInfo.cpp @@ -716,7 +716,13 @@ R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, return false; } - // Get the last instruction in the block. + // Remove successive JUMP + while (I != MBB.begin() && llvm::prior(I)->getOpcode() == AMDGPU::JUMP) { + MachineBasicBlock::iterator PriorI = llvm::prior(I); + if (AllowModify) + I->removeFromParent(); + I = PriorI; + } MachineInstr *LastInst = I; // If there is only one terminator instruction, process it. diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 0346e24ab771..74c65daa065c 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -1516,7 +1516,9 @@ let Predicates = [isEGorCayman] in { i32:$src2))], VecALU >; - def : BFEPattern <BFE_UINT_eg>; +// XXX: This pattern is broken, disabling for now. See comment in +// AMDGPUInstructions.td for more info. +// def : BFEPattern <BFE_UINT_eg>; def BFI_INT_eg : R600_3OP <0x06, "BFI_INT", [], VecALU>; defm : BFIPatterns <BFI_INT_eg>; @@ -1636,7 +1638,6 @@ class R600_LDS_1A <bits<6> lds_op, string name, list<dag> pattern> : R600_LDS < let src2 = 0; let src2_rel = 0; - let Defs = [OQAP]; let usesCustomInserter = 1; let LDS_1A = 1; let DisableEncoding = "$dst"; @@ -1672,7 +1673,6 @@ class R600_LDS_1A1D_RET <bits<6> lds_op, string name, list<dag> pattern> : let BaseOp = name; let usesCustomInserter = 1; let DisableEncoding = "$dst"; - let Defs = [OQAP]; } class R600_LDS_1A2D <bits<6> lds_op, string name, list<dag> pattern> : diff --git a/lib/Target/R600/SIFixSGPRCopies.cpp b/lib/Target/R600/SIFixSGPRCopies.cpp index 3370c7955bc7..f0065ea13c5b 100644 --- a/lib/Target/R600/SIFixSGPRCopies.cpp +++ b/lib/Target/R600/SIFixSGPRCopies.cpp @@ -187,7 +187,7 @@ bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy, DstRC == &AMDGPU::M0RegRegClass) return false; - SrcRC = inferRegClassFromDef(TRI, MRI, SrcReg, SrcSubReg); + SrcRC = TRI->getSubRegClass(MRI.getRegClass(SrcReg), SrcSubReg); return TRI->isSGPRClass(DstRC) && TRI->hasVGPRs(SrcRC); } diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp index 7ef662eb65b1..695ec407fdbe 100644 --- a/lib/Target/R600/SIInsertWaits.cpp +++ b/lib/Target/R600/SIInsertWaits.cpp @@ -314,6 +314,12 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) { Counters Result = ZeroCounts; + // S_SENDMSG implicitly waits for all outstanding LGKM transfers to finish, + // but we also want to wait for any other outstanding transfers before + // signalling other hardware blocks + if (MI.getOpcode() == AMDGPU::S_SENDMSG) + return LastIssued; + // For each register affected by this // instruction increase the result sequence for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 4cd0daa55c5f..b7879c6eface 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -290,10 +290,10 @@ multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern, : VOP2_Helper <op, VReg_64, VSrc_64, opName, pattern, revOp>; multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern, - string revOp = opName> { + RegisterClass src0_rc, string revOp = opName> { def _e32 : VOP2 < - op, (outs VReg_32:$dst), (ins VSrc_32:$src0, VReg_32:$src1), + op, (outs VReg_32:$dst), (ins src0_rc:$src0, VReg_32:$src1), opName#"_e32 $dst, $src0, $src1", pattern >, VOP <opName>, VOP2_REV<revOp#"_e32", !eq(revOp, opName)>; @@ -425,26 +425,48 @@ class 
MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBU multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> { - let glc = 0, lds = 0, slc = 0, tfe = 0, soffset = 128 /* ZERO */, - mayLoad = 1 in { - - let offen = 1, idxen = 0, addr64 = 0, offset = 0 in { - def _OFFEN : MUBUF <op, (outs regClass:$vdata), - (ins SReg_128:$srsrc, VReg_32:$vaddr), - asm#" $vdata, $srsrc + $vaddr", []>; - } - - let offen = 0, idxen = 1, addr64 = 0 in { - def _IDXEN : MUBUF <op, (outs regClass:$vdata), - (ins SReg_128:$srsrc, VReg_32:$vaddr, i16imm:$offset), - asm#" $vdata, $srsrc[$vaddr] + $offset", []>; - } + let lds = 0, mayLoad = 1 in { + + let addr64 = 0 in { + + let offen = 0, idxen = 0 in { + def _OFFSET : MUBUF <op, (outs regClass:$vdata), + (ins SReg_128:$srsrc, VReg_32:$vaddr, + i16imm:$offset, SSrc_32:$soffset, i1imm:$glc, + i1imm:$slc, i1imm:$tfe), + asm#" $vdata, $srsrc + $offset + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>; + } + + let offen = 1, idxen = 0, offset = 0 in { + def _OFFEN : MUBUF <op, (outs regClass:$vdata), + (ins SReg_128:$srsrc, VReg_32:$vaddr, + SSrc_32:$soffset, i1imm:$glc, i1imm:$slc, + i1imm:$tfe), + asm#" $vdata, $srsrc + $vaddr + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>; + } + + let offen = 0, idxen = 1 in { + def _IDXEN : MUBUF <op, (outs regClass:$vdata), + (ins SReg_128:$srsrc, VReg_32:$vaddr, + i16imm:$offset, SSrc_32:$soffset, i1imm:$glc, + i1imm:$slc, i1imm:$tfe), + asm#" $vdata, $srsrc[$vaddr] + $offset + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>; + } + + let offen = 1, idxen = 1 in { + def _BOTHEN : MUBUF <op, (outs regClass:$vdata), + (ins SReg_128:$srsrc, VReg_64:$vaddr, + SSrc_32:$soffset, i1imm:$glc, + i1imm:$slc, i1imm:$tfe), + asm#" $vdata, $srsrc[$vaddr[0]] + $vaddr[1] + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>; + } + } - let offen = 0, idxen = 0, addr64 = 1 in { - def _ADDR64 : MUBUF <op, (outs regClass:$vdata), - (ins SReg_128:$srsrc, VReg_64:$vaddr, i16imm:$offset), - asm#" $vdata, $srsrc + $vaddr + $offset", []>; - } + let offen = 0, idxen = 0, addr64 = 1, glc = 0, slc = 0, tfe = 0, soffset = 128 /* ZERO */ in { + def _ADDR64 : MUBUF <op, (outs regClass:$vdata), + (ins SReg_128:$srsrc, VReg_64:$vaddr, i16imm:$offset), + asm#" $vdata, $srsrc + $vaddr + $offset", []>; + } } } diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 76f05eb49655..2ca6a95978b3 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -22,6 +22,8 @@ def InterpSlot : Operand<i32> { let PrintMethod = "printInterpSlot"; } +def SendMsgImm : Operand<i32>; + def isSI : Predicate<"Subtarget.getGeneration() " ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">; @@ -826,17 +828,25 @@ def S_BARRIER : SOPP <0x0000000a, (ins), "S_BARRIER", def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "S_WAITCNT $simm16", [] >; -} // End hasSideEffects //def S_SETHALT : SOPP_ <0x0000000d, "S_SETHALT", []>; //def S_SLEEP : SOPP_ <0x0000000e, "S_SLEEP", []>; //def S_SETPRIO : SOPP_ <0x0000000f, "S_SETPRIO", []>; -//def S_SENDMSG : SOPP_ <0x00000010, "S_SENDMSG", []>; + +let Uses = [EXEC] in { + def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16, M0Reg:$m0), "S_SENDMSG $simm16", + [(int_SI_sendmsg imm:$simm16, M0Reg:$m0)] + > { + let DisableEncoding = "$m0"; + } +} // End Uses = [EXEC] + //def S_SENDMSGHALT : SOPP_ <0x00000011, "S_SENDMSGHALT", []>; //def S_TRAP : SOPP_ <0x00000012, "S_TRAP", []>; //def S_ICACHE_INV : SOPP_ <0x00000013, "S_ICACHE_INV", []>; //def S_INCPERFLEVEL : 
SOPP_ <0x00000014, "S_INCPERFLEVEL", []>; //def S_DECPERFLEVEL : SOPP_ <0x00000015, "S_DECPERFLEVEL", []>; //def S_TTRACEDATA : SOPP_ <0x00000016, "S_TTRACEDATA", []>; +} // End hasSideEffects def V_CNDMASK_B32_e32 : VOP2 <0x00000000, (outs VReg_32:$dst), (ins VSrc_32:$src0, VReg_32:$src1, VCCReg:$vcc), @@ -979,14 +989,16 @@ defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>; let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC // No patterns so that the scalar instructions are always selected. // The scalar versions will be replaced with vector when needed later. -defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32", []>; -defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32", []>; -defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], "V_SUB_I32">; +defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32", [], VSrc_32>; +defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32", [], VSrc_32>; +defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], VSrc_32, + "V_SUB_I32">; let Uses = [VCC] in { // Carry-in comes from VCC -defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", []>; -defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", []>; -defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], "V_SUBB_U32">; +defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", [], VReg_32>; +defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", [], VReg_32>; +defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], VReg_32, + "V_SUBB_U32">; } // End Uses = [VCC] } // End isCommutable = 1, Defs = [VCC] @@ -1403,7 +1415,7 @@ def : Pat < /* int_SI_vs_load_input */ def : Pat< (SIload_input i128:$tlst, IMM12bit:$attr_offset, i32:$buf_idx_vgpr), - (BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset) + (BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset, 0, 0, 0, 0) >; /* int_SI_export */ @@ -1658,16 +1670,30 @@ def : Pat < 0 /* ABS */, 1 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */) >; +/********** ================================ **********/ +/********** Floating point absolute/negative **********/ +/********** ================================ **********/ + +// Manipulate the sign bit directly, as e.g. using the source negation modifier +// in V_ADD_F32_e64 $src, 0, [...] does not result in -0.0 for $src == +0.0, +// breaking the piglit *s-floatBitsToInt-neg* tests + +// TODO: Look into not implementing isFNegFree/isFAbsFree for SI, and possibly +// removing these patterns + +def : Pat < + (fneg (fabs f32:$src)), + (V_OR_B32_e32 $src, (V_MOV_B32_e32 0x80000000)) /* Set sign bit */ +>; + def : Pat < (fabs f32:$src), - (V_ADD_F32_e64 $src, (i32 0 /* SRC1 */), - 1 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */) + (V_AND_B32_e32 $src, (V_MOV_B32_e32 0x7fffffff)) /* Clear sign bit */ >; def : Pat < (fneg f32:$src), - (V_ADD_F32_e64 $src, (i32 0 /* SRC1 */), - 0 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 1 /* NEG */) + (V_XOR_B32_e32 $src, (V_MOV_B32_e32 0x80000000)) /* Toggle sign bit */ >; /********** ================== **********/ @@ -1794,6 +1820,11 @@ def : Pat < (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src0) >; +def : Pat < + (i32 (zext i1:$src0)), + (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src0) +>; + // 1. Offset as 8bit DWORD immediate def : Pat < (SIload_constant i128:$sbase, IMM8bitDWORD:$offset), @@ -1809,7 +1840,7 @@ def : Pat < // 3. 
Offset in an 32Bit VGPR def : Pat < (SIload_constant i128:$sbase, i32:$voff), - (BUFFER_LOAD_DWORD_OFFEN $sbase, $voff) + (BUFFER_LOAD_DWORD_OFFEN $sbase, $voff, 0, 0, 0, 0) >; // The multiplication scales from [0,1] to the unsigned integer range @@ -1970,6 +2001,50 @@ defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, i64, global_store>; defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, v2i32, global_store>; defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX4, v4i32, global_store>; +// BUFFER_LOAD_DWORD*, addr64=0 +multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxen, + MUBUF bothen> { + + def : Pat < + (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset, + imm:$offset, 0, 0, imm:$glc, imm:$slc, + imm:$tfe)), + (offset $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc), + (as_i1imm $slc), (as_i1imm $tfe)) + >; + + def : Pat < + (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset, + imm, 1, 0, imm:$glc, imm:$slc, + imm:$tfe)), + (offen $rsrc, $vaddr, $soffset, (as_i1imm $glc), (as_i1imm $slc), + (as_i1imm $tfe)) + >; + + def : Pat < + (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset, + imm:$offset, 0, 1, imm:$glc, imm:$slc, + imm:$tfe)), + (idxen $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc), + (as_i1imm $slc), (as_i1imm $tfe)) + >; + + def : Pat < + (vt (int_SI_buffer_load_dword i128:$rsrc, v2i32:$vaddr, i32:$soffset, + imm, 1, 1, imm:$glc, imm:$slc, + imm:$tfe)), + (bothen $rsrc, $vaddr, $soffset, (as_i1imm $glc), (as_i1imm $slc), + (as_i1imm $tfe)) + >; +} + +defm : MUBUF_Load_Dword <i32, BUFFER_LOAD_DWORD_OFFSET, BUFFER_LOAD_DWORD_OFFEN, + BUFFER_LOAD_DWORD_IDXEN, BUFFER_LOAD_DWORD_BOTHEN>; +defm : MUBUF_Load_Dword <v2i32, BUFFER_LOAD_DWORDX2_OFFSET, BUFFER_LOAD_DWORDX2_OFFEN, + BUFFER_LOAD_DWORDX2_IDXEN, BUFFER_LOAD_DWORDX2_BOTHEN>; +defm : MUBUF_Load_Dword <v4i32, BUFFER_LOAD_DWORDX4_OFFSET, BUFFER_LOAD_DWORDX4_OFFEN, + BUFFER_LOAD_DWORDX4_IDXEN, BUFFER_LOAD_DWORDX4_BOTHEN>; + //===----------------------------------------------------------------------===// // MTBUF Patterns //===----------------------------------------------------------------------===// @@ -2057,6 +2132,11 @@ def : Pat < (EXTRACT_SUBREG $a, sub0) >; +def : Pat < + (i1 (trunc i32:$a)), + (V_CMP_EQ_I32_e64 (V_AND_B32_e32 (i32 1), $a), 1) +>; + // V_ADD_I32_e32/S_ADD_I32 produces carry in VCC/SCC. For the vector // case, the sgpr-copies pass will fix this to use the vector version. 
def : Pat < diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td index 7fcc96452114..00e32c03a99e 100644 --- a/lib/Target/R600/SIIntrinsics.td +++ b/lib/Target/R600/SIIntrinsics.td @@ -38,6 +38,22 @@ let TargetPrefix = "SI", isTarget = 1 in { llvm_i32_ty], // tfe(imm) []>; + // Fully-flexible BUFFER_LOAD_DWORD_* except for the ADDR64 bit, which is not exposed + def int_SI_buffer_load_dword : Intrinsic < + [llvm_anyint_ty], // vdata(VGPR), overloaded for types i32, v2i32, v4i32 + [llvm_anyint_ty, // rsrc(SGPR) + llvm_anyint_ty, // vaddr(VGPR) + llvm_i32_ty, // soffset(SGPR) + llvm_i32_ty, // inst_offset(imm) + llvm_i32_ty, // offen(imm) + llvm_i32_ty, // idxen(imm) + llvm_i32_ty, // glc(imm) + llvm_i32_ty, // slc(imm) + llvm_i32_ty], // tfe(imm) + [IntrReadArgMem]>; + + def int_SI_sendmsg : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>; def int_SI_sample : Sample; diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp index 958763dffc22..ef867d36692d 100644 --- a/lib/Target/R600/SILowerControlFlow.cpp +++ b/lib/Target/R600/SILowerControlFlow.cpp @@ -109,6 +109,23 @@ FunctionPass *llvm::createSILowerControlFlowPass(TargetMachine &tm) { return new SILowerControlFlowPass(tm); } +static bool isDS(unsigned Opcode) { + switch(Opcode) { + default: return false; + case AMDGPU::DS_ADD_U32_RTN: + case AMDGPU::DS_SUB_U32_RTN: + case AMDGPU::DS_WRITE_B32: + case AMDGPU::DS_WRITE_B8: + case AMDGPU::DS_WRITE_B16: + case AMDGPU::DS_READ_B32: + case AMDGPU::DS_READ_I8: + case AMDGPU::DS_READ_U8: + case AMDGPU::DS_READ_I16: + case AMDGPU::DS_READ_U16: + return true; + } +} + bool SILowerControlFlowPass::shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To) { @@ -145,7 +162,9 @@ void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = MI.getDebugLoc(); - if (!shouldSkip(&MBB, &MBB.getParent()->back())) + if (MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType != + ShaderType::PIXEL || + !shouldSkip(&MBB, &MBB.getParent()->back())) return; MachineBasicBlock::iterator Insert = &MI; @@ -296,9 +315,11 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = MI.getDebugLoc(); - // Kill is only allowed in pixel shaders + // Kill is only allowed in pixel / geometry shaders assert(MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType == - ShaderType::PIXEL); + ShaderType::PIXEL || + MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType == + ShaderType::GEOMETRY); // Clear this pixel from the exec mask if the operand is negative BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC) @@ -431,6 +452,11 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { Next = llvm::next(I); MachineInstr &MI = *I; + if (isDS(MI.getOpcode())) { + NeedM0 = true; + NeedWQM = true; + } + switch (MI.getOpcode()) { default: break; case AMDGPU::SI_IF: @@ -491,14 +517,6 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { IndirectDst(MI); break; - case AMDGPU::DS_READ_B32: - NeedWQM = true; - // Fall through - case AMDGPU::DS_WRITE_B32: - case AMDGPU::DS_ADD_U32_RTN: - NeedM0 = true; - break; - case AMDGPU::V_INTERP_P1_F32: case AMDGPU::V_INTERP_P2_F32: case AMDGPU::V_INTERP_MOV_F32: @@ -517,7 +535,7 @@ bool 
SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { AMDGPU::M0).addImm(0xffffffff); } - if (NeedWQM && MFI->ShaderType != ShaderType::COMPUTE) { + if (NeedWQM && MFI->ShaderType == ShaderType::PIXEL) { MachineBasicBlock &MBB = MF.front(); BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WQM_B64), AMDGPU::EXEC).addReg(AMDGPU::EXEC); diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index bc8f367e9255..22b79b3b2844 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -1181,16 +1181,23 @@ X86AsmParser::CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier, InlineAsmIdentifierInfo &Info){ - if (isa<MCSymbolRefExpr>(Disp)) { - // If this is not a VarDecl then assume it is a FuncDecl or some other label - // reference. We need an 'r' constraint here, so we need to create register - // operand to ensure proper matching. Just pick a GPR based on the size of - // a pointer. - if (!Info.IsVarDecl) { - unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX; - return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true, - SMLoc(), Identifier, Info.OpDecl); - } + // If this is not a VarDecl then assume it is a FuncDecl or some other label + // reference. We need an 'r' constraint here, so we need to create register + // operand to ensure proper matching. Just pick a GPR based on the size of + // a pointer. + if (isa<MCSymbolRefExpr>(Disp) && !Info.IsVarDecl) { + unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX; + return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true, + SMLoc(), Identifier, Info.OpDecl); + } + + // We either have a direct symbol reference, or an offset from a symbol. The + // parser always puts the symbol on the LHS, so look there for size + // calculation purposes. + const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp); + bool IsSymRef = + isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp); + if (IsSymRef) { if (!Size) { Size = Info.Type * 8; // Size is in terms of bits in this context. if (Size) @@ -1312,10 +1319,15 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { if (getParser().parsePrimaryExpr(Val, End)) return Error(Tok.getLoc(), "Unexpected identifier!"); } else { - InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo(); - if (ParseIntelIdentifier(Val, Identifier, Info, - /*Unevaluated=*/false, End)) - return true; + // This is a dot operator, not an adjacent identifier. + if (Identifier.find('.') != StringRef::npos) { + return false; + } else { + InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo(); + if (ParseIntelIdentifier(Val, Identifier, Info, + /*Unevaluated=*/false, End)) + return true; + } } SM.onIdentifierExpr(Val, Identifier); UpdateLocLex = false; @@ -1366,7 +1378,7 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start, if (ParseIntelExpression(SM, End)) return 0; - const MCExpr *Disp; + const MCExpr *Disp = 0; if (const MCExpr *Sym = SM.getSym()) { // A symbolic displacement. Disp = Sym; @@ -1374,13 +1386,20 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start, RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(), ImmDisp, SM.getImm(), BracLoc, StartInBrac, End); - } else { - // An immediate displacement only. - Disp = MCConstantExpr::Create(SM.getImm(), getContext()); } - // Parse the dot operator (e.g., [ebx].foo.bar). 
- if (Tok.getString().startswith(".")) { + if (SM.getImm() || !Disp) { + const MCExpr *Imm = MCConstantExpr::Create(SM.getImm(), getContext()); + if (Disp) + Disp = MCBinaryExpr::CreateAdd(Disp, Imm, getContext()); + else + Disp = Imm; // An immediate displacement only. + } + + // Parse struct field access. Intel requires a dot, but MSVC doesn't. MSVC + // will in fact do global lookup the field name inside all global typedefs, + // but we don't emulate that. + if (Tok.getString().find('.') != StringRef::npos) { const MCExpr *NewDisp; if (ParseIntelDotOperator(Disp, NewDisp)) return 0; @@ -1532,8 +1551,10 @@ bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp, else return Error(Tok.getLoc(), "Non-constant offsets are not supported!"); - // Drop the '.'. - StringRef DotDispStr = Tok.getString().drop_front(1); + // Drop the optional '.'. + StringRef DotDispStr = Tok.getString(); + if (DotDispStr.startswith(".")) + DotDispStr = DotDispStr.drop_front(1); // .Imm gets lexed as a real. if (Tok.is(AsmToken::Real)) { diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index c81a85755f82..16ee0d357b77 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -1065,6 +1065,7 @@ static int readSIB(struct InternalInstruction* insn) { switch (base) { case 0x5: + case 0xd: switch (modFromModRM(insn->modRM)) { case 0x0: insn->eaDisplacement = EA_DISP_32; @@ -1072,13 +1073,11 @@ static int readSIB(struct InternalInstruction* insn) { break; case 0x1: insn->eaDisplacement = EA_DISP_8; - insn->sibBase = (insn->addressSize == 4 ? - SIB_BASE_EBP : SIB_BASE_RBP); + insn->sibBase = (SIBBase)(sibBaseBase + base); break; case 0x2: insn->eaDisplacement = EA_DISP_32; - insn->sibBase = (insn->addressSize == 4 ? - SIB_BASE_EBP : SIB_BASE_RBP); + insn->sibBase = (SIBBase)(sibBaseBase + base); break; case 0x3: debug("Cannot have Mod = 0b11 and a SIB byte"); diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp index 3861e1ce290a..8d2b5954ef20 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -65,6 +65,17 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) { // Exceptions handling ExceptionsType = ExceptionHandling::DwarfCFI; + + // FIXME: this should not depend on the target OS version, but on the ld64 + // version in use. From at least >= ld64-97.17 (Xcode 3.2.6) the abs-ified + // FDE relocs may be used. + DwarfFDESymbolsUseAbsDiff = T.isMacOSX() && !T.isMacOSXVersionLT(10, 6); + + // old assembler lacks some directives + // FIXME: this should really be a check on the assembler characteristics + // rather than OS version + if (T.isMacOSX() && T.isMacOSXVersionLT(10, 6)) + HasWeakDefCanBeHiddenDirective = false; } X86_64MCAsmInfoDarwin::X86_64MCAsmInfoDarwin(const Triple &Triple) diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index 12584411509d..1f5f91844f80 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -393,9 +393,11 @@ bool X86AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode, case 'k': // Print SImode register Reg = getX86SubSuperRegister(Reg, MVT::i32); break; - case 'q': // Print DImode register - // FIXME: gcc will actually print e instead of r for 32-bit. 
- Reg = getX86SubSuperRegister(Reg, MVT::i64); + case 'q': + // Print 64-bit register names if 64-bit integer registers are available. + // Otherwise, print 32-bit register names. + MVT::SimpleValueType Ty = Subtarget->is64Bit() ? MVT::i64 : MVT::i32; + Reg = getX86SubSuperRegister(Reg, Ty); break; } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 76eeb64650ba..716c146811a1 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -15226,9 +15226,15 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( MBB->addSuccessor(EndMBB); } + // Make sure the last operand is EFLAGS, which gets clobbered by the branch + // that was just emitted, but clearly shouldn't be "saved". + assert((MI->getNumOperands() <= 3 || + !MI->getOperand(MI->getNumOperands() - 1).isReg() || + MI->getOperand(MI->getNumOperands() - 1).getReg() == X86::EFLAGS) + && "Expected last argument to be EFLAGS"); unsigned MOVOpc = Subtarget->hasFp256() ? X86::VMOVAPSmr : X86::MOVAPSmr; // In the XMM save block, save all the XMM argument registers. - for (int i = 3, e = MI->getNumOperands(); i != e; ++i) { + for (int i = 3, e = MI->getNumOperands() - 1; i != e; ++i) { int64_t Offset = (i - 3) * 16 + VarArgsFPOffset; MachineMemOperand *MMO = F->getMachineMemOperand( @@ -17577,12 +17583,30 @@ static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG, // FIXME: need symbolic constants for these magic numbers. // See X86ATTInstPrinter.cpp:printSSECC(). unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4; - SDValue OnesOrZeroesF = DAG.getNode(NTOperator, DL, MVT::f32, CMP00, CMP01, + SDValue OnesOrZeroesF = DAG.getNode(NTOperator, DL, CMP00.getValueType(), + CMP00, CMP01, DAG.getConstant(x86cc, MVT::i8)); - SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, MVT::i32, - OnesOrZeroesF); - SDValue ANDed = DAG.getNode(ISD::AND, DL, MVT::i32, OnesOrZeroesI, - DAG.getConstant(1, MVT::i32)); + + MVT IntVT = is64BitFP ? MVT::i64 : MVT::i32; + + if (is64BitFP && !Subtarget->is64Bit()) { + // On a 32-bit target, we cannot bitcast the 64-bit float to a + // 64-bit integer, since that's not a legal type. Since + // OnesOrZeroesF is all ones of all zeroes, we don't need all the + // bits, but can do this little dance to extract the lowest 32 bits + // and work with those going forward. + SDValue Vector64 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64, + OnesOrZeroesF); + SDValue Vector32 = DAG.getNode(ISD::BITCAST, DL, MVT::v4f32, + Vector64); + OnesOrZeroesF = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, + Vector32, DAG.getIntPtrConstant(0)); + IntVT = MVT::i32; + } + + SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, IntVT, OnesOrZeroesF); + SDValue ANDed = DAG.getNode(ISD::AND, DL, IntVT, OnesOrZeroesI, + DAG.getConstant(1, IntVT)); SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ANDed); return OneBitOfTruth; } diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 7d10b67bfe6d..5c8840823b16 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -72,7 +72,7 @@ def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2), // x86-64 va_start lowering magic. 
-let usesCustomInserter = 1 in { +let usesCustomInserter = 1, Defs = [EFLAGS] in { def VASTART_SAVE_XMM_REGS : I<0, Pseudo, (outs), (ins GR8:$al, @@ -81,7 +81,8 @@ def VASTART_SAVE_XMM_REGS : I<0, Pseudo, "#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset", [(X86vastart_save_xmm_regs GR8:$al, imm:$regsavefi, - imm:$offset)]>; + imm:$offset), + (implicit EFLAGS)]>; // The VAARG_64 pseudo-instruction takes the address of the va_list, // and places the address of the next argument into a register. diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 1e724106991a..30290ee7a799 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -25,11 +25,13 @@ static bool CheapToScalarize(Value *V, bool isConstant) { if (isConstant) return true; // If all elts are the same, we can extract it and use any of the values. - Constant *Op0 = C->getAggregateElement(0U); - for (unsigned i = 1, e = V->getType()->getVectorNumElements(); i != e; ++i) - if (C->getAggregateElement(i) != Op0) - return false; - return true; + if (Constant *Op0 = C->getAggregateElement(0U)) { + for (unsigned i = 1, e = V->getType()->getVectorNumElements(); i != e; + ++i) + if (C->getAggregateElement(i) != Op0) + return false; + return true; + } } Instruction *I = dyn_cast<Instruction>(V); if (!I) return false; diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp index 335af81b957a..643bc78f6e58 100644 --- a/lib/Transforms/Scalar/LoopRerollPass.cpp +++ b/lib/Transforms/Scalar/LoopRerollPass.cpp @@ -1088,9 +1088,8 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header, L, SCEV::FlagAnyWrap)); { // Limit the lifetime of SCEVExpander. 
SCEVExpander Expander(*SE, "reroll"); - PHINode *NewIV = - cast<PHINode>(Expander.expandCodeFor(H, IV->getType(), - Header->begin())); + Value *NewIV = Expander.expandCodeFor(H, IV->getType(), Header->begin()); + for (DenseSet<Instruction *>::iterator J = BaseUseSet.begin(), JE = BaseUseSet.end(); J != JE; ++J) (*J)->replaceUsesOfWith(IV, NewIV); @@ -1101,20 +1100,23 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header, if (Inc == 1) ICSCEV = SE->getMulExpr(ICSCEV, SE->getConstant(ICSCEV->getType(), Scale)); - Value *IC; - if (isa<SCEVConstant>(ICSCEV)) { - IC = Expander.expandCodeFor(ICSCEV, NewIV->getType(), BI); + // Iteration count SCEV minus 1 + const SCEV *ICMinus1SCEV = + SE->getMinusSCEV(ICSCEV, SE->getConstant(ICSCEV->getType(), 1)); + + Value *ICMinus1; // Iteration count minus 1 + if (isa<SCEVConstant>(ICMinus1SCEV)) { + ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(), BI); } else { BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) Preheader = InsertPreheaderForLoop(L, this); - IC = Expander.expandCodeFor(ICSCEV, NewIV->getType(), - Preheader->getTerminator()); + ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(), + Preheader->getTerminator()); } - Value *NewIVNext = NewIV->getIncomingValueForBlock(Header); - Value *Cond = new ICmpInst(BI, CmpInst::ICMP_EQ, NewIVNext, IC, + Value *Cond = new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, ICMinus1, "exitcond"); BI->setCondition(Cond); diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index eff5268c448a..6133962e42d7 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -3390,6 +3390,10 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, int64_t NewBaseOffset = (uint64_t)Base.BaseOffset * Factor; if (NewBaseOffset / Factor != Base.BaseOffset) continue; + // If the offset will be truncated at this use, check that it is in bounds. + if (!IntTy->isPointerTy() && + !ConstantInt::isValueValidForType(IntTy, NewBaseOffset)) + continue; // Check that multiplying with the use offset doesn't overflow. int64_t Offset = LU.MinOffset; @@ -3398,6 +3402,10 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Offset = (uint64_t)Offset * Factor; if (Offset / Factor != LU.MinOffset) continue; + // If the offset will be truncated at this use, check that it is in bounds. + if (!IntTy->isPointerTy() && + !ConstantInt::isValueValidForType(IntTy, Offset)) + continue; Formula F = Base; F.BaseOffset = NewBaseOffset; @@ -3432,6 +3440,10 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset * Factor; if (F.UnfoldedOffset / Factor != Base.UnfoldedOffset) continue; + // If the offset will be truncated, check that it is in bounds. + if (!IntTy->isPointerTy() && + !ConstantInt::isValueValidForType(IntTy, F.UnfoldedOffset)) + continue; } // If we make it here and it's legal, add it. 
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp index f15e8d59276b..97e7e5d95783 100644 --- a/lib/Transforms/Utils/LCSSA.cpp +++ b/lib/Transforms/Utils/LCSSA.cpp @@ -32,6 +32,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/Constants.h" @@ -70,6 +71,7 @@ namespace { AU.addRequired<DominatorTree>(); AU.addRequired<LoopInfo>(); AU.addPreservedID(LoopSimplifyID); + AU.addPreserved<AliasAnalysis>(); AU.addPreserved<ScalarEvolution>(); } private: diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 5e758713ed19..f9f6b18940d3 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4191,13 +4191,22 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, continue; } - // Process instructions only once (termination). + // Process instructions only once (termination). Each reduction cycle + // value must only be used once, except by phi nodes and min/max + // reductions which are represented as a cmp followed by a select. + ReductionInstDesc IgnoredVal(false, 0); if (VisitedInsts.insert(Usr)) { if (isa<PHINode>(Usr)) PHIs.push_back(Usr); else NonPHIs.push_back(Usr); - } + } else if (!isa<PHINode>(Usr) && + ((!isa<FCmpInst>(Usr) && + !isa<ICmpInst>(Usr) && + !isa<SelectInst>(Usr)) || + !isMinMaxSelectCmpPattern(Usr, IgnoredVal).IsReduction)) + return false; + // Remember that we completed the cycle. if (Usr == Phi) FoundStartPHI = true; diff --git a/test/Analysis/BasicAA/noalias-bugs.ll b/test/Analysis/BasicAA/noalias-bugs.ll new file mode 100644 index 000000000000..c02a302c1950 --- /dev/null +++ b/test/Analysis/BasicAA/noalias-bugs.ll @@ -0,0 +1,33 @@ +; RUN: opt -S -basicaa -dse < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +; We incorrectly returned noalias in the example below for "ptr.64" and +; "either_ptr.64". +; PR18460 + +%nested = type { %nested.i64 } +%nested.i64 = type { i64 } + +define i64 @testcase(%nested * noalias %p1, %nested * noalias %p2, + i32 %a, i32 %b) { + %ptr = getelementptr inbounds %nested* %p1, i64 -1, i32 0 + %ptr.64 = getelementptr inbounds %nested.i64* %ptr, i64 0, i32 0 + %ptr2= getelementptr inbounds %nested* %p2, i64 0, i32 0 + %cmp = icmp ult i32 %a, %b + %either_ptr = select i1 %cmp, %nested.i64* %ptr2, %nested.i64* %ptr + %either_ptr.64 = getelementptr inbounds %nested.i64* %either_ptr, i64 0, i32 0 + +; Because either_ptr.64 and ptr.64 can alias (we used to return noalias) +; elimination of the first store is not valid. 
+ +; CHECK: store i64 2 +; CHECK: load +; CHECK; store i64 1 + + store i64 2, i64* %ptr.64, align 8 + %r = load i64* %either_ptr.64, align 8 + store i64 1, i64* %ptr.64, align 8 + ret i64 %r +} diff --git a/test/Analysis/BasicAA/phi-aa.ll b/test/Analysis/BasicAA/phi-aa.ll index 6aa26c185e0f..74279e1c4c93 100644 --- a/test/Analysis/BasicAA/phi-aa.ll +++ b/test/Analysis/BasicAA/phi-aa.ll @@ -1,10 +1,14 @@ ; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + ; rdar://7282591 @X = common global i32 0 @Y = common global i32 0 @Z = common global i32 0 +; CHECK-LABEL: foo ; CHECK: NoAlias: i32* %P, i32* @Z define void @foo(i32 %cond) nounwind { @@ -29,3 +33,46 @@ bb2: return: ret void } + +; Pointers can vary in between iterations of loops. +; PR18068 + +; CHECK-LABEL: pr18068 +; CHECK: MayAlias: i32* %0, i32* %arrayidx5 + +define i32 @pr18068(i32* %jj7, i32* %j) { +entry: + %oa5 = alloca [100 x i32], align 16 + br label %codeRepl + +codeRepl: + %0 = phi i32* [ %arrayidx13, %for.body ], [ %j, %entry ] + %targetBlock = call i1 @cond(i32* %jj7) + br i1 %targetBlock, label %for.body, label %bye + +for.body: + %1 = load i32* %jj7, align 4 + %idxprom4 = zext i32 %1 to i64 + %arrayidx5 = getelementptr inbounds [100 x i32]* %oa5, i64 0, i64 %idxprom4 + %2 = load i32* %arrayidx5, align 4 + %sub6 = sub i32 %2, 6 + store i32 %sub6, i32* %arrayidx5, align 4 + ; %0 and %arrayidx5 can alias! It is not safe to DSE the above store. + %3 = load i32* %0, align 4 + store i32 %3, i32* %arrayidx5, align 4 + %sub11 = add i32 %1, -1 + %idxprom12 = zext i32 %sub11 to i64 + %arrayidx13 = getelementptr inbounds [100 x i32]* %oa5, i64 0, i64 %idxprom12 + call void @inc(i32* %jj7) + br label %codeRepl + +bye: + %.reload = load i32* %jj7, align 4 + ret i32 %.reload +} + +declare i1 @cond(i32*) + +declare void @inc(i32*) + + diff --git a/test/Analysis/ScalarEvolution/zext-signed-addrec.ll b/test/Analysis/ScalarEvolution/zext-signed-addrec.ll new file mode 100644 index 000000000000..27aed3b0da1b --- /dev/null +++ b/test/Analysis/ScalarEvolution/zext-signed-addrec.ll @@ -0,0 +1,81 @@ +; RUN: opt -loop-reduce -S < %s | FileCheck %s +; PR18000 + +target datalayout = "e-i64:64-f80:128-s:64-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@a = global i32 0, align 4 +@b = common global i32 0, align 4 +@e = common global i8 0, align 1 +@d = common global i32 0, align 4 +@c = common global i32 0, align 4 +@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 + +; Function Attrs: nounwind optsize uwtable +; CHECK-LABEL: foo +define i32 @foo() { +entry: + %.pr = load i32* @b, align 4 + %cmp10 = icmp slt i32 %.pr, 1 + br i1 %cmp10, label %for.cond1.preheader.lr.ph, label %entry.for.end9_crit_edge + +entry.for.end9_crit_edge: ; preds = %entry + %.pre = load i32* @c, align 4 + br label %for.end9 + +for.cond1.preheader.lr.ph: ; preds = %entry + %0 = load i32* @a, align 4 + %tobool = icmp eq i32 %0, 0 + br i1 %tobool, label %for.cond1.preheader.for.cond1.preheader.split_crit_edge, label %return.loopexit.split + +for.cond1.preheader.for.cond1.preheader.split_crit_edge: ; preds = %for.cond1.preheader.lr.ph, %for.inc8 + %1 = phi i32 [ %inc, %for.inc8 ], [ %.pr, %for.cond1.preheader.lr.ph ] + br label %if.end + +; CHECK-LABEL: if.end +if.end: ; preds = 
%if.end, %for.cond1.preheader.for.cond1.preheader.split_crit_edge + +; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %if.end ], [ 258, %for.cond1.preheader.for.cond1.preheader.split_crit_edge ] + %indvars.iv = phi i32 [ 1, %for.cond1.preheader.for.cond1.preheader.split_crit_edge ], [ %indvars.iv.next, %if.end ] + + %2 = phi i8 [ 1, %for.cond1.preheader.for.cond1.preheader.split_crit_edge ], [ %dec, %if.end ] + %conv7 = mul i32 %indvars.iv, 258 + %shl = and i32 %conv7, 510 + store i32 %shl, i32* @c, align 4 + +; CHECK: %lsr.iv.next = add i32 %lsr.iv, -258 + %dec = add i8 %2, -1 + + %cmp2 = icmp sgt i8 %dec, -1 + %indvars.iv.next = add i32 %indvars.iv, -1 + br i1 %cmp2, label %if.end, label %for.inc8 + +for.inc8: ; preds = %if.end + store i32 0, i32* @d, align 4 + %inc = add nsw i32 %1, 1 + store i32 %inc, i32* @b, align 4 + %cmp = icmp slt i32 %1, 0 + br i1 %cmp, label %for.cond1.preheader.for.cond1.preheader.split_crit_edge, label %for.cond.for.end9_crit_edge + +for.cond.for.end9_crit_edge: ; preds = %for.inc8 + store i8 %dec, i8* @e, align 1 + br label %for.end9 + +for.end9: ; preds = %entry.for.end9_crit_edge, %for.cond.for.end9_crit_edge + %3 = phi i32 [ %.pre, %entry.for.end9_crit_edge ], [ %shl, %for.cond.for.end9_crit_edge ] + %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %3) #2 + br label %return + +return.loopexit.split: ; preds = %for.cond1.preheader.lr.ph + store i8 1, i8* @e, align 1 + store i32 0, i32* @d, align 4 + br label %return + +return: ; preds = %return.loopexit.split, %for.end9 + %retval.0 = phi i32 [ 0, %for.end9 ], [ 1, %return.loopexit.split ] + ret i32 %retval.0 +} + +; Function Attrs: nounwind optsize +declare i32 @printf(i8* nocapture readonly, ...) + diff --git a/test/CodeGen/AArch64/atomic-ops.ll b/test/CodeGen/AArch64/atomic-ops.ll index de84ff46ec3b..5857faf80a16 100644 --- a/test/CodeGen/AArch64/atomic-ops.ll +++ b/test/CodeGen/AArch64/atomic-ops.ll @@ -1,4 +1,5 @@ ; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-REG %s @var8 = global i8 0 @var16 = global i16 0 @@ -17,6 +18,8 @@ define i8 @test_atomic_load_add_i8(i8 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-REG: add w[[NEW:[0-9]+]], w{{[0-9]+}}, w0 +; CHECK-REG-NOT: stlxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -37,6 +40,8 @@ define i16 @test_atomic_load_add_i16(i16 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-REG: add w[[NEW:[0-9]+]], w{{[0-9]+}}, w0 +; CHECK-REG-NOT: stxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -57,6 +62,8 @@ define i32 @test_atomic_load_add_i32(i32 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-REG: add w[[NEW:[0-9]+]], w{{[0-9]+}}, w0 +; CHECK-REG-NOT: stlxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -77,6 +84,8 @@ define i64 @test_atomic_load_add_i64(i64 %offset) nounwind { ; x0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: add [[NEW:x[0-9]+]], x[[OLD]], x0 +; CHECK-REG: add x[[NEW:[0-9]+]], x{{[0-9]+}}, x0 +; CHECK-REG-NOT: stxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -97,6 +106,8 @@ define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-REG: sub w[[NEW:[0-9]+]], w{{[0-9]+}}, w0 +; CHECK-REG-NOT: stxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -117,6 +128,8 @@ define i16 @test_atomic_load_sub_i16(i16 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-REG: sub w[[NEW:[0-9]+]], w{{[0-9]+}}, w0 +; CHECK-REG-NOT: stlxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -137,6 +150,8 @@ define i32 @test_atomic_load_sub_i32(i32 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-REG: sub w[[NEW:[0-9]+]], w{{[0-9]+}}, w0 +; CHECK-REG-NOT: stxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -157,6 +172,8 @@ define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind { ; x0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: sub [[NEW:x[0-9]+]], x[[OLD]], x0 +; CHECK-REG: sub x[[NEW:[0-9]+]], x{{[0-9]+}}, x0 +; CHECK-REG-NOT: stlxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -177,6 +194,8 @@ define i8 @test_atomic_load_and_i8(i8 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-REG: and w[[NEW:[0-9]+]], w{{[0-9]+}}, w0 +; CHECK-REG-NOT: stlxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -197,6 +216,8 @@ define i16 @test_atomic_load_and_i16(i16 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-REG: and w[[NEW:[0-9]+]], w{{[0-9]+}}, w0 +; CHECK-REG-NOT: stxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -217,6 +238,8 @@ define i32 @test_atomic_load_and_i32(i32 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-REG: and w[[NEW:[0-9]+]], w{{[0-9]+}}, w0 +; CHECK-REG-NOT: stlxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -237,6 +260,8 @@ define i64 @test_atomic_load_and_i64(i64 %offset) nounwind { ; x0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: and [[NEW:x[0-9]+]], x[[OLD]], x0 +; CHECK-REG: and x[[NEW:[0-9]+]], x{{[0-9]+}}, x0 +; CHECK-REG-NOT: stxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -257,6 +282,8 @@ define i8 @test_atomic_load_or_i8(i8 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-REG: orr w[[NEW:[0-9]+]], w{{[0-9]+}}, w0 +; CHECK-REG-NOT: stlxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -277,6 +304,8 @@ define i16 @test_atomic_load_or_i16(i16 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-REG: orr w[[NEW:[0-9]+]], w{{[0-9]+}}, w0 +; CHECK-REG-NOT: stxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -297,6 +326,8 @@ define i32 @test_atomic_load_or_i32(i32 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-REG: orr w[[NEW:[0-9]+]], w{{[0-9]+}}, w0 +; CHECK-REG-NOT: stxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -317,6 +348,8 @@ define i64 @test_atomic_load_or_i64(i64 %offset) nounwind { ; x0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: orr [[NEW:x[0-9]+]], x[[OLD]], x0 +; CHECK-REG: orr x[[NEW:[0-9]+]], x{{[0-9]+}}, x0 +; CHECK-REG-NOT: stlxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -337,6 +370,8 @@ define i8 @test_atomic_load_xor_i8(i8 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-REG: eor w[[NEW:[0-9]+]], w{{[0-9]+}}, w0 +; CHECK-REG-NOT: stxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -357,6 +392,8 @@ define i16 @test_atomic_load_xor_i16(i16 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-REG: eor w[[NEW:[0-9]+]], w{{[0-9]+}}, w0 +; CHECK-REG-NOT: stxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -377,6 +414,8 @@ define i32 @test_atomic_load_xor_i32(i32 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-REG: eor w[[NEW:[0-9]+]], w{{[0-9]+}}, w0 +; CHECK-REG-NOT: stlxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -397,6 +436,8 @@ define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind { ; x0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: eor [[NEW:x[0-9]+]], x[[OLD]], x0 +; CHECK-REG: eor x[[NEW:[0-9]+]], x{{[0-9]+}}, x0 +; CHECK-REG-NOT: stxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -416,6 +457,7 @@ define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind { ; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. +; CHECK-REG-NOT: stxrb w0, w0, [x{{[0-9]+}}] ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], w0, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -435,6 +477,7 @@ define i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind { ; CHECK-NEXT: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. +; CHECK-REG-NOT: stlxrh w0, w0, [x{{[0-9]+}}] ; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], w0, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -454,6 +497,7 @@ define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind { ; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. +; CHECK-REG-NOT: stlxr w0, w0, [x{{[0-9]+}}] ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], w0, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -473,6 +517,7 @@ define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind { ; CHECK-NEXT: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]] ; x0 below is a reasonable guess but could change: it certainly comes into the ; function there. +; CHECK-REG-NOT: stxr w0, x0, [x{{[0-9]+}}] ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], x0, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -495,6 +540,8 @@ define i8 @test_atomic_load_min_i8(i8 %offset) nounwind { ; function there. 
; CHECK-NEXT: cmp w0, w[[OLD]], sxtb ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt +; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, gt +; CHECK-REG-NOT: stxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -516,6 +563,8 @@ define i16 @test_atomic_load_min_i16(i16 %offset) nounwind { ; function there. ; CHECK-NEXT: cmp w0, w[[OLD]], sxth ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt +; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, gt +; CHECK-REG-NOT: stlxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -537,6 +586,8 @@ define i32 @test_atomic_load_min_i32(i32 %offset) nounwind { ; function there. ; CHECK-NEXT: cmp w0, w[[OLD]] ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt +; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, gt +; CHECK-REG-NOT: stxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -558,6 +609,8 @@ define i64 @test_atomic_load_min_i64(i64 %offset) nounwind { ; function there. ; CHECK-NEXT: cmp x0, x[[OLD]] ; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, gt +; CHECK-REG: csel x[[NEW:[0-9]+]], x{{[0-9]+}}, x0, gt +; CHECK-REG-NOT: stlxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -579,6 +632,8 @@ define i8 @test_atomic_load_max_i8(i8 %offset) nounwind { ; function there. ; CHECK-NEXT: cmp w0, w[[OLD]], sxtb ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt +; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, lt +; CHECK-REG-NOT: stlxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -600,6 +655,8 @@ define i16 @test_atomic_load_max_i16(i16 %offset) nounwind { ; function there. ; CHECK-NEXT: cmp w0, w[[OLD]], sxth ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt +; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, lt +; CHECK-REG-NOT: stxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -621,6 +678,8 @@ define i32 @test_atomic_load_max_i32(i32 %offset) nounwind { ; function there. ; CHECK-NEXT: cmp w0, w[[OLD]] ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt +; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, lt +; CHECK-REG-NOT: stlxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -642,6 +701,8 @@ define i64 @test_atomic_load_max_i64(i64 %offset) nounwind { ; function there. ; CHECK-NEXT: cmp x0, x[[OLD]] ; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, lt +; CHECK-REG: csel x[[NEW:[0-9]+]], x{{[0-9]+}}, x0, lt +; CHECK-REG-NOT: stlxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -663,6 +724,8 @@ define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind { ; function there. 
; CHECK-NEXT: cmp w0, w[[OLD]], uxtb ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi +; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, hi +; CHECK-REG-NOT: stlxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -684,6 +747,8 @@ define i16 @test_atomic_load_umin_i16(i16 %offset) nounwind { ; function there. ; CHECK-NEXT: cmp w0, w[[OLD]], uxth ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi +; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, hi +; CHECK-REG-NOT: stxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -705,6 +770,8 @@ define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind { ; function there. ; CHECK-NEXT: cmp w0, w[[OLD]] ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi +; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, hi +; CHECK-REG-NOT: stlxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -726,6 +793,8 @@ define i64 @test_atomic_load_umin_i64(i64 %offset) nounwind { ; function there. ; CHECK-NEXT: cmp x0, x[[OLD]] ; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, hi +; CHECK-REG: csel x[[NEW:[0-9]+]], x{{[0-9]+}}, x0, hi +; CHECK-REG-NOT: stlxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -747,6 +816,8 @@ define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind { ; function there. ; CHECK-NEXT: cmp w0, w[[OLD]], uxtb ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo +; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, lo +; CHECK-REG-NOT: stlxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -768,6 +839,8 @@ define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind { ; function there. ; CHECK-NEXT: cmp w0, w[[OLD]], uxth ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo +; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, lo +; CHECK-REG-NOT: stxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -789,6 +862,8 @@ define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind { ; function there. ; CHECK-NEXT: cmp w0, w[[OLD]] ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo +; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, lo +; CHECK-REG-NOT: stlxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -810,6 +885,8 @@ define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind { ; function there. ; CHECK-NEXT: cmp x0, x[[OLD]] ; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, lo +; CHECK-REG: csel x[[NEW:[0-9]+]], x{{[0-9]+}}, x0, lo +; CHECK-REG-NOT: stlxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}] ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb @@ -832,6 +909,7 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind { ; CHECK-NEXT: cmp w[[OLD]], w0 ; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]] ; As above, w1 is a reasonable guess. 
+; CHECK-REG-NOT: stxrb w1, w1, [x{{[0-9]+}}] ; CHECK: stxrb [[STATUS:w[0-9]+]], w1, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] ; CHECK-NOT: dmb @@ -854,6 +932,7 @@ define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind { ; CHECK-NEXT: cmp w[[OLD]], w0 ; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]] ; As above, w1 is a reasonable guess. +; CHECK-REG-NOT: stlxrh w1, w1, [x{{[0-9]+}}] ; CHECK: stlxrh [[STATUS:w[0-9]+]], w1, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] ; CHECK-NOT: dmb @@ -876,6 +955,7 @@ define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind { ; CHECK-NEXT: cmp w[[OLD]], w0 ; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]] ; As above, w1 is a reasonable guess. +; CHECK-REG-NOT: stlxr w1, w1, [x{{[0-9]+}}] ; CHECK: stlxr [[STATUS:w[0-9]+]], w1, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] ; CHECK-NOT: dmb @@ -898,6 +978,7 @@ define i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind { ; CHECK-NEXT: cmp x[[OLD]], x0 ; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]] ; As above, w1 is a reasonable guess. +; CHECK-REG-NOT: stxr w1, x1, [x{{[0-9]+}}] ; CHECK: stxr [[STATUS:w[0-9]+]], x1, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] ; CHECK-NOT: dmb diff --git a/test/CodeGen/AArch64/init-array.ll b/test/CodeGen/AArch64/init-array.ll index 3ff1c1a86ec6..076ae27721df 100644 --- a/test/CodeGen/AArch64/init-array.ll +++ b/test/CodeGen/AArch64/init-array.ll @@ -1,4 +1,5 @@ ; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -use-init-array < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-none-eabi -verify-machineinstrs -use-init-array < %s | FileCheck %s define internal void @_GLOBAL__I_a() section ".text.startup" { ret void diff --git a/test/CodeGen/AArch64/variadic.ll b/test/CodeGen/AArch64/variadic.ll index f3d376beeb28..4c219eb83788 100644 --- a/test/CodeGen/AArch64/variadic.ll +++ b/test/CodeGen/AArch64/variadic.ll @@ -179,24 +179,19 @@ define void @test_va_copy() { ; Check beginning and end again: -; CHECK: ldr [[BLOCK:x[0-9]+]], [{{x[0-9]+}}, #:lo12:var] ; CHECK: add x[[SRC_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var -; CHECK-NOFP: ldr [[BLOCK:x[0-9]+]], [{{x[0-9]+}}, #:lo12:var] -; CHECK-NOFP: add x[[SRC_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var - -; CHECK: str [[BLOCK]], [{{x[0-9]+}}, #:lo12:second_list] - -; CHECK: ldr [[BLOCK:x[0-9]+]], [x[[SRC_LIST]], #24] ; CHECK: add x[[DEST_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:second_list +; CHECK: ldr [[BLOCK1:x[0-9]+]], [{{x[0-9]+}}, #:lo12:var] +; CHECK: ldr [[BLOCK2:x[0-9]+]], [x[[SRC_LIST]], #24] +; CHECK: str [[BLOCK1]], [{{x[0-9]+}}, #:lo12:second_list] +; CHECK: str [[BLOCK2]], [x[[DEST_LIST]], #24] -; CHECK: str [[BLOCK]], [x[[DEST_LIST]], #24] - -; CHECK-NOFP: str [[BLOCK]], [{{x[0-9]+}}, #:lo12:second_list] - -; CHECK-NOFP: ldr [[BLOCK:x[0-9]+]], [x[[SRC_LIST]], #24] +; CHECK-NOFP: add x[[SRC_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var ; CHECK-NOFP: add x[[DEST_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:second_list - -; CHECK-NOFP: str [[BLOCK]], [x[[DEST_LIST]], #24] +; CHECK-NOFP: ldr [[BLOCK1:x[0-9]+]], [{{x[0-9]+}}, #:lo12:var] +; CHECK-NOFP: ldr [[BLOCK2:x[0-9]+]], [x[[SRC_LIST]], #24] +; CHECK-NOFP: str [[BLOCK1]], [{{x[0-9]+}}, #:lo12:second_list] +; CHECK-NOFP: str [[BLOCK2]], [x[[DEST_LIST]], #24] ret void ; CHECK: ret diff --git a/test/CodeGen/ARM/a15-SD-dep.ll b/test/CodeGen/ARM/a15-SD-dep.ll index 019ff6129b00..5e5ca4b873f3 100644 --- a/test/CodeGen/ARM/a15-SD-dep.ll +++ b/test/CodeGen/ARM/a15-SD-dep.ll @@ -56,3 +56,62 @@ define 
arm_aapcs_vfpcc <4 x float> @t5(<4 x float> %q, float %f) { %i2 = fadd <4 x float> %i1, %i1 ret <4 x float> %i2 } + +; Test that DPair can be successfully passed as QPR. +; CHECK-ENABLED-LABEL: test_DPair1: +; CHECK-DISABLED-LABEL: test_DPair1: +define void @test_DPair1(i32 %vsout, i8* nocapture %out, float %x, float %y) { +entry: + %0 = insertelement <4 x float> undef, float %x, i32 1 + %1 = insertelement <4 x float> %0, float %y, i32 0 + ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d{{[0-9]*}}[0] + ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d{{[0-9]*}}[1] + ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d{{[0-9]*}}[0] + ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d{{[0-9]*}}[1] + ; CHECK-DISABLED-NOT: vdup + switch i32 %vsout, label %sw.epilog [ + i32 1, label %sw.bb + i32 0, label %sw.bb6 + ] + +sw.bb: ; preds = %entry + %2 = insertelement <4 x float> %1, float 0.000000e+00, i32 0 + br label %sw.bb6 + +sw.bb6: ; preds = %sw.bb, %entry + %sum.0 = phi <4 x float> [ %1, %entry ], [ %2, %sw.bb ] + %3 = extractelement <4 x float> %sum.0, i32 0 + %conv = fptoui float %3 to i8 + store i8 %conv, i8* %out, align 1 + ret void + +sw.epilog: ; preds = %entry + ret void +} + +; CHECK-ENABLED-LABEL: test_DPair2: +; CHECK-DISABLED-LABEL: test_DPair2: +define void @test_DPair2(i32 %vsout, i8* nocapture %out, float %x) { +entry: + %0 = insertelement <4 x float> undef, float %x, i32 0 + ; CHECK-ENABLED: vdup.32 q{{[0-9]*}}, d{{[0-9]*}}[0] + ; CHECK-DISABLED-NOT: vdup + switch i32 %vsout, label %sw.epilog [ + i32 1, label %sw.bb + i32 0, label %sw.bb1 + ] + +sw.bb: ; preds = %entry + %1 = insertelement <4 x float> %0, float 0.000000e+00, i32 0 + br label %sw.bb1 + +sw.bb1: ; preds = %entry, %sw.bb + %sum.0 = phi <4 x float> [ %0, %entry ], [ %1, %sw.bb ] + %2 = extractelement <4 x float> %sum.0, i32 0 + %conv = fptoui float %2 to i8 + store i8 %conv, i8* %out, align 1 + br label %sw.epilog + +sw.epilog: ; preds = %entry, %sw.bb1 + ret void +}
\ No newline at end of file diff --git a/test/CodeGen/ARM/vld3.ll b/test/CodeGen/ARM/vld3.ll index 400541fb90a2..d6eb4c2f6dd3 100644 --- a/test/CodeGen/ARM/vld3.ll +++ b/test/CodeGen/ARM/vld3.ll @@ -83,6 +83,19 @@ define <1 x i64> @vld3i64(i64* %A) nounwind { ret <1 x i64> %tmp4 } +define <1 x i64> @vld3i64_update(i64** %ptr, i64* %A) nounwind { +;CHECK-LABEL: vld3i64_update: +;CHECK: vld1.64 {d16, d17, d18}, [r1:64]! + %tmp0 = bitcast i64* %A to i8* + %tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8* %tmp0, i32 16) + %tmp5 = getelementptr i64* %A, i32 3 + store i64* %tmp5, i64** %ptr + %tmp2 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 2 + %tmp4 = add <1 x i64> %tmp2, %tmp3 + ret <1 x i64> %tmp4 +} + define <16 x i8> @vld3Qi8(i8* %A) nounwind { ;CHECK-LABEL: vld3Qi8: ;Check the alignment value. Max for this instruction is 64 bits: diff --git a/test/CodeGen/ARM/vld4.ll b/test/CodeGen/ARM/vld4.ll index f7376b503a30..ff162bb022e1 100644 --- a/test/CodeGen/ARM/vld4.ll +++ b/test/CodeGen/ARM/vld4.ll @@ -83,6 +83,19 @@ define <1 x i64> @vld4i64(i64* %A) nounwind { ret <1 x i64> %tmp4 } +define <1 x i64> @vld4i64_update(i64** %ptr, i64* %A) nounwind { +;CHECK-LABEL: vld4i64_update: +;CHECK: vld1.64 {d16, d17, d18, d19}, [r1:256]! + %tmp0 = bitcast i64* %A to i8* + %tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8* %tmp0, i32 64) + %tmp5 = getelementptr i64* %A, i32 4 + store i64* %tmp5, i64** %ptr + %tmp2 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 2 + %tmp4 = add <1 x i64> %tmp2, %tmp3 + ret <1 x i64> %tmp4 +} + define <16 x i8> @vld4Qi8(i8* %A) nounwind { ;CHECK-LABEL: vld4Qi8: ;Check the alignment value. Max for this instruction is 256 bits: diff --git a/test/CodeGen/ARM/vst3.ll b/test/CodeGen/ARM/vst3.ll index 91eb7fce2b74..65625de34573 100644 --- a/test/CodeGen/ARM/vst3.ll +++ b/test/CodeGen/ARM/vst3.ll @@ -61,6 +61,18 @@ define void @vst3i64(i64* %A, <1 x i64>* %B) nounwind { ret void } +define void @vst3i64_update(i64** %ptr, <1 x i64>* %B) nounwind { +;CHECK-LABEL: vst3i64_update +;CHECK: vst1.64 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}]! + %A = load i64** %ptr + %tmp0 = bitcast i64* %A to i8* + %tmp1 = load <1 x i64>* %B + call void @llvm.arm.neon.vst3.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1) + %tmp2 = getelementptr i64* %A, i32 3 + store i64* %tmp2, i64** %ptr + ret void +} + define void @vst3Qi8(i8* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vst3Qi8: ;Check the alignment value. Max for this instruction is 64 bits: diff --git a/test/CodeGen/ARM/vst4.ll b/test/CodeGen/ARM/vst4.ll index ef5c83a57dbb..83a6c7048650 100644 --- a/test/CodeGen/ARM/vst4.ll +++ b/test/CodeGen/ARM/vst4.ll @@ -60,6 +60,18 @@ define void @vst4i64(i64* %A, <1 x i64>* %B) nounwind { ret void } +define void @vst4i64_update(i64** %ptr, <1 x i64>* %B) nounwind { +;CHECK-LABEL: vst4i64_update: +;CHECK: vst1.64 {d16, d17, d18, d19}, [r1]! + %A = load i64** %ptr + %tmp0 = bitcast i64* %A to i8* + %tmp1 = load <1 x i64>* %B + call void @llvm.arm.neon.vst4.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1) + %tmp2 = getelementptr i64* %A, i32 4 + store i64* %tmp2, i64** %ptr + ret void +} + define void @vst4Qi8(i8* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vst4Qi8: ;Check the alignment value. 
Max for this instruction is 256 bits: diff --git a/test/CodeGen/PowerPC/anon_aggr.ll b/test/CodeGen/PowerPC/anon_aggr.ll index 1525e05501ee..ce07d8845ddb 100644 --- a/test/CodeGen/PowerPC/anon_aggr.ll +++ b/test/CodeGen/PowerPC/anon_aggr.ll @@ -119,9 +119,9 @@ unequal: ; CHECK: ld 3, -[[OFFSET1]](1) ; DARWIN32: _func3: -; DARWIN32: addi r[[REG1:[0-9]+]], r[[REGSP:[0-9]+]], 40 +; DARWIN32: addi r[[REG1:[0-9]+]], r[[REGSP:[0-9]+]], 36 ; DARWIN32: addi r[[REG2:[0-9]+]], r[[REGSP]], 24 -; DARWIN32: lwz r[[REG3:[0-9]+]], 48(r[[REGSP]]) +; DARWIN32: lwz r[[REG3:[0-9]+]], 44(r[[REGSP]]) ; DARWIN32: lwz r[[REG4:[0-9]+]], 32(r[[REGSP]]) ; DARWIN32: cmplw cr{{[0-9]+}}, r[[REG4]], r[[REG3]] ; DARWIN32: stw r[[REG3]], -[[OFFSET1:[0-9]+]] diff --git a/test/CodeGen/PowerPC/byval-agg-info.ll b/test/CodeGen/PowerPC/byval-agg-info.ll new file mode 100644 index 000000000000..89ad8e4dcf98 --- /dev/null +++ b/test/CodeGen/PowerPC/byval-agg-info.ll @@ -0,0 +1,17 @@ +; RUN: llc < %s -print-after=prologepilog >%t 2>&1 && FileCheck <%t %s +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +%struct.anon = type { i32, i32 } + +declare void @foo(%struct.anon* %v) +define void @test(i32 %a, i32 %b, %struct.anon* byval nocapture %v) { +entry: + call void @foo(%struct.anon* %v) + ret void +} + +; Make sure that the MMO on the store has no offset from the byval +; variable itself (we used to have mem:ST8[%v+64]). +; CHECK: STD %X5<kill>, 176, %X1; mem:ST8[%v](align=16) + diff --git a/test/CodeGen/PowerPC/cc.ll b/test/CodeGen/PowerPC/cc.ll new file mode 100644 index 000000000000..ab724f5a7e2d --- /dev/null +++ b/test/CodeGen/PowerPC/cc.ll @@ -0,0 +1,70 @@ +; RUN: llc -mcpu=pwr7 < %s | FileCheck %s +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define i64 @test1(i64 %a, i64 %b) { +entry: + %c = icmp eq i64 %a, %b + br label %foo + +foo: + call { i64, i64 } asm sideeffect "sc", "={r0},={r3},{r0},~{cr0},~{cr1},~{cr2},~{cr3},~{cr4},~{cr5},~{cr6},~{cr7}" (i64 %a) + br i1 %c, label %bar, label %end + +bar: + ret i64 %b + +end: + ret i64 %a + +; CHECK-LABEL: @test1 +; CHECK: mfcr [[REG1:[0-9]+]] +; CHECK-DAG: cmpld +; CHECK-DAG: mfocrf [[REG2:[0-9]+]], +; CHECK-DAG: stw [[REG1]], 8(1) +; CHECK-DAG: stw [[REG2]], -4(1) + +; CHECK: sc +; CHECK: lwz [[REG3:[0-9]+]], -4(1) +; CHECK: mtocrf 128, [[REG3]] + +; CHECK: lwz [[REG4:[0-9]+]], 8(1) +; CHECK-DAG: mtocrf 32, [[REG4]] +; CHECK-DAG: mtocrf 16, [[REG4]] +; CHECK-DAG: mtocrf 8, [[REG4]] +; CHECK: blr +} + +define i64 @test2(i64 %a, i64 %b) { +entry: + %c = icmp eq i64 %a, %b + br label %foo + +foo: + call { i64, i64 } asm sideeffect "sc", "={r0},={r3},{r0},~{cc}" (i64 %a) + br i1 %c, label %bar, label %end + +bar: + ret i64 %b + +end: + ret i64 %a + +; CHECK-LABEL: @test2 +; CHECK: mfcr [[REG1:[0-9]+]] +; CHECK-DAG: cmpld +; CHECK-DAG: mfocrf [[REG2:[0-9]+]], +; CHECK-DAG: stw [[REG1]], 8(1) +; CHECK-DAG: stw [[REG2]], -4(1) + +; CHECK: sc +; CHECK: lwz [[REG3:[0-9]+]], -4(1) +; CHECK: mtocrf 128, [[REG3]] + +; CHECK: lwz [[REG4:[0-9]+]], 8(1) +; CHECK-DAG: mtocrf 32, [[REG4]] +; CHECK-DAG: mtocrf 16, [[REG4]] +; CHECK-DAG: mtocrf 8, [[REG4]] +; CHECK: blr +} + diff --git a/test/CodeGen/PowerPC/ctrloop-udivti3.ll b/test/CodeGen/PowerPC/ctrloop-udivti3.ll new file mode 100644 index 000000000000..d07a11fe60fb --- /dev/null +++ b/test/CodeGen/PowerPC/ctrloop-udivti3.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -march=ppc64 | FileCheck %s +target datalayout = "E-m:e-i64:64-n32:64" +target triple = 
"powerpc64-unknown-linux-gnu" + +; Function Attrs: nounwind +define hidden void @_mpd_shortdiv(i64 %n) #0 { +entry: + br i1 undef, label %for.end, label %for.body.lr.ph + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body, %for.body.lr.ph + %i.018.in = phi i64 [ %n, %for.body.lr.ph ], [ %i.018, %for.body ] + %i.018 = add i64 %i.018.in, -1 + %add.i = or i128 undef, undef + %div.i = udiv i128 %add.i, 0 + %conv3.i11 = trunc i128 %div.i to i64 + store i64 %conv3.i11, i64* undef, align 8 + %cmp = icmp eq i64 %i.018, 0 + br i1 %cmp, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +; CHECK-LABEL: @_mpd_shortdiv +; CHECK-NOT: mtctr + +attributes #0 = { nounwind } + diff --git a/test/CodeGen/PowerPC/fast-isel-conversion-p5.ll b/test/CodeGen/PowerPC/fast-isel-conversion-p5.ll new file mode 100644 index 000000000000..db0d8ed0ffa4 --- /dev/null +++ b/test/CodeGen/PowerPC/fast-isel-conversion-p5.ll @@ -0,0 +1,153 @@ +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr5 | FileCheck %s --check-prefix=ELF64 + +; Test sitofp + +define void @sitofp_double_i32(i32 %a, double %b) nounwind ssp { +entry: +; ELF64: sitofp_double_i32 + %b.addr = alloca double, align 8 + %conv = sitofp i32 %a to double +; ELF64: std {{[0-9]+}}, -[[OFFSET:[0-9]+]](1) +; ELF64: lfd {{[0-9]+}}, -[[OFFSET]](1) +; ELF64: fcfid + store double %conv, double* %b.addr, align 8 + ret void +} + +define void @sitofp_double_i64(i64 %a, double %b) nounwind ssp { +entry: +; ELF64: sitofp_double_i64 + %b.addr = alloca double, align 8 + %conv = sitofp i64 %a to double +; ELF64: std {{[0-9]+}}, -[[OFFSET:[0-9]+]](1) +; ELF64: lfd {{[0-9]+}}, -[[OFFSET]](1) +; ELF64: fcfid + store double %conv, double* %b.addr, align 8 + ret void +} + +define void @sitofp_double_i16(i16 %a, double %b) nounwind ssp { +entry: +; ELF64: sitofp_double_i16 + %b.addr = alloca double, align 8 + %conv = sitofp i16 %a to double +; ELF64: extsh +; ELF64: std {{[0-9]+}}, -[[OFFSET:[0-9]+]](1) +; ELF64: lfd {{[0-9]+}}, -[[OFFSET]](1) +; ELF64: fcfid + store double %conv, double* %b.addr, align 8 + ret void +} + +define void @sitofp_double_i8(i8 %a, double %b) nounwind ssp { +entry: +; ELF64: sitofp_double_i8 + %b.addr = alloca double, align 8 + %conv = sitofp i8 %a to double +; ELF64: extsb +; ELF64: std {{[0-9]+}}, -[[OFFSET:[0-9]+]](1) +; ELF64: lfd {{[0-9]+}}, -[[OFFSET]](1) +; ELF64: fcfid + store double %conv, double* %b.addr, align 8 + ret void +} + +; Test fptosi + +define void @fptosi_float_i32(float %a) nounwind ssp { +entry: +; ELF64: fptosi_float_i32 + %b.addr = alloca i32, align 4 + %conv = fptosi float %a to i32 +; ELF64: fctiwz +; ELF64: stfd +; ELF64: lwa + store i32 %conv, i32* %b.addr, align 4 + ret void +} + +define void @fptosi_float_i64(float %a) nounwind ssp { +entry: +; ELF64: fptosi_float_i64 + %b.addr = alloca i64, align 4 + %conv = fptosi float %a to i64 +; ELF64: fctidz +; ELF64: stfd +; ELF64: ld + store i64 %conv, i64* %b.addr, align 4 + ret void +} + +define void @fptosi_double_i32(double %a) nounwind ssp { +entry: +; ELF64: fptosi_double_i32 + %b.addr = alloca i32, align 8 + %conv = fptosi double %a to i32 +; ELF64: fctiwz +; ELF64: stfd +; ELF64: lwa + store i32 %conv, i32* %b.addr, align 8 + ret void +} + +define void @fptosi_double_i64(double %a) nounwind ssp { +entry: +; ELF64: fptosi_double_i64 + %b.addr = alloca i64, align 8 + %conv = fptosi double %a to i64 +; ELF64: fctidz +; ELF64: stfd +; ELF64: ld + store i64 
%conv, i64* %b.addr, align 8 + ret void +} + +; Test fptoui + +define void @fptoui_float_i32(float %a) nounwind ssp { +entry: +; ELF64: fptoui_float_i32 + %b.addr = alloca i32, align 4 + %conv = fptoui float %a to i32 +; ELF64: fctidz +; ELF64: stfd +; ELF64: lwz + store i32 %conv, i32* %b.addr, align 4 + ret void +} + +define void @fptoui_float_i64(float %a) nounwind ssp { +entry: +; ELF64: fptoui_float_i64 + %b.addr = alloca i64, align 4 + %conv = fptoui float %a to i64 +; ELF64: fctiduz +; ELF64: stfd +; ELF64: ld + store i64 %conv, i64* %b.addr, align 4 + ret void +} + +define void @fptoui_double_i32(double %a) nounwind ssp { +entry: +; ELF64: fptoui_double_i32 + %b.addr = alloca i32, align 8 + %conv = fptoui double %a to i32 +; ELF64: fctidz +; ELF64: stfd +; ELF64: lwz + store i32 %conv, i32* %b.addr, align 8 + ret void +} + +define void @fptoui_double_i64(double %a) nounwind ssp { +entry: +; ELF64: fptoui_double_i64 + %b.addr = alloca i64, align 8 + %conv = fptoui double %a to i64 +; ELF64: fctiduz +; ELF64: stfd +; ELF64: ld + store i64 %conv, i64* %b.addr, align 8 + ret void +} diff --git a/test/CodeGen/PowerPC/spill-nor0.ll b/test/CodeGen/PowerPC/spill-nor0.ll new file mode 100644 index 000000000000..65bdc0914350 --- /dev/null +++ b/test/CodeGen/PowerPC/spill-nor0.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s -O0 -mcpu=ppc64 | FileCheck %s +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +; Function Attrs: nounwind +define void @_ZN4llvm3sys17RunningOnValgrindEv() #0 { +entry: + br i1 undef, label %if.then, label %if.end + +if.then: ; preds = %entry + ret void + +if.end: ; preds = %entry + %0 = call i64 asm sideeffect "mr 3,$1\0A\09mr 4,$2\0A\09rotldi 0,0,3 ; rotldi 0,0,13\0A\09rotldi 0,0,61 ; rotldi 0,0,51\0A\09or 1,1,1\0A\09mr $0,3", "=b,b,b,~{cc},~{memory},~{r3},~{r4}"(i32 0, i64* undef) #0 + unreachable + +; CHECK-LABEL: @_ZN4llvm3sys17RunningOnValgrindEv +; CHECK: stw +; CHECK: lwz +} + +attributes #0 = { nounwind } + diff --git a/test/CodeGen/PowerPC/weak_def_can_be_hidden.ll b/test/CodeGen/PowerPC/weak_def_can_be_hidden.ll new file mode 100644 index 000000000000..130d8faaf8bc --- /dev/null +++ b/test/CodeGen/PowerPC/weak_def_can_be_hidden.ll @@ -0,0 +1,38 @@ +; taken from X86 version of the same test +; RUN: llc -mtriple=powerpc-apple-darwin10 -O0 < %s | FileCheck %s +; RUN: llc -mtriple=powerpc-apple-darwin9 -O0 < %s | FileCheck --check-prefix=CHECK-D89 %s +; RUN: llc -mtriple=powerpc-apple-darwin8 -O0 < %s | FileCheck --check-prefix=CHECK-D89 %s + +@v1 = linkonce_odr global i32 32 +; CHECK: .globl _v1 +; CHECK: .weak_def_can_be_hidden _v1 + +; CHECK-D89: .globl _v1 +; CHECK-D89: .weak_definition _v1 + +define i32 @f1() { + %x = load i32 * @v1 + ret i32 %x +} + +@v2 = linkonce_odr global i32 32 +; CHECK: .globl _v2 +; CHECK: .weak_definition _v2 + +; CHECK-D89: .globl _v2 +; CHECK-D89: .weak_definition _v2 + +@v3 = linkonce_odr unnamed_addr global i32 32 +; CHECK: .globl _v3 +; CHECK: .weak_def_can_be_hidden _v3 + +; CHECK-D89: .globl _v3 +; CHECK-D89: .weak_definition _v3 + +define i32* @f2() { + ret i32* @v2 +} + +define i32* @f3() { + ret i32* @v3 +} diff --git a/test/CodeGen/R600/bfe_uint.ll b/test/CodeGen/R600/bfe_uint.ll index 92570c315299..fe466e6ad5fd 100644 --- a/test/CodeGen/R600/bfe_uint.ll +++ b/test/CodeGen/R600/bfe_uint.ll @@ -1,5 +1,7 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +; XFAIL: * + ; CHECK: @bfe_def ; CHECK: BFE_UINT define void @bfe_def(i32 addrspace(1)* %out, i32 %x) { diff --git 
a/test/CodeGen/R600/fabs.ll b/test/CodeGen/R600/fabs.ll index a5f5df96b5d9..2cd3a4f604f2 100644 --- a/test/CodeGen/R600/fabs.ll +++ b/test/CodeGen/R600/fabs.ll @@ -9,7 +9,7 @@ ; R600-CHECK-NOT: AND ; R600-CHECK: |PV.{{[XYZW]}}| ; SI-CHECK-LABEL: @fabs_free -; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0 +; SI-CHECK: V_AND_B32 define void @fabs_free(float addrspace(1)* %out, i32 %in) { entry: @@ -23,8 +23,8 @@ entry: ; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}| ; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}| ; SI-CHECK-LABEL: @fabs_v2 -; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0 -; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0 +; SI-CHECK: V_AND_B32 +; SI-CHECK: V_AND_B32 define void @fabs_v2(<2 x float> addrspace(1)* %out, <2 x float> %in) { entry: %0 = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in) @@ -38,10 +38,10 @@ entry: ; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}| ; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}| ; SI-CHECK-LABEL: @fabs_v4 -; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0 -; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0 -; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0 -; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0 +; SI-CHECK: V_AND_B32 +; SI-CHECK: V_AND_B32 +; SI-CHECK: V_AND_B32 +; SI-CHECK: V_AND_B32 define void @fabs_v4(<4 x float> addrspace(1)* %out, <4 x float> %in) { entry: %0 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in) diff --git a/test/CodeGen/R600/fneg-fabs.ll b/test/CodeGen/R600/fneg-fabs.ll new file mode 100644 index 000000000000..d95e1311bc10 --- /dev/null +++ b/test/CodeGen/R600/fneg-fabs.ll @@ -0,0 +1,55 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK + +; DAGCombiner will transform: +; (fabs (f32 bitcast (i32 a))) => (f32 bitcast (and (i32 a), 0x7FFFFFFF)) +; unless isFabsFree returns true + +; R600-CHECK-LABEL: @fneg_fabs_free +; R600-CHECK-NOT: AND +; R600-CHECK: |PV.{{[XYZW]}}| +; R600-CHECK: -PV +; SI-CHECK-LABEL: @fneg_fabs_free +; SI-CHECK: V_OR_B32 + +define void @fneg_fabs_free(float addrspace(1)* %out, i32 %in) { +entry: + %0 = bitcast i32 %in to float + %1 = call float @fabs(float %0) + %2 = fsub float -0.000000e+00, %1 + store float %2, float addrspace(1)* %out + ret void +} + +; R600-CHECK-LABEL: @fneg_fabs_v2 +; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}| +; R600-CHECK: -PV +; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}| +; R600-CHECK: -PV +; SI-CHECK-LABEL: @fneg_fabs_v2 +; SI-CHECK: V_OR_B32 +; SI-CHECK: V_OR_B32 +define void @fneg_fabs_v2(<2 x float> addrspace(1)* %out, <2 x float> %in) { +entry: + %0 = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in) + %1 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %0 + store <2 x float> %1, <2 x float> addrspace(1)* %out + ret void +} + +; SI-CHECK-LABEL: @fneg_fabs_v4 +; SI-CHECK: V_OR_B32 +; SI-CHECK: V_OR_B32 +; SI-CHECK: V_OR_B32 +; SI-CHECK: V_OR_B32 +define void @fneg_fabs_v4(<4 x float> addrspace(1)* %out, <4 x float> %in) { +entry: + %0 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in) + %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0 + store <4 x float> %1, <4 x float> addrspace(1)* %out + ret void +} + +declare float @fabs(float ) readnone +declare <2 x float> @llvm.fabs.v2f32(<2 x float> ) readnone +declare <4 x float> @llvm.fabs.v4f32(<4 x float> ) readnone diff --git 
a/test/CodeGen/R600/fneg.ll b/test/CodeGen/R600/fneg.ll index 9446aa8ea9c3..f4e6be62467a 100644 --- a/test/CodeGen/R600/fneg.ll +++ b/test/CodeGen/R600/fneg.ll @@ -4,7 +4,7 @@ ; R600-CHECK-LABEL: @fneg ; R600-CHECK: -PV ; SI-CHECK-LABEL: @fneg -; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1 +; SI-CHECK: V_XOR_B32 define void @fneg(float addrspace(1)* %out, float %in) { entry: %0 = fsub float -0.000000e+00, %in @@ -16,8 +16,8 @@ entry: ; R600-CHECK: -PV ; R600-CHECK: -PV ; SI-CHECK-LABEL: @fneg_v2 -; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1 -; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1 +; SI-CHECK: V_XOR_B32 +; SI-CHECK: V_XOR_B32 define void @fneg_v2(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) { entry: %0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %in @@ -31,10 +31,10 @@ entry: ; R600-CHECK: -PV ; R600-CHECK: -PV ; SI-CHECK-LABEL: @fneg_v4 -; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1 -; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1 -; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1 -; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1 +; SI-CHECK: V_XOR_B32 +; SI-CHECK: V_XOR_B32 +; SI-CHECK: V_XOR_B32 +; SI-CHECK: V_XOR_B32 define void @fneg_v4(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) { entry: %0 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %in diff --git a/test/CodeGen/R600/lds-oqap-crash.ll b/test/CodeGen/R600/lds-oqap-crash.ll new file mode 100644 index 000000000000..79591506e8cb --- /dev/null +++ b/test/CodeGen/R600/lds-oqap-crash.ll @@ -0,0 +1,28 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood -verify-machineinstrs | FileCheck %s + +; The test is for a bug in R600EmitClauseMarkers.cpp where this pass +; was searching for a use of the OQAP register in order to determine +; if an LDS instruction could fit in the current clause, but never finding +; one. This created an infinite loop and hung the compiler. +; +; The LDS instruction should not have been defining OQAP in the first place, +; because the LDS instructions are pseudo instructions and the OQAP +; reads and writes are bundled together in the same instruction. + +; CHECK: @lds_crash +define void @lds_crash(i32 addrspace(1)* %out, i32 addrspace(3)* %in, i32 %a, i32 %b, i32 %c) { +entry: + %0 = load i32 addrspace(3)* %in + ; This block needs to be > 115 ISA instructions to hit the bug, + ; so we'll use udiv instructions. 
+ %div0 = udiv i32 %0, %b + %div1 = udiv i32 %div0, %a + %div2 = udiv i32 %div1, 11 + %div3 = udiv i32 %div2, %a + %div4 = udiv i32 %div3, %b + %div5 = udiv i32 %div4, %c + %div6 = udiv i32 %div5, %div0 + %div7 = udiv i32 %div6, %div1 + store i32 %div7, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/llvm.AMDGPU.kill.ll b/test/CodeGen/R600/llvm.AMDGPU.kill.ll new file mode 100644 index 000000000000..bec5cdf65f1b --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.kill.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI %s + +; SI-LABEL: @kill_gs +; SI: V_CMPX_LE_F32 + +define void @kill_gs() #0 { +main_body: + %0 = icmp ule i32 0, 3 + %1 = select i1 %0, float 1.000000e+00, float -1.000000e+00 + call void @llvm.AMDGPU.kill(float %1) + ret void +} + +declare void @llvm.AMDGPU.kill(float) + +attributes #0 = { "ShaderType"="2" } + +!0 = metadata !{metadata !"const", null, i32 1} diff --git a/test/CodeGen/R600/llvm.SI.load.dword.ll b/test/CodeGen/R600/llvm.SI.load.dword.ll new file mode 100644 index 000000000000..a6227755b72e --- /dev/null +++ b/test/CodeGen/R600/llvm.SI.load.dword.ll @@ -0,0 +1,40 @@ +;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s + +; Example of a simple geometry shader loading vertex attributes from the +; ESGS ring buffer + +; CHECK-LABEL: @main +; CHECK: BUFFER_LOAD_DWORD +; CHECK: BUFFER_LOAD_DWORD +; CHECK: BUFFER_LOAD_DWORD +; CHECK: BUFFER_LOAD_DWORD + +define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [2 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, [17 x <16 x i8>] addrspace(2)* inreg, i32, i32, i32, i32) #0 { +main_body: + %10 = getelementptr [2 x <16 x i8>] addrspace(2)* %3, i64 0, i32 1 + %11 = load <16 x i8> addrspace(2)* %10, !tbaa !0 + %12 = shl i32 %6, 2 + %13 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %11, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0) + %14 = bitcast i32 %13 to float + %15 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %11, i32 %12, i32 0, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0) + %16 = bitcast i32 %15 to float + %17 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %11, i32 %12, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 0) + %18 = bitcast i32 %17 to float + %19 = call i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8> %11, <2 x i32> <i32 0, i32 0>, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 0) + %20 = bitcast i32 %19 to float + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %14, float %16, float %18, float %20) + ret void +} + +; Function Attrs: nounwind readonly +declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 + +; Function Attrs: nounwind readonly +declare i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8>, <2 x i32>, i32, i32, i32, i32, i32, i32, i32) #1 + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) + +attributes #0 = { "ShaderType"="1" } +attributes #1 = { nounwind readonly } + +!0 = metadata !{metadata !"const", null, i32 1} diff --git a/test/CodeGen/R600/llvm.SI.sendmsg.ll b/test/CodeGen/R600/llvm.SI.sendmsg.ll new file mode 100644 index 000000000000..cfcc7c4e40ee --- /dev/null +++ b/test/CodeGen/R600/llvm.SI.sendmsg.ll @@ -0,0 +1,21 @@ +;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s + +; CHECK-LABEL: @main +; CHECK: S_SENDMSG 34 +; 
CHECK: S_SENDMSG 274 +; CHECK: S_SENDMSG 562 +; CHECK: S_SENDMSG 3 + +define void @main() { +main_body: + call void @llvm.SI.sendmsg(i32 34, i32 0); + call void @llvm.SI.sendmsg(i32 274, i32 0); + call void @llvm.SI.sendmsg(i32 562, i32 0); + call void @llvm.SI.sendmsg(i32 3, i32 0); + ret void +} + +; Function Attrs: nounwind +declare void @llvm.SI.sendmsg(i32, i32) #0 + +attributes #0 = { nounwind } diff --git a/test/CodeGen/R600/load.ll b/test/CodeGen/R600/load.ll index e4492d7d6e7b..0153524d136c 100644 --- a/test/CodeGen/R600/load.ll +++ b/test/CodeGen/R600/load.ll @@ -445,6 +445,7 @@ define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace( ; R600-CHECK: LDS_UBYTE_READ_RET ; SI-CHECK-LABEL: @load_i8_local ; SI-CHECK-NOT: S_WQM_B64 +; SI-CHECK: S_MOV_B32 m0 ; SI-CHECK: DS_READ_U8 define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) { %1 = load i8 addrspace(3)* %in @@ -458,6 +459,7 @@ define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) { ; R600-CHECK: ASHR ; SI-CHECK-LABEL: @load_i8_sext_local ; SI-CHECK-NOT: S_WQM_B64 +; SI-CHECK: S_MOV_B32 m0 ; SI-CHECK: DS_READ_I8 define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) { entry: @@ -472,6 +474,7 @@ entry: ; R600-CHECK: LDS_UBYTE_READ_RET ; SI-CHECK-LABEL: @load_v2i8_local ; SI-CHECK-NOT: S_WQM_B64 +; SI-CHECK: S_MOV_B32 m0 ; SI-CHECK: DS_READ_U8 ; SI-CHECK: DS_READ_U8 define void @load_v2i8_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) { @@ -489,6 +492,7 @@ entry: ; R600-CHECK-DAG: ASHR ; SI-CHECK-LABEL: @load_v2i8_sext_local ; SI-CHECK-NOT: S_WQM_B64 +; SI-CHECK: S_MOV_B32 m0 ; SI-CHECK: DS_READ_I8 ; SI-CHECK: DS_READ_I8 define void @load_v2i8_sext_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) { @@ -506,6 +510,7 @@ entry: ; R600-CHECK: LDS_UBYTE_READ_RET ; SI-CHECK-LABEL: @load_v4i8_local ; SI-CHECK-NOT: S_WQM_B64 +; SI-CHECK: S_MOV_B32 m0 ; SI-CHECK: DS_READ_U8 ; SI-CHECK: DS_READ_U8 ; SI-CHECK: DS_READ_U8 @@ -529,6 +534,7 @@ entry: ; R600-CHECK-DAG: ASHR ; SI-CHECK-LABEL: @load_v4i8_sext_local ; SI-CHECK-NOT: S_WQM_B64 +; SI-CHECK: S_MOV_B32 m0 ; SI-CHECK: DS_READ_I8 ; SI-CHECK: DS_READ_I8 ; SI-CHECK: DS_READ_I8 @@ -546,6 +552,7 @@ entry: ; R600-CHECK: LDS_USHORT_READ_RET ; SI-CHECK-LABEL: @load_i16_local ; SI-CHECK-NOT: S_WQM_B64 +; SI-CHECK: S_MOV_B32 m0 ; SI-CHECK: DS_READ_U16 define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) { entry: @@ -560,6 +567,7 @@ entry: ; R600-CHECK: ASHR ; SI-CHECK-LABEL: @load_i16_sext_local ; SI-CHECK-NOT: S_WQM_B64 +; SI-CHECK: S_MOV_B32 m0 ; SI-CHECK: DS_READ_I16 define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) { entry: @@ -574,6 +582,7 @@ entry: ; R600-CHECK: LDS_USHORT_READ_RET ; SI-CHECK-LABEL: @load_v2i16_local ; SI-CHECK-NOT: S_WQM_B64 +; SI-CHECK: S_MOV_B32 m0 ; SI-CHECK: DS_READ_U16 ; SI-CHECK: DS_READ_U16 define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) { @@ -591,6 +600,7 @@ entry: ; R600-CHECK-DAG: ASHR ; SI-CHECK-LABEL: @load_v2i16_sext_local ; SI-CHECK-NOT: S_WQM_B64 +; SI-CHECK: S_MOV_B32 m0 ; SI-CHECK: DS_READ_I16 ; SI-CHECK: DS_READ_I16 define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) { @@ -608,6 +618,7 @@ entry: ; R600-CHECK: LDS_USHORT_READ_RET ; SI-CHECK-LABEL: @load_v4i16_local ; SI-CHECK-NOT: S_WQM_B64 +; SI-CHECK: S_MOV_B32 m0 ; SI-CHECK: DS_READ_U16 ; SI-CHECK: DS_READ_U16 ; SI-CHECK: DS_READ_U16 @@ -631,6 +642,7 @@ 
entry: ; R600-CHECK-DAG: ASHR ; SI-CHECK-LABEL: @load_v4i16_sext_local ; SI-CHECK-NOT: S_WQM_B64 +; SI-CHECK: S_MOV_B32 m0 ; SI-CHECK: DS_READ_I16 ; SI-CHECK: DS_READ_I16 ; SI-CHECK: DS_READ_I16 @@ -643,11 +655,12 @@ entry: ret void } -; load an i32 value from the glocal address space. +; load an i32 value from the local address space. ; R600-CHECK-LABEL: @load_i32_local ; R600-CHECK: LDS_READ_RET ; SI-CHECK-LABEL: @load_i32_local ; SI-CHECK-NOT: S_WQM_B64 +; SI-CHECK: S_MOV_B32 m0 ; SI-CHECK: DS_READ_B32 define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) { entry: @@ -656,10 +669,11 @@ entry: ret void } -; load a f32 value from the global address space. +; load a f32 value from the local address space. ; R600-CHECK-LABEL: @load_f32_local ; R600-CHECK: LDS_READ_RET ; SI-CHECK-LABEL: @load_f32_local +; SI-CHECK: S_MOV_B32 m0 ; SI-CHECK: DS_READ_B32 define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) { entry: @@ -673,6 +687,7 @@ entry: ; R600-CHECK: LDS_READ_RET ; R600-CHECK: LDS_READ_RET ; SI-CHECK-LABEL: @load_v2f32_local +; SI-CHECK: S_MOV_B32 m0 ; SI-CHECK: DS_READ_B32 ; SI-CHECK: DS_READ_B32 define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) { diff --git a/test/CodeGen/R600/trunc.ll b/test/CodeGen/R600/trunc.ll index 0bd320ad9ceb..6bbd7f7b510e 100644 --- a/test/CodeGen/R600/trunc.ll +++ b/test/CodeGen/R600/trunc.ll @@ -28,3 +28,13 @@ define void @trunc_shl_i64(i32 addrspace(1)* %out, i64 %a) { store i32 %result, i32 addrspace(1)* %out, align 4 ret void } + +; SI-LABEL: @trunc_i32_to_i1: +; SI: V_AND_B32 +; SI: V_CMP_EQ_I32 +define void @trunc_i32_to_i1(i32 addrspace(1)* %out, i32 %a) { + %trunc = trunc i32 %a to i1 + %result = select i1 %trunc, i32 1, i32 0 + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/vtx-fetch-branch.ll b/test/CodeGen/R600/vtx-fetch-branch.ll new file mode 100644 index 000000000000..0fc99dee0dbe --- /dev/null +++ b/test/CodeGen/R600/vtx-fetch-branch.ll @@ -0,0 +1,29 @@ +; RUN: llc -march=r600 -mcpu=redwood %s -o - | FileCheck %s + +; This tests for a bug where vertex fetch clauses right before an ENDIF +; instruction were being emitted after the ENDIF. We were using ALU_POP_AFTER +; for the ALU clause before the vertex fetch instead of emitting a POP instruction +; after the fetch clause. 
+ + +; CHECK-LABEL: @test +; CHECK-NOT: ALU_POP_AFTER +; CHECK: TEX +; CHECK-NEXT: POP +define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond) { +entry: + %0 = icmp eq i32 %cond, 0 + br i1 %0, label %endif, label %if + +if: + %1 = load i32 addrspace(1)* %in + br label %endif + +endif: + %x = phi i32 [ %1, %if], [ 0, %entry] + store i32 %x, i32 addrspace(1)* %out + br label %done + +done: + ret void +} diff --git a/test/CodeGen/R600/zero_extend.ll b/test/CodeGen/R600/zero_extend.ll index 481b3b328259..a114bfc4a02b 100644 --- a/test/CodeGen/R600/zero_extend.ll +++ b/test/CodeGen/R600/zero_extend.ll @@ -16,3 +16,13 @@ entry: store i64 %2, i64 addrspace(1)* %out ret void } + +; SI-CHECK-LABEL: @testi1toi32 +; SI-CHECK: V_CNDMASK_B32 +define void @testi1toi32(i32 addrspace(1)* %out, i32 %a, i32 %b) { +entry: + %0 = icmp eq i32 %a, %b + %1 = zext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/X86/2009-06-05-VZextByteShort.ll b/test/CodeGen/X86/2009-06-05-VZextByteShort.ll index 5f5d5cccf714..50c62dfb73b8 100644 --- a/test/CodeGen/X86/2009-06-05-VZextByteShort.ll +++ b/test/CodeGen/X86/2009-06-05-VZextByteShort.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 > %t1 +; RUN: llc < %s -march=x86 -mcpu=core2 > %t1 ; RUN: grep movzwl %t1 | count 2 ; RUN: grep movzbl %t1 | count 1 ; RUN: grep movd %t1 | count 4 diff --git a/test/CodeGen/X86/bswap-vector.ll b/test/CodeGen/X86/bswap-vector.ll new file mode 100644 index 000000000000..7a7a8a4ebb18 --- /dev/null +++ b/test/CodeGen/X86/bswap-vector.ll @@ -0,0 +1,19 @@ +; RUN: llc < %s -mcpu=core | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) + +define <2 x i64> @foo(<2 x i64> %v) #0 { +entry: + %r = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %v) + ret <2 x i64> %r +} + +; CHECK-LABEL: @foo +; CHECK: bswapq +; CHECK: bswapq +; CHECK: ret + +attributes #0 = { nounwind uwtable } + diff --git a/test/CodeGen/X86/fma4-intrinsics-x86_64.ll b/test/CodeGen/X86/fma4-intrinsics-x86_64.ll index 7a1a9ae46147..494cb28677a4 100644 --- a/test/CodeGen/X86/fma4-intrinsics-x86_64.ll +++ b/test/CodeGen/X86/fma4-intrinsics-x86_64.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mattr=+avx,+fma4 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=corei7-avx -mattr=+fma4 | FileCheck %s ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=+avx,-fma | FileCheck %s ; VFMADD diff --git a/test/CodeGen/X86/fp-fast.ll b/test/CodeGen/X86/fp-fast.ll index 07baca84804e..7b08ad67220b 100644 --- a/test/CodeGen/X86/fp-fast.ll +++ b/test/CodeGen/X86/fp-fast.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=x86-64 -mattr=+avx,-fma4 -mtriple=x86_64-apple-darwin -enable-unsafe-fp-math < %s | FileCheck %s +; RUN: llc -march=x86-64 -mcpu=corei7-avx -enable-unsafe-fp-math < %s | FileCheck %s ; CHECK-LABEL: test1 define float @test1(float %a) { diff --git a/test/CodeGen/X86/inline-asm-modifier-q.ll b/test/CodeGen/X86/inline-asm-modifier-q.ll new file mode 100644 index 000000000000..d20f06d29054 --- /dev/null +++ b/test/CodeGen/X86/inline-asm-modifier-q.ll @@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86 | FileCheck %s + +; If the target does not have 64-bit integer registers, emit 32-bit register +; names. 
+ +; CHECK: movq (%e{{[abcd]}}x, %ebx, 4) + +define void @q_modifier(i32* %p) { +entry: + tail call void asm sideeffect "movq (${0:q}, %ebx, 4), %mm0", "r,~{dirflag},~{fpsr},~{flags}"(i32* %p) + ret void +} diff --git a/test/CodeGen/X86/isint.ll b/test/CodeGen/X86/isint.ll index 4a98e63f38fc..38d05c662bd5 100644 --- a/test/CodeGen/X86/isint.ll +++ b/test/CodeGen/X86/isint.ll @@ -1,6 +1,11 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-pc-unknown -mattr=+sse2 -mcpu=penryn | FileCheck %s +; RUN: llc < %s -mtriple=i686-pc-unknown -mattr=+sse2 -mcpu=penryn | FileCheck %s + +; PR19059 +; RUN: llc < %s -mtriple=i686-pc-unknown -mattr=+sse2 -mcpu=penryn | FileCheck -check-prefix=CHECK32 %s define i32 @isint_return(double %d) nounwind { +; CHECK-LABEL: isint_return: ; CHECK-NOT: xor ; CHECK: cvt %i = fptosi double %d to i32 @@ -8,6 +13,24 @@ define i32 @isint_return(double %d) nounwind { %e = sitofp i32 %i to double ; CHECK: cmpeqsd %c = fcmp oeq double %d, %e +; CHECK32-NOT: movd {{.*}}, %r{{.*}} +; CHECK32-NOT: andq +; CHECK-NEXT: movd +; CHECK-NEXT: andl + %z = zext i1 %c to i32 + ret i32 %z +} + +define i32 @isint_float_return(float %f) nounwind { +; CHECK-LABEL: isint_float_return: +; CHECK-NOT: xor +; CHECK: cvt + %i = fptosi float %f to i32 +; CHECK-NEXT: cvt + %g = sitofp i32 %i to float +; CHECK: cmpeqss + %c = fcmp oeq float %f, %g +; CHECK-NOT: movd {{.*}}, %r{{.*}} ; CHECK-NEXT: movd ; CHECK-NEXT: andl %z = zext i1 %c to i32 @@ -17,6 +40,7 @@ define i32 @isint_return(double %d) nounwind { declare void @foo() define void @isint_branch(double %d) nounwind { +; CHECK-LABEL: isint_branch: ; CHECK: cvt %i = fptosi double %d to i32 ; CHECK-NEXT: cvt diff --git a/test/CodeGen/X86/pr10420.ll b/test/CodeGen/X86/pr10420.ll index 3993f24954ee..62951892619b 100644 --- a/test/CodeGen/X86/pr10420.ll +++ b/test/CodeGen/X86/pr10420.ll @@ -1,4 +1,9 @@ -; RUN: llc < %s -mtriple=x86_64-apple-macosx -disable-cfi | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7 -disable-cfi | FileCheck --check-prefix=CHECK-64-D11 %s +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.6 -disable-cfi | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.5 -disable-cfi | FileCheck --check-prefix=CHECK-64-D89 %s +; RUN: llc < %s -mtriple=i686-apple-macosx10.6 -disable-cfi | FileCheck --check-prefix=CHECK-I686-D10 %s +; RUN: llc < %s -mtriple=i686-apple-macosx10.5 -disable-cfi | FileCheck --check-prefix=CHECK-I686-D89 %s +; RUN: llc < %s -mtriple=i686-apple-macosx10.4 -disable-cfi | FileCheck --check-prefix=CHECK-I686-D89 %s define private void @foo() { ret void @@ -19,3 +24,44 @@ define void @bar() { ; CHECK: Ltmp19: ; CHECK-NEXT: Ltmp20 = Ltmp2-Ltmp19 ## FDE initial location ; CHECK-NEXT: .quad Ltmp20 + + +; CHECK-64-D11: Ltmp13: +; CHECK-64-D11-NEXT: Ltmp14 = L_foo-Ltmp13 ## FDE initial location +; CHECK-64-D11-NEXT: .quad Ltmp14 + +; CHECK-64-D11: Ltmp20: +; CHECK-64-D11-NEXT: Ltmp21 = Ltmp2-Ltmp20 ## FDE initial location +; CHECK-64-D11-NEXT: .quad Ltmp21 + + +; CHECK-64-D89: Ltmp12: +; CHECK-64-D89-NEXT: .quad L_foo-Ltmp12 ## FDE initial location +; CHECK-64-D89-NEXT: Ltmp13 = (Ltmp0-L_foo)-0 ## FDE address range +; CHECK-64-D89-NEXT: .quad Ltmp13 + +; CHECK-64-D89: Ltmp18: +; CHECK-64-D89-NEXT: .quad Ltmp2-Ltmp18 ## FDE initial location +; CHECK-64-D89-NEXT: Ltmp19 = (Ltmp4-Ltmp2)-0 ## FDE address range +; CHECK-64-D89-NEXT: .quad Ltmp19 + + +; CHECK-I686-D10: Ltmp12: +; CHECK-I686-D10-NEXT: Ltmp13 = L_foo-Ltmp12 ## FDE initial location +; 
CHECK-I686-D10-NEXT: .long Ltmp13 + +; CHECK-I686-D10: Ltmp19: +; CHECK-I686-D10-NEXT: Ltmp20 = Ltmp2-Ltmp19 ## FDE initial location +; CHECK-I686-D10-NEXT: .long Ltmp20 + + +; CHECK-I686-D89: Ltmp12: +; CHECK-I686-D89-NEXT: .long L_foo-Ltmp12 ## FDE initial location +; CHECK-I686-D89-NEXT: Ltmp13 = (Ltmp0-L_foo)-0 ## FDE address range +; CHECK-I686-D89-NEXT: .long Ltmp13 + +; CHECK-I686-D89: Ltmp18: +; CHECK-I686-D89-NEXT: .long Ltmp2-Ltmp18 ## FDE initial location +; CHECK-I686-D89-NEXT: Ltmp19 = (Ltmp4-Ltmp2)-0 ## FDE address range +; CHECK-I686-D89-NEXT: .long Ltmp19 + diff --git a/test/CodeGen/X86/stores-merging.ll b/test/CodeGen/X86/stores-merging.ll new file mode 100644 index 000000000000..61dea088995b --- /dev/null +++ b/test/CodeGen/X86/stores-merging.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%structTy = type { i8, i32, i32 } + +@e = common global %structTy zeroinitializer, align 4 + +; CHECK-LABEL: f +define void @f() { +entry: + +; CHECK: movabsq $528280977409, %rax +; CHECK: movq %rax, e+4(%rip) +; CHECK: movl $456, e+8(%rip) + + store i32 1, i32* getelementptr inbounds (%structTy* @e, i64 0, i32 1), align 4 + store i32 123, i32* getelementptr inbounds (%structTy* @e, i64 0, i32 2), align 4 + store i32 456, i32* getelementptr inbounds (%structTy* @e, i64 0, i32 2), align 4 + ret void +} + diff --git a/test/CodeGen/X86/vaargs.ll b/test/CodeGen/X86/vaargs.ll new file mode 100644 index 000000000000..ddeb7a336d4a --- /dev/null +++ b/test/CodeGen/X86/vaargs.ll @@ -0,0 +1,67 @@ +; RUN: llc -mcpu=corei7-avx %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=NO-FLAGS +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.9.0" + +%struct.__va_list_tag = type { i32, i32, i8*, i8* } + +; Check that vastart gets the right thing. +define i32 @sum(i32 %count, ...) nounwind optsize ssp uwtable { +; CHECK: testb %al, %al +; CHECK-NEXT: je +; CHECK-NEXT: ## BB#{{[0-9]+}}: +; CHECK-NEXT: vmovaps %xmm0, 48(%rsp) +; CHECK-NEXT: vmovaps %xmm1, 64(%rsp) +; CHECK-NEXT: vmovaps %xmm2, 80(%rsp) +; CHECK-NEXT: vmovaps %xmm3, 96(%rsp) +; CHECK-NEXT: vmovaps %xmm4, 112(%rsp) +; CHECK-NEXT: vmovaps %xmm5, 128(%rsp) +; CHECK-NEXT: vmovaps %xmm6, 144(%rsp) +; CHECK-NEXT: vmovaps %xmm7, 160(%rsp) + +; Check that [EFLAGS] hasn't been pulled in. 
+; NO-FLAGS-NOT: %flags + + %ap = alloca [1 x %struct.__va_list_tag], align 16 + %1 = bitcast [1 x %struct.__va_list_tag]* %ap to i8* + call void @llvm.va_start(i8* %1) + %2 = icmp sgt i32 %count, 0 + br i1 %2, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %0 + %3 = getelementptr inbounds [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0, i32 0 + %4 = getelementptr inbounds [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0, i32 2 + %.pre = load i32* %3, align 16 + br label %5 + +; <label>:5 ; preds = %.lr.ph, %13 + %6 = phi i32 [ %.pre, %.lr.ph ], [ %14, %13 ] + %.01 = phi i32 [ %count, %.lr.ph ], [ %15, %13 ] + %7 = icmp ult i32 %6, 41 + br i1 %7, label %8, label %10 + +; <label>:8 ; preds = %5 + %9 = add i32 %6, 8 + store i32 %9, i32* %3, align 16 + br label %13 + +; <label>:10 ; preds = %5 + %11 = load i8** %4, align 8 + %12 = getelementptr i8* %11, i64 8 + store i8* %12, i8** %4, align 8 + br label %13 + +; <label>:13 ; preds = %10, %8 + %14 = phi i32 [ %6, %10 ], [ %9, %8 ] + %15 = add nsw i32 %.01, 1 + %16 = icmp sgt i32 %15, 0 + br i1 %16, label %5, label %._crit_edge + +._crit_edge: ; preds = %13, %0 + %.0.lcssa = phi i32 [ %count, %0 ], [ %15, %13 ] + call void @llvm.va_end(i8* %1) + ret i32 %.0.lcssa +} + +declare void @llvm.va_start(i8*) nounwind + +declare void @llvm.va_end(i8*) nounwind diff --git a/test/CodeGen/X86/vastart-defs-eflags.ll b/test/CodeGen/X86/vastart-defs-eflags.ll new file mode 100644 index 000000000000..6017753fc8fd --- /dev/null +++ b/test/CodeGen/X86/vastart-defs-eflags.ll @@ -0,0 +1,23 @@ +; RUN: llc %s -o - | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.10.0" + +; Check that vastart handling doesn't get between testb and je for the branch. +define i32 @check_flag(i32 %flags, ...) nounwind { +entry: +; CHECK: {{^}} testb $2, %bh +; CHECK-NOT: test +; CHECK: {{^}} je + %and = and i32 %flags, 512 + %tobool = icmp eq i32 %and, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + br label %if.end + +if.end: ; preds = %entry, %if.then + %hasflag = phi i32 [ 1, %if.then ], [ 0, %entry ] + ret i32 %hasflag +} + diff --git a/test/CodeGen/X86/vec_shift4.ll b/test/CodeGen/X86/vec_shift4.ll index e2fe45cf9724..b266a6987557 100644 --- a/test/CodeGen/X86/vec_shift4.ll +++ b/test/CodeGen/X86/vec_shift4.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+sse4.1 | FileCheck %s +; RUN: llc < %s -march=x86 -mcpu=corei7 | FileCheck %s define <2 x i64> @shl1(<4 x i32> %r, <4 x i32> %a) nounwind readnone ssp { entry: diff --git a/test/CodeGen/X86/vshift-4.ll b/test/CodeGen/X86/vshift-4.ll index 4363cd9399cf..a060cf803727 100644 --- a/test/CodeGen/X86/vshift-4.ll +++ b/test/CodeGen/X86/vshift-4.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s +; RUN: llc < %s -march=x86 -mcpu=core2 | FileCheck %s ; test vector shifts converted to proper SSE2 vector shifts when the shift ; amounts are the same when using a shuffle splat. 
diff --git a/test/CodeGen/X86/weak_def_can_be_hidden.ll b/test/CodeGen/X86/weak_def_can_be_hidden.ll index f78f3571cec9..22aa135e65e0 100644 --- a/test/CodeGen/X86/weak_def_can_be_hidden.ll +++ b/test/CodeGen/X86/weak_def_can_be_hidden.ll @@ -1,9 +1,16 @@ -; RUN: llc -mtriple=x86_64-apple-darwin -O0 < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-apple-darwin11 -O0 < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-apple-darwin10 -O0 < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-apple-darwin9 -O0 < %s | FileCheck --check-prefix=CHECK-D89 %s +; RUN: llc -mtriple=i686-apple-darwin9 -O0 < %s | FileCheck --check-prefix=CHECK-D89 %s +; RUN: llc -mtriple=i686-apple-darwin8 -O0 < %s | FileCheck --check-prefix=CHECK-D89 %s @v1 = linkonce_odr global i32 32 ; CHECK: .globl _v1 ; CHECK: .weak_def_can_be_hidden _v1 +; CHECK-D89: .globl _v1 +; CHECK-D89: .weak_definition _v1 + define i32 @f1() { %x = load i32 * @v1 ret i32 %x @@ -13,10 +20,16 @@ define i32 @f1() { ; CHECK: .globl _v2 ; CHECK: .weak_definition _v2 +; CHECK-D89: .globl _v2 +; CHECK-D89: .weak_definition _v2 + @v3 = linkonce_odr unnamed_addr global i32 32 ; CHECK: .globl _v3 ; CHECK: .weak_def_can_be_hidden _v3 +; CHECK-D89: .globl _v3 +; CHECK-D89: .weak_definition _v3 + define i32* @f2() { ret i32* @v2 } diff --git a/test/MC/Disassembler/X86/x86-64.txt b/test/MC/Disassembler/X86/x86-64.txt index 8c6bc0e2964c..6f072df7e4b7 100644 --- a/test/MC/Disassembler/X86/x86-64.txt +++ b/test/MC/Disassembler/X86/x86-64.txt @@ -241,3 +241,27 @@ # CHECK: pextrw $3, %xmm3, (%rax) 0x66 0x0f 0x3a 0x15 0x18 0x03 + +# CHECK: $0, 305419896(,%r8) +0x43 0x80 0x04 0x05 0x78 0x56 0x34 0x12 0x00 + +# CHECK: $0, 305419896(%r13,%r8) +0x43 0x80 0x84 0x05 0x78 0x56 0x34 0x12 0x00 + +# CHECK: $0, 305419896(,%r8) +0x42 0x80 0x04 0x05 0x78 0x56 0x34 0x12 0x00 + +# CHECK: $0, 305419896(%rbp,%r8) +0x42 0x80 0x84 0x05 0x78 0x56 0x34 0x12 0x00 + +# CHECK: $0, 305419896(,%r12) +0x42 0x80 0x04 0x25 0x78 0x56 0x34 0x12 0x00 + +# CHECK: $0, 305419896(%rbp,%r12) +0x42 0x80 0x84 0x25 0x78 0x56 0x34 0x12 0x00 + +# CHECK: $0, 305419896 +0x80 0x04 0x25 0x78 0x56 0x34 0x12 0x00 + +# CHECK: $0, 305419896(%rbp) +0x80 0x84 0x25 0x78 0x56 0x34 0x12 0x00 diff --git a/test/MC/X86/intel-syntax.s b/test/MC/X86/intel-syntax.s index 9677da731c17..50f29e0a64c9 100644 --- a/test/MC/X86/intel-syntax.s +++ b/test/MC/X86/intel-syntax.s @@ -584,3 +584,12 @@ fsub ST(1) fsubr ST(1) fdiv ST(1) fdivr ST(1) + +.bss +.globl _g0 +.text + +// CHECK: movq _g0, %rbx +// CHECK: movq _g0+8, %rcx +mov rbx, qword ptr [_g0] +mov rcx, qword ptr [_g0 + 8] diff --git a/test/Transforms/InstCombine/vec_extract_var_elt.ll b/test/Transforms/InstCombine/vec_extract_var_elt.ll index 3c982873e288..f6f9e0134a16 100644 --- a/test/Transforms/InstCombine/vec_extract_var_elt.ll +++ b/test/Transforms/InstCombine/vec_extract_var_elt.ll @@ -16,3 +16,11 @@ define void @test (float %b, <8 x float> * %p) { ret void } +; PR18600 +define i32 @test2(i32 %i) { + %e = extractelement <4 x i32> bitcast (<2 x i64> <i64 1, i64 2> to <4 x i32>), i32 %i + ret i32 %e + +; CHECK-LABEL: @test2 +; CHECK: extractelement +} diff --git a/test/Transforms/LoopReroll/basic.ll b/test/Transforms/LoopReroll/basic.ll index 314a14947e3e..3bd6d7ae02e1 100644 --- a/test/Transforms/LoopReroll/basic.ll +++ b/test/Transforms/LoopReroll/basic.ll @@ -33,7 +33,7 @@ for.body: ; preds = %for.body, %entry ; CHECK: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %entry ] ; CHECK: %call = tail call i32 @foo(i32 %indvar) #1 ; CHECK: %indvar.next = add i32 %indvar, 1 -; 
CHECK: %exitcond1 = icmp eq i32 %indvar.next, 498 +; CHECK: %exitcond1 = icmp eq i32 %indvar, 497 ; CHECK: br i1 %exitcond1, label %for.end, label %for.body ; CHECK: ret @@ -83,7 +83,7 @@ for.body: ; preds = %entry, %for.body ; CHECK: %arrayidx = getelementptr inbounds i32* %x, i64 %indvar ; CHECK: store i32 %call, i32* %arrayidx, align 4 ; CHECK: %indvar.next = add i64 %indvar, 1 -; CHECK: %exitcond = icmp eq i64 %indvar.next, 1500 +; CHECK: %exitcond = icmp eq i64 %indvar, 1499 ; CHECK: br i1 %exitcond, label %for.end, label %for.body ; CHECK: ret @@ -131,7 +131,7 @@ for.body: ; preds = %for.body, %entry ; CHECK: %arrayidx = getelementptr inbounds i32* %x, i64 %indvars.iv ; CHECK: store i32 %call, i32* %arrayidx, align 4 ; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 -; CHECK: %exitcond1 = icmp eq i64 %indvars.iv.next, 1500 +; CHECK: %exitcond1 = icmp eq i64 %indvars.iv, 1499 ; CHECK: br i1 %exitcond1, label %for.end, label %for.body ; CHECK: ret @@ -213,7 +213,7 @@ for.body: ; preds = %entry, %for.body ; CHECK: %add = fadd float %1, %mul ; CHECK: store float %add, float* %arrayidx2, align 4 ; CHECK: %indvar.next = add i64 %indvar, 1 -; CHECK: %exitcond = icmp eq i64 %indvar.next, 3200 +; CHECK: %exitcond = icmp eq i64 %indvar, 3199 ; CHECK: br i1 %exitcond, label %for.end, label %for.body ; CHECK: ret @@ -313,7 +313,7 @@ for.body: ; preds = %entry, %for.body ; CHECK: %add = fadd float %2, %mul ; CHECK: store float %add, float* %arrayidx4, align 4 ; CHECK: %indvar.next = add i64 %indvar, 1 -; CHECK: %exitcond = icmp eq i64 %indvar.next, 3200 +; CHECK: %exitcond = icmp eq i64 %indvar, 3199 ; CHECK: br i1 %exitcond, label %for.end, label %for.body ; CHECK: ret diff --git a/test/Transforms/LoopReroll/nonconst_lb.ll b/test/Transforms/LoopReroll/nonconst_lb.ll new file mode 100644 index 000000000000..a45469bd2393 --- /dev/null +++ b/test/Transforms/LoopReroll/nonconst_lb.ll @@ -0,0 +1,152 @@ +; RUN: opt < %s -loop-reroll -S | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" +target triple = "thumbv7-none-linux" + +;void foo(int *A, int *B, int m, int n) { +; for (int i = m; i < n; i+=4) { +; A[i+0] = B[i+0] * 4; +; A[i+1] = B[i+1] * 4; +; A[i+2] = B[i+2] * 4; +; A[i+3] = B[i+3] * 4; +; } +;} +define void @foo(i32* nocapture %A, i32* nocapture readonly %B, i32 %m, i32 %n) { +entry: + %cmp34 = icmp slt i32 %m, %n + br i1 %cmp34, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %i.035 = phi i32 [ %add18, %for.body ], [ %m, %entry ] + %arrayidx = getelementptr inbounds i32* %B, i32 %i.035 + %0 = load i32* %arrayidx, align 4 + %mul = shl nsw i32 %0, 2 + %arrayidx2 = getelementptr inbounds i32* %A, i32 %i.035 + store i32 %mul, i32* %arrayidx2, align 4 + %add3 = add nsw i32 %i.035, 1 + %arrayidx4 = getelementptr inbounds i32* %B, i32 %add3 + %1 = load i32* %arrayidx4, align 4 + %mul5 = shl nsw i32 %1, 2 + %arrayidx7 = getelementptr inbounds i32* %A, i32 %add3 + store i32 %mul5, i32* %arrayidx7, align 4 + %add8 = add nsw i32 %i.035, 2 + %arrayidx9 = getelementptr inbounds i32* %B, i32 %add8 + %2 = load i32* %arrayidx9, align 4 + %mul10 = shl nsw i32 %2, 2 + %arrayidx12 = getelementptr inbounds i32* %A, i32 %add8 + store i32 %mul10, i32* %arrayidx12, align 4 + %add13 = add nsw i32 %i.035, 3 + %arrayidx14 = getelementptr inbounds i32* %B, i32 %add13 + %3 = load i32* %arrayidx14, align 4 + %mul15 = shl nsw i32 %3, 2 + %arrayidx17 = getelementptr inbounds i32* %A, 
i32 %add13 + store i32 %mul15, i32* %arrayidx17, align 4 + %add18 = add nsw i32 %i.035, 4 + %cmp = icmp slt i32 %add18, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} +; CHECK-LABEL: @foo +; CHECK: for.body.preheader: ; preds = %entry +; CHECK: %0 = add i32 %n, -1 +; CHECK: %1 = sub i32 %0, %m +; CHECK: %2 = lshr i32 %1, 2 +; CHECK: %3 = mul i32 %2, 4 +; CHECK: %4 = add i32 %m, %3 +; CHECK: %5 = add i32 %4, 3 +; CHECK: br label %for.body + +; CHECK: for.body: ; preds = %for.body, %for.body.preheader +; CHECK: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %for.body.preheader ] +; CHECK: %6 = add i32 %m, %indvar +; CHECK: %arrayidx = getelementptr inbounds i32* %B, i32 %6 +; CHECK: %7 = load i32* %arrayidx, align 4 +; CHECK: %mul = shl nsw i32 %7, 2 +; CHECK: %arrayidx2 = getelementptr inbounds i32* %A, i32 %6 +; CHECK: store i32 %mul, i32* %arrayidx2, align 4 +; CHECK: %indvar.next = add i32 %indvar, 1 +; CHECK: %exitcond = icmp eq i32 %6, %5 +; CHECK: br i1 %exitcond, label %for.end, label %for.body + +;void daxpy_ur(int n,float da,float *dx,float *dy) +; { +; int m = n % 4; +; for (int i = m; i < n; i = i + 4) +; { +; dy[i] = dy[i] + da*dx[i]; +; dy[i+1] = dy[i+1] + da*dx[i+1]; +; dy[i+2] = dy[i+2] + da*dx[i+2]; +; dy[i+3] = dy[i+3] + da*dx[i+3]; +; } +; } +define void @daxpy_ur(i32 %n, float %da, float* nocapture readonly %dx, float* nocapture %dy) { +entry: + %rem = srem i32 %n, 4 + %cmp55 = icmp slt i32 %rem, %n + br i1 %cmp55, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %i.056 = phi i32 [ %add27, %for.body ], [ %rem, %entry ] + %arrayidx = getelementptr inbounds float* %dy, i32 %i.056 + %0 = load float* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds float* %dx, i32 %i.056 + %1 = load float* %arrayidx1, align 4 + %mul = fmul float %1, %da + %add = fadd float %0, %mul + store float %add, float* %arrayidx, align 4 + %add3 = add nsw i32 %i.056, 1 + %arrayidx4 = getelementptr inbounds float* %dy, i32 %add3 + %2 = load float* %arrayidx4, align 4 + %arrayidx6 = getelementptr inbounds float* %dx, i32 %add3 + %3 = load float* %arrayidx6, align 4 + %mul7 = fmul float %3, %da + %add8 = fadd float %2, %mul7 + store float %add8, float* %arrayidx4, align 4 + %add11 = add nsw i32 %i.056, 2 + %arrayidx12 = getelementptr inbounds float* %dy, i32 %add11 + %4 = load float* %arrayidx12, align 4 + %arrayidx14 = getelementptr inbounds float* %dx, i32 %add11 + %5 = load float* %arrayidx14, align 4 + %mul15 = fmul float %5, %da + %add16 = fadd float %4, %mul15 + store float %add16, float* %arrayidx12, align 4 + %add19 = add nsw i32 %i.056, 3 + %arrayidx20 = getelementptr inbounds float* %dy, i32 %add19 + %6 = load float* %arrayidx20, align 4 + %arrayidx22 = getelementptr inbounds float* %dx, i32 %add19 + %7 = load float* %arrayidx22, align 4 + %mul23 = fmul float %7, %da + %add24 = fadd float %6, %mul23 + store float %add24, float* %arrayidx20, align 4 + %add27 = add nsw i32 %i.056, 4 + %cmp = icmp slt i32 %add27, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + +; CHECK-LABEL: @daxpy_ur +; CHECK: for.body.preheader: +; CHECK: %0 = add i32 %n, -1 +; CHECK: %1 = sub i32 %0, %rem +; CHECK: %2 = lshr i32 %1, 2 +; CHECK: %3 = mul i32 %2, 4 +; CHECK: %4 = add i32 %rem, %3 +; CHECK: %5 = add i32 %4, 3 +; CHECK: br label %for.body + +; CHECK: for.body: +; CHECK: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %for.body.preheader ] +; CHECK: %6 = add i32 
%rem, %indvar +; CHECK: %arrayidx = getelementptr inbounds float* %dy, i32 %6 +; CHECK: %7 = load float* %arrayidx, align 4 +; CHECK: %arrayidx1 = getelementptr inbounds float* %dx, i32 %6 +; CHECK: %8 = load float* %arrayidx1, align 4 +; CHECK: %mul = fmul float %8, %da +; CHECK: %add = fadd float %7, %mul +; CHECK: store float %add, float* %arrayidx, align 4 +; CHECK: %indvar.next = add i32 %indvar, 1 +; CHECK: %exitcond = icmp eq i32 %6, %5 +; CHECK: br i1 %exitcond, label %for.end, label %for.body diff --git a/test/Transforms/LoopReroll/reduction.ll b/test/Transforms/LoopReroll/reduction.ll index aed7670b666d..c9991c723ec6 100644 --- a/test/Transforms/LoopReroll/reduction.ll +++ b/test/Transforms/LoopReroll/reduction.ll @@ -38,7 +38,7 @@ for.body: ; preds = %entry, %for.body ; CHECK: %0 = load i32* %arrayidx, align 4 ; CHECK: %add = add nsw i32 %0, %r.029 ; CHECK: %indvar.next = add i64 %indvar, 1 -; CHECK: %exitcond = icmp eq i64 %indvar.next, 400 +; CHECK: %exitcond = icmp eq i64 %indvar, 399 ; CHECK: br i1 %exitcond, label %for.end, label %for.body ; CHECK: ret @@ -83,7 +83,7 @@ for.body: ; preds = %entry, %for.body ; CHECK: %0 = load float* %arrayidx, align 4 ; CHECK: %add = fadd float %0, %r.029 ; CHECK: %indvar.next = add i64 %indvar, 1 -; CHECK: %exitcond = icmp eq i64 %indvar.next, 400 +; CHECK: %exitcond = icmp eq i64 %indvar, 399 ; CHECK: br i1 %exitcond, label %for.end, label %for.body ; CHECK: ret diff --git a/test/Transforms/LoopStrengthReduce/X86/pr17473.ll b/test/Transforms/LoopStrengthReduce/X86/pr17473.ll new file mode 100644 index 000000000000..4204abc7ca22 --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/X86/pr17473.ll @@ -0,0 +1,67 @@ +; RUN: opt < %s -loop-reduce -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128" +target triple = "x86_64-apple-macosx10.9.0" + +; LSR shouldn't normalize IV if it can't be denormalized to the original +; expression. In this testcase, the normalized expression was denormalized to +; an expression different from the original, and we were losing sign extension. + +; CHECK: [[TMP:%[a-z]+]] = trunc i32 {{.*}} to i8 +; CHECK: {{%[a-z0-9]+}} = sext i8 [[TMP]] to i32 + +@j = common global i32 0, align 4 +@c = common global i32 0, align 4 +@g = common global i32 0, align 4 +@h = common global i8 0, align 1 +@d = common global i32 0, align 4 +@i = common global i32 0, align 4 +@e = common global i32 0, align 4 +@.str = private unnamed_addr constant [4 x i8] c"%x\0A\00", align 1 +@a = common global i32 0, align 4 +@b = common global i16 0, align 2 + +; Function Attrs: nounwind optsize ssp uwtable +define i32 @main() #0 { +entry: + store i8 0, i8* @h, align 1 + %0 = load i32* @j, align 4 + %tobool.i = icmp eq i32 %0, 0 + %1 = load i32* @d, align 4 + %cmp3 = icmp sgt i32 %1, -1 + %.lobit = lshr i32 %1, 31 + %.lobit.not = xor i32 %.lobit, 1 + br label %for.body + +for.body: ; preds = %entry, %fn3.exit + %inc9 = phi i8 [ 0, %entry ], [ %inc, %fn3.exit ] + %conv = sext i8 %inc9 to i32 + br i1 %tobool.i, label %fn3.exit, label %land.rhs.i + +land.rhs.i: ; preds = %for.body + store i32 0, i32* @c, align 4 + br label %fn3.exit + +fn3.exit: ; preds = %for.body, %land.rhs.i + %inc = add i8 %inc9, 1 + %cmp = icmp sgt i8 %inc, -1 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %fn3.exit + %.lobit.not. 
= select i1 %cmp3, i32 %.lobit.not, i32 0 + store i32 %conv, i32* @g, align 4 + store i32 %.lobit.not., i32* @i, align 4 + store i8 %inc, i8* @h, align 1 + %conv7 = sext i8 %inc to i32 + %add = add nsw i32 %conv7, %conv + store i32 %add, i32* @e, align 4 + %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %add) #2 + ret i32 0 +} + +; Function Attrs: nounwind optsize +declare i32 @printf(i8* nocapture readonly, ...) #1 + +attributes #0 = { nounwind optsize ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind optsize } diff --git a/test/Transforms/LoopStrengthReduce/pr18165.ll b/test/Transforms/LoopStrengthReduce/pr18165.ll new file mode 100644 index 000000000000..89adef7bd49d --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/pr18165.ll @@ -0,0 +1,88 @@ +; RUN: opt < %s -loop-reduce -S | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128" +target triple = "x86_64-apple-macosx10.9.0" + +; LSR shouldn't reuse IV if the resultant offset is not valid for the operand type. +; CHECK-NOT: trunc i32 %.ph to i8 + +%struct.anon = type { i32, i32, i32 } + +@c = global i32 1, align 4 +@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 +@b = common global i32 0, align 4 +@a = common global %struct.anon zeroinitializer, align 4 +@e = common global %struct.anon zeroinitializer, align 4 +@d = common global i32 0, align 4 +@f = common global i32 0, align 4 +@g = common global i32 0, align 4 +@h = common global i32 0, align 4 + +; Function Attrs: nounwind optsize ssp uwtable +define i32 @main() #0 { +entry: + %0 = load i32* getelementptr inbounds (%struct.anon* @a, i64 0, i32 0), align 4, !tbaa !1 + %tobool7.i = icmp eq i32 %0, 0 + %.promoted.i = load i32* getelementptr inbounds (%struct.anon* @a, i64 0, i32 2), align 4, !tbaa !6 + %f.promoted.i = load i32* @f, align 4, !tbaa !7 + br label %for.body6.i.outer + +for.body6.i.outer: ; preds = %entry, %lor.end.i + %.ph = phi i32 [ %add.i, %lor.end.i ], [ 0, %entry ] + %or1512.i.ph = phi i32 [ %or15.i, %lor.end.i ], [ %f.promoted.i, %entry ] + %or1410.i.ph = phi i32 [ %or14.i, %lor.end.i ], [ %.promoted.i, %entry ] + %p.addr.16.i.ph = phi i8 [ %inc10.i, %lor.end.i ], [ -128, %entry ] + br i1 %tobool7.i, label %if.end9.i, label %lbl.loopexit.i + +lbl.loopexit.i: ; preds = %for.body6.i.outer, %lbl.loopexit.i + br label %lbl.loopexit.i + +if.end9.i: ; preds = %for.body6.i.outer + %inc10.i = add i8 %p.addr.16.i.ph, 1 + %tobool12.i = icmp eq i8 %p.addr.16.i.ph, 0 + br i1 %tobool12.i, label %lor.rhs.i, label %lor.end.i + +lor.rhs.i: ; preds = %if.end9.i + %1 = load i32* @b, align 4, !tbaa !7 + %dec.i = add nsw i32 %1, -1 + store i32 %dec.i, i32* @b, align 4, !tbaa !7 + %tobool13.i = icmp ne i32 %1, 0 + br label %lor.end.i + +lor.end.i: ; preds = %lor.rhs.i, %if.end9.i + %2 = phi i1 [ true, %if.end9.i ], [ %tobool13.i, %lor.rhs.i ] + %lor.ext.i = zext i1 %2 to i32 + %or14.i = or i32 %lor.ext.i, 
%or1410.i.ph + %or15.i = or i32 %or14.i, %or1512.i.ph + %add.i = add nsw i32 %.ph, 2 + %cmp.i = icmp slt i32 %add.i, 21 + br i1 %cmp.i, label %for.body6.i.outer, label %fn1.exit + +fn1.exit: ; preds = %lor.end.i + store i32 0, i32* @g, align 4, !tbaa !7 + store i32 %or14.i, i32* getelementptr inbounds (%struct.anon* @a, i64 0, i32 2), align 4, !tbaa !6 + store i32 %or15.i, i32* @f, align 4, !tbaa !7 + store i32 %add.i, i32* getelementptr inbounds (%struct.anon* @e, i64 0, i32 1), align 4, !tbaa !8 + store i32 0, i32* @h, align 4, !tbaa !7 + %3 = load i32* @b, align 4, !tbaa !7 + %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %3) #2 + ret i32 0 +} + +; Function Attrs: nounwind optsize +declare i32 @printf(i8* nocapture readonly, ...) #1 + +attributes #0 = { nounwind optsize ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind optsize } + +!llvm.ident = !{!0} + +!0 = metadata !{metadata !"clang version 3.5 "} +!1 = metadata !{metadata !2, metadata !3, i64 0} +!2 = metadata !{metadata !"", metadata !3, i64 0, metadata !3, i64 4, metadata !3, i64 8} +!3 = metadata !{metadata !"int", metadata !4, i64 0} +!4 = metadata !{metadata !"omnipotent char", metadata !5, i64 0} +!5 = metadata !{metadata !"Simple C/C++ TBAA"} +!6 = metadata !{metadata !2, metadata !3, i64 8} +!7 = metadata !{metadata !3, metadata !3, i64 0} +!8 = metadata !{metadata !2, metadata !3, i64 4} diff --git a/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll b/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll new file mode 100644 index 000000000000..5fc5ed55a99d --- /dev/null +++ b/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll @@ -0,0 +1,42 @@ +; RUN: opt -indvars -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.9.0" + +; We must not vectorize this loop. %add55 is not reduction. Its value is used +; multiple times. 
+ +; PR18526 + +; CHECK: multiple_use_of_value +; CHECK-NOT: <2 x i32> + +define void @multiple_use_of_value() { +entry: + %n = alloca i32, align 4 + %k7 = alloca i32, align 4 + %nf = alloca i32, align 4 + %0 = load i32* %k7, align 4 + %.neg1 = sub i32 0, %0 + %n.promoted = load i32* %n, align 4 + %nf.promoted = load i32* %nf, align 4 + br label %for.body + +for.body: + %inc107 = phi i32 [ undef, %entry ], [ %inc10, %for.body ] + %inc6 = phi i32 [ %nf.promoted, %entry ], [ undef, %for.body ] + %add55 = phi i32 [ %n.promoted, %entry ], [ %add5, %for.body ] + %.neg2 = sub i32 0, %inc6 + %add.neg = add i32 0, %add55 + %add4.neg = add i32 %add.neg, %.neg1 + %sub = add i32 %add4.neg, %.neg2 + %add5 = add i32 %sub, %add55 + %inc10 = add i32 %inc107, 1 + %cmp = icmp ult i32 %inc10, 61 + br i1 %cmp, label %for.body, label %for.end + +for.end: + %add5.lcssa = phi i32 [ %add5, %for.body ] + store i32 %add5.lcssa, i32* %n, align 4 + ret void +} diff --git a/tools/llvm-shlib/Makefile b/tools/llvm-shlib/Makefile index 92f3132ff539..4a0c2ea68df3 100644 --- a/tools/llvm-shlib/Makefile +++ b/tools/llvm-shlib/Makefile @@ -10,10 +10,12 @@ LEVEL := ../.. LIBRARYNAME = LLVM-$(LLVMVersion) +LIBRARYALIASNAME = LLVM-$(LLVM_VERSION_MAJOR).$(LLVM_VERSION_MINOR)$(LLVM_VERSION_SUFFIX) NO_BUILD_ARCHIVE := 1 LINK_LIBS_IN_SHARED := 1 SHARED_LIBRARY := 1 +SHARED_ALIAS := 1 include $(LEVEL)/Makefile.config diff --git a/utils/release/test-release.sh b/utils/release/test-release.sh index 91dac4ca7821..90c5f1ab2d08 100755 --- a/utils/release/test-release.sh +++ b/utils/release/test-release.sh @@ -26,6 +26,7 @@ Base_url="http://llvm.org/svn/llvm-project" Release="" Release_no_dot="" RC="" +DOT="" Triple="" use_gzip="no" do_checkout="yes" @@ -45,6 +46,7 @@ function usage() { echo "" echo " -release X.Y The release number to test." echo " -rc NUM The pre-release candidate number." + echo " -dot NUM The dot release to test e.g. X.Y.DOT_NUM [default: 0]" echo " -final The final release candidate." echo " -triple TRIPLE The target triple for this machine." echo " -j NUM Number of compile jobs to run. [default: 3]" @@ -76,6 +78,13 @@ while [ $# -gt 0 ]; do -final | --final ) RC=final ;; + -dot | --dot ) + shift + DOT="$1" + if [ $DOT -eq 0 ]; then + DOT="" + fi + ;; -triple | --triple ) shift Triple="$1" @@ -137,6 +146,10 @@ while [ $# -gt 0 ]; do shift done +if [ -n "$DOT" ]; then + Release="$Release.$DOT" +fi + # Check required arguments. if [ -z "$Release" ]; then echo "error: no release number specified" @@ -217,12 +230,29 @@ if [ `uname -s` != "Darwin" ]; then check_program_exists 'objdump' fi +function get_svn_tag() { + case $1 in + # llvm and clang are the only projects currently doing dot releases. + llvm | cfe) + if [ -z $DOT ]; then + SvnTag="$RC" + else + SvnTag="dot$DOT-$RC" + fi + ;; + *) + SvnTag="$RC" + ;; + esac +} + # Make sure that the URLs are valid. function check_valid_urls() { for proj in $projects ; do echo "# Validating $proj SVN URL" - if ! svn ls $Base_url/$proj/tags/RELEASE_$Release_no_dot/$RC > /dev/null 2>&1 ; then + get_svn_tag "$proj" + if ! svn ls $Base_url/$proj/tags/RELEASE_$Release_no_dot/$SvnTag > /dev/null 2>&1 ; then echo "llvm $Release release candidate $RC doesn't exist!" exit 1 fi @@ -235,7 +265,8 @@ function export_sources() { for proj in $projects ; do echo "# Exporting $proj $Release-RC$RC sources" - if ! svn export -q $Base_url/$proj/tags/RELEASE_$Release_no_dot/$RC $proj.src ; then + get_svn_tag "$proj" + if ! 
svn export -q $Base_url/$proj/tags/RELEASE_$Release_no_dot/$SvnTag $proj.src ; then echo "error: failed to export $proj project" exit 1 fi
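
For reference, the -dot option added to utils/release/test-release.sh above feeds get_svn_tag, which switches llvm and cfe to the "dotN" tag layout while other projects keep the plain release-candidate tag. A validation run for this dot release might look roughly like the following; the target triple and job count are illustrative assumptions, not part of the patch:

    ./test-release.sh -release 3.4 -dot 1 -rc 1 -triple x86_64-unknown-linux-gnu -j 4

With -dot 1 the script appends the dot number to Release (3.4 becomes 3.4.1) and, for llvm and cfe only, exports tags/RELEASE_${Release_no_dot}/dot1-rc1 (or dot1-final when -final is given) instead of the plain rc tag used for the remaining projects.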