author     Dimitry Andric <dim@FreeBSD.org>    2015-12-30 13:34:49 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2015-12-30 13:34:49 +0000
commit     0623d7483df5fc17b32ba7bc5cb9c7beebf6db9c (patch)
tree       28726ef2038e86121e353aabf52297b35a48efa2 /contrib/llvm/tools/clang/lib/Headers
parent     7d523365ff1a3cc95bc058b33102500f61e8166d (diff)
parent     45b533945f0851ec234ca846e1af5ee1e4df0b6e (diff)
Update clang to trunk r256633.
Notes:
    svn path=/projects/clang380-import/; revision=292942
Diffstat (limited to 'contrib/llvm/tools/clang/lib/Headers')
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/Intrin.h  34
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/__clang_cuda_runtime_wrapper.h  216
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/__wmmintrin_aes.h  10
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/__wmmintrin_pclmul.h  6
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/adxintrin.h  6
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/altivec.h  747
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/ammintrin.h  108
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/arm_acle.h  16
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/avx2intrin.h  451
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/avx512bwintrin.h  390
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/avx512dqintrin.h  538
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/avx512erintrin.h  26
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/avx512fintrin.h  701
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/avx512vlbwintrin.h  433
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/avx512vldqintrin.h  606
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/avx512vlintrin.h  2666
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/avxintrin.h  186
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/bmi2intrin.h  6
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/bmiintrin.h  18
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/emmintrin.h  81
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/f16cintrin.h  26
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/fma4intrin.h  8
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/fmaintrin.h  8
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/fxsrintrin.h  2
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/htmxlintrin.h  2
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/immintrin.h  95
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/lzcntintrin.h  6
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/mm3dnow.h  6
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/mmintrin.h  48
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/module.modulemap  25
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/nmmintrin.h  5
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/pmmintrin.h  10
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/popcntintrin.h  18
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/prfchwintrin.h  6
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/rdseedintrin.h  5
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/rtmintrin.h  2
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/shaintrin.h  8
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/smmintrin.h  181
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/stdint.h  14
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/tbmintrin.h  14
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/tgmath.h  2
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/tmmintrin.h  19
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/wmmintrin.h  9
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/x86intrin.h  24
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/xmmintrin.h  42
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/xopintrin.h  91
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/xsavecintrin.h  48
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/xsaveintrin.h  58
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/xsaveoptintrin.h  48
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/xsavesintrin.h  58
50 files changed, 6937 insertions, 1196 deletions
diff --git a/contrib/llvm/tools/clang/lib/Headers/Intrin.h b/contrib/llvm/tools/clang/lib/Headers/Intrin.h
index 24b3eae8bf86..6c1d0d16eabf 100644
--- a/contrib/llvm/tools/clang/lib/Headers/Intrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/Intrin.h
@@ -49,10 +49,7 @@ extern "C" {
#if defined(__MMX__)
/* And the random ones that aren't in those files. */
__m64 _m_from_float(float);
-__m64 _m_from_int(int _l);
-void _m_prefetch(void *);
float _m_to_float(__m64);
-int _m_to_int(__m64 _M);
#endif
/* Other assorted instruction intrinsics. */
@@ -292,9 +289,6 @@ void _xend(void);
static __inline__
#define _XCR_XFEATURE_ENABLED_MASK 0
unsigned __int64 __cdecl _xgetbv(unsigned int);
-void __cdecl _xrstor(void const *, unsigned __int64);
-void __cdecl _xsave(void *, unsigned __int64);
-void __cdecl _xsaveopt(void *, unsigned __int64);
void __cdecl _xsetbv(unsigned int, unsigned __int64);
/* These additional intrinsics are turned on in x64/amd64/x86_64 mode. */
@@ -434,13 +428,21 @@ __umulh(unsigned __int64 _Multiplier, unsigned __int64 _Multiplicand) {
(unsigned __int128)_Multiplier * (unsigned __int128)_Multiplicand;
return _FullProduct >> 64;
}
-void __cdecl _xrstor64(void const *, unsigned __int64);
-void __cdecl _xsave64(void *, unsigned __int64);
-void __cdecl _xsaveopt64(void *, unsigned __int64);
#endif /* __x86_64__ */
/*----------------------------------------------------------------------------*\
+|* Multiplication
+\*----------------------------------------------------------------------------*/
+static __inline__ __int64 __DEFAULT_FN_ATTRS
+__emul(int __in1, int __in2) {
+ return (__int64)__in1 * (__int64)__in2;
+}
+static __inline__ unsigned __int64 __DEFAULT_FN_ATTRS
+__emulu(unsigned int __in1, unsigned int __in2) {
+ return (unsigned __int64)__in1 * (unsigned __int64)__in2;
+}
+/*----------------------------------------------------------------------------*\
|* Bit Twiddling
\*----------------------------------------------------------------------------*/
static __inline__ unsigned char __DEFAULT_FN_ATTRS
@@ -770,27 +772,25 @@ _InterlockedCompareExchange64(__int64 volatile *_Destination,
/*----------------------------------------------------------------------------*\
|* Barriers
\*----------------------------------------------------------------------------*/
-#if defined(__i386__) || defined(__x86_64__)
static __inline__ void __DEFAULT_FN_ATTRS
__attribute__((__deprecated__("use other intrinsics or C++11 atomics instead")))
_ReadWriteBarrier(void) {
- __asm__ volatile ("" : : : "memory");
+ __atomic_signal_fence(__ATOMIC_SEQ_CST);
}
static __inline__ void __DEFAULT_FN_ATTRS
__attribute__((__deprecated__("use other intrinsics or C++11 atomics instead")))
_ReadBarrier(void) {
- __asm__ volatile ("" : : : "memory");
+ __atomic_signal_fence(__ATOMIC_SEQ_CST);
}
static __inline__ void __DEFAULT_FN_ATTRS
__attribute__((__deprecated__("use other intrinsics or C++11 atomics instead")))
_WriteBarrier(void) {
- __asm__ volatile ("" : : : "memory");
+ __atomic_signal_fence(__ATOMIC_SEQ_CST);
}
-#endif
#ifdef __x86_64__
static __inline__ void __DEFAULT_FN_ATTRS
__faststorefence(void) {
- __asm__ volatile("lock orq $0, (%%rsp)" : : : "memory");
+ __atomic_thread_fence(__ATOMIC_SEQ_CST);
}
#endif
/*----------------------------------------------------------------------------*\
@@ -851,7 +851,7 @@ __movsd(unsigned long *__dst, unsigned long const *__src, size_t __n) {
}
static __inline__ void __DEFAULT_FN_ATTRS
__movsw(unsigned short *__dst, unsigned short const *__src, size_t __n) {
- __asm__("rep movsh" : : "D"(__dst), "S"(__src), "c"(__n)
+ __asm__("rep movsw" : : "D"(__dst), "S"(__src), "c"(__n)
: "%edi", "%esi", "%ecx");
}
static __inline__ void __DEFAULT_FN_ATTRS
@@ -866,7 +866,7 @@ __stosd(unsigned long *__dst, unsigned long __x, size_t __n) {
}
static __inline__ void __DEFAULT_FN_ATTRS
__stosw(unsigned short *__dst, unsigned short __x, size_t __n) {
- __asm__("rep stosh" : : "D"(__dst), "a"(__x), "c"(__n)
+ __asm__("rep stosw" : : "D"(__dst), "a"(__x), "c"(__n)
: "%edi", "%ecx");
}
#endif
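
A note on the barriers hunk above: the removed empty asm statement and the new __atomic_signal_fence(__ATOMIC_SEQ_CST) are both compiler-only barriers that emit no instructions, while the new __faststorefence uses __atomic_thread_fence(__ATOMIC_SEQ_CST), which on x86-64 lowers to a real fence or locked instruction. A minimal sketch of the difference using the same builtins (hypothetical functions, not part of the patch):

    /* Compiler-only barrier: forbids the compiler from reordering
       memory accesses across it, but emits no machine instruction. */
    void publish_local(int *data, volatile int *flag) {
      *data = 42;
      __atomic_signal_fence(__ATOMIC_SEQ_CST);
      *flag = 1;
    }

    /* Hardware barrier: additionally orders the stores as seen by
       other CPUs; on x86-64 this lowers to mfence or a locked op. */
    void publish_shared(int *data, volatile int *flag) {
      *data = 42;
      __atomic_thread_fence(__ATOMIC_SEQ_CST);
      *flag = 1;
    }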
diff --git a/contrib/llvm/tools/clang/lib/Headers/__clang_cuda_runtime_wrapper.h b/contrib/llvm/tools/clang/lib/Headers/__clang_cuda_runtime_wrapper.h
new file mode 100644
index 000000000000..8e5f0331cb38
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Headers/__clang_cuda_runtime_wrapper.h
@@ -0,0 +1,216 @@
+/*===---- __clang_cuda_runtime_wrapper.h - CUDA runtime support -------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+/*
+ * WARNING: This header is intended to be directly -include'd by
+ * the compiler and is not supposed to be included by users.
+ *
+ * CUDA headers are implemented in a way that currently makes it
+ * impossible for user code to #include them directly when compiling
+ * with Clang. They present a different view of CUDA-supplied
+ * functions depending on where in NVCC's compilation pipeline the
+ * headers are included. Neither of these modes provides function
+ * definitions with correct attributes, so we use the preprocessor to
+ * force the headers into a form that Clang can use.
+ *
+ * Similarly to NVCC which -include's cuda_runtime.h, Clang -include's
+ * this file during every CUDA compilation.
+ */
+
+#ifndef __CLANG_CUDA_RUNTIME_WRAPPER_H__
+#define __CLANG_CUDA_RUNTIME_WRAPPER_H__
+
+#if defined(__CUDA__) && defined(__clang__)
+
+// Include some standard headers to avoid CUDA headers including them
+// while some required macros (like __THROW) are in a weird state.
+#include <stdlib.h>
+#include <cmath>
+
+// Preserve common macros that will be changed below by us or by CUDA
+// headers.
+#pragma push_macro("__THROW")
+#pragma push_macro("__CUDA_ARCH__")
+
+// WARNING: Preprocessor hacks below are based on specific details of
+// CUDA-7.x headers and are not expected to work with any other
+// version of CUDA headers.
+#include "cuda.h"
+#if !defined(CUDA_VERSION)
+#error "cuda.h did not define CUDA_VERSION"
+#elif CUDA_VERSION < 7000 || CUDA_VERSION > 7050
+#error "Unsupported CUDA version!"
+#endif
+
+// Make the largest subset of device functions available during host
+// compilation -- SM_35 for the time being.
+#ifndef __CUDA_ARCH__
+#define __CUDA_ARCH__ 350
+#endif
+
+#include "cuda_builtin_vars.h"
+
+// No need for device_launch_parameters.h as cuda_builtin_vars.h above
+// has taken care of builtin variables declared in the file.
+#define __DEVICE_LAUNCH_PARAMETERS_H__
+
+// {math,device}_functions.h only have declarations of the
+// functions. We don't need them as we're going to pull in their
+// definitions from .hpp files.
+#define __DEVICE_FUNCTIONS_H__
+#define __MATH_FUNCTIONS_H__
+
+#undef __CUDACC__
+#define __CUDABE__
+// Disables definitions of device-side runtime support stubs in
+// cuda_device_runtime_api.h
+#define __CUDADEVRT_INTERNAL__
+#include "host_config.h"
+#include "host_defines.h"
+#include "driver_types.h"
+#include "common_functions.h"
+#undef __CUDADEVRT_INTERNAL__
+
+#undef __CUDABE__
+#define __CUDACC__
+#include "cuda_runtime.h"
+
+#undef __CUDACC__
+#define __CUDABE__
+
+// CUDA headers use __nvvm_memcpy and __nvvm_memset which Clang does
+// not have at the moment. Emulate them with a builtin memcpy/memset.
+#define __nvvm_memcpy(s,d,n,a) __builtin_memcpy(s,d,n)
+#define __nvvm_memset(d,c,n,a) __builtin_memset(d,c,n)
+
+#include "crt/host_runtime.h"
+#include "crt/device_runtime.h"
+// device_runtime.h defines __cxa_* macros that will conflict with
+// cxxabi.h.
+// FIXME: redefine these as __device__ functions.
+#undef __cxa_vec_ctor
+#undef __cxa_vec_cctor
+#undef __cxa_vec_dtor
+#undef __cxa_vec_new2
+#undef __cxa_vec_new3
+#undef __cxa_vec_delete2
+#undef __cxa_vec_delete
+#undef __cxa_vec_delete3
+#undef __cxa_pure_virtual
+
+// We need decls for functions in CUDA's libdevice with the __device__
+// attribute only. Alas, they come either as __host__ __device__ or
+// with no attributes at all. To work around that, define
+// __CUDACC_RTC__, which produces the HD variant, and undef __host__,
+// which gives us the desired decls with the __device__ attribute.
+#pragma push_macro("__host__")
+#define __host__
+#define __CUDACC_RTC__
+#include "device_functions_decls.h"
+#undef __CUDACC_RTC__
+
+// Temporarily poison the __host__ macro to ensure it's not used by
+// any of the headers we're about to include.
+#define __host__ UNEXPECTED_HOST_ATTRIBUTE
+
+// device_functions.hpp and math_functions*.hpp use 'static
+// __forceinline__' (with no __device__) for definitions of device
+// functions. Temporarily redefine __forceinline__ to include
+// __device__.
+#pragma push_macro("__forceinline__")
+#define __forceinline__ __device__ __inline__ __attribute__((always_inline))
+#include "device_functions.hpp"
+#include "math_functions.hpp"
+#include "math_functions_dbl_ptx3.hpp"
+#pragma pop_macro("__forceinline__")
+
+// Pull in host-only functions that are only available when neither
+// __CUDACC__ nor __CUDABE__ is defined.
+#undef __MATH_FUNCTIONS_HPP__
+#undef __CUDABE__
+#include "math_functions.hpp"
+// Alas, additional overloads for these functions are hard to get to.
+// Considering that we only need these overloads for a few functions,
+// we can provide them here.
+static inline float rsqrt(float a) { return rsqrtf(a); }
+static inline float rcbrt(float a) { return rcbrtf(a); }
+static inline float sinpi(float a) { return sinpif(a); }
+static inline float cospi(float a) { return cospif(a); }
+static inline void sincospi(float a, float *b, float *c) {
+  return sincospif(a, b, c); /* call the float variant, not ourselves */
+}
+static inline float erfcinv(float a) { return erfcinvf(a); }
+static inline float normcdfinv(float a) { return normcdfinvf(a); }
+static inline float normcdf(float a) { return normcdff(a); }
+static inline float erfcx(float a) { return erfcxf(a); }
+
+// For some reason the single-argument variant is not always declared
+// by CUDA headers. Alas, device_functions.hpp included below needs it.
+static inline __device__ void __brkpt(int c) { __brkpt(); }
+
+// Now include *.hpp with definitions of various GPU functions. Alas,
+// a lot of things get declared/defined with the __host__ attribute,
+// which we don't want, so we have to define it out. We also have to
+// include {device,math}_functions.hpp again in order to extract the
+// other branch of the #if/else inside.
+
+#define __host__
+#undef __CUDABE__
+#define __CUDACC__
+#undef __DEVICE_FUNCTIONS_HPP__
+#include "device_functions.hpp"
+#include "device_atomic_functions.hpp"
+#include "sm_20_atomic_functions.hpp"
+#include "sm_32_atomic_functions.hpp"
+#include "sm_20_intrinsics.hpp"
+// sm_30_intrinsics.h has declarations that use default arguments, so
+// we have to include it, and it will in turn include the .hpp file.
+#include "sm_30_intrinsics.h"
+#include "sm_32_intrinsics.hpp"
+#undef __MATH_FUNCTIONS_HPP__
+#include "math_functions.hpp"
+#pragma pop_macro("__host__")
+
+#include "texture_indirect_functions.h"
+
+// Restore state of __CUDA_ARCH__ and __THROW we had on entry.
+#pragma pop_macro("__CUDA_ARCH__")
+#pragma pop_macro("__THROW")
+
+// Set up compiler macros expected to be seen during compilation.
+#undef __CUDABE__
+#define __CUDACC__
+#define __NVCC__
+
+#if defined(__CUDA_ARCH__)
+// We need to emit an IR declaration for the non-existent
+// __nvvm_reflect() to let the backend know that it should be treated
+// as a const nothrow function, which is what the NVVMReflect pass
+// expects to see.
+extern "C" __device__ __attribute__((const)) int __nvvm_reflect(const void *);
+static __device__ __attribute__((used)) int __nvvm_reflect_anchor() {
+ return __nvvm_reflect("NONE");
+}
+#endif
+
+#endif // __CUDA__
+#endif // __CLANG_CUDA_RUNTIME_WRAPPER_H__
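
The wrapper above relies on #pragma push_macro/pop_macro to impersonate different stages of the NVCC pipeline around each include and then restore the original macro state. A self-contained sketch of that save/override/restore pattern (hypothetical macro name, not from the patch):

    #define ATTR original
    #pragma push_macro("ATTR")  /* save the current definition of ATTR */
    #undef ATTR
    #define ATTR overridden     /* headers included here see "overridden" */
    #pragma pop_macro("ATTR")   /* ATTR expands to "original" again */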
diff --git a/contrib/llvm/tools/clang/lib/Headers/__wmmintrin_aes.h b/contrib/llvm/tools/clang/lib/Headers/__wmmintrin_aes.h
index 9f594ee56092..100799ebfdb8 100644
--- a/contrib/llvm/tools/clang/lib/Headers/__wmmintrin_aes.h
+++ b/contrib/llvm/tools/clang/lib/Headers/__wmmintrin_aes.h
@@ -25,12 +25,8 @@
#include <emmintrin.h>
-#if !defined (__AES__)
-# error "AES instructions not enabled"
-#else
-
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("aes")))
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_aesenc_si128(__m128i __V, __m128i __R)
@@ -63,10 +59,8 @@ _mm_aesimc_si128(__m128i __V)
}
#define _mm_aeskeygenassist_si128(C, R) \
- __builtin_ia32_aeskeygenassist128((C), (R))
+ (__m128i)__builtin_ia32_aeskeygenassist128((__v2di)(__m128i)(C), (int)(R))
#undef __DEFAULT_FN_ATTRS
-#endif
-
#endif /* _WMMINTRIN_AES_H */
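
This #error-to-__target__ conversion recurs throughout the import (see the pclmul, adx, and sse4a hunks below): instead of rejecting the whole header when the feature macro is undefined, each intrinsic carries a __target__ attribute, so the header can always be included and the compiler only diagnoses calls made from code built without the feature. A reduced sketch of the idiom (hypothetical wrapper name, assuming clang's __builtin_ia32_aesenc128 builtin):

    #include <emmintrin.h>

    /* Declarable everywhere; callable only from code compiled with
       -maes or from a function that itself has the "aes" target. */
    static __inline__ __m128i
    __attribute__((__always_inline__, __nodebug__, __target__("aes")))
    my_aesenc(__m128i __V, __m128i __R) {
      return (__m128i)__builtin_ia32_aesenc128((__v2di)__V, (__v2di)__R);
    }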
diff --git a/contrib/llvm/tools/clang/lib/Headers/__wmmintrin_pclmul.h b/contrib/llvm/tools/clang/lib/Headers/__wmmintrin_pclmul.h
index 8d1f1b7c0868..68e944e92198 100644
--- a/contrib/llvm/tools/clang/lib/Headers/__wmmintrin_pclmul.h
+++ b/contrib/llvm/tools/clang/lib/Headers/__wmmintrin_pclmul.h
@@ -1,4 +1,4 @@
-/*===---- __wmmintrin_pclmul.h - AES intrinsics ----------------------------===
+/*===---- __wmmintrin_pclmul.h - PCLMUL intrinsics --------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -23,12 +23,8 @@
#ifndef _WMMINTRIN_PCLMUL_H
#define _WMMINTRIN_PCLMUL_H
-#if !defined (__PCLMUL__)
-# error "PCLMUL instruction is not enabled"
-#else
#define _mm_clmulepi64_si128(__X, __Y, __I) \
((__m128i)__builtin_ia32_pclmulqdq128((__v2di)(__m128i)(__X), \
(__v2di)(__m128i)(__Y), (char)(__I)))
-#endif
#endif /* _WMMINTRIN_PCLMUL_H */
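
For reference, a minimal use of the now always-visible PCLMUL intrinsic: a carry-less (GF(2) polynomial) multiply of the low 64-bit lanes (hypothetical example; the caller must carry the pclmul target):

    #include <wmmintrin.h>

    /* Carry-less multiply of the low qwords of __a and __b; the
       immediate selects which 64-bit lane of each operand is used. */
    __attribute__((__target__("pclmul")))
    static __m128i clmul_low(__m128i __a, __m128i __b) {
      return _mm_clmulepi64_si128(__a, __b, 0x00);
    }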
diff --git a/contrib/llvm/tools/clang/lib/Headers/adxintrin.h b/contrib/llvm/tools/clang/lib/Headers/adxintrin.h
index b8eb9cbf6ebb..ee347284178e 100644
--- a/contrib/llvm/tools/clang/lib/Headers/adxintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/adxintrin.h
@@ -32,8 +32,7 @@
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
/* Intrinsics that are available only if __ADX__ defined */
-#ifdef __ADX__
-static __inline unsigned char __DEFAULT_FN_ATTRS
+static __inline unsigned char __attribute__((__always_inline__, __nodebug__, __target__("adx")))
_addcarryx_u32(unsigned char __cf, unsigned int __x, unsigned int __y,
unsigned int *__p)
{
@@ -41,14 +40,13 @@ _addcarryx_u32(unsigned char __cf, unsigned int __x, unsigned int __y,
}
#ifdef __x86_64__
-static __inline unsigned char __DEFAULT_FN_ATTRS
+static __inline unsigned char __attribute__((__always_inline__, __nodebug__, __target__("adx")))
_addcarryx_u64(unsigned char __cf, unsigned long long __x,
unsigned long long __y, unsigned long long *__p)
{
return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p);
}
#endif
-#endif
/* Intrinsics that are also available if __ADX__ undefined */
static __inline unsigned char __DEFAULT_FN_ATTRS
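
_addcarryx_u32 threads the carry flag through a chain of additions, the building block of multi-precision arithmetic. A sketch of a 64-bit add composed from two carry-chained 32-bit adds (hypothetical helper; assumes adxintrin.h is reachable via immintrin.h and the caller is built for adx):

    #include <immintrin.h>

    /* a += b over two 32-bit limbs, least significant limb first. */
    __attribute__((__target__("adx")))
    static void add2(unsigned int a[2], const unsigned int b[2]) {
      unsigned char __cf = _addcarryx_u32(0, a[0], b[0], &a[0]);
      _addcarryx_u32(__cf, a[1], b[1], &a[1]);
    }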
diff --git a/contrib/llvm/tools/clang/lib/Headers/altivec.h b/contrib/llvm/tools/clang/lib/Headers/altivec.h
index 5c8eb5640546..dc0dcbc7385c 100644
--- a/contrib/llvm/tools/clang/lib/Headers/altivec.h
+++ b/contrib/llvm/tools/clang/lib/Headers/altivec.h
@@ -27,7 +27,7 @@
#error "AltiVec support not enabled"
#endif
-/* constants for mapping CR6 bits to predicate result. */
+/* Constants for mapping CR6 bits to predicate result. */
#define __CR6_EQ 0
#define __CR6_EQ_REV 1
@@ -137,7 +137,7 @@ static vector double __ATTRS_o_ai vec_abs(vector double __a) {
}
#endif
-/* vec_abss */
+/* vec_abss */
#define __builtin_altivec_abss_v16qi vec_abss
#define __builtin_altivec_abss_v8hi vec_abss
#define __builtin_altivec_abss_v4si vec_abss
@@ -278,6 +278,38 @@ vec_add(vector double __a, vector double __b) {
}
#endif // __VSX__
+/* vec_adde */
+
+#if defined(__POWER8_VECTOR__) && defined(__powerpc64__)
+static vector signed __int128 __ATTRS_o_ai
+vec_adde(vector signed __int128 __a, vector signed __int128 __b,
+ vector signed __int128 __c) {
+ return __builtin_altivec_vaddeuqm(__a, __b, __c);
+}
+
+static vector unsigned __int128 __ATTRS_o_ai
+vec_adde(vector unsigned __int128 __a, vector unsigned __int128 __b,
+ vector unsigned __int128 __c) {
+ return __builtin_altivec_vaddeuqm(__a, __b, __c);
+}
+#endif
+
+/* vec_addec */
+
+#if defined(__POWER8_VECTOR__) && defined(__powerpc64__)
+static vector signed __int128 __ATTRS_o_ai
+vec_addec(vector signed __int128 __a, vector signed __int128 __b,
+ vector signed __int128 __c) {
+ return __builtin_altivec_vaddecuq(__a, __b, __c);
+}
+
+static vector unsigned __int128 __ATTRS_o_ai
+vec_addec(vector unsigned __int128 __a, vector unsigned __int128 __b,
+ vector unsigned __int128 __c) {
+ return __builtin_altivec_vaddecuq(__a, __b, __c);
+}
+#endif
+
/* vec_vaddubm */
#define __builtin_altivec_vaddubm vec_vaddubm
@@ -390,6 +422,12 @@ vec_vaddfp(vector float __a, vector float __b) {
/* vec_addc */
+static vector signed int __ATTRS_o_ai vec_addc(vector signed int __a,
+ vector signed int __b) {
+ return (vector signed int)__builtin_altivec_vaddcuw((vector unsigned int)__a,
+ (vector unsigned int)__b);
+}
+
static vector unsigned int __ATTRS_o_ai vec_addc(vector unsigned int __a,
vector unsigned int __b) {
return __builtin_altivec_vaddcuw(__a, __b);
@@ -398,7 +436,9 @@ static vector unsigned int __ATTRS_o_ai vec_addc(vector unsigned int __a,
#if defined(__POWER8_VECTOR__) && defined(__powerpc64__)
static vector signed __int128 __ATTRS_o_ai
vec_addc(vector signed __int128 __a, vector signed __int128 __b) {
- return __builtin_altivec_vaddcuq(__a, __b);
+ return (vector signed __int128)__builtin_altivec_vaddcuq(
+ (vector unsigned __int128)__a,
+ (vector unsigned __int128)__b);
}
static vector unsigned __int128 __ATTRS_o_ai
@@ -1512,48 +1552,6 @@ vec_cmpeq(vector double __a, vector double __b) {
}
#endif
-/* vec_cmpge */
-
-static vector bool int __ATTRS_o_ai
-vec_cmpge(vector float __a, vector float __b) {
-#ifdef __VSX__
- return (vector bool int)__builtin_vsx_xvcmpgesp(__a, __b);
-#else
- return (vector bool int)__builtin_altivec_vcmpgefp(__a, __b);
-#endif
-}
-
-#ifdef __VSX__
-static vector bool long long __ATTRS_o_ai
-vec_cmpge(vector double __a, vector double __b) {
- return (vector bool long long)__builtin_vsx_xvcmpgedp(__a, __b);
-}
-#endif
-
-#ifdef __POWER8_VECTOR__
-/* Forwrad declarations as the functions are used here */
-static vector bool long long __ATTRS_o_ai
-vec_cmpgt(vector unsigned long long __a, vector unsigned long long __b);
-static vector bool long long __ATTRS_o_ai
-vec_cmpgt(vector signed long long __a, vector signed long long __b);
-
-static vector bool long long __ATTRS_o_ai
-vec_cmpge(vector signed long long __a, vector signed long long __b) {
- return ~(vec_cmpgt(__b, __a));
-}
-
-static vector bool long long __ATTRS_o_ai
-vec_cmpge(vector unsigned long long __a, vector unsigned long long __b) {
- return ~(vec_cmpgt(__b, __a));
-}
-#endif
-
-/* vec_vcmpgefp */
-
-static vector bool int __attribute__((__always_inline__))
-vec_vcmpgefp(vector float __a, vector float __b) {
- return (vector bool int)__builtin_altivec_vcmpgefp(__a, __b);
-}
/* vec_cmpgt */
@@ -1613,6 +1611,74 @@ vec_cmpgt(vector double __a, vector double __b) {
return (vector bool long long)__builtin_vsx_xvcmpgtdp(__a, __b);
}
#endif
+
+/* vec_cmpge */
+
+static vector bool char __ATTRS_o_ai
+vec_cmpge (vector signed char __a, vector signed char __b) {
+ return ~(vec_cmpgt(__b, __a));
+}
+
+static vector bool char __ATTRS_o_ai
+vec_cmpge (vector unsigned char __a, vector unsigned char __b) {
+ return ~(vec_cmpgt(__b, __a));
+}
+
+static vector bool short __ATTRS_o_ai
+vec_cmpge (vector signed short __a, vector signed short __b) {
+ return ~(vec_cmpgt(__b, __a));
+}
+
+static vector bool short __ATTRS_o_ai
+vec_cmpge (vector unsigned short __a, vector unsigned short __b) {
+ return ~(vec_cmpgt(__b, __a));
+}
+
+static vector bool int __ATTRS_o_ai
+vec_cmpge (vector signed int __a, vector signed int __b) {
+ return ~(vec_cmpgt(__b, __a));
+}
+
+static vector bool int __ATTRS_o_ai
+vec_cmpge (vector unsigned int __a, vector unsigned int __b) {
+ return ~(vec_cmpgt(__b, __a));
+}
+
+static vector bool int __ATTRS_o_ai
+vec_cmpge(vector float __a, vector float __b) {
+#ifdef __VSX__
+ return (vector bool int)__builtin_vsx_xvcmpgesp(__a, __b);
+#else
+ return (vector bool int)__builtin_altivec_vcmpgefp(__a, __b);
+#endif
+}
+
+#ifdef __VSX__
+static vector bool long long __ATTRS_o_ai
+vec_cmpge(vector double __a, vector double __b) {
+ return (vector bool long long)__builtin_vsx_xvcmpgedp(__a, __b);
+}
+#endif
+
+#ifdef __POWER8_VECTOR__
+static vector bool long long __ATTRS_o_ai
+vec_cmpge(vector signed long long __a, vector signed long long __b) {
+ return ~(vec_cmpgt(__b, __a));
+}
+
+static vector bool long long __ATTRS_o_ai
+vec_cmpge(vector unsigned long long __a, vector unsigned long long __b) {
+ return ~(vec_cmpgt(__b, __a));
+}
+#endif
+
+/* vec_vcmpgefp */
+
+static vector bool int __attribute__((__always_inline__))
+vec_vcmpgefp(vector float __a, vector float __b) {
+ return (vector bool int)__builtin_altivec_vcmpgefp(__a, __b);
+}
+
/* vec_vcmpgtsb */
static vector bool char __attribute__((__always_inline__))
@@ -1664,6 +1730,36 @@ vec_vcmpgtfp(vector float __a, vector float __b) {
/* vec_cmple */
+static vector bool char __ATTRS_o_ai
+vec_cmple (vector signed char __a, vector signed char __b) {
+ return vec_cmpge(__b, __a);
+}
+
+static vector bool char __ATTRS_o_ai
+vec_cmple (vector unsigned char __a, vector unsigned char __b) {
+ return vec_cmpge(__b, __a);
+}
+
+static vector bool short __ATTRS_o_ai
+vec_cmple (vector signed short __a, vector signed short __b) {
+ return vec_cmpge(__b, __a);
+}
+
+static vector bool short __ATTRS_o_ai
+vec_cmple (vector unsigned short __a, vector unsigned short __b) {
+ return vec_cmpge(__b, __a);
+}
+
+static vector bool int __ATTRS_o_ai
+vec_cmple (vector signed int __a, vector signed int __b) {
+ return vec_cmpge(__b, __a);
+}
+
+static vector bool int __ATTRS_o_ai
+vec_cmple (vector unsigned int __a, vector unsigned int __b) {
+ return vec_cmpge(__b, __a);
+}
+
static vector bool int __ATTRS_o_ai
vec_cmple(vector float __a, vector float __b) {
return vec_cmpge(__b, __a);
@@ -1837,6 +1933,20 @@ vec_vctuxs(vector float __a, int __b) {
return __builtin_altivec_vctuxs(__a, __b);
}
+/* vec_double */
+
+#ifdef __VSX__
+static vector double __ATTRS_o_ai vec_double (vector signed long long __a) {
+ vector double __ret = { __a[0], __a[1] };
+ return __ret;
+}
+
+static vector double __ATTRS_o_ai vec_double (vector unsigned long long __a) {
+ vector double __ret = { __a[0], __a[1] };
+ return __ret;
+}
+#endif
+
/* vec_div */
/* Integer vector divides (vectors are scalarized, elements divided
@@ -1942,34 +2052,16 @@ static vector signed char __ATTRS_o_ai vec_eqv(vector signed char __a,
(vector unsigned int)__b);
}
-static vector signed char __ATTRS_o_ai vec_eqv(vector bool char __a,
- vector signed char __b) {
- return (vector signed char)__builtin_vsx_xxleqv((vector unsigned int)__a,
- (vector unsigned int)__b);
-}
-
-static vector signed char __ATTRS_o_ai vec_eqv(vector signed char __a,
- vector bool char __b) {
- return (vector signed char)__builtin_vsx_xxleqv((vector unsigned int)__a,
- (vector unsigned int)__b);
-}
-
static vector unsigned char __ATTRS_o_ai vec_eqv(vector unsigned char __a,
vector unsigned char __b) {
return (vector unsigned char)__builtin_vsx_xxleqv((vector unsigned int)__a,
(vector unsigned int)__b);
}
-static vector unsigned char __ATTRS_o_ai vec_eqv(vector bool char __a,
- vector unsigned char __b) {
- return (vector unsigned char)__builtin_vsx_xxleqv((vector unsigned int)__a,
- (vector unsigned int)__b);
-}
-
-static vector unsigned char __ATTRS_o_ai vec_eqv(vector unsigned char __a,
- vector bool char __b) {
- return (vector unsigned char)__builtin_vsx_xxleqv((vector unsigned int)__a,
- (vector unsigned int)__b);
+static vector bool char __ATTRS_o_ai vec_eqv(vector bool char __a,
+ vector bool char __b) {
+ return (vector bool char)__builtin_vsx_xxleqv((vector unsigned int)__a,
+ (vector unsigned int)__b);
}
static vector signed short __ATTRS_o_ai vec_eqv(vector signed short __a,
@@ -1978,70 +2070,33 @@ static vector signed short __ATTRS_o_ai vec_eqv(vector signed short __a,
(vector unsigned int)__b);
}
-static vector signed short __ATTRS_o_ai vec_eqv(vector bool short __a,
- vector signed short __b) {
- return (vector signed short)__builtin_vsx_xxleqv((vector unsigned int)__a,
- (vector unsigned int)__b);
-}
-
-static vector signed short __ATTRS_o_ai vec_eqv(vector signed short __a,
- vector bool short __b) {
- return (vector signed short)__builtin_vsx_xxleqv((vector unsigned int)__a,
- (vector unsigned int)__b);
-}
-
static vector unsigned short __ATTRS_o_ai vec_eqv(vector unsigned short __a,
vector unsigned short __b) {
return (vector unsigned short)__builtin_vsx_xxleqv((vector unsigned int)__a,
(vector unsigned int)__b);
}
-static vector unsigned short __ATTRS_o_ai vec_eqv(vector bool short __a,
- vector unsigned short __b) {
- return (vector unsigned short)__builtin_vsx_xxleqv((vector unsigned int)__a,
- (vector unsigned int)__b);
-}
-
-static vector unsigned short __ATTRS_o_ai vec_eqv(vector unsigned short __a,
- vector bool short __b) {
- return (vector unsigned short)__builtin_vsx_xxleqv((vector unsigned int)__a,
- (vector unsigned int)__b);
-}
-
-static vector signed int __ATTRS_o_ai vec_eqv(vector signed int __a,
- vector signed int __b) {
- return (vector signed int)__builtin_vsx_xxleqv((vector unsigned int)__a,
- (vector unsigned int)__b);
-}
-
-static vector signed int __ATTRS_o_ai vec_eqv(vector bool int __a,
- vector signed int __b) {
- return (vector signed int)__builtin_vsx_xxleqv((vector unsigned int)__a,
+static vector bool short __ATTRS_o_ai vec_eqv(vector bool short __a,
+ vector bool short __b) {
+ return (vector bool short)__builtin_vsx_xxleqv((vector unsigned int)__a,
(vector unsigned int)__b);
}
static vector signed int __ATTRS_o_ai vec_eqv(vector signed int __a,
- vector bool int __b) {
+ vector signed int __b) {
return (vector signed int)__builtin_vsx_xxleqv((vector unsigned int)__a,
(vector unsigned int)__b);
}
static vector unsigned int __ATTRS_o_ai vec_eqv(vector unsigned int __a,
vector unsigned int __b) {
- return __builtin_vsx_xxleqv((vector unsigned int)__a,
- (vector unsigned int)__b);
-}
-
-static vector unsigned int __ATTRS_o_ai vec_eqv(vector bool int __a,
- vector unsigned int __b) {
- return __builtin_vsx_xxleqv((vector unsigned int)__a,
- (vector unsigned int)__b);
+ return __builtin_vsx_xxleqv(__a, __b);
}
-static vector unsigned int __ATTRS_o_ai vec_eqv(vector unsigned int __a,
- vector bool int __b) {
- return __builtin_vsx_xxleqv((vector unsigned int)__a,
- (vector unsigned int)__b);
+static vector bool int __ATTRS_o_ai vec_eqv(vector bool int __a,
+ vector bool int __b) {
+ return (vector bool int)__builtin_vsx_xxleqv((vector unsigned int)__a,
+ (vector unsigned int)__b);
}
static vector signed long long __ATTRS_o_ai
@@ -2050,33 +2105,15 @@ vec_eqv(vector signed long long __a, vector signed long long __b) {
__builtin_vsx_xxleqv((vector unsigned int)__a, (vector unsigned int)__b);
}
-static vector signed long long __ATTRS_o_ai
-vec_eqv(vector bool long long __a, vector signed long long __b) {
- return (vector signed long long)
- __builtin_vsx_xxleqv((vector unsigned int)__a, (vector unsigned int)__b);
-}
-
-static vector signed long long __ATTRS_o_ai
-vec_eqv(vector signed long long __a, vector bool long long __b) {
- return (vector signed long long)
- __builtin_vsx_xxleqv((vector unsigned int)__a, (vector unsigned int)__b);
-}
-
static vector unsigned long long __ATTRS_o_ai
vec_eqv(vector unsigned long long __a, vector unsigned long long __b) {
return (vector unsigned long long)
__builtin_vsx_xxleqv((vector unsigned int)__a, (vector unsigned int)__b);
}
-static vector unsigned long long __ATTRS_o_ai
-vec_eqv(vector bool long long __a, vector unsigned long long __b) {
- return (vector unsigned long long)
- __builtin_vsx_xxleqv((vector unsigned int)__a, (vector unsigned int)__b);
-}
-
-static vector unsigned long long __ATTRS_o_ai
-vec_eqv(vector unsigned long long __a, vector bool long long __b) {
- return (vector unsigned long long)
+static vector bool long long __ATTRS_o_ai
+vec_eqv(vector bool long long __a, vector bool long long __b) {
+ return (vector bool long long)
__builtin_vsx_xxleqv((vector unsigned int)__a, (vector unsigned int)__b);
}
@@ -2085,35 +2122,11 @@ static vector float __ATTRS_o_ai vec_eqv(vector float __a, vector float __b) {
(vector unsigned int)__b);
}
-static vector float __ATTRS_o_ai vec_eqv(vector bool int __a,
- vector float __b) {
- return (vector float)__builtin_vsx_xxleqv((vector unsigned int)__a,
- (vector unsigned int)__b);
-}
-
-static vector float __ATTRS_o_ai vec_eqv(vector float __a,
- vector bool int __b) {
- return (vector float)__builtin_vsx_xxleqv((vector unsigned int)__a,
- (vector unsigned int)__b);
-}
-
static vector double __ATTRS_o_ai vec_eqv(vector double __a,
vector double __b) {
return (vector double)__builtin_vsx_xxleqv((vector unsigned int)__a,
(vector unsigned int)__b);
}
-
-static vector double __ATTRS_o_ai vec_eqv(vector bool long long __a,
- vector double __b) {
- return (vector double)__builtin_vsx_xxleqv((vector unsigned int)__a,
- (vector unsigned int)__b);
-}
-
-static vector double __ATTRS_o_ai vec_eqv(vector double __a,
- vector bool long long __b) {
- return (vector double)__builtin_vsx_xxleqv((vector unsigned int)__a,
- (vector unsigned int)__b);
-}
#endif
/* vec_expte */
@@ -2815,6 +2828,38 @@ static vector unsigned char __ATTRS_o_ai vec_lvsr(int __a, const float *__b) {
#endif
/* vec_madd */
+static vector signed short __ATTRS_o_ai
+vec_mladd(vector signed short, vector signed short, vector signed short);
+static vector signed short __ATTRS_o_ai
+vec_mladd(vector signed short, vector unsigned short, vector unsigned short);
+static vector signed short __ATTRS_o_ai
+vec_mladd(vector unsigned short, vector signed short, vector signed short);
+static vector unsigned short __ATTRS_o_ai
+vec_mladd(vector unsigned short, vector unsigned short, vector unsigned short);
+
+static vector signed short __ATTRS_o_ai
+vec_madd(vector signed short __a, vector signed short __b,
+ vector signed short __c) {
+ return vec_mladd(__a, __b, __c);
+}
+
+static vector signed short __ATTRS_o_ai
+vec_madd(vector signed short __a, vector unsigned short __b,
+ vector unsigned short __c) {
+ return vec_mladd(__a, __b, __c);
+}
+
+static vector signed short __ATTRS_o_ai
+vec_madd(vector unsigned short __a, vector signed short __b,
+ vector signed short __c) {
+ return vec_mladd(__a, __b, __c);
+}
+
+static vector unsigned short __ATTRS_o_ai
+vec_madd(vector unsigned short __a, vector unsigned short __b,
+ vector unsigned short __c) {
+ return vec_mladd(__a, __b, __c);
+}
static vector float __ATTRS_o_ai
vec_madd(vector float __a, vector float __b, vector float __c) {
@@ -3256,6 +3301,16 @@ vec_mergeh(vector bool long long __a, vector unsigned long long __b) {
0x10, 0x11, 0x12, 0x13,
0x14, 0x15, 0x16, 0x17));
}
+
+static vector bool long long __ATTRS_o_ai
+vec_mergeh(vector bool long long __a, vector bool long long __b) {
+ return vec_perm(__a, __b,
+ (vector unsigned char)(0x00, 0x01, 0x02, 0x03,
+ 0x04, 0x05, 0x06, 0x07,
+ 0x10, 0x11, 0x12, 0x13,
+ 0x14, 0x15, 0x16, 0x17));
+}
+
static vector double __ATTRS_o_ai vec_mergeh(vector double __a,
vector double __b) {
return vec_perm(__a, __b,
@@ -3519,6 +3574,14 @@ vec_mergel(vector bool long long __a, vector unsigned long long __b) {
0x18, 0X19, 0x1A, 0x1B,
0x1C, 0x1D, 0x1E, 0x1F));
}
+static vector bool long long __ATTRS_o_ai
+vec_mergel(vector bool long long __a, vector bool long long __b) {
+ return vec_perm(__a, __b,
+ (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B,
+ 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x18, 0X19, 0x1A, 0x1B,
+ 0x1C, 0x1D, 0x1E, 0x1F));
+}
static vector double __ATTRS_o_ai
vec_mergel(vector double __a, vector double __b) {
return vec_perm(__a, __b,
@@ -3651,21 +3714,21 @@ static vector float __ATTRS_o_ai vec_vmrglw(vector float __a,
static vector bool int __ATTRS_o_ai
vec_mergee(vector bool int __a, vector bool int __b) {
return vec_perm(__a, __b, (vector unsigned char)
- (0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
+ (0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B));
}
static vector signed int __ATTRS_o_ai
vec_mergee(vector signed int __a, vector signed int __b) {
return vec_perm(__a, __b, (vector unsigned char)
- (0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
+ (0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B));
}
static vector unsigned int __ATTRS_o_ai
vec_mergee(vector unsigned int __a, vector unsigned int __b) {
return vec_perm(__a, __b, (vector unsigned char)
- (0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
+ (0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B));
}
@@ -4439,6 +4502,11 @@ static vector unsigned char __ATTRS_o_ai vec_nand(vector bool char __a,
return ~(__a & __b);
}
+static vector bool char __ATTRS_o_ai vec_nand(vector bool char __a,
+ vector bool char __b) {
+ return ~(__a & __b);
+}
+
static vector signed short __ATTRS_o_ai vec_nand(vector signed short __a,
vector signed short __b) {
return ~(__a & __b);
@@ -4465,8 +4533,8 @@ static vector unsigned short __ATTRS_o_ai vec_nand(vector unsigned short __a,
}
-static vector unsigned short __ATTRS_o_ai vec_nand(vector bool short __a,
- vector unsigned short __b) {
+static vector bool short __ATTRS_o_ai vec_nand(vector bool short __a,
+ vector bool short __b) {
return ~(__a & __b);
}
@@ -4501,6 +4569,11 @@ static vector unsigned int __ATTRS_o_ai vec_nand(vector bool int __a,
return ~(__a & __b);
}
+static vector bool int __ATTRS_o_ai vec_nand(vector bool int __a,
+ vector bool int __b) {
+ return ~(__a & __b);
+}
+
static vector signed long long __ATTRS_o_ai
vec_nand(vector signed long long __a, vector signed long long __b) {
return ~(__a & __b);
@@ -4531,6 +4604,11 @@ vec_nand(vector bool long long __a, vector unsigned long long __b) {
return ~(__a & __b);
}
+static vector bool long long __ATTRS_o_ai
+vec_nand(vector bool long long __a, vector bool long long __b) {
+ return ~(__a & __b);
+}
+
#endif
/* vec_nmadd */
@@ -4909,6 +4987,11 @@ static vector unsigned char __ATTRS_o_ai vec_orc(vector bool char __a,
return __a | ~__b;
}
+static vector bool char __ATTRS_o_ai vec_orc(vector bool char __a,
+ vector bool char __b) {
+ return __a | ~__b;
+}
+
static vector signed short __ATTRS_o_ai vec_orc(vector signed short __a,
vector signed short __b) {
return __a | ~__b;
@@ -4939,6 +5022,11 @@ vec_orc(vector bool short __a, vector unsigned short __b) {
return __a | ~__b;
}
+static vector bool short __ATTRS_o_ai vec_orc(vector bool short __a,
+ vector bool short __b) {
+ return __a | ~__b;
+}
+
static vector signed int __ATTRS_o_ai vec_orc(vector signed int __a,
vector signed int __b) {
return __a | ~__b;
@@ -4969,6 +5057,11 @@ static vector unsigned int __ATTRS_o_ai vec_orc(vector bool int __a,
return __a | ~__b;
}
+static vector bool int __ATTRS_o_ai vec_orc(vector bool int __a,
+ vector bool int __b) {
+ return __a | ~__b;
+}
+
static vector signed long long __ATTRS_o_ai
vec_orc(vector signed long long __a, vector signed long long __b) {
return __a | ~__b;
@@ -4998,6 +5091,11 @@ static vector unsigned long long __ATTRS_o_ai
vec_orc(vector bool long long __a, vector unsigned long long __b) {
return __a | ~__b;
}
+
+static vector bool long long __ATTRS_o_ai
+vec_orc(vector bool long long __a, vector bool long long __b) {
+ return __a | ~__b;
+}
#endif
/* vec_vor */
@@ -9191,17 +9289,27 @@ vec_sub(vector unsigned __int128 __a, vector unsigned __int128 __b) {
}
#endif // defined(__POWER8_VECTOR__) && defined(__powerpc64__)
-static vector float __ATTRS_o_ai vec_sub(vector float __a, vector float __b) {
+#ifdef __VSX__
+static vector signed long long __ATTRS_o_ai
+vec_sub(vector signed long long __a, vector signed long long __b) {
+ return __a - __b;
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_sub(vector unsigned long long __a, vector unsigned long long __b) {
return __a - __b;
}
-#ifdef __VSX__
static vector double __ATTRS_o_ai
vec_sub(vector double __a, vector double __b) {
return __a - __b;
}
#endif
+static vector float __ATTRS_o_ai vec_sub(vector float __a, vector float __b) {
+ return __a - __b;
+}
+
/* vec_vsububm */
#define __builtin_altivec_vsububm vec_vsububm
@@ -10390,7 +10498,12 @@ static unsigned char __ATTRS_o_ai vec_extract(vector unsigned char __a,
return __a[__b];
}
-static short __ATTRS_o_ai vec_extract(vector short __a, int __b) {
+static unsigned char __ATTRS_o_ai vec_extract(vector bool char __a,
+ int __b) {
+ return __a[__b];
+}
+
+static signed short __ATTRS_o_ai vec_extract(vector signed short __a, int __b) {
return __a[__b];
}
@@ -10399,7 +10512,12 @@ static unsigned short __ATTRS_o_ai vec_extract(vector unsigned short __a,
return __a[__b];
}
-static int __ATTRS_o_ai vec_extract(vector int __a, int __b) {
+static unsigned short __ATTRS_o_ai vec_extract(vector bool short __a,
+ int __b) {
+ return __a[__b];
+}
+
+static signed int __ATTRS_o_ai vec_extract(vector signed int __a, int __b) {
return __a[__b];
}
@@ -10407,6 +10525,31 @@ static unsigned int __ATTRS_o_ai vec_extract(vector unsigned int __a, int __b) {
return __a[__b];
}
+static unsigned int __ATTRS_o_ai vec_extract(vector bool int __a, int __b) {
+ return __a[__b];
+}
+
+#ifdef __VSX__
+static signed long long __ATTRS_o_ai vec_extract(vector signed long long __a,
+ int __b) {
+ return __a[__b];
+}
+
+static unsigned long long __ATTRS_o_ai
+vec_extract(vector unsigned long long __a, int __b) {
+ return __a[__b];
+}
+
+static unsigned long long __ATTRS_o_ai vec_extract(vector bool long long __a,
+ int __b) {
+ return __a[__b];
+}
+
+static double __ATTRS_o_ai vec_extract(vector double __a, int __b) {
+ return __a[__b];
+}
+#endif
+
static float __ATTRS_o_ai vec_extract(vector float __a, int __b) {
return __a[__b];
}
@@ -10427,8 +10570,16 @@ static vector unsigned char __ATTRS_o_ai vec_insert(unsigned char __a,
return __b;
}
-static vector short __ATTRS_o_ai vec_insert(short __a, vector short __b,
- int __c) {
+static vector bool char __ATTRS_o_ai vec_insert(unsigned char __a,
+ vector bool char __b,
+ int __c) {
+ __b[__c] = __a;
+ return __b;
+}
+
+static vector signed short __ATTRS_o_ai vec_insert(signed short __a,
+ vector signed short __b,
+ int __c) {
__b[__c] = __a;
return __b;
}
@@ -10440,7 +10591,16 @@ static vector unsigned short __ATTRS_o_ai vec_insert(unsigned short __a,
return __b;
}
-static vector int __ATTRS_o_ai vec_insert(int __a, vector int __b, int __c) {
+static vector bool short __ATTRS_o_ai vec_insert(unsigned short __a,
+ vector bool short __b,
+ int __c) {
+ __b[__c] = __a;
+ return __b;
+}
+
+static vector signed int __ATTRS_o_ai vec_insert(signed int __a,
+ vector signed int __b,
+ int __c) {
__b[__c] = __a;
return __b;
}
@@ -10452,6 +10612,38 @@ static vector unsigned int __ATTRS_o_ai vec_insert(unsigned int __a,
return __b;
}
+static vector bool int __ATTRS_o_ai vec_insert(unsigned int __a,
+ vector bool int __b,
+ int __c) {
+ __b[__c] = __a;
+ return __b;
+}
+
+#ifdef __VSX__
+static vector signed long long __ATTRS_o_ai
+vec_insert(signed long long __a, vector signed long long __b, int __c) {
+ __b[__c] = __a;
+ return __b;
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_insert(unsigned long long __a, vector unsigned long long __b, int __c) {
+ __b[__c] = __a;
+ return __b;
+}
+
+static vector bool long long __ATTRS_o_ai
+vec_insert(unsigned long long __a, vector bool long long __b, int __c) {
+ __b[__c] = __a;
+ return __b;
+}
+static vector double __ATTRS_o_ai vec_insert(double __a, vector double __b,
+ int __c) {
+ __b[__c] = __a;
+ return __b;
+}
+#endif
+
static vector float __ATTRS_o_ai vec_insert(float __a, vector float __b,
int __c) {
__b[__c] = __a;
@@ -11376,6 +11568,33 @@ static vector unsigned int __ATTRS_o_ai vec_splats(unsigned int __a) {
return (vector unsigned int)(__a);
}
+#ifdef __VSX__
+static vector signed long long __ATTRS_o_ai vec_splats(signed long long __a) {
+ return (vector signed long long)(__a);
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_splats(unsigned long long __a) {
+ return (vector unsigned long long)(__a);
+}
+
+#if defined(__POWER8_VECTOR__) && defined(__powerpc64__)
+static vector signed __int128 __ATTRS_o_ai vec_splats(signed __int128 __a) {
+ return (vector signed __int128)(__a);
+}
+
+static vector unsigned __int128 __ATTRS_o_ai
+vec_splats(unsigned __int128 __a) {
+ return (vector unsigned __int128)(__a);
+}
+
+#endif
+
+static vector double __ATTRS_o_ai vec_splats(double __a) {
+ return (vector double)(__a);
+}
+#endif
+
static vector float __ATTRS_o_ai vec_splats(float __a) {
return (vector float)(__a);
}
@@ -11546,8 +11765,18 @@ static int __ATTRS_o_ai vec_all_eq(vector bool long long __a,
#endif
static int __ATTRS_o_ai vec_all_eq(vector float __a, vector float __b) {
+#ifdef __VSX__
+ return __builtin_vsx_xvcmpeqsp_p(__CR6_LT, __a, __b);
+#else
return __builtin_altivec_vcmpeqfp_p(__CR6_LT, __a, __b);
+#endif
+}
+
+#ifdef __VSX__
+static int __ATTRS_o_ai vec_all_eq(vector double __a, vector double __b) {
+ return __builtin_vsx_xvcmpeqdp_p(__CR6_LT, __a, __b);
}
+#endif
/* vec_all_ge */
@@ -11698,8 +11927,18 @@ static int __ATTRS_o_ai vec_all_ge(vector bool long long __a,
#endif
static int __ATTRS_o_ai vec_all_ge(vector float __a, vector float __b) {
+#ifdef __VSX__
+ return __builtin_vsx_xvcmpgesp_p(__CR6_LT, __a, __b);
+#else
return __builtin_altivec_vcmpgefp_p(__CR6_LT, __a, __b);
+#endif
+}
+
+#ifdef __VSX__
+static int __ATTRS_o_ai vec_all_ge(vector double __a, vector double __b) {
+ return __builtin_vsx_xvcmpgedp_p(__CR6_LT, __a, __b);
}
+#endif
/* vec_all_gt */
@@ -11850,8 +12089,18 @@ static int __ATTRS_o_ai vec_all_gt(vector bool long long __a,
#endif
static int __ATTRS_o_ai vec_all_gt(vector float __a, vector float __b) {
+#ifdef __VSX__
+ return __builtin_vsx_xvcmpgtsp_p(__CR6_LT, __a, __b);
+#else
return __builtin_altivec_vcmpgtfp_p(__CR6_LT, __a, __b);
+#endif
+}
+
+#ifdef __VSX__
+static int __ATTRS_o_ai vec_all_gt(vector double __a, vector double __b) {
+ return __builtin_vsx_xvcmpgtdp_p(__CR6_LT, __a, __b);
}
+#endif
/* vec_all_in */
@@ -12010,9 +12259,19 @@ static int __ATTRS_o_ai vec_all_le(vector bool long long __a,
#endif
static int __ATTRS_o_ai vec_all_le(vector float __a, vector float __b) {
+#ifdef __VSX__
+ return __builtin_vsx_xvcmpgesp_p(__CR6_LT, __b, __a);
+#else
return __builtin_altivec_vcmpgefp_p(__CR6_LT, __b, __a);
+#endif
}
+#ifdef __VSX__
+static int __ATTRS_o_ai vec_all_le(vector double __a, vector double __b) {
+ return __builtin_vsx_xvcmpgedp_p(__CR6_LT, __b, __a);
+}
+#endif
+
/* vec_all_lt */
static int __ATTRS_o_ai vec_all_lt(vector signed char __a,
@@ -12163,15 +12422,35 @@ static int __ATTRS_o_ai vec_all_lt(vector bool long long __a,
#endif
static int __ATTRS_o_ai vec_all_lt(vector float __a, vector float __b) {
+#ifdef __VSX__
+ return __builtin_vsx_xvcmpgtsp_p(__CR6_LT, __b, __a);
+#else
return __builtin_altivec_vcmpgtfp_p(__CR6_LT, __b, __a);
+#endif
+}
+
+#ifdef __VSX__
+static int __ATTRS_o_ai vec_all_lt(vector double __a, vector double __b) {
+ return __builtin_vsx_xvcmpgtdp_p(__CR6_LT, __b, __a);
}
+#endif
/* vec_all_nan */
-static int __attribute__((__always_inline__)) vec_all_nan(vector float __a) {
+static int __ATTRS_o_ai vec_all_nan(vector float __a) {
+#ifdef __VSX__
+ return __builtin_vsx_xvcmpeqsp_p(__CR6_EQ, __a, __a);
+#else
return __builtin_altivec_vcmpeqfp_p(__CR6_EQ, __a, __a);
+#endif
}
+#ifdef __VSX__
+static int __ATTRS_o_ai vec_all_nan(vector double __a) {
+ return __builtin_vsx_xvcmpeqdp_p(__CR6_EQ, __a, __a);
+}
+#endif
+
/* vec_all_ne */
static int __ATTRS_o_ai vec_all_ne(vector signed char __a,
@@ -12337,22 +12616,54 @@ static int __ATTRS_o_ai vec_all_ne(vector bool long long __a,
#endif
static int __ATTRS_o_ai vec_all_ne(vector float __a, vector float __b) {
+#ifdef __VSX__
+ return __builtin_vsx_xvcmpeqdp_p(__CR6_EQ, __a, __b);
+#else
return __builtin_altivec_vcmpeqfp_p(__CR6_EQ, __a, __b);
+#endif
}
+#ifdef __VSX__
+static int __ATTRS_o_ai vec_all_ne(vector double __a, vector double __b) {
+ return __builtin_vsx_xvcmpeqdp_p(__CR6_EQ, __a, __b);
+}
+#endif
+
/* vec_all_nge */
-static int __attribute__((__always_inline__))
+static int __ATTRS_o_ai
vec_all_nge(vector float __a, vector float __b) {
+#ifdef __VSX__
+ return __builtin_vsx_xvcmpgesp_p(__CR6_EQ, __a, __b);
+#else
return __builtin_altivec_vcmpgefp_p(__CR6_EQ, __a, __b);
+#endif
+}
+
+#ifdef __VSX__
+static int __ATTRS_o_ai
+vec_all_nge(vector double __a, vector double __b) {
+ return __builtin_vsx_xvcmpgedp_p(__CR6_EQ, __a, __b);
}
+#endif
/* vec_all_ngt */
-static int __attribute__((__always_inline__))
+static int __ATTRS_o_ai
vec_all_ngt(vector float __a, vector float __b) {
+#ifdef __VSX__
+ return __builtin_vsx_xvcmpgtsp_p(__CR6_EQ, __a, __b);
+#else
return __builtin_altivec_vcmpgtfp_p(__CR6_EQ, __a, __b);
+#endif
+}
+
+#ifdef __VSX__
+static int __ATTRS_o_ai
+vec_all_ngt(vector double __a, vector double __b) {
+ return __builtin_vsx_xvcmpgtdp_p(__CR6_EQ, __a, __b);
}
+#endif
/* vec_all_nle */
@@ -12540,8 +12851,18 @@ static int __ATTRS_o_ai vec_any_eq(vector bool long long __a,
#endif
static int __ATTRS_o_ai vec_any_eq(vector float __a, vector float __b) {
+#ifdef __VSX__
+ return __builtin_vsx_xvcmpeqsp_p(__CR6_EQ_REV, __a, __b);
+#else
return __builtin_altivec_vcmpeqfp_p(__CR6_EQ_REV, __a, __b);
+#endif
+}
+
+#ifdef __VSX__
+static int __ATTRS_o_ai vec_any_eq(vector double __a, vector double __b) {
+ return __builtin_vsx_xvcmpeqdp_p(__CR6_EQ_REV, __a, __b);
}
+#endif
/* vec_any_ge */
@@ -12700,9 +13021,19 @@ static int __ATTRS_o_ai vec_any_ge(vector bool long long __a,
#endif
static int __ATTRS_o_ai vec_any_ge(vector float __a, vector float __b) {
+#ifdef __VSX__
+ return __builtin_vsx_xvcmpgesp_p(__CR6_EQ_REV, __a, __b);
+#else
return __builtin_altivec_vcmpgefp_p(__CR6_EQ_REV, __a, __b);
+#endif
}
+#ifdef __VSX__
+static int __ATTRS_o_ai vec_any_ge(vector double __a, vector double __b) {
+ return __builtin_vsx_xvcmpgedp_p(__CR6_EQ_REV, __a, __b);
+}
+#endif
+
/* vec_any_gt */
static int __ATTRS_o_ai vec_any_gt(vector signed char __a,
@@ -12860,8 +13191,18 @@ static int __ATTRS_o_ai vec_any_gt(vector bool long long __a,
#endif
static int __ATTRS_o_ai vec_any_gt(vector float __a, vector float __b) {
+#ifdef __VSX__
+ return __builtin_vsx_xvcmpgtsp_p(__CR6_EQ_REV, __a, __b);
+#else
return __builtin_altivec_vcmpgtfp_p(__CR6_EQ_REV, __a, __b);
+#endif
+}
+
+#ifdef __VSX__
+static int __ATTRS_o_ai vec_any_gt(vector double __a, vector double __b) {
+ return __builtin_vsx_xvcmpgtdp_p(__CR6_EQ_REV, __a, __b);
}
+#endif
/* vec_any_le */
@@ -13020,8 +13361,18 @@ static int __ATTRS_o_ai vec_any_le(vector bool long long __a,
#endif
static int __ATTRS_o_ai vec_any_le(vector float __a, vector float __b) {
+#ifdef __VSX__
+ return __builtin_vsx_xvcmpgesp_p(__CR6_EQ_REV, __b, __a);
+#else
return __builtin_altivec_vcmpgefp_p(__CR6_EQ_REV, __b, __a);
+#endif
+}
+
+#ifdef __VSX__
+static int __ATTRS_o_ai vec_any_le(vector double __a, vector double __b) {
+ return __builtin_vsx_xvcmpgedp_p(__CR6_EQ_REV, __b, __a);
}
+#endif
/* vec_any_lt */
@@ -13180,8 +13531,18 @@ static int __ATTRS_o_ai vec_any_lt(vector bool long long __a,
#endif
static int __ATTRS_o_ai vec_any_lt(vector float __a, vector float __b) {
+#ifdef __VSX__
+ return __builtin_vsx_xvcmpgtsp_p(__CR6_EQ_REV, __b, __a);
+#else
return __builtin_altivec_vcmpgtfp_p(__CR6_EQ_REV, __b, __a);
+#endif
+}
+
+#ifdef __VSX__
+static int __ATTRS_o_ai vec_any_lt(vector double __a, vector double __b) {
+ return __builtin_vsx_xvcmpgtdp_p(__CR6_EQ_REV, __b, __a);
}
+#endif
/* vec_any_nan */
@@ -13354,9 +13715,19 @@ static int __ATTRS_o_ai vec_any_ne(vector bool long long __a,
#endif
static int __ATTRS_o_ai vec_any_ne(vector float __a, vector float __b) {
+#ifdef __VSX__
+ return __builtin_vsx_xvcmpeqsp_p(__CR6_LT_REV, __a, __b);
+#else
return __builtin_altivec_vcmpeqfp_p(__CR6_LT_REV, __a, __b);
+#endif
}
+#ifdef __VSX__
+static int __ATTRS_o_ai vec_any_ne(vector double __a, vector double __b) {
+ return __builtin_vsx_xvcmpeqdp_p(__CR6_LT_REV, __a, __b);
+}
+#endif
+
/* vec_any_nge */
static int __attribute__((__always_inline__))
@@ -13411,11 +13782,14 @@ support). As a result, we need to be able to turn off support for those.
The remaining ones (currently controlled by -mcrypto for GCC) still
need to be provided on compliant hardware even if Vector.Crypto is not
provided.
-FIXME: the naming convention for the builtins will be adjusted due
-to the inconsistency (__builtin_crypto_ prefix on builtins that cannot be
-removed with -mno-crypto). This is under development.
*/
#ifdef __CRYPTO__
+#define vec_sbox_be __builtin_altivec_crypto_vsbox
+#define vec_cipher_be __builtin_altivec_crypto_vcipher
+#define vec_cipherlast_be __builtin_altivec_crypto_vcipherlast
+#define vec_ncipher_be __builtin_altivec_crypto_vncipher
+#define vec_ncipherlast_be __builtin_altivec_crypto_vncipherlast
+
static vector unsigned long long __attribute__((__always_inline__))
__builtin_crypto_vsbox(vector unsigned long long __a) {
return __builtin_altivec_crypto_vsbox(__a);
@@ -13447,6 +13821,11 @@ __builtin_crypto_vncipherlast(vector unsigned long long __a,
#define __builtin_crypto_vshasigmad __builtin_altivec_crypto_vshasigmad
#define __builtin_crypto_vshasigmaw __builtin_altivec_crypto_vshasigmaw
+
+#define vec_shasigma_be(X, Y, Z) \
+ _Generic((X), vector unsigned int: __builtin_crypto_vshasigmaw, \
+ vector unsigned long long: __builtin_crypto_vshasigmad) \
+((X), (Y), (Z))
#endif
#ifdef __POWER8_VECTOR__
@@ -13494,8 +13873,9 @@ __builtin_crypto_vpmsumb(vector unsigned int __a, vector unsigned int __b) {
return __builtin_altivec_crypto_vpmsumw(__a, __b);
}
-static vector unsigned long long __ATTRS_o_ai __builtin_crypto_vpmsumb(
- vector unsigned long long __a, vector unsigned long long __b) {
+static vector unsigned long long __ATTRS_o_ai
+__builtin_crypto_vpmsumb(vector unsigned long long __a,
+ vector unsigned long long __b) {
return __builtin_altivec_crypto_vpmsumd(__a, __b);
}
@@ -13504,6 +13884,9 @@ static vector signed char __ATTRS_o_ai vec_vgbbd (vector signed char __a)
return __builtin_altivec_vgbbd((vector unsigned char) __a);
}
+#define vec_pmsum_be __builtin_crypto_vpmsumb
+#define vec_gb __builtin_altivec_vgbbd
+
static vector unsigned char __ATTRS_o_ai vec_vgbbd (vector unsigned char __a)
{
return __builtin_altivec_vgbbd(__a);
@@ -13521,6 +13904,14 @@ vec_vbpermq (vector unsigned char __a, vector unsigned char __b)
{
return __builtin_altivec_vbpermq(__a, __b);
}
+
+#ifdef __powerpc64__
+static vector unsigned long long __attribute__((__always_inline__))
+vec_bperm (vector unsigned __int128 __a, vector unsigned char __b) {
+ return __builtin_altivec_vbpermq((vector unsigned char) __a,
+ (vector unsigned char) __b);
+}
+#endif
#endif
#undef __ATTRS_o_ai
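
The vec_shasigma_be macro added above uses C11 _Generic to dispatch a single name to a type-specific builtin. The same technique in standalone form (hypothetical names, plain C):

    #include <stdio.h>

    static int  square_i(int x)  { return x * x; }
    static long square_l(long x) { return x * x; }

    /* One name; the right function is picked at compile time from the
       static type of the argument, exactly as vec_shasigma_be does. */
    #define square(X) _Generic((X), int: square_i, long: square_l)(X)

    int main(void) {
      printf("%d %ld\n", square(3), square(4L));  /* prints: 9 16 */
      return 0;
    }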
diff --git a/contrib/llvm/tools/clang/lib/Headers/ammintrin.h b/contrib/llvm/tools/clang/lib/Headers/ammintrin.h
index 4d0e770ff9e4..4880fd7ebad1 100644
--- a/contrib/llvm/tools/clang/lib/Headers/ammintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/ammintrin.h
@@ -24,27 +24,23 @@
#ifndef __AMMINTRIN_H
#define __AMMINTRIN_H
-#ifndef __SSE4A__
-#error "SSE4A instruction set not enabled"
-#else
-
#include <pmmintrin.h>
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4a")))
/// \brief Extracts the specified bits from the lower 64 bits of the 128-bit
/// integer vector operand at the index idx and of the length len.
///
/// \headerfile <x86intrin.h>
///
-/// \code
+/// \code
/// __m128i _mm_extracti_si64(__m128i x, const int len, const int idx);
-/// \endcode
+/// \endcode
///
-/// \code
+/// \code
/// This intrinsic corresponds to the \c EXTRQ instruction.
-/// \endcode
+/// \endcode
///
/// \param x
/// The value from which bits are extracted.
@@ -52,10 +48,10 @@
/// Bits [5:0] specify the length; the other bits are ignored. If bits [5:0]
/// are zero, the length is interpreted as 64.
/// \param idx
-/// Bits [5:0] specify the index of the least significant bit; the other
-/// bits are ignored. If the sum of the index and length is greater than
-/// 64, the result is undefined. If the length and index are both zero,
-/// bits [63:0] of parameter x are extracted. If the length is zero
+/// Bits [5:0] specify the index of the least significant bit; the other
+/// bits are ignored. If the sum of the index and length is greater than
+/// 64, the result is undefined. If the length and index are both zero,
+/// bits [63:0] of parameter x are extracted. If the length is zero
/// but the index is non-zero, the result is undefined.
/// \returns A 128-bit integer vector whose lower 64 bits contain the bits
/// extracted from the source operand.
@@ -68,21 +64,21 @@
///
/// \headerfile <x86intrin.h>
///
-/// \code
+/// \code
/// This intrinsic corresponds to the \c EXTRQ instruction.
-/// \endcode
+/// \endcode
///
/// \param __x
/// The value from which bits are extracted.
/// \param __y
-/// Specifies the index of the least significant bit at [13:8]
-/// and the length at [5:0]; all other bits are ignored.
+/// Specifies the index of the least significant bit at [13:8]
+/// and the length at [5:0]; all other bits are ignored.
/// If bits [5:0] are zero, the length is interpreted as 64.
-/// If the sum of the index and length is greater than 64, the result is
-/// undefined. If the length and index are both zero, bits [63:0] of
-/// parameter __x are extracted. If the length is zero but the index is
-/// non-zero, the result is undefined.
-/// \returns A 128-bit vector whose lower 64 bits contain the bits extracted
+/// If the sum of the index and length is greater than 64, the result is
+/// undefined. If the length and index are both zero, bits [63:0] of
+/// parameter __x are extracted. If the length is zero but the index is
+/// non-zero, the result is undefined.
+/// \returns A 128-bit vector whose lower 64 bits contain the bits extracted
/// from the source operand.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_extract_si64(__m128i __x, __m128i __y)
@@ -90,40 +86,40 @@ _mm_extract_si64(__m128i __x, __m128i __y)
return (__m128i)__builtin_ia32_extrq((__v2di)__x, (__v16qi)__y);
}
-/// \brief Inserts bits of a specified length from the source integer vector
-/// y into the lower 64 bits of the destination integer vector x at the
+/// \brief Inserts bits of a specified length from the source integer vector
+/// y into the lower 64 bits of the destination integer vector x at the
/// index idx and of the length len.
///
/// \headerfile <x86intrin.h>
///
-/// \code
+/// \code
/// __m128i _mm_inserti_si64(__m128i x, __m128i y, const int len,
/// const int idx);
-/// \endcode
+/// \endcode
///
-/// \code
+/// \code
/// This intrinsic corresponds to the \c INSERTQ instruction.
-/// \endcode
+/// \endcode
///
/// \param x
-/// The destination operand where bits will be inserted. The inserted bits
-/// are defined by the length len and by the index idx specifying the least
+/// The destination operand where bits will be inserted. The inserted bits
+/// are defined by the length len and by the index idx specifying the least
/// significant bit.
/// \param y
-/// The source operand containing the bits to be extracted. The extracted
+/// The source operand containing the bits to be extracted. The extracted
/// bits are the least significant bits of operand y of length len.
/// \param len
/// Bits [5:0] specify the length; the other bits are ignored. If bits [5:0]
/// are zero, the length is interpreted as 64.
/// \param idx
-/// Bits [5:0] specify the index of the least significant bit; the other
-/// bits are ignored. If the sum of the index and length is greater than
-/// 64, the result is undefined. If the length and index are both zero,
-/// bits [63:0] of parameter y are inserted into parameter x. If the
+/// Bits [5:0] specify the index of the least significant bit; the other
+/// bits are ignored. If the sum of the index and length is greater than
+/// 64, the result is undefined. If the length and index are both zero,
+/// bits [63:0] of parameter y are inserted into parameter x. If the
/// length is zero but the index is non-zero, the result is undefined.
-/// \returns A 128-bit integer vector containing the original lower 64-bits
+/// \returns A 128-bit integer vector containing the original lower 64-bits
/// of destination operand x with the specified bitfields replaced by the
-/// lower bits of source operand y. The upper 64 bits of the return value
+/// lower bits of source operand y. The upper 64 bits of the return value
/// are undefined.
#define _mm_inserti_si64(x, y, len, idx) \
@@ -131,33 +127,33 @@ _mm_extract_si64(__m128i __x, __m128i __y)
(__v2di)(__m128i)(y), \
(char)(len), (char)(idx)))
-/// \brief Inserts bits of a specified length from the source integer vector
-/// __y into the lower 64 bits of the destination integer vector __x at
+/// \brief Inserts bits of a specified length from the source integer vector
+/// __y into the lower 64 bits of the destination integer vector __x at
/// the index and of the length specified by __y.
///
/// \headerfile <x86intrin.h>
///
-/// \code
+/// \code
/// This intrinsic corresponds to the \c INSERTQ instruction.
-/// \endcode
+/// \endcode
///
/// \param __x
-/// The destination operand where bits will be inserted. The inserted bits
-/// are defined by the length and by the index of the least significant bit
+/// The destination operand where bits will be inserted. The inserted bits
+/// are defined by the length and by the index of the least significant bit
/// specified by operand __y.
/// \param __y
-/// The source operand containing the bits to be extracted. The extracted
+/// The source operand containing the bits to be extracted. The extracted
/// bits are the least significant bits of operand __y with length specified
-/// by bits [69:64]. These are inserted into the destination at the index
+/// by bits [69:64]. These are inserted into the destination at the index
/// specified by bits [77:72]; all other bits are ignored.
/// If bits [69:64] are zero, the length is interpreted as 64.
-/// If the sum of the index and length is greater than 64, the result is
-/// undefined. If the length and index are both zero, bits [63:0] of
+/// If the sum of the index and length is greater than 64, the result is
+/// undefined. If the length and index are both zero, bits [63:0] of
/// parameter __y are inserted into parameter __x. If the length
-/// is zero but the index is non-zero, the result is undefined.
-/// \returns A 128-bit integer vector containing the original lower 64-bits
+/// is zero but the index is non-zero, the result is undefined.
+/// \returns A 128-bit integer vector containing the original lower 64-bits
/// of destination operand __x with the specified bitfields replaced by the
-/// lower bits of source operand __y. The upper 64 bits of the return value
+/// lower bits of source operand __y. The upper 64 bits of the return value
/// are undefined.
static __inline__ __m128i __DEFAULT_FN_ATTRS
@@ -166,15 +162,15 @@ _mm_insert_si64(__m128i __x, __m128i __y)
return (__m128i)__builtin_ia32_insertq((__v2di)__x, (__v2di)__y);
}
-/// \brief Stores a 64-bit double-precision value in a 64-bit memory location.
+/// \brief Stores a 64-bit double-precision value in a 64-bit memory location.
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
///
/// \headerfile <x86intrin.h>
///
-/// \code
+/// \code
/// This intrinsic corresponds to the \c MOVNTSD instruction.
-/// \endcode
+/// \endcode
///
/// \param __p
/// The 64-bit memory location used to store the register value.
@@ -193,9 +189,9 @@ _mm_stream_sd(double *__p, __m128d __a)
///
/// \headerfile <x86intrin.h>
///
-/// \code
+/// \code
/// This intrinsic corresponds to the \c MOVNTSS instruction.
-/// \endcode
+/// \endcode
///
/// \param __p
/// The 32-bit memory location used to store the register value.
@@ -210,6 +206,4 @@ _mm_stream_ss(float *__p, __m128 __a)
#undef __DEFAULT_FN_ATTRS
-#endif /* __SSE4A__ */
-
#endif /* __AMMINTRIN_H */
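
With __target__("sse4a") on __DEFAULT_FN_ATTRS, the header no longer errors out when included without -msse4a; instead, each intrinsic requires SSE4A to be enabled for the calling function or translation unit. A sketch, assuming an x86-64 target and that a per-function target attribute is used to opt in (the function name is illustrative):

#include <x86intrin.h>

__attribute__((__target__("sse4a")))
long long extract_field(__m128i v) {
  /* EXTRQ: pull the 8 bits starting at bit index 8 of the low
     quadword into the low bits of the result. */
  __m128i r = _mm_extracti_si64(v, 8, 8);
  return _mm_cvtsi128_si64(r);
}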
diff --git a/contrib/llvm/tools/clang/lib/Headers/arm_acle.h b/contrib/llvm/tools/clang/lib/Headers/arm_acle.h
index 73a7e76ce3c4..4be1d097dc5e 100644
--- a/contrib/llvm/tools/clang/lib/Headers/arm_acle.h
+++ b/contrib/llvm/tools/clang/lib/Headers/arm_acle.h
@@ -175,14 +175,18 @@ static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
return __ror(__rev(t), 16);
}
-static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
- __rev16l(unsigned long t) {
- return __rorl(__revl(t), sizeof(long) / 2);
-}
-
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__rev16ll(uint64_t t) {
- return __rorll(__revll(t), 32);
+ return (((uint64_t)__rev16(t >> 32)) << 32) | __rev16(t);
+}
+
+static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
+ __rev16l(unsigned long t) {
+#if __SIZEOF_LONG__ == 4
+ return __rev16(t);
+#else
+ return __rev16ll(t);
+#endif
}
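
The rewritten __rev16ll applies the 32-bit __rev16 to each half of the doubleword independently, matching REV16's byte-swap-within-each-halfword semantics; the old __rorll(__revll(t), 32) mixed bytes across halfword boundaries. A plain-C reference model of the intended result, for illustration only:

#include <stdint.h>

/* Swap the two bytes of every 16-bit halfword of t. */
static uint64_t rev16_model(uint64_t t) {
  return ((t & 0x00ff00ff00ff00ffULL) << 8) |
         ((t & 0xff00ff00ff00ff00ULL) >> 8);
}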
/* REVSH */
diff --git a/contrib/llvm/tools/clang/lib/Headers/avx2intrin.h b/contrib/llvm/tools/clang/lib/Headers/avx2intrin.h
index d8b6b0aa4d23..f786572dae7d 100644
--- a/contrib/llvm/tools/clang/lib/Headers/avx2intrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/avx2intrin.h
@@ -29,7 +29,7 @@
#define __AVX2INTRIN_H
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx2")))
/* SSE4 Multiple Packed Sums of Absolute Difference. */
#define _mm256_mpsadbw_epu8(X, Y, M) __builtin_ia32_mpsadbw256((X), (Y), (M))
@@ -124,10 +124,9 @@ _mm256_adds_epu16(__m256i __a, __m256i __b)
return (__m256i)__builtin_ia32_paddusw256((__v16hi)__a, (__v16hi)__b);
}
-#define _mm256_alignr_epi8(a, b, n) __extension__ ({ \
- __m256i __a = (a); \
- __m256i __b = (b); \
- (__m256i)__builtin_ia32_palignr256((__v32qi)__a, (__v32qi)__b, (n)); })
+#define _mm256_alignr_epi8(a, b, n) __extension__ ({ \
+ (__m256i)__builtin_ia32_palignr256((__v32qi)(__m256i)(a), \
+ (__v32qi)(__m256i)(b), (n)); })
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_and_si256(__m256i __a, __m256i __b)
@@ -160,20 +159,19 @@ _mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M)
(__v32qi)__M);
}
-#define _mm256_blend_epi16(V1, V2, M) __extension__ ({ \
- __m256i __V1 = (V1); \
- __m256i __V2 = (V2); \
- (__m256i)__builtin_shufflevector((__v16hi)__V1, (__v16hi)__V2, \
- (((M) & 0x01) ? 16 : 0), \
- (((M) & 0x02) ? 17 : 1), \
- (((M) & 0x04) ? 18 : 2), \
- (((M) & 0x08) ? 19 : 3), \
- (((M) & 0x10) ? 20 : 4), \
- (((M) & 0x20) ? 21 : 5), \
- (((M) & 0x40) ? 22 : 6), \
- (((M) & 0x80) ? 23 : 7), \
- (((M) & 0x01) ? 24 : 8), \
- (((M) & 0x02) ? 25 : 9), \
+#define _mm256_blend_epi16(V1, V2, M) __extension__ ({ \
+ (__m256i)__builtin_shufflevector((__v16hi)(__m256i)(V1), \
+ (__v16hi)(__m256i)(V2), \
+ (((M) & 0x01) ? 16 : 0), \
+ (((M) & 0x02) ? 17 : 1), \
+ (((M) & 0x04) ? 18 : 2), \
+ (((M) & 0x08) ? 19 : 3), \
+ (((M) & 0x10) ? 20 : 4), \
+ (((M) & 0x20) ? 21 : 5), \
+ (((M) & 0x40) ? 22 : 6), \
+ (((M) & 0x80) ? 23 : 7), \
+ (((M) & 0x01) ? 24 : 8), \
+ (((M) & 0x02) ? 25 : 9), \
(((M) & 0x04) ? 26 : 10), \
(((M) & 0x08) ? 27 : 11), \
(((M) & 0x10) ? 28 : 12), \
@@ -208,7 +206,9 @@ _mm256_cmpeq_epi64(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_cmpgt_epi8(__m256i __a, __m256i __b)
{
- return (__m256i)((__v32qi)__a > (__v32qi)__b);
+ /* This function always performs a signed comparison, but __v32qi is a char
+ which may be signed or unsigned, so use __v32qs. */
+ return (__m256i)((__v32qs)__a > (__v32qs)__b);
}
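
Because __v32qi uses plain char, whose signedness is implementation-defined (and flips under -funsigned-char), the comparison now goes through the explicitly signed __v32qs element type so it always matches the signed VPCMPGTB instruction. A plain-C illustration of the difference:

#include <stdio.h>

int main(void) {
  signed char a = -1, b = 1;        /* signed: -1 > 1 is false  */
  unsigned char ua = 0xFF, ub = 1;  /* unsigned: 255 > 1 is true */
  printf("%d %d\n", a > b, ua > ub);  /* prints "0 1" */
  return 0;
}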
static __inline__ __m256i __DEFAULT_FN_ATTRS
@@ -488,8 +488,8 @@ _mm256_shuffle_epi8(__m256i __a, __m256i __b)
}
#define _mm256_shuffle_epi32(a, imm) __extension__ ({ \
- __m256i __a = (a); \
- (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)_mm256_set1_epi32(0), \
+ (__m256i)__builtin_shufflevector((__v8si)(__m256i)(a), \
+ (__v8si)_mm256_setzero_si256(), \
(imm) & 0x3, ((imm) & 0xc) >> 2, \
((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \
4 + (((imm) & 0x03) >> 0), \
@@ -498,8 +498,8 @@ _mm256_shuffle_epi8(__m256i __a, __m256i __b)
4 + (((imm) & 0xc0) >> 6)); })
#define _mm256_shufflehi_epi16(a, imm) __extension__ ({ \
- __m256i __a = (a); \
- (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)_mm256_set1_epi16(0), \
+ (__m256i)__builtin_shufflevector((__v16hi)(__m256i)(a), \
+ (__v16hi)_mm256_setzero_si256(), \
0, 1, 2, 3, \
4 + (((imm) & 0x03) >> 0), \
4 + (((imm) & 0x0c) >> 2), \
@@ -512,8 +512,8 @@ _mm256_shuffle_epi8(__m256i __a, __m256i __b)
12 + (((imm) & 0xc0) >> 6)); })
#define _mm256_shufflelo_epi16(a, imm) __extension__ ({ \
- __m256i __a = (a); \
- (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)_mm256_set1_epi16(0), \
+ (__m256i)__builtin_shufflevector((__v16hi)(__m256i)(a), \
+ (__v16hi)_mm256_setzero_si256(), \
(imm) & 0x3,((imm) & 0xc) >> 2, \
((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \
4, 5, 6, 7, \
@@ -542,8 +542,7 @@ _mm256_sign_epi32(__m256i __a, __m256i __b)
}
#define _mm256_slli_si256(a, count) __extension__ ({ \
- __m256i __a = (a); \
- (__m256i)__builtin_ia32_pslldqi256(__a, (count)*8); })
+ (__m256i)__builtin_ia32_pslldqi256((__m256i)(a), (count)*8); })
#define _mm256_bslli_epi128(a, count) _mm256_slli_si256((a), (count))
@@ -608,8 +607,7 @@ _mm256_sra_epi32(__m256i __a, __m128i __count)
}
#define _mm256_srli_si256(a, count) __extension__ ({ \
- __m256i __a = (a); \
- (__m256i)__builtin_ia32_psrldqi256(__a, (count)*8); })
+ (__m256i)__builtin_ia32_psrldqi256((__m256i)(a), (count)*8); })
#define _mm256_bsrli_epi128(a, count) _mm256_srli_si256((a), (count))
@@ -752,15 +750,15 @@ _mm256_xor_si256(__m256i __a, __m256i __b)
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_stream_load_si256(__m256i *__V)
+_mm256_stream_load_si256(__m256i const *__V)
{
- return (__m256i)__builtin_ia32_movntdqa256((__v4di *)__V);
+ return (__m256i)__builtin_ia32_movntdqa256((const __v4di *)__V);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_broadcastss_ps(__m128 __X)
{
- return (__m128)__builtin_ia32_vbroadcastss_ps((__v4sf)__X);
+ return (__m128)__builtin_shufflevector((__v4sf)__X, (__v4sf)__X, 0, 0, 0, 0);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
@@ -772,13 +770,13 @@ _mm_broadcastsd_pd(__m128d __a)
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_broadcastss_ps(__m128 __X)
{
- return (__m256)__builtin_ia32_vbroadcastss_ps256((__v4sf)__X);
+ return (__m256)__builtin_shufflevector((__v4sf)__X, (__v4sf)__X, 0, 0, 0, 0, 0, 0, 0, 0);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_broadcastsd_pd(__m128d __X)
{
- return (__m256d)__builtin_ia32_vbroadcastsd_pd256((__v2df)__X);
+ return (__m256d)__builtin_shufflevector((__v2df)__X, (__v2df)__X, 0, 0, 0, 0);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
@@ -788,18 +786,16 @@ _mm256_broadcastsi128_si256(__m128i __X)
}
#define _mm_blend_epi32(V1, V2, M) __extension__ ({ \
- __m128i __V1 = (V1); \
- __m128i __V2 = (V2); \
- (__m128i)__builtin_shufflevector((__v4si)__V1, (__v4si)__V2, \
+ (__m128i)__builtin_shufflevector((__v4si)(__m128i)(V1), \
+ (__v4si)(__m128i)(V2), \
(((M) & 0x01) ? 4 : 0), \
(((M) & 0x02) ? 5 : 1), \
(((M) & 0x04) ? 6 : 2), \
(((M) & 0x08) ? 7 : 3)); })
#define _mm256_blend_epi32(V1, V2, M) __extension__ ({ \
- __m256i __V1 = (V1); \
- __m256i __V2 = (V2); \
- (__m256i)__builtin_shufflevector((__v8si)__V1, (__v8si)__V2, \
+ (__m256i)__builtin_shufflevector((__v8si)(__m256i)(V1), \
+ (__v8si)(__m256i)(V2), \
(((M) & 0x01) ? 8 : 0), \
(((M) & 0x02) ? 9 : 1), \
(((M) & 0x04) ? 10 : 2), \
@@ -812,50 +808,50 @@ _mm256_broadcastsi128_si256(__m128i __X)
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_broadcastb_epi8(__m128i __X)
{
- return (__m256i)__builtin_ia32_pbroadcastb256((__v16qi)__X);
+ return (__m256i)__builtin_shufflevector((__v16qi)__X, (__v16qi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_broadcastw_epi16(__m128i __X)
{
- return (__m256i)__builtin_ia32_pbroadcastw256((__v8hi)__X);
+ return (__m256i)__builtin_shufflevector((__v8hi)__X, (__v8hi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_broadcastd_epi32(__m128i __X)
{
- return (__m256i)__builtin_ia32_pbroadcastd256((__v4si)__X);
+ return (__m256i)__builtin_shufflevector((__v4si)__X, (__v4si)__X, 0, 0, 0, 0, 0, 0, 0, 0);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_broadcastq_epi64(__m128i __X)
{
- return (__m256i)__builtin_ia32_pbroadcastq256(__X);
+ return (__m256i)__builtin_shufflevector(__X, __X, 0, 0, 0, 0);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_broadcastb_epi8(__m128i __X)
{
- return (__m128i)__builtin_ia32_pbroadcastb128((__v16qi)__X);
+ return (__m128i)__builtin_shufflevector((__v16qi)__X, (__v16qi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_broadcastw_epi16(__m128i __X)
{
- return (__m128i)__builtin_ia32_pbroadcastw128((__v8hi)__X);
+ return (__m128i)__builtin_shufflevector((__v8hi)__X, (__v8hi)__X, 0, 0, 0, 0, 0, 0, 0, 0);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_broadcastd_epi32(__m128i __X)
{
- return (__m128i)__builtin_ia32_pbroadcastd128((__v4si)__X);
+ return (__m128i)__builtin_shufflevector((__v4si)__X, (__v4si)__X, 0, 0, 0, 0);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_broadcastq_epi64(__m128i __X)
{
- return (__m128i)__builtin_ia32_pbroadcastq128(__X);
+ return (__m128i)__builtin_shufflevector(__X, __X, 0, 0);
}
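
All of the broadcast intrinsics above now lower through __builtin_shufflevector with an all-zero index list (replicate element 0) instead of target-specific builtins, so the optimizer can fold them like any other shuffle. An equivalent formulation in terms of public intrinsics, for illustration (the function name is an assumption):

#include <immintrin.h>

__attribute__((__target__("avx2")))
__m128i splat_lane0(__m128i x) {
  /* Same result as _mm_broadcastd_epi32(x): copy lane 0 to all lanes. */
  return _mm_set1_epi32(((__v4si)x)[0]);
}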
static __inline__ __m256i __DEFAULT_FN_ATTRS
@@ -865,43 +861,39 @@ _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b)
}
#define _mm256_permute4x64_pd(V, M) __extension__ ({ \
- __m256d __V = (V); \
- (__m256d)__builtin_shufflevector((__v4df)__V, (__v4df) _mm256_setzero_pd(), \
+ (__m256d)__builtin_shufflevector((__v4df)(__m256d)(V), \
+ (__v4df)_mm256_setzero_pd(), \
(M) & 0x3, ((M) & 0xc) >> 2, \
((M) & 0x30) >> 4, ((M) & 0xc0) >> 6); })
static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_permutevar8x32_ps(__m256 __a, __m256 __b)
+_mm256_permutevar8x32_ps(__m256 __a, __m256i __b)
{
- return (__m256)__builtin_ia32_permvarsf256((__v8sf)__a, (__v8sf)__b);
+ return (__m256)__builtin_ia32_permvarsf256((__v8sf)__a, (__v8si)__b);
}
#define _mm256_permute4x64_epi64(V, M) __extension__ ({ \
- __m256i __V = (V); \
- (__m256i)__builtin_shufflevector((__v4di)__V, (__v4di) _mm256_setzero_si256(), \
+ (__m256i)__builtin_shufflevector((__v4di)(__m256i)(V), \
+ (__v4di)_mm256_setzero_si256(), \
(M) & 0x3, ((M) & 0xc) >> 2, \
((M) & 0x30) >> 4, ((M) & 0xc0) >> 6); })
#define _mm256_permute2x128_si256(V1, V2, M) __extension__ ({ \
- __m256i __V1 = (V1); \
- __m256i __V2 = (V2); \
- (__m256i)__builtin_ia32_permti256(__V1, __V2, (M)); })
+ (__m256i)__builtin_ia32_permti256((__m256i)(V1), (__m256i)(V2), (M)); })
#define _mm256_extracti128_si256(V, M) __extension__ ({ \
- (__m128i)__builtin_shufflevector( \
- (__v4di)(V), \
- (__v4di)(_mm256_setzero_si256()), \
- (((M) & 1) ? 2 : 0), \
- (((M) & 1) ? 3 : 1) );})
+ (__m128i)__builtin_shufflevector((__v4di)(__m256i)(V), \
+ (__v4di)_mm256_setzero_si256(), \
+ (((M) & 1) ? 2 : 0), \
+ (((M) & 1) ? 3 : 1) ); })
#define _mm256_inserti128_si256(V1, V2, M) __extension__ ({ \
- (__m256i)__builtin_shufflevector( \
- (__v4di)(V1), \
- (__v4di)_mm256_castsi128_si256((__m128i)(V2)), \
- (((M) & 1) ? 0 : 4), \
- (((M) & 1) ? 1 : 5), \
- (((M) & 1) ? 4 : 2), \
- (((M) & 1) ? 5 : 3) );})
+ (__m256i)__builtin_shufflevector((__v4di)(__m256i)(V1), \
+ (__v4di)_mm256_castsi128_si256((__m128i)(V2)), \
+ (((M) & 1) ? 0 : 4), \
+ (((M) & 1) ? 1 : 5), \
+ (((M) & 1) ? 4 : 2), \
+ (((M) & 1) ? 5 : 3) ); })
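
Both macros above are expressed as 64-bit-lane shuffles against the source (or the 128-bit value widened with _mm256_castsi128_si256); bit 0 of M picks which 128-bit half is extracted or replaced. A usage sketch (the function name is illustrative):

#include <immintrin.h>

__attribute__((__target__("avx2")))
__m256i replace_high_half(__m256i v, __m128i h) {
  /* M = 1 selects the upper 128-bit lane of v for replacement. */
  return _mm256_inserti128_si256(v, h, 1);
}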
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskload_epi32(int const *__X, __m256i __M)
@@ -1012,244 +1004,211 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
}
#define _mm_mask_i32gather_pd(a, m, i, mask, s) __extension__ ({ \
- __m128d __a = (a); \
- double const *__m = (m); \
- __m128i __i = (i); \
- __m128d __mask = (mask); \
- (__m128d)__builtin_ia32_gatherd_pd((__v2df)__a, (const __v2df *)__m, \
- (__v4si)__i, (__v2df)__mask, (s)); })
+  (__m128d)__builtin_ia32_gatherd_pd((__v2df)(__m128d)(a), \
+ (double const *)(m), \
+ (__v4si)(__m128i)(i), \
+ (__v2df)(__m128d)(mask), (s)); })
#define _mm256_mask_i32gather_pd(a, m, i, mask, s) __extension__ ({ \
- __m256d __a = (a); \
- double const *__m = (m); \
- __m128i __i = (i); \
- __m256d __mask = (mask); \
- (__m256d)__builtin_ia32_gatherd_pd256((__v4df)__a, (const __v4df *)__m, \
- (__v4si)__i, (__v4df)__mask, (s)); })
+ (__m256d)__builtin_ia32_gatherd_pd256((__v4df)(__m256d)(a), \
+ (double const *)(m), \
+ (__v4si)(__m128i)(i), \
+ (__v4df)(__m256d)(mask), (s)); })
#define _mm_mask_i64gather_pd(a, m, i, mask, s) __extension__ ({ \
- __m128d __a = (a); \
- double const *__m = (m); \
- __m128i __i = (i); \
- __m128d __mask = (mask); \
- (__m128d)__builtin_ia32_gatherq_pd((__v2df)__a, (const __v2df *)__m, \
- (__v2di)__i, (__v2df)__mask, (s)); })
+ (__m128d)__builtin_ia32_gatherq_pd((__v2df)(__m128d)(a), \
+ (double const *)(m), \
+ (__v2di)(__m128i)(i), \
+ (__v2df)(__m128d)(mask), (s)); })
#define _mm256_mask_i64gather_pd(a, m, i, mask, s) __extension__ ({ \
- __m256d __a = (a); \
- double const *__m = (m); \
- __m256i __i = (i); \
- __m256d __mask = (mask); \
- (__m256d)__builtin_ia32_gatherq_pd256((__v4df)__a, (const __v4df *)__m, \
- (__v4di)__i, (__v4df)__mask, (s)); })
+ (__m256d)__builtin_ia32_gatherq_pd256((__v4df)(__m256d)(a), \
+ (double const *)(m), \
+ (__v4di)(__m256i)(i), \
+ (__v4df)(__m256d)(mask), (s)); })
#define _mm_mask_i32gather_ps(a, m, i, mask, s) __extension__ ({ \
- __m128 __a = (a); \
- float const *__m = (m); \
- __m128i __i = (i); \
- __m128 __mask = (mask); \
- (__m128)__builtin_ia32_gatherd_ps((__v4sf)__a, (const __v4sf *)__m, \
- (__v4si)__i, (__v4sf)__mask, (s)); })
+ (__m128)__builtin_ia32_gatherd_ps((__v4sf)(__m128)(a), \
+ (float const *)(m), \
+ (__v4si)(__m128i)(i), \
+ (__v4sf)(__m128)(mask), (s)); })
#define _mm256_mask_i32gather_ps(a, m, i, mask, s) __extension__ ({ \
- __m256 __a = (a); \
- float const *__m = (m); \
- __m256i __i = (i); \
- __m256 __mask = (mask); \
- (__m256)__builtin_ia32_gatherd_ps256((__v8sf)__a, (const __v8sf *)__m, \
- (__v8si)__i, (__v8sf)__mask, (s)); })
+ (__m256)__builtin_ia32_gatherd_ps256((__v8sf)(__m256)(a), \
+ (float const *)(m), \
+ (__v8si)(__m256i)(i), \
+ (__v8sf)(__m256)(mask), (s)); })
#define _mm_mask_i64gather_ps(a, m, i, mask, s) __extension__ ({ \
- __m128 __a = (a); \
- float const *__m = (m); \
- __m128i __i = (i); \
- __m128 __mask = (mask); \
- (__m128)__builtin_ia32_gatherq_ps((__v4sf)__a, (const __v4sf *)__m, \
- (__v2di)__i, (__v4sf)__mask, (s)); })
+ (__m128)__builtin_ia32_gatherq_ps((__v4sf)(__m128)(a), \
+ (float const *)(m), \
+ (__v2di)(__m128i)(i), \
+ (__v4sf)(__m128)(mask), (s)); })
#define _mm256_mask_i64gather_ps(a, m, i, mask, s) __extension__ ({ \
- __m128 __a = (a); \
- float const *__m = (m); \
- __m256i __i = (i); \
- __m128 __mask = (mask); \
- (__m128)__builtin_ia32_gatherq_ps256((__v4sf)__a, (const __v4sf *)__m, \
- (__v4di)__i, (__v4sf)__mask, (s)); })
+ (__m128)__builtin_ia32_gatherq_ps256((__v4sf)(__m128)(a), \
+ (float const *)(m), \
+ (__v4di)(__m256i)(i), \
+ (__v4sf)(__m128)(mask), (s)); })
#define _mm_mask_i32gather_epi32(a, m, i, mask, s) __extension__ ({ \
- __m128i __a = (a); \
- int const *__m = (m); \
- __m128i __i = (i); \
- __m128i __mask = (mask); \
- (__m128i)__builtin_ia32_gatherd_d((__v4si)__a, (const __v4si *)__m, \
- (__v4si)__i, (__v4si)__mask, (s)); })
+ (__m128i)__builtin_ia32_gatherd_d((__v4si)(__m128i)(a), \
+ (int const *)(m), \
+ (__v4si)(__m128i)(i), \
+ (__v4si)(__m128i)(mask), (s)); })
#define _mm256_mask_i32gather_epi32(a, m, i, mask, s) __extension__ ({ \
- __m256i __a = (a); \
- int const *__m = (m); \
- __m256i __i = (i); \
- __m256i __mask = (mask); \
- (__m256i)__builtin_ia32_gatherd_d256((__v8si)__a, (const __v8si *)__m, \
- (__v8si)__i, (__v8si)__mask, (s)); })
+ (__m256i)__builtin_ia32_gatherd_d256((__v8si)(__m256i)(a), \
+ (int const *)(m), \
+ (__v8si)(__m256i)(i), \
+ (__v8si)(__m256i)(mask), (s)); })
#define _mm_mask_i64gather_epi32(a, m, i, mask, s) __extension__ ({ \
- __m128i __a = (a); \
- int const *__m = (m); \
- __m128i __i = (i); \
- __m128i __mask = (mask); \
- (__m128i)__builtin_ia32_gatherq_d((__v4si)__a, (const __v4si *)__m, \
- (__v2di)__i, (__v4si)__mask, (s)); })
+ (__m128i)__builtin_ia32_gatherq_d((__v4si)(__m128i)(a), \
+ (int const *)(m), \
+ (__v2di)(__m128i)(i), \
+ (__v4si)(__m128i)(mask), (s)); })
#define _mm256_mask_i64gather_epi32(a, m, i, mask, s) __extension__ ({ \
- __m128i __a = (a); \
- int const *__m = (m); \
- __m256i __i = (i); \
- __m128i __mask = (mask); \
- (__m128i)__builtin_ia32_gatherq_d256((__v4si)__a, (const __v4si *)__m, \
- (__v4di)__i, (__v4si)__mask, (s)); })
+ (__m128i)__builtin_ia32_gatherq_d256((__v4si)(__m128i)(a), \
+ (int const *)(m), \
+ (__v4di)(__m256i)(i), \
+ (__v4si)(__m128i)(mask), (s)); })
#define _mm_mask_i32gather_epi64(a, m, i, mask, s) __extension__ ({ \
- __m128i __a = (a); \
- long long const *__m = (m); \
- __m128i __i = (i); \
- __m128i __mask = (mask); \
- (__m128i)__builtin_ia32_gatherd_q((__v2di)__a, (const __v2di *)__m, \
- (__v4si)__i, (__v2di)__mask, (s)); })
+ (__m128i)__builtin_ia32_gatherd_q((__v2di)(__m128i)(a), \
+ (long long const *)(m), \
+ (__v4si)(__m128i)(i), \
+ (__v2di)(__m128i)(mask), (s)); })
#define _mm256_mask_i32gather_epi64(a, m, i, mask, s) __extension__ ({ \
- __m256i __a = (a); \
- long long const *__m = (m); \
- __m128i __i = (i); \
- __m256i __mask = (mask); \
- (__m256i)__builtin_ia32_gatherd_q256((__v4di)__a, (const __v4di *)__m, \
- (__v4si)__i, (__v4di)__mask, (s)); })
+ (__m256i)__builtin_ia32_gatherd_q256((__v4di)(__m256i)(a), \
+ (long long const *)(m), \
+ (__v4si)(__m128i)(i), \
+ (__v4di)(__m256i)(mask), (s)); })
#define _mm_mask_i64gather_epi64(a, m, i, mask, s) __extension__ ({ \
- __m128i __a = (a); \
- long long const *__m = (m); \
- __m128i __i = (i); \
- __m128i __mask = (mask); \
- (__m128i)__builtin_ia32_gatherq_q((__v2di)__a, (const __v2di *)__m, \
- (__v2di)__i, (__v2di)__mask, (s)); })
+ (__m128i)__builtin_ia32_gatherq_q((__v2di)(__m128i)(a), \
+ (long long const *)(m), \
+ (__v2di)(__m128i)(i), \
+ (__v2di)(__m128i)(mask), (s)); })
#define _mm256_mask_i64gather_epi64(a, m, i, mask, s) __extension__ ({ \
- __m256i __a = (a); \
- long long const *__m = (m); \
- __m256i __i = (i); \
- __m256i __mask = (mask); \
- (__m256i)__builtin_ia32_gatherq_q256((__v4di)__a, (const __v4di *)__m, \
- (__v4di)__i, (__v4di)__mask, (s)); })
+ (__m256i)__builtin_ia32_gatherq_q256((__v4di)(__m256i)(a), \
+ (long long const *)(m), \
+ (__v4di)(__m256i)(i), \
+ (__v4di)(__m256i)(mask), (s)); })
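
The masked gather macros now cast each argument to its expected vector or pointer type in place instead of binding local __a/__m/__i/__mask temporaries; the arguments stay type-checked, and the macros can no longer shadow identifiers in the caller. A usage sketch, assuming AVX2 (the function name is illustrative):

#include <immintrin.h>

__attribute__((__target__("avx2")))
__m256d gather_selected(const double *base, __m128i idx,
                        __m256d src, __m256d mask) {
  /* Loads base[idx[i]] for lanes whose mask sign bit is set; the
     other lanes keep src.  The scale is in bytes (8 for double). */
  return _mm256_mask_i32gather_pd(src, base, idx, mask, 8);
}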
#define _mm_i32gather_pd(m, i, s) __extension__ ({ \
- double const *__m = (m); \
- __m128i __i = (i); \
- (__m128d)__builtin_ia32_gatherd_pd((__v2df)_mm_setzero_pd(), \
- (const __v2df *)__m, (__v4si)__i, \
- (__v2df)_mm_set1_pd((double)(long long int)-1), (s)); })
+ (__m128d)__builtin_ia32_gatherd_pd((__v2df)_mm_undefined_pd(), \
+ (double const *)(m), \
+ (__v4si)(__m128i)(i), \
+ (__v2df)_mm_cmpeq_pd(_mm_setzero_pd(), \
+ _mm_setzero_pd()), \
+ (s)); })
#define _mm256_i32gather_pd(m, i, s) __extension__ ({ \
- double const *__m = (m); \
- __m128i __i = (i); \
- (__m256d)__builtin_ia32_gatherd_pd256((__v4df)_mm256_setzero_pd(), \
- (const __v4df *)__m, (__v4si)__i, \
- (__v4df)_mm256_set1_pd((double)(long long int)-1), (s)); })
+ (__m256d)__builtin_ia32_gatherd_pd256((__v4df)_mm256_undefined_pd(), \
+ (double const *)(m), \
+ (__v4si)(__m128i)(i), \
+ (__v4df)_mm256_cmp_pd(_mm256_setzero_pd(), \
+ _mm256_setzero_pd(), \
+ _CMP_EQ_OQ), \
+ (s)); })
#define _mm_i64gather_pd(m, i, s) __extension__ ({ \
- double const *__m = (m); \
- __m128i __i = (i); \
- (__m128d)__builtin_ia32_gatherq_pd((__v2df)_mm_setzero_pd(), \
- (const __v2df *)__m, (__v2di)__i, \
- (__v2df)_mm_set1_pd((double)(long long int)-1), (s)); })
+ (__m128d)__builtin_ia32_gatherq_pd((__v2df)_mm_undefined_pd(), \
+ (double const *)(m), \
+ (__v2di)(__m128i)(i), \
+ (__v2df)_mm_cmpeq_pd(_mm_setzero_pd(), \
+ _mm_setzero_pd()), \
+ (s)); })
#define _mm256_i64gather_pd(m, i, s) __extension__ ({ \
- double const *__m = (m); \
- __m256i __i = (i); \
- (__m256d)__builtin_ia32_gatherq_pd256((__v4df)_mm256_setzero_pd(), \
- (const __v4df *)__m, (__v4di)__i, \
- (__v4df)_mm256_set1_pd((double)(long long int)-1), (s)); })
+ (__m256d)__builtin_ia32_gatherq_pd256((__v4df)_mm256_undefined_pd(), \
+ (double const *)(m), \
+ (__v4di)(__m256i)(i), \
+ (__v4df)_mm256_cmp_pd(_mm256_setzero_pd(), \
+ _mm256_setzero_pd(), \
+ _CMP_EQ_OQ), \
+ (s)); })
#define _mm_i32gather_ps(m, i, s) __extension__ ({ \
- float const *__m = (m); \
- __m128i __i = (i); \
- (__m128)__builtin_ia32_gatherd_ps((__v4sf)_mm_setzero_ps(), \
- (const __v4sf *)__m, (__v4si)__i, \
- (__v4sf)_mm_set1_ps((float)(int)-1), (s)); })
+ (__m128)__builtin_ia32_gatherd_ps((__v4sf)_mm_undefined_ps(), \
+ (float const *)(m), \
+ (__v4si)(__m128i)(i), \
+ (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \
+ _mm_setzero_ps()), \
+ (s)); })
#define _mm256_i32gather_ps(m, i, s) __extension__ ({ \
- float const *__m = (m); \
- __m256i __i = (i); \
- (__m256)__builtin_ia32_gatherd_ps256((__v8sf)_mm256_setzero_ps(), \
- (const __v8sf *)__m, (__v8si)__i, \
- (__v8sf)_mm256_set1_ps((float)(int)-1), (s)); })
+ (__m256)__builtin_ia32_gatherd_ps256((__v8sf)_mm256_undefined_ps(), \
+ (float const *)(m), \
+ (__v8si)(__m256i)(i), \
+ (__v8sf)_mm256_cmp_ps(_mm256_setzero_ps(), \
+ _mm256_setzero_ps(), \
+ _CMP_EQ_OQ), \
+ (s)); })
#define _mm_i64gather_ps(m, i, s) __extension__ ({ \
- float const *__m = (m); \
- __m128i __i = (i); \
- (__m128)__builtin_ia32_gatherq_ps((__v4sf)_mm_setzero_ps(), \
- (const __v4sf *)__m, (__v2di)__i, \
- (__v4sf)_mm_set1_ps((float)(int)-1), (s)); })
+ (__m128)__builtin_ia32_gatherq_ps((__v4sf)_mm_undefined_ps(), \
+ (float const *)(m), \
+ (__v2di)(__m128i)(i), \
+ (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \
+ _mm_setzero_ps()), \
+ (s)); })
#define _mm256_i64gather_ps(m, i, s) __extension__ ({ \
- float const *__m = (m); \
- __m256i __i = (i); \
- (__m128)__builtin_ia32_gatherq_ps256((__v4sf)_mm_setzero_ps(), \
- (const __v4sf *)__m, (__v4di)__i, \
- (__v4sf)_mm_set1_ps((float)(int)-1), (s)); })
+ (__m128)__builtin_ia32_gatherq_ps256((__v4sf)_mm_undefined_ps(), \
+ (float const *)(m), \
+ (__v4di)(__m256i)(i), \
+ (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \
+ _mm_setzero_ps()), \
+ (s)); })
#define _mm_i32gather_epi32(m, i, s) __extension__ ({ \
- int const *__m = (m); \
- __m128i __i = (i); \
- (__m128i)__builtin_ia32_gatherd_d((__v4si)_mm_setzero_si128(), \
- (const __v4si *)__m, (__v4si)__i, \
- (__v4si)_mm_set1_epi32(-1), (s)); })
+ (__m128i)__builtin_ia32_gatherd_d((__v4si)_mm_undefined_si128(), \
+ (int const *)(m), (__v4si)(__m128i)(i), \
+ (__v4si)_mm_set1_epi32(-1), (s)); })
#define _mm256_i32gather_epi32(m, i, s) __extension__ ({ \
- int const *__m = (m); \
- __m256i __i = (i); \
- (__m256i)__builtin_ia32_gatherd_d256((__v8si)_mm256_setzero_si256(), \
- (const __v8si *)__m, (__v8si)__i, \
- (__v8si)_mm256_set1_epi32(-1), (s)); })
+ (__m256i)__builtin_ia32_gatherd_d256((__v8si)_mm256_undefined_si256(), \
+ (int const *)(m), (__v8si)(__m256i)(i), \
+ (__v8si)_mm256_set1_epi32(-1), (s)); })
#define _mm_i64gather_epi32(m, i, s) __extension__ ({ \
- int const *__m = (m); \
- __m128i __i = (i); \
- (__m128i)__builtin_ia32_gatherq_d((__v4si)_mm_setzero_si128(), \
- (const __v4si *)__m, (__v2di)__i, \
- (__v4si)_mm_set1_epi32(-1), (s)); })
+ (__m128i)__builtin_ia32_gatherq_d((__v4si)_mm_undefined_si128(), \
+ (int const *)(m), (__v2di)(__m128i)(i), \
+ (__v4si)_mm_set1_epi32(-1), (s)); })
#define _mm256_i64gather_epi32(m, i, s) __extension__ ({ \
- int const *__m = (m); \
- __m256i __i = (i); \
- (__m128i)__builtin_ia32_gatherq_d256((__v4si)_mm_setzero_si128(), \
- (const __v4si *)__m, (__v4di)__i, \
- (__v4si)_mm_set1_epi32(-1), (s)); })
+ (__m128i)__builtin_ia32_gatherq_d256((__v4si)_mm_undefined_si128(), \
+ (int const *)(m), (__v4di)(__m256i)(i), \
+ (__v4si)_mm_set1_epi32(-1), (s)); })
#define _mm_i32gather_epi64(m, i, s) __extension__ ({ \
- long long const *__m = (m); \
- __m128i __i = (i); \
- (__m128i)__builtin_ia32_gatherd_q((__v2di)_mm_setzero_si128(), \
- (const __v2di *)__m, (__v4si)__i, \
- (__v2di)_mm_set1_epi64x(-1), (s)); })
+ (__m128i)__builtin_ia32_gatherd_q((__v2di)_mm_undefined_si128(), \
+ (long long const *)(m), \
+ (__v4si)(__m128i)(i), \
+ (__v2di)_mm_set1_epi64x(-1), (s)); })
#define _mm256_i32gather_epi64(m, i, s) __extension__ ({ \
- long long const *__m = (m); \
- __m128i __i = (i); \
- (__m256i)__builtin_ia32_gatherd_q256((__v4di)_mm256_setzero_si256(), \
- (const __v4di *)__m, (__v4si)__i, \
- (__v4di)_mm256_set1_epi64x(-1), (s)); })
+ (__m256i)__builtin_ia32_gatherd_q256((__v4di)_mm256_undefined_si256(), \
+ (long long const *)(m), \
+ (__v4si)(__m128i)(i), \
+ (__v4di)_mm256_set1_epi64x(-1), (s)); })
#define _mm_i64gather_epi64(m, i, s) __extension__ ({ \
- long long const *__m = (m); \
- __m128i __i = (i); \
- (__m128i)__builtin_ia32_gatherq_q((__v2di)_mm_setzero_si128(), \
- (const __v2di *)__m, (__v2di)__i, \
- (__v2di)_mm_set1_epi64x(-1), (s)); })
+ (__m128i)__builtin_ia32_gatherq_q((__v2di)_mm_undefined_si128(), \
+ (long long const *)(m), \
+ (__v2di)(__m128i)(i), \
+ (__v2di)_mm_set1_epi64x(-1), (s)); })
#define _mm256_i64gather_epi64(m, i, s) __extension__ ({ \
- long long const *__m = (m); \
- __m256i __i = (i); \
- (__m256i)__builtin_ia32_gatherq_q256((__v4di)_mm256_setzero_si256(), \
- (const __v4di *)__m, (__v4di)__i, \
- (__v4di)_mm256_set1_epi64x(-1), (s)); })
+ (__m256i)__builtin_ia32_gatherq_q256((__v4di)_mm256_undefined_si256(), \
+ (long long const *)(m), \
+ (__v4di)(__m256i)(i), \
+ (__v4di)_mm256_set1_epi64x(-1), (s)); })
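
For the unmasked gathers, the source operand is now _mm*_undefined_* (every enabled lane is overwritten anyway), and the floating-point all-ones mask is built by comparing zero with itself for equality, which sets every bit of each lane, rather than by converting -1 to a float/double whose bit pattern is not all ones. For illustration:

#include <immintrin.h>

__attribute__((__target__("avx")))
__m256d all_lanes_enabled(void) {
  /* 0.0 == 0.0 holds in every lane, so each 64-bit lane becomes
     all ones -- the canonical "all lanes enabled" gather mask. */
  return _mm256_cmp_pd(_mm256_setzero_pd(), _mm256_setzero_pd(),
                       _CMP_EQ_OQ);
}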
#undef __DEFAULT_FN_ATTRS
diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512bwintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512bwintrin.h
index 9e8297a9c9a5..f289ed71a332 100644
--- a/contrib/llvm/tools/clang/lib/Headers/avx512bwintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/avx512bwintrin.h
@@ -34,10 +34,10 @@ typedef char __v64qi __attribute__ ((__vector_size__ (64)));
typedef short __v32hi __attribute__ ((__vector_size__ (64)));
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw")))
static __inline __v64qi __DEFAULT_FN_ATTRS
-_mm512_setzero_qi (void) {
+_mm512_setzero_qi(void) {
return (__v64qi){ 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
@@ -49,7 +49,7 @@ _mm512_setzero_qi (void) {
}
static __inline __v32hi __DEFAULT_FN_ATTRS
-_mm512_setzero_hi (void) {
+_mm512_setzero_hi(void) {
return (__v32hi){ 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
@@ -363,8 +363,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_add_epi8 (__mmask64 __U, __m512i __A, __m512i __B) {
return (__m512i) __builtin_ia32_paddb512_mask ((__v64qi) __A,
(__v64qi) __B,
- (__v64qi)
- _mm512_setzero_qi (),
+ (__v64qi) _mm512_setzero_qi(),
(__mmask64) __U);
}
@@ -385,8 +384,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sub_epi8 (__mmask64 __U, __m512i __A, __m512i __B) {
return (__m512i) __builtin_ia32_psubb512_mask ((__v64qi) __A,
(__v64qi) __B,
- (__v64qi)
- _mm512_setzero_qi (),
+ (__v64qi) _mm512_setzero_qi(),
(__mmask64) __U);
}
@@ -407,8 +405,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_add_epi16 (__mmask32 __U, __m512i __A, __m512i __B) {
return (__m512i) __builtin_ia32_paddw512_mask ((__v32hi) __A,
(__v32hi) __B,
- (__v32hi)
- _mm512_setzero_hi (),
+ (__v32hi) _mm512_setzero_hi(),
(__mmask32) __U);
}
@@ -429,8 +426,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sub_epi16 (__mmask32 __U, __m512i __A, __m512i __B) {
return (__m512i) __builtin_ia32_psubw512_mask ((__v32hi) __A,
(__v32hi) __B,
- (__v32hi)
- _mm512_setzero_hi (),
+ (__v32hi) _mm512_setzero_hi(),
(__mmask32) __U);
}
@@ -451,8 +447,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mullo_epi16 (__mmask32 __U, __m512i __A, __m512i __B) {
return (__m512i) __builtin_ia32_pmullw512_mask ((__v32hi) __A,
(__v32hi) __B,
- (__v32hi)
- _mm512_setzero_hi (),
+ (__v32hi) _mm512_setzero_hi(),
(__mmask32) __U);
}
@@ -476,7 +471,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_abs_epi8 (__m512i __A)
{
return (__m512i) __builtin_ia32_pabsb512_mask ((__v64qi) __A,
- (__v64qi) _mm512_setzero_qi (),
+ (__v64qi) _mm512_setzero_qi(),
(__mmask64) -1);
}
@@ -492,7 +487,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_abs_epi8 (__mmask64 __U, __m512i __A)
{
return (__m512i) __builtin_ia32_pabsb512_mask ((__v64qi) __A,
- (__v64qi) _mm512_setzero_qi (),
+ (__v64qi) _mm512_setzero_qi(),
(__mmask64) __U);
}
@@ -500,7 +495,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_abs_epi16 (__m512i __A)
{
return (__m512i) __builtin_ia32_pabsw512_mask ((__v32hi) __A,
- (__v32hi) _mm512_setzero_hi (),
+ (__v32hi) _mm512_setzero_hi(),
(__mmask32) -1);
}
@@ -516,7 +511,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_abs_epi16 (__mmask32 __U, __m512i __A)
{
return (__m512i) __builtin_ia32_pabsw512_mask ((__v32hi) __A,
- (__v32hi) _mm512_setzero_hi (),
+ (__v32hi) _mm512_setzero_hi(),
(__mmask32) __U);
}
@@ -525,7 +520,7 @@ _mm512_packs_epi32 (__m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_packssdw512_mask ((__v16si) __A,
(__v16si) __B,
- (__v32hi) _mm512_setzero_hi (),
+ (__v32hi) _mm512_setzero_hi(),
(__mmask32) -1);
}
@@ -553,7 +548,7 @@ _mm512_packs_epi16 (__m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_packsswb512_mask ((__v32hi) __A,
(__v32hi) __B,
- (__v64qi) _mm512_setzero_qi (),
+ (__v64qi) _mm512_setzero_qi(),
(__mmask64) -1);
}
@@ -581,7 +576,7 @@ _mm512_packus_epi32 (__m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_packusdw512_mask ((__v16si) __A,
(__v16si) __B,
- (__v32hi) _mm512_setzero_hi (),
+ (__v32hi) _mm512_setzero_hi(),
(__mmask32) -1);
}
@@ -609,7 +604,7 @@ _mm512_packus_epi16 (__m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_packuswb512_mask ((__v32hi) __A,
(__v32hi) __B,
- (__v64qi) _mm512_setzero_qi (),
+ (__v64qi) _mm512_setzero_qi(),
(__mmask64) -1);
}
@@ -637,7 +632,7 @@ _mm512_adds_epi8 (__m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_paddsb512_mask ((__v64qi) __A,
(__v64qi) __B,
- (__v64qi) _mm512_setzero_qi (),
+ (__v64qi) _mm512_setzero_qi(),
(__mmask64) -1);
}
@@ -656,7 +651,7 @@ _mm512_maskz_adds_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_paddsb512_mask ((__v64qi) __A,
(__v64qi) __B,
- (__v64qi) _mm512_setzero_qi (),
+ (__v64qi) _mm512_setzero_qi(),
(__mmask64) __U);
}
@@ -665,7 +660,7 @@ _mm512_adds_epi16 (__m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_paddsw512_mask ((__v32hi) __A,
(__v32hi) __B,
- (__v32hi) _mm512_setzero_hi (),
+ (__v32hi) _mm512_setzero_hi(),
(__mmask32) -1);
}
@@ -684,7 +679,7 @@ _mm512_maskz_adds_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_paddsw512_mask ((__v32hi) __A,
(__v32hi) __B,
- (__v32hi) _mm512_setzero_hi (),
+ (__v32hi) _mm512_setzero_hi(),
(__mmask32) __U);
}
@@ -693,7 +688,7 @@ _mm512_adds_epu8 (__m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_paddusb512_mask ((__v64qi) __A,
(__v64qi) __B,
- (__v64qi) _mm512_setzero_qi (),
+ (__v64qi) _mm512_setzero_qi(),
(__mmask64) -1);
}
@@ -712,7 +707,7 @@ _mm512_maskz_adds_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_paddusb512_mask ((__v64qi) __A,
(__v64qi) __B,
- (__v64qi) _mm512_setzero_qi (),
+ (__v64qi) _mm512_setzero_qi(),
(__mmask64) __U);
}
@@ -721,7 +716,7 @@ _mm512_adds_epu16 (__m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_paddusw512_mask ((__v32hi) __A,
(__v32hi) __B,
- (__v32hi) _mm512_setzero_hi (),
+ (__v32hi) _mm512_setzero_hi(),
(__mmask32) -1);
}
@@ -740,7 +735,7 @@ _mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_paddusw512_mask ((__v32hi) __A,
(__v32hi) __B,
- (__v32hi) _mm512_setzero_hi (),
+ (__v32hi) _mm512_setzero_hi(),
(__mmask32) __U);
}
@@ -749,7 +744,7 @@ _mm512_avg_epu8 (__m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pavgb512_mask ((__v64qi) __A,
(__v64qi) __B,
- (__v64qi) _mm512_setzero_qi (),
+ (__v64qi) _mm512_setzero_qi(),
(__mmask64) -1);
}
@@ -777,7 +772,7 @@ _mm512_avg_epu16 (__m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pavgw512_mask ((__v32hi) __A,
(__v32hi) __B,
- (__v32hi) _mm512_setzero_hi (),
+ (__v32hi) _mm512_setzero_hi(),
(__mmask32) -1);
}
@@ -805,7 +800,7 @@ _mm512_max_epi8 (__m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pmaxsb512_mask ((__v64qi) __A,
(__v64qi) __B,
- (__v64qi) _mm512_setzero_qi (),
+ (__v64qi) _mm512_setzero_qi(),
(__mmask64) -1);
}
@@ -833,7 +828,7 @@ _mm512_max_epi16 (__m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pmaxsw512_mask ((__v32hi) __A,
(__v32hi) __B,
- (__v32hi) _mm512_setzero_hi (),
+ (__v32hi) _mm512_setzero_hi(),
(__mmask32) -1);
}
@@ -861,7 +856,7 @@ _mm512_max_epu8 (__m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pmaxub512_mask ((__v64qi) __A,
(__v64qi) __B,
- (__v64qi) _mm512_setzero_qi (),
+ (__v64qi) _mm512_setzero_qi(),
(__mmask64) -1);
}
@@ -889,7 +884,7 @@ _mm512_max_epu16 (__m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pmaxuw512_mask ((__v32hi) __A,
(__v32hi) __B,
- (__v32hi) _mm512_setzero_hi (),
+ (__v32hi) _mm512_setzero_hi(),
(__mmask32) -1);
}
@@ -917,7 +912,7 @@ _mm512_min_epi8 (__m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pminsb512_mask ((__v64qi) __A,
(__v64qi) __B,
- (__v64qi) _mm512_setzero_qi (),
+ (__v64qi) _mm512_setzero_qi(),
(__mmask64) -1);
}
@@ -945,7 +940,7 @@ _mm512_min_epi16 (__m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pminsw512_mask ((__v32hi) __A,
(__v32hi) __B,
- (__v32hi) _mm512_setzero_hi (),
+ (__v32hi) _mm512_setzero_hi(),
(__mmask32) -1);
}
@@ -973,7 +968,7 @@ _mm512_min_epu8 (__m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pminub512_mask ((__v64qi) __A,
(__v64qi) __B,
- (__v64qi) _mm512_setzero_qi (),
+ (__v64qi) _mm512_setzero_qi(),
(__mmask64) -1);
}
@@ -1001,7 +996,7 @@ _mm512_min_epu16 (__m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pminuw512_mask ((__v32hi) __A,
(__v32hi) __B,
- (__v32hi) _mm512_setzero_hi (),
+ (__v32hi) _mm512_setzero_hi(),
(__mmask32) -1);
}
@@ -1029,7 +1024,7 @@ _mm512_shuffle_epi8 (__m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pshufb512_mask ((__v64qi) __A,
(__v64qi) __B,
- (__v64qi) _mm512_setzero_qi (),
+ (__v64qi) _mm512_setzero_qi(),
(__mmask64) -1);
}
@@ -1048,7 +1043,7 @@ _mm512_maskz_shuffle_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pshufb512_mask ((__v64qi) __A,
(__v64qi) __B,
- (__v64qi) _mm512_setzero_qi (),
+ (__v64qi) _mm512_setzero_qi(),
(__mmask64) __U);
}
@@ -1057,7 +1052,7 @@ _mm512_subs_epi8 (__m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_psubsb512_mask ((__v64qi) __A,
(__v64qi) __B,
- (__v64qi) _mm512_setzero_qi (),
+ (__v64qi) _mm512_setzero_qi(),
(__mmask64) -1);
}
@@ -1076,7 +1071,7 @@ _mm512_maskz_subs_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_psubsb512_mask ((__v64qi) __A,
(__v64qi) __B,
- (__v64qi) _mm512_setzero_qi (),
+ (__v64qi) _mm512_setzero_qi(),
(__mmask64) __U);
}
@@ -1085,7 +1080,7 @@ _mm512_subs_epi16 (__m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_psubsw512_mask ((__v32hi) __A,
(__v32hi) __B,
- (__v32hi) _mm512_setzero_hi (),
+ (__v32hi) _mm512_setzero_hi(),
(__mmask32) -1);
}
@@ -1104,7 +1099,7 @@ _mm512_maskz_subs_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_psubsw512_mask ((__v32hi) __A,
(__v32hi) __B,
- (__v32hi) _mm512_setzero_hi (),
+ (__v32hi) _mm512_setzero_hi(),
(__mmask32) __U);
}
@@ -1113,7 +1108,7 @@ _mm512_subs_epu8 (__m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_psubusb512_mask ((__v64qi) __A,
(__v64qi) __B,
- (__v64qi) _mm512_setzero_qi (),
+ (__v64qi) _mm512_setzero_qi(),
(__mmask64) -1);
}
@@ -1132,7 +1127,7 @@ _mm512_maskz_subs_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_psubusb512_mask ((__v64qi) __A,
(__v64qi) __B,
- (__v64qi) _mm512_setzero_qi (),
+ (__v64qi) _mm512_setzero_qi(),
(__mmask64) __U);
}
@@ -1141,7 +1136,7 @@ _mm512_subs_epu16 (__m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_psubusw512_mask ((__v32hi) __A,
(__v32hi) __B,
- (__v32hi) _mm512_setzero_hi (),
+ (__v32hi) _mm512_setzero_hi(),
(__mmask32) -1);
}
@@ -1160,7 +1155,7 @@ _mm512_maskz_subs_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_psubusw512_mask ((__v32hi) __A,
(__v32hi) __B,
- (__v32hi) _mm512_setzero_hi (),
+ (__v32hi) _mm512_setzero_hi(),
(__mmask32) __U);
}
@@ -1204,6 +1199,303 @@ _mm512_maskz_permutex2var_epi16 (__mmask32 __U, __m512i __A,
(__mmask32) __U);
}
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mulhrs_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmulhrsw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) _mm512_setzero_hi(),
+ (__mmask32) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_mulhrs_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmulhrsw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_mulhrs_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmulhrsw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) _mm512_setzero_hi(),
+ (__mmask32) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mulhi_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmulhw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) _mm512_setzero_hi(),
+ (__mmask32) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_mulhi_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmulhw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_mulhi_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmulhw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) _mm512_setzero_hi(),
+ (__mmask32) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mulhi_epu16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmulhuw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) _mm512_setzero_hi(),
+ (__mmask32) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_mulhi_epu16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmulhuw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_mulhi_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmulhuw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) _mm512_setzero_hi(),
+ (__mmask32) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maddubs_epi16 (__m512i __X, __m512i __Y) {
+ return (__m512i) __builtin_ia32_pmaddubsw512_mask ((__v64qi) __X,
+ (__v64qi) __Y,
+ (__v32hi) _mm512_setzero_hi(),
+ (__mmask32) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_maddubs_epi16 (__m512i __W, __mmask32 __U, __m512i __X,
+ __m512i __Y) {
+ return (__m512i) __builtin_ia32_pmaddubsw512_mask ((__v64qi) __X,
+ (__v64qi) __Y,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_maddubs_epi16 (__mmask32 __U, __m512i __X, __m512i __Y) {
+ return (__m512i) __builtin_ia32_pmaddubsw512_mask ((__v64qi) __X,
+ (__v64qi) __Y,
+ (__v32hi) _mm512_setzero_hi(),
+ (__mmask32) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_madd_epi16 (__m512i __A, __m512i __B) {
+ return (__m512i) __builtin_ia32_pmaddwd512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v16si) _mm512_setzero_si512(),
+ (__mmask16) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_madd_epi16 (__m512i __W, __mmask16 __U, __m512i __A,
+ __m512i __B) {
+ return (__m512i) __builtin_ia32_pmaddwd512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_madd_epi16 (__mmask16 __U, __m512i __A, __m512i __B) {
+ return (__m512i) __builtin_ia32_pmaddwd512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v16si) _mm512_setzero_si512(),
+ (__mmask16) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_cvtsepi16_epi8 (__m512i __A) {
+ return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
+ (__v32qi)_mm256_setzero_si256(),
+ (__mmask32) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtsepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) {
+ return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
+ (__v32qi)__O,
+ __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtsepi16_epi8 (__mmask32 __M, __m512i __A) {
+ return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
+ (__v32qi) _mm256_setzero_si256(),
+ __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_cvtusepi16_epi8 (__m512i __A) {
+ return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
+ (__v32qi) _mm256_setzero_si256(),
+ (__mmask32) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtusepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) {
+ return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
+ (__v32qi) __O,
+ __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtusepi16_epi8 (__mmask32 __M, __m512i __A) {
+ return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
+ (__v32qi) _mm256_setzero_si256(),
+ __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_cvtepi16_epi8 (__m512i __A) {
+ return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
+ (__v32qi) _mm256_setzero_si256(),
+ (__mmask32) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) {
+ return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
+ (__v32qi) __O,
+ __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtepi16_epi8 (__mmask32 __M, __m512i __A) {
+ return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
+ (__v32qi) _mm256_setzero_si256(),
+ __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_unpackhi_epi8 (__m512i __A, __m512i __B) {
+ return (__m512i) __builtin_ia32_punpckhbw512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi) _mm512_setzero_qi(),
+ (__mmask64) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_unpackhi_epi8 (__m512i __W, __mmask64 __U, __m512i __A,
+ __m512i __B) {
+ return (__m512i) __builtin_ia32_punpckhbw512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_unpackhi_epi8 (__mmask64 __U, __m512i __A, __m512i __B) {
+ return (__m512i) __builtin_ia32_punpckhbw512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi) _mm512_setzero_qi(),
+ (__mmask64) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_unpackhi_epi16 (__m512i __A, __m512i __B) {
+ return (__m512i) __builtin_ia32_punpckhwd512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) _mm512_setzero_hi(),
+ (__mmask32) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_unpackhi_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B) {
+ return (__m512i) __builtin_ia32_punpckhwd512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_unpackhi_epi16 (__mmask32 __U, __m512i __A, __m512i __B) {
+ return (__m512i) __builtin_ia32_punpckhwd512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) _mm512_setzero_hi(),
+ (__mmask32) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_unpacklo_epi8 (__m512i __A, __m512i __B) {
+ return (__m512i) __builtin_ia32_punpcklbw512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi) _mm512_setzero_qi(),
+ (__mmask64) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_unpacklo_epi8 (__m512i __W, __mmask64 __U, __m512i __A,
+ __m512i __B) {
+ return (__m512i) __builtin_ia32_punpcklbw512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_unpacklo_epi8 (__mmask64 __U, __m512i __A, __m512i __B) {
+ return (__m512i) __builtin_ia32_punpcklbw512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi) _mm512_setzero_qi(),
+ (__mmask64) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_unpacklo_epi16 (__m512i __A, __m512i __B) {
+ return (__m512i) __builtin_ia32_punpcklwd512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) _mm512_setzero_hi(),
+ (__mmask32) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_unpacklo_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B) {
+ return (__m512i) __builtin_ia32_punpcklwd512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_unpacklo_epi16 (__mmask32 __U, __m512i __A, __m512i __B) {
+ return (__m512i) __builtin_ia32_punpcklwd512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) _mm512_setzero_hi(),
+ (__mmask32) __U);
+}
+
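
Each of the new AVX-512BW intrinsics above follows the same triple pattern: the plain form passes an all-ones mask with a zero vector, _mask_ merges results into __W under __U, and _maskz_ zeroes the lanes whose mask bit is clear. A usage sketch (the function name is illustrative):

#include <immintrin.h>

__attribute__((__target__("avx512bw")))
__m512i mulhi_selected(__mmask32 k, __m512i a, __m512i b) {
  /* 16-bit lanes whose bit of k is clear come back as zero. */
  return _mm512_maskz_mulhi_epu16(k, a, b);
}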
#define _mm512_cmp_epi8_mask(a, b, p) __extension__ ({ \
(__mmask16)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
(__v64qi)(__m512i)(b), \
diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512dqintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512dqintrin.h
index c946de286742..afee4903ba77 100644
--- a/contrib/llvm/tools/clang/lib/Headers/avx512dqintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/avx512dqintrin.h
@@ -29,7 +29,7 @@
#define __AVX512DQINTRIN_H
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq")))
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mullo_epi64 (__m512i __A, __m512i __B) {
@@ -237,6 +237,542 @@ _mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B) {
(__mmask16) __U);
}
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_cvtpd_epi64 (__m512d __A) {
+ return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
+ (__v8di) _mm512_setzero_si512(),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
+ return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A) {
+ return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
+ (__v8di) _mm512_setzero_si512(),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundpd_epi64(__A, __R) __extension__ ({ \
+ (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, \
+ (__v8di) _mm512_setzero_si512(), (__mmask8) -1, __R);})
+
+#define _mm512_mask_cvt_roundpd_epi64(__W, __U, __A, __R) __extension__ ({ \
+ (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, \
+ (__v8di) __W, (__mmask8) __U, __R);})
+
+#define _mm512_maskz_cvt_roundpd_epi64(__U, __A, __R) __extension__ ({ \
+ (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, \
+ (__v8di) _mm512_setzero_si512(), (__mmask8) __U, __R); })
+
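A sketch of the new double-to-int64 conversions, assuming AVX512DQ hardware, -mavx512dq, and the MXCSR default of round-to-nearest-even for the _MM_FROUND_CUR_DIRECTION form:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m512d v = _mm512_set1_pd(2.7);
  __m512i t = _mm512_cvttpd_epi64(v);   /* truncating: 2 in every lane */
  /* Explicit-rounding form; _MM_FROUND_CUR_DIRECTION defers to MXCSR,
     which defaults to round-to-nearest-even: 3 in every lane. */
  __m512i n = _mm512_cvt_roundpd_epi64(v, _MM_FROUND_CUR_DIRECTION);
  long long tt[8], nn[8];
  _mm512_storeu_si512((void *)tt, t);
  _mm512_storeu_si512((void *)nn, n);
  printf("%lld %lld\n", tt[0], nn[0]);  /* 2 3 */
  return 0;
}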
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_cvtpd_epu64 (__m512d __A) {
+ return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
+ (__v8di) _mm512_setzero_si512(),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
+ return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A) {
+ return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
+ (__v8di) _mm512_setzero_si512(),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundpd_epu64(__A, __R) __extension__ ({ \
+ (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, \
+ (__v8di) _mm512_setzero_si512(), (__mmask8) -1, __R);})
+
+#define _mm512_mask_cvt_roundpd_epu64(__W, __U, __A, __R) __extension__ ({ \
+ (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, \
+ (__v8di) __W, (__mmask8) __U, __R);})
+
+#define _mm512_maskz_cvt_roundpd_epu64(__U, __A, __R) __extension__ ({ \
+ (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, \
+ (__v8di) _mm512_setzero_si512(), (__mmask8) __U, __R);})
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_cvtps_epi64 (__m256 __A) {
+ return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
+ (__v8di) _mm512_setzero_si512(),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
+ return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A) {
+ return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
+ (__v8di) _mm512_setzero_si512(),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundps_epi64(__A, __R) __extension__ ({ \
+ (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A, \
+ (__v8di) _mm512_setzero_si512(), (__mmask8) -1, __R);})
+
+#define _mm512_mask_cvt_roundps_epi64(__W, __U, __A, __R) __extension__ ({ \
+ (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A, \
+ (__v8di) __W, (__mmask8) __U, __R);})
+
+#define _mm512_maskz_cvt_roundps_epi64(__U, __A, __R) __extension__ ({ \
+ (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A, \
+ (__v8di) _mm512_setzero_si512(), (__mmask8) __U, __R);})
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_cvtps_epu64 (__m256 __A) {
+ return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
+ (__v8di) _mm512_setzero_si512(),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
+ return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) {
+ return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
+ (__v8di) _mm512_setzero_si512(),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundps_epu64(__A, __R) __extension__ ({ \
+ (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, \
+ (__v8di) _mm512_setzero_si512(), (__mmask8) -1, __R);})
+
+#define _mm512_mask_cvt_roundps_epu64(__W, __U, __A, __R) __extension__ ({ \
+ (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, \
+ (__v8di) __W, (__mmask8) __U, __R);})
+
+#define _mm512_maskz_cvt_roundps_epu64(__U, __A, __R) __extension__ ({ \
+ (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, \
+ (__v8di) _mm512_setzero_si512(), (__mmask8) __U, __R);})
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_cvtepi64_pd (__m512i __A) {
+ return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
+ (__v8df) _mm512_setzero_pd(),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
+ return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A) {
+ return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
+ (__v8df) _mm512_setzero_pd(),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundepi64_pd(__A, __R) __extension__ ({ \
+ (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A, \
+ (__v8df) _mm512_setzero_pd(), (__mmask8) -1, __R);})
+
+#define _mm512_mask_cvt_roundepi64_pd(__W, __U, __A, __R) __extension__ ({ \
+ (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A, \
+ (__v8df) __W, (__mmask8) __U, __R);})
+
+#define _mm512_maskz_cvt_roundepi64_pd(__U, __A, __R) __extension__ ({ \
+ (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A, \
+ (__v8df) _mm512_setzero_pd(), (__mmask8) __U, __R);})
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm512_cvtepi64_ps (__m512i __A) {
+ return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
+ (__v8sf) _mm256_setzero_ps(),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
+ return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
+ (__v8sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A) {
+ return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
+ (__v8sf) _mm256_setzero_ps(),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundepi64_ps(__A, __R) __extension__ ({ \
+ (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A, \
+ (__v8sf) _mm256_setzero_ps(), (__mmask8) -1, __R);})
+
+#define _mm512_mask_cvt_roundepi64_ps(__W, __U, __A, __R) __extension__ ({ \
+ (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A, \
+ (__v8sf) __W, (__mmask8) __U, __R);})
+
+#define _mm512_maskz_cvt_roundepi64_ps(__U, __A, __R) __extension__ ({ \
+ (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A, \
+ (__v8sf) _mm256_setzero_ps(), (__mmask8) __U, __R);})
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_cvttpd_epi64 (__m512d __A) {
+ return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
+ (__v8di) _mm512_setzero_si512(),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
+ return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A) {
+ return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
+ (__v8di) _mm512_setzero_si512(),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvtt_roundpd_epi64(__A, __R) __extension__ ({ \
+ (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, \
+ (__v8di) _mm512_setzero_si512(), (__mmask8) -1, __R);})
+
+#define _mm512_mask_cvtt_roundpd_epi64(__W, __U, __A, __R) __extension__ ({ \
+ (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, \
+ (__v8di) __W, (__mmask8) __U, __R);})
+
+#define _mm512_maskz_cvtt_roundpd_epi64(__U, __A, __R) __extension__ ({ \
+ (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, \
+ (__v8di) _mm512_setzero_si512(), (__mmask8) __U, __R);})
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_cvttpd_epu64 (__m512d __A) {
+ return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
+ (__v8di) _mm512_setzero_si512(),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
+ return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A) {
+ return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
+ (__v8di) _mm512_setzero_si512(),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvtt_roundpd_epu64(__A, __R) __extension__ ({ \
+ (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, \
+ (__v8di) _mm512_setzero_si512(), (__mmask8) -1, __R);})
+
+#define _mm512_mask_cvtt_roundpd_epu64(__W, __U, __A, __R) __extension__ ({ \
+ (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, \
+ (__v8di) __W, (__mmask8) __U, __R);})
+
+#define _mm512_maskz_cvtt_roundpd_epu64(__U, __A, __R) __extension__ ({ \
+ (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, \
+ (__v8di) _mm512_setzero_si512(), (__mmask8) __U, __R);})
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_cvttps_epi64 (__m256 __A) {
+ return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
+ (__v8di) _mm512_setzero_si512(),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
+ return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A) {
+ return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
+ (__v8di) _mm512_setzero_si512(),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvtt_roundps_epi64(__A, __R) __extension__ ({ \
+ (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, \
+ (__v8di) _mm512_setzero_si512(), (__mmask8) -1, __R);})
+
+#define _mm512_mask_cvtt_roundps_epi64(__W, __U, __A, __R) __extension__ ({ \
+ (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, \
+ (__v8di) __W, (__mmask8) __U, __R);})
+
+#define _mm512_maskz_cvtt_roundps_epi64(__U, __A, __R) __extension__ ({ \
+ (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, \
+ (__v8di) _mm512_setzero_si512(), (__mmask8) __U, __R);})
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_cvttps_epu64 (__m256 __A) {
+ return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
+ (__v8di) _mm512_setzero_si512(),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
+ return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) {
+ return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
+ (__v8di) _mm512_setzero_si512(),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvtt_roundps_epu64(__A, __R) __extension__ ({ \
+ (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A, \
+ (__v8di) _mm512_setzero_si512(), (__mmask8) -1, __R);})
+
+#define _mm512_mask_cvtt_roundps_epu64(__W, __U, __A, __R) __extension__ ({ \
+ (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A, \
+ (__v8di) __W, (__mmask8) __U, __R);})
+
+#define _mm512_maskz_cvtt_roundps_epu64(__U, __A, __R) __extension__ ({ \
+ (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A, \
+ (__v8di) _mm512_setzero_si512(), (__mmask8) __U, __R);})
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_cvtepu64_pd (__m512i __A) {
+ return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
+ (__v8df) _mm512_setzero_pd(),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
+ return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A) {
+ return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
+ (__v8df) _mm512_setzero_pd(),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundepu64_pd(__A, __R) __extension__ ({ \
+ (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A, \
+ (__v8df) _mm512_setzero_pd(), (__mmask8) -1, __R);})
+
+#define _mm512_mask_cvt_roundepu64_pd(__W, __U, __A, __R) __extension__ ({ \
+ (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A, \
+ (__v8df) __W, (__mmask8) __U, __R);})
+
+#define _mm512_maskz_cvt_roundepu64_pd(__U, __A, __R) __extension__ ({ \
+ (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A, \
+ (__v8df) _mm512_setzero_pd(), (__mmask8) __U, __R);})
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm512_cvtepu64_ps (__m512i __A) {
+ return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
+ (__v8sf) _mm256_setzero_ps(),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
+ return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
+ (__v8sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) {
+ return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
+ (__v8sf) _mm256_setzero_ps(),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundepu64_ps(__A, __R) __extension__ ({ \
+ (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, \
+ (__v8sf) _mm256_setzero_ps(), (__mmask8) -1, __R);})
+
+#define _mm512_mask_cvt_roundepu64_ps(__W, __U, __A, __R) __extension__ ({ \
+ (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, \
+ (__v8sf) __W, (__mmask8) __U, __R);})
+
+#define _mm512_maskz_cvt_roundepu64_ps(__U, __A, __R) __extension__ ({ \
+ (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, \
+ (__v8sf) _mm256_setzero_ps(), (__mmask8) __U, __R);})
+
+#define _mm512_range_pd(__A, __B, __C) __extension__ ({ \
+ (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A, (__v8df) __B, __C,\
+ (__v8df) _mm512_setzero_pd(), (__mmask8) -1, \
+ _MM_FROUND_CUR_DIRECTION);})
+
+#define _mm512_mask_range_pd(__W, __U, __A, __B, __C) __extension__ ({ \
+ (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A, (__v8df) __B, __C,\
+ (__v8df) __W, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);})
+
+#define _mm512_maskz_range_pd(__U, __A, __B, __C) __extension__ ({ \
+ (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A, (__v8df) __B, __C, \
+ (__v8df) _mm512_setzero_pd(), (__mmask8) __U, \
+ _MM_FROUND_CUR_DIRECTION);})
+
+#define _mm512_range_round_pd(__A, __B, __C, __R) __extension__ ({ \
+ (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A, (__v8df) __B, __C, \
+ (__v8df) _mm512_setzero_pd(), (__mmask8) -1, __R);})
+
+#define _mm512_mask_range_round_pd(__W, __U, __A, __B, __C, __R) __extension__ ({ \
+ (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A, (__v8df) __B, __C, \
+ (__v8df) __W, (__mmask8) __U, __R);})
+
+#define _mm512_maskz_range_round_pd(__U, __A, __B, __C, __R) __extension__ ({ \
+ (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A, (__v8df) __B, __C, \
+ (__v8df) _mm512_setzero_pd(), (__mmask8) __U, __R);})
+
+#define _mm512_range_ps(__A, __B, __C) __extension__ ({ \
+ (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A, (__v16sf) __B, __C, \
+ (__v16sf) _mm512_setzero_ps(), (__mmask16) -1, \
+ _MM_FROUND_CUR_DIRECTION);})
+
+#define _mm512_mask_range_ps(__W, __U, __A, __B, __C) __extension__ ({ \
+ (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A, (__v16sf) __B, \
+ __C, (__v16sf) __W, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION);})
+
+#define _mm512_maskz_range_ps(__U, __A, __B, __C) __extension__ ({ \
+ (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A, (__v16sf) __B, \
+ __C, (__v16sf) _mm512_setzero_ps(), (__mmask16) __U, \
+ _MM_FROUND_CUR_DIRECTION);})
+
+#define _mm512_range_round_ps(__A, __B, __C, __R) __extension__ ({ \
+ (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A, (__v16sf) __B, \
+ __C, (__v16sf) _mm512_setzero_ps(), (__mmask16) -1, __R);})
+
+#define _mm512_mask_range_round_ps(__W, __U, __A, __B, __C, __R) __extension__ ({ \
+ (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A, (__v16sf) __B, \
+ __C, (__v16sf) __W, (__mmask16) __U, __R);})
+
+#define _mm512_maskz_range_round_ps(__U, __A, __B, __C, __R) __extension__ ({ \
+ (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A, (__v16sf) __B, \
+ __C, (__v16sf) _mm512_setzero_ps(), (__mmask16) __U, __R);})
+
+#define _mm512_reduce_pd(__A, __B) __extension__ ({ \
+ (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B, \
+ (__v8df) _mm512_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);})
+
+#define _mm512_mask_reduce_pd(__W, __U, __A, __B) __extension__ ({ \
+ (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B, \
+ (__v8df) __W, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);})
+
+#define _mm512_maskz_reduce_pd(__U, __A, __B) __extension__ ({ \
+ (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B, \
+ (__v8df) _mm512_setzero_pd(), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);})
+
+#define _mm512_reduce_ps(__A, __B) __extension__ ({ \
+ (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B, \
+ (__v16sf) _mm512_setzero_ps(), (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);})
+
+#define _mm512_mask_reduce_ps(__W, __U, __A, __B) __extension__ ({ \
+ (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B, \
+ (__v16sf) __W, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION);})
+
+#define _mm512_maskz_reduce_ps(__U, __A, __B) __extension__ ({ \
+ (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B, \
+ (__v16sf) _mm512_setzero_ps(), (__mmask16) __U, _MM_FROUND_CUR_DIRECTION);})
+
+#define _mm512_reduce_round_pd(__A, __B, __R) __extension__ ({\
+ (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B, \
+ (__v8df) _mm512_setzero_pd(), (__mmask8) -1, __R);})
+
+#define _mm512_mask_reduce_round_pd(__W, __U, __A, __B, __R) __extension__ ({\
+ (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B, \
+ (__v8df) __W, (__mmask8) __U, __R);})
+
+#define _mm512_maskz_reduce_round_pd(__U, __A, __B, __R) __extension__ ({\
+ (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B, \
+ (__v8df) _mm512_setzero_pd(), (__mmask8) __U, __R);})
+
+#define _mm512_reduce_round_ps(__A, __B, __R) __extension__ ({\
+ (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B, \
+ (__v16sf) _mm512_setzero_ps(), (__mmask16) -1, __R);})
+
+#define _mm512_mask_reduce_round_ps(__W, __U, __A, __B, __R) __extension__ ({\
+ (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B, \
+ (__v16sf) __W, (__mmask16) __U, __R);})
+
+#define _mm512_maskz_reduce_round_ps(__U, __A, __B, __R) __extension__ ({\
+ (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B, \
+ (__v16sf) _mm512_setzero_ps(), (__mmask16) __U, __R);})
+
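For the vrange immediates: imm8[1:0] selects min, max, abs-min, or abs-max, and imm8[3:2] a sign control. A sketch assuming AVX512DQ support, with immediates 0 and 5 chosen only for illustration:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m512d a = _mm512_set1_pd(-3.0);
  __m512d b = _mm512_set1_pd(2.0);
  __m512d mn = _mm512_range_pd(a, b, 0);  /* imm 0b0000: min, sign of src1 */
  __m512d mx = _mm512_range_pd(a, b, 5);  /* imm 0b0101: max, sign of result */
  double omn[8], omx[8];
  _mm512_storeu_pd(omn, mn);
  _mm512_storeu_pd(omx, mx);
  printf("%f %f\n", omn[0], omx[0]);  /* -3.000000 2.000000 */
  return 0;
}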
#undef __DEFAULT_FN_ATTRS
#endif
diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512erintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512erintrin.h
index 56edffc11ca8..40a912189e5d 100644
--- a/contrib/llvm/tools/clang/lib/Headers/avx512erintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/avx512erintrin.h
@@ -1,4 +1,4 @@
-/*===---- avx512fintrin.h - AVX2 intrinsics -----------------------------------===
+/*===---- avx512erintrin.h - AVX512ER intrinsics ---------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -126,19 +126,19 @@
_mm512_maskz_rsqrt28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
#define _mm_rsqrt28_round_ss(A, B, R) __extension__ ({ \
- (__m128)__builtin_ia32_rsqrt28ss_mask((__v4sf)(__m128)(A), \
+ (__m128)__builtin_ia32_rsqrt28ss_round((__v4sf)(__m128)(A), \
(__v4sf)(__m128)(B), \
(__v4sf)_mm_setzero_ps(), \
(__mmask8)-1, (R)); })
#define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) __extension__ ({ \
- (__m128)__builtin_ia32_rsqrt28ss_mask((__v4sf)(__m128)(A), \
+ (__m128)__builtin_ia32_rsqrt28ss_round((__v4sf)(__m128)(A), \
(__v4sf)(__m128)(B), \
(__v4sf)(__m128)(S), \
(__mmask8)(M), (R)); })
#define _mm_maskz_rsqrt28_round_ss(M, A, B, R) __extension__ ({ \
- (__m128)__builtin_ia32_rsqrt28ss_mask((__v4sf)(__m128)(A), \
+ (__m128)__builtin_ia32_rsqrt28ss_round((__v4sf)(__m128)(A), \
(__v4sf)(__m128)(B), \
(__v4sf)_mm_setzero_ps(), \
(__mmask8)(M), (R)); })
@@ -153,19 +153,19 @@
_mm_maskz_rsqrt28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
#define _mm_rsqrt28_round_sd(A, B, R) __extension__ ({ \
- (__m128d)__builtin_ia32_rsqrt28sd_mask((__v2df)(__m128d)(A), \
+ (__m128d)__builtin_ia32_rsqrt28sd_round((__v2df)(__m128d)(A), \
(__v2df)(__m128d)(B), \
(__v2df)_mm_setzero_pd(), \
(__mmask8)-1, (R)); })
#define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) __extension__ ({ \
- (__m128d)__builtin_ia32_rsqrt28sd_mask((__v2df)(__m128d)(A), \
+ (__m128d)__builtin_ia32_rsqrt28sd_round((__v2df)(__m128d)(A), \
(__v2df)(__m128d)(B), \
(__v2df)(__m128d)(S), \
(__mmask8)(M), (R)); })
#define _mm_maskz_rsqrt28_round_sd(M, A, B, R) __extension__ ({ \
- (__m128d)__builtin_ia32_rsqrt28sd_mask((__v2df)(__m128d)(A), \
+ (__m128d)__builtin_ia32_rsqrt28sd_round((__v2df)(__m128d)(A), \
(__v2df)(__m128d)(B), \
(__v2df)_mm_setzero_pd(), \
(__mmask8)(M), (R)); })
@@ -229,19 +229,19 @@
_mm512_maskz_rcp28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
#define _mm_rcp28_round_ss(A, B, R) __extension__ ({ \
- (__m128)__builtin_ia32_rcp28ss_mask((__v4sf)(__m128)(A), \
+ (__m128)__builtin_ia32_rcp28ss_round((__v4sf)(__m128)(A), \
(__v4sf)(__m128)(B), \
(__v4sf)_mm_setzero_ps(), \
(__mmask8)-1, (R)); })
#define _mm_mask_rcp28_round_ss(S, M, A, B, R) __extension__ ({ \
- (__m128)__builtin_ia32_rcp28ss_mask((__v4sf)(__m128)(A), \
+ (__m128)__builtin_ia32_rcp28ss_round((__v4sf)(__m128)(A), \
(__v4sf)(__m128)(B), \
(__v4sf)(__m128)(S), \
(__mmask8)(M), (R)); })
#define _mm_maskz_rcp28_round_ss(M, A, B, R) __extension__ ({ \
- (__m128)__builtin_ia32_rcp28ss_mask((__v4sf)(__m128)(A), \
+ (__m128)__builtin_ia32_rcp28ss_round((__v4sf)(__m128)(A), \
(__v4sf)(__m128)(B), \
(__v4sf)_mm_setzero_ps(), \
(__mmask8)(M), (R)); })
@@ -256,19 +256,19 @@
_mm_maskz_rcp28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
#define _mm_rcp28_round_sd(A, B, R) __extension__ ({ \
- (__m128d)__builtin_ia32_rcp28sd_mask((__v2df)(__m128d)(A), \
+ (__m128d)__builtin_ia32_rcp28sd_round((__v2df)(__m128d)(A), \
(__v2df)(__m128d)(B), \
(__v2df)_mm_setzero_pd(), \
(__mmask8)-1, (R)); })
#define _mm_mask_rcp28_round_sd(S, M, A, B, R) __extension__ ({ \
- (__m128d)__builtin_ia32_rcp28sd_mask((__v2df)(__m128d)(A), \
+ (__m128d)__builtin_ia32_rcp28sd_round((__v2df)(__m128d)(A), \
(__v2df)(__m128d)(B), \
(__v2df)(__m128d)(S), \
(__mmask8)(M), (R)); })
#define _mm_maskz_rcp28_round_sd(M, A, B, R) __extension__ ({ \
- (__m128d)__builtin_ia32_rcp28sd_mask((__v2df)(__m128d)(A), \
+ (__m128d)__builtin_ia32_rcp28sd_round((__v2df)(__m128d)(A), \
(__v2df)(__m128d)(B), \
(__v2df)_mm_setzero_pd(), \
(__mmask8)(M), (R)); })
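The renamed *_round builtins back the 2^-28-accuracy scalar approximations; the non-round wrappers in this header forward _MM_FROUND_CUR_DIRECTION. A sketch, assuming an AVX512ER part (Knights Landing class) built with -mavx512er:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m128 pass = _mm_set1_ps(9.0f);  /* upper three lanes come from here */
  __m128 x    = _mm_set_ss(4.0f);   /* low lane is the operand */
  /* Relative error is bounded by 2^-28, so this prints ~0.5. */
  __m128 r = _mm_rsqrt28_ss(pass, x);
  printf("%f\n", _mm_cvtss_f32(r));
  return 0;
}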
diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512fintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512fintrin.h
index 4f7cba0b1507..8dcdc710d5c3 100644
--- a/contrib/llvm/tools/clang/lib/Headers/avx512fintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/avx512fintrin.h
@@ -1,4 +1,4 @@
-/*===---- avx512fintrin.h - AVX2 intrinsics --------------------------------===
+/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -47,7 +47,7 @@ typedef unsigned short __mmask16;
#define _MM_FROUND_CUR_DIRECTION 0x04
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
/* Create vectors with repeated elements */
@@ -57,6 +57,30 @@ _mm512_setzero_si512(void)
return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
}
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_undefined_pd(void)
+{
+ return (__m512d)__builtin_ia32_undef512();
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_undefined(void)
+{
+ return (__m512)__builtin_ia32_undef512();
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_undefined_ps(void)
+{
+ return (__m512)__builtin_ia32_undef512();
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_undefined_epi32(void)
+{
+ return (__m512i)__builtin_ia32_undef512();
+}
+
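These return vectors with unspecified contents; they are a cheaper stand-in for the _mm512_setzero_* forms when every lane is overwritten before being read. A hedged sketch of that pattern (the helper name is hypothetical):

#include <immintrin.h>

/* Hypothetical helper: seed a reduction whose lanes are all written
 * before any is read. The initial contents are unspecified, so they
 * must never be observed directly. */
static __inline__ __m512i hypothetical_scratch_seed(void) {
  return _mm512_undefined_epi32();
}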
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
{
@@ -543,6 +567,66 @@ _mm512_max_ps(__m512 __A, __m512 __B)
_MM_FROUND_CUR_DIRECTION);
}
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) _mm_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_max_round_ss(__A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_maxss_round ((__v4sf) __A, (__v4sf) __B, \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); })
+
+#define _mm_mask_max_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_maxss_round ((__v4sf) __A, (__v4sf) __B, \
+ (__v4sf) __W, (__mmask8) __U,__R); })
+
+#define _mm_maskz_max_round_ss(__U, __A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_maxss_round ((__v4sf) __A, (__v4sf) __B, \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); })
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) _mm_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_max_round_sd(__A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A, (__v2df) __B, \
+ (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); })
+
+#define _mm_mask_max_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A, (__v2df) __B, \
+ (__v2df) __W, (__mmask8) __U,__R); })
+
+#define _mm_maskz_max_round_sd(__U, __A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A, (__v2df) __B, \
+ (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); })
+
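A sketch of the merge-masked scalar max, assuming AVX512F and -mavx512f; the mask bit steers only the low lane, while the upper lanes always come from the first source operand:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m128 w = _mm_set1_ps(-1.0f);  /* merge source when the mask bit is 0 */
  __m128 a = _mm_set_ss(3.0f);
  __m128 b = _mm_set_ss(7.0f);
  __m128 r1 = _mm_mask_max_ss(w, 0x1, a, b);  /* low lane: max(3,7) = 7 */
  __m128 r0 = _mm_mask_max_ss(w, 0x0, a, b);  /* low lane: w[0] = -1 */
  printf("%f %f\n", _mm_cvtss_f32(r1), _mm_cvtss_f32(r0));
  return 0;
}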
static __inline __m512i
__DEFAULT_FN_ATTRS
_mm512_max_epi32(__m512i __A, __m512i __B)
@@ -606,6 +690,66 @@ _mm512_min_ps(__m512 __A, __m512 __B)
_MM_FROUND_CUR_DIRECTION);
}
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) _mm_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_min_round_ss(__A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_minss_round ((__v4sf) __A, (__v4sf) __B, \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); })
+
+#define _mm_mask_min_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_minss_round ((__v4sf) __A, (__v4sf) __B, \
+ (__v4sf) __W, (__mmask8) __U,__R); })
+
+#define _mm_maskz_min_round_ss(__U, __A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_minss_round ((__v4sf) __A, (__v4sf) __B, \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); })
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) _mm_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_min_round_sd(__A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_minsd_round ((__v2df) __A, (__v2df) __B, \
+ (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); })
+
+#define _mm_mask_min_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_minsd_round ((__v2df) __A, (__v2df) __B, \
+ (__v2df) __W, (__mmask8) __U,__R); })
+
+#define _mm_maskz_min_round_sd(__U, __A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_minsd_round ((__v2df) __A, (__v2df) __B, \
+ (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); })
+
static __inline __m512i
__DEFAULT_FN_ATTRS
_mm512_min_epi32(__m512i __A, __m512i __B)
@@ -728,18 +872,18 @@ _mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_sqrt_pd(__m512d a)
+_mm512_sqrt_pd(__m512d __a)
{
- return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)a,
+ return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)__a,
(__v8df) _mm512_setzero_pd (),
(__mmask8) -1,
_MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_sqrt_ps(__m512 a)
+_mm512_sqrt_ps(__m512 __a)
{
- return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)a,
+ return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__a,
(__v16sf) _mm512_setzero_ps (),
(__mmask16) -1,
_MM_FROUND_CUR_DIRECTION);
@@ -765,7 +909,7 @@ _mm512_rsqrt14_ps(__m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_rsqrt14_ss(__m128 __A, __m128 __B)
{
- return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
+ return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __A,
(__v4sf) __B,
(__v4sf)
_mm_setzero_ps (),
@@ -775,7 +919,7 @@ _mm_rsqrt14_ss(__m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_rsqrt14_sd(__m128d __A, __m128d __B)
{
- return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
+ return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __A,
(__v2df) __B,
(__v2df)
_mm_setzero_pd (),
@@ -802,7 +946,7 @@ _mm512_rcp14_ps(__m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_rcp14_ss(__m128 __A, __m128 __B)
{
- return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
+ return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __A,
(__v4sf) __B,
(__v4sf)
_mm_setzero_ps (),
@@ -812,7 +956,7 @@ _mm_rcp14_ss(__m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_rcp14_sd(__m128d __A, __m128d __B)
{
- return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
+ return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __A,
(__v2df) __B,
(__v2df)
_mm_setzero_pd (),
@@ -873,6 +1017,489 @@ _mm512_abs_epi32(__m512i __A)
(__mmask16) -1);
}
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) _mm_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_add_round_ss(__A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_addss_round ((__v4sf) __A, (__v4sf) __B, \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); })
+
+#define _mm_mask_add_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_addss_round ((__v4sf) __A, (__v4sf) __B, \
+ (__v4sf) __W, (__mmask8) __U,__R); })
+
+#define _mm_maskz_add_round_ss(__U, __A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_addss_round ((__v4sf) __A, (__v4sf) __B, \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); })
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) _mm_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_add_round_sd(__A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_addsd_round ((__v2df) __A, (__v2df) __B, \
+ (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); })
+
+#define _mm_mask_add_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_addsd_round ((__v2df) __A, (__v2df) __B, \
+ (__v2df) __W, (__mmask8) __U,__R); })
+
+#define _mm_maskz_add_round_sd(__U, __A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_addsd_round ((__v2df) __A, (__v2df) __B, \
+ (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); })
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
+ return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
+ return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
+ return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
+ return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_add_round_pd(__A, __B, __R) __extension__ ({ \
+ (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, (__v8df) __B, \
+ (__v8df) _mm512_setzero_pd(), (__mmask8) -1, __R); })
+
+#define _mm512_mask_add_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m512d) __builtin_ia32_addpd512_mask((__v8df) __A, (__v8df) __B, \
+ (__v8df) __W, (__mmask8) __U, __R); })
+
+#define _mm512_maskz_add_round_pd(__U, __A, __B, __R) __extension__ ({ \
+ (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, (__v8df) __B, \
+ (__v8df) _mm512_setzero_pd(), (__mmask8) __U, __R); })
+
+#define _mm512_add_round_ps(__A, __B, __R) __extension__ ({ \
+ (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, (__v16sf) __B, \
+ (__v16sf) _mm512_setzero_ps(), (__mmask16) -1, __R); })
+
+#define _mm512_mask_add_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, (__v16sf) __B, \
+ (__v16sf) __W, (__mmask16)__U, __R); })
+
+#define _mm512_maskz_add_round_ps(__U, __A, __B, __R) __extension__ ({ \
+ (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, (__v16sf) __B, \
+ (__v16sf) _mm512_setzero_ps(), (__mmask16)__U, __R); })
+
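A sketch of the masked and explicitly rounded packed adds, under the same AVX512F assumptions:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m512d a = _mm512_set1_pd(1.0);
  __m512d b = _mm512_set1_pd(2.0);
  /* Zero-masking: even lanes hold 1.0 + 2.0, odd lanes are forced to 0. */
  __m512d z = _mm512_maskz_add_pd((__mmask8)0x55, a, b);
  /* Same arithmetic with the rounding mode spelled out. */
  __m512d r = _mm512_add_round_pd(a, b, _MM_FROUND_CUR_DIRECTION);
  double oz[8], orr[8];
  _mm512_storeu_pd(oz, z);
  _mm512_storeu_pd(orr, r);
  printf("%f %f %f\n", oz[0], oz[1], orr[0]);  /* 3.0 0.0 3.0 */
  return 0;
}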
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) _mm_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_sub_round_ss(__A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_subss_round ((__v4sf) __A, (__v4sf) __B, \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); })
+
+#define _mm_mask_sub_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_subss_round ((__v4sf) __A, (__v4sf) __B, \
+ (__v4sf) __W, (__mmask8) __U,__R); })
+
+#define _mm_maskz_sub_round_ss(__U, __A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_subss_round ((__v4sf) __A, (__v4sf) __B, \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); })
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) _mm_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_sub_round_sd(__A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_subsd_round ((__v2df) __A, (__v2df) __B, \
+ (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); })
+
+#define _mm_mask_sub_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_subsd_round ((__v2df) __A, (__v2df) __B, \
+ (__v2df) __W, (__mmask8) __U,__R); })
+
+#define _mm_maskz_sub_round_sd(__U, __A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_subsd_round ((__v2df) __A, (__v2df) __B, \
+ (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); })
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
+ return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
+ return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
+ return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
+ return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_sub_round_pd(__A, __B, __R) __extension__ ({ \
+ (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, (__v8df) __B,\
+ (__v8df) _mm512_setzero_pd(), (__mmask8) -1, __R); })
+
+#define _mm512_mask_sub_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, (__v8df) __B, \
+ (__v8df) __W, (__mmask8) __U, __R); })
+
+#define _mm512_maskz_sub_round_pd(__U, __A, __B, __R) __extension__ ({ \
+ (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, (__v8df) __B, \
+ (__v8df) _mm512_setzero_pd(), (__mmask8) __U, __R);})
+
+#define _mm512_sub_round_ps(__A, __B, __R) __extension__ ({ \
+ (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, (__v16sf) __B, \
+ (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, __R);})
+
+#define _mm512_mask_sub_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, (__v16sf) __B, \
+ (__v16sf) __W, (__mmask16) __U, __R); })
+
+#define _mm512_maskz_sub_round_ps(__U, __A, __B, __R) __extension__ ({ \
+ (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, (__v16sf) __B, \
+ (__v16sf) _mm512_setzero_ps (), (__mmask16) __U, __R);})
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) _mm_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_mul_round_ss(__A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_mulss_round ((__v4sf) __A, (__v4sf) __B, \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); })
+
+#define _mm_mask_mul_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_mulss_round ((__v4sf) __A, (__v4sf) __B, \
+ (__v4sf) __W, (__mmask8) __U,__R); })
+
+#define _mm_maskz_mul_round_ss(__U, __A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_mulss_round ((__v4sf) __A, (__v4sf) __B, \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); })
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) _mm_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_mul_round_sd(__A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A, (__v2df) __B, \
+ (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); })
+
+#define _mm_mask_mul_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A, (__v2df) __B, \
+ (__v2df) __W, (__mmask8) __U,__R); })
+
+#define _mm_maskz_mul_round_sd(__U, __A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A, (__v2df) __B, \
+ (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); })
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
+ return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
+ return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
+ return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
+ return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_mul_round_pd(__A, __B, __R) __extension__ ({ \
+ (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, (__v8df) __B,\
+ (__v8df) _mm512_setzero_pd(), (__mmask8) -1, __R); })
+
+#define _mm512_mask_mul_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, (__v8df) __B, \
+ (__v8df) __W, (__mmask8) __U, __R); })
+
+#define _mm512_maskz_mul_round_pd(__U, __A, __B, __R) __extension__ ({ \
+ (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, (__v8df) __B, \
+ (__v8df) _mm512_setzero_pd(), (__mmask8) __U, __R);})
+
+#define _mm512_mul_round_ps(__A, __B, __R) __extension__ ({ \
+ (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, (__v16sf) __B, \
+ (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, __R);})
+
+#define _mm512_mask_mul_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, (__v16sf) __B, \
+ (__v16sf) __W, (__mmask16) __U, __R); })
+
+#define _mm512_maskz_mul_round_ps(__U, __A, __B, __R) __extension__ ({ \
+ (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, (__v16sf) __B, \
+ (__v16sf) _mm512_setzero_ps (), (__mmask16) __U, __R);})
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) _mm_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_div_round_ss(__A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_divss_round ((__v4sf) __A, (__v4sf) __B, \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); })
+
+#define _mm_mask_div_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_divss_round ((__v4sf) __A, (__v4sf) __B, \
+ (__v4sf) __W, (__mmask8) __U,__R); })
+
+#define _mm_maskz_div_round_ss(__U, __A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_divss_round ((__v4sf) __A, (__v4sf) __B, \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); })
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) _mm_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_div_round_sd(__A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_divsd_round ((__v2df) __A, (__v2df) __B, \
+ (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); })
+
+#define _mm_mask_div_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_divsd_round ((__v2df) __A, (__v2df) __B, \
+ (__v2df) __W, (__mmask8) __U,__R); })
+
+#define _mm_maskz_div_round_sd(__U, __A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_divsd_round ((__v2df) __A, (__v2df) __B, \
+ (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); })
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
+ return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
+ return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
+ return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
+ return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_div_round_pd(__A, __B, __R) __extension__ ({ \
+ (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A, (__v8df) __B,\
+ (__v8df) _mm512_setzero_pd(), (__mmask8) -1, __R); })
+
+#define _mm512_mask_div_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A, (__v8df) __B, \
+ (__v8df) __W, (__mmask8) __U, __R); })
+
+#define _mm512_maskz_div_round_pd(__U, __A, __B, __R) __extension__ ({ \
+ (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A, (__v8df) __B, \
+ (__v8df) _mm512_setzero_pd(), (__mmask8) __U, __R);})
+
+#define _mm512_div_round_ps(__A, __B, __R) __extension__ ({ \
+ (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, (__v16sf) __B, \
+ (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, __R);})
+
+#define _mm512_mask_div_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, (__v16sf) __B, \
+ (__v16sf) __W, (__mmask16) __U, __R); })
+
+#define _mm512_maskz_div_round_ps(__U, __A, __B, __R) __extension__ ({ \
+ (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, (__v16sf) __B, \
+ (__v16sf) _mm512_setzero_ps (), (__mmask16) __U, __R);})
+
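A sketch of merge-masked division, assuming AVX512F; masked-off lanes pass through the first argument and do not raise FP exceptions:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m512 num = _mm512_set1_ps(1.0f);
  __m512 den = _mm512_set1_ps(4.0f);
  /* Merge-masking: lanes 0-7 compute 1/4; lanes 8-15 are passed through
     unchanged from num instead of executing the division. */
  __m512 r = _mm512_mask_div_ps(num, (__mmask16)0x00FF, num, den);
  float out[16];
  _mm512_storeu_ps(out, r);
  printf("%f %f\n", out[0], out[15]);  /* 0.250000 1.000000 */
  return 0;
}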
#define _mm512_roundscale_ps(A, B) __extension__ ({ \
(__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(A), (B), (__v16sf)(A), \
-1, _MM_FROUND_CUR_DIRECTION); })
@@ -1706,17 +2333,15 @@ _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
/* Vector Extract */
#define _mm512_extractf64x4_pd(A, I) __extension__ ({ \
- __m512d __A = (A); \
(__m256d) \
- __builtin_ia32_extractf64x4_mask((__v8df)__A, \
+ __builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), \
(I), \
(__v4df)_mm256_setzero_si256(), \
(__mmask8) -1); })
#define _mm512_extractf32x4_ps(A, I) __extension__ ({ \
- __m512 __A = (A); \
(__m128) \
- __builtin_ia32_extractf32x4_mask((__v16sf)__A, \
+ __builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), \
(I), \
(__v4sf)_mm_setzero_ps(), \
(__mmask8) -1); })
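With the statement-expression temporaries gone, the argument is cast in place and the macro no longer materializes a local copy of (A); usage is unchanged. A sketch assuming AVX512F:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m512d v = _mm512_set1_pd(6.0);
  /* The index must still be a compile-time constant. */
  __m256d hi = _mm512_extractf64x4_pd(v, 1);
  double out[4];
  _mm256_storeu_pd(out, hi);
  printf("%f\n", out[0]);  /* 6.000000 */
  return 0;
}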
@@ -1850,18 +2475,18 @@ _mm512_cvtph_ps(__m256i __A)
}
static __inline __m512i __DEFAULT_FN_ATTRS
-_mm512_cvttps_epi32(__m512 a)
+_mm512_cvttps_epi32(__m512 __a)
{
return (__m512i)
- __builtin_ia32_cvttps2dq512_mask((__v16sf) a,
+ __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
(__v16si) _mm512_setzero_si512 (),
(__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
}
static __inline __m256i __DEFAULT_FN_ATTRS
-_mm512_cvttpd_epi32(__m512d a)
+_mm512_cvttpd_epi32(__m512d __a)
{
- return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) a,
+ return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
(__v8si)_mm256_setzero_si256(),
(__mmask8) -1,
_MM_FROUND_CUR_DIRECTION);
@@ -2405,51 +3030,43 @@ _mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
}
#define _mm512_cmp_epi32_mask(a, b, p) __extension__ ({ \
- __m512i __a = (a); \
- __m512i __b = (b); \
- (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, (p), \
+ (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
+ (__v16si)(__m512i)(b), (p), \
(__mmask16)-1); })
#define _mm512_cmp_epu32_mask(a, b, p) __extension__ ({ \
- __m512i __a = (a); \
- __m512i __b = (b); \
- (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, (p), \
+ (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
+ (__v16si)(__m512i)(b), (p), \
(__mmask16)-1); })
#define _mm512_cmp_epi64_mask(a, b, p) __extension__ ({ \
- __m512i __a = (a); \
- __m512i __b = (b); \
- (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, (p), \
+ (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
+ (__v8di)(__m512i)(b), (p), \
(__mmask8)-1); })
#define _mm512_cmp_epu64_mask(a, b, p) __extension__ ({ \
- __m512i __a = (a); \
- __m512i __b = (b); \
- (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, (p), \
+ (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
+ (__v8di)(__m512i)(b), (p), \
(__mmask8)-1); })
#define _mm512_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
- __m512i __a = (a); \
- __m512i __b = (b); \
- (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, (p), \
+ (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
+ (__v16si)(__m512i)(b), (p), \
(__mmask16)(m)); })
#define _mm512_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
- __m512i __a = (a); \
- __m512i __b = (b); \
- (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, (p), \
+ (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
+ (__v16si)(__m512i)(b), (p), \
(__mmask16)(m)); })
#define _mm512_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
- __m512i __a = (a); \
- __m512i __b = (b); \
- (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, (p), \
+ (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
+ (__v8di)(__m512i)(b), (p), \
(__mmask8)(m)); })
#define _mm512_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
- __m512i __a = (a); \
- __m512i __b = (b); \
- (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, (p), \
+ (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
+ (__v8di)(__m512i)(b), (p), \
(__mmask8)(m)); })
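The rewritten comparison macros likewise cast their arguments in place. A sketch assuming AVX512F; the _MM_CMPINT_* predicate names are the ones this header family defines (predicate 1 is signed less-than):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m512i a = _mm512_set1_epi32(1);
  __m512i b = _mm512_set1_epi32(2);
  /* Signed less-than: every one of the 16 lanes matches. */
  __mmask16 m = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_LT);
  printf("%#x\n", (unsigned)m);  /* 0xffff */
  return 0;
}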
#undef __DEFAULT_FN_ATTRS
diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512vlbwintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512vlbwintrin.h
index 74ec17583096..b4542d69ab08 100644
--- a/contrib/llvm/tools/clang/lib/Headers/avx512vlbwintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/avx512vlbwintrin.h
@@ -1,4 +1,4 @@
-/*===---- avx512vlbwintrin.h - AVX512VL and AVX512BW intrinsics ----------===
+/*===---- avx512vlbwintrin.h - AVX512VL and AVX512BW intrinsics ------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -29,7 +29,7 @@
#define __AVX512VLBWINTRIN_H
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw")))
/* Integer compare */
@@ -1822,6 +1822,435 @@ _mm256_maskz_permutex2var_epi16 (__mmask16 __U, __m256i __A,
(__mmask16) __U);
}
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_maddubs_epi16 (__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) {
+ return (__m128i) __builtin_ia32_pmaddubsw128_mask ((__v16qi) __X,
+ (__v16qi) __Y,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_maddubs_epi16 (__mmask8 __U, __m128i __X, __m128i __Y) {
+ return (__m128i) __builtin_ia32_pmaddubsw128_mask ((__v16qi) __X,
+ (__v16qi) __Y,
+ (__v8hi) _mm_setzero_si128(),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_maddubs_epi16 (__m256i __W, __mmask16 __U, __m256i __X,
+ __m256i __Y) {
+ return (__m256i) __builtin_ia32_pmaddubsw256_mask ((__v32qi) __X,
+ (__v32qi) __Y,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_maddubs_epi16 (__mmask16 __U, __m256i __X, __m256i __Y) {
+ return (__m256i) __builtin_ia32_pmaddubsw256_mask ((__v32qi) __X,
+ (__v32qi) __Y,
+ (__v16hi) _mm256_setzero_si256(),
+ (__mmask16) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_madd_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B) {
+ return (__m128i) __builtin_ia32_pmaddwd128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_madd_epi16 (__mmask8 __U, __m128i __A, __m128i __B) {
+ return (__m128i) __builtin_ia32_pmaddwd128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v4si) _mm_setzero_si128(),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_madd_epi16 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
+ return (__m256i) __builtin_ia32_pmaddwd256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_madd_epi16 (__mmask8 __U, __m256i __A, __m256i __B) {
+ return (__m256i) __builtin_ia32_pmaddwd256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v8si) _mm256_setzero_si256(),
+ (__mmask8) __U);
+}
+
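A sketch of the masked multiply-add forms, assuming AVX512VL plus AVX512BW (-mavx512vl -mavx512bw):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m128i a = _mm_set1_epi16(3);
  __m128i b = _mm_set1_epi16(4);
  /* vpmaddwd: each 32-bit lane is 3*4 + 3*4 = 24; the zero-mask keeps
     only lane 0 and clears lanes 1-3. */
  __m128i r = _mm_maskz_madd_epi16((__mmask8)0x1, a, b);
  int out[4];
  _mm_storeu_si128((__m128i *)out, r);
  printf("%d %d\n", out[0], out[1]);  /* 24 0 */
  return 0;
}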
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvtsepi16_epi8 (__m128i __A) {
+ return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
+ (__v16qi) _mm_setzero_si128(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvtsepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) {
+ return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
+ (__v16qi) __O,
+ __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvtsepi16_epi8 (__mmask8 __M, __m128i __A) {
+ return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
+ (__v16qi) _mm_setzero_si128(),
+ __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_cvtsepi16_epi8 (__m256i __A) {
+ return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
+ (__v16qi) _mm_setzero_si128(),
+ (__mmask16) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_mask_cvtsepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) {
+ return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
+ (__v16qi) __O,
+ __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtsepi16_epi8 (__mmask16 __M, __m256i __A) {
+ return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
+ (__v16qi) _mm_setzero_si128(),
+ __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvtusepi16_epi8 (__m128i __A) {
+ return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
+ (__v16qi) _mm_setzero_si128(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvtusepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) {
+ return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
+ (__v16qi) __O,
+ __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvtusepi16_epi8 (__mmask8 __M, __m128i __A) {
+ return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
+ (__v16qi) _mm_setzero_si128(),
+ __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_cvtusepi16_epi8 (__m256i __A) {
+ return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
+ (__v16qi) _mm_setzero_si128(),
+ (__mmask16) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_mask_cvtusepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) {
+ return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
+ (__v16qi) __O,
+ __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtusepi16_epi8 (__mmask16 __M, __m256i __A) {
+ return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
+ (__v16qi) _mm_setzero_si128(),
+ __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvtepi16_epi8 (__m128i __A) {
+ return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
+ (__v16qi) _mm_setzero_si128(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvtepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) {
+ return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
+ (__v16qi) __O,
+ __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvtepi16_epi8 (__mmask8 __M, __m128i __A) {
+ return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
+ (__v16qi) _mm_setzero_si128(),
+ __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_cvtepi16_epi8 (__m256i __A) {
+ return (__m128i) __builtin_ia32_pmovwb256_mask ((__v16hi) __A,
+ (__v16qi) _mm_setzero_si128(),
+ (__mmask16) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_mask_cvtepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) {
+ return (__m128i) __builtin_ia32_pmovwb256_mask ((__v16hi) __A,
+ (__v16qi) __O,
+ __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtepi16_epi8 (__mmask16 __M, __m256i __A) {
+ return (__m128i) __builtin_ia32_pmovwb256_mask ((__v16hi) __A,
+ (__v16qi) _mm_setzero_si128(),
+ __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_mulhrs_epi16 (__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) {
+ return (__m128i) __builtin_ia32_pmulhrsw128_mask ((__v8hi) __X,
+ (__v8hi) __Y,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_mulhrs_epi16 (__mmask8 __U, __m128i __X, __m128i __Y) {
+ return (__m128i) __builtin_ia32_pmulhrsw128_mask ((__v8hi) __X,
+ (__v8hi) __Y,
+ (__v8hi) _mm_setzero_si128(),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_mulhrs_epi16 (__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y) {
+ return (__m256i) __builtin_ia32_pmulhrsw256_mask ((__v16hi) __X,
+ (__v16hi) __Y,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_mulhrs_epi16 (__mmask16 __U, __m256i __X, __m256i __Y) {
+ return (__m256i) __builtin_ia32_pmulhrsw256_mask ((__v16hi) __X,
+ (__v16hi) __Y,
+ (__v16hi) _mm256_setzero_si256(),
+ (__mmask16) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_mulhi_epu16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B) {
+ return (__m128i) __builtin_ia32_pmulhuw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_mulhi_epu16 (__mmask8 __U, __m128i __A, __m128i __B) {
+ return (__m128i) __builtin_ia32_pmulhuw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) _mm_setzero_si128(),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_mulhi_epu16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B) {
+ return (__m256i) __builtin_ia32_pmulhuw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_mulhi_epu16 (__mmask16 __U, __m256i __A, __m256i __B) {
+ return (__m256i) __builtin_ia32_pmulhuw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) _mm256_setzero_si256(),
+ (__mmask16) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_mulhi_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B) {
+ return (__m128i) __builtin_ia32_pmulhw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_mulhi_epi16 (__mmask8 __U, __m128i __A, __m128i __B) {
+ return (__m128i) __builtin_ia32_pmulhw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) _mm_setzero_si128(),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_mulhi_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B) {
+ return (__m256i) __builtin_ia32_pmulhw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_mulhi_epi16 (__mmask16 __U, __m256i __A, __m256i __B) {
+ return (__m256i) __builtin_ia32_pmulhw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) _mm256_setzero_si256(),
+ (__mmask16) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_unpackhi_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
+ __m128i __B) {
+ return (__m128i) __builtin_ia32_punpckhbw128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_unpackhi_epi8 (__mmask16 __U, __m128i __A, __m128i __B) {
+ return (__m128i) __builtin_ia32_punpckhbw128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) _mm_setzero_si128(),
+ (__mmask16) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_unpackhi_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
+ __m256i __B) {
+ return (__m256i) __builtin_ia32_punpckhbw256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) __W,
+ (__mmask32) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_unpackhi_epi8 (__mmask32 __U, __m256i __A, __m256i __B) {
+ return (__m256i) __builtin_ia32_punpckhbw256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) _mm256_setzero_si256(),
+ (__mmask32) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_unpackhi_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B) {
+ return (__m128i) __builtin_ia32_punpckhwd128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_unpackhi_epi16 (__mmask8 __U, __m128i __A, __m128i __B) {
+ return (__m128i) __builtin_ia32_punpckhwd128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) _mm_setzero_si128(),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_unpackhi_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B) {
+ return (__m256i) __builtin_ia32_punpckhwd256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_unpackhi_epi16 (__mmask16 __U, __m256i __A, __m256i __B) {
+ return (__m256i) __builtin_ia32_punpckhwd256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) _mm256_setzero_si256(),
+ (__mmask16) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_unpacklo_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
+ __m128i __B) {
+ return (__m128i) __builtin_ia32_punpcklbw128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_unpacklo_epi8 (__mmask16 __U, __m128i __A, __m128i __B) {
+ return (__m128i) __builtin_ia32_punpcklbw128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) _mm_setzero_si128(),
+ (__mmask16) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_unpacklo_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
+ __m256i __B) {
+ return (__m256i) __builtin_ia32_punpcklbw256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) __W,
+ (__mmask32) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_unpacklo_epi8 (__mmask32 __U, __m256i __A, __m256i __B) {
+ return (__m256i) __builtin_ia32_punpcklbw256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) _mm256_setzero_si256(),
+ (__mmask32) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_unpacklo_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B) {
+ return (__m128i) __builtin_ia32_punpcklwd128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_unpacklo_epi16 (__mmask8 __U, __m128i __A, __m128i __B) {
+ return (__m128i) __builtin_ia32_punpcklwd128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) _mm_setzero_si128(),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_unpacklo_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B) {
+ return (__m256i) __builtin_ia32_punpcklwd256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_unpacklo_epi16 (__mmask16 __U, __m256i __A, __m256i __B) {
+ return (__m256i) __builtin_ia32_punpcklwd256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) _mm256_setzero_si256(),
+ (__mmask16) __U);
+}
+
#define _mm_cmp_epi8_mask(a, b, p) __extension__ ({ \
(__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \
(__v16qi)(__m128i)(b), \
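
The block of masked multiply/convert/unpack intrinsics added above follows one pattern throughout: the mask_ form takes a pass-through vector __W for lanes whose mask bit is clear, and the maskz_ form zeroes those lanes instead. A minimal sketch using _mm_maskz_madd_epi16 exactly as defined in this hunk (compile with -mavx512vl -mavx512bw):

    #include <immintrin.h>

    /* pmaddwd produces four 32-bit sums per 128-bit vector; mask 0x3
       keeps the two low results and zeroes the two high ones. */
    static __m128i madd_low_half(__m128i a, __m128i b)
    {
        return _mm_maskz_madd_epi16((__mmask8)0x3, a, b);
    }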
diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512vldqintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512vldqintrin.h
index 1edf29d128ee..dfd858e013da 100644
--- a/contrib/llvm/tools/clang/lib/Headers/avx512vldqintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/avx512vldqintrin.h
@@ -1,4 +1,4 @@
-/*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ intrinsics ---------------------------===
+/*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ intrinsics ------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -20,7 +20,7 @@
*
*===-----------------------------------------------------------------------===
*/
-
+
#ifndef __IMMINTRIN_H
#error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
#endif
@@ -29,7 +29,7 @@
#define __AVX512VLDQINTRIN_H
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq")))
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mullo_epi64 (__m256i __A, __m256i __B) {
@@ -348,6 +348,606 @@ _mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B) {
(__mmask8) __U);
}
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvtpd_epi64 (__m128d __A) {
+ return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
+ (__v2di) _mm_setzero_si128(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) {
+ return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A) {
+ return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
+ (__v2di) _mm_setzero_si128(),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_cvtpd_epi64 (__m256d __A) {
+ return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
+ (__v4di) _mm256_setzero_si256(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) {
+ return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A) {
+ return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
+ (__v4di) _mm256_setzero_si256(),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvtpd_epu64 (__m128d __A) {
+ return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
+ (__v2di) _mm_setzero_si128(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) {
+ return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A) {
+ return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
+ (__v2di) _mm_setzero_si128(),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_cvtpd_epu64 (__m256d __A) {
+ return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
+ (__v4di) _mm256_setzero_si256(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) {
+ return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A) {
+ return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
+ (__v4di) _mm256_setzero_si256(),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvtps_epi64 (__m128 __A) {
+ return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
+ (__v2di) _mm_setzero_si128(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) {
+ return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) {
+ return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
+ (__v2di) _mm_setzero_si128(),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_cvtps_epi64 (__m128 __A) {
+ return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
+ (__v4di) _mm256_setzero_si256(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) {
+ return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) {
+ return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
+ (__v4di) _mm256_setzero_si256(),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvtps_epu64 (__m128 __A) {
+ return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
+ (__v2di) _mm_setzero_si128(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) {
+ return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) {
+ return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
+ (__v2di) _mm_setzero_si128(),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_cvtps_epu64 (__m128 __A) {
+ return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
+ (__v4di) _mm256_setzero_si256(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) {
+ return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) {
+ return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
+ (__v4di) _mm256_setzero_si256(),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_cvtepi64_pd (__m128i __A) {
+ return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
+ (__v2df) _mm_setzero_pd(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A) {
+ return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A) {
+ return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
+ (__v2df) _mm_setzero_pd(),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_cvtepi64_pd (__m256i __A) {
+ return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
+ (__v4df) _mm256_setzero_pd(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A) {
+ return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A) {
+ return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
+ (__v4df) _mm256_setzero_pd(),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_cvtepi64_ps (__m128i __A) {
+ return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
+ (__v4sf) _mm_setzero_ps(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A) {
+ return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A) {
+ return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
+ (__v4sf) _mm_setzero_ps(),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm256_cvtepi64_ps (__m256i __A) {
+ return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
+ (__v4sf) _mm_setzero_ps(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A) {
+ return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A) {
+ return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
+ (__v4sf) _mm_setzero_ps(),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvttpd_epi64 (__m128d __A) {
+ return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
+ (__v2di) _mm_setzero_si128(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) {
+ return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A) {
+ return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
+ (__v2di) _mm_setzero_si128(),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_cvttpd_epi64 (__m256d __A) {
+ return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
+ (__v4di) _mm256_setzero_si256(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) {
+ return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A) {
+ return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
+ (__v4di) _mm256_setzero_si256(),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvttpd_epu64 (__m128d __A) {
+ return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
+ (__v2di) _mm_setzero_si128(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) {
+ return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A) {
+ return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
+ (__v2di) _mm_setzero_si128(),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_cvttpd_epu64 (__m256d __A) {
+ return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
+ (__v4di) _mm256_setzero_si256(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) {
+ return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A) {
+ return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
+ (__v4di) _mm256_setzero_si256(),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvttps_epi64 (__m128 __A) {
+ return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
+ (__v2di) _mm_setzero_si128(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) {
+ return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) {
+ return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
+ (__v2di) _mm_setzero_si128(),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_cvttps_epi64 (__m128 __A) {
+ return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
+ (__v4di) _mm256_setzero_si256(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) {
+ return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) {
+ return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
+ (__v4di) _mm256_setzero_si256(),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvttps_epu64 (__m128 __A) {
+ return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
+ (__v2di) _mm_setzero_si128(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) {
+ return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) {
+ return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
+ (__v2di) _mm_setzero_si128(),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_cvttps_epu64 (__m128 __A) {
+ return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
+ (__v4di) _mm256_setzero_si256(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) {
+ return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) {
+ return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
+ (__v4di) _mm256_setzero_si256(),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_cvtepu64_pd (__m128i __A) {
+ return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
+ (__v2df) _mm_setzero_pd(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A) {
+ return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A) {
+ return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
+ (__v2df) _mm_setzero_pd(),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_cvtepu64_pd (__m256i __A) {
+ return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
+ (__v4df) _mm256_setzero_pd(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A) {
+ return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A) {
+ return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
+ (__v4df) _mm256_setzero_pd(),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_cvtepu64_ps (__m128i __A) {
+ return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
+ (__v4sf) _mm_setzero_ps(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A) {
+ return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A) {
+ return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
+ (__v4sf) _mm_setzero_ps(),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm256_cvtepu64_ps (__m256i __A) {
+ return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
+ (__v4sf) _mm_setzero_ps(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A) {
+ return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) {
+ return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
+ (__v4sf) _mm_setzero_ps(),
+ (__mmask8) __U);
+}
+
+#define _mm_range_pd(__A, __B, __C) __extension__ ({ \
+ (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A, (__v2df) __B, __C, \
+ (__v2df) _mm_setzero_pd(), (__mmask8) -1); })
+
+#define _mm_mask_range_pd(__W, __U, __A, __B, __C) __extension__ ({ \
+ (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A, (__v2df) __B, __C, \
+ (__v2df) __W, (__mmask8) __U); })
+
+#define _mm_maskz_range_pd(__U, __A, __B, __C) __extension__ ({ \
+ (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A, (__v2df) __B, __C, \
+ (__v2df) _mm_setzero_pd(), (__mmask8) __U); })
+
+#define _mm256_range_pd(__A, __B, __C) __extension__ ({ \
+ (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A, (__v4df) __B, __C, \
+ (__v4df) _mm256_setzero_pd(), (__mmask8) -1); })
+
+#define _mm256_mask_range_pd(__W, __U, __A, __B, __C) __extension__ ({ \
+ (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A, (__v4df) __B, __C, \
+ (__v4df) __W, (__mmask8) __U); })
+
+#define _mm256_maskz_range_pd(__U, __A, __B, __C) __extension__ ({ \
+ (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A, (__v4df) __B, __C, \
+ (__v4df) _mm256_setzero_pd(), (__mmask8) __U); })
+
+#define _mm_range_ps(__A, __B, __C) __extension__ ({ \
+ (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A, (__v4sf) __B, __C, \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) -1); })
+
+#define _mm_mask_range_ps(__W, __U, __A, __B, __C) __extension__ ({ \
+ (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A, (__v4sf) __B, __C, \
+ (__v4sf) __W, (__mmask8) __U); })
+
+#define _mm_maskz_range_ps(__U, __A, __B, __C) __extension__ ({ \
+ (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A, (__v4sf) __B, __C, \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) __U); })
+
+#define _mm256_range_ps(__A, __B, __C) __extension__ ({ \
+ (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A, (__v8sf) __B, __C, \
+ (__v8sf) _mm256_setzero_ps(), (__mmask8) -1); })
+
+#define _mm256_mask_range_ps(__W, __U, __A, __B, __C) __extension__ ({ \
+ (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A, (__v8sf) __B, __C, \
+ (__v8sf) __W, (__mmask8) __U); })
+
+#define _mm256_maskz_range_ps(__U, __A, __B, __C) __extension__ ({ \
+ (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A, (__v8sf) __B, __C, \
+ (__v8sf) _mm256_setzero_ps(), (__mmask8) __U); })
+
+#define _mm_reduce_pd(__A, __B) __extension__ ({ \
+ (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B, \
+ (__v2df) _mm_setzero_pd(), (__mmask8) -1); })
+
+#define _mm_mask_reduce_pd(__W, __U, __A, __B) __extension__ ({ \
+ (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B, \
+ (__v2df) __W, (__mmask8) __U); })
+
+#define _mm_maskz_reduce_pd(__U, __A, __B) __extension__ ({ \
+ (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B, \
+ (__v2df) _mm_setzero_pd(), (__mmask8) __U); })
+
+#define _mm256_reduce_pd(__A, __B) __extension__ ({ \
+ (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B, \
+ (__v4df) _mm256_setzero_pd(), (__mmask8) -1); })
+
+#define _mm256_mask_reduce_pd(__W, __U, __A, __B) __extension__ ({ \
+ (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B, \
+ (__v4df) __W, (__mmask8) __U); })
+
+#define _mm256_maskz_reduce_pd(__U, __A, __B) __extension__ ({ \
+ (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B, \
+ (__v4df) _mm256_setzero_pd(), (__mmask8) __U); })
+
+#define _mm_reduce_ps(__A, __B) __extension__ ({ \
+ (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B, \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) -1); })
+
+#define _mm_mask_reduce_ps(__W, __U, __A, __B) __extension__ ({ \
+ (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B, \
+ (__v4sf) __W, (__mmask8) __U); })
+
+#define _mm_maskz_reduce_ps(__U, __A, __B) __extension__ ({ \
+ (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B, \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) __U); })
+
+#define _mm256_reduce_ps(__A, __B) __extension__ ({ \
+ (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B, \
+ (__v8sf) _mm256_setzero_ps(), (__mmask8) -1); })
+
+#define _mm256_mask_reduce_ps(__W, __U, __A, __B) __extension__ ({ \
+ (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B, \
+ (__v8sf) __W, (__mmask8) __U); })
+
+#define _mm256_maskz_reduce_ps(__U, __A, __B) __extension__ ({ \
+ (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B, \
+ (__v8sf) _mm256_setzero_ps(), (__mmask8) __U); })
+
#undef __DEFAULT_FN_ATTRS
#endif
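
The DQ additions above provide direct conversions between double/float vectors and 64-bit integer lanes, plus the range/reduce macro families. A minimal round-trip sketch using _mm256_maskz_cvttpd_epi64 and _mm256_cvtepi64_pd as defined in this diff (compile with -mavx512vl -mavx512dq):

    #include <immintrin.h>

    /* Truncate four doubles to signed 64-bit lanes, zeroing the lanes
       whose bit in keep is clear, then widen back to double. */
    static __m256d truncate_and_back(__m256d v, __mmask8 keep)
    {
        __m256i q = _mm256_maskz_cvttpd_epi64(keep, v);
        return _mm256_cvtepi64_pd(q);
    }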
diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512vlintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512vlintrin.h
index fc1b9d6e7a23..8f13536fbb0e 100644
--- a/contrib/llvm/tools/clang/lib/Headers/avx512vlintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/avx512vlintrin.h
@@ -28,18 +28,18 @@
#ifndef __AVX512VLINTRIN_H
#define __AVX512VLINTRIN_H
-/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl")))
+#define __DEFAULT_FN_ATTRS_BOTH __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw")))
/* Integer compare */
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
_mm_cmpeq_epi32_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_pcmpeqd128_mask((__v4si)__a, (__v4si)__b,
(__mmask8)-1);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
_mm_mask_cmpeq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_pcmpeqd128_mask((__v4si)__a, (__v4si)__b,
__u);
@@ -57,13 +57,13 @@ _mm_mask_cmpeq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
__u);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
_mm256_cmpeq_epi32_mask(__m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_pcmpeqd256_mask((__v8si)__a, (__v8si)__b,
(__mmask8)-1);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
_mm256_mask_cmpeq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_pcmpeqd256_mask((__v8si)__a, (__v8si)__b,
__u);
@@ -81,13 +81,13 @@ _mm256_mask_cmpeq_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
__u);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
_mm_cmpeq_epi64_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b,
(__mmask8)-1);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
_mm_mask_cmpeq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b,
__u);
@@ -105,13 +105,13 @@ _mm_mask_cmpeq_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
__u);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
_mm256_cmpeq_epi64_mask(__m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_pcmpeqq256_mask((__v4di)__a, (__v4di)__b,
(__mmask8)-1);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
_mm256_mask_cmpeq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_pcmpeqq256_mask((__v4di)__a, (__v4di)__b,
__u);
@@ -226,16 +226,13 @@ _mm256_mask_cmpge_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
__u);
}
-
-
-
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
_mm_cmpgt_epi32_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b,
(__mmask8)-1);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
_mm_mask_cmpgt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b,
__u);
@@ -253,13 +250,13 @@ _mm_mask_cmpgt_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
__u);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
_mm256_cmpgt_epi32_mask(__m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b,
(__mmask8)-1);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
_mm256_mask_cmpgt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b,
__u);
@@ -277,13 +274,13 @@ _mm256_mask_cmpgt_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
__u);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
_mm_cmpgt_epi64_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b,
(__mmask8)-1);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
_mm_mask_cmpgt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b,
__u);
@@ -301,13 +298,13 @@ _mm_mask_cmpgt_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
__u);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
_mm256_cmpgt_epi64_mask(__m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b,
(__mmask8)-1);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
_mm256_mask_cmpgt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b,
__u);
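
Splitting the attribute macro in two means the compare intrinsics retagged with __DEFAULT_FN_ATTRS_BOTH are callable only when both features are enabled, while the rest of the file continues to require avx512vl alone. A minimal sketch of the effect (this compiles with -mavx512vl -mavx512bw but is rejected with -mavx512vl by itself):

    #include <immintrin.h>

    static __mmask8 eq32(__m128i a, __m128i b)
    {
        /* Retagged _BOTH above, so the target attribute now demands
           avx512vl and avx512bw together. */
        return _mm_cmpeq_epi32_mask(a, b);
    }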
@@ -1977,6 +1974,2633 @@ _mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
(__mmask8) __U);
}
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_add_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_add_pd (__mmask8 __U, __m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask_add_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
+ return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_maskz_add_pd (__mmask8 __U, __m256d __A, __m256d __B) {
+ return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_add_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_add_ps (__mmask8 __U, __m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask_add_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
+ return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_maskz_add_ps (__mmask8 __U, __m256 __A, __m256 __B) {
+ return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) {
+ return (__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) {
+ return (__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) {
+ return (__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) __A,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) {
+ return (__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) __A,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) {
+ return (__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) {
+ return (__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) __A,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) {
+ return (__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) {
+ return (__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A) {
+ return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_compress_pd (__mmask8 __U, __m128d __A) {
+ return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A) {
+ return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_maskz_compress_pd (__mmask8 __U, __m256d __A) {
+ return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
+ return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A) {
+ return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
+ return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A) {
+ return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A) {
+ return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_compress_ps (__mmask8 __U, __m128 __A) {
+ return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A) {
+ return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_maskz_compress_ps (__mmask8 __U, __m256 __A) {
+ return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
+ return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A) {
+ return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
+ return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A) {
+ return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A) {
+ __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
+ (__v2df) __A,
+ (__mmask8) __U);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A) {
+ __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
+ (__v4df) __A,
+ (__mmask8) __U);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A) {
+ __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
+ (__v2di) __A,
+ (__mmask8) __U);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A) {
+ __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
+ (__v4di) __A,
+ (__mmask8) __U);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A) {
+ __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
+ (__v4sf) __A,
+ (__mmask8) __U);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A) {
+ __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
+ (__v8sf) __A,
+ (__mmask8) __U);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A) {
+ __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
+ (__v4si) __A,
+ (__mmask8) __U);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A) {
+ __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
+ (__v8si) __A,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
+ return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) {
+ return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
+ return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) {
+ return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
+ return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A) {
+ return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
+ return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A) {
+ return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
+ return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A) {
+ return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
+ return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A) {
+ return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) {
+ return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) {
+ return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) {
+ return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A) {
+ return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvtpd_epu32 (__m128d __A) {
+ return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
+ return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A) {
+ return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_cvtpd_epu32 (__m256d __A) {
+ return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
+ return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A) {
+ return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
+ return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A) {
+ return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
+ return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A) {
+ return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A) {
+ return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
+ return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A) {
+ return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
+ return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvtps_epu32 (__m128 __A) {
+ return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
+ return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A) {
+ return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_cvtps_epu32 (__m256 __A) {
+ return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
+ return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A) {
+ return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
+ return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A) {
+ return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
+ return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A) {
+ return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvttpd_epu32 (__m128d __A) {
+ return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
+ return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A) {
+ return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_cvttpd_epu32 (__m256d __A) {
+ return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
+ return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A) {
+ return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
+ return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A) {
+ return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
+ return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A) {
+ return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvttps_epu32 (__m128 __A) {
+ return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
+ return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A) {
+ return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_cvttps_epu32 (__m256 __A) {
+ return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
+ return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A) {
+ return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
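+/* Editor's note: the cvtt* forms truncate toward zero rather than rounding
+ * under the current MXCSR mode; a small hypothetical example:
+ *
+ *   __m128  v = _mm_set1_ps(2.9f);
+ *   __m128i t = _mm_cvttps_epu32(v);   // every lane holds 2
+ */
+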
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_cvtepu32_pd (__m128i __A) {
+ return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
+ return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) {
+ return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_cvtepu32_pd (__m128i __A) {
+ return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
+ return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) {
+ return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_cvtepu32_ps (__m128i __A) {
+ return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
+ return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A) {
+ return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_cvtepu32_ps (__m256i __A) {
+ return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
+ return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A) {
+ return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
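+/* Editor's note: unlike the signed cvtepi32 conversions, these treat each
+ * lane as unsigned, so values at or above 2^31 stay positive; a sketch:
+ *
+ *   __m128i u = _mm_set1_epi32((int) 0x80000000u);
+ *   __m128  f = _mm_cvtepu32_ps(u);    // 2147483648.0f in every lane
+ */
+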
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_div_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_div_pd (__mmask8 __U, __m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask_div_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ __m256d __B) {
+ return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_maskz_div_pd (__mmask8 __U, __m256d __A, __m256d __B) {
+ return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_div_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_div_ps (__mmask8 __U, __m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask_div_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
+ return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_maskz_div_ps (__mmask8 __U, __m256 __A, __m256 __B) {
+ return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A) {
+ return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_expand_pd (__mmask8 __U, __m128d __A) {
+ return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A) {
+ return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_maskz_expand_pd (__mmask8 __U, __m256d __A) {
+ return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
+ return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A) {
+ return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
+ return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A) {
+ return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
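+/* Editor's note: expand places the low (contiguous) elements of the source
+ * into the lanes selected by the mask, in order; a hypothetical example:
+ *
+ *   __m128i a = _mm_set_epi64x(20, 10);           // lanes {10, 20}
+ *   __m128i r = _mm_maskz_expand_epi64(0x2, a);   // {0, 10}
+ *
+ * The first set mask bit (lane 1) receives element 0 of __A.
+ */
+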
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P) {
+ return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
+ (__v2df) __W,
+ (__mmask8)
+ __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P) {
+ return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8)
+ __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P) {
+ return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
+ (__v4df) __W,
+ (__mmask8)
+ __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P) {
+ return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8)
+ __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) {
+ return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
+ (__v2di) __W,
+ (__mmask8)
+ __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) {
+ return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8)
+ __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U,
+ void const *__P) {
+ return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
+ (__v4di) __W,
+ (__mmask8)
+ __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) {
+ return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8)
+ __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P) {
+ return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P) {
+ return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8)
+ __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P) {
+ return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P) {
+ return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8)
+ __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) {
+ return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
+ (__v4si) __W,
+ (__mmask8)
+ __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) {
+ return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U,
+ void const *__P) {
+ return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
+ (__v8si) __W,
+ (__mmask8)
+ __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) {
+ return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8)
+ __U);
+}
+
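+/* Editor's note: the expand loads read only as many elements from memory as
+ * there are set mask bits, then expand them as above; a sketch (the buffer
+ * contents are assumptions for illustration):
+ *
+ *   double buf[2] = { 1.5, 2.5 };
+ *   __m128d r = _mm_maskz_expandloadu_pd(0x2, buf);   // {0.0, 1.5}
+ */
+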
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A) {
+ return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_expand_ps (__mmask8 __U, __m128 __A) {
+ return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A) {
+ return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_maskz_expand_ps (__mmask8 __U, __m256 __A) {
+ return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
+ return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A) {
+ return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
+ return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A) {
+ return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_getexp_pd (__m128d __A) {
+ return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A) {
+ return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_getexp_pd (__mmask8 __U, __m128d __A) {
+ return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_getexp_pd (__m256d __A) {
+ return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A) {
+ return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A) {
+ return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_getexp_ps (__m128 __A) {
+ return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A) {
+ return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_getexp_ps (__mmask8 __U, __m128 __A) {
+ return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_getexp_ps (__m256 __A) {
+ return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A) {
+ return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A) {
+ return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
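+/* Editor's note: getexp extracts the unbiased exponent of each lane, i.e.
+ * floor(log2(|x|)) returned as a floating-point value; a hypothetical
+ * example:
+ *
+ *   __m128d x = _mm_set_pd(0.5, 8.0);   // lanes {8.0, 0.5}
+ *   __m128d e = _mm_getexp_pd(x);       // {3.0, -1.0}
+ */
+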
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_max_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_max_pd (__mmask8 __U, __m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask_max_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ __m256d __B) {
+ return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_maskz_max_pd (__mmask8 __U, __m256d __A, __m256d __B) {
+ return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_max_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_max_ps (__mmask8 __U, __m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask_max_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
+ return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_maskz_max_ps (__mmask8 __U, __m256 __A, __m256 __B) {
+ return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_min_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_min_pd (__mmask8 __U, __m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask_min_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ __m256d __B) {
+ return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_maskz_min_pd (__mmask8 __U, __m256d __A, __m256d __B) {
+ return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_min_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_min_ps (__mmask8 __U, __m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask_min_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
+ return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_maskz_min_ps (__mmask8 __U, __m256 __A, __m256 __B) {
+ return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_mul_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_mul_pd (__mmask8 __U, __m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask_mul_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ __m256d __B) {
+ return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_maskz_mul_pd (__mmask8 __U, __m256d __A, __m256d __B) {
+ return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_mul_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_mul_ps (__mmask8 __U, __m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask_mul_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
+ return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_maskz_mul_ps (__mmask8 __U, __m256 __A, __m256 __B) {
+ return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_abs_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
+ return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_abs_epi32 (__mmask8 __U, __m128i __A) {
+ return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_abs_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
+ return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_abs_epi32 (__mmask8 __U, __m256i __A) {
+ return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_abs_epi64 (__m128i __A) {
+ return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
+ return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) {
+ return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_abs_epi64 (__m256i __A) {
+ return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
+ return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A) {
+ return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_max_epi32 (__mmask8 __M, __m128i __A, __m128i __B) {
+ return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_max_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B) {
+ return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __W, __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_max_epi32 (__mmask8 __M, __m256i __A, __m256i __B) {
+ return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_max_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
+ __m256i __B) {
+ return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si) __W, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
+ return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B) {
+ return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_max_epi64 (__m128i __A, __m128i __B) {
+ return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
+ return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
+ __m256i __B) {
+ return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di) __W, __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_max_epi64 (__m256i __A, __m256i __B) {
+ return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_max_epu32 (__mmask8 __M, __m128i __A, __m128i __B) {
+ return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_max_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B) {
+ return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __W, __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_max_epu32 (__mmask8 __M, __m256i __A, __m256i __B) {
+ return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_max_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
+ __m256i __B) {
+ return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si) __W, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
+ return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_max_epu64 (__m128i __A, __m128i __B) {
+ return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B) {
+ return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W, __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
+ return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_max_epu64 (__m256i __A, __m256i __B) {
+ return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
+ __m256i __B) {
+ return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di) __W, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_min_epi32 (__mmask8 __M, __m128i __A, __m128i __B) {
+ return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_min_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B) {
+ return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __W, __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_min_epi32 (__mmask8 __M, __m256i __A, __m256i __B) {
+ return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_min_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
+ __m256i __B) {
+ return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si) __W, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_min_epi64 (__m128i __A, __m128i __B) {
+ return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B) {
+ return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
+ return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_min_epi64 (__m256i __A, __m256i __B) {
+ return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
+ __m256i __B) {
+ return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di) __W, __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
+ return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_min_epu32 (__mmask8 __M, __m128i __A, __m128i __B) {
+ return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_min_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B) {
+ return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __W, __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_min_epu32 (__mmask8 __M, __m256i __A, __m256i __B) {
+ return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_min_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
+ __m256i __B) {
+ return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si) __W, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_min_epu64 (__m128i __A, __m128i __B) {
+ return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B) {
+ return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
+ return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_min_epu64 (__m256i __A, __m256i __B) {
+ return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
+ __m256i __B) {
+ return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di) __W, __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
+ return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
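+/* Editor's note: the 64-bit integer min/max forms are new with AVX-512 (the
+ * 32-bit forms only gain masking here); a sketch of the signed 64-bit
+ * compare:
+ *
+ *   __m128i a = _mm_set_epi64x(-1, 7);
+ *   __m128i b = _mm_set_epi64x( 2, 5);
+ *   __m128i r = _mm_max_epi64(a, b);   // lanes {7, 2}
+ */
+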
+#define _mm_roundscale_pd(__A, __imm) __extension__ ({ \
+ (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A, \
+ __imm, (__v2df) _mm_setzero_pd (), (__mmask8) -1); })
+
+
+#define _mm_mask_roundscale_pd(__W, __U, __A, __imm) __extension__ ({ \
+ (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A, __imm, \
+ (__v2df) __W, (__mmask8) __U); })
+
+
+#define _mm_maskz_roundscale_pd(__U, __A, __imm) __extension__ ({ \
+ (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A, __imm, \
+ (__v2df) _mm_setzero_pd (), (__mmask8) __U); })
+
+
+#define _mm256_roundscale_pd(__A, __imm) __extension__ ({ \
+ (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A, __imm, \
+ (__v4df) _mm256_setzero_pd (), (__mmask8) -1); })
+
+
+#define _mm256_mask_roundscale_pd(__W, __U, __A, __imm) __extension__ ({ \
+ (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A, __imm, \
+ (__v4df) __W, (__mmask8) __U); })
+
+
+#define _mm256_maskz_roundscale_pd(__U, __A, __imm) __extension__ ({ \
+ (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A, __imm, \
+ (__v4df) _mm256_setzero_pd(), (__mmask8) __U); })
+
+#define _mm_roundscale_ps(__A, __imm) __extension__ ({ \
+ (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A, __imm, \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) -1); })
+
+
+#define _mm_mask_roundscale_ps(__W, __U, __A, __imm) __extension__ ({ \
+ (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A, __imm, \
+ (__v4sf) __W, (__mmask8) __U); })
+
+
+#define _mm_maskz_roundscale_ps(__U, __A, __imm) __extension__ ({ \
+ (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A, __imm, \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) __U); })
+
+#define _mm256_roundscale_ps(__A, __imm) __extension__ ({ \
+  (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A, __imm, \
+ (__v8sf) _mm256_setzero_ps(), (__mmask8) -1); })
+
+#define _mm256_mask_roundscale_ps(__W, __U, __A, __imm) __extension__ ({ \
+ (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A, __imm, \
+ (__v8sf) __W, (__mmask8) __U); })
+
+
+#define _mm256_maskz_roundscale_ps(__U, __A, __imm) __extension__ ({ \
+ (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A, __imm, \
+ (__v8sf) _mm256_setzero_ps(), (__mmask8) __U); })
+
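+/* Editor's note: per the VRNDSCALE immediate encoding, __imm bits [7:4]
+ * give the scale M (round to multiples of 2^-M) and bits [1:0] the rounding
+ * mode; the values below are an illustrative assumption:
+ *
+ *   __m128d x  = _mm_set1_pd(2.5);
+ *   __m128d r0 = _mm_roundscale_pd(x, 0x00);   // 2.0 (nearest even, M = 0)
+ *   __m128d r1 = _mm_roundscale_pd(x, 0x11);   // 2.5 (round down, M = 1)
+ */
+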
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_scalef_pd (__m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B) {
+ return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_scalef_pd (__m256d __A, __m256d __B) {
+ return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ __m256d __B) {
+ return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B) {
+ return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_scalef_ps (__m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_scalef_ps (__m256 __A, __m256 __B) {
+ return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
+ __m256 __B) {
+ return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
+ return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
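+/* Editor's note: scalef computes __A * 2^floor(__B) per lane; a sketch:
+ *
+ *   __m128d a = _mm_set1_pd(3.0);
+ *   __m128d b = _mm_set1_pd(2.0);
+ *   __m128d r = _mm_scalef_pd(a, b);   // 12.0 in both lanes
+ */
+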
+#define _mm_i64scatter_pd(__addr, __index, __v1, __scale) __extension__ ({ \
+ __builtin_ia32_scatterdiv2df(__addr, (__mmask8) 0xFF, (__v2di) __index, \
+ (__v2df) __v1, __scale); })
+
+#define _mm_mask_i64scatter_pd(__addr, __mask, __index, __v1, \
+ __scale) __extension__ ({ \
+ __builtin_ia32_scatterdiv2df (__addr, __mask, (__v2di) __index, \
+ (__v2df) __v1, __scale); })
+
+
+#define _mm_i64scatter_epi64(__addr, __index, __v1, __scale) __extension__ ({ \
+ __builtin_ia32_scatterdiv2di (__addr, (__mmask8) 0xFF, \
+ (__v2di) __index, (__v2di) __v1, __scale); })
+
+#define _mm_mask_i64scatter_epi64(__addr, __mask, __index, __v1,\
+ __scale) __extension__ ({ \
+ __builtin_ia32_scatterdiv2di (__addr, __mask, (__v2di) __index,\
+ (__v2di) __v1, __scale); })
+
+#define _mm256_i64scatter_pd(__addr, __index, __v1, __scale) __extension__ ({ \
+ __builtin_ia32_scatterdiv4df (__addr, (__mmask8) 0xFF,\
+ (__v4di) __index, (__v4df) __v1, __scale); })
+
+#define _mm256_mask_i64scatter_pd(__addr, __mask, __index, __v1,\
+ __scale) __extension__ ({ \
+ __builtin_ia32_scatterdiv4df (__addr, __mask, (__v4di) __index,\
+ (__v4df) __v1, __scale); })
+
+#define _mm256_i64scatter_epi64(__addr, __index, __v1, __scale) __extension__ ({ \
+ __builtin_ia32_scatterdiv4di (__addr, (__mmask8) 0xFF, (__v4di) __index,\
+ (__v4di) __v1, __scale); })
+
+#define _mm256_mask_i64scatter_epi64(__addr, __mask, __index, __v1,\
+ __scale) __extension__ ({ \
+ __builtin_ia32_scatterdiv4di (__addr, __mask, (__v4di) __index,\
+ (__v4di) __v1, __scale); })
+
+#define _mm_i64scatter_ps(__addr, __index, __v1, __scale) __extension__ ({ \
+ __builtin_ia32_scatterdiv4sf (__addr, (__mmask8) 0xFF,\
+ (__v2di) __index, (__v4sf) __v1, __scale); })
+
+#define _mm_mask_i64scatter_ps(__addr, __mask, __index, __v1, \
+ __scale) __extension__ ({ \
+ __builtin_ia32_scatterdiv4sf (__addr, __mask, (__v2di) __index,\
+ (__v4sf) __v1, __scale); })
+
+#define _mm_i64scatter_epi32(__addr, __index, __v1, \
+ __scale) __extension__ ({ \
+ __builtin_ia32_scatterdiv4si (__addr, (__mmask8) 0xFF,\
+ (__v2di) __index, (__v4si) __v1, __scale); })
+
+#define _mm_mask_i64scatter_epi32(__addr, __mask, __index, __v1,\
+ __scale) __extension__ ({ \
+ __builtin_ia32_scatterdiv4si (__addr, __mask, (__v2di) __index,\
+ (__v4si) __v1, __scale); })
+
+#define _mm256_i64scatter_ps(__addr, __index, __v1, __scale) __extension__ ({ \
+ __builtin_ia32_scatterdiv8sf (__addr, (__mmask8) 0xFF, (__v4di) __index, \
+ (__v4sf) __v1, __scale); })
+
+#define _mm256_mask_i64scatter_ps(__addr, __mask, __index, __v1, \
+ __scale) __extension__ ({ \
+ __builtin_ia32_scatterdiv8sf (__addr, __mask, (__v4di) __index, \
+ (__v4sf) __v1, __scale); })
+
+#define _mm256_i64scatter_epi32(__addr, __index, __v1, __scale) __extension__ ({ \
+ __builtin_ia32_scatterdiv8si (__addr, (__mmask8) 0xFF, \
+ (__v4di) __index, (__v4si) __v1, __scale); })
+
+#define _mm256_mask_i64scatter_epi32(__addr, __mask, __index, __v1, \
+ __scale) __extension__ ({ \
+ __builtin_ia32_scatterdiv8si(__addr, __mask, (__v4di) __index, \
+ (__v4si) __v1, __scale); })
+
+#define _mm_i32scatter_pd(__addr, __index, __v1, \
+ __scale) __extension__ ({ \
+ __builtin_ia32_scattersiv2df (__addr, (__mmask8) 0xFF, \
+ (__v4si) __index, (__v2df) __v1, __scale); })
+
+#define _mm_mask_i32scatter_pd(__addr, __mask, __index, __v1, \
+ __scale) __extension__ ({ \
+ __builtin_ia32_scattersiv2df (__addr, __mask, (__v4si) __index,\
+ (__v2df) __v1, __scale); })
+
+#define _mm_i32scatter_epi64(__addr, __index, __v1, __scale) __extension__ ({ \
+ __builtin_ia32_scattersiv2di (__addr, (__mmask8) 0xFF, \
+ (__v4si) __index, (__v2di) __v1, __scale); })
+
+#define _mm_mask_i32scatter_epi64(__addr, __mask, __index, __v1, \
+ __scale) __extension__ ({ \
+ __builtin_ia32_scattersiv2di (__addr, __mask, (__v4si) __index, \
+ (__v2di) __v1, __scale); })
+
+#define _mm256_i32scatter_pd(__addr, __index, __v1, __scale) __extension__ ({ \
+ __builtin_ia32_scattersiv4df (__addr, (__mmask8) 0xFF, \
+ (__v4si) __index, (__v4df) __v1, __scale); })
+
+#define _mm256_mask_i32scatter_pd(__addr, __mask, __index, __v1, \
+ __scale) __extension__ ({ \
+ __builtin_ia32_scattersiv4df (__addr, __mask, (__v4si) __index, \
+ (__v4df) __v1, __scale); })
+
+#define _mm256_i32scatter_epi64(__addr, __index, __v1, \
+ __scale) __extension__ ({ \
+ __builtin_ia32_scattersiv4di (__addr, (__mmask8) 0xFF, \
+ (__v4si) __index, (__v4di) __v1, __scale); })
+
+#define _mm256_mask_i32scatter_epi64(__addr, __mask, __index, __v1, \
+ __scale) __extension__ ({ \
+ __builtin_ia32_scattersiv4di (__addr, __mask, (__v4si) __index, \
+ (__v4di) __v1, __scale); })
+
+#define _mm_i32scatter_ps(__addr, __index, __v1, __scale) __extension__ ({ \
+ __builtin_ia32_scattersiv4sf (__addr, (__mmask8) 0xFF, \
+ (__v4si) __index, (__v4sf) __v1, __scale); })
+
+#define _mm_mask_i32scatter_ps(__addr, __mask, __index, __v1, \
+ __scale) __extension__ ({ \
+ __builtin_ia32_scattersiv4sf (__addr, __mask, (__v4si) __index, \
+ (__v4sf) __v1, __scale); })
+
+#define _mm_i32scatter_epi32(__addr, __index, __v1, __scale) __extension__ ({ \
+ __builtin_ia32_scattersiv4si (__addr, (__mmask8) 0xFF, \
+ (__v4si) __index, (__v4si) __v1, __scale); })
+
+#define _mm_mask_i32scatter_epi32(__addr, __mask, __index, __v1, \
+ __scale) __extension__ ({ \
+ __builtin_ia32_scattersiv4si (__addr, __mask, (__v4si) __index,\
+ (__v4si) __v1, __scale); })
+
+#define _mm256_i32scatter_ps(__addr, __index, __v1, __scale) __extension__ ({ \
+ __builtin_ia32_scattersiv8sf (__addr, (__mmask8) 0xFF, \
+ (__v8si) __index, (__v8sf) __v1, __scale); })
+
+#define _mm256_mask_i32scatter_ps(__addr, __mask, __index, __v1, \
+ __scale) __extension__ ({ \
+ __builtin_ia32_scattersiv8sf (__addr, __mask, (__v8si) __index,\
+ (__v8sf) __v1, __scale); })
+
+#define _mm256_i32scatter_epi32(__addr, __index, __v1, __scale) __extension__ ({ \
+ __builtin_ia32_scattersiv8si (__addr, (__mmask8) 0xFF, \
+ (__v8si) __index, (__v8si) __v1, __scale); })
+
+#define _mm256_mask_i32scatter_epi32(__addr, __mask, __index, __v1, \
+ __scale) __extension__ ({ \
+ __builtin_ia32_scattersiv8si (__addr, __mask, (__v8si) __index, \
+ (__v8si) __v1, __scale); })
+
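+/* Editor's note: each scatter lane stores to __addr + __index[i] * __scale,
+ * where __scale must be a compile-time 1, 2, 4 or 8; a hypothetical
+ * example:
+ *
+ *   double out[8] = { 0 };
+ *   __m128i idx = _mm_set_epi64x(6, 1);    // lanes {1, 6}
+ *   __m128d v   = _mm_set_pd(9.0, 4.0);    // lanes {4.0, 9.0}
+ *   _mm_i64scatter_pd(out, idx, v, 8);     // out[1] = 4.0, out[6] = 9.0
+ */
+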
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_sqrt_pd (__m128d __W, __mmask8 __U, __m128d __A) {
+ return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_sqrt_pd (__mmask8 __U, __m128d __A) {
+ return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask_sqrt_pd (__m256d __W, __mmask8 __U, __m256d __A) {
+ return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_maskz_sqrt_pd (__mmask8 __U, __m256d __A) {
+ return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_sqrt_ps (__m128 __W, __mmask8 __U, __m128 __A) {
+ return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_sqrt_ps (__mmask8 __U, __m128 __A) {
+ return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask_sqrt_ps (__m256 __W, __mmask8 __U, __m256 __A) {
+ return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_maskz_sqrt_ps (__mmask8 __U, __m256 __A) {
+ return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_sub_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_sub_pd (__mmask8 __U, __m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask_sub_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ __m256d __B) {
+ return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_maskz_sub_pd (__mmask8 __U, __m256d __A, __m256d __B) {
+ return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_sub_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_sub_ps (__mmask8 __U, __m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask_sub_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
+ return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_maskz_sub_ps (__mmask8 __U, __m256 __A, __m256 __B) {
+ return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask2_permutex2var_epi32 (__m128i __A, __m128i __I, __mmask8 __U,
+ __m128i __B) {
+ return (__m128i) __builtin_ia32_vpermi2vard128_mask ((__v4si) __A,
+ (__v4si) __I
+ /* idx */ ,
+ (__v4si) __B,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask2_permutex2var_epi32 (__m256i __A, __m256i __I,
+ __mmask8 __U, __m256i __B) {
+ return (__m256i) __builtin_ia32_vpermi2vard256_mask ((__v8si) __A,
+ (__v8si) __I
+ /* idx */ ,
+ (__v8si) __B,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask2_permutex2var_pd (__m128d __A, __m128i __I, __mmask8 __U,
+ __m128d __B) {
+ return (__m128d) __builtin_ia32_vpermi2varpd128_mask ((__v2df) __A,
+ (__v2di) __I
+ /* idx */ ,
+ (__v2df) __B,
+ (__mmask8)
+ __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask2_permutex2var_pd (__m256d __A, __m256i __I, __mmask8 __U,
+ __m256d __B) {
+ return (__m256d) __builtin_ia32_vpermi2varpd256_mask ((__v4df) __A,
+ (__v4di) __I
+ /* idx */ ,
+ (__v4df) __B,
+ (__mmask8)
+ __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask2_permutex2var_ps (__m128 __A, __m128i __I, __mmask8 __U,
+ __m128 __B) {
+ return (__m128) __builtin_ia32_vpermi2varps128_mask ((__v4sf) __A,
+ (__v4si) __I
+ /* idx */ ,
+ (__v4sf) __B,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask2_permutex2var_ps (__m256 __A, __m256i __I, __mmask8 __U,
+ __m256 __B) {
+ return (__m256) __builtin_ia32_vpermi2varps256_mask ((__v8sf) __A,
+ (__v8si) __I
+ /* idx */ ,
+ (__v8sf) __B,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask2_permutex2var_epi64 (__m128i __A, __m128i __I, __mmask8 __U,
+ __m128i __B) {
+ return (__m128i) __builtin_ia32_vpermi2varq128_mask ((__v2di) __A,
+ (__v2di) __I
+ /* idx */ ,
+ (__v2di) __B,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask2_permutex2var_epi64 (__m256i __A, __m256i __I,
+ __mmask8 __U, __m256i __B) {
+ return (__m256i) __builtin_ia32_vpermi2varq256_mask ((__v4di) __A,
+ (__v4di) __I
+ /* idx */ ,
+ (__v4di) __B,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_permutex2var_epi32 (__m128i __A, __m128i __I, __m128i __B) {
+ return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
+ /* idx */ ,
+ (__v4si) __A,
+ (__v4si) __B,
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_permutex2var_epi32 (__m128i __A, __mmask8 __U, __m128i __I,
+ __m128i __B) {
+ return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
+ /* idx */ ,
+ (__v4si) __A,
+ (__v4si) __B,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_permutex2var_epi32 (__mmask8 __U, __m128i __A, __m128i __I,
+ __m128i __B) {
+ return (__m128i) __builtin_ia32_vpermt2vard128_maskz ((__v4si) __I
+ /* idx */ ,
+ (__v4si) __A,
+ (__v4si) __B,
+ (__mmask8)
+ __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_permutex2var_epi32 (__m256i __A, __m256i __I, __m256i __B) {
+ return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
+ /* idx */ ,
+ (__v8si) __A,
+ (__v8si) __B,
+ (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_permutex2var_epi32 (__m256i __A, __mmask8 __U, __m256i __I,
+ __m256i __B) {
+ return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
+ /* idx */ ,
+ (__v8si) __A,
+ (__v8si) __B,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_permutex2var_epi32 (__mmask8 __U, __m256i __A,
+ __m256i __I, __m256i __B) {
+ return (__m256i) __builtin_ia32_vpermt2vard256_maskz ((__v8si) __I
+ /* idx */ ,
+ (__v8si) __A,
+ (__v8si) __B,
+ (__mmask8)
+ __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_permutex2var_pd (__m128d __A, __m128i __I, __m128d __B) {
+ return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
+ /* idx */ ,
+ (__v2df) __A,
+ (__v2df) __B,
+                                                         (__mmask8) -1);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_permutex2var_pd (__m128d __A, __mmask8 __U, __m128i __I,
+ __m128d __B) {
+ return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
+ /* idx */ ,
+ (__v2df) __A,
+ (__v2df) __B,
+ (__mmask8)
+ __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_permutex2var_pd (__mmask8 __U, __m128d __A, __m128i __I,
+ __m128d __B) {
+ return (__m128d) __builtin_ia32_vpermt2varpd128_maskz ((__v2di) __I
+ /* idx */ ,
+ (__v2df) __A,
+ (__v2df) __B,
+ (__mmask8)
+ __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_permutex2var_pd (__m256d __A, __m256i __I, __m256d __B) {
+ return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
+ /* idx */ ,
+ (__v4df) __A,
+ (__v4df) __B,
+                                                         (__mmask8) -1);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask_permutex2var_pd (__m256d __A, __mmask8 __U, __m256i __I,
+ __m256d __B) {
+ return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
+ /* idx */ ,
+ (__v4df) __A,
+ (__v4df) __B,
+ (__mmask8)
+ __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_maskz_permutex2var_pd (__mmask8 __U, __m256d __A, __m256i __I,
+ __m256d __B) {
+ return (__m256d) __builtin_ia32_vpermt2varpd256_maskz ((__v4di) __I
+ /* idx */ ,
+ (__v4df) __A,
+ (__v4df) __B,
+ (__mmask8)
+ __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_permutex2var_ps (__m128 __A, __m128i __I, __m128 __B) {
+ return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
+ /* idx */ ,
+ (__v4sf) __A,
+ (__v4sf) __B,
+ (__mmask8) -1);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_permutex2var_ps (__m128 __A, __mmask8 __U, __m128i __I,
+ __m128 __B) {
+ return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
+ /* idx */ ,
+ (__v4sf) __A,
+ (__v4sf) __B,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_permutex2var_ps (__mmask8 __U, __m128 __A, __m128i __I,
+ __m128 __B) {
+ return (__m128) __builtin_ia32_vpermt2varps128_maskz ((__v4si) __I
+ /* idx */ ,
+ (__v4sf) __A,
+ (__v4sf) __B,
+ (__mmask8)
+ __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_permutex2var_ps (__m256 __A, __m256i __I, __m256 __B) {
+ return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
+ /* idx */ ,
+ (__v8sf) __A,
+ (__v8sf) __B,
+ (__mmask8) -1);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask_permutex2var_ps (__m256 __A, __mmask8 __U, __m256i __I,
+ __m256 __B) {
+ return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
+ /* idx */ ,
+ (__v8sf) __A,
+ (__v8sf) __B,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_maskz_permutex2var_ps (__mmask8 __U, __m256 __A, __m256i __I,
+ __m256 __B) {
+ return (__m256) __builtin_ia32_vpermt2varps256_maskz ((__v8si) __I
+ /* idx */ ,
+ (__v8sf) __A,
+ (__v8sf) __B,
+ (__mmask8)
+ __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_permutex2var_epi64 (__m128i __A, __m128i __I, __m128i __B) {
+ return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
+ /* idx */ ,
+ (__v2di) __A,
+ (__v2di) __B,
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_permutex2var_epi64 (__m128i __A, __mmask8 __U, __m128i __I,
+ __m128i __B) {
+ return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
+ /* idx */ ,
+ (__v2di) __A,
+ (__v2di) __B,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_permutex2var_epi64 (__mmask8 __U, __m128i __A, __m128i __I,
+ __m128i __B) {
+ return (__m128i) __builtin_ia32_vpermt2varq128_maskz ((__v2di) __I
+ /* idx */ ,
+ (__v2di) __A,
+ (__v2di) __B,
+ (__mmask8)
+ __U);
+}
+
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_permutex2var_epi64 (__m256i __A, __m256i __I, __m256i __B) {
+ return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
+ /* idx */ ,
+ (__v4di) __A,
+ (__v4di) __B,
+ (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_permutex2var_epi64 (__m256i __A, __mmask8 __U, __m256i __I,
+ __m256i __B) {
+ return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
+ /* idx */ ,
+ (__v4di) __A,
+ (__v4di) __B,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_permutex2var_epi64 (__mmask8 __U, __m256i __A,
+ __m256i __I, __m256i __B) {
+ return (__m256i) __builtin_ia32_vpermt2varq256_maskz ((__v4di) __I /* idx */,
+ (__v4di) __A,
+ (__v4di) __B,
+ (__mmask8) __U);
+}
+
#undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS_BOTH
#endif /* __AVX512VLINTRIN_H */
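
For illustration, a hedged usage sketch of the two-source permutes added above (function name and values are hypothetical, not part of the patch; compile with -mavx512f -mavx512vl). Index element i selects from the concatenation {__A, __B}, and the _mask variant keeps the corresponding element of __A wherever a mask bit is clear:

#include <immintrin.h>

__m256d pick(__m256d a, __m256d b)
{
    /* Indices, high lane to low: 7 -> b[3], 4 -> b[0], 2 -> a[2], 1 -> a[1]. */
    __m256i idx = _mm256_set_epi64x(7, 4, 2, 1);
    /* Mask 0x5 (0b0101): lanes 0 and 2 take the permuted result,
       lanes 1 and 3 keep the original elements of a. */
    return _mm256_mask_permutex2var_pd(a, 0x5, idx, b);
}
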
diff --git a/contrib/llvm/tools/clang/lib/Headers/avxintrin.h b/contrib/llvm/tools/clang/lib/Headers/avxintrin.h
index c1bc85b39e82..6d1ca5473dcf 100644
--- a/contrib/llvm/tools/clang/lib/Headers/avxintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/avxintrin.h
@@ -35,12 +35,16 @@ typedef int __v8si __attribute__ ((__vector_size__ (32)));
typedef short __v16hi __attribute__ ((__vector_size__ (32)));
typedef char __v32qi __attribute__ ((__vector_size__ (32)));
+/* We need an explicitly signed variant for char. Note that this shouldn't
+ * appear in the interface though. */
+typedef signed char __v32qs __attribute__((__vector_size__(32)));
+
typedef float __m256 __attribute__ ((__vector_size__ (32)));
typedef double __m256d __attribute__((__vector_size__(32)));
typedef long long __m256i __attribute__((__vector_size__(32)));
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx")))
/* Arithmetic */
static __inline __m256d __DEFAULT_FN_ATTRS
@@ -152,12 +156,10 @@ _mm256_rcp_ps(__m256 __a)
}
#define _mm256_round_pd(V, M) __extension__ ({ \
- __m256d __V = (V); \
- (__m256d)__builtin_ia32_roundpd256((__v4df)__V, (M)); })
+ (__m256d)__builtin_ia32_roundpd256((__v4df)(__m256d)(V), (M)); })
#define _mm256_round_ps(V, M) __extension__ ({ \
- __m256 __V = (V); \
- (__m256)__builtin_ia32_roundps256((__v8sf)__V, (M)); })
+ (__m256)__builtin_ia32_roundps256((__v8sf)(__m256)(V), (M)); })
#define _mm256_ceil_pd(V) _mm256_round_pd((V), _MM_FROUND_CEIL)
#define _mm256_floor_pd(V) _mm256_round_pd((V), _MM_FROUND_FLOOR)
@@ -264,26 +266,26 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c)
}
#define _mm_permute_pd(A, C) __extension__ ({ \
- __m128d __A = (A); \
- (__m128d)__builtin_shufflevector((__v2df)__A, (__v2df) _mm_setzero_pd(), \
+ (__m128d)__builtin_shufflevector((__v2df)(__m128d)(A), \
+ (__v2df)_mm_setzero_pd(), \
(C) & 0x1, ((C) & 0x2) >> 1); })
#define _mm256_permute_pd(A, C) __extension__ ({ \
- __m256d __A = (A); \
- (__m256d)__builtin_shufflevector((__v4df)__A, (__v4df) _mm256_setzero_pd(), \
+ (__m256d)__builtin_shufflevector((__v4df)(__m256d)(A), \
+ (__v4df)_mm256_setzero_pd(), \
(C) & 0x1, ((C) & 0x2) >> 1, \
2 + (((C) & 0x4) >> 2), \
2 + (((C) & 0x8) >> 3)); })
#define _mm_permute_ps(A, C) __extension__ ({ \
- __m128 __A = (A); \
- (__m128)__builtin_shufflevector((__v4sf)__A, (__v4sf) _mm_setzero_ps(), \
+ (__m128)__builtin_shufflevector((__v4sf)(__m128)(A), \
+ (__v4sf)_mm_setzero_ps(), \
(C) & 0x3, ((C) & 0xc) >> 2, \
((C) & 0x30) >> 4, ((C) & 0xc0) >> 6); })
#define _mm256_permute_ps(A, C) __extension__ ({ \
- __m256 __A = (A); \
- (__m256)__builtin_shufflevector((__v8sf)__A, (__v8sf) _mm256_setzero_ps(), \
+ (__m256)__builtin_shufflevector((__v8sf)(__m256)(A), \
+ (__v8sf)_mm256_setzero_ps(), \
(C) & 0x3, ((C) & 0xc) >> 2, \
((C) & 0x30) >> 4, ((C) & 0xc0) >> 6, \
4 + (((C) & 0x03) >> 0), \
@@ -292,34 +294,29 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c)
4 + (((C) & 0xc0) >> 6)); })
#define _mm256_permute2f128_pd(V1, V2, M) __extension__ ({ \
- __m256d __V1 = (V1); \
- __m256d __V2 = (V2); \
- (__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)__V1, (__v4df)__V2, (M)); })
+ (__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)(__m256d)(V1), \
+ (__v4df)(__m256d)(V2), (M)); })
#define _mm256_permute2f128_ps(V1, V2, M) __extension__ ({ \
- __m256 __V1 = (V1); \
- __m256 __V2 = (V2); \
- (__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)__V1, (__v8sf)__V2, (M)); })
+ (__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)(__m256)(V1), \
+ (__v8sf)(__m256)(V2), (M)); })
#define _mm256_permute2f128_si256(V1, V2, M) __extension__ ({ \
- __m256i __V1 = (V1); \
- __m256i __V2 = (V2); \
- (__m256i)__builtin_ia32_vperm2f128_si256((__v8si)__V1, (__v8si)__V2, (M)); })
+ (__m256i)__builtin_ia32_vperm2f128_si256((__v8si)(__m256i)(V1), \
+ (__v8si)(__m256i)(V2), (M)); })
/* Vector Blend */
#define _mm256_blend_pd(V1, V2, M) __extension__ ({ \
- __m256d __V1 = (V1); \
- __m256d __V2 = (V2); \
- (__m256d)__builtin_shufflevector((__v4df)__V1, (__v4df)__V2, \
+ (__m256d)__builtin_shufflevector((__v4df)(__m256d)(V1), \
+ (__v4df)(__m256d)(V2), \
(((M) & 0x01) ? 4 : 0), \
(((M) & 0x02) ? 5 : 1), \
(((M) & 0x04) ? 6 : 2), \
(((M) & 0x08) ? 7 : 3)); })
#define _mm256_blend_ps(V1, V2, M) __extension__ ({ \
- __m256 __V1 = (V1); \
- __m256 __V2 = (V2); \
- (__m256)__builtin_shufflevector((__v8sf)__V1, (__v8sf)__V2, \
+ (__m256)__builtin_shufflevector((__v8sf)(__m256)(V1), \
+ (__v8sf)(__m256)(V2), \
(((M) & 0x01) ? 8 : 0), \
(((M) & 0x02) ? 9 : 1), \
(((M) & 0x04) ? 10 : 2), \
@@ -345,28 +342,29 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/* Vector Dot Product */
#define _mm256_dp_ps(V1, V2, M) __extension__ ({ \
- __m256 __V1 = (V1); \
- __m256 __V2 = (V2); \
- (__m256)__builtin_ia32_dpps256((__v8sf)__V1, (__v8sf)__V2, (M)); })
+ (__m256)__builtin_ia32_dpps256((__v8sf)(__m256)(V1), \
+ (__v8sf)(__m256)(V2), (M)); })
/* Vector shuffle */
#define _mm256_shuffle_ps(a, b, mask) __extension__ ({ \
- __m256 __a = (a); \
- __m256 __b = (b); \
- (__m256)__builtin_shufflevector((__v8sf)__a, (__v8sf)__b, \
- (mask) & 0x3, ((mask) & 0xc) >> 2, \
- (((mask) & 0x30) >> 4) + 8, (((mask) & 0xc0) >> 6) + 8, \
- ((mask) & 0x3) + 4, (((mask) & 0xc) >> 2) + 4, \
- (((mask) & 0x30) >> 4) + 12, (((mask) & 0xc0) >> 6) + 12); })
+ (__m256)__builtin_shufflevector((__v8sf)(__m256)(a), \
+ (__v8sf)(__m256)(b), \
+ (mask) & 0x3, \
+ ((mask) & 0xc) >> 2, \
+ (((mask) & 0x30) >> 4) + 8, \
+ (((mask) & 0xc0) >> 6) + 8, \
+ ((mask) & 0x3) + 4, \
+ (((mask) & 0xc) >> 2) + 4, \
+ (((mask) & 0x30) >> 4) + 12, \
+ (((mask) & 0xc0) >> 6) + 12); })
#define _mm256_shuffle_pd(a, b, mask) __extension__ ({ \
- __m256d __a = (a); \
- __m256d __b = (b); \
- (__m256d)__builtin_shufflevector((__v4df)__a, (__v4df)__b, \
- (mask) & 0x1, \
- (((mask) & 0x2) >> 1) + 4, \
- (((mask) & 0x4) >> 2) + 2, \
- (((mask) & 0x8) >> 3) + 6); })
+ (__m256d)__builtin_shufflevector((__v4df)(__m256d)(a), \
+ (__v4df)(__m256d)(b), \
+ (mask) & 0x1, \
+ (((mask) & 0x2) >> 1) + 4, \
+ (((mask) & 0x4) >> 2) + 2, \
+ (((mask) & 0x8) >> 3) + 6); })
/* Compare */
#define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */
@@ -403,34 +401,28 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
#define _CMP_TRUE_US 0x1f /* True (unordered, signaling) */
#define _mm_cmp_pd(a, b, c) __extension__ ({ \
- __m128d __a = (a); \
- __m128d __b = (b); \
- (__m128d)__builtin_ia32_cmppd((__v2df)__a, (__v2df)__b, (c)); })
+ (__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), \
+ (__v2df)(__m128d)(b), (c)); })
#define _mm_cmp_ps(a, b, c) __extension__ ({ \
- __m128 __a = (a); \
- __m128 __b = (b); \
- (__m128)__builtin_ia32_cmpps((__v4sf)__a, (__v4sf)__b, (c)); })
+ (__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), \
+ (__v4sf)(__m128)(b), (c)); })
#define _mm256_cmp_pd(a, b, c) __extension__ ({ \
- __m256d __a = (a); \
- __m256d __b = (b); \
- (__m256d)__builtin_ia32_cmppd256((__v4df)__a, (__v4df)__b, (c)); })
+ (__m256d)__builtin_ia32_cmppd256((__v4df)(__m256d)(a), \
+ (__v4df)(__m256d)(b), (c)); })
#define _mm256_cmp_ps(a, b, c) __extension__ ({ \
- __m256 __a = (a); \
- __m256 __b = (b); \
- (__m256)__builtin_ia32_cmpps256((__v8sf)__a, (__v8sf)__b, (c)); })
+ (__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(a), \
+ (__v8sf)(__m256)(b), (c)); })
#define _mm_cmp_sd(a, b, c) __extension__ ({ \
- __m128d __a = (a); \
- __m128d __b = (b); \
- (__m128d)__builtin_ia32_cmpsd((__v2df)__a, (__v2df)__b, (c)); })
+ (__m128d)__builtin_ia32_cmpsd((__v2df)(__m128d)(a), \
+ (__v2df)(__m128d)(b), (c)); })
#define _mm_cmp_ss(a, b, c) __extension__ ({ \
- __m128 __a = (a); \
- __m128 __b = (b); \
- (__m128)__builtin_ia32_cmpss((__v4sf)__a, (__v4sf)__b, (c)); })
+ (__m128)__builtin_ia32_cmpss((__v4sf)(__m128)(a), \
+ (__v4sf)(__m128)(b), (c)); })
static __inline int __DEFAULT_FN_ATTRS
_mm256_extract_epi32(__m256i __a, const int __imm)
@@ -831,53 +823,53 @@ _mm256_storeu_si256(__m256i *__p, __m256i __a)
/* Conditional load ops */
static __inline __m128d __DEFAULT_FN_ATTRS
-_mm_maskload_pd(double const *__p, __m128d __m)
+_mm_maskload_pd(double const *__p, __m128i __m)
{
- return (__m128d)__builtin_ia32_maskloadpd((const __v2df *)__p, (__v2df)__m);
+ return (__m128d)__builtin_ia32_maskloadpd((const __v2df *)__p, (__v2di)__m);
}
static __inline __m256d __DEFAULT_FN_ATTRS
-_mm256_maskload_pd(double const *__p, __m256d __m)
+_mm256_maskload_pd(double const *__p, __m256i __m)
{
return (__m256d)__builtin_ia32_maskloadpd256((const __v4df *)__p,
- (__v4df)__m);
+ (__v4di)__m);
}
static __inline __m128 __DEFAULT_FN_ATTRS
-_mm_maskload_ps(float const *__p, __m128 __m)
+_mm_maskload_ps(float const *__p, __m128i __m)
{
- return (__m128)__builtin_ia32_maskloadps((const __v4sf *)__p, (__v4sf)__m);
+ return (__m128)__builtin_ia32_maskloadps((const __v4sf *)__p, (__v4si)__m);
}
static __inline __m256 __DEFAULT_FN_ATTRS
-_mm256_maskload_ps(float const *__p, __m256 __m)
+_mm256_maskload_ps(float const *__p, __m256i __m)
{
- return (__m256)__builtin_ia32_maskloadps256((const __v8sf *)__p, (__v8sf)__m);
+ return (__m256)__builtin_ia32_maskloadps256((const __v8sf *)__p, (__v8si)__m);
}
/* Conditional store ops */
static __inline void __DEFAULT_FN_ATTRS
-_mm256_maskstore_ps(float *__p, __m256 __m, __m256 __a)
+_mm256_maskstore_ps(float *__p, __m256i __m, __m256 __a)
{
- __builtin_ia32_maskstoreps256((__v8sf *)__p, (__v8sf)__m, (__v8sf)__a);
+ __builtin_ia32_maskstoreps256((__v8sf *)__p, (__v8si)__m, (__v8sf)__a);
}
static __inline void __DEFAULT_FN_ATTRS
-_mm_maskstore_pd(double *__p, __m128d __m, __m128d __a)
+_mm_maskstore_pd(double *__p, __m128i __m, __m128d __a)
{
- __builtin_ia32_maskstorepd((__v2df *)__p, (__v2df)__m, (__v2df)__a);
+ __builtin_ia32_maskstorepd((__v2df *)__p, (__v2di)__m, (__v2df)__a);
}
static __inline void __DEFAULT_FN_ATTRS
-_mm256_maskstore_pd(double *__p, __m256d __m, __m256d __a)
+_mm256_maskstore_pd(double *__p, __m256i __m, __m256d __a)
{
- __builtin_ia32_maskstorepd256((__v4df *)__p, (__v4df)__m, (__v4df)__a);
+ __builtin_ia32_maskstorepd256((__v4df *)__p, (__v4di)__m, (__v4df)__a);
}
static __inline void __DEFAULT_FN_ATTRS
-_mm_maskstore_ps(float *__p, __m128 __m, __m128 __a)
+_mm_maskstore_ps(float *__p, __m128i __m, __m128 __a)
{
- __builtin_ia32_maskstoreps((__v4sf *)__p, (__v4sf)__m, (__v4sf)__a);
+ __builtin_ia32_maskstoreps((__v4sf *)__p, (__v4si)__m, (__v4sf)__a);
}
/* Cacheability support ops */
@@ -900,6 +892,24 @@ _mm256_stream_ps(float *__p, __m256 __a)
}
/* Create vectors */
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_undefined_pd()
+{
+ return (__m256d)__builtin_ia32_undef256();
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_undefined_ps()
+{
+ return (__m256)__builtin_ia32_undef256();
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_undefined_si256()
+{
+ return (__m256i)__builtin_ia32_undef256();
+}
+
static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_set_pd(double __a, double __b, double __c, double __d)
{
@@ -1140,14 +1150,14 @@ _mm256_castsi128_si256(__m128i __a)
return __builtin_shufflevector(__a, __a, 0, 1, -1, -1);
}
-/*
+/*
Vector insert.
We use macros rather than inlines because we only want to accept
invocations where the immediate M is a constant expression.
*/
#define _mm256_insertf128_ps(V1, V2, M) __extension__ ({ \
(__m256)__builtin_shufflevector( \
- (__v8sf)(V1), \
+ (__v8sf)(__m256)(V1), \
(__v8sf)_mm256_castps128_ps256((__m128)(V2)), \
(((M) & 1) ? 0 : 8), \
(((M) & 1) ? 1 : 9), \
@@ -1160,7 +1170,7 @@ _mm256_castsi128_si256(__m128i __a)
#define _mm256_insertf128_pd(V1, V2, M) __extension__ ({ \
(__m256d)__builtin_shufflevector( \
- (__v4df)(V1), \
+ (__v4df)(__m256d)(V1), \
(__v4df)_mm256_castpd128_pd256((__m128d)(V2)), \
(((M) & 1) ? 0 : 4), \
(((M) & 1) ? 1 : 5), \
@@ -1169,21 +1179,21 @@ _mm256_castsi128_si256(__m128i __a)
#define _mm256_insertf128_si256(V1, V2, M) __extension__ ({ \
(__m256i)__builtin_shufflevector( \
- (__v4di)(V1), \
+ (__v4di)(__m256i)(V1), \
(__v4di)_mm256_castsi128_si256((__m128i)(V2)), \
(((M) & 1) ? 0 : 4), \
(((M) & 1) ? 1 : 5), \
(((M) & 1) ? 4 : 2), \
(((M) & 1) ? 5 : 3) );})
-/*
+/*
Vector extract.
We use macros rather than inlines because we only want to accept
invocations where the immediate M is a constant expression.
*/
#define _mm256_extractf128_ps(V, M) __extension__ ({ \
(__m128)__builtin_shufflevector( \
- (__v8sf)(V), \
+ (__v8sf)(__m256)(V), \
(__v8sf)(_mm256_setzero_ps()), \
(((M) & 1) ? 4 : 0), \
(((M) & 1) ? 5 : 1), \
@@ -1192,14 +1202,14 @@ _mm256_castsi128_si256(__m128i __a)
#define _mm256_extractf128_pd(V, M) __extension__ ({ \
(__m128d)__builtin_shufflevector( \
- (__v4df)(V), \
+ (__v4df)(__m256d)(V), \
(__v4df)(_mm256_setzero_pd()), \
(((M) & 1) ? 2 : 0), \
(((M) & 1) ? 3 : 1) );})
#define _mm256_extractf128_si256(V, M) __extension__ ({ \
(__m128i)__builtin_shufflevector( \
- (__v4di)(V), \
+ (__v4di)(__m256i)(V), \
(__v4di)(_mm256_setzero_si256()), \
(((M) & 1) ? 2 : 0), \
(((M) & 1) ? 3 : 1) );})
@@ -1222,7 +1232,7 @@ _mm256_loadu2_m128d(double const *__addr_hi, double const *__addr_lo)
struct __loadu_pd {
__m128d __v;
} __attribute__((__packed__, __may_alias__));
-
+
__m256d __v256 = _mm256_castpd128_pd256(((struct __loadu_pd*)__addr_lo)->__v);
return _mm256_insertf128_pd(__v256, ((struct __loadu_pd*)__addr_hi)->__v, 1);
}
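
The maskload/maskstore prototype changes above switch the mask operand from a float/double vector to an integer vector (__m128i/__m256i), matching Intel's documentation; the sign bit of each mask element still gates the lane. A hedged sketch of the corrected signature (function name and values are hypothetical, not part of the patch; compile with -mavx):

#include <immintrin.h>

void store_low_half(double *p, __m256d v)
{
    /* -1 sets the sign bit (lane is written), 0 leaves it clear (lane skipped). */
    __m256i mask = _mm256_set_epi64x(0, 0, -1, -1);
    _mm256_maskstore_pd(p, mask, v);   /* writes only p[0] and p[1] */
}
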
diff --git a/contrib/llvm/tools/clang/lib/Headers/bmi2intrin.h b/contrib/llvm/tools/clang/lib/Headers/bmi2intrin.h
index c63397c96ebe..fdae82cf2ba7 100644
--- a/contrib/llvm/tools/clang/lib/Headers/bmi2intrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/bmi2intrin.h
@@ -25,15 +25,11 @@
#error "Never use <bmi2intrin.h> directly; include <x86intrin.h> instead."
#endif
-#ifndef __BMI2__
-# error "BMI2 instruction set not enabled"
-#endif /* __BMI2__ */
-
#ifndef __BMI2INTRIN_H
#define __BMI2INTRIN_H
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi2")))
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_bzhi_u32(unsigned int __X, unsigned int __Y)
diff --git a/contrib/llvm/tools/clang/lib/Headers/bmiintrin.h b/contrib/llvm/tools/clang/lib/Headers/bmiintrin.h
index 0e93d575cb8b..da98792d8307 100644
--- a/contrib/llvm/tools/clang/lib/Headers/bmiintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/bmiintrin.h
@@ -25,10 +25,6 @@
#error "Never use <bmiintrin.h> directly; include <x86intrin.h> instead."
#endif
-#ifndef __BMI__
-# error "BMI instruction set not enabled"
-#endif /* __BMI__ */
-
#ifndef __BMIINTRIN_H
#define __BMIINTRIN_H
@@ -41,9 +37,14 @@
#define _tzcnt_u32(a) (__tzcnt_u32((a)))
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi")))
+
+/* Allow using the tzcnt intrinsics even for non-BMI targets. Since the TZCNT
+ instruction behaves as BSF on non-BMI targets, there is code that expects
+ to use it as a potentially faster version of BSF. */
+#define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
-static __inline__ unsigned short __DEFAULT_FN_ATTRS
+static __inline__ unsigned short __RELAXED_FN_ATTRS
__tzcnt_u16(unsigned short __X)
{
return __X ? __builtin_ctzs(__X) : 16;
@@ -87,7 +88,7 @@ __blsr_u32(unsigned int __X)
return __X & (__X - 1);
}
-static __inline__ unsigned int __DEFAULT_FN_ATTRS
+static __inline__ unsigned int __RELAXED_FN_ATTRS
__tzcnt_u32(unsigned int __X)
{
return __X ? __builtin_ctz(__X) : 32;
@@ -140,7 +141,7 @@ __blsr_u64(unsigned long long __X)
return __X & (__X - 1);
}
-static __inline__ unsigned long long __DEFAULT_FN_ATTRS
+static __inline__ unsigned long long __RELAXED_FN_ATTRS
__tzcnt_u64(unsigned long long __X)
{
return __X ? __builtin_ctzll(__X) : 64;
@@ -149,5 +150,6 @@ __tzcnt_u64(unsigned long long __X)
#endif /* __x86_64__ */
#undef __DEFAULT_FN_ATTRS
+#undef __RELAXED_FN_ATTRS
#endif /* __BMIINTRIN_H */
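
The new __RELAXED_FN_ATTRS deliberately omits __target__("bmi"), so the tzcnt intrinsics stay usable without -mbmi: on non-BMI hardware TZCNT decodes as BSF, which gives the same answer for nonzero inputs. A minimal sketch (hypothetical function, not part of the patch):

#include <x86intrin.h>

unsigned first_set_bit(unsigned x)
{
    return __tzcnt_u32(x);   /* BMI semantics: returns 32 when x == 0 */
}
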
diff --git a/contrib/llvm/tools/clang/lib/Headers/emmintrin.h b/contrib/llvm/tools/clang/lib/Headers/emmintrin.h
index 656bc19d3dea..cfc2c7161460 100644
--- a/contrib/llvm/tools/clang/lib/Headers/emmintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/emmintrin.h
@@ -24,10 +24,6 @@
#ifndef __EMMINTRIN_H
#define __EMMINTRIN_H
-#ifndef __SSE2__
-#error "SSE2 instruction set not enabled"
-#else
-
#include <xmmintrin.h>
typedef double __m128d __attribute__((__vector_size__(16)));
@@ -39,8 +35,14 @@ typedef long long __v2di __attribute__ ((__vector_size__ (16)));
typedef short __v8hi __attribute__((__vector_size__(16)));
typedef char __v16qi __attribute__((__vector_size__(16)));
+/* We need an explicitly signed variant for char. Note that this shouldn't
+ * appear in the interface though. */
+typedef signed char __v16qs __attribute__((__vector_size__(16)));
+
+#include <f16cintrin.h>
+
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse2")))
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_add_sd(__m128d __a, __m128d __b)
@@ -527,6 +529,12 @@ _mm_loadl_pd(__m128d __a, double const *__dp)
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_undefined_pd()
+{
+ return (__m128d)__builtin_ia32_undef128();
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_set_sd(double __w)
{
return (__m128d){ __w, 0 };
@@ -639,7 +647,7 @@ _mm_add_epi32(__m128i __a, __m128i __b)
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_add_si64(__m64 __a, __m64 __b)
{
- return __a + __b;
+ return (__m64)__builtin_ia32_paddq(__a, __b);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
@@ -771,7 +779,7 @@ _mm_sub_epi32(__m128i __a, __m128i __b)
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_sub_si64(__m64 __a, __m64 __b)
{
- return __a - __b;
+ return (__m64)__builtin_ia32_psubq(__a, __b);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
@@ -992,8 +1000,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cmpgt_epi8(__m128i __a, __m128i __b)
{
/* This function always performs a signed comparison, but __v16qi is a char
- which may be signed or unsigned. */
- typedef signed char __v16qs __attribute__((__vector_size__(16)));
+ which may be signed or unsigned, so use __v16qs. */
return (__m128i)((__v16qs)__a > (__v16qs)__b);
}
@@ -1120,33 +1127,39 @@ _mm_loadl_epi64(__m128i const *__p)
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_set_epi64x(long long q1, long long q0)
+_mm_undefined_si128()
{
- return (__m128i){ q0, q1 };
+ return (__m128i)__builtin_ia32_undef128();
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_set_epi64(__m64 q1, __m64 q0)
+_mm_set_epi64x(long long __q1, long long __q0)
{
- return (__m128i){ (long long)q0, (long long)q1 };
+ return (__m128i){ __q0, __q1 };
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_set_epi32(int i3, int i2, int i1, int i0)
+_mm_set_epi64(__m64 __q1, __m64 __q0)
{
- return (__m128i)(__v4si){ i0, i1, i2, i3};
+ return (__m128i){ (long long)__q0, (long long)__q1 };
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_set_epi16(short w7, short w6, short w5, short w4, short w3, short w2, short w1, short w0)
+_mm_set_epi32(int __i3, int __i2, int __i1, int __i0)
{
- return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 };
+ return (__m128i)(__v4si){ __i0, __i1, __i2, __i3};
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_set_epi8(char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0)
+_mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3, short __w2, short __w1, short __w0)
{
- return (__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15 };
+ return (__m128i)(__v8hi){ __w0, __w1, __w2, __w3, __w4, __w5, __w6, __w7 };
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b9, char __b8, char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0)
+{
+ return (__m128i)(__v16qi){ __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7, __b8, __b9, __b10, __b11, __b12, __b13, __b14, __b15 };
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
@@ -1180,27 +1193,27 @@ _mm_set1_epi8(char __b)
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_setr_epi64(__m64 q0, __m64 q1)
+_mm_setr_epi64(__m64 __q0, __m64 __q1)
{
- return (__m128i){ (long long)q0, (long long)q1 };
+ return (__m128i){ (long long)__q0, (long long)__q1 };
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_setr_epi32(int i0, int i1, int i2, int i3)
+_mm_setr_epi32(int __i0, int __i1, int __i2, int __i3)
{
- return (__m128i)(__v4si){ i0, i1, i2, i3};
+ return (__m128i)(__v4si){ __i0, __i1, __i2, __i3};
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_setr_epi16(short w0, short w1, short w2, short w3, short w4, short w5, short w6, short w7)
+_mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4, short __w5, short __w6, short __w7)
{
- return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 };
+ return (__m128i)(__v8hi){ __w0, __w1, __w2, __w3, __w4, __w5, __w6, __w7 };
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_setr_epi8(char b0, char b1, char b2, char b3, char b4, char b5, char b6, char b7, char b8, char b9, char b10, char b11, char b12, char b13, char b14, char b15)
+_mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7, char __b8, char __b9, char __b10, char __b11, char __b12, char __b13, char __b14, char __b15)
{
- return (__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15 };
+ return (__m128i)(__v16qi){ __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7, __b8, __b9, __b10, __b11, __b12, __b13, __b14, __b15 };
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
@@ -1321,20 +1334,20 @@ _mm_movemask_epi8(__m128i __a)
#define _mm_shuffle_epi32(a, imm) __extension__ ({ \
(__m128i)__builtin_shufflevector((__v4si)(__m128i)(a), \
- (__v4si)_mm_set1_epi32(0), \
+ (__v4si)_mm_setzero_si128(), \
(imm) & 0x3, ((imm) & 0xc) >> 2, \
((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6); })
#define _mm_shufflelo_epi16(a, imm) __extension__ ({ \
(__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \
- (__v8hi)_mm_set1_epi16(0), \
+ (__v8hi)_mm_setzero_si128(), \
(imm) & 0x3, ((imm) & 0xc) >> 2, \
((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \
4, 5, 6, 7); })
#define _mm_shufflehi_epi16(a, imm) __extension__ ({ \
(__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \
- (__v8hi)_mm_set1_epi16(0), \
+ (__v8hi)_mm_setzero_si128(), \
0, 1, 2, 3, \
4 + (((imm) & 0x03) >> 0), \
4 + (((imm) & 0x0c) >> 2), \
@@ -1426,8 +1439,8 @@ _mm_movemask_pd(__m128d __a)
}
#define _mm_shuffle_pd(a, b, i) __extension__ ({ \
- __builtin_shufflevector((__m128d)(a), (__m128d)(b), \
- (i) & 1, (((i) & 2) >> 1) + 2); })
+ (__m128d)__builtin_shufflevector((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \
+ (i) & 1, (((i) & 2) >> 1) + 2); })
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_castpd_ps(__m128d __a)
@@ -1468,13 +1481,11 @@ _mm_castsi128_pd(__m128i __a)
static __inline__ void __DEFAULT_FN_ATTRS
_mm_pause(void)
{
- __asm__ volatile ("pause");
+ __builtin_ia32_pause();
}
#undef __DEFAULT_FN_ATTRS
#define _MM_SHUFFLE2(x, y) (((x) << 1) | (y))
-#endif /* __SSE2__ */
-
#endif /* __EMMINTRIN_H */
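
Most of the emmintrin.h churn above renames parameters into the reserved __-prefixed namespace so user macros named q0, i3, w7, and so on cannot break the header; the argument orders are unchanged. As a reminder of those orders, a hedged sketch (hypothetical function, not part of the patch; compile with -msse2):

#include <emmintrin.h>

__m128i make_0123(void)
{
    __m128i a = _mm_set_epi32(3, 2, 1, 0);  /* arguments high lane -> low lane */
    __m128i b = _mm_setr_epi32(0, 1, 2, 3); /* arguments low lane -> high lane */
    return _mm_cmpeq_epi32(a, b);           /* identical bits: all lanes all-ones */
}
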
diff --git a/contrib/llvm/tools/clang/lib/Headers/f16cintrin.h b/contrib/llvm/tools/clang/lib/Headers/f16cintrin.h
index 3730ae0d3eeb..c655d98ee9ab 100644
--- a/contrib/llvm/tools/clang/lib/Headers/f16cintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/f16cintrin.h
@@ -21,30 +21,18 @@
*===-----------------------------------------------------------------------===
*/
-#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H
-#error "Never use <f16cintrin.h> directly; include <x86intrin.h> instead."
+#if !defined __X86INTRIN_H && !defined __EMMINTRIN_H && !defined __IMMINTRIN_H
+#error "Never use <f16cintrin.h> directly; include <emmintrin.h> instead."
#endif
-#ifndef __F16C__
-# error "F16C instruction is not enabled"
-#endif /* __F16C__ */
-
#ifndef __F16CINTRIN_H
#define __F16CINTRIN_H
-typedef float __v8sf __attribute__ ((__vector_size__ (32)));
-typedef float __m256 __attribute__ ((__vector_size__ (32)));
-
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("f16c")))
#define _mm_cvtps_ph(a, imm) __extension__ ({ \
- __m128 __a = (a); \
- (__m128i)__builtin_ia32_vcvtps2ph((__v4sf)__a, (imm)); })
-
-#define _mm256_cvtps_ph(a, imm) __extension__ ({ \
- __m256 __a = (a); \
- (__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)__a, (imm)); })
+ (__m128i)__builtin_ia32_vcvtps2ph((__v4sf)(__m128)(a), (imm)); })
static __inline __m128 __DEFAULT_FN_ATTRS
_mm_cvtph_ps(__m128i __a)
@@ -52,12 +40,6 @@ _mm_cvtph_ps(__m128i __a)
return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a);
}
-static __inline __m256 __DEFAULT_FN_ATTRS
-_mm256_cvtph_ps(__m128i __a)
-{
- return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);
-}
-
#undef __DEFAULT_FN_ATTRS
#endif /* __F16CINTRIN_H */
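
With the 256-bit forms moved out, f16cintrin.h keeps only the 128-bit conversions, now reachable through <emmintrin.h>. A hedged round-trip sketch (hypothetical function, not part of the patch; compile with -mf16c):

#include <immintrin.h>

__m128 half_roundtrip(__m128 v)
{
    __m128i h = _mm_cvtps_ph(v, 0);  /* imm 0 = round to nearest even */
    return _mm_cvtph_ps(h);          /* widen the four half-precision values back */
}
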
diff --git a/contrib/llvm/tools/clang/lib/Headers/fma4intrin.h b/contrib/llvm/tools/clang/lib/Headers/fma4intrin.h
index d6405cf02922..f1178877b252 100644
--- a/contrib/llvm/tools/clang/lib/Headers/fma4intrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/fma4intrin.h
@@ -28,14 +28,10 @@
#ifndef __FMA4INTRIN_H
#define __FMA4INTRIN_H
-#ifndef __FMA4__
-# error "FMA4 instruction set is not enabled"
-#else
-
#include <pmmintrin.h>
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("fma4")))
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_macc_ps(__m128 __A, __m128 __B, __m128 __C)
@@ -231,6 +227,4 @@ _mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C)
#undef __DEFAULT_FN_ATTRS
-#endif /* __FMA4__ */
-
#endif /* __FMA4INTRIN_H */
diff --git a/contrib/llvm/tools/clang/lib/Headers/fmaintrin.h b/contrib/llvm/tools/clang/lib/Headers/fmaintrin.h
index ad693fed0bfd..114a14380ea0 100644
--- a/contrib/llvm/tools/clang/lib/Headers/fmaintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/fmaintrin.h
@@ -28,12 +28,8 @@
#ifndef __FMAINTRIN_H
#define __FMAINTRIN_H
-#ifndef __FMA__
-# error "FMA instruction set is not enabled"
-#else
-
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("fma")))
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C)
@@ -229,6 +225,4 @@ _mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C)
#undef __DEFAULT_FN_ATTRS
-#endif /* __FMA__ */
-
#endif /* __FMAINTRIN_H */
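
Because the FMA intrinsics now carry __target__("fma") instead of hiding behind #ifdef __FMA__, a file built without -mfma can still call them from a function that opts in. A hedged sketch (hypothetical function, not part of the patch; the target attribute is a GCC/Clang extension):

#include <immintrin.h>

__attribute__((__target__("fma")))
__m128 muladd(__m128 a, __m128 b, __m128 c)
{
    return _mm_fmadd_ps(a, b, c);   /* a*b + c with a single rounding step */
}
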
diff --git a/contrib/llvm/tools/clang/lib/Headers/fxsrintrin.h b/contrib/llvm/tools/clang/lib/Headers/fxsrintrin.h
index 2b3549c057a1..ac6026aa5ba2 100644
--- a/contrib/llvm/tools/clang/lib/Headers/fxsrintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/fxsrintrin.h
@@ -28,7 +28,7 @@
#ifndef __FXSRINTRIN_H
#define __FXSRINTRIN_H
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("fxsr")))
static __inline__ void __DEFAULT_FN_ATTRS
_fxsave(void *__p) {
diff --git a/contrib/llvm/tools/clang/lib/Headers/htmxlintrin.h b/contrib/llvm/tools/clang/lib/Headers/htmxlintrin.h
index 30f524d5df49..c7571ecd0661 100644
--- a/contrib/llvm/tools/clang/lib/Headers/htmxlintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/htmxlintrin.h
@@ -46,7 +46,7 @@ extern "C" {
typedef char TM_buff_type[16];
-/* This macro can be used to determine whether a transaction was successfully
+/* This macro can be used to determine whether a transaction was successfully
started from the __TM_begin() and __TM_simple_begin() intrinsic functions
below. */
#define _HTM_TBEGIN_STARTED 1
diff --git a/contrib/llvm/tools/clang/lib/Headers/immintrin.h b/contrib/llvm/tools/clang/lib/Headers/immintrin.h
index 21ad3281f850..f3c6d1914d61 100644
--- a/contrib/llvm/tools/clang/lib/Headers/immintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/immintrin.h
@@ -24,178 +24,147 @@
#ifndef __IMMINTRIN_H
#define __IMMINTRIN_H
-#ifdef __MMX__
#include <mmintrin.h>
-#endif
-#ifdef __SSE__
#include <xmmintrin.h>
-#endif
-#ifdef __SSE2__
#include <emmintrin.h>
-#endif
-#ifdef __SSE3__
#include <pmmintrin.h>
-#endif
-#ifdef __SSSE3__
#include <tmmintrin.h>
-#endif
-#if defined (__SSE4_2__) || defined (__SSE4_1__)
#include <smmintrin.h>
-#endif
-#if defined (__AES__) || defined (__PCLMUL__)
#include <wmmintrin.h>
-#endif
-#ifdef __AVX__
#include <avxintrin.h>
-#endif
-#ifdef __AVX2__
#include <avx2intrin.h>
-#endif
-#ifdef __BMI__
+/* The 256-bit versions of functions in f16cintrin.h.
+ Intel documents these as being in immintrin.h, and
+ they depend on typedefs from avxintrin.h. */
+
+#define _mm256_cvtps_ph(a, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm)); })
+
+static __inline __m256 __attribute__((__always_inline__, __nodebug__, __target__("f16c")))
+_mm256_cvtph_ps(__m128i __a)
+{
+ return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);
+}
+
#include <bmiintrin.h>
-#endif
-#ifdef __BMI2__
#include <bmi2intrin.h>
-#endif
-#ifdef __LZCNT__
#include <lzcntintrin.h>
-#endif
-#ifdef __FMA__
#include <fmaintrin.h>
-#endif
-#ifdef __AVX512F__
#include <avx512fintrin.h>
-#endif
-#ifdef __AVX512VL__
#include <avx512vlintrin.h>
-#endif
-#ifdef __AVX512BW__
#include <avx512bwintrin.h>
-#endif
-#ifdef __AVX512CD__
#include <avx512cdintrin.h>
-#endif
-#ifdef __AVX512DQ__
#include <avx512dqintrin.h>
-#endif
-#if defined (__AVX512VL__) && defined (__AVX512BW__)
#include <avx512vlbwintrin.h>
-#endif
-#if defined (__AVX512VL__) && defined (__AVX512DQ__)
#include <avx512vldqintrin.h>
-#endif
-#ifdef __AVX512ER__
#include <avx512erintrin.h>
-#endif
-#ifdef __RDRND__
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand16_step(unsigned short *__p)
{
return __builtin_ia32_rdrand16_step(__p);
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand32_step(unsigned int *__p)
{
return __builtin_ia32_rdrand32_step(__p);
}
#ifdef __x86_64__
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand64_step(unsigned long long *__p)
{
return __builtin_ia32_rdrand64_step(__p);
}
#endif
-#endif /* __RDRND__ */
-#ifdef __FSGSBASE__
#ifdef __x86_64__
-static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u32(void)
{
return __builtin_ia32_rdfsbase32();
}
-static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u64(void)
{
return __builtin_ia32_rdfsbase64();
}
-static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u32(void)
{
return __builtin_ia32_rdgsbase32();
}
-static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u64(void)
{
return __builtin_ia32_rdgsbase64();
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u32(unsigned int __V)
{
return __builtin_ia32_wrfsbase32(__V);
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u64(unsigned long long __V)
{
return __builtin_ia32_wrfsbase64(__V);
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u32(unsigned int __V)
{
return __builtin_ia32_wrgsbase32(__V);
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u64(unsigned long long __V)
{
return __builtin_ia32_wrgsbase64(__V);
}
#endif
-#endif /* __FSGSBASE__ */
-#ifdef __RTM__
#include <rtmintrin.h>
-#endif
-#ifdef __RTM__
#include <xtestintrin.h>
-#endif
-#ifdef __SHA__
#include <shaintrin.h>
-#endif
#include <fxsrintrin.h>
+#include <xsaveintrin.h>
+
+#include <xsaveoptintrin.h>
+
+#include <xsavecintrin.h>
+
+#include <xsavesintrin.h>
+
/* Some intrinsics inside adxintrin.h are available only on processors with ADX,
* whereas others are also available at all times. */
#include <adxintrin.h>
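
The same pattern applies throughout immintrin.h: every sub-header is now included unconditionally, and the per-function __target__ attributes decide what a given caller may use. A hedged sketch of the per-function opt-in (hypothetical function, not part of the patch):

#include <immintrin.h>

__attribute__((__target__("rdrnd")))
int get_random(unsigned int *out)
{
    return _rdrand32_step(out);   /* 1 on success, 0 if no entropy was ready */
}
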
diff --git a/contrib/llvm/tools/clang/lib/Headers/lzcntintrin.h b/contrib/llvm/tools/clang/lib/Headers/lzcntintrin.h
index 8ee29975c2eb..4c00e42ac3a9 100644
--- a/contrib/llvm/tools/clang/lib/Headers/lzcntintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/lzcntintrin.h
@@ -25,15 +25,11 @@
#error "Never use <lzcntintrin.h> directly; include <x86intrin.h> instead."
#endif
-#ifndef __LZCNT__
-# error "LZCNT instruction is not enabled"
-#endif /* __LZCNT__ */
-
#ifndef __LZCNTINTRIN_H
#define __LZCNTINTRIN_H
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("lzcnt")))
static __inline__ unsigned short __DEFAULT_FN_ATTRS
__lzcnt16(unsigned short __X)
diff --git a/contrib/llvm/tools/clang/lib/Headers/mm3dnow.h b/contrib/llvm/tools/clang/lib/Headers/mm3dnow.h
index ac8e0f4af1bf..cb93faf2b6a4 100644
--- a/contrib/llvm/tools/clang/lib/Headers/mm3dnow.h
+++ b/contrib/llvm/tools/clang/lib/Headers/mm3dnow.h
@@ -30,7 +30,7 @@
typedef float __v2sf __attribute__((__vector_size__(8)));
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnow")))
static __inline__ void __DEFAULT_FN_ATTRS
_m_femms() {
@@ -132,6 +132,10 @@ _m_pmulhrw(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pmulhrw((__v4hi)__m1, (__v4hi)__m2);
}
+/* Handle the 3dnowa instructions here. */
+#undef __DEFAULT_FN_ATTRS
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnowa")))
+
static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pf2iw(__m64 __m) {
return (__m64)__builtin_ia32_pf2iw((__v2sf)__m);
diff --git a/contrib/llvm/tools/clang/lib/Headers/mmintrin.h b/contrib/llvm/tools/clang/lib/Headers/mmintrin.h
index 0be5f32c7d02..162cb1aa1711 100644
--- a/contrib/llvm/tools/clang/lib/Headers/mmintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/mmintrin.h
@@ -24,10 +24,6 @@
#ifndef __MMINTRIN_H
#define __MMINTRIN_H
-#ifndef __MMX__
-#error "MMX instruction set not enabled"
-#else
-
typedef long long __m64 __attribute__((__vector_size__(8)));
typedef int __v2si __attribute__((__vector_size__(8)));
@@ -35,7 +31,7 @@ typedef short __v4hi __attribute__((__vector_size__(8)));
typedef char __v8qi __attribute__((__vector_size__(8)));
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx")))
static __inline__ void __DEFAULT_FN_ATTRS
_mm_empty(void)
@@ -140,7 +136,7 @@ _mm_add_pi32(__m64 __m1, __m64 __m2)
}
static __inline__ __m64 __DEFAULT_FN_ATTRS
-_mm_adds_pi8(__m64 __m1, __m64 __m2)
+_mm_adds_pi8(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2);
}
@@ -148,17 +144,17 @@ _mm_adds_pi8(__m64 __m1, __m64 __m2)
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_adds_pi16(__m64 __m1, __m64 __m2)
{
- return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2);
+ return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2);
}
static __inline__ __m64 __DEFAULT_FN_ATTRS
-_mm_adds_pu8(__m64 __m1, __m64 __m2)
+_mm_adds_pu8(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2);
}
-
+
static __inline__ __m64 __DEFAULT_FN_ATTRS
-_mm_adds_pu16(__m64 __m1, __m64 __m2)
+_mm_adds_pu16(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2);
}
@@ -168,13 +164,13 @@ _mm_sub_pi8(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2);
}
-
+
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_sub_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2);
}
-
+
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_sub_pi32(__m64 __m1, __m64 __m2)
{
@@ -198,7 +194,7 @@ _mm_subs_pu8(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2);
}
-
+
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_subs_pu16(__m64 __m1, __m64 __m2)
{
@@ -216,9 +212,9 @@ _mm_mulhi_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2);
}
-
+
static __inline__ __m64 __DEFAULT_FN_ATTRS
-_mm_mullo_pi16(__m64 __m1, __m64 __m2)
+_mm_mullo_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2);
}
@@ -232,7 +228,7 @@ _mm_sll_pi16(__m64 __m, __m64 __count)
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_slli_pi16(__m64 __m, int __count)
{
- return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count);
+ return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count);
}
static __inline__ __m64 __DEFAULT_FN_ATTRS
@@ -256,13 +252,13 @@ _mm_sll_si64(__m64 __m, __m64 __count)
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_slli_si64(__m64 __m, int __count)
{
- return (__m64)__builtin_ia32_psllqi(__m, __count);
+ return (__m64)__builtin_ia32_psllqi(__m, __count);
}
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_sra_pi16(__m64 __m, __m64 __count)
{
- return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count);
+ return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count);
}
static __inline__ __m64 __DEFAULT_FN_ATTRS
@@ -274,7 +270,7 @@ _mm_srai_pi16(__m64 __m, int __count)
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_sra_pi32(__m64 __m, __m64 __count)
{
- return (__m64)__builtin_ia32_psrad((__v2si)__m, __count);
+ return (__m64)__builtin_ia32_psrad((__v2si)__m, __count);
}
static __inline__ __m64 __DEFAULT_FN_ATTRS
@@ -286,19 +282,19 @@ _mm_srai_pi32(__m64 __m, int __count)
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_srl_pi16(__m64 __m, __m64 __count)
{
- return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count);
+ return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count);
}
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_srli_pi16(__m64 __m, int __count)
{
- return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count);
+ return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count);
}
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_srl_pi32(__m64 __m, __m64 __count)
{
- return (__m64)__builtin_ia32_psrld((__v2si)__m, __count);
+ return (__m64)__builtin_ia32_psrld((__v2si)__m, __count);
}
static __inline__ __m64 __DEFAULT_FN_ATTRS
@@ -310,13 +306,13 @@ _mm_srli_pi32(__m64 __m, int __count)
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_srl_si64(__m64 __m, __m64 __count)
{
- return (__m64)__builtin_ia32_psrlq(__m, __count);
+ return (__m64)__builtin_ia32_psrlq(__m, __count);
}
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_srli_si64(__m64 __m, int __count)
{
- return (__m64)__builtin_ia32_psrlqi(__m, __count);
+ return (__m64)__builtin_ia32_psrlqi(__m, __count);
}
static __inline__ __m64 __DEFAULT_FN_ATTRS
@@ -447,7 +443,9 @@ _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,
/* Aliases for compatibility. */
#define _m_empty _mm_empty
#define _m_from_int _mm_cvtsi32_si64
+#define _m_from_int64 _mm_cvtsi64_m64
#define _m_to_int _mm_cvtsi64_si32
+#define _m_to_int64 _mm_cvtm64_si64
#define _m_packsswb _mm_packs_pi16
#define _m_packssdw _mm_packs_pi32
#define _m_packuswb _mm_packs_pu16
@@ -501,7 +499,5 @@ _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,
#define _m_pcmpgtw _mm_cmpgt_pi16
#define _m_pcmpgtd _mm_cmpgt_pi32
-#endif /* __MMX__ */
-
#endif /* __MMINTRIN_H */
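
The two new aliases complete the _m_* compatibility set for the 64-bit MMX moves. A hedged sketch (hypothetical function, not part of the patch; x86-64 only, compile with -mmmx):

#include <mmintrin.h>

long long mmx_roundtrip(long long x)
{
    __m64 m = _m_from_int64(x);   /* alias for _mm_cvtsi64_m64 */
    return _m_to_int64(m);        /* alias for _mm_cvtm64_si64 */
}
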
diff --git a/contrib/llvm/tools/clang/lib/Headers/module.modulemap b/contrib/llvm/tools/clang/lib/Headers/module.modulemap
index b861fdd8c2a1..b147e891dceb 100644
--- a/contrib/llvm/tools/clang/lib/Headers/module.modulemap
+++ b/contrib/llvm/tools/clang/lib/Headers/module.modulemap
@@ -32,142 +32,117 @@ module _Builtin_intrinsics [system] [extern_c] {
}
explicit module cpuid {
- requires x86
header "cpuid.h"
}
explicit module mmx {
- requires mmx
header "mmintrin.h"
}
explicit module f16c {
- requires f16c
header "f16cintrin.h"
}
explicit module sse {
- requires sse
export mmx
export sse2 // note: for hackish <emmintrin.h> dependency
header "xmmintrin.h"
}
explicit module sse2 {
- requires sse2
export sse
header "emmintrin.h"
}
explicit module sse3 {
- requires sse3
export sse2
header "pmmintrin.h"
}
explicit module ssse3 {
- requires ssse3
export sse3
header "tmmintrin.h"
}
explicit module sse4_1 {
- requires sse41
export ssse3
header "smmintrin.h"
}
explicit module sse4_2 {
- requires sse42
export sse4_1
header "nmmintrin.h"
}
explicit module sse4a {
- requires sse4a
export sse3
header "ammintrin.h"
}
explicit module avx {
- requires avx
export sse4_2
header "avxintrin.h"
}
explicit module avx2 {
- requires avx2
export avx
header "avx2intrin.h"
}
explicit module avx512f {
- requires avx512f
export avx2
header "avx512fintrin.h"
}
explicit module avx512er {
- requires avx512er
header "avx512erintrin.h"
}
explicit module bmi {
- requires bmi
header "bmiintrin.h"
}
explicit module bmi2 {
- requires bmi2
header "bmi2intrin.h"
}
explicit module fma {
- requires fma
header "fmaintrin.h"
}
explicit module fma4 {
- requires fma4
export sse3
header "fma4intrin.h"
}
explicit module lzcnt {
- requires lzcnt
header "lzcntintrin.h"
}
explicit module popcnt {
- requires popcnt
header "popcntintrin.h"
}
explicit module mm3dnow {
- requires mm3dnow
header "mm3dnow.h"
}
explicit module xop {
- requires xop
export fma4
header "xopintrin.h"
}
explicit module aes_pclmul {
- requires aes, pclmul
header "wmmintrin.h"
export aes
export pclmul
}
explicit module aes {
- requires aes
header "__wmmintrin_aes.h"
}
explicit module pclmul {
- requires pclmul
header "__wmmintrin_pclmul.h"
}
}
diff --git a/contrib/llvm/tools/clang/lib/Headers/nmmintrin.h b/contrib/llvm/tools/clang/lib/Headers/nmmintrin.h
index f12622d7be68..57fec15963d1 100644
--- a/contrib/llvm/tools/clang/lib/Headers/nmmintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/nmmintrin.h
@@ -24,12 +24,7 @@
#ifndef _NMMINTRIN_H
#define _NMMINTRIN_H
-#ifndef __SSE4_2__
-#error "SSE4.2 instruction set not enabled"
-#else
-
/* To match the expectations of gcc we put the sse4.2 definitions into
 smmintrin.h, so we simply include that header here. */
#include <smmintrin.h>
-#endif /* __SSE4_2__ */
#endif /* _NMMINTRIN_H */
diff --git a/contrib/llvm/tools/clang/lib/Headers/pmmintrin.h b/contrib/llvm/tools/clang/lib/Headers/pmmintrin.h
index e1b8d9b603d1..0ff940912483 100644
--- a/contrib/llvm/tools/clang/lib/Headers/pmmintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/pmmintrin.h
@@ -20,18 +20,14 @@
*
*===-----------------------------------------------------------------------===
*/
-
+
#ifndef __PMMINTRIN_H
#define __PMMINTRIN_H
-#ifndef __SSE3__
-#error "SSE3 instruction set not enabled"
-#else
-
#include <emmintrin.h>
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse3")))
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_lddqu_si128(__m128i const *__p)
@@ -117,6 +113,4 @@ _mm_mwait(unsigned __extensions, unsigned __hints)
#undef __DEFAULT_FN_ATTRS
-#endif /* __SSE3__ */
-
#endif /* __PMMINTRIN_H */
diff --git a/contrib/llvm/tools/clang/lib/Headers/popcntintrin.h b/contrib/llvm/tools/clang/lib/Headers/popcntintrin.h
index 1a4e9000aeb6..6fcda65c7807 100644
--- a/contrib/llvm/tools/clang/lib/Headers/popcntintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/popcntintrin.h
@@ -21,15 +21,11 @@
*===-----------------------------------------------------------------------===
*/
-#ifndef __POPCNT__
-#error "POPCNT instruction set not enabled"
-#endif
-
#ifndef _POPCNTINTRIN_H
#define _POPCNTINTRIN_H
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("popcnt")))
static __inline__ int __DEFAULT_FN_ATTRS
_mm_popcnt_u32(unsigned int __A)
@@ -37,12 +33,24 @@ _mm_popcnt_u32(unsigned int __A)
return __builtin_popcount(__A);
}
+static __inline__ int __DEFAULT_FN_ATTRS
+_popcnt32(int __A)
+{
+ return __builtin_popcount(__A);
+}
+
#ifdef __x86_64__
static __inline__ long long __DEFAULT_FN_ATTRS
_mm_popcnt_u64(unsigned long long __A)
{
return __builtin_popcountll(__A);
}
+
+static __inline__ long long __DEFAULT_FN_ATTRS
+_popcnt64(long long __A)
+{
+ return __builtin_popcountll(__A);
+}
#endif /* __x86_64__ */
#undef __DEFAULT_FN_ATTRS
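
The added _popcnt32/_popcnt64 mirror _mm_popcnt_u32/_mm_popcnt_u64 with signed operands; all four lower to the POPCNT instruction. A hedged sketch (hypothetical function, not part of the patch; compile with -mpopcnt):

#include <popcntintrin.h>

int bits_set(int x)
{
    return _popcnt32(x);   /* e.g. _popcnt32(0xF0) == 4 */
}
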
diff --git a/contrib/llvm/tools/clang/lib/Headers/prfchwintrin.h b/contrib/llvm/tools/clang/lib/Headers/prfchwintrin.h
index 9825bd8c9700..ba0285751823 100644
--- a/contrib/llvm/tools/clang/lib/Headers/prfchwintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/prfchwintrin.h
@@ -30,6 +30,12 @@
#if defined(__PRFCHW__) || defined(__3dNOW__)
static __inline__ void __attribute__((__always_inline__, __nodebug__))
+_m_prefetch(void *__P)
+{
+ __builtin_prefetch (__P, 0, 3 /* _MM_HINT_T0 */);
+}
+
+static __inline__ void __attribute__((__always_inline__, __nodebug__))
_m_prefetchw(void *__P)
{
__builtin_prefetch (__P, 1, 3 /* _MM_HINT_T0 */);
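
The new _m_prefetch complements the existing _m_prefetchw: both hint maximal temporal locality (_MM_HINT_T0), while the second __builtin_prefetch argument distinguishes a read (0) from a write (1) prefetch. A hedged sketch (hypothetical function, not part of the patch; needs -mprfchw or -m3dnow):

#include <x86intrin.h>

void warm(void *node)
{
    _m_prefetch(node);    /* expect to read *node soon  */
    _m_prefetchw(node);   /* expect to write *node soon */
}
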
diff --git a/contrib/llvm/tools/clang/lib/Headers/rdseedintrin.h b/contrib/llvm/tools/clang/lib/Headers/rdseedintrin.h
index fdf7e18afa95..421f4ea48702 100644
--- a/contrib/llvm/tools/clang/lib/Headers/rdseedintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/rdseedintrin.h
@@ -28,10 +28,8 @@
#ifndef __RDSEEDINTRIN_H
#define __RDSEEDINTRIN_H
-#ifdef __RDSEED__
-
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("rdseed")))
static __inline__ int __DEFAULT_FN_ATTRS
_rdseed16_step(unsigned short *__p)
@@ -55,5 +53,4 @@ _rdseed64_step(unsigned long long *__p)
#undef __DEFAULT_FN_ATTRS
-#endif /* __RDSEED__ */
#endif /* __RDSEEDINTRIN_H */
diff --git a/contrib/llvm/tools/clang/lib/Headers/rtmintrin.h b/contrib/llvm/tools/clang/lib/Headers/rtmintrin.h
index 17256815fb8d..e6a58d743bc9 100644
--- a/contrib/llvm/tools/clang/lib/Headers/rtmintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/rtmintrin.h
@@ -38,7 +38,7 @@
#define _XABORT_CODE(x) (((x) >> 24) & 0xFF)
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("rtm")))
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_xbegin(void)
diff --git a/contrib/llvm/tools/clang/lib/Headers/shaintrin.h b/contrib/llvm/tools/clang/lib/Headers/shaintrin.h
index 960cced7a55c..9b5d21800819 100644
--- a/contrib/llvm/tools/clang/lib/Headers/shaintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/shaintrin.h
@@ -28,15 +28,11 @@
#ifndef __SHAINTRIN_H
#define __SHAINTRIN_H
-#if !defined (__SHA__)
-# error "SHA instructions not enabled"
-#endif
-
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sha")))
#define _mm_sha1rnds4_epu32(V1, V2, M) __extension__ ({ \
- __builtin_ia32_sha1rnds4((V1), (V2), (M)); })
+ __builtin_ia32_sha1rnds4((__v4si)(__m128i)(V1), (__v4si)(__m128i)(V2), (M)); })
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sha1nexte_epu32(__m128i __X, __m128i __Y)
diff --git a/contrib/llvm/tools/clang/lib/Headers/smmintrin.h b/contrib/llvm/tools/clang/lib/Headers/smmintrin.h
index 04bd0722b11f..69ad07f42ad6 100644
--- a/contrib/llvm/tools/clang/lib/Headers/smmintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/smmintrin.h
@@ -24,14 +24,10 @@
#ifndef _SMMINTRIN_H
#define _SMMINTRIN_H
-#ifndef __SSE4_1__
-#error "SSE4.1 instruction set not enabled"
-#else
-
#include <tmmintrin.h>
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4.1")))
/* SSE4 Rounding macros. */
#define _MM_FROUND_TO_NEAREST_INT 0x00
@@ -61,35 +57,28 @@
#define _mm_floor_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_FLOOR)
#define _mm_round_ps(X, M) __extension__ ({ \
- __m128 __X = (X); \
- (__m128) __builtin_ia32_roundps((__v4sf)__X, (M)); })
+ (__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M)); })
#define _mm_round_ss(X, Y, M) __extension__ ({ \
- __m128 __X = (X); \
- __m128 __Y = (Y); \
- (__m128) __builtin_ia32_roundss((__v4sf)__X, (__v4sf)__Y, (M)); })
+ (__m128)__builtin_ia32_roundss((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (M)); })
#define _mm_round_pd(X, M) __extension__ ({ \
- __m128d __X = (X); \
- (__m128d) __builtin_ia32_roundpd((__v2df)__X, (M)); })
+ (__m128d)__builtin_ia32_roundpd((__v2df)(__m128d)(X), (M)); })
#define _mm_round_sd(X, Y, M) __extension__ ({ \
- __m128d __X = (X); \
- __m128d __Y = (Y); \
- (__m128d) __builtin_ia32_roundsd((__v2df)__X, (__v2df)__Y, (M)); })
+ (__m128d)__builtin_ia32_roundsd((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (M)); })
/* SSE4 Packed Blending Intrinsics. */
#define _mm_blend_pd(V1, V2, M) __extension__ ({ \
- __m128d __V1 = (V1); \
- __m128d __V2 = (V2); \
- (__m128d)__builtin_shufflevector((__v2df)__V1, (__v2df)__V2, \
+ (__m128d)__builtin_shufflevector((__v2df)(__m128d)(V1), \
+ (__v2df)(__m128d)(V2), \
(((M) & 0x01) ? 2 : 0), \
(((M) & 0x02) ? 3 : 1)); })
#define _mm_blend_ps(V1, V2, M) __extension__ ({ \
- __m128 __V1 = (V1); \
- __m128 __V2 = (V2); \
- (__m128)__builtin_shufflevector((__v4sf)__V1, (__v4sf)__V2, \
+ (__m128)__builtin_shufflevector((__v4sf)(__m128)(V1), (__v4sf)(__m128)(V2), \
(((M) & 0x01) ? 4 : 0), \
(((M) & 0x02) ? 5 : 1), \
(((M) & 0x04) ? 6 : 2), \
@@ -117,9 +106,8 @@ _mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M)
}
#define _mm_blend_epi16(V1, V2, M) __extension__ ({ \
- __m128i __V1 = (V1); \
- __m128i __V2 = (V2); \
- (__m128i)__builtin_shufflevector((__v8hi)__V1, (__v8hi)__V2, \
+ (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(V1), \
+ (__v8hi)(__m128i)(V2), \
(((M) & 0x01) ? 8 : 0), \
(((M) & 0x02) ? 9 : 1), \
(((M) & 0x04) ? 10 : 2), \
@@ -144,20 +132,18 @@ _mm_mul_epi32 (__m128i __V1, __m128i __V2)
/* SSE4 Floating Point Dot Product Instructions. */
#define _mm_dp_ps(X, Y, M) __extension__ ({ \
- __m128 __X = (X); \
- __m128 __Y = (Y); \
- (__m128) __builtin_ia32_dpps((__v4sf)__X, (__v4sf)__Y, (M)); })
+ (__m128) __builtin_ia32_dpps((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (M)); })
#define _mm_dp_pd(X, Y, M) __extension__ ({\
- __m128d __X = (X); \
- __m128d __Y = (Y); \
- (__m128d) __builtin_ia32_dppd((__v2df)__X, (__v2df)__Y, (M)); })
+ (__m128d) __builtin_ia32_dppd((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (M)); })
/* SSE4 Streaming Load Hint Instruction. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_stream_load_si128 (__m128i *__V)
+_mm_stream_load_si128 (__m128i const *__V)
{
- return (__m128i) __builtin_ia32_movntdqa ((__v2di *) __V);
+ return (__m128i) __builtin_ia32_movntdqa ((const __v2di *) __V);
}
/* SSE4 Packed Integer Min/Max Instructions. */
@@ -213,7 +199,7 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
#define _mm_insert_ps(X, Y, N) __builtin_ia32_insertps128((X), (Y), (N))
#define _mm_extract_ps(X, N) (__extension__ \
({ union { int __i; float __f; } __t; \
- __v4sf __a = (__v4sf)(X); \
+ __v4sf __a = (__v4sf)(__m128)(X); \
__t.__f = __a[(N) & 3]; \
__t.__i;}))
@@ -221,39 +207,44 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
/* Extract a single-precision float from X at index N into D. */
#define _MM_EXTRACT_FLOAT(D, X, N) (__extension__ ({ __v4sf __a = (__v4sf)(X); \
(D) = __a[N]; }))
-
+
/* Or together 2 sets of indexes (X and Y) with the zeroing bits (Z) to create
an index suitable for _mm_insert_ps. */
#define _MM_MK_INSERTPS_NDX(X, Y, Z) (((X) << 6) | ((Y) << 4) | (Z))
-
+
/* Extract a float from X at index N into the first index of the return. */
#define _MM_PICK_OUT_PS(X, N) _mm_insert_ps (_mm_setzero_ps(), (X), \
_MM_MK_INSERTPS_NDX((N), 0, 0x0e))
-
+
/* Insert int into packed integer array at index. */
-#define _mm_insert_epi8(X, I, N) (__extension__ ({ __v16qi __a = (__v16qi)(X); \
- __a[(N) & 15] = (I); \
- __a;}))
-#define _mm_insert_epi32(X, I, N) (__extension__ ({ __v4si __a = (__v4si)(X); \
- __a[(N) & 3] = (I); \
- __a;}))
+#define _mm_insert_epi8(X, I, N) (__extension__ \
+ ({ __v16qi __a = (__v16qi)(__m128i)(X); \
+ __a[(N) & 15] = (I); \
+ __a;}))
+#define _mm_insert_epi32(X, I, N) (__extension__ \
+ ({ __v4si __a = (__v4si)(__m128i)(X); \
+ __a[(N) & 3] = (I); \
+ __a;}))
#ifdef __x86_64__
-#define _mm_insert_epi64(X, I, N) (__extension__ ({ __v2di __a = (__v2di)(X); \
- __a[(N) & 1] = (I); \
- __a;}))
+#define _mm_insert_epi64(X, I, N) (__extension__ \
+ ({ __v2di __a = (__v2di)(__m128i)(X); \
+ __a[(N) & 1] = (I); \
+ __a;}))
#endif /* __x86_64__ */
/* Extract int from packed integer array at index. This returns the element
* as a zero extended value, so it is unsigned.
*/
-#define _mm_extract_epi8(X, N) (__extension__ ({ __v16qi __a = (__v16qi)(X); \
- (int)(unsigned char) \
- __a[(N) & 15];}))
-#define _mm_extract_epi32(X, N) (__extension__ ({ __v4si __a = (__v4si)(X); \
- __a[(N) & 3];}))
+#define _mm_extract_epi8(X, N) (__extension__ \
+ ({ __v16qi __a = (__v16qi)(__m128i)(X); \
+ (int)(unsigned char) __a[(N) & 15];}))
+#define _mm_extract_epi32(X, N) (__extension__ \
+ ({ __v4si __a = (__v4si)(__m128i)(X); \
+ (int)__a[(N) & 3];}))
#ifdef __x86_64__
-#define _mm_extract_epi64(X, N) (__extension__ ({ __v2di __a = (__v2di)(X); \
- __a[(N) & 1];}))
+#define _mm_extract_epi64(X, N) (__extension__ \
+ ({ __v2di __a = (__v2di)(__m128i)(X); \
+ (long long)__a[(N) & 1];}))
#endif /* __x86_64 */
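
The reworked insert/extract macros above now cast X through __m128i before reinterpreting it and give each result an explicit type, so for example _mm_extract_epi8 reliably yields a zero-extended int. A hedged sketch (hypothetical function, not part of the patch; compile with -msse4.1):

#include <smmintrin.h>

int high_byte(__m128i v)
{
    return _mm_extract_epi8(v, 15);   /* 0..255, never sign-extended */
}
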
/* SSE4 128-bit Packed Integer Comparisons. */
@@ -290,37 +281,44 @@ _mm_cmpeq_epi64(__m128i __V1, __m128i __V2)
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtepi8_epi16(__m128i __V)
{
- return (__m128i) __builtin_ia32_pmovsxbw128((__v16qi) __V);
+ /* This function always performs a signed extension, but __v16qi is a char
+ which may be signed or unsigned, so use __v16qs. */
+ return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtepi8_epi32(__m128i __V)
{
- return (__m128i) __builtin_ia32_pmovsxbd128((__v16qi) __V);
+ /* This function always performs a signed extension, but __v16qi is a char
+ which may be signed or unsigned, so use __v16qs. */
+ return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4si);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtepi8_epi64(__m128i __V)
{
- return (__m128i) __builtin_ia32_pmovsxbq128((__v16qi) __V);
+ /* This function always performs a signed extension, but __v16qi is a char
+ which may be signed or unsigned, so use __v16qs. */
+ typedef signed char __v16qs __attribute__((__vector_size__(16)));
+ return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1), __v2di);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtepi16_epi32(__m128i __V)
{
- return (__m128i) __builtin_ia32_pmovsxwd128((__v8hi) __V);
+ return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4si);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtepi16_epi64(__m128i __V)
{
- return (__m128i) __builtin_ia32_pmovsxwq128((__v8hi)__V);
+ return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1), __v2di);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtepi32_epi64(__m128i __V)
{
- return (__m128i) __builtin_ia32_pmovsxdq128((__v4si)__V);
+ return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v4si)__V, (__v4si)__V, 0, 1), __v2di);
}
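
The rewrite above replaces each pmovsx builtin with a target-independent pair: __builtin_shufflevector narrows the source to its low elements, and __builtin_convertvector widens each element, sign-extending because the element type is explicitly signed. A minimal sketch of the same pattern, with hypothetical type and function names (compiles standalone with Clang):

    typedef signed char v16qs __attribute__((__vector_size__(16)));
    typedef short       v8hi  __attribute__((__vector_size__(16)));

    /* Keep the low eight bytes, then widen each one to a short; the
       conversion sign-extends because the source elements are signed. */
    static v8hi sext_lo8(v16qs v) {
      return __builtin_convertvector(
          __builtin_shufflevector(v, v, 0, 1, 2, 3, 4, 5, 6, 7), v8hi);
    }
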
/* SSE4 Packed Integer Zero-Extension. */
@@ -369,9 +367,8 @@ _mm_packus_epi32(__m128i __V1, __m128i __V2)
/* SSE4 Multiple Packed Sums of Absolute Difference. */
#define _mm_mpsadbw_epu8(X, Y, M) __extension__ ({ \
- __m128i __X = (X); \
- __m128i __Y = (Y); \
- (__m128i) __builtin_ia32_mpsadbw128((__v16qi)__X, (__v16qi)__Y, (M)); })
+ (__m128i) __builtin_ia32_mpsadbw128((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (M)); })
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_minpos_epu16(__m128i __V)
@@ -379,9 +376,13 @@ _mm_minpos_epu16(__m128i __V)
return (__m128i) __builtin_ia32_phminposuw128((__v8hi)__V);
}
+/* Handle the sse4.2 definitions here. */
+
/* These definitions are normally in nmmintrin.h, but gcc puts them in here
so we'll do the same. */
-#ifdef __SSE4_2__
+
+#undef __DEFAULT_FN_ATTRS
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
/* These specify the type of data that we're comparing. */
#define _SIDD_UBYTE_OPS 0x00
@@ -410,36 +411,59 @@ _mm_minpos_epu16(__m128i __V)
#define _SIDD_UNIT_MASK 0x40
/* SSE4.2 Packed Comparison Intrinsics. */
-#define _mm_cmpistrm(A, B, M) __builtin_ia32_pcmpistrm128((A), (B), (M))
-#define _mm_cmpistri(A, B, M) __builtin_ia32_pcmpistri128((A), (B), (M))
+#define _mm_cmpistrm(A, B, M) \
+ (__m128i)__builtin_ia32_pcmpistrm128((__v16qi)(__m128i)(A), \
+ (__v16qi)(__m128i)(B), (int)(M))
+#define _mm_cmpistri(A, B, M) \
+ (int)__builtin_ia32_pcmpistri128((__v16qi)(__m128i)(A), \
+ (__v16qi)(__m128i)(B), (int)(M))
#define _mm_cmpestrm(A, LA, B, LB, M) \
- __builtin_ia32_pcmpestrm128((A), (LA), (B), (LB), (M))
+ (__m128i)__builtin_ia32_pcmpestrm128((__v16qi)(__m128i)(A), (int)(LA), \
+ (__v16qi)(__m128i)(B), (int)(LB), \
+ (int)(M))
#define _mm_cmpestri(A, LA, B, LB, M) \
- __builtin_ia32_pcmpestri128((A), (LA), (B), (LB), (M))
-
+ (int)__builtin_ia32_pcmpestri128((__v16qi)(__m128i)(A), (int)(LA), \
+ (__v16qi)(__m128i)(B), (int)(LB), \
+ (int)(M))
+
/* SSE4.2 Packed Comparison Intrinsics and EFlag Reading. */
#define _mm_cmpistra(A, B, M) \
- __builtin_ia32_pcmpistria128((A), (B), (M))
+ (int)__builtin_ia32_pcmpistria128((__v16qi)(__m128i)(A), \
+ (__v16qi)(__m128i)(B), (int)(M))
#define _mm_cmpistrc(A, B, M) \
- __builtin_ia32_pcmpistric128((A), (B), (M))
+ (int)__builtin_ia32_pcmpistric128((__v16qi)(__m128i)(A), \
+ (__v16qi)(__m128i)(B), (int)(M))
#define _mm_cmpistro(A, B, M) \
- __builtin_ia32_pcmpistrio128((A), (B), (M))
+ (int)__builtin_ia32_pcmpistrio128((__v16qi)(__m128i)(A), \
+ (__v16qi)(__m128i)(B), (int)(M))
#define _mm_cmpistrs(A, B, M) \
- __builtin_ia32_pcmpistris128((A), (B), (M))
+ (int)__builtin_ia32_pcmpistris128((__v16qi)(__m128i)(A), \
+ (__v16qi)(__m128i)(B), (int)(M))
#define _mm_cmpistrz(A, B, M) \
- __builtin_ia32_pcmpistriz128((A), (B), (M))
+ (int)__builtin_ia32_pcmpistriz128((__v16qi)(__m128i)(A), \
+ (__v16qi)(__m128i)(B), (int)(M))
#define _mm_cmpestra(A, LA, B, LB, M) \
- __builtin_ia32_pcmpestria128((A), (LA), (B), (LB), (M))
+ (int)__builtin_ia32_pcmpestria128((__v16qi)(__m128i)(A), (int)(LA), \
+ (__v16qi)(__m128i)(B), (int)(LB), \
+ (int)(M))
#define _mm_cmpestrc(A, LA, B, LB, M) \
- __builtin_ia32_pcmpestric128((A), (LA), (B), (LB), (M))
+ (int)__builtin_ia32_pcmpestric128((__v16qi)(__m128i)(A), (int)(LA), \
+ (__v16qi)(__m128i)(B), (int)(LB), \
+ (int)(M))
#define _mm_cmpestro(A, LA, B, LB, M) \
- __builtin_ia32_pcmpestrio128((A), (LA), (B), (LB), (M))
+ (int)__builtin_ia32_pcmpestrio128((__v16qi)(__m128i)(A), (int)(LA), \
+ (__v16qi)(__m128i)(B), (int)(LB), \
+ (int)(M))
#define _mm_cmpestrs(A, LA, B, LB, M) \
- __builtin_ia32_pcmpestris128((A), (LA), (B), (LB), (M))
+ (int)__builtin_ia32_pcmpestris128((__v16qi)(__m128i)(A), (int)(LA), \
+ (__v16qi)(__m128i)(B), (int)(LB), \
+ (int)(M))
#define _mm_cmpestrz(A, LA, B, LB, M) \
- __builtin_ia32_pcmpestriz128((A), (LA), (B), (LB), (M))
+ (int)__builtin_ia32_pcmpestriz128((__v16qi)(__m128i)(A), (int)(LA), \
+ (__v16qi)(__m128i)(B), (int)(LB), \
+ (int)(M))
/* SSE4.2 Compare Packed Data -- Greater Than. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
@@ -481,7 +505,4 @@ _mm_crc32_u64(unsigned long long __C, unsigned long long __D)
#include <popcntintrin.h>
#endif
-#endif /* __SSE4_2__ */
-#endif /* __SSE4_1__ */
-
#endif /* _SMMINTRIN_H */
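
Since the pcmpistr/pcmpestr macros now cast their operands and results explicitly, they type-check with any expression convertible to __m128i. A small usage sketch built from the _SIDD_* flags defined above (the function name and flag combination are illustrative; needs -msse4.2):

    #include <nmmintrin.h>

    /* Return the index of the first byte of hay equal to any byte of set,
       or 16 when nothing matches; both operands are treated as implicit-
       length (NUL-terminated) byte strings by the "istri" form. */
    static int find_any(__m128i set, __m128i hay) {
      return _mm_cmpistri(set, hay,
                          _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY |
                          _SIDD_LEAST_SIGNIFICANT);
    }
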
diff --git a/contrib/llvm/tools/clang/lib/Headers/stdint.h b/contrib/llvm/tools/clang/lib/Headers/stdint.h
index 0303db90be1f..3f2fcbc57023 100644
--- a/contrib/llvm/tools/clang/lib/Headers/stdint.h
+++ b/contrib/llvm/tools/clang/lib/Headers/stdint.h
@@ -77,14 +77,14 @@
* C99 7.18.1.2 Minimum-width integer types.
* C99 7.18.1.3 Fastest minimum-width integer types.
*
- * The standard requires that exact-width type be defined for 8-, 16-, 32-, and
+ * The standard requires that exact-width type be defined for 8-, 16-, 32-, and
* 64-bit types if they are implemented. Other exact width types are optional.
* This implementation defines an exact-width types for every integer width
* that is represented in the standard integer types.
*
* The standard also requires minimum-width types be defined for 8-, 16-, 32-,
* and 64-bit widths regardless of whether there are corresponding exact-width
- * types.
+ * types.
*
* To accommodate targets that are missing types that are exactly 8, 16, 32, or
* 64 bits wide, this implementation takes an approach of cascading
@@ -97,7 +97,7 @@
* suboptimal.
*
* In violation of the standard, some targets do not implement a type that is
- * wide enough to represent all of the required widths (8-, 16-, 32-, 64-bit).
+ * wide enough to represent all of the required widths (8-, 16-, 32-, 64-bit).
* To accommodate these targets, a required minimum-width type is only
* defined if there exists an exact-width type of equal or greater width.
*/
@@ -247,7 +247,7 @@ typedef __uint_least8_t uint_fast8_t;
#endif /* __int_least8_t */
/* prevent glibc sys/types.h from defining conflicting types */
-#ifndef __int8_t_defined
+#ifndef __int8_t_defined
# define __int8_t_defined
#endif /* __int8_t_defined */
@@ -280,9 +280,9 @@ typedef __UINTMAX_TYPE__ uintmax_t;
*
* The standard requires that integer constant macros be defined for all the
* minimum-width types defined above. As 8-, 16-, 32-, and 64-bit minimum-width
- * types are required, the corresponding integer constant macros are defined
+ * types are required, the corresponding integer constant macros are defined
* here. This implementation also defines minimum-width types for every other
- * integer width that the target implements, so corresponding macros are
+ * integer width that the target implements, so corresponding macros are
* defined below, too.
*
* These macros are defined using the same successive-shrinking approach as
@@ -452,7 +452,7 @@ typedef __UINTMAX_TYPE__ uintmax_t;
#endif /* __int_least8_t */
-/* C99 7.18.2.1 Limits of exact-width integer types.
+/* C99 7.18.2.1 Limits of exact-width integer types.
* C99 7.18.2.2 Limits of minimum-width integer types.
* C99 7.18.2.3 Limits of fastest minimum-width integer types.
*
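
The cascade those comments describe can be pictured with just two widths: each exact-width stage claims every narrower least-width slot, and a narrower stage, when it exists, re-claims its own. A simplified sketch of the assumed structure, not the header's actual text:

    #ifdef __INT32_TYPE__
    typedef __INT32_TYPE__ int32_t;
    # define __int_least32_t int32_t
    # define __int_least16_t int32_t   /* best candidate so far */
    #endif

    #ifdef __INT16_TYPE__
    typedef __INT16_TYPE__ int16_t;
    # undef  __int_least16_t
    # define __int_least16_t int16_t   /* a narrower exact type wins */
    #endif

    #ifdef __int_least16_t
    typedef __int_least16_t int_least16_t;  /* defined only if some
                                               wide-enough exact type exists */
    #endif
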
diff --git a/contrib/llvm/tools/clang/lib/Headers/tbmintrin.h b/contrib/llvm/tools/clang/lib/Headers/tbmintrin.h
index 48c0b07f423f..785961c6ab86 100644
--- a/contrib/llvm/tools/clang/lib/Headers/tbmintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/tbmintrin.h
@@ -21,10 +21,6 @@
*===-----------------------------------------------------------------------===
*/
-#ifndef __TBM__
-#error "TBM instruction set is not enabled"
-#endif
-
#ifndef __X86INTRIN_H
#error "Never use <tbmintrin.h> directly; include <x86intrin.h> instead."
#endif
@@ -33,9 +29,11 @@
#define __TBMINTRIN_H
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("tbm")))
-#define __bextri_u32(a, b) (__builtin_ia32_bextri_u32((a), (b)))
+#define __bextri_u32(a, b) \
+ ((unsigned int)__builtin_ia32_bextri_u32((unsigned int)(a), \
+ (unsigned int)(b)))
static __inline__ unsigned int __DEFAULT_FN_ATTRS
__blcfill_u32(unsigned int a)
@@ -92,7 +90,9 @@ __tzmsk_u32(unsigned int a)
}
#ifdef __x86_64__
-#define __bextri_u64(a, b) (__builtin_ia32_bextri_u64((a), (int)(b)))
+#define __bextri_u64(a, b) \
+ ((unsigned long long)__builtin_ia32_bextri_u64((unsigned long long)(a), \
+ (unsigned long long)(b)))
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
__blcfill_u64(unsigned long long a)
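
__bextri_u32 wraps TBM's BEXTRI instruction, whose immediate packs the start bit in its low byte and the field length in the byte above it. A hedged usage sketch (the field position chosen is arbitrary; needs -mtbm):

    #include <x86intrin.h>

    /* Extract the 8-bit field occupying bits [11:4] of x:
       control = (length << 8) | start_bit. */
    static unsigned int field_11_4(unsigned int x) {
      return __bextri_u32(x, (8u << 8) | 4u);
    }
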
diff --git a/contrib/llvm/tools/clang/lib/Headers/tgmath.h b/contrib/llvm/tools/clang/lib/Headers/tgmath.h
index a48e267e60d0..318e1185feee 100644
--- a/contrib/llvm/tools/clang/lib/Headers/tgmath.h
+++ b/contrib/llvm/tools/clang/lib/Headers/tgmath.h
@@ -490,7 +490,7 @@ static double _Complex
static long double _Complex
_TG_ATTRS
- __tg_pow(long double _Complex __x, long double _Complex __y)
+ __tg_pow(long double _Complex __x, long double _Complex __y)
{return cpowl(__x, __y);}
#undef pow
diff --git a/contrib/llvm/tools/clang/lib/Headers/tmmintrin.h b/contrib/llvm/tools/clang/lib/Headers/tmmintrin.h
index 2ecc730e90e9..0002890c1393 100644
--- a/contrib/llvm/tools/clang/lib/Headers/tmmintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/tmmintrin.h
@@ -20,18 +20,14 @@
*
*===-----------------------------------------------------------------------===
*/
-
+
#ifndef __TMMINTRIN_H
#define __TMMINTRIN_H
-#ifndef __SSSE3__
-#error "SSSE3 instruction set not enabled"
-#else
-
#include <pmmintrin.h>
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3")))
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_abs_pi8(__m64 __a)
@@ -70,14 +66,11 @@ _mm_abs_epi32(__m128i __a)
}
#define _mm_alignr_epi8(a, b, n) __extension__ ({ \
- __m128i __a = (a); \
- __m128i __b = (b); \
- (__m128i)__builtin_ia32_palignr128((__v16qi)__a, (__v16qi)__b, (n)); })
+ (__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
+ (__v16qi)(__m128i)(b), (n)); })
#define _mm_alignr_pi8(a, b, n) __extension__ ({ \
- __m64 __a = (a); \
- __m64 __b = (b); \
- (__m64)__builtin_ia32_palignr((__v8qi)__a, (__v8qi)__b, (n)); })
+ (__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)); })
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hadd_epi16(__m128i __a, __m128i __b)
@@ -225,6 +218,4 @@ _mm_sign_pi32(__m64 __a, __m64 __b)
#undef __DEFAULT_FN_ATTRS
-#endif /* __SSSE3__ */
-
#endif /* __TMMINTRIN_H */
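
_mm_alignr_epi8(a, b, n) concatenates a (upper half) and b (lower half) into a 32-byte value, shifts it right by n bytes, and keeps the low 16 bytes. A small sketch of the sliding-window use case (names are illustrative; needs -mssse3):

    #include <tmmintrin.h>

    /* Bytes 4..19 of the 32-byte concatenation (hi:lo): the last twelve
       bytes of lo followed by the first four bytes of hi. */
    static __m128i window4(__m128i hi, __m128i lo) {
      return _mm_alignr_epi8(hi, lo, 4);
    }
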
diff --git a/contrib/llvm/tools/clang/lib/Headers/wmmintrin.h b/contrib/llvm/tools/clang/lib/Headers/wmmintrin.h
index 369e3c208e53..a2d931010aea 100644
--- a/contrib/llvm/tools/clang/lib/Headers/wmmintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/wmmintrin.h
@@ -26,17 +26,8 @@
#include <emmintrin.h>
-#if !defined (__AES__) && !defined (__PCLMUL__)
-# error "AES/PCLMUL instructions not enabled"
-#else
-
-#ifdef __AES__
#include <__wmmintrin_aes.h>
-#endif /* __AES__ */
-#ifdef __PCLMUL__
#include <__wmmintrin_pclmul.h>
-#endif /* __PCLMUL__ */
-#endif /* __AES__ || __PCLMUL__ */
#endif /* _WMMINTRIN_H */
diff --git a/contrib/llvm/tools/clang/lib/Headers/x86intrin.h b/contrib/llvm/tools/clang/lib/Headers/x86intrin.h
index 21a43daf3c2d..4d8077e38291 100644
--- a/contrib/llvm/tools/clang/lib/Headers/x86intrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/x86intrin.h
@@ -28,53 +28,29 @@
#include <immintrin.h>
-#ifdef __3dNOW__
#include <mm3dnow.h>
-#endif
-#ifdef __BMI__
#include <bmiintrin.h>
-#endif
-#ifdef __BMI2__
#include <bmi2intrin.h>
-#endif
-#ifdef __LZCNT__
#include <lzcntintrin.h>
-#endif
-#ifdef __POPCNT__
#include <popcntintrin.h>
-#endif
-#ifdef __RDSEED__
#include <rdseedintrin.h>
-#endif
-#ifdef __PRFCHW__
#include <prfchwintrin.h>
-#endif
-#ifdef __SSE4A__
#include <ammintrin.h>
-#endif
-#ifdef __FMA4__
#include <fma4intrin.h>
-#endif
-#ifdef __XOP__
#include <xopintrin.h>
-#endif
-#ifdef __TBM__
#include <tbmintrin.h>
-#endif
-#ifdef __F16C__
#include <f16cintrin.h>
-#endif
/* FIXME: LWP */
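
With the #ifdef guards gone, every sub-header is included unconditionally, and each intrinsic instead carries a per-function __target__ attribute. Code can therefore use an intrinsic without enabling the feature for the whole translation unit, provided the calling function is itself marked for that target. A minimal sketch (runtime CPUID dispatch remains the caller's responsibility):

    #include <x86intrin.h>

    /* Builds even without -mpopcnt on the command line: the attribute
       enables POPCNT for this one function only. */
    __attribute__((__target__("popcnt")))
    static int count_bits(unsigned int v) {
      return _mm_popcnt_u32(v);
    }
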
diff --git a/contrib/llvm/tools/clang/lib/Headers/xmmintrin.h b/contrib/llvm/tools/clang/lib/Headers/xmmintrin.h
index 0d58c753029f..ae0b2cd1b26e 100644
--- a/contrib/llvm/tools/clang/lib/Headers/xmmintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/xmmintrin.h
@@ -20,13 +20,9 @@
*
*===-----------------------------------------------------------------------===
*/
-
+
#ifndef __XMMINTRIN_H
#define __XMMINTRIN_H
-
-#ifndef __SSE__
-#error "SSE instruction set not enabled"
-#else
#include <mmintrin.h>
@@ -41,7 +37,7 @@ typedef float __m128 __attribute__((__vector_size__(16)));
#endif
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse")))
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_add_ss(__m128 __a, __m128 __b)
@@ -581,6 +577,12 @@ _mm_loadr_ps(const float *__p)
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_undefined_ps(void)
+{
+ return (__m128)__builtin_ia32_undef128();
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_set_ss(float __w)
{
return (__m128){ __w, 0, 0, 0 };
@@ -752,8 +754,7 @@ _mm_mulhi_pu16(__m64 __a, __m64 __b)
}
#define _mm_shuffle_pi16(a, n) __extension__ ({ \
- __m64 __a = (a); \
- (__m64)__builtin_ia32_pshufw((__v4hi)__a, (n)); })
+ (__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n)); })
static __inline__ void __DEFAULT_FN_ATTRS
_mm_maskmove_si64(__m64 __d, __m64 __n, char *__p)
@@ -792,9 +793,7 @@ _mm_setcsr(unsigned int __i)
}
#define _mm_shuffle_ps(a, b, mask) __extension__ ({ \
- __m128 __a = (a); \
- __m128 __b = (b); \
- (__m128)__builtin_shufflevector((__v4sf)__a, (__v4sf)__b, \
+ (__m128)__builtin_shufflevector((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \
(mask) & 0x3, ((mask) & 0xc) >> 2, \
(((mask) & 0x30) >> 4) + 4, \
(((mask) & 0xc0) >> 6) + 4); })
@@ -868,7 +867,7 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cvtpi8_ps(__m64 __a)
{
__m64 __b;
-
+
__b = _mm_setzero_si64();
__b = _mm_cmpgt_pi8(__b, __a);
__b = _mm_unpacklo_pi8(__a, __b);
@@ -880,7 +879,7 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cvtpu8_ps(__m64 __a)
{
__m64 __b;
-
+
__b = _mm_setzero_si64();
__b = _mm_unpacklo_pi8(__a, __b);
@@ -891,7 +890,7 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cvtpi32x2_ps(__m64 __a, __m64 __b)
{
__m128 __c;
-
+
__c = _mm_setzero_ps();
__c = _mm_cvtpi32_ps(__c, __b);
__c = _mm_movelh_ps(__c, __c);
@@ -903,11 +902,11 @@ static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_cvtps_pi16(__m128 __a)
{
__m64 __b, __c;
-
+
__b = _mm_cvtps_pi32(__a);
__a = _mm_movehl_ps(__a, __a);
__c = _mm_cvtps_pi32(__a);
-
+
return _mm_packs_pi32(__b, __c);
}
@@ -915,10 +914,10 @@ static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_cvtps_pi8(__m128 __a)
{
__m64 __b, __c;
-
+
__b = _mm_cvtps_pi16(__a);
__c = _mm_setzero_si64();
-
+
return _mm_packs_pi16(__b, __c);
}
@@ -928,6 +927,11 @@ _mm_movemask_ps(__m128 __a)
return __builtin_ia32_movmskps(__a);
}
+
+#ifdef _MSC_VER
+#define _MM_ALIGN16 __declspec(align(16))
+#endif
+
#define _MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w))
#define _MM_EXCEPT_INVALID (0x0001)
@@ -1003,6 +1007,4 @@ do { \
#include <emmintrin.h>
#endif
-#endif /* __SSE__ */
-
#endif /* __XMMINTRIN_H */
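
_mm_shuffle_ps consumes its 8-bit mask two bits per output lane: the low two lanes select from the first operand and the high two from the second, which is why the upper shufflevector indices above get +4. A worked sketch using the _MM_SHUFFLE helper defined in this header (needs -msse):

    #include <xmmintrin.h>

    /* _MM_SHUFFLE(z, y, x, w) packs (z<<6)|(y<<4)|(x<<2)|w, so the result
       lanes are { a[w], a[x], b[y], b[z] } -- here { a[1], a[0], b[3], b[2] }. */
    static __m128 mix(__m128 a, __m128 b) {
      return _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 3, 0, 1));
    }
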
diff --git a/contrib/llvm/tools/clang/lib/Headers/xopintrin.h b/contrib/llvm/tools/clang/lib/Headers/xopintrin.h
index 2eb35c4be844..f07f51c27515 100644
--- a/contrib/llvm/tools/clang/lib/Headers/xopintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/xopintrin.h
@@ -28,14 +28,10 @@
#ifndef __XOPINTRIN_H
#define __XOPINTRIN_H
-#ifndef __XOP__
-# error "XOP instruction set is not enabled"
-#else
-
#include <fma4intrin.h>
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xop")))
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)
@@ -242,20 +238,16 @@ _mm_rot_epi64(__m128i __A, __m128i __B)
}
#define _mm_roti_epi8(A, N) __extension__ ({ \
- __m128i __A = (A); \
- (__m128i)__builtin_ia32_vprotbi((__v16qi)__A, (N)); })
+ (__m128i)__builtin_ia32_vprotbi((__v16qi)(__m128i)(A), (N)); })
#define _mm_roti_epi16(A, N) __extension__ ({ \
- __m128i __A = (A); \
- (__m128i)__builtin_ia32_vprotwi((__v8hi)__A, (N)); })
+ (__m128i)__builtin_ia32_vprotwi((__v8hi)(__m128i)(A), (N)); })
#define _mm_roti_epi32(A, N) __extension__ ({ \
- __m128i __A = (A); \
- (__m128i)__builtin_ia32_vprotdi((__v4si)__A, (N)); })
+ (__m128i)__builtin_ia32_vprotdi((__v4si)(__m128i)(A), (N)); })
#define _mm_roti_epi64(A, N) __extension__ ({ \
- __m128i __A = (A); \
- (__m128i)__builtin_ia32_vprotqi((__v2di)__A, (N)); })
+ (__m128i)__builtin_ia32_vprotqi((__v2di)(__m128i)(A), (N)); })
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_shl_epi8(__m128i __A, __m128i __B)
@@ -306,44 +298,36 @@ _mm_sha_epi64(__m128i __A, __m128i __B)
}
#define _mm_com_epu8(A, B, N) __extension__ ({ \
- __m128i __A = (A); \
- __m128i __B = (B); \
- (__m128i)__builtin_ia32_vpcomub((__v16qi)__A, (__v16qi)__B, (N)); })
+ (__m128i)__builtin_ia32_vpcomub((__v16qi)(__m128i)(A), \
+ (__v16qi)(__m128i)(B), (N)); })
#define _mm_com_epu16(A, B, N) __extension__ ({ \
- __m128i __A = (A); \
- __m128i __B = (B); \
- (__m128i)__builtin_ia32_vpcomuw((__v8hi)__A, (__v8hi)__B, (N)); })
+ (__m128i)__builtin_ia32_vpcomuw((__v8hi)(__m128i)(A), \
+ (__v8hi)(__m128i)(B), (N)); })
#define _mm_com_epu32(A, B, N) __extension__ ({ \
- __m128i __A = (A); \
- __m128i __B = (B); \
- (__m128i)__builtin_ia32_vpcomud((__v4si)__A, (__v4si)__B, (N)); })
+ (__m128i)__builtin_ia32_vpcomud((__v4si)(__m128i)(A), \
+ (__v4si)(__m128i)(B), (N)); })
#define _mm_com_epu64(A, B, N) __extension__ ({ \
- __m128i __A = (A); \
- __m128i __B = (B); \
- (__m128i)__builtin_ia32_vpcomuq((__v2di)__A, (__v2di)__B, (N)); })
+ (__m128i)__builtin_ia32_vpcomuq((__v2di)(__m128i)(A), \
+ (__v2di)(__m128i)(B), (N)); })
#define _mm_com_epi8(A, B, N) __extension__ ({ \
- __m128i __A = (A); \
- __m128i __B = (B); \
- (__m128i)__builtin_ia32_vpcomb((__v16qi)__A, (__v16qi)__B, (N)); })
+ (__m128i)__builtin_ia32_vpcomb((__v16qi)(__m128i)(A), \
+ (__v16qi)(__m128i)(B), (N)); })
#define _mm_com_epi16(A, B, N) __extension__ ({ \
- __m128i __A = (A); \
- __m128i __B = (B); \
- (__m128i)__builtin_ia32_vpcomw((__v8hi)__A, (__v8hi)__B, (N)); })
+ (__m128i)__builtin_ia32_vpcomw((__v8hi)(__m128i)(A), \
+ (__v8hi)(__m128i)(B), (N)); })
#define _mm_com_epi32(A, B, N) __extension__ ({ \
- __m128i __A = (A); \
- __m128i __B = (B); \
- (__m128i)__builtin_ia32_vpcomd((__v4si)__A, (__v4si)__B, (N)); })
+ (__m128i)__builtin_ia32_vpcomd((__v4si)(__m128i)(A), \
+ (__v4si)(__m128i)(B), (N)); })
#define _mm_com_epi64(A, B, N) __extension__ ({ \
- __m128i __A = (A); \
- __m128i __B = (B); \
- (__m128i)__builtin_ia32_vpcomq((__v2di)__A, (__v2di)__B, (N)); })
+ (__m128i)__builtin_ia32_vpcomq((__v2di)(__m128i)(A), \
+ (__v2di)(__m128i)(B), (N)); })
#define _MM_PCOMCTRL_LT 0
#define _MM_PCOMCTRL_LE 1
@@ -739,32 +723,23 @@ _mm_comtrue_epi64(__m128i __A, __m128i __B)
}
#define _mm_permute2_pd(X, Y, C, I) __extension__ ({ \
- __m128d __X = (X); \
- __m128d __Y = (Y); \
- __m128i __C = (C); \
- (__m128d)__builtin_ia32_vpermil2pd((__v2df)__X, (__v2df)__Y, \
- (__v2di)__C, (I)); })
+ (__m128d)__builtin_ia32_vpermil2pd((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), \
+ (__v2di)(__m128i)(C), (I)); })
#define _mm256_permute2_pd(X, Y, C, I) __extension__ ({ \
- __m256d __X = (X); \
- __m256d __Y = (Y); \
- __m256i __C = (C); \
- (__m256d)__builtin_ia32_vpermil2pd256((__v4df)__X, (__v4df)__Y, \
- (__v4di)__C, (I)); })
+ (__m256d)__builtin_ia32_vpermil2pd256((__v4df)(__m256d)(X), \
+ (__v4df)(__m256d)(Y), \
+ (__v4di)(__m256i)(C), (I)); })
#define _mm_permute2_ps(X, Y, C, I) __extension__ ({ \
- __m128 __X = (X); \
- __m128 __Y = (Y); \
- __m128i __C = (C); \
- (__m128)__builtin_ia32_vpermil2ps((__v4sf)__X, (__v4sf)__Y, \
- (__v4si)__C, (I)); })
+ (__m128)__builtin_ia32_vpermil2ps((__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
+ (__v4si)(__m128i)(C), (I)); })
#define _mm256_permute2_ps(X, Y, C, I) __extension__ ({ \
- __m256 __X = (X); \
- __m256 __Y = (Y); \
- __m256i __C = (C); \
- (__m256)__builtin_ia32_vpermil2ps256((__v8sf)__X, (__v8sf)__Y, \
- (__v8si)__C, (I)); })
+ (__m256)__builtin_ia32_vpermil2ps256((__v8sf)(__m256)(X), \
+ (__v8sf)(__m256)(Y), \
+ (__v8si)(__m256i)(C), (I)); })
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_frcz_ss(__m128 __A)
@@ -804,6 +779,4 @@ _mm256_frcz_pd(__m256d __A)
#undef __DEFAULT_FN_ATTRS
-#endif /* __XOP__ */
-
#endif /* __XOPINTRIN_H */
diff --git a/contrib/llvm/tools/clang/lib/Headers/xsavecintrin.h b/contrib/llvm/tools/clang/lib/Headers/xsavecintrin.h
new file mode 100644
index 000000000000..598470a682e2
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Headers/xsavecintrin.h
@@ -0,0 +1,48 @@
+/*===---- xsavecintrin.h - XSAVEC intrinsic ---------------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __IMMINTRIN_H
+#error "Never use <xsavecintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __XSAVECINTRIN_H
+#define __XSAVECINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xsavec")))
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_xsavec(void *__p, unsigned long long __m) {
+ __builtin_ia32_xsavec(__p, __m);
+}
+
+#ifdef __x86_64__
+static __inline__ void __DEFAULT_FN_ATTRS
+_xsavec64(void *__p, unsigned long long __m) {
+ __builtin_ia32_xsavec64(__p, __m);
+}
+#endif
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif
diff --git a/contrib/llvm/tools/clang/lib/Headers/xsaveintrin.h b/contrib/llvm/tools/clang/lib/Headers/xsaveintrin.h
new file mode 100644
index 000000000000..a2e6b2e742ff
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Headers/xsaveintrin.h
@@ -0,0 +1,58 @@
+/*===---- xsaveintrin.h - XSAVE intrinsic -----------------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __IMMINTRIN_H
+#error "Never use <xsaveintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __XSAVEINTRIN_H
+#define __XSAVEINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xsave")))
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_xsave(void *__p, unsigned long long __m) {
+ __builtin_ia32_xsave(__p, __m);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_xrstor(void *__p, unsigned long long __m) {
+ __builtin_ia32_xrstor(__p, __m);
+}
+
+#ifdef __x86_64__
+static __inline__ void __DEFAULT_FN_ATTRS
+_xsave64(void *__p, unsigned long long __m) {
+ __builtin_ia32_xsave64(__p, __m);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_xrstor64(void *__p, unsigned long long __m) {
+ __builtin_ia32_xrstor64(__p, __m);
+}
+#endif
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif
diff --git a/contrib/llvm/tools/clang/lib/Headers/xsaveoptintrin.h b/contrib/llvm/tools/clang/lib/Headers/xsaveoptintrin.h
new file mode 100644
index 000000000000..d3faae78be4f
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Headers/xsaveoptintrin.h
@@ -0,0 +1,48 @@
+/*===---- xsaveoptintrin.h - XSAVEOPT intrinsic -----------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __IMMINTRIN_H
+#error "Never use <xsaveoptintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __XSAVEOPTINTRIN_H
+#define __XSAVEOPTINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xsaveopt")))
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_xsaveopt(void *__p, unsigned long long __m) {
+ __builtin_ia32_xsaveopt(__p, __m);
+}
+
+#ifdef __x86_64__
+static __inline__ void __DEFAULT_FN_ATTRS
+_xsaveopt64(void *__p, unsigned long long __m) {
+ __builtin_ia32_xsaveopt64(__p, __m);
+}
+#endif
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif
diff --git a/contrib/llvm/tools/clang/lib/Headers/xsavesintrin.h b/contrib/llvm/tools/clang/lib/Headers/xsavesintrin.h
new file mode 100644
index 000000000000..c5e540a86edb
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Headers/xsavesintrin.h
@@ -0,0 +1,58 @@
+/*===---- xsavesintrin.h - XSAVES intrinsic ---------------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __IMMINTRIN_H
+#error "Never use <xsavesintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __XSAVESINTRIN_H
+#define __XSAVESINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xsaves")))
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_xsaves(void *__p, unsigned long long __m) {
+ __builtin_ia32_xsaves(__p, __m);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_xrstors(void *__p, unsigned long long __m) {
+ __builtin_ia32_xrstors(__p, __m);
+}
+
+#ifdef __x86_64__
+static __inline__ void __DEFAULT_FN_ATTRS
+_xrstors64(void *__p, unsigned long long __m) {
+ __builtin_ia32_xrstors64(__p, __m);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_xsaves64(void *__p, unsigned long long __m) {
+ __builtin_ia32_xsaves64(__p, __m);
+}
+#endif
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif
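
A usage sketch for the new XSAVE-family wrappers. The 1 KiB size is assumed for illustration only; real code should size the save area from CPUID leaf 0xD, and the buffer must be 64-byte aligned (needs -mxsave):

    #include <immintrin.h>

    /* Save, then restore, the x87 and SSE state components
       (mask bits 0 and 1). */
    static char __attribute__((aligned(64))) xarea[1024];

    void checkpoint(void) { _xsave(xarea, 0x3); }
    void rollback(void)   { _xrstor(xarea, 0x3); }
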