diff options
Diffstat (limited to 'contrib/llvm/tools/clang/lib/Headers/avx512vlbwintrin.h')
-rw-r--r-- | contrib/llvm/tools/clang/lib/Headers/avx512vlbwintrin.h | 1328 |
1 files changed, 1199 insertions, 129 deletions
diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512vlbwintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512vlbwintrin.h index b4542d69ab08..990e992a113f 100644 --- a/contrib/llvm/tools/clang/lib/Headers/avx512vlbwintrin.h +++ b/contrib/llvm/tools/clang/lib/Headers/avx512vlbwintrin.h @@ -31,6 +31,11 @@ /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw"))) +static __inline __m128i __DEFAULT_FN_ATTRS +_mm_setzero_hi(void){ + return (__m128i)(__v8hi){ 0, 0, 0, 0, 0, 0, 0, 0 }; +} + /* Integer compare */ static __inline__ __mmask16 __DEFAULT_FN_ATTRS @@ -781,33 +786,33 @@ _mm_maskz_mullo_epi16 (__mmask8 __U, __m128i __A, __m128i __B) { static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W) { - return (__m128i) __builtin_ia32_blendmb_128_mask ((__v16qi) __A, - (__v16qi) __W, - (__mmask16) __U); + return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U, + (__v16qi) __W, + (__v16qi) __A); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W) { - return (__m256i) __builtin_ia32_blendmb_256_mask ((__v32qi) __A, + return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U, (__v32qi) __W, - (__mmask32) __U); + (__v32qi) __A); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W) { - return (__m128i) __builtin_ia32_blendmw_128_mask ((__v8hi) __A, + return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U, (__v8hi) __W, - (__mmask8) __U); + (__v8hi) __A); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W) { - return (__m256i) __builtin_ia32_blendmw_256_mask ((__v16hi) __A, + return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U, (__v16hi) __W, - (__mmask16) __U); + (__v16hi) __A); } static __inline__ __m128i __DEFAULT_FN_ATTRS @@ -1994,6 +1999,25 @@ _mm_maskz_cvtepi16_epi8 (__mmask8 __M, __m128i __A) { __M); } +static __inline__ void __DEFAULT_FN_ATTRS +_mm_mask_cvtepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) +{ + __builtin_ia32_pmovwb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M); +} + + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) +{ + __builtin_ia32_pmovswb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) +{ + __builtin_ia32_pmovuswb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M); +} + static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtepi16_epi8 (__m256i __A) { return (__m128i) __builtin_ia32_pmovwb256_mask ((__v16hi) __A, @@ -2015,6 +2039,23 @@ _mm256_maskz_cvtepi16_epi8 (__mmask16 __M, __m256i __A) { __M); } +static __inline__ void __DEFAULT_FN_ATTRS +_mm256_mask_cvtepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A) +{ + __builtin_ia32_pmovwb256mem_mask ((__v16qi *) __P, (__v16hi) __A, __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm256_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A) +{ + __builtin_ia32_pmovswb256mem_mask ((__v16qi *) __P, (__v16hi) __A, __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm256_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) +{ + __builtin_ia32_pmovuswb256mem_mask ((__v16qi*) __P, (__v16hi) __A, __M); +} static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_mulhrs_epi16 (__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_pmulhrsw128_mask ((__v8hi) __X, @@ -2116,220 +2157,1249 @@ _mm256_maskz_mulhi_epi16 (__mmask16 __U, __m256i __A, __m256i __B) { } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_unpackhi_epi8 (__m128i __W, __mmask16 __U, __m128i __A, - __m128i __B) { - return (__m128i) __builtin_ia32_punpckhbw128_mask ((__v16qi) __A, - (__v16qi) __B, - (__v16qi) __W, - (__mmask16) __U); +_mm_mask_unpackhi_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { + return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, + (__v16qi)_mm_unpackhi_epi8(__A, __B), + (__v16qi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maskz_unpackhi_epi8 (__mmask16 __U, __m128i __A, __m128i __B) { - return (__m128i) __builtin_ia32_punpckhbw128_mask ((__v16qi) __A, - (__v16qi) __B, - (__v16qi) _mm_setzero_si128(), - (__mmask16) __U); +_mm_maskz_unpackhi_epi8(__mmask16 __U, __m128i __A, __m128i __B) { + return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, + (__v16qi)_mm_unpackhi_epi8(__A, __B), + (__v16qi)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_unpackhi_epi8 (__m256i __W, __mmask32 __U, __m256i __A, - __m256i __B) { - return (__m256i) __builtin_ia32_punpckhbw256_mask ((__v32qi) __A, - (__v32qi) __B, - (__v32qi) __W, - (__mmask32) __U); +_mm256_mask_unpackhi_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { + return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, + (__v32qi)_mm256_unpackhi_epi8(__A, __B), + (__v32qi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_unpackhi_epi8 (__mmask32 __U, __m256i __A, __m256i __B) { - return (__m256i) __builtin_ia32_punpckhbw256_mask ((__v32qi) __A, - (__v32qi) __B, - (__v32qi) _mm256_setzero_si256(), - (__mmask32) __U); +_mm256_maskz_unpackhi_epi8(__mmask32 __U, __m256i __A, __m256i __B) { + return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, + (__v32qi)_mm256_unpackhi_epi8(__A, __B), + (__v32qi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_unpackhi_epi16 (__m128i __W, __mmask8 __U, __m128i __A, - __m128i __B) { - return (__m128i) __builtin_ia32_punpckhwd128_mask ((__v8hi) __A, - (__v8hi) __B, - (__v8hi) __W, - (__mmask8) __U); +_mm_mask_unpackhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { + return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, + (__v8hi)_mm_unpackhi_epi16(__A, __B), + (__v8hi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maskz_unpackhi_epi16 (__mmask8 __U, __m128i __A, __m128i __B) { - return (__m128i) __builtin_ia32_punpckhwd128_mask ((__v8hi) __A, - (__v8hi) __B, - (__v8hi) _mm_setzero_si128(), - (__mmask8) __U); +_mm_maskz_unpackhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) { + return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, + (__v8hi)_mm_unpackhi_epi16(__A, __B), + (__v8hi) _mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_unpackhi_epi16 (__m256i __W, __mmask16 __U, __m256i __A, - __m256i __B) { - return (__m256i) __builtin_ia32_punpckhwd256_mask ((__v16hi) __A, - (__v16hi) __B, - (__v16hi) __W, - (__mmask16) __U); +_mm256_mask_unpackhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { + return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, + (__v16hi)_mm256_unpackhi_epi16(__A, __B), + (__v16hi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_unpackhi_epi16 (__mmask16 __U, __m256i __A, __m256i __B) { - return (__m256i) __builtin_ia32_punpckhwd256_mask ((__v16hi) __A, - (__v16hi) __B, - (__v16hi) _mm256_setzero_si256(), - (__mmask16) __U); +_mm256_maskz_unpackhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) { + return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, + (__v16hi)_mm256_unpackhi_epi16(__A, __B), + (__v16hi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_unpacklo_epi8 (__m128i __W, __mmask16 __U, __m128i __A, - __m128i __B) { - return (__m128i) __builtin_ia32_punpcklbw128_mask ((__v16qi) __A, - (__v16qi) __B, - (__v16qi) __W, - (__mmask16) __U); +_mm_mask_unpacklo_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { + return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, + (__v16qi)_mm_unpacklo_epi8(__A, __B), + (__v16qi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maskz_unpacklo_epi8 (__mmask16 __U, __m128i __A, __m128i __B) { - return (__m128i) __builtin_ia32_punpcklbw128_mask ((__v16qi) __A, - (__v16qi) __B, - (__v16qi) _mm_setzero_si128(), - (__mmask16) __U); +_mm_maskz_unpacklo_epi8(__mmask16 __U, __m128i __A, __m128i __B) { + return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, + (__v16qi)_mm_unpacklo_epi8(__A, __B), + (__v16qi)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_unpacklo_epi8 (__m256i __W, __mmask32 __U, __m256i __A, - __m256i __B) { - return (__m256i) __builtin_ia32_punpcklbw256_mask ((__v32qi) __A, - (__v32qi) __B, - (__v32qi) __W, - (__mmask32) __U); +_mm256_mask_unpacklo_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { + return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, + (__v32qi)_mm256_unpacklo_epi8(__A, __B), + (__v32qi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_unpacklo_epi8 (__mmask32 __U, __m256i __A, __m256i __B) { - return (__m256i) __builtin_ia32_punpcklbw256_mask ((__v32qi) __A, - (__v32qi) __B, - (__v32qi) _mm256_setzero_si256(), - (__mmask32) __U); +_mm256_maskz_unpacklo_epi8(__mmask32 __U, __m256i __A, __m256i __B) { + return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, + (__v32qi)_mm256_unpacklo_epi8(__A, __B), + (__v32qi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_unpacklo_epi16 (__m128i __W, __mmask8 __U, __m128i __A, - __m128i __B) { - return (__m128i) __builtin_ia32_punpcklwd128_mask ((__v8hi) __A, - (__v8hi) __B, - (__v8hi) __W, - (__mmask8) __U); +_mm_mask_unpacklo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { + return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, + (__v8hi)_mm_unpacklo_epi16(__A, __B), + (__v8hi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maskz_unpacklo_epi16 (__mmask8 __U, __m128i __A, __m128i __B) { - return (__m128i) __builtin_ia32_punpcklwd128_mask ((__v8hi) __A, - (__v8hi) __B, - (__v8hi) _mm_setzero_si128(), - (__mmask8) __U); +_mm_maskz_unpacklo_epi16(__mmask8 __U, __m128i __A, __m128i __B) { + return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, + (__v8hi)_mm_unpacklo_epi16(__A, __B), + (__v8hi) _mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_unpacklo_epi16 (__m256i __W, __mmask16 __U, __m256i __A, - __m256i __B) { - return (__m256i) __builtin_ia32_punpcklwd256_mask ((__v16hi) __A, - (__v16hi) __B, - (__v16hi) __W, - (__mmask16) __U); +_mm256_mask_unpacklo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { + return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, + (__v16hi)_mm256_unpacklo_epi16(__A, __B), + (__v16hi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_unpacklo_epi16 (__mmask16 __U, __m256i __A, __m256i __B) { - return (__m256i) __builtin_ia32_punpcklwd256_mask ((__v16hi) __A, - (__v16hi) __B, - (__v16hi) _mm256_setzero_si256(), - (__mmask16) __U); +_mm256_maskz_unpacklo_epi16(__mmask16 __U, __m256i __A, __m256i __B) { + return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, + (__v16hi)_mm256_unpacklo_epi16(__A, __B), + (__v16hi)_mm256_setzero_si256()); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_cvtepi8_epi16 (__m128i __W, __mmask32 __U, __m128i __A) +{ + return (__m128i) __builtin_ia32_pmovsxbw128_mask ((__v16qi) __A, + (__v8hi) __W, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_cvtepi8_epi16 (__mmask8 __U, __m128i __A) +{ + return (__m128i) __builtin_ia32_pmovsxbw128_mask ((__v16qi) __A, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) __U); } +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_cvtepi8_epi16 (__m256i __W, __mmask32 __U, __m128i __A) +{ + return (__m256i) __builtin_ia32_pmovsxbw256_mask ((__v16qi) __A, + (__v16hi) __W, + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_cvtepi8_epi16 (__mmask16 __U, __m128i __A) +{ + return (__m256i) __builtin_ia32_pmovsxbw256_mask ((__v16qi) __A, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) __U); +} + + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_cvtepu8_epi16 (__m128i __W, __mmask32 __U, __m128i __A) +{ + return (__m128i) __builtin_ia32_pmovzxbw128_mask ((__v16qi) __A, + (__v8hi) __W, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_cvtepu8_epi16 (__mmask8 __U, __m128i __A) +{ + return (__m128i) __builtin_ia32_pmovzxbw128_mask ((__v16qi) __A, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_cvtepu8_epi16 (__m256i __W, __mmask32 __U, __m128i __A) +{ + return (__m256i) __builtin_ia32_pmovzxbw256_mask ((__v16qi) __A, + (__v16hi) __W, + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_cvtepu8_epi16 (__mmask16 __U, __m128i __A) +{ + return (__m256i) __builtin_ia32_pmovzxbw256_mask ((__v16qi) __A, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) __U); +} + + #define _mm_cmp_epi8_mask(a, b, p) __extension__ ({ \ (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \ - (__v16qi)(__m128i)(b), \ - (p), (__mmask16)-1); }) + (__v16qi)(__m128i)(b), (int)(p), \ + (__mmask16)-1); }) #define _mm_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \ (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \ - (__v16qi)(__m128i)(b), \ - (p), (__mmask16)(m)); }) + (__v16qi)(__m128i)(b), (int)(p), \ + (__mmask16)(m)); }) #define _mm_cmp_epu8_mask(a, b, p) __extension__ ({ \ (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \ - (__v16qi)(__m128i)(b), \ - (p), (__mmask16)-1); }) + (__v16qi)(__m128i)(b), (int)(p), \ + (__mmask16)-1); }) #define _mm_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \ (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \ - (__v16qi)(__m128i)(b), \ - (p), (__mmask16)(m)); }) + (__v16qi)(__m128i)(b), (int)(p), \ + (__mmask16)(m)); }) #define _mm256_cmp_epi8_mask(a, b, p) __extension__ ({ \ (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \ - (__v32qi)(__m256i)(b), \ - (p), (__mmask32)-1); }) + (__v32qi)(__m256i)(b), (int)(p), \ + (__mmask32)-1); }) #define _mm256_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \ (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \ - (__v32qi)(__m256i)(b), \ - (p), (__mmask32)(m)); }) + (__v32qi)(__m256i)(b), (int)(p), \ + (__mmask32)(m)); }) #define _mm256_cmp_epu8_mask(a, b, p) __extension__ ({ \ (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \ - (__v32qi)(__m256i)(b), \ - (p), (__mmask32)-1); }) + (__v32qi)(__m256i)(b), (int)(p), \ + (__mmask32)-1); }) #define _mm256_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \ (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \ - (__v32qi)(__m256i)(b), \ - (p), (__mmask32)(m)); }) + (__v32qi)(__m256i)(b), (int)(p), \ + (__mmask32)(m)); }) #define _mm_cmp_epi16_mask(a, b, p) __extension__ ({ \ (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \ - (__v8hi)(__m128i)(b), \ - (p), (__mmask8)-1); }) + (__v8hi)(__m128i)(b), (int)(p), \ + (__mmask8)-1); }) #define _mm_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \ (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \ - (__v8hi)(__m128i)(b), \ - (p), (__mmask8)(m)); }) + (__v8hi)(__m128i)(b), (int)(p), \ + (__mmask8)(m)); }) #define _mm_cmp_epu16_mask(a, b, p) __extension__ ({ \ (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \ - (__v8hi)(__m128i)(b), \ - (p), (__mmask8)-1); }) + (__v8hi)(__m128i)(b), (int)(p), \ + (__mmask8)-1); }) #define _mm_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \ (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \ - (__v8hi)(__m128i)(b), \ - (p), (__mmask8)(m)); }) + (__v8hi)(__m128i)(b), (int)(p), \ + (__mmask8)(m)); }) #define _mm256_cmp_epi16_mask(a, b, p) __extension__ ({ \ (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \ - (__v16hi)(__m256i)(b), \ - (p), (__mmask16)-1); }) + (__v16hi)(__m256i)(b), (int)(p), \ + (__mmask16)-1); }) #define _mm256_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \ (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \ - (__v16hi)(__m256i)(b), \ - (p), (__mmask16)(m)); }) + (__v16hi)(__m256i)(b), (int)(p), \ + (__mmask16)(m)); }) #define _mm256_cmp_epu16_mask(a, b, p) __extension__ ({ \ (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \ - (__v16hi)(__m256i)(b), \ - (p), (__mmask16)-1); }) + (__v16hi)(__m256i)(b), (int)(p), \ + (__mmask16)-1); }) #define _mm256_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \ (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \ - (__v16hi)(__m256i)(b), \ - (p), (__mmask16)(m)); }) + (__v16hi)(__m256i)(b), (int)(p), \ + (__mmask16)(m)); }) + +#define _mm_mask_shufflehi_epi16(W, U, A, imm) __extension__ ({ \ + (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ + (__v8hi)_mm_shufflehi_epi16((A), (imm)), \ + (__v8hi)(__m128i)(W)); }) + +#define _mm_maskz_shufflehi_epi16(U, A, imm) __extension__ ({ \ + (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ + (__v8hi)_mm_shufflehi_epi16((A), (imm)), \ + (__v8hi)_mm_setzero_hi()); }) + +#define _mm256_mask_shufflehi_epi16(W, U, A, imm) __extension__ ({ \ + (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ + (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \ + (__v16hi)(__m256i)(W)); }) + +#define _mm256_maskz_shufflehi_epi16(U, A, imm) __extension__ ({ \ + (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ + (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \ + (__v16hi)_mm256_setzero_si256()); }) + +#define _mm_mask_shufflelo_epi16(W, U, A, imm) __extension__ ({ \ + (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ + (__v8hi)_mm_shufflelo_epi16((A), (imm)), \ + (__v8hi)(__m128i)(W)); }) + +#define _mm_maskz_shufflelo_epi16(U, A, imm) __extension__ ({ \ + (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ + (__v8hi)_mm_shufflelo_epi16((A), (imm)), \ + (__v8hi)_mm_setzero_hi()); }) + +#define _mm256_mask_shufflelo_epi16(W, U, A, imm) __extension__ ({ \ + (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ + (__v16hi)_mm256_shufflelo_epi16((A), \ + (imm)), \ + (__v16hi)(__m256i)(W)); }) + +#define _mm256_maskz_shufflelo_epi16(U, A, imm) __extension__ ({ \ + (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ + (__v16hi)_mm256_shufflelo_epi16((A), \ + (imm)), \ + (__v16hi)_mm256_setzero_si256()); }) + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_sllv_epi16 (__m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_psllv16hi_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_sllv_epi16 (__m256i __W, __mmask16 __U, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_psllv16hi_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) __W, + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_sllv_epi16 (__mmask16 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_psllv16hi_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_sllv_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_psllv8hi_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) + _mm_setzero_hi (), + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_sllv_epi16 (__m128i __W, __mmask8 __U, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_psllv8hi_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) __W, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_sllv_epi16 (__mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_psllv8hi_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_sll_epi16 (__m128i __W, __mmask8 __U, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_psllw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) __W, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_sll_epi16 (__mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_psllw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_sll_epi16 (__m256i __W, __mmask16 __U, __m256i __A, + __m128i __B) +{ + return (__m256i) __builtin_ia32_psllw256_mask ((__v16hi) __A, + (__v8hi) __B, + (__v16hi) __W, + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_sll_epi16 (__mmask16 __U, __m256i __A, __m128i __B) +{ + return (__m256i) __builtin_ia32_psllw256_mask ((__v16hi) __A, + (__v8hi) __B, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) __U); +} + +#define _mm_mask_slli_epi16(W, U, A, B) __extension__ ({ \ + (__m128i)__builtin_ia32_psllwi128_mask((__v8hi)(__m128i)(A), (int)(B), \ + (__v8hi)(__m128i)(W), \ + (__mmask8)(U)); }) + +#define _mm_maskz_slli_epi16(U, A, B) __extension__ ({ \ + (__m128i)__builtin_ia32_psllwi128_mask((__v8hi)(__m128i)(A), (int)(B), \ + (__v8hi)_mm_setzero_si128(), \ + (__mmask8)(U)); }) + +#define _mm256_mask_slli_epi16(W, U, A, B) __extension__ ({ \ + (__m256i)__builtin_ia32_psllwi256_mask((__v16hi)(__m256i)(A), (int)(B), \ + (__v16hi)(__m256i)(W), \ + (__mmask16)(U)); }) + +#define _mm256_maskz_slli_epi16(U, A, B) __extension__ ({ \ + (__m256i)__builtin_ia32_psllwi256_mask((__v16hi)(__m256i)(A), (int)(B), \ + (__v16hi)_mm256_setzero_si256(), \ + (__mmask16)(U)); }) + + + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_srlv_epi16 (__m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_psrlv16hi_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_srlv_epi16 (__m256i __W, __mmask16 __U, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_psrlv16hi_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) __W, + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_srlv_epi16 (__mmask16 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_psrlv16hi_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_srlv_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_psrlv8hi_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) + _mm_setzero_hi (), + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_srlv_epi16 (__m128i __W, __mmask8 __U, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_psrlv8hi_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) __W, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_srlv_epi16 (__mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_psrlv8hi_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_srav_epi16 (__m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_psrav16hi_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_srav_epi16 (__m256i __W, __mmask16 __U, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_psrav16hi_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) __W, + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_srav_epi16 (__mmask16 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_psrav16hi_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_srav_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_psrav8hi_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) + _mm_setzero_hi (), + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_srav_epi16 (__m128i __W, __mmask8 __U, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_psrav8hi_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) __W, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_srav_epi16 (__mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_psrav8hi_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_sra_epi16 (__m128i __W, __mmask8 __U, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_psraw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) __W, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_sra_epi16 (__mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_psraw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_sra_epi16 (__m256i __W, __mmask16 __U, __m256i __A, + __m128i __B) +{ + return (__m256i) __builtin_ia32_psraw256_mask ((__v16hi) __A, + (__v8hi) __B, + (__v16hi) __W, + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_sra_epi16 (__mmask16 __U, __m256i __A, __m128i __B) +{ + return (__m256i) __builtin_ia32_psraw256_mask ((__v16hi) __A, + (__v8hi) __B, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) __U); +} + +#define _mm_mask_srai_epi16(W, U, A, imm) __extension__ ({ \ + (__m128i)__builtin_ia32_psrawi128_mask((__v8hi)(__m128i)(A), (int)(imm), \ + (__v8hi)(__m128i)(W), \ + (__mmask8)(U)); }) + +#define _mm_maskz_srai_epi16(U, A, imm) __extension__ ({ \ + (__m128i)__builtin_ia32_psrawi128_mask((__v8hi)(__m128i)(A), (int)(imm), \ + (__v8hi)_mm_setzero_si128(), \ + (__mmask8)(U)); }) + +#define _mm256_mask_srai_epi16(W, U, A, imm) __extension__ ({ \ + (__m256i)__builtin_ia32_psrawi256_mask((__v16hi)(__m256i)(A), (int)(imm), \ + (__v16hi)(__m256i)(W), \ + (__mmask16)(U)); }) + +#define _mm256_maskz_srai_epi16(U, A, imm) __extension__ ({ \ + (__m256i)__builtin_ia32_psrawi256_mask((__v16hi)(__m256i)(A), (int)(imm), \ + (__v16hi)_mm256_setzero_si256(), \ + (__mmask16)(U)); }) + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_srl_epi16 (__m128i __W, __mmask8 __U, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_psrlw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) __W, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_srl_epi16 (__mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_psrlw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_srl_epi16 (__m256i __W, __mmask16 __U, __m256i __A, + __m128i __B) +{ + return (__m256i) __builtin_ia32_psrlw256_mask ((__v16hi) __A, + (__v8hi) __B, + (__v16hi) __W, + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_srl_epi16 (__mmask16 __U, __m256i __A, __m128i __B) +{ + return (__m256i) __builtin_ia32_psrlw256_mask ((__v16hi) __A, + (__v8hi) __B, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) __U); +} + +#define _mm_mask_srli_epi16(W, U, A, imm) __extension__ ({ \ + (__m128i)__builtin_ia32_psrlwi128_mask((__v8hi)(__m128i)(A), (int)(imm), \ + (__v8hi)(__m128i)(W), \ + (__mmask8)(U)); }) + +#define _mm_maskz_srli_epi16(U, A, imm) __extension__ ({ \ + (__m128i)__builtin_ia32_psrlwi128_mask((__v8hi)(__m128i)(A), (int)(imm), \ + (__v8hi)_mm_setzero_si128(), \ + (__mmask8)(U)); }) + +#define _mm256_mask_srli_epi16(W, U, A, imm) __extension__ ({ \ + (__m256i)__builtin_ia32_psrlwi256_mask((__v16hi)(__m256i)(A), (int)(imm), \ + (__v16hi)(__m256i)(W), \ + (__mmask16)(U)); }) + +#define _mm256_maskz_srli_epi16(U, A, imm) __extension__ ({ \ + (__m256i)__builtin_ia32_psrlwi256_mask((__v16hi)(__m256i)(A), (int)(imm), \ + (__v16hi)_mm256_setzero_si256(), \ + (__mmask16)(U)); }) + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_mov_epi16 (__m128i __W, __mmask8 __U, __m128i __A) +{ + return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U, + (__v8hi) __A, + (__v8hi) __W); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_mov_epi16 (__mmask8 __U, __m128i __A) +{ + return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U, + (__v8hi) __A, + (__v8hi) _mm_setzero_hi ()); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_mov_epi16 (__m256i __W, __mmask16 __U, __m256i __A) +{ + return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U, + (__v16hi) __A, + (__v16hi) __W); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_mov_epi16 (__mmask16 __U, __m256i __A) +{ + return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U, + (__v16hi) __A, + (__v16hi) _mm256_setzero_si256 ()); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_mov_epi8 (__m128i __W, __mmask16 __U, __m128i __A) +{ + return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U, + (__v16qi) __A, + (__v16qi) __W); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_mov_epi8 (__mmask16 __U, __m128i __A) +{ + return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U, + (__v16qi) __A, + (__v16qi) _mm_setzero_hi ()); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_mov_epi8 (__m256i __W, __mmask32 __U, __m256i __A) +{ + return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U, + (__v32qi) __A, + (__v32qi) __W); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_mov_epi8 (__mmask32 __U, __m256i __A) +{ + return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U, + (__v32qi) __A, + (__v32qi) _mm256_setzero_si256 ()); +} + + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A) +{ + return (__m128i) __builtin_ia32_pbroadcastb128_gpr_mask (__A, + (__v16qi) __O, + __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_set1_epi8 (__mmask16 __M, char __A) +{ + return (__m128i) __builtin_ia32_pbroadcastb128_gpr_mask (__A, + (__v16qi) + _mm_setzero_si128 (), + __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_set1_epi8 (__m256i __O, __mmask32 __M, char __A) +{ + return (__m256i) __builtin_ia32_pbroadcastb256_gpr_mask (__A, + (__v32qi) __O, + __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_set1_epi8 (__mmask32 __M, char __A) +{ + return (__m256i) __builtin_ia32_pbroadcastb256_gpr_mask (__A, + (__v32qi) + _mm256_setzero_si256 (), + __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_loadu_epi16 (__m128i __W, __mmask8 __U, void const *__P) +{ + return (__m128i) __builtin_ia32_loaddquhi128_mask ((__v8hi *) __P, + (__v8hi) __W, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_loadu_epi16 (__mmask8 __U, void const *__P) +{ + return (__m128i) __builtin_ia32_loaddquhi128_mask ((__v8hi *) __P, + (__v8hi) + _mm_setzero_hi (), + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_loadu_epi16 (__m256i __W, __mmask16 __U, void const *__P) +{ + return (__m256i) __builtin_ia32_loaddquhi256_mask ((__v16hi *) __P, + (__v16hi) __W, + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_loadu_epi16 (__mmask16 __U, void const *__P) +{ + return (__m256i) __builtin_ia32_loaddquhi256_mask ((__v16hi *) __P, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_loadu_epi8 (__m128i __W, __mmask16 __U, void const *__P) +{ + return (__m128i) __builtin_ia32_loaddquqi128_mask ((__v16qi *) __P, + (__v16qi) __W, + (__mmask16) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_loadu_epi8 (__mmask16 __U, void const *__P) +{ + return (__m128i) __builtin_ia32_loaddquqi128_mask ((__v16qi *) __P, + (__v16qi) + _mm_setzero_si128 (), + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_loadu_epi8 (__m256i __W, __mmask32 __U, void const *__P) +{ + return (__m256i) __builtin_ia32_loaddquqi256_mask ((__v32qi *) __P, + (__v32qi) __W, + (__mmask32) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_loadu_epi8 (__mmask32 __U, void const *__P) +{ + return (__m256i) __builtin_ia32_loaddquqi256_mask ((__v32qi *) __P, + (__v32qi) + _mm256_setzero_si256 (), + (__mmask32) __U); +} +static __inline__ void __DEFAULT_FN_ATTRS +_mm_mask_storeu_epi16 (void *__P, __mmask8 __U, __m128i __A) +{ + __builtin_ia32_storedquhi128_mask ((__v8hi *) __P, + (__v8hi) __A, + (__mmask8) __U); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm256_mask_storeu_epi16 (void *__P, __mmask16 __U, __m256i __A) +{ + __builtin_ia32_storedquhi256_mask ((__v16hi *) __P, + (__v16hi) __A, + (__mmask16) __U); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_mask_storeu_epi8 (void *__P, __mmask16 __U, __m128i __A) +{ + __builtin_ia32_storedquqi128_mask ((__v16qi *) __P, + (__v16qi) __A, + (__mmask16) __U); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A) +{ + __builtin_ia32_storedquqi256_mask ((__v32qi *) __P, + (__v32qi) __A, + (__mmask32) __U); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm_test_epi8_mask (__m128i __A, __m128i __B) +{ + return (__mmask16) __builtin_ia32_ptestmb128 ((__v16qi) __A, + (__v16qi) __B, + (__mmask16) -1); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm_mask_test_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B) +{ + return (__mmask16) __builtin_ia32_ptestmb128 ((__v16qi) __A, + (__v16qi) __B, __U); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm256_test_epi8_mask (__m256i __A, __m256i __B) +{ + return (__mmask32) __builtin_ia32_ptestmb256 ((__v32qi) __A, + (__v32qi) __B, + (__mmask32) -1); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm256_mask_test_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B) +{ + return (__mmask32) __builtin_ia32_ptestmb256 ((__v32qi) __A, + (__v32qi) __B, __U); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_test_epi16_mask (__m128i __A, __m128i __B) +{ + return (__mmask8) __builtin_ia32_ptestmw128 ((__v8hi) __A, + (__v8hi) __B, + (__mmask8) -1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_mask_test_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B) +{ + return (__mmask8) __builtin_ia32_ptestmw128 ((__v8hi) __A, + (__v8hi) __B, __U); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm256_test_epi16_mask (__m256i __A, __m256i __B) +{ + return (__mmask16) __builtin_ia32_ptestmw256 ((__v16hi) __A, + (__v16hi) __B, + (__mmask16) -1); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm256_mask_test_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B) +{ + return (__mmask16) __builtin_ia32_ptestmw256 ((__v16hi) __A, + (__v16hi) __B, __U); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm_testn_epi8_mask (__m128i __A, __m128i __B) +{ + return (__mmask16) __builtin_ia32_ptestnmb128 ((__v16qi) __A, + (__v16qi) __B, + (__mmask16) -1); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm_mask_testn_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B) +{ + return (__mmask16) __builtin_ia32_ptestnmb128 ((__v16qi) __A, + (__v16qi) __B, __U); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm256_testn_epi8_mask (__m256i __A, __m256i __B) +{ + return (__mmask32) __builtin_ia32_ptestnmb256 ((__v32qi) __A, + (__v32qi) __B, + (__mmask32) -1); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm256_mask_testn_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B) +{ + return (__mmask32) __builtin_ia32_ptestnmb256 ((__v32qi) __A, + (__v32qi) __B, __U); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_testn_epi16_mask (__m128i __A, __m128i __B) +{ + return (__mmask8) __builtin_ia32_ptestnmw128 ((__v8hi) __A, + (__v8hi) __B, + (__mmask8) -1); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_mask_testn_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B) +{ + return (__mmask8) __builtin_ia32_ptestnmw128 ((__v8hi) __A, + (__v8hi) __B, __U); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm256_testn_epi16_mask (__m256i __A, __m256i __B) +{ + return (__mmask16) __builtin_ia32_ptestnmw256 ((__v16hi) __A, + (__v16hi) __B, + (__mmask16) -1); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm256_mask_testn_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B) +{ + return (__mmask16) __builtin_ia32_ptestnmw256 ((__v16hi) __A, + (__v16hi) __B, __U); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm_movepi8_mask (__m128i __A) +{ + return (__mmask16) __builtin_ia32_cvtb2mask128 ((__v16qi) __A); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm256_movepi8_mask (__m256i __A) +{ + return (__mmask32) __builtin_ia32_cvtb2mask256 ((__v32qi) __A); +} + +static __inline__ __mmask8 __DEFAULT_FN_ATTRS +_mm_movepi16_mask (__m128i __A) +{ + return (__mmask8) __builtin_ia32_cvtw2mask128 ((__v8hi) __A); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm256_movepi16_mask (__m256i __A) +{ + return (__mmask16) __builtin_ia32_cvtw2mask256 ((__v16hi) __A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_movm_epi8 (__mmask16 __A) +{ + return (__m128i) __builtin_ia32_cvtmask2b128 (__A); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_movm_epi8 (__mmask32 __A) +{ + return (__m256i) __builtin_ia32_cvtmask2b256 (__A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_movm_epi16 (__mmask8 __A) +{ + return (__m128i) __builtin_ia32_cvtmask2w128 (__A); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_movm_epi16 (__mmask16 __A) +{ + return (__m256i) __builtin_ia32_cvtmask2w256 (__A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_broadcastb_epi8 (__m128i __O, __mmask16 __M, __m128i __A) +{ + return (__m128i)__builtin_ia32_selectb_128(__M, + (__v16qi) _mm_broadcastb_epi8(__A), + (__v16qi) __O); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_broadcastb_epi8 (__mmask16 __M, __m128i __A) +{ + return (__m128i)__builtin_ia32_selectb_128(__M, + (__v16qi) _mm_broadcastb_epi8(__A), + (__v16qi) _mm_setzero_si128()); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_broadcastb_epi8 (__m256i __O, __mmask32 __M, __m128i __A) +{ + return (__m256i)__builtin_ia32_selectb_256(__M, + (__v32qi) _mm256_broadcastb_epi8(__A), + (__v32qi) __O); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_broadcastb_epi8 (__mmask32 __M, __m128i __A) +{ + return (__m256i)__builtin_ia32_selectb_256(__M, + (__v32qi) _mm256_broadcastb_epi8(__A), + (__v32qi) _mm256_setzero_si256()); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_broadcastw_epi16 (__m128i __O, __mmask8 __M, __m128i __A) +{ + return (__m128i)__builtin_ia32_selectw_128(__M, + (__v8hi) _mm_broadcastw_epi16(__A), + (__v8hi) __O); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_broadcastw_epi16 (__mmask8 __M, __m128i __A) +{ + return (__m128i)__builtin_ia32_selectw_128(__M, + (__v8hi) _mm_broadcastw_epi16(__A), + (__v8hi) _mm_setzero_si128()); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_broadcastw_epi16 (__m256i __O, __mmask16 __M, __m128i __A) +{ + return (__m256i)__builtin_ia32_selectw_256(__M, + (__v16hi) _mm256_broadcastw_epi16(__A), + (__v16hi) __O); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_broadcastw_epi16 (__mmask16 __M, __m128i __A) +{ + return (__m256i)__builtin_ia32_selectw_256(__M, + (__v16hi) _mm256_broadcastw_epi16(__A), + (__v16hi) _mm256_setzero_si256()); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_set1_epi16 (__m256i __O, __mmask16 __M, short __A) +{ + return (__m256i) __builtin_ia32_pbroadcastw256_gpr_mask (__A, + (__v16hi) __O, + __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_set1_epi16 (__mmask16 __M, short __A) +{ + return (__m256i) __builtin_ia32_pbroadcastw256_gpr_mask (__A, + (__v16hi) _mm256_setzero_si256 (), + __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_set1_epi16 (__m128i __O, __mmask8 __M, short __A) +{ + return (__m128i) __builtin_ia32_pbroadcastw128_gpr_mask (__A, + (__v8hi) __O, + __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_set1_epi16 (__mmask8 __M, short __A) +{ + return (__m128i) __builtin_ia32_pbroadcastw128_gpr_mask (__A, + (__v8hi) _mm_setzero_si128 (), + __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_permutexvar_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_permvarhi128_mask ((__v8hi) __B, + (__v8hi) __A, + (__v8hi) _mm_undefined_si128 (), + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_permutexvar_epi16 (__mmask8 __M, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_permvarhi128_mask ((__v8hi) __B, + (__v8hi) __A, + (__v8hi) _mm_setzero_si128 (), + (__mmask8) __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_permutexvar_epi16 (__m128i __W, __mmask8 __M, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_permvarhi128_mask ((__v8hi) __B, + (__v8hi) __A, + (__v8hi) __W, + (__mmask8) __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_permutexvar_epi16 (__m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_permvarhi256_mask ((__v16hi) __B, + (__v16hi) __A, + (__v16hi) _mm256_undefined_si256 (), + (__mmask16) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_permutexvar_epi16 (__mmask16 __M, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_permvarhi256_mask ((__v16hi) __B, + (__v16hi) __A, + (__v16hi) _mm256_setzero_si256 (), + (__mmask16) __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_permutexvar_epi16 (__m256i __W, __mmask16 __M, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_permvarhi256_mask ((__v16hi) __B, + (__v16hi) __A, + (__v16hi) __W, + (__mmask16) __M); +} + +#define _mm_mask_alignr_epi8(W, U, A, B, N) __extension__ ({ \ + (__m128i)__builtin_ia32_palignr128_mask((__v16qi)(__m128i)(A), \ + (__v16qi)(__m128i)(B), (int)(N), \ + (__v16qi)(__m128i)(W), \ + (__mmask16)(U)); }) + +#define _mm_maskz_alignr_epi8(U, A, B, N) __extension__ ({ \ + (__m128i)__builtin_ia32_palignr128_mask((__v16qi)(__m128i)(A), \ + (__v16qi)(__m128i)(B), (int)(N), \ + (__v16qi)_mm_setzero_si128(), \ + (__mmask16)(U)); }) + +#define _mm256_mask_alignr_epi8(W, U, A, B, N) __extension__ ({ \ + (__m256i)__builtin_ia32_palignr256_mask((__v32qi)(__m256i)(A), \ + (__v32qi)(__m256i)(B), (int)(N), \ + (__v32qi)(__m256i)(W), \ + (__mmask32)(U)); }) + +#define _mm256_maskz_alignr_epi8(U, A, B, N) __extension__ ({ \ + (__m256i)__builtin_ia32_palignr256_mask((__v32qi)(__m256i)(A), \ + (__v32qi)(__m256i)(B), (int)(N), \ + (__v32qi)_mm256_setzero_si256(), \ + (__mmask32)(U)); }) + +#define _mm_dbsad_epu8(A, B, imm) __extension__ ({ \ + (__m128i)__builtin_ia32_dbpsadbw128_mask((__v16qi)(__m128i)(A), \ + (__v16qi)(__m128i)(B), (int)(imm), \ + (__v8hi)_mm_setzero_hi(), \ + (__mmask8)-1); }) + +#define _mm_mask_dbsad_epu8(W, U, A, B, imm) __extension__ ({ \ + (__m128i)__builtin_ia32_dbpsadbw128_mask((__v16qi)(__m128i)(A), \ + (__v16qi)(__m128i)(B), (int)(imm), \ + (__v8hi)(__m128i)(W), \ + (__mmask8)(U)); }) + +#define _mm_maskz_dbsad_epu8(U, A, B, imm) __extension__ ({ \ + (__m128i)__builtin_ia32_dbpsadbw128_mask((__v16qi)(__m128i)(A), \ + (__v16qi)(__m128i)(B), (int)(imm), \ + (__v8hi)_mm_setzero_si128(), \ + (__mmask8)(U)); }) + +#define _mm256_dbsad_epu8(A, B, imm) __extension__ ({ \ + (__m256i)__builtin_ia32_dbpsadbw256_mask((__v32qi)(__m256i)(A), \ + (__v32qi)(__m256i)(B), (int)(imm), \ + (__v16hi)_mm256_setzero_si256(), \ + (__mmask16)-1); }) + +#define _mm256_mask_dbsad_epu8(W, U, A, B, imm) __extension__ ({ \ + (__m256i)__builtin_ia32_dbpsadbw256_mask((__v32qi)(__m256i)(A), \ + (__v32qi)(__m256i)(B), (int)(imm), \ + (__v16hi)(__m256i)(W), \ + (__mmask16)(U)); }) + +#define _mm256_maskz_dbsad_epu8(U, A, B, imm) __extension__ ({ \ + (__m256i)__builtin_ia32_dbpsadbw256_mask((__v32qi)(__m256i)(A), \ + (__v32qi)(__m256i)(B), (int)(imm), \ + (__v16hi)_mm256_setzero_si256(), \ + (__mmask16)(U)); }) #undef __DEFAULT_FN_ATTRS |