Diffstat (limited to 'test/CodeGen/X86/masked_memop.ll')
-rw-r--r-- | test/CodeGen/X86/masked_memop.ll | 70
1 file changed, 38 insertions(+), 32 deletions(-)
diff --git a/test/CodeGen/X86/masked_memop.ll b/test/CodeGen/X86/masked_memop.ll
index 4e65b169c7e6..3c616e8a9f43 100644
--- a/test/CodeGen/X86/masked_memop.ll
+++ b/test/CodeGen/X86/masked_memop.ll
@@ -29,8 +29,7 @@ define <2 x double> @test6(<2 x i64> %trigger, <2 x double>* %addr, <2 x double>
 ; SKX: ## BB#0:
 ; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
 ; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k1
-; SKX-NEXT: vmovupd (%rdi), %xmm1 {%k1}
-; SKX-NEXT: vmovapd %xmm1, %xmm0
+; SKX-NEXT: vblendmpd (%rdi), %xmm1, %xmm0 {%k1}
 ; SKX-NEXT: retq
 %mask = icmp eq <2 x i64> %trigger, zeroinitializer
 %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1>%mask, <2 x double>%dst)
@@ -58,8 +57,7 @@ define <4 x float> @test7(<4 x i32> %trigger, <4 x float>* %addr, <4 x float> %d
 ; SKX: ## BB#0:
 ; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
 ; SKX-NEXT: vpcmpeqd %xmm2, %xmm0, %k1
-; SKX-NEXT: vmovups (%rdi), %xmm1 {%k1}
-; SKX-NEXT: vmovaps %xmm1, %xmm0
+; SKX-NEXT: vblendmps (%rdi), %xmm1, %xmm0 {%k1}
 ; SKX-NEXT: retq
 %mask = icmp eq <4 x i32> %trigger, zeroinitializer
 %res = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %addr, i32 4, <4 x i1>%mask, <4 x float>%dst)
@@ -95,8 +93,7 @@ define <4 x i32> @test8(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %dst) {
 ; SKX: ## BB#0:
 ; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
 ; SKX-NEXT: vpcmpeqd %xmm2, %xmm0, %k1
-; SKX-NEXT: vmovdqu32 (%rdi), %xmm1 {%k1}
-; SKX-NEXT: vmovdqa %xmm1, %xmm0
+; SKX-NEXT: vpblendmd (%rdi), %xmm1, %xmm0 {%k1}
 ; SKX-NEXT: retq
 %mask = icmp eq <4 x i32> %trigger, zeroinitializer
 %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1>%mask, <4 x i32>%dst)
@@ -171,8 +168,7 @@ define <4 x double> @test10(<4 x i32> %trigger, <4 x double>* %addr, <4 x double
 ; SKX: ## BB#0:
 ; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
 ; SKX-NEXT: vpcmpeqd %xmm2, %xmm0, %k1
-; SKX-NEXT: vmovapd (%rdi), %ymm1 {%k1}
-; SKX-NEXT: vmovapd %ymm1, %ymm0
+; SKX-NEXT: vblendmpd (%rdi), %ymm1, %ymm0 {%k1}
 ; SKX-NEXT: retq
 %mask = icmp eq <4 x i32> %trigger, zeroinitializer
 %res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %addr, i32 32, <4 x i1>%mask, <4 x double>%dst)
@@ -246,16 +242,15 @@ define <8 x float> @test11a(<8 x i32> %trigger, <8 x float>* %addr, <8 x float>
 ; AVX512F-NEXT: vpcmpeqd %zmm2, %zmm0, %k0
 ; AVX512F-NEXT: kshiftlw $8, %k0, %k0
 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
-; AVX512F-NEXT: vmovups (%rdi), %zmm1 {%k1}
-; AVX512F-NEXT: vmovaps %ymm1, %ymm0
+; AVX512F-NEXT: vblendmps (%rdi), %zmm1, %zmm0 {%k1}
+; AVX512F-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
 ; AVX512F-NEXT: retq
 ;
 ; SKX-LABEL: test11a:
 ; SKX: ## BB#0:
 ; SKX-NEXT: vpxor %ymm2, %ymm2, %ymm2
 ; SKX-NEXT: vpcmpeqd %ymm2, %ymm0, %k1
-; SKX-NEXT: vmovaps (%rdi), %ymm1 {%k1}
-; SKX-NEXT: vmovaps %ymm1, %ymm0
+; SKX-NEXT: vblendmps (%rdi), %ymm1, %ymm0 {%k1}
 ; SKX-NEXT: retq
 %mask = icmp eq <8 x i32> %trigger, zeroinitializer
 %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 32, <8 x i1>%mask, <8 x float>%dst)
@@ -293,16 +288,15 @@ define <8 x i32> @test11b(<8 x i1> %mask, <8 x i32>* %addr, <8 x i32> %dst) {
 ; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
 ; AVX512F-NEXT: kshiftlw $8, %k0, %k0
 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
-; AVX512F-NEXT: vmovdqu32 (%rdi), %zmm1 {%k1}
-; AVX512F-NEXT: vmovdqa %ymm1, %ymm0
+; AVX512F-NEXT: vpblendmd (%rdi), %zmm1, %zmm0 {%k1}
+; AVX512F-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
 ; AVX512F-NEXT: retq
 ;
 ; SKX-LABEL: test11b:
 ; SKX: ## BB#0:
 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
 ; SKX-NEXT: vpmovw2m %xmm0, %k1
-; SKX-NEXT: vmovdqu32 (%rdi), %ymm1 {%k1}
-; SKX-NEXT: vmovdqa %ymm1, %ymm0
+; SKX-NEXT: vpblendmd (%rdi), %ymm1, %ymm0 {%k1}
 ; SKX-NEXT: retq
 %res = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %addr, i32 4, <8 x i1>%mask, <8 x i32>%dst)
 ret <8 x i32> %res
@@ -421,6 +415,7 @@ define void @test12(<8 x i32> %trigger, <8 x i32>* %addr, <8 x i32> %val) {
 ; AVX512F-NEXT: kshiftlw $8, %k0, %k0
 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
 ; AVX512F-NEXT: vmovdqu32 %zmm1, (%rdi) {%k1}
+; AVX512F-NEXT: vzeroupper
 ; AVX512F-NEXT: retq
 ;
 ; SKX-LABEL: test12:
@@ -428,6 +423,7 @@ define void @test12(<8 x i32> %trigger, <8 x i32>* %addr, <8 x i32> %val) {
 ; SKX-NEXT: vpxor %ymm2, %ymm2, %ymm2
 ; SKX-NEXT: vpcmpeqd %ymm2, %ymm0, %k1
 ; SKX-NEXT: vmovdqu32 %ymm1, (%rdi) {%k1}
+; SKX-NEXT: vzeroupper
 ; SKX-NEXT: retq
 %mask = icmp eq <8 x i32> %trigger, zeroinitializer
 call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32>%val, <8 x i32>* %addr, i32 4, <8 x i1>%mask)
@@ -557,8 +553,7 @@ define <2 x float> @test16(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %
 ; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k0
 ; SKX-NEXT: kshiftlw $14, %k0, %k0
 ; SKX-NEXT: kshiftrw $14, %k0, %k1
-; SKX-NEXT: vmovups (%rdi), %xmm1 {%k1}
-; SKX-NEXT: vmovaps %xmm1, %xmm0
+; SKX-NEXT: vblendmps (%rdi), %xmm1, %xmm0 {%k1}
 ; SKX-NEXT: retq
 %mask = icmp eq <2 x i32> %trigger, zeroinitializer
 %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1>%mask, <2 x float>%dst)
@@ -702,7 +697,7 @@ define <4 x float> @mload_constmask_v4f32(<4 x float>* %addr, <4 x float> %dst)
 ; SKX-LABEL: mload_constmask_v4f32:
 ; SKX: ## BB#0:
 ; SKX-NEXT: movb $13, %al
-; SKX-NEXT: kmovw %eax, %k1
+; SKX-NEXT: kmovd %eax, %k1
 ; SKX-NEXT: vmovups (%rdi), %xmm0 {%k1}
 ; SKX-NEXT: retq
 %res = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %addr, i32 4, <4 x i1> <i1 1, i1 0, i1 1, i1 1>, <4 x float> %dst)
@@ -736,7 +731,7 @@ define <4 x i32> @mload_constmask_v4i32(<4 x i32>* %addr, <4 x i32> %dst) {
 ; SKX-LABEL: mload_constmask_v4i32:
 ; SKX: ## BB#0:
 ; SKX-NEXT: movb $14, %al
-; SKX-NEXT: kmovw %eax, %k1
+; SKX-NEXT: kmovd %eax, %k1
 ; SKX-NEXT: vmovdqu32 (%rdi), %xmm0 {%k1}
 ; SKX-NEXT: retq
 %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> <i1 0, i1 1, i1 1, i1 1>, <4 x i32> %dst)
@@ -765,7 +760,7 @@ define <8 x float> @mload_constmask_v8f32(<8 x float>* %addr, <8 x float> %dst)
 ; SKX-LABEL: mload_constmask_v8f32:
 ; SKX: ## BB#0:
 ; SKX-NEXT: movb $7, %al
-; SKX-NEXT: kmovw %eax, %k1
+; SKX-NEXT: kmovd %eax, %k1
 ; SKX-NEXT: vmovups (%rdi), %ymm0 {%k1}
 ; SKX-NEXT: retq
 %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> <i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0>, <8 x float> %dst)
@@ -790,7 +785,7 @@ define <4 x double> @mload_constmask_v4f64(<4 x double>* %addr, <4 x double> %ds
 ; SKX-LABEL: mload_constmask_v4f64:
 ; SKX: ## BB#0:
 ; SKX-NEXT: movb $7, %al
-; SKX-NEXT: kmovw %eax, %k1
+; SKX-NEXT: kmovd %eax, %k1
 ; SKX-NEXT: vmovupd (%rdi), %ymm0 {%k1}
 ; SKX-NEXT: retq
 %res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %addr, i32 4, <4 x i1> <i1 1, i1 1, i1 1, i1 0>, <4 x double> %dst)
@@ -822,7 +817,7 @@ define <8 x i32> @mload_constmask_v8i32(<8 x i32>* %addr, <8 x i32> %dst) {
 ; SKX-LABEL: mload_constmask_v8i32:
 ; SKX: ## BB#0:
 ; SKX-NEXT: movb $-121, %al
-; SKX-NEXT: kmovw %eax, %k1
+; SKX-NEXT: kmovd %eax, %k1
 ; SKX-NEXT: vmovdqu32 (%rdi), %ymm0 {%k1}
 ; SKX-NEXT: retq
 %res = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %addr, i32 4, <8 x i1> <i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1>, <8 x i32> %dst)
@@ -850,7 +845,7 @@ define <4 x i64> @mload_constmask_v4i64(<4 x i64>* %addr, <4 x i64> %dst) {
 ; SKX-LABEL: mload_constmask_v4i64:
 ; SKX: ## BB#0:
 ; SKX-NEXT: movb $9, %al
-; SKX-NEXT: kmovw %eax, %k1
+; SKX-NEXT: kmovd %eax, %k1
 ; SKX-NEXT: vmovdqu64 (%rdi), %ymm0 {%k1}
 ; SKX-NEXT: retq
 %res = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* %addr, i32 4, <4 x i1> <i1 1, i1 0, i1 0, i1 1>, <4 x i64> %dst)
@@ -866,12 +861,19 @@ define <8 x double> @mload_constmask_v8f64(<8 x double>* %addr, <8 x double> %ds
 ; AVX-NEXT: vblendpd {{.*#+}} ymm0 = mem[0,1,2],ymm0[3]
 ; AVX-NEXT: retq
 ;
-; AVX512-LABEL: mload_constmask_v8f64:
-; AVX512: ## BB#0:
-; AVX512-NEXT: movb $-121, %al
-; AVX512-NEXT: kmovw %eax, %k1
-; AVX512-NEXT: vmovupd (%rdi), %zmm0 {%k1}
-; AVX512-NEXT: retq
+; AVX512F-LABEL: mload_constmask_v8f64:
+; AVX512F: ## BB#0:
+; AVX512F-NEXT: movb $-121, %al
+; AVX512F-NEXT: kmovw %eax, %k1
+; AVX512F-NEXT: vmovupd (%rdi), %zmm0 {%k1}
+; AVX512F-NEXT: retq
+;
+; SKX-LABEL: mload_constmask_v8f64:
+; SKX: ## BB#0:
+; SKX-NEXT: movb $-121, %al
+; SKX-NEXT: kmovd %eax, %k1
+; SKX-NEXT: vmovupd (%rdi), %zmm0 {%k1}
+; SKX-NEXT: retq
 %res = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* %addr, i32 4, <8 x i1> <i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1>, <8 x double> %dst)
 ret <8 x double> %res
 }
@@ -894,7 +896,7 @@ define <4 x double> @mload_constmask_v4f64_undef_passthrough(<4 x double>* %addr
 ; SKX-LABEL: mload_constmask_v4f64_undef_passthrough:
 ; SKX: ## BB#0:
 ; SKX-NEXT: movb $7, %al
-; SKX-NEXT: kmovw %eax, %k1
+; SKX-NEXT: kmovd %eax, %k1
 ; SKX-NEXT: vmovupd (%rdi), %ymm0 {%k1} {z}
 ; SKX-NEXT: retq
 %res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %addr, i32 4, <4 x i1> <i1 1, i1 1, i1 1, i1 0>, <4 x double> undef)
@@ -923,7 +925,7 @@ define <4 x i64> @mload_constmask_v4i64_undef_passthrough(<4 x i64>* %addr) {
 ; SKX-LABEL: mload_constmask_v4i64_undef_passthrough:
 ; SKX: ## BB#0:
 ; SKX-NEXT: movb $6, %al
-; SKX-NEXT: kmovw %eax, %k1
+; SKX-NEXT: kmovd %eax, %k1
 ; SKX-NEXT: vmovdqu64 (%rdi), %ymm0 {%k1} {z}
 ; SKX-NEXT: retq
 %res = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* %addr, i32 4, <4 x i1> <i1 0, i1 1, i1 1, i1 0>, <4 x i64> undef)
@@ -1005,12 +1007,14 @@ define void @one_mask_bit_set3(<4 x i64>* %addr, <4 x i64> %val) {
 ; AVX512F: ## BB#0:
 ; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; AVX512F-NEXT: vmovlps %xmm0, 16(%rdi)
+; AVX512F-NEXT: vzeroupper
 ; AVX512F-NEXT: retq
 ;
 ; SKX-LABEL: one_mask_bit_set3:
 ; SKX: ## BB#0:
 ; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0
 ; SKX-NEXT: vmovq %xmm0, 16(%rdi)
+; SKX-NEXT: vzeroupper
 ; SKX-NEXT: retq
 call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %val, <4 x i64>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 true, i1 false>)
 ret void
@@ -1030,6 +1034,7 @@ define void @one_mask_bit_set4(<4 x double>* %addr, <4 x double> %val) {
 ; AVX512: ## BB#0:
 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; AVX512-NEXT: vmovhpd %xmm0, 24(%rdi)
+; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
 call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> %val, <4 x double>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 false, i1 true>)
 ret void
@@ -1049,6 +1054,7 @@ define void @one_mask_bit_set5(<8 x double>* %addr, <8 x double> %val) {
 ; AVX512: ## BB#0:
 ; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0
 ; AVX512-NEXT: vmovlps %xmm0, 48(%rdi)
+; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
 call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> %val, <8 x double>* %addr, i32 4, <8 x i1><i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 false>)
 ret void
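
For context, the pattern these tests exercise is a masked load with a passthru operand: lanes whose mask bit is set are loaded from memory, and the remaining lanes keep the corresponding element of %dst. A minimal standalone sketch in LLVM IR (the function name @masked_load_sketch is made up for illustration; the intrinsic and its signature are the ones already called in the tests above):

; Lanes selected by %mask load from %addr; unselected lanes keep %dst.
declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>*, i32, <2 x i1>, <2 x double>)

define <2 x double> @masked_load_sketch(<2 x i64> %trigger, <2 x double>* %addr, <2 x double> %dst) {
  %mask = icmp eq <2 x i64> %trigger, zeroinitializer
  %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
  ret <2 x double> %res
}

The updated CHECK lines reflect two codegen changes visible in the diff: AVX-512 targets now select a single blend-from-memory instruction for this pattern (e.g. vblendmpd (%rdi), %xmm1, %xmm0 {%k1}) instead of a masked vmovup*/vmovdqu* into a temporary register followed by a plain register move, and functions that dirty the upper half of a YMM/ZMM register now emit vzeroupper before returning, which avoids AVX/SSE transition penalties in the caller.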