diff options
Diffstat (limited to 'test/CodeGen/AMDGPU/ds_read2.ll')
-rw-r--r-- | test/CodeGen/AMDGPU/ds_read2.ll | 48 |
1 files changed, 24 insertions, 24 deletions
diff --git a/test/CodeGen/AMDGPU/ds_read2.ll b/test/CodeGen/AMDGPU/ds_read2.ll index 9a313230e303..2c474dbe7b08 100644 --- a/test/CodeGen/AMDGPU/ds_read2.ll +++ b/test/CodeGen/AMDGPU/ds_read2.ll @@ -12,7 +12,7 @@ ; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]] ; SI: buffer_store_dword [[RESULT]] ; SI: s_endpgm -define void @simple_read2_f32(float addrspace(1)* %out) #0 { +define amdgpu_kernel void @simple_read2_f32(float addrspace(1)* %out) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i %val0 = load float, float addrspace(3)* %arrayidx0, align 4 @@ -31,7 +31,7 @@ define void @simple_read2_f32(float addrspace(1)* %out) #0 { ; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]] ; SI: buffer_store_dword [[RESULT]] ; SI: s_endpgm -define void @simple_read2_f32_max_offset(float addrspace(1)* %out) #0 { +define amdgpu_kernel void @simple_read2_f32_max_offset(float addrspace(1)* %out) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i %val0 = load float, float addrspace(3)* %arrayidx0, align 4 @@ -49,7 +49,7 @@ define void @simple_read2_f32_max_offset(float addrspace(1)* %out) #0 { ; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} ; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028 ; SI: s_endpgm -define void @simple_read2_f32_too_far(float addrspace(1)* %out) #0 { +define amdgpu_kernel void @simple_read2_f32_too_far(float addrspace(1)* %out) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i %val0 = load float, float addrspace(3)* %arrayidx0, align 4 @@ -66,7 +66,7 @@ define void @simple_read2_f32_too_far(float addrspace(1)* %out) #0 { ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset1:8 ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27 ; SI: s_endpgm -define void @simple_read2_f32_x2(float addrspace(1)* %out) #0 { +define amdgpu_kernel void @simple_read2_f32_x2(float addrspace(1)* %out) #0 { %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %idx.0 = add nsw i32 %tid.x, 0 %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0 @@ -98,7 +98,7 @@ define void @simple_read2_f32_x2(float addrspace(1)* %out) #0 { ; SI: s_barrier ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27 ; SI: s_endpgm -define void @simple_read2_f32_x2_barrier(float addrspace(1)* %out) #0 { +define amdgpu_kernel void @simple_read2_f32_x2_barrier(float addrspace(1)* %out) #0 { %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %idx.0 = add nsw i32 %tid.x, 0 %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0 @@ -133,7 +133,7 @@ define void @simple_read2_f32_x2_barrier(float addrspace(1)* %out) #0 { ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:2 offset1:8 ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27 ; SI: s_endpgm -define void @simple_read2_f32_x2_nonzero_base(float addrspace(1)* %out) #0 { +define amdgpu_kernel void @simple_read2_f32_x2_nonzero_base(float addrspace(1)* %out) #0 { %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %idx.0 = add nsw i32 %tid.x, 2 %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0 @@ -170,7 +170,7 @@ define void @simple_read2_f32_x2_nonzero_base(float addrspace(1)* %out) #0 { ; SI: ds_read_b32 ; SI: ds_read_b32 ; SI: s_endpgm -define void @read2_ptr_is_subreg_arg_f32(float addrspace(1)* %out, <2 x float addrspace(3)*> %lds.ptr) #0 { +define amdgpu_kernel void @read2_ptr_is_subreg_arg_f32(float addrspace(1)* %out, <2 x float addrspace(3)*> %lds.ptr) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0 %index.1 = insertelement <2 x i32> %index.0, i32 8, i32 0 @@ -196,7 +196,7 @@ define void @read2_ptr_is_subreg_arg_f32(float addrspace(1)* %out, <2 x float ad ; SI: ds_read_b32 ; SI: ds_read_b32 ; SI: s_endpgm -define void @read2_ptr_is_subreg_arg_offset_f32(float addrspace(1)* %out, <2 x float addrspace(3)*> %lds.ptr) #0 { +define amdgpu_kernel void @read2_ptr_is_subreg_arg_offset_f32(float addrspace(1)* %out, <2 x float addrspace(3)*> %lds.ptr) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0 %index.1 = insertelement <2 x i32> %index.0, i32 8, i32 0 @@ -219,7 +219,7 @@ define void @read2_ptr_is_subreg_arg_offset_f32(float addrspace(1)* %out, <2 x f ; SI-LABEL: {{^}}read2_ptr_is_subreg_f32: ; SI: ds_read2_b32 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset1:8{{$}} ; SI: s_endpgm -define void @read2_ptr_is_subreg_f32(float addrspace(1)* %out) #0 { +define amdgpu_kernel void @read2_ptr_is_subreg_f32(float addrspace(1)* %out) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %ptr.0 = insertelement <2 x [512 x float] addrspace(3)*> undef, [512 x float] addrspace(3)* @lds, i32 0 %ptr.1 = insertelement <2 x [512 x float] addrspace(3)*> %ptr.0, [512 x float] addrspace(3)* @lds, i32 1 @@ -243,7 +243,7 @@ define void @read2_ptr_is_subreg_f32(float addrspace(1)* %out) #0 { ; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} ; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:32 ; SI: s_endpgm -define void @simple_read2_f32_volatile_0(float addrspace(1)* %out) #0 { +define amdgpu_kernel void @simple_read2_f32_volatile_0(float addrspace(1)* %out) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i %val0 = load volatile float, float addrspace(3)* %arrayidx0, align 4 @@ -261,7 +261,7 @@ define void @simple_read2_f32_volatile_0(float addrspace(1)* %out) #0 { ; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} ; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:32 ; SI: s_endpgm -define void @simple_read2_f32_volatile_1(float addrspace(1)* %out) #0 { +define amdgpu_kernel void @simple_read2_f32_volatile_1(float addrspace(1)* %out) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i %val0 = load float, float addrspace(3)* %arrayidx0, align 4 @@ -280,7 +280,7 @@ define void @simple_read2_f32_volatile_1(float addrspace(1)* %out) #0 { ; SI-LABEL: @unaligned_read2_f32 ; SI-NOT: ds_read2_b32 ; SI: s_endpgm -define void @unaligned_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 { +define amdgpu_kernel void @unaligned_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %x.i %val0 = load float, float addrspace(3)* %arrayidx0, align 1 @@ -296,7 +296,7 @@ define void @unaligned_read2_f32(float addrspace(1)* %out, float addrspace(3)* % ; SI-LABEL: @misaligned_2_simple_read2_f32 ; SI-NOT: ds_read2_b32 ; SI: s_endpgm -define void @misaligned_2_simple_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 { +define amdgpu_kernel void @misaligned_2_simple_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %x.i %val0 = load float, float addrspace(3)* %arrayidx0, align 2 @@ -315,7 +315,7 @@ define void @misaligned_2_simple_read2_f32(float addrspace(1)* %out, float addrs ; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}} ; SI: buffer_store_dwordx2 [[RESULT]] ; SI: s_endpgm -define void @simple_read2_f64(double addrspace(1)* %out) #0 { +define amdgpu_kernel void @simple_read2_f64(double addrspace(1)* %out) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i %val0 = load double, double addrspace(3)* %arrayidx0, align 8 @@ -331,7 +331,7 @@ define void @simple_read2_f64(double addrspace(1)* %out) #0 { ; SI-LABEL: @simple_read2_f64_max_offset ; SI: ds_read2_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:255 ; SI: s_endpgm -define void @simple_read2_f64_max_offset(double addrspace(1)* %out) #0 { +define amdgpu_kernel void @simple_read2_f64_max_offset(double addrspace(1)* %out) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i %val0 = load double, double addrspace(3)* %arrayidx0, align 8 @@ -349,7 +349,7 @@ define void @simple_read2_f64_max_offset(double addrspace(1)* %out) #0 { ; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} ; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:2056 ; SI: s_endpgm -define void @simple_read2_f64_too_far(double addrspace(1)* %out) #0 { +define amdgpu_kernel void @simple_read2_f64_too_far(double addrspace(1)* %out) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i %val0 = load double, double addrspace(3)* %arrayidx0, align 8 @@ -367,7 +367,7 @@ define void @simple_read2_f64_too_far(double addrspace(1)* %out) #0 { ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset1:1 ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:14 offset1:15 ; SI: s_endpgm -define void @misaligned_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 { +define amdgpu_kernel void @misaligned_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i %val0 = load double, double addrspace(3)* %arrayidx0, align 4 @@ -385,7 +385,7 @@ define void @misaligned_read2_f64(double addrspace(1)* %out, double addrspace(3) ; SI-LABEL: @load_constant_adjacent_offsets ; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset1:1 -define void @load_constant_adjacent_offsets(i32 addrspace(1)* %out) { +define amdgpu_kernel void @load_constant_adjacent_offsets(i32 addrspace(1)* %out) { %val0 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4 %val1 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4 %sum = add i32 %val0, %val1 @@ -396,7 +396,7 @@ define void @load_constant_adjacent_offsets(i32 addrspace(1)* %out) { ; SI-LABEL: @load_constant_disjoint_offsets ; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset1:2 -define void @load_constant_disjoint_offsets(i32 addrspace(1)* %out) { +define amdgpu_kernel void @load_constant_disjoint_offsets(i32 addrspace(1)* %out) { %val0 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4 %val1 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4 %sum = add i32 %val0, %val1 @@ -410,7 +410,7 @@ define void @load_constant_disjoint_offsets(i32 addrspace(1)* %out) { ; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset1:1 ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:2 offset1:3 -define void @load_misaligned64_constant_offsets(i64 addrspace(1)* %out) { +define amdgpu_kernel void @load_misaligned64_constant_offsets(i64 addrspace(1)* %out) { %val0 = load i64, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4 %val1 = load i64, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4 %sum = add i64 %val0, %val1 @@ -426,7 +426,7 @@ define void @load_misaligned64_constant_offsets(i64 addrspace(1)* %out) { ; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE0]] offset1:1 ; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE1]] offset1:1 ; SI: s_endpgm -define void @load_misaligned64_constant_large_offsets(i64 addrspace(1)* %out) { +define amdgpu_kernel void @load_misaligned64_constant_large_offsets(i64 addrspace(1)* %out) { %val0 = load i64, i64 addrspace(3)* getelementptr inbounds ([4096 x i64], [4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4 %val1 = load i64, i64 addrspace(3)* getelementptr inbounds ([4096 x i64], [4096 x i64] addrspace(3)* @bar.large, i32 0, i32 4095), align 4 %sum = add i64 %val0, %val1 @@ -437,7 +437,7 @@ define void @load_misaligned64_constant_large_offsets(i64 addrspace(1)* %out) { @sgemm.lA = internal unnamed_addr addrspace(3) global [264 x float] undef, align 4 @sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] undef, align 4 -define void @sgemm_inner_loop_read2_sequence(float addrspace(1)* %C, i32 %lda, i32 %ldb) #0 { +define amdgpu_kernel void @sgemm_inner_loop_read2_sequence(float addrspace(1)* %C, i32 %lda, i32 %ldb) #0 { %x.i = tail call i32 @llvm.amdgcn.workgroup.id.x() #1 %y.i = tail call i32 @llvm.amdgcn.workitem.id.y() #1 %arrayidx44 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %x.i @@ -481,13 +481,13 @@ define void @sgemm_inner_loop_read2_sequence(float addrspace(1)* %C, i32 %lda, i ret void } -define void @misaligned_read2_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(3)* %in) #0 { +define amdgpu_kernel void @misaligned_read2_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(3)* %in) #0 { %load = load <2 x i32>, <2 x i32> addrspace(3)* %in, align 4 store <2 x i32> %load, <2 x i32> addrspace(1)* %out, align 8 ret void } -define void @misaligned_read2_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %in) #0 { +define amdgpu_kernel void @misaligned_read2_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %in) #0 { %load = load i64, i64 addrspace(3)* %in, align 4 store i64 %load, i64 addrspace(1)* %out, align 8 ret void |