diff options
Diffstat (limited to 'test/CodeGen/AMDGPU/salu-to-valu.ll')
-rw-r--r-- | test/CodeGen/AMDGPU/salu-to-valu.ll | 40 |
1 files changed, 20 insertions, 20 deletions
diff --git a/test/CodeGen/AMDGPU/salu-to-valu.ll b/test/CodeGen/AMDGPU/salu-to-valu.ll index 37083fbbd3c5..6e1dd1638333 100644 --- a/test/CodeGen/AMDGPU/salu-to-valu.ll +++ b/test/CodeGen/AMDGPU/salu-to-valu.ll @@ -24,7 +24,7 @@ declare i32 @llvm.amdgcn.workitem.id.y() #0 ; GCN-HSA: flat_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}} ; GCN-HSA: flat_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}} -define void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { +define amdgpu_kernel void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { entry: %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = call i32 @llvm.amdgcn.workitem.id.y() @@ -55,17 +55,17 @@ done: ; preds = %loop ; GCN-LABEL: {{^}}smrd_valu: ; SI: s_movk_i32 [[OFFSET:s[0-9]+]], 0x2ee0 +; SI: s_mov_b32 ; GCN: v_readfirstlane_b32 s[[PTR_LO:[0-9]+]], v{{[0-9]+}} ; GCN: v_readfirstlane_b32 s[[PTR_HI:[0-9]+]], v{{[0-9]+}} ; SI: s_nop 3 ; SI: s_load_dword [[OUT:s[0-9]+]], s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, [[OFFSET]] -; SI: s_mov_b32 ; CI: s_load_dword [[OUT:s[0-9]+]], s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0xbb8 ; GCN: v_mov_b32_e32 [[V_OUT:v[0-9]+]], [[OUT]] ; GCN-NOHSA: buffer_store_dword [[V_OUT]] ; GCN-HSA: flat_store_dword {{.*}}, [[V_OUT]] -define void @smrd_valu(i32 addrspace(2)* addrspace(1)* %in, i32 %a, i32 %b, i32 addrspace(1)* %out) #1 { +define amdgpu_kernel void @smrd_valu(i32 addrspace(2)* addrspace(1)* %in, i32 %a, i32 %b, i32 addrspace(1)* %out) #1 { entry: %tmp = icmp ne i32 %a, 0 br i1 %tmp, label %if, label %else @@ -93,7 +93,7 @@ endif: ; preds = %else, %if ; GCN-NOHSA-NOT: v_add ; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16{{$}} ; GCN-HSA: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] -define void @smrd_valu2(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in) #1 { +define amdgpu_kernel void @smrd_valu2(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in) #1 { entry: %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = add i32 %tmp, 4 @@ -113,7 +113,7 @@ entry: ; GCN-NOHSA: buffer_store_dword ; GCN-HSA: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] ; GCN-HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} -define void @smrd_valu_ci_offset(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %c) #1 { +define amdgpu_kernel void @smrd_valu_ci_offset(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %c) #1 { entry: %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp2 = getelementptr i32, i32 addrspace(2)* %in, i32 %tmp @@ -133,7 +133,7 @@ entry: ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} ; GCN-NOHSA: buffer_store_dwordx2 ; GCN-HSA: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] -define void @smrd_valu_ci_offset_x2(i64 addrspace(1)* %out, i64 addrspace(2)* %in, i64 %c) #1 { +define amdgpu_kernel void @smrd_valu_ci_offset_x2(i64 addrspace(1)* %out, i64 addrspace(2)* %in, i64 %c) #1 { entry: %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp2 = getelementptr i64, i64 addrspace(2)* %in, i32 %tmp @@ -155,7 +155,7 @@ entry: ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} ; GCN-NOHSA: buffer_store_dwordx4 ; GCN-HSA: flat_load_dwordx4 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] -define void @smrd_valu_ci_offset_x4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(2)* %in, <4 x i32> %c) #1 { +define amdgpu_kernel void @smrd_valu_ci_offset_x4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(2)* %in, <4 x i32> %c) #1 { entry: %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp2 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %in, i32 %tmp @@ -189,7 +189,7 @@ entry: ; GCN-NOHSA: buffer_store_dwordx4 ; GCN-HSA: flat_load_dwordx4 ; GCN-HSA: flat_load_dwordx4 -define void @smrd_valu_ci_offset_x8(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(2)* %in, <8 x i32> %c) #1 { +define amdgpu_kernel void @smrd_valu_ci_offset_x8(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(2)* %in, <8 x i32> %c) #1 { entry: %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp2 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %in, i32 %tmp @@ -230,7 +230,7 @@ entry: ; GCN-HSA: flat_load_dwordx4 ; GCN: s_endpgm -define void @smrd_valu_ci_offset_x16(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(2)* %in, <16 x i32> %c) #1 { +define amdgpu_kernel void @smrd_valu_ci_offset_x16(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(2)* %in, <16 x i32> %c) #1 { entry: %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp2 = getelementptr <16 x i32>, <16 x i32> addrspace(2)* %in, i32 %tmp @@ -247,7 +247,7 @@ entry: ; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, s{{[0-9]+}}, [[MOVED]] ; GCN-NOHSA: buffer_store_dword [[ADD]] ; GCN-HSA: flat_store_dword {{.*}}, [[ADD]] -define void @smrd_valu2_salu_user(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in, i32 %a) #1 { +define amdgpu_kernel void @smrd_valu2_salu_user(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in, i32 %a) #1 { entry: %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = add i32 %tmp, 4 @@ -261,7 +261,7 @@ entry: ; GCN-LABEL: {{^}}smrd_valu2_max_smrd_offset: ; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1020{{$}} ; GCN-HSA flat_load_dword v{{[0-9]}}, v{{[0-9]+:[0-9]+}} -define void @smrd_valu2_max_smrd_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) #1 { +define amdgpu_kernel void @smrd_valu2_max_smrd_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) #1 { entry: %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = add i32 %tmp, 4 @@ -275,7 +275,7 @@ entry: ; GCN-NOHSA-NOT: v_add ; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1024{{$}} ; GCN-HSA: flat_load_dword v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}] -define void @smrd_valu2_mubuf_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) #1 { +define amdgpu_kernel void @smrd_valu2_mubuf_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) #1 { entry: %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = add i32 %tmp, 4 @@ -290,7 +290,7 @@ entry: ; GCN-NOHSA: buffer_load_dwordx4 ; GCN-HSA: flat_load_dwordx4 ; GCN-HSA: flat_load_dwordx4 -define void @s_load_imm_v8i32(<8 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 { +define amdgpu_kernel void @s_load_imm_v8i32(<8 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 { entry: %tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0 @@ -313,7 +313,7 @@ entry: ; GCN-NOHSA: buffer_store_dword ; GCN-HSA: flat_load_dwordx4 ; GCN-HSA: flat_load_dwordx4 -define void @s_load_imm_v8i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 { +define amdgpu_kernel void @s_load_imm_v8i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 { entry: %tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0 @@ -350,7 +350,7 @@ entry: ; GCN-HSA: flat_load_dwordx4 ; GCN-HSA: flat_load_dwordx4 ; GCN-HSA: flat_load_dwordx4 -define void @s_load_imm_v16i32(<16 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 { +define amdgpu_kernel void @s_load_imm_v16i32(<16 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 { entry: %tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0 @@ -385,7 +385,7 @@ entry: ; GCN-HSA: flat_load_dwordx4 ; GCN-HSA: flat_load_dwordx4 ; GCN-HSA: flat_load_dwordx4 -define void @s_load_imm_v16i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 { +define amdgpu_kernel void @s_load_imm_v16i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 { entry: %tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0 @@ -439,9 +439,9 @@ entry: ; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1 ; GCN-NOHSA: buffer_store_dword [[ONE]] ; GCN-HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[ONE]] -; GCN; {{^}}[[EXIT]]: +; GCN: {{^}}[[EXIT]]: ; GCN: s_endpgm -define void @sopc_vopc_legalize_bug(i32 %cond, i32 addrspace(1)* %out, i32 addrspace(1)* %in) { +define amdgpu_kernel void @sopc_vopc_legalize_bug(i32 %cond, i32 addrspace(1)* %out, i32 addrspace(1)* %in) { bb3: ; preds = %bb2 %tmp0 = bitcast i32 %cond to float %tmp1 = fadd float %tmp0, 2.500000e-01 @@ -459,7 +459,7 @@ bb7: ; preds = %bb3 ; GCN-LABEL: {{^}}phi_visit_order: ; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, 1, v{{[0-9]+}} -define void @phi_visit_order() { +define amdgpu_kernel void @phi_visit_order() { bb: br label %bb1 @@ -484,7 +484,7 @@ bb4: ; GCN: [[LOOP_LABEL:[0-9a-zA-Z_]+]]: ; GCN: s_xor_b32 [[B]], [[B]], [[A]] ; GCN: s_cbranch_scc{{[01]}} [[LOOP_LABEL]] -define void @phi_imm_in_sgprs(i32 addrspace(3)* %out, i32 %cond) { +define amdgpu_kernel void @phi_imm_in_sgprs(i32 addrspace(3)* %out, i32 %cond) { entry: br label %loop |