diff options
Diffstat (limited to 'test/CodeGen/AMDGPU/commute-compares.ll')
-rw-r--r-- | test/CodeGen/AMDGPU/commute-compares.ll | 106 |
1 files changed, 53 insertions, 53 deletions
diff --git a/test/CodeGen/AMDGPU/commute-compares.ll b/test/CodeGen/AMDGPU/commute-compares.ll index a4c51b233f41..973c4544d97a 100644 --- a/test/CodeGen/AMDGPU/commute-compares.ll +++ b/test/CodeGen/AMDGPU/commute-compares.ll @@ -8,7 +8,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() #0 ; GCN-LABEL: {{^}}commute_eq_64_i32: ; GCN: v_cmp_eq_u32_e32 vcc, 64, v{{[0-9]+}} -define void @commute_eq_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_eq_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -21,7 +21,7 @@ define void @commute_eq_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 ; GCN-LABEL: {{^}}commute_ne_64_i32: ; GCN: v_cmp_ne_u32_e32 vcc, 64, v{{[0-9]+}} -define void @commute_ne_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_ne_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -36,7 +36,7 @@ define void @commute_ne_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 ; GCN-LABEL: {{^}}commute_ne_litk_i32: ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3039 ; GCN: v_cmp_ne_u32_e32 vcc, [[K]], v{{[0-9]+}} -define void @commute_ne_litk_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_ne_litk_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -49,7 +49,7 @@ define void @commute_ne_litk_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) ; GCN-LABEL: {{^}}commute_ugt_64_i32: ; GCN: v_cmp_lt_u32_e32 vcc, 64, v{{[0-9]+}} -define void @commute_ugt_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_ugt_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -62,7 +62,7 @@ define void @commute_ugt_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) # ; GCN-LABEL: {{^}}commute_uge_64_i32: ; GCN: v_cmp_lt_u32_e32 vcc, 63, v{{[0-9]+}} -define void @commute_uge_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_uge_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -75,7 +75,7 @@ define void @commute_uge_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) # ; GCN-LABEL: {{^}}commute_ult_64_i32: ; GCN: v_cmp_gt_u32_e32 vcc, 64, v{{[0-9]+}} -define void @commute_ult_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_ult_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -88,7 +88,7 @@ define void @commute_ult_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) # ; GCN-LABEL: {{^}}commute_ule_63_i32: ; GCN: v_cmp_gt_u32_e32 vcc, 64, v{{[0-9]+}} -define void @commute_ule_63_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_ule_63_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -104,7 +104,7 @@ define void @commute_ule_63_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) # ; GCN-LABEL: {{^}}commute_ule_64_i32: ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x41{{$}} ; GCN: v_cmp_gt_u32_e32 vcc, [[K]], v{{[0-9]+}} -define void @commute_ule_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_ule_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -117,7 +117,7 @@ define void @commute_ule_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) # ; GCN-LABEL: {{^}}commute_sgt_neg1_i32: ; GCN: v_cmp_lt_i32_e32 vcc, -1, v{{[0-9]+}} -define void @commute_sgt_neg1_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_sgt_neg1_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -130,7 +130,7 @@ define void @commute_sgt_neg1_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) ; GCN-LABEL: {{^}}commute_sge_neg2_i32: ; GCN: v_cmp_lt_i32_e32 vcc, -3, v{{[0-9]+}} -define void @commute_sge_neg2_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_sge_neg2_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -143,7 +143,7 @@ define void @commute_sge_neg2_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) ; GCN-LABEL: {{^}}commute_slt_neg16_i32: ; GCN: v_cmp_gt_i32_e32 vcc, -16, v{{[0-9]+}} -define void @commute_slt_neg16_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_slt_neg16_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -156,7 +156,7 @@ define void @commute_slt_neg16_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in ; GCN-LABEL: {{^}}commute_sle_5_i32: ; GCN: v_cmp_gt_i32_e32 vcc, 6, v{{[0-9]+}} -define void @commute_sle_5_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_sle_5_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -173,7 +173,7 @@ define void @commute_sle_5_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 ; GCN-LABEL: {{^}}commute_eq_64_i64: ; GCN: v_cmp_eq_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}} -define void @commute_eq_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_eq_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -186,7 +186,7 @@ define void @commute_eq_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 ; GCN-LABEL: {{^}}commute_ne_64_i64: ; GCN: v_cmp_ne_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}} -define void @commute_ne_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_ne_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -199,7 +199,7 @@ define void @commute_ne_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 ; GCN-LABEL: {{^}}commute_ugt_64_i64: ; GCN: v_cmp_lt_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}} -define void @commute_ugt_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_ugt_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -212,7 +212,7 @@ define void @commute_ugt_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) # ; GCN-LABEL: {{^}}commute_uge_64_i64: ; GCN: v_cmp_lt_u64_e32 vcc, 63, v{{\[[0-9]+:[0-9]+\]}} -define void @commute_uge_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_uge_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -225,7 +225,7 @@ define void @commute_uge_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) # ; GCN-LABEL: {{^}}commute_ult_64_i64: ; GCN: v_cmp_gt_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}} -define void @commute_ult_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_ult_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -238,7 +238,7 @@ define void @commute_ult_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) # ; GCN-LABEL: {{^}}commute_ule_63_i64: ; GCN: v_cmp_gt_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}} -define void @commute_ule_63_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_ule_63_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -254,7 +254,7 @@ define void @commute_ule_63_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) # ; GCN-LABEL: {{^}}commute_ule_64_i64: ; GCN-DAG: s_movk_i32 s[[KLO:[0-9]+]], 0x41{{$}} ; GCN: v_cmp_gt_u64_e32 vcc, s{{\[}}[[KLO]]:{{[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} -define void @commute_ule_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_ule_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -267,7 +267,7 @@ define void @commute_ule_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) # ; GCN-LABEL: {{^}}commute_sgt_neg1_i64: ; GCN: v_cmp_lt_i64_e32 vcc, -1, v{{\[[0-9]+:[0-9]+\]}} -define void @commute_sgt_neg1_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_sgt_neg1_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -280,7 +280,7 @@ define void @commute_sgt_neg1_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) ; GCN-LABEL: {{^}}commute_sge_neg2_i64: ; GCN: v_cmp_lt_i64_e32 vcc, -3, v{{\[[0-9]+:[0-9]+\]}} -define void @commute_sge_neg2_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_sge_neg2_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -293,7 +293,7 @@ define void @commute_sge_neg2_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) ; GCN-LABEL: {{^}}commute_slt_neg16_i64: ; GCN: v_cmp_gt_i64_e32 vcc, -16, v{{\[[0-9]+:[0-9]+\]}} -define void @commute_slt_neg16_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_slt_neg16_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -306,7 +306,7 @@ define void @commute_slt_neg16_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in ; GCN-LABEL: {{^}}commute_sle_5_i64: ; GCN: v_cmp_gt_i64_e32 vcc, 6, v{{\[[0-9]+:[0-9]+\]}} -define void @commute_sle_5_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_sle_5_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -324,7 +324,7 @@ define void @commute_sle_5_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 ; GCN-LABEL: {{^}}commute_oeq_2.0_f32: ; GCN: v_cmp_eq_f32_e32 vcc, 2.0, v{{[0-9]+}} -define void @commute_oeq_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_oeq_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -338,7 +338,7 @@ define void @commute_oeq_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in ; GCN-LABEL: {{^}}commute_ogt_2.0_f32: ; GCN: v_cmp_lt_f32_e32 vcc, 2.0, v{{[0-9]+}} -define void @commute_ogt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_ogt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -351,7 +351,7 @@ define void @commute_ogt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in ; GCN-LABEL: {{^}}commute_oge_2.0_f32: ; GCN: v_cmp_le_f32_e32 vcc, 2.0, v{{[0-9]+}} -define void @commute_oge_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_oge_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -364,7 +364,7 @@ define void @commute_oge_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in ; GCN-LABEL: {{^}}commute_olt_2.0_f32: ; GCN: v_cmp_gt_f32_e32 vcc, 2.0, v{{[0-9]+}} -define void @commute_olt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_olt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -377,7 +377,7 @@ define void @commute_olt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in ; GCN-LABEL: {{^}}commute_ole_2.0_f32: ; GCN: v_cmp_ge_f32_e32 vcc, 2.0, v{{[0-9]+}} -define void @commute_ole_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_ole_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -390,7 +390,7 @@ define void @commute_ole_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in ; GCN-LABEL: {{^}}commute_one_2.0_f32: ; GCN: v_cmp_lg_f32_e32 vcc, 2.0, v{{[0-9]+}} -define void @commute_one_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_one_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -403,7 +403,7 @@ define void @commute_one_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in ; GCN-LABEL: {{^}}commute_ord_2.0_f32: ; GCN: v_cmp_o_f32_e32 vcc, [[REG:v[0-9]+]], [[REG]] -define void @commute_ord_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_ord_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -416,7 +416,7 @@ define void @commute_ord_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in ; GCN-LABEL: {{^}}commute_ueq_2.0_f32: ; GCN: v_cmp_nlg_f32_e32 vcc, 2.0, v{{[0-9]+}} -define void @commute_ueq_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_ueq_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -429,7 +429,7 @@ define void @commute_ueq_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in ; GCN-LABEL: {{^}}commute_ugt_2.0_f32: ; GCN: v_cmp_nge_f32_e32 vcc, 2.0, v{{[0-9]+}} -define void @commute_ugt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_ugt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -442,7 +442,7 @@ define void @commute_ugt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in ; GCN-LABEL: {{^}}commute_uge_2.0_f32: ; GCN: v_cmp_ngt_f32_e32 vcc, 2.0, v{{[0-9]+}} -define void @commute_uge_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_uge_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -455,7 +455,7 @@ define void @commute_uge_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in ; GCN-LABEL: {{^}}commute_ult_2.0_f32: ; GCN: v_cmp_nle_f32_e32 vcc, 2.0, v{{[0-9]+}} -define void @commute_ult_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_ult_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -468,7 +468,7 @@ define void @commute_ult_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in ; GCN-LABEL: {{^}}commute_ule_2.0_f32: ; GCN: v_cmp_nlt_f32_e32 vcc, 2.0, v{{[0-9]+}} -define void @commute_ule_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_ule_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -481,7 +481,7 @@ define void @commute_ule_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in ; GCN-LABEL: {{^}}commute_une_2.0_f32: ; GCN: v_cmp_neq_f32_e32 vcc, 2.0, v{{[0-9]+}} -define void @commute_une_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_une_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -494,7 +494,7 @@ define void @commute_une_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in ; GCN-LABEL: {{^}}commute_uno_2.0_f32: ; GCN: v_cmp_u_f32_e32 vcc, [[REG:v[0-9]+]], [[REG]] -define void @commute_uno_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_uno_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -512,7 +512,7 @@ define void @commute_uno_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in ; GCN-LABEL: {{^}}commute_oeq_2.0_f64: ; GCN: v_cmp_eq_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}} -define void @commute_oeq_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_oeq_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -526,7 +526,7 @@ define void @commute_oeq_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i ; GCN-LABEL: {{^}}commute_ogt_2.0_f64: ; GCN: v_cmp_lt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}} -define void @commute_ogt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_ogt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -539,7 +539,7 @@ define void @commute_ogt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i ; GCN-LABEL: {{^}}commute_oge_2.0_f64: ; GCN: v_cmp_le_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}} -define void @commute_oge_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_oge_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -552,7 +552,7 @@ define void @commute_oge_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i ; GCN-LABEL: {{^}}commute_olt_2.0_f64: ; GCN: v_cmp_gt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}} -define void @commute_olt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_olt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -565,7 +565,7 @@ define void @commute_olt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i ; GCN-LABEL: {{^}}commute_ole_2.0_f64: ; GCN: v_cmp_ge_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}} -define void @commute_ole_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_ole_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -578,7 +578,7 @@ define void @commute_ole_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i ; GCN-LABEL: {{^}}commute_one_2.0_f64: ; GCN: v_cmp_lg_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}} -define void @commute_one_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_one_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -591,7 +591,7 @@ define void @commute_one_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i ; GCN-LABEL: {{^}}commute_ord_2.0_f64: ; GCN: v_cmp_o_f64_e32 vcc, [[REG:v\[[0-9]+:[0-9]+\]]], [[REG]] -define void @commute_ord_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_ord_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -604,7 +604,7 @@ define void @commute_ord_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i ; GCN-LABEL: {{^}}commute_ueq_2.0_f64: ; GCN: v_cmp_nlg_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}} -define void @commute_ueq_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_ueq_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -617,7 +617,7 @@ define void @commute_ueq_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i ; GCN-LABEL: {{^}}commute_ugt_2.0_f64: ; GCN: v_cmp_nge_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}} -define void @commute_ugt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_ugt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -630,7 +630,7 @@ define void @commute_ugt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i ; GCN-LABEL: {{^}}commute_uge_2.0_f64: ; GCN: v_cmp_ngt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}} -define void @commute_uge_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_uge_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -643,7 +643,7 @@ define void @commute_uge_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i ; GCN-LABEL: {{^}}commute_ult_2.0_f64: ; GCN: v_cmp_nle_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}} -define void @commute_ult_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_ult_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -656,7 +656,7 @@ define void @commute_ult_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i ; GCN-LABEL: {{^}}commute_ule_2.0_f64: ; GCN: v_cmp_nlt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}} -define void @commute_ule_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_ule_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -669,7 +669,7 @@ define void @commute_ule_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i ; GCN-LABEL: {{^}}commute_une_2.0_f64: ; GCN: v_cmp_neq_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}} -define void @commute_une_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_une_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -682,7 +682,7 @@ define void @commute_une_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i ; GCN-LABEL: {{^}}commute_uno_2.0_f64: ; GCN: v_cmp_u_f64_e32 vcc, [[REG:v\[[0-9]+:[0-9]+\]]], [[REG]] -define void @commute_uno_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { +define amdgpu_kernel void @commute_uno_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -701,9 +701,9 @@ define void @commute_uno_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i ; GCN-LABEL: {{^}}commute_frameindex: ; XGCN: v_cmp_eq_u32_e32 vcc, 0, v{{[0-9]+}} -; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}} +; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4{{$}} ; GCN: v_cmp_eq_u32_e32 vcc, [[FI]], v{{[0-9]+}} -define void @commute_frameindex(i32 addrspace(1)* nocapture %out) #0 { +define amdgpu_kernel void @commute_frameindex(i32 addrspace(1)* nocapture %out) #0 { entry: %stack0 = alloca i32 %ptr0 = load volatile i32*, i32* addrspace(1)* undef |