1 files changed, 37 insertions, 37 deletions
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.ll
index a65f422742c9..a9351dbb27d2 100644
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.ll
@@ -3,7 +3,7 @@
 
 ; GCN-LABEL: {{^}}gather4_v2:
 ; GCN: image_gather4 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_v2(<4 x float> addrspace(1)* %out) {
+define amdgpu_kernel void @gather4_v2(<4 x float> addrspace(1)* %out) {
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.gather4.v4f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1)
   store <4 x float> %r, <4 x float> addrspace(1)* %out
@@ -12,7 +12,7 @@ main_body:
 
 ; GCN-LABEL: {{^}}gather4:
 ; GCN: image_gather4 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4(<4 x float> addrspace(1)* %out) {
+define amdgpu_kernel void @gather4(<4 x float> addrspace(1)* %out) {
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.gather4.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1)
   store <4 x float> %r, <4 x float> addrspace(1)* %out
@@ -21,7 +21,7 @@ main_body:
 
 ; GCN-LABEL: {{^}}gather4_cl:
 ; GCN: image_gather4_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_cl(<4 x float> addrspace(1)* %out) {
+define amdgpu_kernel void @gather4_cl(<4 x float> addrspace(1)* %out) {
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.gather4.cl.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1)
   store <4 x float> %r, <4 x float> addrspace(1)* %out
@@ -30,7 +30,7 @@ main_body:
 
 ; GCN-LABEL: {{^}}gather4_l:
 ; GCN: image_gather4_l {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_l(<4 x float> addrspace(1)* %out) {
+define amdgpu_kernel void @gather4_l(<4 x float> addrspace(1)* %out) {
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.gather4.l.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1)
   store <4 x float> %r, <4 x float> addrspace(1)* %out
@@ -39,7 +39,7 @@ main_body:
 
 ; GCN-LABEL: {{^}}gather4_b:
 ; GCN: image_gather4_b {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_b(<4 x float> addrspace(1)* %out) {
+define amdgpu_kernel void @gather4_b(<4 x float> addrspace(1)* %out) {
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.gather4.b.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1)
   store <4 x float> %r, <4 x float> addrspace(1)* %out
@@ -48,7 +48,7 @@ main_body:
 
 ; GCN-LABEL: {{^}}gather4_b_cl:
 ; GCN: image_gather4_b_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_b_cl(<4 x float> addrspace(1)* %out) {
+define amdgpu_kernel void @gather4_b_cl(<4 x float> addrspace(1)* %out) {
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1)
   store <4 x float> %r, <4 x float> addrspace(1)* %out
@@ -57,7 +57,7 @@ main_body:
 
 ; GCN-LABEL: {{^}}gather4_b_cl_v8:
 ; GCN: image_gather4_b_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_b_cl_v8(<4 x float> addrspace(1)* %out) {
+define amdgpu_kernel void @gather4_b_cl_v8(<4 x float> addrspace(1)* %out) {
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1)
   store <4 x float> %r, <4 x float> addrspace(1)* %out
@@ -66,7 +66,7 @@ main_body:
 
 ; GCN-LABEL: {{^}}gather4_lz_v2:
 ; GCN: image_gather4_lz {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_lz_v2(<4 x float> addrspace(1)* %out) {
+define amdgpu_kernel void @gather4_lz_v2(<4 x float> addrspace(1)* %out) {
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.gather4.lz.v4f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1)
   store <4 x float> %r, <4 x float> addrspace(1)* %out
@@ -75,7 +75,7 @@ main_body:
 
 ; GCN-LABEL: {{^}}gather4_lz:
 ; GCN: image_gather4_lz {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_lz(<4 x float> addrspace(1)* %out) {
+define amdgpu_kernel void @gather4_lz(<4 x float> addrspace(1)* %out) {
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.gather4.lz.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1)
   store <4 x float> %r, <4 x float> addrspace(1)* %out
@@ -86,7 +86,7 @@ main_body:
 
 ; GCN-LABEL: {{^}}gather4_o:
 ; GCN: image_gather4_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_o(<4 x float> addrspace(1)* %out) {
+define amdgpu_kernel void @gather4_o(<4 x float> addrspace(1)* %out) {
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.gather4.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1)
   store <4 x float> %r, <4 x float> addrspace(1)* %out
@@ -95,7 +95,7 @@ main_body:
 
 ; GCN-LABEL: {{^}}gather4_cl_o:
 ; GCN: image_gather4_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_cl_o(<4 x float> addrspace(1)* %out) {
+define amdgpu_kernel void @gather4_cl_o(<4 x float> addrspace(1)* %out) {
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1)
   store <4 x float> %r, <4 x float> addrspace(1)* %out
@@ -104,7 +104,7 @@ main_body:
 
 ; GCN-LABEL: {{^}}gather4_cl_o_v8:
 ; GCN: image_gather4_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_cl_o_v8(<4 x float> addrspace(1)* %out) {
+define amdgpu_kernel void @gather4_cl_o_v8(<4 x float> addrspace(1)* %out) {
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1)
   store <4 x float> %r, <4 x float> addrspace(1)* %out
@@ -113,7 +113,7 @@ main_body:
 
 ; GCN-LABEL: {{^}}gather4_l_o:
 ; GCN: image_gather4_l_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_l_o(<4 x float> addrspace(1)* %out) {
+define amdgpu_kernel void @gather4_l_o(<4 x float> addrspace(1)* %out) {
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.gather4.l.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1)
   store <4 x float> %r, <4 x float> addrspace(1)* %out
@@ -122,7 +122,7 @@ main_body:
 
 ; GCN-LABEL: {{^}}gather4_l_o_v8:
 ; GCN: image_gather4_l_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_l_o_v8(<4 x float> addrspace(1)* %out) {
+define amdgpu_kernel void @gather4_l_o_v8(<4 x float> addrspace(1)* %out) {
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.gather4.l.o.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1)
   store <4 x float> %r, <4 x float> addrspace(1)* %out
@@ -131,7 +131,7 @@ main_body:
 
 ; GCN-LABEL: {{^}}gather4_b_o:
 ; GCN: image_gather4_b_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_b_o(<4 x float> addrspace(1)* %out) {
+define amdgpu_kernel void @gather4_b_o(<4 x float> addrspace(1)* %out) {
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.gather4.b.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1)
   store <4 x float> %r, <4 x float> addrspace(1)* %out
@@ -140,7 +140,7 @@ main_body:
 
 ; GCN-LABEL: {{^}}gather4_b_o_v8:
 ; GCN: image_gather4_b_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_b_o_v8(<4 x float> addrspace(1)* %out) {
+define amdgpu_kernel void @gather4_b_o_v8(<4 x float> addrspace(1)* %out) {
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.gather4.b.o.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1)
   store <4 x float> %r, <4 x float> addrspace(1)* %out
@@ -149,7 +149,7 @@ main_body:
 
 ; GCN-LABEL: {{^}}gather4_b_cl_o:
 ; GCN: image_gather4_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_b_cl_o(<4 x float> addrspace(1)* %out) {
+define amdgpu_kernel void @gather4_b_cl_o(<4 x float> addrspace(1)* %out) {
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1)
   store <4 x float> %r, <4 x float> addrspace(1)* %out
@@ -158,7 +158,7 @@ main_body:
 
 ; GCN-LABEL: {{^}}gather4_lz_o:
 ; GCN: image_gather4_lz_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_lz_o(<4 x float> addrspace(1)* %out) {
+define amdgpu_kernel void @gather4_lz_o(<4 x float> addrspace(1)* %out) {
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1)
   store <4 x float> %r, <4 x float> addrspace(1)* %out
@@ -168,7 +168,7 @@ main_body:
 
 ; GCN-LABEL: {{^}}gather4_c:
 ; GCN: image_gather4_c {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_c(<4 x float> addrspace(1)* %out) {
+define amdgpu_kernel void @gather4_c(<4 x float> addrspace(1)* %out) {
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.gather4.c.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1)
   store <4 x float> %r, <4 x float> addrspace(1)* %out
@@ -177,7 +177,7 @@ main_body:
 
 ; GCN-LABEL: {{^}}gather4_c_cl:
 ; GCN: image_gather4_c_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_c_cl(<4 x float> addrspace(1)* %out) {
+define amdgpu_kernel void @gather4_c_cl(<4 x float> addrspace(1)* %out) {
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1)
   store <4 x float> %r, <4 x float> addrspace(1)* %out
@@ -186,7 +186,7 @@ main_body:
 
 ; GCN-LABEL: {{^}}gather4_c_cl_v8:
 ; GCN: image_gather4_c_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_c_cl_v8(<4 x float> addrspace(1)* %out) {
+define amdgpu_kernel void @gather4_c_cl_v8(<4 x float> addrspace(1)* %out) {
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1)
   store <4 x float> %r, <4 x float> addrspace(1)* %out
@@ -195,7 +195,7 @@ main_body:
 
 ; GCN-LABEL: {{^}}gather4_c_l:
 ; GCN: image_gather4_c_l {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_c_l(<4 x float> addrspace(1)* %out) {
+define amdgpu_kernel void @gather4_c_l(<4 x float> addrspace(1)* %out) {
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.gather4.c.l.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1)
   store <4 x float> %r, <4 x float> addrspace(1)* %out
@@ -204,7 +204,7 @@ main_body:
 
 ; GCN-LABEL: {{^}}gather4_c_l_v8:
 ; GCN: image_gather4_c_l {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_c_l_v8(<4 x float> addrspace(1)* %out) {
+define amdgpu_kernel void @gather4_c_l_v8(<4 x float> addrspace(1)* %out) {
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.gather4.c.l.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1)
   store <4 x float> %r, <4 x float> addrspace(1)* %out
@@ -213,7 +213,7 @@ main_body:
 
 ; GCN-LABEL: {{^}}gather4_c_b:
 ; GCN: image_gather4_c_b {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_c_b(<4 x float> addrspace(1)* %out) {
+define amdgpu_kernel void @gather4_c_b(<4 x float> addrspace(1)* %out) {
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.gather4.c.b.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1)
   store <4 x float> %r, <4 x float> addrspace(1)* %out
@@ -222,7 +222,7 @@ main_body:
 
 ; GCN-LABEL: {{^}}gather4_c_b_v8:
 ; GCN: image_gather4_c_b {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_c_b_v8(<4 x float> addrspace(1)* %out) {
+define amdgpu_kernel void @gather4_c_b_v8(<4 x float> addrspace(1)* %out) {
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.gather4.c.b.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1)
   store <4 x float> %r, <4 x float> addrspace(1)* %out
@@ -231,7 +231,7 @@ main_body:
 
 ; GCN-LABEL: {{^}}gather4_c_b_cl:
 ; GCN: image_gather4_c_b_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_c_b_cl(<4 x float> addrspace(1)* %out) {
+define amdgpu_kernel void @gather4_c_b_cl(<4 x float> addrspace(1)* %out) {
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1)
   store <4 x float> %r, <4 x float> addrspace(1)* %out
@@ -240,7 +240,7 @@ main_body:
 
 ; GCN-LABEL: {{^}}gather4_c_lz:
 ; GCN: image_gather4_c_lz {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_c_lz(<4 x float> addrspace(1)* %out) {
+define amdgpu_kernel void @gather4_c_lz(<4 x float> addrspace(1)* %out) {
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1)
   store <4 x float> %r, <4 x float> addrspace(1)* %out
@@ -250,7 +250,7 @@ main_body:
 
 ; GCN-LABEL: {{^}}gather4_c_o:
 ; GCN: image_gather4_c_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_c_o(<4 x float> addrspace(1)* %out) {
+define amdgpu_kernel void @gather4_c_o(<4 x float> addrspace(1)* %out) {
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1)
   store <4 x float> %r, <4 x float> addrspace(1)* %out
@@ -259,7 +259,7 @@ main_body:
 
 ; GCN-LABEL: {{^}}gather4_c_o_v8:
 ; GCN: image_gather4_c_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_c_o_v8(<4 x float> addrspace(1)* %out) {
+define amdgpu_kernel void @gather4_c_o_v8(<4 x float> addrspace(1)* %out) {
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1)
   store <4 x float> %r, <4 x float> addrspace(1)* %out
@@ -268,7 +268,7 @@ main_body:
 
 ; GCN-LABEL: {{^}}gather4_c_cl_o:
 ; GCN: image_gather4_c_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_c_cl_o(<4 x float> addrspace(1)* %out) {
+define amdgpu_kernel void @gather4_c_cl_o(<4 x float> addrspace(1)* %out) {
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1)
   store <4 x float> %r, <4 x float> addrspace(1)* %out
@@ -277,7 +277,7 @@ main_body:
 
 ; GCN-LABEL: {{^}}gather4_c_l_o:
 ; GCN: image_gather4_c_l_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_c_l_o(<4 x float> addrspace(1)* %out) {
+define amdgpu_kernel void @gather4_c_l_o(<4 x float> addrspace(1)* %out) {
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1)
   store <4 x float> %r, <4 x float> addrspace(1)* %out
@@ -286,7 +286,7 @@ main_body:
 
 ; GCN-LABEL: {{^}}gather4_c_b_o:
 ; GCN: image_gather4_c_b_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_c_b_o(<4 x float> addrspace(1)* %out) {
+define amdgpu_kernel void @gather4_c_b_o(<4 x float> addrspace(1)* %out) {
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1)
   store <4 x float> %r, <4 x float> addrspace(1)* %out
@@ -295,7 +295,7 @@ main_body:
 
 ; GCN-LABEL: {{^}}gather4_c_b_cl_o:
 ; GCN: image_gather4_c_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_c_b_cl_o(<4 x float> addrspace(1)* %out) {
+define amdgpu_kernel void @gather4_c_b_cl_o(<4 x float> addrspace(1)* %out) {
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1)
   store <4 x float> %r, <4 x float> addrspace(1)* %out
@@ -304,7 +304,7 @@ main_body:
 
 ; GCN-LABEL: {{^}}gather4_c_lz_o:
 ; GCN: image_gather4_c_lz_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_c_lz_o(<4 x float> addrspace(1)* %out) {
+define amdgpu_kernel void @gather4_c_lz_o(<4 x float> addrspace(1)* %out) {
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1)
   store <4 x float> %r, <4 x float> addrspace(1)* %out
@@ -313,7 +313,7 @@ main_body:
 
 ; GCN-LABEL: {{^}}gather4_c_lz_o_v8:
 ; GCN: image_gather4_c_lz_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_c_lz_o_v8(<4 x float> addrspace(1)* %out) {
+define amdgpu_kernel void @gather4_c_lz_o_v8(<4 x float> addrspace(1)* %out) {
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1)
   store <4 x float> %r, <4 x float> addrspace(1)* %out
@@ -322,7 +322,7 @@ main_body:
 
 ; GCN-LABEL: {{^}}gather4_f32:
 ; GCN: image_gather4 {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
-define void @gather4_f32(float addrspace(1)* %out) {
+define amdgpu_kernel void @gather4_f32(float addrspace(1)* %out) {
 main_body:
   %r = call float @llvm.amdgcn.image.gather4.f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1)
   store float %r, float addrspace(1)* %out
@@ -331,7 +331,7 @@ main_body:
 
 ; GCN-LABEL: {{^}}gather4_v2f32:
 ; GCN: image_gather4 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x3 da
-define void @gather4_v2f32(<2 x float> addrspace(1)* %out) {
+define amdgpu_kernel void @gather4_v2f32(<2 x float> addrspace(1)* %out) {
 main_body:
   %r = call <2 x float> @llvm.amdgcn.image.gather4.v2f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 0, i1 0, i1 0, i1 0, i1 1)
   store <2 x float> %r, <2 x float> addrspace(1)* %out