diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2015-01-31 19:27:28 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2015-01-31 19:27:28 +0000 |
commit | ec304151b74f9254d7029ee4d197ce1f7cbe501a (patch) | |
tree | 63e4ed55e4fbb581fd4731d44a327a7b3278e0a1 /test/CodeGen/R600 | |
parent | 67c32a98315f785a9ec9d531c1f571a0196c7463 (diff) | |
download | src-ec304151b74f9254d7029ee4d197ce1f7cbe501a.tar.gz src-ec304151b74f9254d7029ee4d197ce1f7cbe501a.zip |
Vendor import of llvm RELEASE_360/rc2 tag r227651 (effectively, 3.6.0 RC2):vendor/llvm/llvm-release_360-r227651
Notes
Notes:
svn path=/vendor/llvm/dist/; revision=277992
svn path=/vendor/llvm/llvm-release_360-r227651/; revision=277993; tag=vendor/llvm/llvm-release_360-r227651
Diffstat (limited to 'test/CodeGen/R600')
-rw-r--r-- | test/CodeGen/R600/basic-loop.ll | 1 | ||||
-rw-r--r-- | test/CodeGen/R600/ctpop.ll | 66 | ||||
-rw-r--r-- | test/CodeGen/R600/ctpop64.ll | 3 | ||||
-rw-r--r-- | test/CodeGen/R600/ds_read2st64.ll | 4 | ||||
-rw-r--r-- | test/CodeGen/R600/fp_to_sint.ll | 15 | ||||
-rw-r--r-- | test/CodeGen/R600/hsa.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/R600/misaligned-load.ll | 18 | ||||
-rw-r--r-- | test/CodeGen/R600/scratch-buffer.ll | 86 | ||||
-rw-r--r-- | test/CodeGen/R600/si-triv-disjoint-mem-access.ll | 2 |
9 files changed, 156 insertions, 41 deletions
diff --git a/test/CodeGen/R600/basic-loop.ll b/test/CodeGen/R600/basic-loop.ll index 72737ae273e6..9d0509b38d8a 100644 --- a/test/CodeGen/R600/basic-loop.ll +++ b/test/CodeGen/R600/basic-loop.ll @@ -1,4 +1,3 @@ -; XFAIL: * ; RUN: llc -O0 -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s ; CHECK-LABEL: {{^}}test_loop: diff --git a/test/CodeGen/R600/ctpop.ll b/test/CodeGen/R600/ctpop.ll index a47bc876cb96..c64f443ad697 100644 --- a/test/CodeGen/R600/ctpop.ll +++ b/test/CodeGen/R600/ctpop.ll @@ -24,8 +24,7 @@ define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind { ; XXX - Why 0 in register? ; FUNC-LABEL: {{^}}v_ctpop_i32: ; SI: buffer_load_dword [[VAL:v[0-9]+]], -; SI: v_mov_b32_e32 [[VZERO:v[0-9]+]], 0 -; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VZERO]] +; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 0 ; SI: buffer_store_dword [[RESULT]], ; SI: s_endpgm @@ -40,8 +39,7 @@ define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noali ; FUNC-LABEL: {{^}}v_ctpop_add_chain_i32: ; SI: buffer_load_dword [[VAL0:v[0-9]+]], ; SI: buffer_load_dword [[VAL1:v[0-9]+]], -; SI: v_mov_b32_e32 [[VZERO:v[0-9]+]], 0 -; SI: v_bcnt_u32_b32_e32 [[MIDRESULT:v[0-9]+]], [[VAL1]], [[VZERO]] +; SI: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], [[VAL1]], 0 ; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]] ; SI: buffer_store_dword [[RESULT]], ; SI: s_endpgm @@ -73,8 +71,8 @@ define void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace( } ; FUNC-LABEL: {{^}}v_ctpop_v2i32: -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 ; SI: s_endpgm ; EG: BCNT_INT @@ -87,10 +85,10 @@ define void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrs } ; FUNC-LABEL: {{^}}v_ctpop_v4i32: -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 +; SI: v_bcnt_u32_b32_e64 +; SI: 
v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 ; SI: s_endpgm ; EG: BCNT_INT @@ -105,14 +103,14 @@ define void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrs } ; FUNC-LABEL: {{^}}v_ctpop_v8i32: -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 ; SI: s_endpgm ; EG: BCNT_INT @@ -131,22 +129,22 @@ define void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrs } ; FUNC-LABEL: {{^}}v_ctpop_v16i32: -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 ; SI: s_endpgm ; EG: BCNT_INT diff --git a/test/CodeGen/R600/ctpop64.ll b/test/CodeGen/R600/ctpop64.ll index 8dfe571d3477..9758ac96ea9b 100644 --- a/test/CodeGen/R600/ctpop64.ll +++ b/test/CodeGen/R600/ctpop64.ll @@ -21,8 +21,7 @@ define void @s_ctpop_i64(i32 addrspace(1)* noalias %out, i64 %val) nounwind { ; FUNC-LABEL: {{^}}v_ctpop_i64: 
; SI: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, -; SI: v_mov_b32_e32 [[VZERO:v[0-9]+]], 0 -; SI: v_bcnt_u32_b32_e32 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], [[VZERO]] +; SI: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0 ; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]] ; SI: buffer_store_dword [[RESULT]], ; SI: s_endpgm diff --git a/test/CodeGen/R600/ds_read2st64.ll b/test/CodeGen/R600/ds_read2st64.ll index 24834af20404..efd875e93176 100644 --- a/test/CodeGen/R600/ds_read2st64.ll +++ b/test/CodeGen/R600/ds_read2st64.ll @@ -65,8 +65,8 @@ define void @simple_read2st64_f32_max_offset(float addrspace(1)* %out, float add ; SI-LABEL: @simple_read2st64_f32_over_max_offset ; SI-NOT: ds_read2st64_b32 -; SI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256 ; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}} +; SI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256 ; SI: ds_read_b32 {{v[0-9]+}}, [[BIGADD]] ; SI: s_endpgm define void @simple_read2st64_f32_over_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 { @@ -197,8 +197,8 @@ define void @simple_read2st64_f64_max_offset(double addrspace(1)* %out, double a ; SI-LABEL: @simple_read2st64_f64_over_max_offset ; SI-NOT: ds_read2st64_b64 -; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512 ; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}} +; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512 ; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]] ; SI: s_endpgm define void @simple_read2st64_f64_over_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 { diff --git a/test/CodeGen/R600/fp_to_sint.ll b/test/CodeGen/R600/fp_to_sint.ll index 35cfb03d39b4..d76e8a341c6f 100644 --- a/test/CodeGen/R600/fp_to_sint.ll +++ b/test/CodeGen/R600/fp_to_sint.ll @@ -1,16 +1,27 @@ ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefix=EG --check-prefix=FUNC ; RUN: llc -march=amdgcn -mcpu=SI 
-verify-machineinstrs < %s | FileCheck %s --check-prefix=SI --check-prefix=FUNC +declare float @llvm.fabs.f32(float) #0 + ; FUNC-LABEL: {{^}}fp_to_sint_i32: ; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} ; SI: v_cvt_i32_f32_e32 ; SI: s_endpgm -define void @fp_to_sint_i32 (i32 addrspace(1)* %out, float %in) { +define void @fp_to_sint_i32(i32 addrspace(1)* %out, float %in) { %conv = fptosi float %in to i32 store i32 %conv, i32 addrspace(1)* %out ret void } +; FUNC-LABEL: {{^}}fp_to_sint_i32_fabs: +; SI: v_cvt_i32_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|{{$}} +define void @fp_to_sint_i32_fabs(i32 addrspace(1)* %out, float %in) { + %in.fabs = call float @llvm.fabs.f32(float %in) #0 + %conv = fptosi float %in.fabs to i32 + store i32 %conv, i32 addrspace(1)* %out + ret void +} + ; FUNC-LABEL: {{^}}fp_to_sint_v2i32: ; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} ; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} @@ -214,3 +225,5 @@ define void @fp_to_sint_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) { store <4 x i64> %conv, <4 x i64> addrspace(1)* %out ret void } + +attributes #0 = { nounwind readnone } diff --git a/test/CodeGen/R600/hsa.ll b/test/CodeGen/R600/hsa.ll index 2e79866362ac..5ce3beaa16c0 100644 --- a/test/CodeGen/R600/hsa.ll +++ b/test/CodeGen/R600/hsa.ll @@ -1,6 +1,8 @@ ; RUN: llc < %s -mtriple=r600--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA %s ; HSA: {{^}}simple: +; HSA: .section .hsa.version +; HSA-NEXT: .ascii "HSA Code Unit:0.0:AMD:0.1:GFX8.1:0" ; Make sure we are setting the ATC bit: ; HSA: s_mov_b32 s[[HI:[0-9]]], 0x100f000 ; HSA: buffer_store_dword v{{[0-9]+}}, s[0:[[HI]]], 0 diff --git a/test/CodeGen/R600/misaligned-load.ll b/test/CodeGen/R600/misaligned-load.ll new file mode 100644 index 000000000000..6290ca09d502 --- /dev/null +++ b/test/CodeGen/R600/misaligned-load.ll @@ -0,0 +1,18 @@ +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +; SI: @byte_aligned_load64 +; 
SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: s_endpgm +define void @byte_aligned_load64(i64 addrspace(1)* %out, i64 addrspace(3)* %in) { +entry: + %0 = load i64 addrspace(3)* %in, align 1 + store i64 %0, i64 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/scratch-buffer.ll b/test/CodeGen/R600/scratch-buffer.ll new file mode 100644 index 000000000000..740328a495da --- /dev/null +++ b/test/CodeGen/R600/scratch-buffer.ll @@ -0,0 +1,86 @@ +; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s + +; When a frame index offset is more than 12-bits, make sure we don't store +; it in mubuf's offset field. + +; Also, make sure we use the same register for storing the scratch buffer address +; for both stores. This register is allocated by the register scavenger, so we +; should be able to reuse the same register for each scratch buffer access. + +; CHECK-LABEL: {{^}}legal_offset_fi: +; CHECK: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0{{$}} +; CHECK: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen +; CHECK: v_mov_b32_e32 [[OFFSET]], 0x8000 +; CHECK: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}} + +define void @legal_offset_fi(i32 addrspace(1)* %out, i32 %cond, i32 %if_offset, i32 %else_offset) { +entry: + %scratch0 = alloca [8192 x i32] + %scratch1 = alloca [8192 x i32] + + %scratchptr0 = getelementptr [8192 x i32]* %scratch0, i32 0, i32 0 + store i32 1, i32* %scratchptr0 + + %scratchptr1 = getelementptr [8192 x i32]* %scratch1, i32 0, i32 0 + store i32 2, i32* %scratchptr1 + + %cmp = icmp eq i32 %cond, 0 + br i1 %cmp, label %if, label %else + +if: + %if_ptr = getelementptr [8192 x i32]* %scratch0, i32 0, i32 %if_offset + %if_value = load i32* %if_ptr + br label %done + +else: + %else_ptr = getelementptr [8192 x i32]* %scratch1, i32 0, i32 %else_offset + 
%else_value = load i32* %else_ptr + br label %done + +done: + %value = phi i32 [%if_value, %if], [%else_value, %else] + store i32 %value, i32 addrspace(1)* %out + ret void + + ret void + +} + +; CHECK-LABEL: {{^}}legal_offset_fi_offset +; CHECK: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen +; CHECK: v_add_i32_e32 [[OFFSET:v[0-9]+]], 0x8000 +; CHECK: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}} + +define void @legal_offset_fi_offset(i32 addrspace(1)* %out, i32 %cond, i32 addrspace(1)* %offsets, i32 %if_offset, i32 %else_offset) { +entry: + %scratch0 = alloca [8192 x i32] + %scratch1 = alloca [8192 x i32] + + %offset0 = load i32 addrspace(1)* %offsets + %scratchptr0 = getelementptr [8192 x i32]* %scratch0, i32 0, i32 %offset0 + store i32 %offset0, i32* %scratchptr0 + + %offsetptr1 = getelementptr i32 addrspace(1)* %offsets, i32 1 + %offset1 = load i32 addrspace(1)* %offsetptr1 + %scratchptr1 = getelementptr [8192 x i32]* %scratch1, i32 0, i32 %offset1 + store i32 %offset1, i32* %scratchptr1 + + %cmp = icmp eq i32 %cond, 0 + br i1 %cmp, label %if, label %else + +if: + %if_ptr = getelementptr [8192 x i32]* %scratch0, i32 0, i32 %if_offset + %if_value = load i32* %if_ptr + br label %done + +else: + %else_ptr = getelementptr [8192 x i32]* %scratch1, i32 0, i32 %else_offset + %else_value = load i32* %else_ptr + br label %done + +done: + %value = phi i32 [%if_value, %if], [%else_value, %else] + store i32 %value, i32 addrspace(1)* %out + ret void +} + diff --git a/test/CodeGen/R600/si-triv-disjoint-mem-access.ll b/test/CodeGen/R600/si-triv-disjoint-mem-access.ll index b2f4a9ff05e1..f6dcb388248a 100644 --- a/test/CodeGen/R600/si-triv-disjoint-mem-access.ll +++ b/test/CodeGen/R600/si-triv-disjoint-mem-access.ll @@ -51,8 +51,8 @@ define void @no_reorder_local_load_volatile_global_store_local_load(i32 addrspac ; FUNC-LABEL: @no_reorder_barrier_local_load_global_store_local_load ; CI: 
ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:4 -; CI: buffer_store_dword ; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:8 +; CI: buffer_store_dword define void @no_reorder_barrier_local_load_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 { %ptr0 = load i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4 |