aboutsummaryrefslogtreecommitdiff
path: root/test/CodeGen/R600
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2015-01-31 19:27:28 +0000
committerDimitry Andric <dim@FreeBSD.org>2015-01-31 19:27:28 +0000
commitec304151b74f9254d7029ee4d197ce1f7cbe501a (patch)
tree63e4ed55e4fbb581fd4731d44a327a7b3278e0a1 /test/CodeGen/R600
parent67c32a98315f785a9ec9d531c1f571a0196c7463 (diff)
downloadsrc-ec304151b74f9254d7029ee4d197ce1f7cbe501a.tar.gz
src-ec304151b74f9254d7029ee4d197ce1f7cbe501a.zip
Vendor import of llvm RELEASE_360/rc2 tag r227651 (effectively, 3.6.0 RC2):vendor/llvm/llvm-release_360-r227651
Notes
Notes: svn path=/vendor/llvm/dist/; revision=277992 svn path=/vendor/llvm/llvm-release_360-r227651/; revision=277993; tag=vendor/llvm/llvm-release_360-r227651
Diffstat (limited to 'test/CodeGen/R600')
-rw-r--r--test/CodeGen/R600/basic-loop.ll1
-rw-r--r--test/CodeGen/R600/ctpop.ll66
-rw-r--r--test/CodeGen/R600/ctpop64.ll3
-rw-r--r--test/CodeGen/R600/ds_read2st64.ll4
-rw-r--r--test/CodeGen/R600/fp_to_sint.ll15
-rw-r--r--test/CodeGen/R600/hsa.ll2
-rw-r--r--test/CodeGen/R600/misaligned-load.ll18
-rw-r--r--test/CodeGen/R600/scratch-buffer.ll86
-rw-r--r--test/CodeGen/R600/si-triv-disjoint-mem-access.ll2
9 files changed, 156 insertions, 41 deletions
diff --git a/test/CodeGen/R600/basic-loop.ll b/test/CodeGen/R600/basic-loop.ll
index 72737ae273e6..9d0509b38d8a 100644
--- a/test/CodeGen/R600/basic-loop.ll
+++ b/test/CodeGen/R600/basic-loop.ll
@@ -1,4 +1,3 @@
-; XFAIL: *
; RUN: llc -O0 -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s
; CHECK-LABEL: {{^}}test_loop:
diff --git a/test/CodeGen/R600/ctpop.ll b/test/CodeGen/R600/ctpop.ll
index a47bc876cb96..c64f443ad697 100644
--- a/test/CodeGen/R600/ctpop.ll
+++ b/test/CodeGen/R600/ctpop.ll
@@ -24,8 +24,7 @@ define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
; XXX - Why 0 in register?
; FUNC-LABEL: {{^}}v_ctpop_i32:
; SI: buffer_load_dword [[VAL:v[0-9]+]],
-; SI: v_mov_b32_e32 [[VZERO:v[0-9]+]], 0
-; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VZERO]]
+; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 0
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
@@ -40,8 +39,7 @@ define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noali
; FUNC-LABEL: {{^}}v_ctpop_add_chain_i32:
; SI: buffer_load_dword [[VAL0:v[0-9]+]],
; SI: buffer_load_dword [[VAL1:v[0-9]+]],
-; SI: v_mov_b32_e32 [[VZERO:v[0-9]+]], 0
-; SI: v_bcnt_u32_b32_e32 [[MIDRESULT:v[0-9]+]], [[VAL1]], [[VZERO]]
+; SI: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], [[VAL1]], 0
; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
@@ -73,8 +71,8 @@ define void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(
}
; FUNC-LABEL: {{^}}v_ctpop_v2i32:
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
; SI: s_endpgm
; EG: BCNT_INT
@@ -87,10 +85,10 @@ define void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrs
}
; FUNC-LABEL: {{^}}v_ctpop_v4i32:
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
; SI: s_endpgm
; EG: BCNT_INT
@@ -105,14 +103,14 @@ define void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrs
}
; FUNC-LABEL: {{^}}v_ctpop_v8i32:
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
; SI: s_endpgm
; EG: BCNT_INT
@@ -131,22 +129,22 @@ define void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrs
}
; FUNC-LABEL: {{^}}v_ctpop_v16i32:
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
; SI: s_endpgm
; EG: BCNT_INT
diff --git a/test/CodeGen/R600/ctpop64.ll b/test/CodeGen/R600/ctpop64.ll
index 8dfe571d3477..9758ac96ea9b 100644
--- a/test/CodeGen/R600/ctpop64.ll
+++ b/test/CodeGen/R600/ctpop64.ll
@@ -21,8 +21,7 @@ define void @s_ctpop_i64(i32 addrspace(1)* noalias %out, i64 %val) nounwind {
; FUNC-LABEL: {{^}}v_ctpop_i64:
; SI: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
-; SI: v_mov_b32_e32 [[VZERO:v[0-9]+]], 0
-; SI: v_bcnt_u32_b32_e32 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], [[VZERO]]
+; SI: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0
; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
diff --git a/test/CodeGen/R600/ds_read2st64.ll b/test/CodeGen/R600/ds_read2st64.ll
index 24834af20404..efd875e93176 100644
--- a/test/CodeGen/R600/ds_read2st64.ll
+++ b/test/CodeGen/R600/ds_read2st64.ll
@@ -65,8 +65,8 @@ define void @simple_read2st64_f32_max_offset(float addrspace(1)* %out, float add
; SI-LABEL: @simple_read2st64_f32_over_max_offset
; SI-NOT: ds_read2st64_b32
-; SI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256
; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
+; SI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256
; SI: ds_read_b32 {{v[0-9]+}}, [[BIGADD]]
; SI: s_endpgm
define void @simple_read2st64_f32_over_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
@@ -197,8 +197,8 @@ define void @simple_read2st64_f64_max_offset(double addrspace(1)* %out, double a
; SI-LABEL: @simple_read2st64_f64_over_max_offset
; SI-NOT: ds_read2st64_b64
-; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512
; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
+; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512
; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]]
; SI: s_endpgm
define void @simple_read2st64_f64_over_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
diff --git a/test/CodeGen/R600/fp_to_sint.ll b/test/CodeGen/R600/fp_to_sint.ll
index 35cfb03d39b4..d76e8a341c6f 100644
--- a/test/CodeGen/R600/fp_to_sint.ll
+++ b/test/CodeGen/R600/fp_to_sint.ll
@@ -1,16 +1,27 @@
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefix=EG --check-prefix=FUNC
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s --check-prefix=SI --check-prefix=FUNC
+declare float @llvm.fabs.f32(float) #0
+
; FUNC-LABEL: {{^}}fp_to_sint_i32:
; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; SI: v_cvt_i32_f32_e32
; SI: s_endpgm
-define void @fp_to_sint_i32 (i32 addrspace(1)* %out, float %in) {
+define void @fp_to_sint_i32(i32 addrspace(1)* %out, float %in) {
%conv = fptosi float %in to i32
store i32 %conv, i32 addrspace(1)* %out
ret void
}
+; FUNC-LABEL: {{^}}fp_to_sint_i32_fabs:
+; SI: v_cvt_i32_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|{{$}}
+define void @fp_to_sint_i32_fabs(i32 addrspace(1)* %out, float %in) { ; fptosi applied to the absolute value of %in, result stored through %out
+ %in.fabs = call float @llvm.fabs.f32(float %in) #0 ; absolute value via the llvm.fabs.f32 intrinsic
+ %conv = fptosi float %in.fabs to i32 ; the check line above expects the |...| operand directly on the e64 convert
+ store i32 %conv, i32 addrspace(1)* %out ; write the converted value through %out
+ ret void
+}
+
; FUNC-LABEL: {{^}}fp_to_sint_v2i32:
; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
@@ -214,3 +225,5 @@ define void @fp_to_sint_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) {
store <4 x i64> %conv, <4 x i64> addrspace(1)* %out
ret void
}
+
+attributes #0 = { nounwind readnone }
diff --git a/test/CodeGen/R600/hsa.ll b/test/CodeGen/R600/hsa.ll
index 2e79866362ac..5ce3beaa16c0 100644
--- a/test/CodeGen/R600/hsa.ll
+++ b/test/CodeGen/R600/hsa.ll
@@ -1,6 +1,8 @@
; RUN: llc < %s -mtriple=r600--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA %s
; HSA: {{^}}simple:
+; HSA: .section .hsa.version
+; HSA-NEXT: .ascii "HSA Code Unit:0.0:AMD:0.1:GFX8.1:0"
; Make sure we are setting the ATC bit:
; HSA: s_mov_b32 s[[HI:[0-9]]], 0x100f000
; HSA: buffer_store_dword v{{[0-9]+}}, s[0:[[HI]]], 0
diff --git a/test/CodeGen/R600/misaligned-load.ll b/test/CodeGen/R600/misaligned-load.ll
new file mode 100644
index 000000000000..6290ca09d502
--- /dev/null
+++ b/test/CodeGen/R600/misaligned-load.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+; SI: @byte_aligned_load64
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: s_endpgm
+define void @byte_aligned_load64(i64 addrspace(1)* %out, i64 addrspace(3)* %in) { ; i64 load from addrspace(3) with only 1-byte alignment, copied to %out
+entry:
+ %0 = load i64 addrspace(3)* %in, align 1 ; align 1 is the point of the test: the checks above expect eight ds_read_u8
+ store i64 %0, i64 addrspace(1)* %out ; store the loaded value through %out
+ ret void
+}
diff --git a/test/CodeGen/R600/scratch-buffer.ll b/test/CodeGen/R600/scratch-buffer.ll
new file mode 100644
index 000000000000..740328a495da
--- /dev/null
+++ b/test/CodeGen/R600/scratch-buffer.ll
@@ -0,0 +1,86 @@
+; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s
+
+; When a frame index offset is more than 12-bits, make sure we don't store
+; it in mubuf's offset field.
+
+; Also, make sure we use the same register for storing the scratch buffer address
+; for both stores. This register is allocated by the register scavenger, so we
+; should be able to reuse the same register for each scratch buffer access.
+
+; CHECK-LABEL: {{^}}legal_offset_fi:
+; CHECK: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0{{$}}
+; CHECK: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen
+; CHECK: v_mov_b32_e32 [[OFFSET]], 0x8000
+; CHECK: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
+
+define void @legal_offset_fi(i32 addrspace(1)* %out, i32 %cond, i32 %if_offset, i32 %else_offset) { ; two large allocas: constant-index stores, then a variable-index load chosen by %cond
+entry:
+ %scratch0 = alloca [8192 x i32] ; 8192 x 4 bytes = 0x8000 bytes; presumably the source of the 0x8000 in the checks above
+ %scratch1 = alloca [8192 x i32]
+
+ %scratchptr0 = getelementptr [8192 x i32]* %scratch0, i32 0, i32 0
+ store i32 1, i32* %scratchptr0 ; store to element 0 of the first array
+
+ %scratchptr1 = getelementptr [8192 x i32]* %scratch1, i32 0, i32 0
+ store i32 2, i32* %scratchptr1 ; store to element 0 of the second array
+
+ %cmp = icmp eq i32 %cond, 0
+ br i1 %cmp, label %if, label %else ; %cond == 0 takes the if path, anything else the else path
+
+if:
+ %if_ptr = getelementptr [8192 x i32]* %scratch0, i32 0, i32 %if_offset
+ %if_value = load i32* %if_ptr ; read the first array at index %if_offset
+ br label %done
+
+else:
+ %else_ptr = getelementptr [8192 x i32]* %scratch1, i32 0, i32 %else_offset
+ %else_value = load i32* %else_ptr ; read the second array at index %else_offset
+ br label %done
+
+done:
+ %value = phi i32 [%if_value, %if], [%else_value, %else] ; the value loaded on whichever path ran
+ store i32 %value, i32 addrspace(1)* %out
+ ret void
+
+ ret void ; NOTE(review): follows the terminator above, so it forms a separate block nothing branches to - looks like a leftover
+
+}
+
+; CHECK-LABEL: {{^}}legal_offset_fi_offset
+; CHECK: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen
+; CHECK: v_add_i32_e32 [[OFFSET:v[0-9]+]], 0x8000
+; CHECK: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
+
+define void @legal_offset_fi_offset(i32 addrspace(1)* %out, i32 %cond, i32 addrspace(1)* %offsets, i32 %if_offset, i32 %else_offset) { ; same shape as the test above, but the store indices are loaded from %offsets
+entry:
+ %scratch0 = alloca [8192 x i32] ; 8192 x 4 bytes = 0x8000 bytes; presumably the source of the 0x8000 in the checks above
+ %scratch1 = alloca [8192 x i32]
+
+ %offset0 = load i32 addrspace(1)* %offsets ; first index, read from element 0 of %offsets
+ %scratchptr0 = getelementptr [8192 x i32]* %scratch0, i32 0, i32 %offset0
+ store i32 %offset0, i32* %scratchptr0 ; the index value itself is what gets stored at that index
+
+ %offsetptr1 = getelementptr i32 addrspace(1)* %offsets, i32 1
+ %offset1 = load i32 addrspace(1)* %offsetptr1 ; second index, read from element 1 of %offsets
+ %scratchptr1 = getelementptr [8192 x i32]* %scratch1, i32 0, i32 %offset1
+ store i32 %offset1, i32* %scratchptr1 ; same pattern for the second array
+
+ %cmp = icmp eq i32 %cond, 0
+ br i1 %cmp, label %if, label %else ; %cond == 0 takes the if path, anything else the else path
+
+if:
+ %if_ptr = getelementptr [8192 x i32]* %scratch0, i32 0, i32 %if_offset
+ %if_value = load i32* %if_ptr ; read the first array at index %if_offset
+ br label %done
+
+else:
+ %else_ptr = getelementptr [8192 x i32]* %scratch1, i32 0, i32 %else_offset
+ %else_value = load i32* %else_ptr ; read the second array at index %else_offset
+ br label %done
+
+done:
+ %value = phi i32 [%if_value, %if], [%else_value, %else] ; the value loaded on whichever path ran
+ store i32 %value, i32 addrspace(1)* %out
+ ret void
+}
+
diff --git a/test/CodeGen/R600/si-triv-disjoint-mem-access.ll b/test/CodeGen/R600/si-triv-disjoint-mem-access.ll
index b2f4a9ff05e1..f6dcb388248a 100644
--- a/test/CodeGen/R600/si-triv-disjoint-mem-access.ll
+++ b/test/CodeGen/R600/si-triv-disjoint-mem-access.ll
@@ -51,8 +51,8 @@ define void @no_reorder_local_load_volatile_global_store_local_load(i32 addrspac
; FUNC-LABEL: @no_reorder_barrier_local_load_global_store_local_load
; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:4
-; CI: buffer_store_dword
; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:8
+; CI: buffer_store_dword
define void @no_reorder_barrier_local_load_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
%ptr0 = load i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4