From ee8648bdac07986a0f1ec897b02ec82a2f144d46 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Fri, 7 Aug 2015 23:01:33 +0000 Subject: Vendor import of llvm trunk r242221: https://llvm.org/svn/llvm-project/llvm/trunk@242221 --- test/CodeGen/AMDGPU/ds_write2.ll | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'test/CodeGen/AMDGPU/ds_write2.ll') diff --git a/test/CodeGen/AMDGPU/ds_write2.ll b/test/CodeGen/AMDGPU/ds_write2.ll index b553d3459e40..d4973e377b59 100644 --- a/test/CodeGen/AMDGPU/ds_write2.ll +++ b/test/CodeGen/AMDGPU/ds_write2.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -strict-whitespace -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -strict-whitespace -check-prefix=SI %s @lds = addrspace(3) global [512 x float] undef, align 4 @lds.f64 = addrspace(3) global [512 x double] undef, align 8 @@ -25,7 +25,7 @@ define void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1 ; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}} -; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:8 +; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:8 ; SI: s_endpgm define void @simple_write2_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 @@ -405,6 +405,19 @@ define void @write2_sgemm_sequence(float addrspace(1)* %C, i32 %lda, i32 %ldb, f ret void } +; CI-LABEL: {{^}}simple_write2_v4f32_superreg_align4: +; CI: ds_write2_b32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} offset0:3 offset1:2{{$}} +; CI: ds_write2_b32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} offset0:1{{$}} +; CI: s_endpgm +define void @simple_write2_v4f32_superreg_align4(<4 x float> addrspace(3)* %out, <4 x float> addrspace(1)* %in) #0 { + %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 + %in.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %in + %val0 = load <4 x float>, <4 x float> addrspace(1)* %in.gep, align 4 + %out.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(3)* %out, i32 %x.i + store <4 x float> %val0, <4 x float> addrspace(3)* %out.gep, align 4 + ret void +} + ; Function Attrs: nounwind readnone declare i32 @llvm.r600.read.tgid.x() #1 -- cgit v1.2.3