aboutsummaryrefslogtreecommitdiff
path: root/test/CodeGen/NVPTX
diff options
context:
space:
mode:
Diffstat (limited to 'test/CodeGen/NVPTX')
-rw-r--r--test/CodeGen/NVPTX/access-non-generic.ll20
-rw-r--r--test/CodeGen/NVPTX/addrspacecast-gvar.ll4
-rw-r--r--test/CodeGen/NVPTX/addrspacecast.ll16
-rw-r--r--test/CodeGen/NVPTX/bug21465.ll4
-rw-r--r--test/CodeGen/NVPTX/bug22246.ll14
-rw-r--r--test/CodeGen/NVPTX/bug22322.ll62
-rw-r--r--test/CodeGen/NVPTX/call-with-alloca-buffer.ll22
-rw-r--r--test/CodeGen/NVPTX/fp16.ll8
-rw-r--r--test/CodeGen/NVPTX/function-align.ll7
-rw-r--r--test/CodeGen/NVPTX/generic-to-nvvm.ll4
-rw-r--r--test/CodeGen/NVPTX/half.ll14
-rw-r--r--test/CodeGen/NVPTX/i1-global.ll2
-rw-r--r--test/CodeGen/NVPTX/i8-param.ll2
-rw-r--r--test/CodeGen/NVPTX/ld-addrspace.ll36
-rw-r--r--test/CodeGen/NVPTX/ld-generic.ll12
-rw-r--r--test/CodeGen/NVPTX/ldu-reg-plus-offset.ll4
-rw-r--r--test/CodeGen/NVPTX/load-sext-i1.ll4
-rw-r--r--test/CodeGen/NVPTX/machine-sink.ll4
-rw-r--r--test/CodeGen/NVPTX/misaligned-vector-ldst.ll8
-rw-r--r--test/CodeGen/NVPTX/noduplicate-syncthreads.ll28
-rw-r--r--test/CodeGen/NVPTX/nounroll.ll37
-rw-r--r--test/CodeGen/NVPTX/nvvm-reflect.ll35
-rw-r--r--test/CodeGen/NVPTX/pr13291-i1-store.ll2
-rw-r--r--test/CodeGen/NVPTX/pr16278.ll2
-rw-r--r--test/CodeGen/NVPTX/pr17529.ll6
-rw-r--r--test/CodeGen/NVPTX/ptx-version-30.ll6
-rw-r--r--test/CodeGen/NVPTX/ptx-version-31.ll6
-rw-r--r--test/CodeGen/NVPTX/refl1.ll2
-rw-r--r--test/CodeGen/NVPTX/sched1.ll16
-rw-r--r--test/CodeGen/NVPTX/sched2.ll16
-rw-r--r--test/CodeGen/NVPTX/shift-parts.ll8
-rw-r--r--test/CodeGen/NVPTX/simple-call.ll2
-rw-r--r--test/CodeGen/NVPTX/sm-version-30.ll1
-rw-r--r--test/CodeGen/NVPTX/sm-version-32.ll7
-rw-r--r--test/CodeGen/NVPTX/sm-version-35.ll1
-rw-r--r--test/CodeGen/NVPTX/sm-version-37.ll7
-rw-r--r--test/CodeGen/NVPTX/sm-version-50.ll7
-rw-r--r--test/CodeGen/NVPTX/sm-version-52.ll7
-rw-r--r--test/CodeGen/NVPTX/sm-version-53.ll7
-rw-r--r--test/CodeGen/NVPTX/symbol-naming.ll2
-rw-r--r--test/CodeGen/NVPTX/vector-compare.ll4
-rw-r--r--test/CodeGen/NVPTX/vector-loads.ll12
-rw-r--r--test/CodeGen/NVPTX/vector-select.ll6
-rw-r--r--test/CodeGen/NVPTX/weak-global.ll2
44 files changed, 328 insertions, 148 deletions
diff --git a/test/CodeGen/NVPTX/access-non-generic.ll b/test/CodeGen/NVPTX/access-non-generic.ll
index c225abf0fd85..e709302918f5 100644
--- a/test/CodeGen/NVPTX/access-non-generic.ll
+++ b/test/CodeGen/NVPTX/access-non-generic.ll
@@ -18,7 +18,7 @@ define float @ld_st_shared_f32(i32 %i, float %v) {
; IR-NOT: addrspacecast
; PTX-LABEL: ld_st_shared_f32(
; load cast
- %1 = load float* addrspacecast (float addrspace(3)* @scalar to float*), align 4
+ %1 = load float, float* addrspacecast (float addrspace(3)* @scalar to float*), align 4
; PTX: ld.shared.f32 %f{{[0-9]+}}, [scalar];
; store cast
store float %v, float* addrspacecast (float addrspace(3)* @scalar to float*), align 4
@@ -29,7 +29,7 @@ define float @ld_st_shared_f32(i32 %i, float %v) {
; cast; load
%2 = addrspacecast float addrspace(3)* @scalar to float*
- %3 = load float* %2, align 4
+ %3 = load float, float* %2, align 4
; PTX: ld.shared.f32 %f{{[0-9]+}}, [scalar];
; cast; store
store float %v, float* %2, align 4
@@ -38,17 +38,17 @@ define float @ld_st_shared_f32(i32 %i, float %v) {
; PTX: bar.sync 0;
; load gep cast
- %4 = load float* getelementptr inbounds ([10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5), align 4
+ %4 = load float, float* getelementptr inbounds ([10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5), align 4
; PTX: ld.shared.f32 %f{{[0-9]+}}, [array+20];
; store gep cast
- store float %v, float* getelementptr inbounds ([10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5), align 4
+ store float %v, float* getelementptr inbounds ([10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5), align 4
; PTX: st.shared.f32 [array+20], %f{{[0-9]+}};
call void @llvm.cuda.syncthreads()
; PTX: bar.sync 0;
; gep cast; load
- %5 = getelementptr inbounds [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5
- %6 = load float* %5, align 4
+ %5 = getelementptr inbounds [10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5
+ %6 = load float, float* %5, align 4
; PTX: ld.shared.f32 %f{{[0-9]+}}, [array+20];
; gep cast; store
store float %v, float* %5, align 4
@@ -58,8 +58,8 @@ define float @ld_st_shared_f32(i32 %i, float %v) {
; cast; gep; load
%7 = addrspacecast [10 x float] addrspace(3)* @array to [10 x float]*
- %8 = getelementptr inbounds [10 x float]* %7, i32 0, i32 %i
- %9 = load float* %8, align 4
+ %8 = getelementptr inbounds [10 x float], [10 x float]* %7, i32 0, i32 %i
+ %9 = load float, float* %8, align 4
; PTX: ld.shared.f32 %f{{[0-9]+}}, [%{{(r|rl|rd)[0-9]+}}];
; cast; gep; store
store float %v, float* %8, align 4
@@ -78,10 +78,10 @@ define float @ld_st_shared_f32(i32 %i, float %v) {
; addrspacecast with a bitcast.
define i32 @ld_int_from_float() {
; IR-LABEL: @ld_int_from_float
-; IR: load i32 addrspace(3)* bitcast (float addrspace(3)* @scalar to i32 addrspace(3)*)
+; IR: load i32, i32 addrspace(3)* bitcast (float addrspace(3)* @scalar to i32 addrspace(3)*)
; PTX-LABEL: ld_int_from_float(
; PTX: ld.shared.u{{(32|64)}}
- %1 = load i32* addrspacecast(float addrspace(3)* @scalar to i32*), align 4
+ %1 = load i32, i32* addrspacecast(float addrspace(3)* @scalar to i32*), align 4
ret i32 %1
}
diff --git a/test/CodeGen/NVPTX/addrspacecast-gvar.ll b/test/CodeGen/NVPTX/addrspacecast-gvar.ll
index 6afbdb8a429f..1e2fde4b858a 100644
--- a/test/CodeGen/NVPTX/addrspacecast-gvar.ll
+++ b/test/CodeGen/NVPTX/addrspacecast-gvar.ll
@@ -3,7 +3,11 @@
; CHECK: .visible .global .align 4 .u32 g = 42;
; CHECK: .visible .global .align 4 .u32 g2 = generic(g);
; CHECK: .visible .global .align 4 .u32 g3 = g;
+; CHECK: .visible .global .align 8 .u32 g4[2] = {0, generic(g)};
+; CHECK: .visible .global .align 8 .u32 g5[2] = {0, generic(g)+8};
@g = addrspace(1) global i32 42
@g2 = addrspace(1) global i32* addrspacecast (i32 addrspace(1)* @g to i32*)
@g3 = addrspace(1) global i32 addrspace(1)* @g
+@g4 = constant {i32*, i32*} {i32* null, i32* addrspacecast (i32 addrspace(1)* @g to i32*)}
+@g5 = constant {i32*, i32*} {i32* null, i32* addrspacecast (i32 addrspace(1)* getelementptr (i32, i32 addrspace(1)* @g, i32 2) to i32*)}
diff --git a/test/CodeGen/NVPTX/addrspacecast.ll b/test/CodeGen/NVPTX/addrspacecast.ll
index 03b9a9844752..42e67ca8ce9b 100644
--- a/test/CodeGen/NVPTX/addrspacecast.ll
+++ b/test/CodeGen/NVPTX/addrspacecast.ll
@@ -10,7 +10,7 @@ define i32 @conv1(i32 addrspace(1)* %ptr) {
; PTX64: cvta.global.u64
; PTX64: ld.u32
%genptr = addrspacecast i32 addrspace(1)* %ptr to i32*
- %val = load i32* %genptr
+ %val = load i32, i32* %genptr
ret i32 %val
}
@@ -22,7 +22,7 @@ define i32 @conv2(i32 addrspace(3)* %ptr) {
; PTX64: cvta.shared.u64
; PTX64: ld.u32
%genptr = addrspacecast i32 addrspace(3)* %ptr to i32*
- %val = load i32* %genptr
+ %val = load i32, i32* %genptr
ret i32 %val
}
@@ -34,7 +34,7 @@ define i32 @conv3(i32 addrspace(4)* %ptr) {
; PTX64: cvta.const.u64
; PTX64: ld.u32
%genptr = addrspacecast i32 addrspace(4)* %ptr to i32*
- %val = load i32* %genptr
+ %val = load i32, i32* %genptr
ret i32 %val
}
@@ -46,7 +46,7 @@ define i32 @conv4(i32 addrspace(5)* %ptr) {
; PTX64: cvta.local.u64
; PTX64: ld.u32
%genptr = addrspacecast i32 addrspace(5)* %ptr to i32*
- %val = load i32* %genptr
+ %val = load i32, i32* %genptr
ret i32 %val
}
@@ -58,7 +58,7 @@ define i32 @conv5(i32* %ptr) {
; PTX64: cvta.to.global.u64
; PTX64: ld.global.u32
%specptr = addrspacecast i32* %ptr to i32 addrspace(1)*
- %val = load i32 addrspace(1)* %specptr
+ %val = load i32, i32 addrspace(1)* %specptr
ret i32 %val
}
@@ -70,7 +70,7 @@ define i32 @conv6(i32* %ptr) {
; PTX64: cvta.to.shared.u64
; PTX64: ld.shared.u32
%specptr = addrspacecast i32* %ptr to i32 addrspace(3)*
- %val = load i32 addrspace(3)* %specptr
+ %val = load i32, i32 addrspace(3)* %specptr
ret i32 %val
}
@@ -82,7 +82,7 @@ define i32 @conv7(i32* %ptr) {
; PTX64: cvta.to.const.u64
; PTX64: ld.const.u32
%specptr = addrspacecast i32* %ptr to i32 addrspace(4)*
- %val = load i32 addrspace(4)* %specptr
+ %val = load i32, i32 addrspace(4)* %specptr
ret i32 %val
}
@@ -94,6 +94,6 @@ define i32 @conv8(i32* %ptr) {
; PTX64: cvta.to.local.u64
; PTX64: ld.local.u32
%specptr = addrspacecast i32* %ptr to i32 addrspace(5)*
- %val = load i32 addrspace(5)* %specptr
+ %val = load i32, i32 addrspace(5)* %specptr
ret i32 %val
}
diff --git a/test/CodeGen/NVPTX/bug21465.ll b/test/CodeGen/NVPTX/bug21465.ll
index cacffceac517..76af386c6516 100644
--- a/test/CodeGen/NVPTX/bug21465.ll
+++ b/test/CodeGen/NVPTX/bug21465.ll
@@ -11,8 +11,8 @@ entry:
; CHECK-LABEL @_Z22TakesStruct1SPi
; CHECK: bitcast %struct.S* %input to i8*
; CHECK: call i8 addrspace(101)* @llvm.nvvm.ptr.gen.to.param.p101i8.p0i8
- %b = getelementptr inbounds %struct.S* %input, i64 0, i32 1
- %0 = load i32* %b, align 4
+ %b = getelementptr inbounds %struct.S, %struct.S* %input, i64 0, i32 1
+ %0 = load i32, i32* %b, align 4
store i32 %0, i32* %output, align 4
ret void
}
diff --git a/test/CodeGen/NVPTX/bug22246.ll b/test/CodeGen/NVPTX/bug22246.ll
new file mode 100644
index 000000000000..70e7e12336e7
--- /dev/null
+++ b/test/CodeGen/NVPTX/bug22246.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+; CHECK-LABEL: _Z3foobbbPb
+define void @_Z3foobbbPb(i1 zeroext %p1, i1 zeroext %p2, i1 zeroext %p3, i8* nocapture %output) {
+entry:
+; CHECK: selp.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %p{{[0-9]+}}
+ %.sink.v = select i1 %p1, i1 %p2, i1 %p3
+ %frombool5 = zext i1 %.sink.v to i8
+ store i8 %frombool5, i8* %output, align 1
+ ret void
+}
diff --git a/test/CodeGen/NVPTX/bug22322.ll b/test/CodeGen/NVPTX/bug22322.ll
new file mode 100644
index 000000000000..97863b9ea546
--- /dev/null
+++ b/test/CodeGen/NVPTX/bug22322.ll
@@ -0,0 +1,62 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+%class.float3 = type { float, float, float }
+
+; Function Attrs: nounwind
+; CHECK-LABEL: some_kernel
+define void @some_kernel(%class.float3* nocapture %dst) #0 {
+_ZL11compute_vecRK6float3jb.exit:
+ %ret_vec.sroa.8.i = alloca float, align 4
+ %0 = tail call i32 @llvm.ptx.read.ctaid.x()
+ %1 = tail call i32 @llvm.ptx.read.ntid.x()
+ %2 = mul nsw i32 %1, %0
+ %3 = tail call i32 @llvm.ptx.read.tid.x()
+ %4 = add nsw i32 %2, %3
+ %5 = zext i32 %4 to i64
+ %6 = bitcast float* %ret_vec.sroa.8.i to i8*
+ call void @llvm.lifetime.start(i64 4, i8* %6)
+ %7 = and i32 %4, 15
+ %8 = icmp eq i32 %7, 0
+ %9 = select i1 %8, float 0.000000e+00, float -1.000000e+00
+ store float %9, float* %ret_vec.sroa.8.i, align 4
+; CHECK: setp.lt.f32 %p{{[0-9]+}}, %f{{[0-9]+}}, 0f00000000
+ %10 = fcmp olt float %9, 0.000000e+00
+ %ret_vec.sroa.8.i.val = load float, float* %ret_vec.sroa.8.i, align 4
+ %11 = select i1 %10, float 0.000000e+00, float %ret_vec.sroa.8.i.val
+ call void @llvm.lifetime.end(i64 4, i8* %6)
+ %12 = getelementptr inbounds %class.float3, %class.float3* %dst, i64 %5, i32 0
+ store float 0.000000e+00, float* %12, align 4
+ %13 = getelementptr inbounds %class.float3, %class.float3* %dst, i64 %5, i32 1
+ store float %11, float* %13, align 4
+ %14 = getelementptr inbounds %class.float3, %class.float3* %dst, i64 %5, i32 2
+ store float 0.000000e+00, float* %14, align 4
+ ret void
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.ptx.read.ctaid.x() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.ptx.read.ntid.x() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.ptx.read.tid.x() #1
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture) #2
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture) #2
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind }
+
+!nvvm.annotations = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{void (%class.float3*)* @some_kernel, !"kernel", i32 1}
+!1 = !{!"clang version 3.5.1 (tags/RELEASE_351/final)"}
diff --git a/test/CodeGen/NVPTX/call-with-alloca-buffer.ll b/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
index 8483112381f1..58b191129917 100644
--- a/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
+++ b/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
@@ -27,22 +27,22 @@ entry:
; CHECK: ld.f32 %f[[A0_REG:[0-9]+]], [%rd[[A_REG]]]
; CHECK: st.f32 [%SP+0], %f[[A0_REG]]
- %0 = load float* %a, align 4
+ %0 = load float, float* %a, align 4
%1 = bitcast [16 x i8]* %buf to float*
store float %0, float* %1, align 4
- %arrayidx2 = getelementptr inbounds float* %a, i64 1
- %2 = load float* %arrayidx2, align 4
- %arrayidx3 = getelementptr inbounds [16 x i8]* %buf, i64 0, i64 1
+ %arrayidx2 = getelementptr inbounds float, float* %a, i64 1
+ %2 = load float, float* %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i64 0, i64 1
%3 = bitcast i8* %arrayidx3 to float*
store float %2, float* %3, align 4
- %arrayidx4 = getelementptr inbounds float* %a, i64 2
- %4 = load float* %arrayidx4, align 4
- %arrayidx5 = getelementptr inbounds [16 x i8]* %buf, i64 0, i64 2
+ %arrayidx4 = getelementptr inbounds float, float* %a, i64 2
+ %4 = load float, float* %arrayidx4, align 4
+ %arrayidx5 = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i64 0, i64 2
%5 = bitcast i8* %arrayidx5 to float*
store float %4, float* %5, align 4
- %arrayidx6 = getelementptr inbounds float* %a, i64 3
- %6 = load float* %arrayidx6, align 4
- %arrayidx7 = getelementptr inbounds [16 x i8]* %buf, i64 0, i64 3
+ %arrayidx6 = getelementptr inbounds float, float* %a, i64 3
+ %6 = load float, float* %arrayidx6, align 4
+ %arrayidx7 = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i64 0, i64 3
%7 = bitcast i8* %arrayidx7 to float*
store float %6, float* %7, align 4
@@ -54,7 +54,7 @@ entry:
; CHECK-NEXT: call.uni
; CHECK-NEXT: callee,
- %arraydecay = getelementptr inbounds [16 x i8]* %buf, i64 0, i64 0
+ %arraydecay = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i64 0, i64 0
call void @callee(float* %a, i8* %arraydecay) #2
ret void
}
diff --git a/test/CodeGen/NVPTX/fp16.ll b/test/CodeGen/NVPTX/fp16.ll
index 8770399f2ec9..b85eed0f6c7f 100644
--- a/test/CodeGen/NVPTX/fp16.ll
+++ b/test/CodeGen/NVPTX/fp16.ll
@@ -8,7 +8,7 @@ declare i16 @llvm.convert.to.fp16.f64(double) nounwind readnone
; CHECK-LABEL: @test_convert_fp16_to_fp32
; CHECK: cvt.f32.f16
define void @test_convert_fp16_to_fp32(float addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
- %val = load i16 addrspace(1)* %in, align 2
+ %val = load i16, i16 addrspace(1)* %in, align 2
%cvt = call float @llvm.convert.from.fp16.f32(i16 %val) nounwind readnone
store float %cvt, float addrspace(1)* %out, align 4
ret void
@@ -18,7 +18,7 @@ define void @test_convert_fp16_to_fp32(float addrspace(1)* noalias %out, i16 add
; CHECK-LABEL: @test_convert_fp16_to_fp64
; CHECK: cvt.f64.f16
define void @test_convert_fp16_to_fp64(double addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
- %val = load i16 addrspace(1)* %in, align 2
+ %val = load i16, i16 addrspace(1)* %in, align 2
%cvt = call double @llvm.convert.from.fp16.f64(i16 %val) nounwind readnone
store double %cvt, double addrspace(1)* %out, align 4
ret void
@@ -28,7 +28,7 @@ define void @test_convert_fp16_to_fp64(double addrspace(1)* noalias %out, i16 ad
; CHECK-LABEL: @test_convert_fp32_to_fp16
; CHECK: cvt.rn.f16.f32
define void @test_convert_fp32_to_fp16(i16 addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
- %val = load float addrspace(1)* %in, align 2
+ %val = load float, float addrspace(1)* %in, align 2
%cvt = call i16 @llvm.convert.to.fp16.f32(float %val) nounwind readnone
store i16 %cvt, i16 addrspace(1)* %out, align 4
ret void
@@ -38,7 +38,7 @@ define void @test_convert_fp32_to_fp16(i16 addrspace(1)* noalias %out, float add
; CHECK-LABEL: @test_convert_fp64_to_fp16
; CHECK: cvt.rn.f16.f64
define void @test_convert_fp64_to_fp16(i16 addrspace(1)* noalias %out, double addrspace(1)* noalias %in) nounwind {
- %val = load double addrspace(1)* %in, align 2
+ %val = load double, double addrspace(1)* %in, align 2
%cvt = call i16 @llvm.convert.to.fp16.f64(double %val) nounwind readnone
store i16 %cvt, i16 addrspace(1)* %out, align 4
ret void
diff --git a/test/CodeGen/NVPTX/function-align.ll b/test/CodeGen/NVPTX/function-align.ll
new file mode 100644
index 000000000000..e7abfb128f58
--- /dev/null
+++ b/test/CodeGen/NVPTX/function-align.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+; CHECK-NOT: .align 2
+define ptx_device void @foo() align 2 {
+; CHECK-LABEL: .func foo
+ ret void
+}
diff --git a/test/CodeGen/NVPTX/generic-to-nvvm.ll b/test/CodeGen/NVPTX/generic-to-nvvm.ll
index fb63d6ed575f..66917d5cb182 100644
--- a/test/CodeGen/NVPTX/generic-to-nvvm.ll
+++ b/test/CodeGen/NVPTX/generic-to-nvvm.ll
@@ -13,9 +13,9 @@ target triple = "nvptx-nvidia-cuda"
define void @foo(i32* %a, i32* %b) {
; CHECK: cvta.global.u32
- %ld1 = load i32* @myglobal
+ %ld1 = load i32, i32* @myglobal
; CHECK: cvta.global.u32
- %ld2 = load i32* @myconst
+ %ld2 = load i32, i32* @myconst
store i32 %ld1, i32* %a
store i32 %ld2, i32* %b
ret void
diff --git a/test/CodeGen/NVPTX/half.ll b/test/CodeGen/NVPTX/half.ll
index aa08cc78e91a..b99524162e65 100644
--- a/test/CodeGen/NVPTX/half.ll
+++ b/test/CodeGen/NVPTX/half.ll
@@ -4,7 +4,7 @@ define void @test_load_store(half addrspace(1)* %in, half addrspace(1)* %out) {
; CHECK-LABEL: @test_load_store
; CHECK: ld.global.u16 [[TMP:%rs[0-9]+]], [{{%r[0-9]+}}]
; CHECK: st.global.u16 [{{%r[0-9]+}}], [[TMP]]
- %val = load half addrspace(1)* %in
+ %val = load half, half addrspace(1)* %in
store half %val, half addrspace(1) * %out
ret void
}
@@ -13,7 +13,7 @@ define void @test_bitcast_from_half(half addrspace(1)* %in, i16 addrspace(1)* %o
; CHECK-LABEL: @test_bitcast_from_half
; CHECK: ld.global.u16 [[TMP:%rs[0-9]+]], [{{%r[0-9]+}}]
; CHECK: st.global.u16 [{{%r[0-9]+}}], [[TMP]]
- %val = load half addrspace(1) * %in
+ %val = load half, half addrspace(1) * %in
%val_int = bitcast half %val to i16
store i16 %val_int, i16 addrspace(1)* %out
ret void
@@ -23,7 +23,7 @@ define void @test_bitcast_to_half(half addrspace(1)* %out, i16 addrspace(1)* %in
; CHECK-LABEL: @test_bitcast_to_half
; CHECK: ld.global.u16 [[TMP:%rs[0-9]+]], [{{%r[0-9]+}}]
; CHECK: st.global.u16 [{{%r[0-9]+}}], [[TMP]]
- %val = load i16 addrspace(1)* %in
+ %val = load i16, i16 addrspace(1)* %in
%val_fp = bitcast i16 %val to half
store half %val_fp, half addrspace(1)* %out
ret void
@@ -33,7 +33,7 @@ define void @test_extend32(half addrspace(1)* %in, float addrspace(1)* %out) {
; CHECK-LABEL: @test_extend32
; CHECK: cvt.f32.f16
- %val16 = load half addrspace(1)* %in
+ %val16 = load half, half addrspace(1)* %in
%val32 = fpext half %val16 to float
store float %val32, float addrspace(1)* %out
ret void
@@ -43,7 +43,7 @@ define void @test_extend64(half addrspace(1)* %in, double addrspace(1)* %out) {
; CHECK-LABEL: @test_extend64
; CHECK: cvt.f64.f16
- %val16 = load half addrspace(1)* %in
+ %val16 = load half, half addrspace(1)* %in
%val64 = fpext half %val16 to double
store double %val64, double addrspace(1)* %out
ret void
@@ -53,7 +53,7 @@ define void @test_trunc32(float addrspace(1)* %in, half addrspace(1)* %out) {
; CHECK-LABEL: test_trunc32
; CHECK: cvt.rn.f16.f32
- %val32 = load float addrspace(1)* %in
+ %val32 = load float, float addrspace(1)* %in
%val16 = fptrunc float %val32 to half
store half %val16, half addrspace(1)* %out
ret void
@@ -63,7 +63,7 @@ define void @test_trunc64(double addrspace(1)* %in, half addrspace(1)* %out) {
; CHECK-LABEL: @test_trunc64
; CHECK: cvt.rn.f16.f64
- %val32 = load double addrspace(1)* %in
+ %val32 = load double, double addrspace(1)* %in
%val16 = fptrunc double %val32 to half
store half %val16, half addrspace(1)* %out
ret void
diff --git a/test/CodeGen/NVPTX/i1-global.ll b/test/CodeGen/NVPTX/i1-global.ll
index e3fe08e5f874..35d77b4b44d2 100644
--- a/test/CodeGen/NVPTX/i1-global.ll
+++ b/test/CodeGen/NVPTX/i1-global.ll
@@ -8,7 +8,7 @@ target triple = "nvptx-nvidia-cuda"
define void @foo(i1 %p, i32* %out) {
- %ld = load i1 addrspace(1)* @mypred
+ %ld = load i1, i1 addrspace(1)* @mypred
%val = zext i1 %ld to i32
store i32 %val, i32* %out
ret void
diff --git a/test/CodeGen/NVPTX/i8-param.ll b/test/CodeGen/NVPTX/i8-param.ll
index 84daa9f66316..6a1e3a0e1a0d 100644
--- a/test/CodeGen/NVPTX/i8-param.ll
+++ b/test/CodeGen/NVPTX/i8-param.ll
@@ -13,7 +13,7 @@ define i8 @callee(i8 %a) {
; CHECK: .visible .func caller
define void @caller(i8* %a) {
; CHECK: ld.u8
- %val = load i8* %a
+ %val = load i8, i8* %a
%ret = tail call i8 @callee(i8 %val)
; CHECK: ld.param.b32
store i8 %ret, i8* %a
diff --git a/test/CodeGen/NVPTX/ld-addrspace.ll b/test/CodeGen/NVPTX/ld-addrspace.ll
index f33659c92e84..0018e6177be8 100644
--- a/test/CodeGen/NVPTX/ld-addrspace.ll
+++ b/test/CodeGen/NVPTX/ld-addrspace.ll
@@ -8,7 +8,7 @@ define i8 @ld_global_i8(i8 addrspace(1)* %ptr) {
; PTX32: ret
; PTX64: ld.global.u8 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i8 addrspace(1)* %ptr
+ %a = load i8, i8 addrspace(1)* %ptr
ret i8 %a
}
@@ -17,7 +17,7 @@ define i8 @ld_shared_i8(i8 addrspace(3)* %ptr) {
; PTX32: ret
; PTX64: ld.shared.u8 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i8 addrspace(3)* %ptr
+ %a = load i8, i8 addrspace(3)* %ptr
ret i8 %a
}
@@ -26,7 +26,7 @@ define i8 @ld_local_i8(i8 addrspace(5)* %ptr) {
; PTX32: ret
; PTX64: ld.local.u8 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i8 addrspace(5)* %ptr
+ %a = load i8, i8 addrspace(5)* %ptr
ret i8 %a
}
@@ -36,7 +36,7 @@ define i16 @ld_global_i16(i16 addrspace(1)* %ptr) {
; PTX32: ret
; PTX64: ld.global.u16 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i16 addrspace(1)* %ptr
+ %a = load i16, i16 addrspace(1)* %ptr
ret i16 %a
}
@@ -45,7 +45,7 @@ define i16 @ld_shared_i16(i16 addrspace(3)* %ptr) {
; PTX32: ret
; PTX64: ld.shared.u16 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i16 addrspace(3)* %ptr
+ %a = load i16, i16 addrspace(3)* %ptr
ret i16 %a
}
@@ -54,7 +54,7 @@ define i16 @ld_local_i16(i16 addrspace(5)* %ptr) {
; PTX32: ret
; PTX64: ld.local.u16 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i16 addrspace(5)* %ptr
+ %a = load i16, i16 addrspace(5)* %ptr
ret i16 %a
}
@@ -64,7 +64,7 @@ define i32 @ld_global_i32(i32 addrspace(1)* %ptr) {
; PTX32: ret
; PTX64: ld.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i32 addrspace(1)* %ptr
+ %a = load i32, i32 addrspace(1)* %ptr
ret i32 %a
}
@@ -73,7 +73,7 @@ define i32 @ld_shared_i32(i32 addrspace(3)* %ptr) {
; PTX32: ret
; PTX64: ld.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i32 addrspace(3)* %ptr
+ %a = load i32, i32 addrspace(3)* %ptr
ret i32 %a
}
@@ -82,7 +82,7 @@ define i32 @ld_local_i32(i32 addrspace(5)* %ptr) {
; PTX32: ret
; PTX64: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i32 addrspace(5)* %ptr
+ %a = load i32, i32 addrspace(5)* %ptr
ret i32 %a
}
@@ -92,7 +92,7 @@ define i64 @ld_global_i64(i64 addrspace(1)* %ptr) {
; PTX32: ret
; PTX64: ld.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i64 addrspace(1)* %ptr
+ %a = load i64, i64 addrspace(1)* %ptr
ret i64 %a
}
@@ -101,7 +101,7 @@ define i64 @ld_shared_i64(i64 addrspace(3)* %ptr) {
; PTX32: ret
; PTX64: ld.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i64 addrspace(3)* %ptr
+ %a = load i64, i64 addrspace(3)* %ptr
ret i64 %a
}
@@ -110,7 +110,7 @@ define i64 @ld_local_i64(i64 addrspace(5)* %ptr) {
; PTX32: ret
; PTX64: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i64 addrspace(5)* %ptr
+ %a = load i64, i64 addrspace(5)* %ptr
ret i64 %a
}
@@ -120,7 +120,7 @@ define float @ld_global_f32(float addrspace(1)* %ptr) {
; PTX32: ret
; PTX64: ld.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load float addrspace(1)* %ptr
+ %a = load float, float addrspace(1)* %ptr
ret float %a
}
@@ -129,7 +129,7 @@ define float @ld_shared_f32(float addrspace(3)* %ptr) {
; PTX32: ret
; PTX64: ld.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load float addrspace(3)* %ptr
+ %a = load float, float addrspace(3)* %ptr
ret float %a
}
@@ -138,7 +138,7 @@ define float @ld_local_f32(float addrspace(5)* %ptr) {
; PTX32: ret
; PTX64: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load float addrspace(5)* %ptr
+ %a = load float, float addrspace(5)* %ptr
ret float %a
}
@@ -148,7 +148,7 @@ define double @ld_global_f64(double addrspace(1)* %ptr) {
; PTX32: ret
; PTX64: ld.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load double addrspace(1)* %ptr
+ %a = load double, double addrspace(1)* %ptr
ret double %a
}
@@ -157,7 +157,7 @@ define double @ld_shared_f64(double addrspace(3)* %ptr) {
; PTX32: ret
; PTX64: ld.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load double addrspace(3)* %ptr
+ %a = load double, double addrspace(3)* %ptr
ret double %a
}
@@ -166,6 +166,6 @@ define double @ld_local_f64(double addrspace(5)* %ptr) {
; PTX32: ret
; PTX64: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load double addrspace(5)* %ptr
+ %a = load double, double addrspace(5)* %ptr
ret double %a
}
diff --git a/test/CodeGen/NVPTX/ld-generic.ll b/test/CodeGen/NVPTX/ld-generic.ll
index d629e0ecc647..44cfe6551b99 100644
--- a/test/CodeGen/NVPTX/ld-generic.ll
+++ b/test/CodeGen/NVPTX/ld-generic.ll
@@ -8,7 +8,7 @@ define i8 @ld_global_i8(i8 addrspace(0)* %ptr) {
; PTX32: ret
; PTX64: ld.u8 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i8 addrspace(0)* %ptr
+ %a = load i8, i8 addrspace(0)* %ptr
ret i8 %a
}
@@ -18,7 +18,7 @@ define i16 @ld_global_i16(i16 addrspace(0)* %ptr) {
; PTX32: ret
; PTX64: ld.u16 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i16 addrspace(0)* %ptr
+ %a = load i16, i16 addrspace(0)* %ptr
ret i16 %a
}
@@ -28,7 +28,7 @@ define i32 @ld_global_i32(i32 addrspace(0)* %ptr) {
; PTX32: ret
; PTX64: ld.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i32 addrspace(0)* %ptr
+ %a = load i32, i32 addrspace(0)* %ptr
ret i32 %a
}
@@ -38,7 +38,7 @@ define i64 @ld_global_i64(i64 addrspace(0)* %ptr) {
; PTX32: ret
; PTX64: ld.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i64 addrspace(0)* %ptr
+ %a = load i64, i64 addrspace(0)* %ptr
ret i64 %a
}
@@ -48,7 +48,7 @@ define float @ld_global_f32(float addrspace(0)* %ptr) {
; PTX32: ret
; PTX64: ld.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load float addrspace(0)* %ptr
+ %a = load float, float addrspace(0)* %ptr
ret float %a
}
@@ -58,6 +58,6 @@ define double @ld_global_f64(double addrspace(0)* %ptr) {
; PTX32: ret
; PTX64: ld.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load double addrspace(0)* %ptr
+ %a = load double, double addrspace(0)* %ptr
ret double %a
}
diff --git a/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll b/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll
index fd35a7503901..ec96a493021a 100644
--- a/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll
+++ b/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll
@@ -6,9 +6,9 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
define void @reg_plus_offset(i32* %a) {
; CHECK: ldu.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}+32];
; CHECK: ldu.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}+36];
- %p2 = getelementptr i32* %a, i32 8
+ %p2 = getelementptr i32, i32* %a, i32 8
%t1 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p2, i32 4)
- %p3 = getelementptr i32* %a, i32 9
+ %p3 = getelementptr i32, i32* %a, i32 9
%t2 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p3, i32 4)
%t3 = mul i32 %t1, %t2
store i32 %t3, i32* %a
diff --git a/test/CodeGen/NVPTX/load-sext-i1.ll b/test/CodeGen/NVPTX/load-sext-i1.ll
index d836740eed94..9fc98a45f59a 100644
--- a/test/CodeGen/NVPTX/load-sext-i1.ll
+++ b/test/CodeGen/NVPTX/load-sext-i1.ll
@@ -6,8 +6,8 @@ target triple = "nvptx-nvidia-cuda"
define void @main(i1* %a1, i32 %a2, i32* %arg3) {
; CHECK: ld.u8
; CHECK-NOT: ld.u1
- %t1 = getelementptr i1* %a1, i32 %a2
- %t2 = load i1* %t1
+ %t1 = getelementptr i1, i1* %a1, i32 %a2
+ %t2 = load i1, i1* %t1
%t3 = sext i1 %t2 to i32
store i32 %t3, i32* %arg3
ret void
diff --git a/test/CodeGen/NVPTX/machine-sink.ll b/test/CodeGen/NVPTX/machine-sink.ll
index 3614bea16534..65ba141c41d9 100644
--- a/test/CodeGen/NVPTX/machine-sink.ll
+++ b/test/CodeGen/NVPTX/machine-sink.ll
@@ -14,8 +14,8 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
define float @post_dominate(float %x, i1 %cond) {
; CHECK-LABEL: post_dominate(
entry:
- %0 = load float* addrspacecast (float addrspace(3)* @scalar1 to float*), align 4
- %1 = load float* addrspacecast (float addrspace(3)* @scalar2 to float*), align 4
+ %0 = load float, float* addrspacecast (float addrspace(3)* @scalar1 to float*), align 4
+ %1 = load float, float* addrspacecast (float addrspace(3)* @scalar2 to float*), align 4
; CHECK: ld.shared.f32
; CHECK: ld.shared.f32
%2 = fmul float %0, %0
diff --git a/test/CodeGen/NVPTX/misaligned-vector-ldst.ll b/test/CodeGen/NVPTX/misaligned-vector-ldst.ll
index 90c9c4306de7..2ad72b018851 100644
--- a/test/CodeGen/NVPTX/misaligned-vector-ldst.ll
+++ b/test/CodeGen/NVPTX/misaligned-vector-ldst.ll
@@ -10,7 +10,7 @@ define <4 x float> @t1(i8* %p1) {
; CHECK-NOT: ld.f32
; CHECK: ld.u8
%cast = bitcast i8* %p1 to <4 x float>*
- %r = load <4 x float>* %cast, align 1
+ %r = load <4 x float>, <4 x float>* %cast, align 1
ret <4 x float> %r
}
@@ -20,7 +20,7 @@ define <4 x float> @t2(i8* %p1) {
; CHECK-NOT: ld.v2
; CHECK: ld.f32
%cast = bitcast i8* %p1 to <4 x float>*
- %r = load <4 x float>* %cast, align 4
+ %r = load <4 x float>, <4 x float>* %cast, align 4
ret <4 x float> %r
}
@@ -29,7 +29,7 @@ define <4 x float> @t3(i8* %p1) {
; CHECK-NOT: ld.v4
; CHECK: ld.v2
%cast = bitcast i8* %p1 to <4 x float>*
- %r = load <4 x float>* %cast, align 8
+ %r = load <4 x float>, <4 x float>* %cast, align 8
ret <4 x float> %r
}
@@ -37,7 +37,7 @@ define <4 x float> @t3(i8* %p1) {
define <4 x float> @t4(i8* %p1) {
; CHECK: ld.v4
%cast = bitcast i8* %p1 to <4 x float>*
- %r = load <4 x float>* %cast, align 16
+ %r = load <4 x float>, <4 x float>* %cast, align 16
ret <4 x float> %r
}
diff --git a/test/CodeGen/NVPTX/noduplicate-syncthreads.ll b/test/CodeGen/NVPTX/noduplicate-syncthreads.ll
index 841bbc3a517c..2fec31b3791d 100644
--- a/test/CodeGen/NVPTX/noduplicate-syncthreads.ll
+++ b/test/CodeGen/NVPTX/noduplicate-syncthreads.ll
@@ -11,16 +11,16 @@ define void @foo(float* %output) #1 {
entry:
%output.addr = alloca float*, align 8
store float* %output, float** %output.addr, align 8
- %0 = load float** %output.addr, align 8
- %arrayidx = getelementptr inbounds float* %0, i64 0
- %1 = load float* %arrayidx, align 4
+ %0 = load float*, float** %output.addr, align 8
+ %arrayidx = getelementptr inbounds float, float* %0, i64 0
+ %1 = load float, float* %arrayidx, align 4
%conv = fpext float %1 to double
%cmp = fcmp olt double %conv, 1.000000e+01
br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %entry
- %2 = load float** %output.addr, align 8
- %3 = load float* %2, align 4
+ %2 = load float*, float** %output.addr, align 8
+ %3 = load float, float* %2, align 4
%conv1 = fpext float %3 to double
%add = fadd double %conv1, 1.000000e+00
%conv2 = fptrunc double %add to float
@@ -28,8 +28,8 @@ if.then: ; preds = %entry
br label %if.end
if.else: ; preds = %entry
- %4 = load float** %output.addr, align 8
- %5 = load float* %4, align 4
+ %4 = load float*, float** %output.addr, align 8
+ %5 = load float, float* %4, align 4
%conv3 = fpext float %5 to double
%add4 = fadd double %conv3, 2.000000e+00
%conv5 = fptrunc double %add4 to float
@@ -38,16 +38,16 @@ if.else: ; preds = %entry
if.end: ; preds = %if.else, %if.then
call void @llvm.cuda.syncthreads()
- %6 = load float** %output.addr, align 8
- %arrayidx6 = getelementptr inbounds float* %6, i64 0
- %7 = load float* %arrayidx6, align 4
+ %6 = load float*, float** %output.addr, align 8
+ %arrayidx6 = getelementptr inbounds float, float* %6, i64 0
+ %7 = load float, float* %arrayidx6, align 4
%conv7 = fpext float %7 to double
%cmp8 = fcmp olt double %conv7, 1.000000e+01
br i1 %cmp8, label %if.then9, label %if.else13
if.then9: ; preds = %if.end
- %8 = load float** %output.addr, align 8
- %9 = load float* %8, align 4
+ %8 = load float*, float** %output.addr, align 8
+ %9 = load float, float* %8, align 4
%conv10 = fpext float %9 to double
%add11 = fadd double %conv10, 3.000000e+00
%conv12 = fptrunc double %add11 to float
@@ -55,8 +55,8 @@ if.then9: ; preds = %if.end
br label %if.end17
if.else13: ; preds = %if.end
- %10 = load float** %output.addr, align 8
- %11 = load float* %10, align 4
+ %10 = load float*, float** %output.addr, align 8
+ %11 = load float, float* %10, align 4
%conv14 = fpext float %11 to double
%add15 = fadd double %conv14, 4.000000e+00
%conv16 = fptrunc double %add15 to float
diff --git a/test/CodeGen/NVPTX/nounroll.ll b/test/CodeGen/NVPTX/nounroll.ll
new file mode 100644
index 000000000000..e80a4a21f161
--- /dev/null
+++ b/test/CodeGen/NVPTX/nounroll.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-unknown-unknown"
+
+; Compiled from the following CUDA code:
+;
+; #pragma nounroll
+; for (int i = 0; i < 2; ++i)
+; output[i] = input[i];
+define void @nounroll(float* %input, float* %output) {
+; CHECK-LABEL: .visible .func nounroll(
+entry:
+ br label %for.body
+
+for.body:
+; CHECK: .pragma "nounroll"
+ %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %idxprom = sext i32 %i.06 to i64
+ %arrayidx = getelementptr inbounds float, float* %input, i64 %idxprom
+ %0 = load float, float* %arrayidx, align 4
+; CHECK: ld.f32
+ %arrayidx2 = getelementptr inbounds float, float* %output, i64 %idxprom
+ store float %0, float* %arrayidx2, align 4
+; CHECK: st.f32
+ %inc = add nuw nsw i32 %i.06, 1
+ %exitcond = icmp eq i32 %inc, 2
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
+; CHECK-NOT: ld.f32
+; CHECK-NOT: st.f32
+
+for.end:
+ ret void
+}
+
+!0 = distinct !{!0, !1}
+!1 = !{!"llvm.loop.unroll.disable"}
diff --git a/test/CodeGen/NVPTX/nvvm-reflect.ll b/test/CodeGen/NVPTX/nvvm-reflect.ll
index 21e9c69e657a..8c75dfc30a56 100644
--- a/test/CodeGen/NVPTX/nvvm-reflect.ll
+++ b/test/CodeGen/NVPTX/nvvm-reflect.ll
@@ -11,7 +11,7 @@ define float @foo(float %a, float %b) {
; USE_MUL_0-NOT: call i32 @__nvvm_reflect
; USE_MUL_1: define float @foo
; USE_MUL_1-NOT: call i32 @__nvvm_reflect
- %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([8 x i8] addrspace(4)* @str, i32 0, i32 0))
+ %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(4)* @str, i32 0, i32 0))
%reflect = tail call i32 @__nvvm_reflect(i8* %ptr)
%cmp = icmp ugt i32 %reflect, 0
br i1 %cmp, label %use_mul, label %use_add
@@ -42,7 +42,38 @@ define i32 @intrinsic() {
; USE_MUL_0: ret i32 0
; USE_MUL_1-NOT: call i32 @llvm.nvvm.reflect
; USE_MUL_1: ret i32 1
- %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([8 x i8] addrspace(4)* @str, i32 0, i32 0))
+ %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(4)* @str, i32 0, i32 0))
%reflect = tail call i32 @llvm.nvvm.reflect.p0i8(i8* %ptr)
ret i32 %reflect
}
+
+; CUDA-7.0 passes __nvvm_reflect argument slightly differently.
+; Verify that it works, too
+
+@"$str" = private addrspace(1) constant [8 x i8] c"USE_MUL\00"
+
+define float @bar(float %a, float %b) {
+; USE_MUL_0: define float @bar
+; USE_MUL_0-NOT: call i32 @__nvvm_reflect
+; USE_MUL_1: define float @bar
+; USE_MUL_1-NOT: call i32 @__nvvm_reflect
+ %reflect = call i32 @__nvvm_reflect(i8* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(1)* @"$str", i32 0, i32 0) to i8*))
+ %cmp = icmp ne i32 %reflect, 0
+ br i1 %cmp, label %use_mul, label %use_add
+
+use_mul:
+; USE_MUL_1: fmul float %a, %b
+; USE_MUL_0-NOT: fadd float %a, %b
+ %ret1 = fmul float %a, %b
+ br label %exit
+
+use_add:
+; USE_MUL_0: fadd float %a, %b
+; USE_MUL_1-NOT: fmul float %a, %b
+ %ret2 = fadd float %a, %b
+ br label %exit
+
+exit:
+ %ret = phi float [%ret1, %use_mul], [%ret2, %use_add]
+ ret float %ret
+}
diff --git a/test/CodeGen/NVPTX/pr13291-i1-store.ll b/test/CodeGen/NVPTX/pr13291-i1-store.ll
index cc67a6fff8e4..d4f7c3bd210a 100644
--- a/test/CodeGen/NVPTX/pr13291-i1-store.ll
+++ b/test/CodeGen/NVPTX/pr13291-i1-store.ll
@@ -19,7 +19,7 @@ define ptx_kernel void @t2(i1* %a, i8* %b) {
; PTX64: and.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, 1;
; PTX64: setp.eq.b16 %p{{[0-9]+}}, %rs{{[0-9]+}}, 1;
- %t1 = load i1* %a
+ %t1 = load i1, i1* %a
%t2 = select i1 %t1, i8 1, i8 2
store i8 %t2, i8* %b
ret void
diff --git a/test/CodeGen/NVPTX/pr16278.ll b/test/CodeGen/NVPTX/pr16278.ll
index 5432a848442c..a836eaf2e51f 100644
--- a/test/CodeGen/NVPTX/pr16278.ll
+++ b/test/CodeGen/NVPTX/pr16278.ll
@@ -5,6 +5,6 @@
define float @foo() {
; CHECK: ld.const.f32
- %val = load float addrspace(4)* @one_f
+ %val = load float, float addrspace(4)* @one_f
ret float %val
}
diff --git a/test/CodeGen/NVPTX/pr17529.ll b/test/CodeGen/NVPTX/pr17529.ll
index a16214225674..a7519776f526 100644
--- a/test/CodeGen/NVPTX/pr17529.ll
+++ b/test/CodeGen/NVPTX/pr17529.ll
@@ -11,7 +11,7 @@ entry:
vector.body: ; preds = %vector.body, %entry
%index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
- %scevgep9 = getelementptr i8* %dst, i64 %index
+ %scevgep9 = getelementptr i8, i8* %dst, i64 %index
%scevgep910 = bitcast i8* %scevgep9 to <4 x i8>*
store <4 x i8> undef, <4 x i8>* %scevgep910, align 1
%index.next = add i64 %index, 4
@@ -22,13 +22,13 @@ middle.block: ; preds = %vector.body
br i1 undef, label %for.end, label %for.body.preheader1
for.body.preheader1: ; preds = %middle.block
- %scevgep2 = getelementptr i8* %dst, i64 0
+ %scevgep2 = getelementptr i8, i8* %dst, i64 0
br label %for.body
for.body: ; preds = %for.body, %for.body.preheader1
%lsr.iv3 = phi i8* [ %scevgep2, %for.body.preheader1 ], [ %scevgep4, %for.body ]
store i8 undef, i8* %lsr.iv3, align 1
- %scevgep4 = getelementptr i8* %lsr.iv3, i64 1
+ %scevgep4 = getelementptr i8, i8* %lsr.iv3, i64 1
br label %for.body
for.end: ; preds = %middle.block, %entry
diff --git a/test/CodeGen/NVPTX/ptx-version-30.ll b/test/CodeGen/NVPTX/ptx-version-30.ll
deleted file mode 100644
index 0422b01f4ee3..000000000000
--- a/test/CodeGen/NVPTX/ptx-version-30.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 -mattr=ptx30 | FileCheck %s
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -mattr=ptx30 | FileCheck %s
-
-
-; CHECK: .version 3.0
-
diff --git a/test/CodeGen/NVPTX/ptx-version-31.ll b/test/CodeGen/NVPTX/ptx-version-31.ll
deleted file mode 100644
index d6e57301a371..000000000000
--- a/test/CodeGen/NVPTX/ptx-version-31.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 -mattr=ptx31 | FileCheck %s
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -mattr=ptx31 | FileCheck %s
-
-
-; CHECK: .version 3.1
-
diff --git a/test/CodeGen/NVPTX/refl1.ll b/test/CodeGen/NVPTX/refl1.ll
index e8782ea3aa27..0432b67535c0 100644
--- a/test/CodeGen/NVPTX/refl1.ll
+++ b/test/CodeGen/NVPTX/refl1.ll
@@ -5,7 +5,7 @@ target triple = "nvptx-nvidia-cuda"
; Function Attrs: nounwind
; CHECK: .entry foo
define void @foo(float* nocapture %a) #0 {
- %val = load float* %a
+ %val = load float, float* %a
%tan = tail call fastcc float @__nv_fast_tanf(float %val)
store float %tan, float* %a
ret void
diff --git a/test/CodeGen/NVPTX/sched1.ll b/test/CodeGen/NVPTX/sched1.ll
index 03ab635e73b9..fb01eb262adc 100644
--- a/test/CodeGen/NVPTX/sched1.ll
+++ b/test/CodeGen/NVPTX/sched1.ll
@@ -11,14 +11,14 @@ define void @foo(i32* %a) {
; CHECK-NEXT: add.s32
; CHECK-NEXT: add.s32
; CHECK-NEXT: add.s32
- %ptr0 = getelementptr i32* %a, i32 0
- %val0 = load i32* %ptr0
- %ptr1 = getelementptr i32* %a, i32 1
- %val1 = load i32* %ptr1
- %ptr2 = getelementptr i32* %a, i32 2
- %val2 = load i32* %ptr2
- %ptr3 = getelementptr i32* %a, i32 3
- %val3 = load i32* %ptr3
+ %ptr0 = getelementptr i32, i32* %a, i32 0
+ %val0 = load i32, i32* %ptr0
+ %ptr1 = getelementptr i32, i32* %a, i32 1
+ %val1 = load i32, i32* %ptr1
+ %ptr2 = getelementptr i32, i32* %a, i32 2
+ %val2 = load i32, i32* %ptr2
+ %ptr3 = getelementptr i32, i32* %a, i32 3
+ %val3 = load i32, i32* %ptr3
%t0 = add i32 %val0, %val1
%t1 = add i32 %t0, %val2
diff --git a/test/CodeGen/NVPTX/sched2.ll b/test/CodeGen/NVPTX/sched2.ll
index 71a9a4963faf..91ed77878f81 100644
--- a/test/CodeGen/NVPTX/sched2.ll
+++ b/test/CodeGen/NVPTX/sched2.ll
@@ -12,14 +12,14 @@ define void @foo(<2 x i32>* %a) {
; CHECK-NEXT: add.s32
; CHECK-NEXT: add.s32
; CHECK-NEXT: add.s32
- %ptr0 = getelementptr <2 x i32>* %a, i32 0
- %val0 = load <2 x i32>* %ptr0
- %ptr1 = getelementptr <2 x i32>* %a, i32 1
- %val1 = load <2 x i32>* %ptr1
- %ptr2 = getelementptr <2 x i32>* %a, i32 2
- %val2 = load <2 x i32>* %ptr2
- %ptr3 = getelementptr <2 x i32>* %a, i32 3
- %val3 = load <2 x i32>* %ptr3
+ %ptr0 = getelementptr <2 x i32>, <2 x i32>* %a, i32 0
+ %val0 = load <2 x i32>, <2 x i32>* %ptr0
+ %ptr1 = getelementptr <2 x i32>, <2 x i32>* %a, i32 1
+ %val1 = load <2 x i32>, <2 x i32>* %ptr1
+ %ptr2 = getelementptr <2 x i32>, <2 x i32>* %a, i32 2
+ %val2 = load <2 x i32>, <2 x i32>* %ptr2
+ %ptr3 = getelementptr <2 x i32>, <2 x i32>* %a, i32 3
+ %val3 = load <2 x i32>, <2 x i32>* %ptr3
%t0 = add <2 x i32> %val0, %val1
%t1 = add <2 x i32> %t0, %val2
diff --git a/test/CodeGen/NVPTX/shift-parts.ll b/test/CodeGen/NVPTX/shift-parts.ll
index 748297caf339..b4d408ff5972 100644
--- a/test/CodeGen/NVPTX/shift-parts.ll
+++ b/test/CodeGen/NVPTX/shift-parts.ll
@@ -12,8 +12,8 @@ define void @shift_parts_left_128(i128* %val, i128* %amtptr) {
; CHECK: setp.gt.s32
; CHECK: selp.b64
; CHECK: shl.b64
- %amt = load i128* %amtptr
- %a = load i128* %val
+ %amt = load i128, i128* %amtptr
+ %a = load i128, i128* %val
%val0 = shl i128 %a, %amt
store i128 %val0, i128* %val
ret void
@@ -30,8 +30,8 @@ define void @shift_parts_right_128(i128* %val, i128* %amtptr) {
; CHECK: setp.gt.s32
; CHECK: selp.b64
; CHECK: shr.s64
- %amt = load i128* %amtptr
- %a = load i128* %val
+ %amt = load i128, i128* %amtptr
+ %a = load i128, i128* %val
%val0 = ashr i128 %a, %amt
store i128 %val0, i128* %val
ret void
diff --git a/test/CodeGen/NVPTX/simple-call.ll b/test/CodeGen/NVPTX/simple-call.ll
index 1b41361cf7ed..da6568685fe6 100644
--- a/test/CodeGen/NVPTX/simple-call.ll
+++ b/test/CodeGen/NVPTX/simple-call.ll
@@ -11,7 +11,7 @@ define float @device_func(float %a) noinline {
; CHECK: .entry kernel_func
define void @kernel_func(float* %a) {
- %val = load float* %a
+ %val = load float, float* %a
; CHECK: call.uni (retval0),
; CHECK: device_func,
%mul = call float @device_func(float %val)
diff --git a/test/CodeGen/NVPTX/sm-version-30.ll b/test/CodeGen/NVPTX/sm-version-30.ll
index 692b49a0d6b3..4f35cf04c63b 100644
--- a/test/CodeGen/NVPTX/sm-version-30.ll
+++ b/test/CodeGen/NVPTX/sm-version-30.ll
@@ -2,5 +2,6 @@
; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 | FileCheck %s
+; CHECK: .version 3.2
; CHECK: .target sm_30
diff --git a/test/CodeGen/NVPTX/sm-version-32.ll b/test/CodeGen/NVPTX/sm-version-32.ll
new file mode 100644
index 000000000000..d6a5082c5267
--- /dev/null
+++ b/test/CodeGen/NVPTX/sm-version-32.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_32 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_32 | FileCheck %s
+
+
+; CHECK: .version 4.0
+; CHECK: .target sm_32
+
diff --git a/test/CodeGen/NVPTX/sm-version-35.ll b/test/CodeGen/NVPTX/sm-version-35.ll
index 25368a01335e..8456c666677d 100644
--- a/test/CodeGen/NVPTX/sm-version-35.ll
+++ b/test/CodeGen/NVPTX/sm-version-35.ll
@@ -2,5 +2,6 @@
; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s
+; CHECK: .version 3.2
; CHECK: .target sm_35
diff --git a/test/CodeGen/NVPTX/sm-version-37.ll b/test/CodeGen/NVPTX/sm-version-37.ll
new file mode 100644
index 000000000000..fd51a9c7063f
--- /dev/null
+++ b/test/CodeGen/NVPTX/sm-version-37.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_37 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_37 | FileCheck %s
+
+
+; CHECK: .version 4.1
+; CHECK: .target sm_37
+
diff --git a/test/CodeGen/NVPTX/sm-version-50.ll b/test/CodeGen/NVPTX/sm-version-50.ll
new file mode 100644
index 000000000000..374c6ea057ae
--- /dev/null
+++ b/test/CodeGen/NVPTX/sm-version-50.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_50 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_50 | FileCheck %s
+
+
+; CHECK: .version 4.0
+; CHECK: .target sm_50
+
diff --git a/test/CodeGen/NVPTX/sm-version-52.ll b/test/CodeGen/NVPTX/sm-version-52.ll
new file mode 100644
index 000000000000..18881b2e98cc
--- /dev/null
+++ b/test/CodeGen/NVPTX/sm-version-52.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_52 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_52 | FileCheck %s
+
+
+; CHECK: .version 4.1
+; CHECK: .target sm_52
+
diff --git a/test/CodeGen/NVPTX/sm-version-53.ll b/test/CodeGen/NVPTX/sm-version-53.ll
new file mode 100644
index 000000000000..50d2dec11bc5
--- /dev/null
+++ b/test/CodeGen/NVPTX/sm-version-53.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_53 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_53 | FileCheck %s
+
+
+; CHECK: .version 4.2
+; CHECK: .target sm_53
+
diff --git a/test/CodeGen/NVPTX/symbol-naming.ll b/test/CodeGen/NVPTX/symbol-naming.ll
index bd1333f1c4e6..0f176934ca39 100644
--- a/test/CodeGen/NVPTX/symbol-naming.ll
+++ b/test/CodeGen/NVPTX/symbol-naming.ll
@@ -24,7 +24,7 @@ target triple = "nvptx64-unknown-unknown"
; Function Attrs: nounwind
define void @foo(i32 %a, float %b, i8 signext %c, i32 %e) {
entry:
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0))
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0))
ret void
}
diff --git a/test/CodeGen/NVPTX/vector-compare.ll b/test/CodeGen/NVPTX/vector-compare.ll
index 218049995233..2992b0e62c56 100644
--- a/test/CodeGen/NVPTX/vector-compare.ll
+++ b/test/CodeGen/NVPTX/vector-compare.ll
@@ -6,8 +6,8 @@
; tried to promote <2 x i1> to <2 x i8> and instruction selection failed.
define void @foo(<2 x i32>* %a, <2 x i32>* %b, i32* %r1, i32* %r2) {
- %aval = load <2 x i32>* %a
- %bval = load <2 x i32>* %b
+ %aval = load <2 x i32>, <2 x i32>* %a
+ %bval = load <2 x i32>, <2 x i32>* %b
%res = icmp slt <2 x i32> %aval, %bval
%t1 = extractelement <2 x i1> %res, i32 0
%t2 = extractelement <2 x i1> %res, i32 1
diff --git a/test/CodeGen/NVPTX/vector-loads.ll b/test/CodeGen/NVPTX/vector-loads.ll
index 58882bf16668..d70348942200 100644
--- a/test/CodeGen/NVPTX/vector-loads.ll
+++ b/test/CodeGen/NVPTX/vector-loads.ll
@@ -10,7 +10,7 @@
define void @foo(<2 x float>* %a) {
; CHECK: .func foo
; CHECK: ld.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}
- %t1 = load <2 x float>* %a
+ %t1 = load <2 x float>, <2 x float>* %a
%t2 = fmul <2 x float> %t1, %t1
store <2 x float> %t2, <2 x float>* %a
ret void
@@ -19,7 +19,7 @@ define void @foo(<2 x float>* %a) {
define void @foo2(<4 x float>* %a) {
; CHECK: .func foo2
; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
- %t1 = load <4 x float>* %a
+ %t1 = load <4 x float>, <4 x float>* %a
%t2 = fmul <4 x float> %t1, %t1
store <4 x float> %t2, <4 x float>* %a
ret void
@@ -29,7 +29,7 @@ define void @foo3(<8 x float>* %a) {
; CHECK: .func foo3
; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
; CHECK-NEXT: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
- %t1 = load <8 x float>* %a
+ %t1 = load <8 x float>, <8 x float>* %a
%t2 = fmul <8 x float> %t1, %t1
store <8 x float> %t2, <8 x float>* %a
ret void
@@ -40,7 +40,7 @@ define void @foo3(<8 x float>* %a) {
define void @foo4(<2 x i32>* %a) {
; CHECK: .func foo4
; CHECK: ld.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}
- %t1 = load <2 x i32>* %a
+ %t1 = load <2 x i32>, <2 x i32>* %a
%t2 = mul <2 x i32> %t1, %t1
store <2 x i32> %t2, <2 x i32>* %a
ret void
@@ -49,7 +49,7 @@ define void @foo4(<2 x i32>* %a) {
define void @foo5(<4 x i32>* %a) {
; CHECK: .func foo5
; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
- %t1 = load <4 x i32>* %a
+ %t1 = load <4 x i32>, <4 x i32>* %a
%t2 = mul <4 x i32> %t1, %t1
store <4 x i32> %t2, <4 x i32>* %a
ret void
@@ -59,7 +59,7 @@ define void @foo6(<8 x i32>* %a) {
; CHECK: .func foo6
; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
; CHECK-NEXT: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
- %t1 = load <8 x i32>* %a
+ %t1 = load <8 x i32>, <8 x i32>* %a
%t2 = mul <8 x i32> %t1, %t1
store <8 x i32> %t2, <8 x i32>* %a
ret void
diff --git a/test/CodeGen/NVPTX/vector-select.ll b/test/CodeGen/NVPTX/vector-select.ll
index 11893df10329..1e81031c685a 100644
--- a/test/CodeGen/NVPTX/vector-select.ll
+++ b/test/CodeGen/NVPTX/vector-select.ll
@@ -6,9 +6,9 @@
define void @foo(<2 x i32> addrspace(1)* %def_a, <2 x i32> addrspace(1)* %def_b, <2 x i32> addrspace(1)* %def_c) {
entry:
- %tmp4 = load <2 x i32> addrspace(1)* %def_a
- %tmp6 = load <2 x i32> addrspace(1)* %def_c
- %tmp8 = load <2 x i32> addrspace(1)* %def_b
+ %tmp4 = load <2 x i32>, <2 x i32> addrspace(1)* %def_a
+ %tmp6 = load <2 x i32>, <2 x i32> addrspace(1)* %def_c
+ %tmp8 = load <2 x i32>, <2 x i32> addrspace(1)* %def_b
%0 = icmp sge <2 x i32> %tmp4, zeroinitializer
%cond = select <2 x i1> %0, <2 x i32> %tmp6, <2 x i32> %tmp8
store <2 x i32> %cond, <2 x i32> addrspace(1)* %def_c
diff --git a/test/CodeGen/NVPTX/weak-global.ll b/test/CodeGen/NVPTX/weak-global.ll
index 2bef4c5228a9..a64f9f48b26f 100644
--- a/test/CodeGen/NVPTX/weak-global.ll
+++ b/test/CodeGen/NVPTX/weak-global.ll
@@ -4,6 +4,6 @@
@g = common addrspace(1) global i32 zeroinitializer
define i32 @func0() {
- %val = load i32 addrspace(1)* @g
+ %val = load i32, i32 addrspace(1)* @g
ret i32 %val
}