diff options
Diffstat (limited to 'test/CodeGen/NVPTX/fast-math.ll')
-rw-r--r-- | test/CodeGen/NVPTX/fast-math.ll | 137 |
1 file changed, 132 insertions, 5 deletions
diff --git a/test/CodeGen/NVPTX/fast-math.ll b/test/CodeGen/NVPTX/fast-math.ll
index d0a333d369ca..56b1f88f3b2e 100644
--- a/test/CodeGen/NVPTX/fast-math.ll
+++ b/test/CodeGen/NVPTX/fast-math.ll
@@ -1,25 +1,91 @@
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
 
-declare float @llvm.nvvm.sqrt.f(float)
+declare float @llvm.sqrt.f32(float)
+declare double @llvm.sqrt.f64(double)
 
-; CHECK-LABEL: sqrt_div
+; CHECK-LABEL: sqrt_div(
 ; CHECK: sqrt.rn.f32
 ; CHECK: div.rn.f32
 define float @sqrt_div(float %a, float %b) {
-  %t1 = tail call float @llvm.nvvm.sqrt.f(float %a)
+  %t1 = tail call float @llvm.sqrt.f32(float %a)
   %t2 = fdiv float %t1, %b
   ret float %t2
 }
 
-; CHECK-LABEL: sqrt_div_fast
+; CHECK-LABEL: sqrt_div_fast(
 ; CHECK: sqrt.approx.f32
 ; CHECK: div.approx.f32
 define float @sqrt_div_fast(float %a, float %b) #0 {
-  %t1 = tail call float @llvm.nvvm.sqrt.f(float %a)
+  %t1 = tail call float @llvm.sqrt.f32(float %a)
   %t2 = fdiv float %t1, %b
   ret float %t2
 }
 
+; CHECK-LABEL: sqrt_div_ftz(
+; CHECK: sqrt.rn.ftz.f32
+; CHECK: div.rn.ftz.f32
+define float @sqrt_div_ftz(float %a, float %b) #1 {
+  %t1 = tail call float @llvm.sqrt.f32(float %a)
+  %t2 = fdiv float %t1, %b
+  ret float %t2
+}
+
+; CHECK-LABEL: sqrt_div_fast_ftz(
+; CHECK: sqrt.approx.ftz.f32
+; CHECK: div.approx.ftz.f32
+define float @sqrt_div_fast_ftz(float %a, float %b) #0 #1 {
+  %t1 = tail call float @llvm.sqrt.f32(float %a)
+  %t2 = fdiv float %t1, %b
+  ret float %t2
+}
+
+; There are no fast-math or ftz versions of sqrt and div for f64. We use
+; reciprocal(rsqrt(x)) for sqrt(x), and emit a vanilla divide.
+
+; CHECK-LABEL: sqrt_div_fast_ftz_f64(
+; CHECK: rsqrt.approx.f64
+; CHECK: rcp.approx.ftz.f64
+; CHECK: div.rn.f64
+define double @sqrt_div_fast_ftz_f64(double %a, double %b) #0 #1 {
+  %t1 = tail call double @llvm.sqrt.f64(double %a)
+  %t2 = fdiv double %t1, %b
+  ret double %t2
+}
+
+; CHECK-LABEL: rsqrt(
+; CHECK-NOT: rsqrt.approx
+; CHECK: sqrt.rn.f32
+; CHECK-NOT: rsqrt.approx
+define float @rsqrt(float %a) {
+  %b = tail call float @llvm.sqrt.f32(float %a)
+  %ret = fdiv float 1.0, %b
+  ret float %ret
+}
+
+; CHECK-LABEL: rsqrt_fast(
+; CHECK-NOT: div.
+; CHECK-NOT: sqrt.
+; CHECK: rsqrt.approx.f32
+; CHECK-NOT: div.
+; CHECK-NOT: sqrt.
+define float @rsqrt_fast(float %a) #0 {
+  %b = tail call float @llvm.sqrt.f32(float %a)
+  %ret = fdiv float 1.0, %b
+  ret float %ret
+}
+
+; CHECK-LABEL: rsqrt_fast_ftz(
+; CHECK-NOT: div.
+; CHECK-NOT: sqrt.
+; CHECK: rsqrt.approx.ftz.f32
+; CHECK-NOT: div.
+; CHECK-NOT: sqrt.
+define float @rsqrt_fast_ftz(float %a) #0 #1 {
+  %b = tail call float @llvm.sqrt.f32(float %a)
+  %ret = fdiv float 1.0, %b
+  ret float %ret
+}
+
 ; CHECK-LABEL: fadd
 ; CHECK: add.rn.f32
 define float @fadd(float %a, float %b) {
@@ -34,5 +100,66 @@ define float @fadd_ftz(float %a, float %b) #1 {
   ret float %t1
 }
 
+declare float @llvm.sin.f32(float)
+declare float @llvm.cos.f32(float)
+
+; CHECK-LABEL: fsin_approx
+; CHECK: sin.approx.f32
+define float @fsin_approx(float %a) #0 {
+  %r = tail call float @llvm.sin.f32(float %a)
+  ret float %r
+}
+
+; CHECK-LABEL: fcos_approx
+; CHECK: cos.approx.f32
+define float @fcos_approx(float %a) #0 {
+  %r = tail call float @llvm.cos.f32(float %a)
+  ret float %r
+}
+
+; CHECK-LABEL: repeated_div_recip_allowed
+define float @repeated_div_recip_allowed(i1 %pred, float %a, float %b, float %divisor) {
+; CHECK: rcp.rn.f32
+; CHECK: mul.rn.f32
+; CHECK: mul.rn.f32
+  %x = fdiv arcp float %a, %divisor
+  %y = fdiv arcp float %b, %divisor
+  %z = select i1 %pred, float %x, float %y
+  ret float %z
+}
+
+; CHECK-LABEL: repeated_div_recip_allowed_ftz
+define float @repeated_div_recip_allowed_ftz(i1 %pred, float %a, float %b, float %divisor) #1 {
+; CHECK: rcp.rn.ftz.f32
+; CHECK: mul.rn.ftz.f32
+; CHECK: mul.rn.ftz.f32
+  %x = fdiv arcp float %a, %divisor
+  %y = fdiv arcp float %b, %divisor
+  %z = select i1 %pred, float %x, float %y
+  ret float %z
+}
+
+; CHECK-LABEL: repeated_div_fast
+define float @repeated_div_fast(i1 %pred, float %a, float %b, float %divisor) #0 {
+; CHECK: rcp.approx.f32
+; CHECK: mul.f32
+; CHECK: mul.f32
+  %x = fdiv float %a, %divisor
+  %y = fdiv float %b, %divisor
+  %z = select i1 %pred, float %x, float %y
+  ret float %z
+}
+
+; CHECK-LABEL: repeated_div_fast_ftz
+define float @repeated_div_fast_ftz(i1 %pred, float %a, float %b, float %divisor) #0 #1 {
+; CHECK: rcp.approx.ftz.f32
+; CHECK: mul.ftz.f32
+; CHECK: mul.ftz.f32
+  %x = fdiv float %a, %divisor
+  %y = fdiv float %b, %divisor
+  %z = select i1 %pred, float %x, float %y
+  ret float %z
+}
+
 attributes #0 = { "unsafe-fp-math" = "true" }
 attributes #1 = { "nvptx-f32ftz" = "true" }