diff options
Diffstat (limited to 'test/Transforms/LICM')
-rw-r--r-- | test/Transforms/LICM/atomics.ll | 148 | ||||
-rw-r--r-- | test/Transforms/LICM/constexpr.ll | 2 | ||||
-rw-r--r-- | test/Transforms/LICM/hoist-bitcast-load.ll | 3 | ||||
-rw-r--r-- | test/Transforms/LICM/hoist-deref-load.ll | 3 | ||||
-rw-r--r-- | test/Transforms/LICM/hoist-fast-fdiv.ll | 34 | ||||
-rw-r--r-- | test/Transforms/LICM/hoist-nounwind.ll | 2 | ||||
-rw-r--r-- | test/Transforms/LICM/hoist-round.ll | 5 | ||||
-rw-r--r-- | test/Transforms/LICM/hoisting.ll | 173 | ||||
-rw-r--r-- | test/Transforms/LICM/loopsink.ll | 1 | ||||
-rw-r--r-- | test/Transforms/LICM/opt-remarks.ll | 2 | ||||
-rw-r--r-- | test/Transforms/LICM/pr32129.ll | 18 | ||||
-rw-r--r-- | test/Transforms/LICM/scalar-promote-unwind.ll | 263 | ||||
-rw-r--r-- | test/Transforms/LICM/scalar-promote.ll (renamed from test/Transforms/LICM/scalar_promote.ll) | 27 | ||||
-rw-r--r-- | test/Transforms/LICM/scalar_promote-unwind.ll | 72 | ||||
-rw-r--r-- | test/Transforms/LICM/sink.ll | 2 | ||||
-rw-r--r-- | test/Transforms/LICM/unrolled-deeply-nested.ll | 76 |
16 files changed, 749 insertions, 82 deletions
diff --git a/test/Transforms/LICM/atomics.ll b/test/Transforms/LICM/atomics.ll index d23cb49c5486..15c461aeca27 100644 --- a/test/Transforms/LICM/atomics.ll +++ b/test/Transforms/LICM/atomics.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -S -basicaa -licm | FileCheck %s -; RUN: opt -aa-pipeline=basic-aa -passes='lcssa,require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s ; Check that we can hoist unordered loads define i32 @test1(i32* nocapture %y) nounwind uwtable ssp { @@ -60,8 +60,7 @@ end: ; CHECK-NEXT: br label %loop } -; Don't try to "sink" unordered stores yet; it is legal, but the machinery -; isn't there. +; We can sink an unordered store define i32 @test4(i32* nocapture noalias %x, i32* nocapture %y) nounwind uwtable ssp { entry: br label %loop @@ -75,6 +74,149 @@ loop: end: ret i32 %vala ; CHECK-LABEL: define i32 @test4( +; CHECK-LABEL: loop: +; CHECK: load atomic i32, i32* %y monotonic +; CHECK-NOT: store +; CHECK-LABEL: end: +; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %vala +; CHECK: store atomic i32 %[[LCSSAPHI]], i32* %x unordered, align 4 +} + +; We currently don't handle ordered atomics. 
+define i32 @test5(i32* nocapture noalias %x, i32* nocapture %y) nounwind uwtable ssp { +entry: + br label %loop + +loop: + %vala = load atomic i32, i32* %y monotonic, align 4 + store atomic i32 %vala, i32* %x release, align 4 + %exitcond = icmp ne i32 %vala, 0 + br i1 %exitcond, label %end, label %loop + +end: + ret i32 %vala +; CHECK-LABEL: define i32 @test5( ; CHECK: load atomic i32, i32* %y monotonic ; CHECK-NEXT: store atomic } + +; We currently don't touch volatiles +define i32 @test6(i32* nocapture noalias %x, i32* nocapture %y) nounwind uwtable ssp { +entry: + br label %loop + +loop: + %vala = load atomic i32, i32* %y monotonic, align 4 + store volatile i32 %vala, i32* %x, align 4 + %exitcond = icmp ne i32 %vala, 0 + br i1 %exitcond, label %end, label %loop + +end: + ret i32 %vala +; CHECK-LABEL: define i32 @test6( +; CHECK: load atomic i32, i32* %y monotonic +; CHECK-NEXT: store volatile +} + +; We currently don't touch volatiles +define i32 @test6b(i32* nocapture noalias %x, i32* nocapture %y) nounwind uwtable ssp { +entry: + br label %loop + +loop: + %vala = load atomic i32, i32* %y monotonic, align 4 + store atomic volatile i32 %vala, i32* %x unordered, align 4 + %exitcond = icmp ne i32 %vala, 0 + br i1 %exitcond, label %end, label %loop + +end: + ret i32 %vala +; CHECK-LABEL: define i32 @test6b( +; CHECK: load atomic i32, i32* %y monotonic +; CHECK-NEXT: store atomic volatile +} + +; Mixing unordered atomics and normal loads/stores is +; currently unimplemented +define i32 @test7(i32* nocapture noalias %x, i32* nocapture %y) nounwind uwtable ssp { +entry: + br label %loop + +loop: + store i32 5, i32* %x + %vala = load atomic i32, i32* %y monotonic, align 4 + store atomic i32 %vala, i32* %x unordered, align 4 + %exitcond = icmp ne i32 %vala, 0 + br i1 %exitcond, label %end, label %loop + +end: + ret i32 %vala +; CHECK-LABEL: define i32 @test7( +; CHECK: store i32 5, i32* %x +; CHECK-NEXT: load atomic i32, i32* %y +; CHECK-NEXT: store atomic i32 +} + +; 
Three provably noalias locations - we can sink normal and unordered, but +; not monotonic +define i32 @test7b(i32* nocapture noalias %x, i32* nocapture %y, i32* noalias nocapture %z) nounwind uwtable ssp { +entry: + br label %loop + +loop: + store i32 5, i32* %x + %vala = load atomic i32, i32* %y monotonic, align 4 + store atomic i32 %vala, i32* %z unordered, align 4 + %exitcond = icmp ne i32 %vala, 0 + br i1 %exitcond, label %end, label %loop + +end: + ret i32 %vala +; CHECK-LABEL: define i32 @test7b( +; CHECK: load atomic i32, i32* %y monotonic + +; CHECK-LABEL: end: +; CHECK: store i32 5, i32* %x +; CHECK: store atomic i32 %{{.+}}, i32* %z unordered, align 4 +} + + +define i32 @test8(i32* nocapture noalias %x, i32* nocapture %y) { +entry: + br label %loop + +loop: + %vala = load atomic i32, i32* %y monotonic, align 4 + store atomic i32 %vala, i32* %x unordered, align 4 + fence release + %exitcond = icmp ne i32 %vala, 0 + br i1 %exitcond, label %end, label %loop + +end: + ret i32 %vala +; CHECK-LABEL: define i32 @test8( +; CHECK-LABEL: loop: +; CHECK: load atomic i32, i32* %y monotonic +; CHECK-NEXT: store atomic +; CHECK-NEXT: fence +} + +; Exact semantics of monotonic accesses are a bit vague in the C++ spec, +; for the moment, be conservative and don't touch them. 
+define i32 @test9(i32* nocapture noalias %x, i32* nocapture %y) { +entry: + br label %loop + +loop: + %vala = load atomic i32, i32* %y monotonic, align 4 + store atomic i32 %vala, i32* %x monotonic, align 4 + %exitcond = icmp ne i32 %vala, 0 + br i1 %exitcond, label %end, label %loop + +end: + ret i32 %vala +; CHECK-LABEL: define i32 @test9( +; CHECK-LABEL: loop: +; CHECK: load atomic i32, i32* %y monotonic +; CHECK-NEXT: store atomic i32 %vala, i32* %x monotonic, align 4 +} diff --git a/test/Transforms/LICM/constexpr.ll b/test/Transforms/LICM/constexpr.ll index 8ffc73513600..488821ac8fd4 100644 --- a/test/Transforms/LICM/constexpr.ll +++ b/test/Transforms/LICM/constexpr.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -S -basicaa -licm | FileCheck %s -; RUN: opt -aa-pipeline=basic-aa -passes='lcssa,require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s ; This fixes PR22460 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/test/Transforms/LICM/hoist-bitcast-load.ll b/test/Transforms/LICM/hoist-bitcast-load.ll index 6ef00738820e..956c7283be31 100644 --- a/test/Transforms/LICM/hoist-bitcast-load.ll +++ b/test/Transforms/LICM/hoist-bitcast-load.ll @@ -1,5 +1,6 @@ ; RUN: opt -S -basicaa -licm < %s | FileCheck %s -; RUN: opt -aa-pipeline=basic-aa -passes='loop-simplify,require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(simplify-cfg,licm)' -S < %s | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop(simplify-cfg,licm)' -S < %s | FileCheck %s + target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/test/Transforms/LICM/hoist-deref-load.ll b/test/Transforms/LICM/hoist-deref-load.ll index e67becdeb5e4..b48c9e5c7b14 100644 --- a/test/Transforms/LICM/hoist-deref-load.ll +++ 
b/test/Transforms/LICM/hoist-deref-load.ll @@ -1,5 +1,6 @@ ; RUN: opt -S -basicaa -licm < %s | FileCheck %s -; RUN: opt -aa-pipeline=basic-aa -passes='loop-simplify,require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(simplify-cfg,licm)' -S < %s | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop(simplify-cfg,licm)' -S < %s | FileCheck %s + target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/test/Transforms/LICM/hoist-fast-fdiv.ll b/test/Transforms/LICM/hoist-fast-fdiv.ll new file mode 100644 index 000000000000..f61564fd726c --- /dev/null +++ b/test/Transforms/LICM/hoist-fast-fdiv.ll @@ -0,0 +1,34 @@ +; RUN: opt -licm -S < %s | FileCheck %s + +; Function Attrs: noinline norecurse nounwind readnone ssp uwtable +define zeroext i1 @f(double %v) #0 { +entry: +; CHECK-LABEL: @f( +; CHECK-NEXT: entry: +; CHECK-NEXT: fdiv fast double 1.000000e+00, %v + br label %loop + +loop: ; preds = %entry, %loop + %v3 = phi i32 [ 0, %entry ], [ %v11, %loop ] + %v4 = phi i32 [ 0, %entry ], [ %v12, %loop ] + %v5 = uitofp i32 %v4 to double + +; CHECK-LABEL: loop: +; CHECK: fmul fast double +; CHECK-NOT: fdiv + %v6 = fdiv fast double %v5, %v + %v7 = fptoui double %v6 to i64 + %v8 = and i64 %v7, 1 + %v9 = xor i64 %v8, 1 + %v10 = trunc i64 %v9 to i32 + %v11 = add i32 %v10, %v3 + %v12 = add nuw i32 %v4, 1 + %v13 = icmp eq i32 %v12, -1 + br i1 %v13, label %end, label %loop + +end: ; preds = %loop + %v15 = phi i32 [ %v11, %loop ] + %v16 = icmp ne i32 %v15, 0 + ret i1 %v16 +} + diff --git a/test/Transforms/LICM/hoist-nounwind.ll b/test/Transforms/LICM/hoist-nounwind.ll index e9720235893a..9fc4903b8302 100644 --- a/test/Transforms/LICM/hoist-nounwind.ll +++ b/test/Transforms/LICM/hoist-nounwind.ll @@ -1,5 +1,5 @@ ; RUN: opt -S -basicaa -licm < %s | FileCheck %s -; RUN: opt -aa-pipeline=basic-aa 
-passes='lcssa,require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S %s | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop(licm)' -S %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/test/Transforms/LICM/hoist-round.ll b/test/Transforms/LICM/hoist-round.ll index 9c6a3a180b50..87a7050668de 100644 --- a/test/Transforms/LICM/hoist-round.ll +++ b/test/Transforms/LICM/hoist-round.ll @@ -18,6 +18,7 @@ target datalayout = "E-m:e-p:32:32-i8:8:8-i16:16:16-i64:32:32-f64:32:32-v64:32:3 ; CHECK: call float @llvm.copysign.f32 ; CHECK: call float @llvm.minnum.f32 ; CHECK: call float @llvm.maxnum.f32 +; CHECK: call float @llvm.powi.f32 ; CHECK: for.body: define void @test(float %arg1, float %arg2) { @@ -40,7 +41,8 @@ for.body: %tmp.8 = call float @llvm.copysign.f32(float %tmp.7, float %arg2) %tmp.9 = call float @llvm.minnum.f32(float %tmp.8, float %arg2) %tmp.10 = call float @llvm.maxnum.f32(float %tmp.9, float %arg2) - call void @consume(float %tmp.10) + %tmp.11 = call float @llvm.powi.f32(float %tmp.10, i32 4) + call void @consume(float %tmp.11) %IND.new = add i32 %IND, 1 br label %for.head @@ -60,3 +62,4 @@ declare float @llvm.fabs.f32(float) declare float @llvm.copysign.f32(float, float) declare float @llvm.minnum.f32(float, float) declare float @llvm.maxnum.f32(float, float) +declare float @llvm.powi.f32(float, i32) diff --git a/test/Transforms/LICM/hoisting.ll b/test/Transforms/LICM/hoisting.ll index 29595b3e1cc0..cbd17689e939 100644 --- a/test/Transforms/LICM/hoisting.ll +++ b/test/Transforms/LICM/hoisting.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -licm -S | FileCheck %s -; RUN: opt -lcssa %s | opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S | FileCheck %s +; RUN: opt < %s -aa-pipeline=basic-aa 
-passes='require<opt-remark-emit>,loop(licm)' -S | FileCheck %s @X = global i32 0 ; <i32*> [#uses=1] @@ -149,3 +149,174 @@ latch: return: ret i32 %sum } + +declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly +declare void @llvm.invariant.end.p0i8({}*, i64, i8* nocapture) nounwind +declare void @escaping.invariant.start({}*) nounwind +; invariant.start dominates the load, and in this scope, the +; load is invariant. So, we can hoist the `addrld` load out of the loop. +define i32 @test_fence(i8* %addr, i32 %n, i8* %volatile) { +; CHECK-LABEL: @test_fence +; CHECK-LABEL: entry +; CHECK: invariant.start +; CHECK: %addrld = load atomic i32, i32* %addr.i unordered, align 8 +; CHECK: br label %loop +entry: + %gep = getelementptr inbounds i8, i8* %addr, i64 8 + %addr.i = bitcast i8* %gep to i32 * + store atomic i32 5, i32 * %addr.i unordered, align 8 + fence release + %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep) + br label %loop + +loop: + %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ] + %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ] + %volload = load atomic i8, i8* %volatile unordered, align 8 + fence acquire + %volchk = icmp eq i8 %volload, 0 + %addrld = load atomic i32, i32* %addr.i unordered, align 8 + %sel = select i1 %volchk, i32 0, i32 %addrld + %sum.next = add i32 %sel, %sum + %indvar.next = add i32 %indvar, 1 + %cond = icmp slt i32 %indvar.next, %n + br i1 %cond, label %loop, label %loopexit + +loopexit: + ret i32 %sum +} + + + +; Same as test above, but the load is no longer invariant (presence of +; invariant.end). We cannot hoist the addrld out of loop. 
+define i32 @test_fence1(i8* %addr, i32 %n, i8* %volatile) { +; CHECK-LABEL: @test_fence1 +; CHECK-LABEL: entry +; CHECK: invariant.start +; CHECK-NEXT: invariant.end +; CHECK-NEXT: br label %loop +entry: + %gep = getelementptr inbounds i8, i8* %addr, i64 8 + %addr.i = bitcast i8* %gep to i32 * + store atomic i32 5, i32 * %addr.i unordered, align 8 + fence release + %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep) + call void @llvm.invariant.end.p0i8({}* %invst, i64 4, i8* %gep) + br label %loop + +loop: + %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ] + %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ] + %volload = load atomic i8, i8* %volatile unordered, align 8 + fence acquire + %volchk = icmp eq i8 %volload, 0 + %addrld = load atomic i32, i32* %addr.i unordered, align 8 + %sel = select i1 %volchk, i32 0, i32 %addrld + %sum.next = add i32 %sel, %sum + %indvar.next = add i32 %indvar, 1 + %cond = icmp slt i32 %indvar.next, %n + br i1 %cond, label %loop, label %loopexit + +loopexit: + ret i32 %sum +} + +; same as test above, but instead of invariant.end, we have the result of +; invariant.start escaping through a call. We cannot hoist the load. 
+define i32 @test_fence2(i8* %addr, i32 %n, i8* %volatile) { +; CHECK-LABEL: @test_fence2 +; CHECK-LABEL: entry +; CHECK-NOT: load +; CHECK: br label %loop +entry: + %gep = getelementptr inbounds i8, i8* %addr, i64 8 + %addr.i = bitcast i8* %gep to i32 * + store atomic i32 5, i32 * %addr.i unordered, align 8 + fence release + %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep) + call void @escaping.invariant.start({}* %invst) + br label %loop + +loop: + %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ] + %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ] + %volload = load atomic i8, i8* %volatile unordered, align 8 + fence acquire + %volchk = icmp eq i8 %volload, 0 + %addrld = load atomic i32, i32* %addr.i unordered, align 8 + %sel = select i1 %volchk, i32 0, i32 %addrld + %sum.next = add i32 %sel, %sum + %indvar.next = add i32 %indvar, 1 + %cond = icmp slt i32 %indvar.next, %n + br i1 %cond, label %loop, label %loopexit + +loopexit: + ret i32 %sum +} + +; FIXME: invariant.start dominates the load, and in this scope, the +; load is invariant. So, we can hoist the `addrld` load out of the loop. 
+; Consider the load operand addr.i bitcast before being passed to +; invariant.start +define i32 @test_fence3(i32* %addr, i32 %n, i8* %volatile) { +; CHECK-LABEL: @test_fence3 +; CHECK-LABEL: entry +; CHECK: invariant.start +; CHECK-NOT: %addrld = load atomic i32, i32* %addr.i unordered, align 8 +; CHECK: br label %loop +entry: + %addr.i = getelementptr inbounds i32, i32* %addr, i64 8 + %gep = bitcast i32* %addr.i to i8 * + store atomic i32 5, i32 * %addr.i unordered, align 8 + fence release + %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep) + br label %loop + +loop: + %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ] + %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ] + %volload = load atomic i8, i8* %volatile unordered, align 8 + fence acquire + %volchk = icmp eq i8 %volload, 0 + %addrld = load atomic i32, i32* %addr.i unordered, align 8 + %sel = select i1 %volchk, i32 0, i32 %addrld + %sum.next = add i32 %sel, %sum + %indvar.next = add i32 %indvar, 1 + %cond = icmp slt i32 %indvar.next, %n + br i1 %cond, label %loop, label %loopexit + +loopexit: + ret i32 %sum +} + +; We should not hoist the addrld out of the loop. 
+define i32 @test_fence4(i32* %addr, i32 %n, i8* %volatile) { +; CHECK-LABEL: @test_fence4 +; CHECK-LABEL: entry +; CHECK-NOT: %addrld = load atomic i32, i32* %addr.i unordered, align 8 +; CHECK: br label %loop +entry: + %addr.i = getelementptr inbounds i32, i32* %addr, i64 8 + %gep = bitcast i32* %addr.i to i8 * + br label %loop + +loop: + %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ] + %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ] + store atomic i32 5, i32 * %addr.i unordered, align 8 + fence release + %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep) + %volload = load atomic i8, i8* %volatile unordered, align 8 + fence acquire + %volchk = icmp eq i8 %volload, 0 + %addrld = load atomic i32, i32* %addr.i unordered, align 8 + %sel = select i1 %volchk, i32 0, i32 %addrld + %sum.next = add i32 %sel, %sum + %indvar.next = add i32 %indvar, 1 + %cond = icmp slt i32 %indvar.next, %n + br i1 %cond, label %loop, label %loopexit + +loopexit: + ret i32 %sum +} diff --git a/test/Transforms/LICM/loopsink.ll b/test/Transforms/LICM/loopsink.ll index 5004752d1031..b203ea8b51ad 100644 --- a/test/Transforms/LICM/loopsink.ll +++ b/test/Transforms/LICM/loopsink.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -loop-sink < %s | FileCheck %s +; RUN: opt -S -passes=loop-sink < %s | FileCheck %s @g = global i32 0, align 4 diff --git a/test/Transforms/LICM/opt-remarks.ll b/test/Transforms/LICM/opt-remarks.ll index f0ef386c9f9a..b44fc57131a5 100644 --- a/test/Transforms/LICM/opt-remarks.ll +++ b/test/Transforms/LICM/opt-remarks.ll @@ -10,7 +10,7 @@ Loop: %j = phi i32 [ 0, %Entry ], [ %Next, %Loop ] %addr = getelementptr i32, i32* %array, i32 %j %a = load i32, i32* %addr -; CHECK: remark: /tmp/kk.c:2:20: hosting load +; CHECK: remark: /tmp/kk.c:2:20: hoisting load %b = load i32, i32* %p, !dbg !8 %a2 = add i32 %a, %b store i32 %a2, i32* %addr diff --git a/test/Transforms/LICM/pr32129.ll b/test/Transforms/LICM/pr32129.ll new file mode 100644 index 000000000000..2618afe46322 --- 
/dev/null +++ b/test/Transforms/LICM/pr32129.ll @@ -0,0 +1,18 @@ +; RUN: opt -S -licm -loop-unswitch -licm < %s | FileCheck %s + +declare void @llvm.experimental.guard(i1, ...) + +define void @test() { +; CHECK-LABEL: @test( +; CHECK-NOT: guard +entry: + br label %header + +header: + br label %loop + +loop: + %0 = icmp ult i32 0, 400 + call void (i1, ...) @llvm.experimental.guard(i1 %0, i32 9) [ "deopt"() ] + br i1 undef, label %header, label %loop +} diff --git a/test/Transforms/LICM/scalar-promote-unwind.ll b/test/Transforms/LICM/scalar-promote-unwind.ll new file mode 100644 index 000000000000..f1f52eed1d4c --- /dev/null +++ b/test/Transforms/LICM/scalar-promote-unwind.ll @@ -0,0 +1,263 @@ +; RUN: opt < %s -basicaa -licm -S | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Make sure we don't hoist the store out of the loop; %a would +; have the wrong value if f() unwinds + +define void @test1(i32* nocapture noalias %a, i1 zeroext %y) uwtable { +entry: + br label %for.body + +for.body: + %i.03 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %0 = load i32, i32* %a, align 4 + %add = add nsw i32 %0, 1 + store i32 %add, i32* %a, align 4 + br i1 %y, label %if.then, label %for.inc + +; CHECK: define void @test1 +; CHECK: load i32, i32* +; CHECK-NEXT: add +; CHECK-NEXT: store i32 + +if.then: + tail call void @f() + br label %for.inc + +for.inc: + %inc = add nuw nsw i32 %i.03, 1 + %exitcond = icmp eq i32 %inc, 10000 + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void +} + +; We can hoist the store out of the loop here; if f() unwinds, +; the lifetime of %a ends. 
+ +define void @test2(i1 zeroext %y) uwtable { +entry: + %a = alloca i32 + br label %for.body + +for.body: + %i.03 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %0 = load i32, i32* %a, align 4 + %add = add nsw i32 %0, 1 + store i32 %add, i32* %a, align 4 + br i1 %y, label %if.then, label %for.inc + +if.then: + tail call void @f() + br label %for.inc + +for.inc: + %inc = add nuw nsw i32 %i.03, 1 + %exitcond = icmp eq i32 %inc, 10000 + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: +; CHECK: define void @test2 +; CHECK: store i32 +; CHECK-NEXT: ret void + ret void +} + +@_ZTIi = external constant i8* + +; In this test, the loop is within a try block. There is an explicit unwind edge out of the loop. +; Make sure this edge is treated as a loop exit, and that the loads and stores are promoted as +; expected +define void @loop_within_tryblock() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +entry: + %a = alloca i32, align 4 + store i32 0, i32* %a, align 4 + br label %for.cond + +for.cond: + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %cmp = icmp slt i32 %i.0, 1024 + br i1 %cmp, label %for.body, label %for.end + +; CHECK: for.body: +; CHECK-NOT: load +; CHECK-NOT: store +; CHECK: invoke +for.body: + %0 = load i32, i32* %a, align 4 + %add = add nsw i32 %0, 1 + store i32 %add, i32* %a, align 4 + invoke void @boo() + to label %invoke.cont unwind label %lpad + +invoke.cont: + br label %for.inc + +for.inc: + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +; CHECK: lpad: +; CHECK: store +; CHECK: br +lpad: + %1 = landingpad { i8*, i32 } + catch i8* bitcast (i8** @_ZTIi to i8*) + %2 = extractvalue { i8*, i32 } %1, 0 + %3 = extractvalue { i8*, i32 } %1, 1 + br label %catch.dispatch + +catch.dispatch: + %4 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) #3 + %matches = icmp eq i32 %3, %4 + br i1 %matches, label %catch, label %eh.resume + +catch: + %5 = call i8* @__cxa_begin_catch(i8* %2) #3 + %6 = 
bitcast i8* %5 to i32* + %7 = load i32, i32* %6, align 4 + call void @__cxa_end_catch() #3 + br label %try.cont + +try.cont: + ret void + +for.end: + br label %try.cont + +eh.resume: + %lpad.val = insertvalue { i8*, i32 } undef, i8* %2, 0 + %lpad.val3 = insertvalue { i8*, i32 } %lpad.val, i32 %3, 1 + resume { i8*, i32 } %lpad.val3 +} + + +; The malloc'ed memory is not captured and is therefore promoted. +define void @malloc_no_capture() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +entry: + %call = call i8* @malloc(i64 4) + %0 = bitcast i8* %call to i32* + br label %for.body + +; CHECK: for.body: +; CHECK-NOT: load +; CHECK-NOT: store +; CHECK: br +for.body: + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.latch ] + %1 = load i32, i32* %0, align 4 + %add = add nsw i32 %1, 1 + store i32 %add, i32* %0, align 4 + br label %for.call + +for.call: + invoke void @boo() + to label %invoke.cont unwind label %lpad + +invoke.cont: + br label %for.latch + +for.latch: + %inc = add i32 %i.0, 1 + %cmp = icmp slt i32 %i.0, 1024 + br i1 %cmp, label %for.body, label %for.end + +for.end: + br label %fun.ret + +lpad: + %2 = landingpad { i8*, i32 } + catch i8* null + %3 = extractvalue { i8*, i32 } %2, 0 + %4 = extractvalue { i8*, i32 } %2, 1 + br label %catch + +catch: + %5 = call i8* @__cxa_begin_catch(i8* %3) #4 + %6 = bitcast i32* %0 to i8* + call void @free(i8* %6) + call void @__cxa_end_catch() + br label %fun.ret + +fun.ret: + ret void +} + +; The malloc'ed memory can be captured and therefore not promoted. 
+define void @malloc_capture(i32** noalias %A) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +entry: + %call = call i8* @malloc(i64 4) + %0 = bitcast i8* %call to i32* + br label %for.body + +; CHECK: for.body: +; CHECK: load +; CHECK: store +; CHECK: br +for.body: + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.latch ] + %1 = load i32, i32* %0, align 4 + %add = add nsw i32 %1, 1 + store i32 %add, i32* %0, align 4 + br label %for.call + +for.call: + invoke void @boo_readnone() + to label %invoke.cont unwind label %lpad + +invoke.cont: + br label %for.latch + +for.latch: + store i32* %0, i32** %A + %inc = add i32 %i.0, 1 + %cmp = icmp slt i32 %i.0, 1024 + br i1 %cmp, label %for.body, label %for.end + +for.end: + br label %fun.ret + +lpad: + %2 = landingpad { i8*, i32 } + catch i8* null + %3 = extractvalue { i8*, i32 } %2, 0 + %4 = extractvalue { i8*, i32 } %2, 1 + br label %catch + +catch: + %5 = call i8* @__cxa_begin_catch(i8* %3) #4 + %6 = bitcast i32* %0 to i8* + call void @free(i8* %6) + call void @__cxa_end_catch() + br label %fun.ret + +fun.ret: + ret void +} + +; Function Attrs: nounwind +declare noalias i8* @malloc(i64) + +; Function Attrs: nounwind +declare void @free(i8* nocapture) + +declare void @boo() + +; This is an artificial example, readnone functions by definition cannot unwind +; exceptions by calling the C++ exception throwing methods +; This function should only be used to test malloc_capture. +declare void @boo_readnone() readnone + +declare i32 @__gxx_personality_v0(...) 
+ +declare i8* @__cxa_begin_catch(i8*) + +declare void @__cxa_end_catch() + +declare i32 @llvm.eh.typeid.for(i8*) + +declare void @f() uwtable diff --git a/test/Transforms/LICM/scalar_promote.ll b/test/Transforms/LICM/scalar-promote.ll index c88701154b8f..89888546494f 100644 --- a/test/Transforms/LICM/scalar_promote.ll +++ b/test/Transforms/LICM/scalar-promote.ll @@ -378,6 +378,33 @@ exit: ret i32 %ret } +define void @test10(i32 %i) { +Entry: + br label %Loop +; CHECK-LABEL: @test10( +; CHECK: Entry: +; CHECK-NEXT: load atomic i32, i32* @X unordered, align 4 +; CHECK-NEXT: br label %Loop + + +Loop: ; preds = %Loop, %0 + %j = phi i32 [ 0, %Entry ], [ %Next, %Loop ] ; <i32> [#uses=1] + %x = load atomic i32, i32* @X unordered, align 4 + %x2 = add i32 %x, 1 + store atomic i32 %x2, i32* @X unordered, align 4 + %Next = add i32 %j, 1 + %cond = icmp eq i32 %Next, 0 + br i1 %cond, label %Out, label %Loop + +Out: + ret void +; CHECK: Out: +; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %x2 +; CHECK-NEXT: store atomic i32 %[[LCSSAPHI]], i32* @X unordered, align 4 +; CHECK-NEXT: ret void + +} + !0 = !{!4, !4, i64 0} !1 = !{!"omnipotent char", !2} !2 = !{!"Simple C/C++ TBAA"} diff --git a/test/Transforms/LICM/scalar_promote-unwind.ll b/test/Transforms/LICM/scalar_promote-unwind.ll deleted file mode 100644 index dd3693b4af63..000000000000 --- a/test/Transforms/LICM/scalar_promote-unwind.ll +++ /dev/null @@ -1,72 +0,0 @@ -; RUN: opt < %s -basicaa -licm -S | FileCheck %s -; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S %s | FileCheck %s - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Make sure we don't hoist the store out of the loop; %a would -; have the wrong value if f() unwinds - -define void @test1(i32* nocapture noalias %a, i1 zeroext %y) uwtable { -entry: - br label %for.body - -for.body: - %i.03 = phi i32 [ 0, %entry ], [ 
%inc, %for.inc ] - %0 = load i32, i32* %a, align 4 - %add = add nsw i32 %0, 1 - store i32 %add, i32* %a, align 4 - br i1 %y, label %if.then, label %for.inc - -; CHECK: define void @test1 -; CHECK: load i32, i32* -; CHECK-NEXT: add -; CHECK-NEXT: store i32 - -if.then: - tail call void @f() - br label %for.inc - -for.inc: - %inc = add nuw nsw i32 %i.03, 1 - %exitcond = icmp eq i32 %inc, 10000 - br i1 %exitcond, label %for.cond.cleanup, label %for.body - -for.cond.cleanup: - ret void -} - -; We can hoist the store out of the loop here; if f() unwinds, -; the lifetime of %a ends. - -define void @test2(i1 zeroext %y) uwtable { -entry: - %a = alloca i32 - br label %for.body - -for.body: - %i.03 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] - %0 = load i32, i32* %a, align 4 - %add = add nsw i32 %0, 1 - store i32 %add, i32* %a, align 4 - br i1 %y, label %if.then, label %for.inc - -if.then: - tail call void @f() - br label %for.inc - -for.inc: - %inc = add nuw nsw i32 %i.03, 1 - %exitcond = icmp eq i32 %inc, 10000 - br i1 %exitcond, label %for.cond.cleanup, label %for.body - -for.cond.cleanup: - ret void - -; CHECK: define void @test2 -; CHECK: store i32 -; CHECK-NEXT: ret void - ret void -} - -declare void @f() uwtable diff --git a/test/Transforms/LICM/sink.ll b/test/Transforms/LICM/sink.ll index cf169ddc12a9..70fa6fa13e3e 100644 --- a/test/Transforms/LICM/sink.ll +++ b/test/Transforms/LICM/sink.ll @@ -1,5 +1,7 @@ ; RUN: opt -S -licm < %s | FileCheck %s --check-prefix=CHECK-LICM ; RUN: opt -S -licm < %s | opt -S -loop-sink | FileCheck %s --check-prefix=CHECK-SINK +; RUN: opt -S < %s -passes='require<opt-remark-emit>,loop(licm),loop-sink' \ +; RUN: | FileCheck %s --check-prefix=CHECK-SINK ; Original source code: ; int g; diff --git a/test/Transforms/LICM/unrolled-deeply-nested.ll b/test/Transforms/LICM/unrolled-deeply-nested.ll new file mode 100644 index 000000000000..c0f2c9818000 --- /dev/null +++ b/test/Transforms/LICM/unrolled-deeply-nested.ll @@ -0,0 +1,76 @@ +; Test that 
LICM correctly detects conflicting accesses to memory in deeply +; nested subloops. This works in the legacy PM due to a special retained map of +; alias information for inner loops, and in the new PM it is recomputed for each +; loop. +; +; RUN: opt -S -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop(licm)' < %s | FileCheck %s +; RUN: opt -S -basicaa -licm < %s | FileCheck %s + +define i32 @test(i32* %a, i64 %n.0, i64 %n.0.0, i64 %n.0.0.0, i64 %n.0.0.0.0) nounwind uwtable readonly { +; CHECK-LABEL: define i32 @test +entry: + %b = alloca i32 + %c = alloca i32 + %a.i8 = bitcast i32* %a to i8* + %b.i8 = bitcast i32* %b to i8* + %c.i8 = bitcast i32* %c to i8* + br label %l.0.header +; CHECK: %b = alloca i32 +; CHECK: %c = alloca i32 +; CHECK: %[[AI8:.*]] = bitcast i32* %a to i8* +; CHECK: %[[BI8:.*]] = bitcast i32* %b to i8* +; CHECK: %[[CI8:.*]] = bitcast i32* %c to i8* +; CHECK-NOT: load +; CHECK: br + +l.0.header: + %iv.0 = phi i64 [ %iv.0.next, %l.0.latch ], [ 0, %entry ] + %iv.0.next = add i64 %iv.0, 1 + %exitcond.0 = icmp eq i64 %iv.0.next, %n.0 + %a.val = load i32, i32* %a + store i32 %a.val, i32* %b + %c.val = trunc i64 %iv.0 to i32 + store i32 %c.val, i32* %c + br label %l.0.0.header +; CHECK: %[[AV:.*]] = load i32, i32* %a +; CHECK: store i32 %[[AV]], i32* %b +; CHECK: %[[CT:.*]] = trunc i64 {{.*}} to i32 +; CHECK: store i32 %[[CT]], i32* %c +; CHECK: br + +l.0.0.header: + %iv.0.0 = phi i64 [ %iv.0.0.next, %l.0.0.latch ], [ 0, %l.0.header ] + %iv.0.0.next = add i64 %iv.0.0, 1 + %exitcond.0.0 = icmp eq i64 %iv.0.0.next, %n.0.0 + br label %l.0.0.0.header +; CHECK: br + +l.0.0.0.header: + %iv.0.0.0 = phi i64 [ %iv.0.0.0.next, %l.0.0.0.header ], [ 0, %l.0.0.header ] + %iv.0.0.0.next = add i64 %iv.0.0.0, 1 + %exitcond.0.0.0 = icmp eq i64 %iv.0.0.0.next, %n.0.0.0 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a.i8, i8* %c.i8, i64 4, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b.i8, i8* %c.i8, i64 4, i32 1, i1 false) + br i1 
%exitcond.0.0.0, label %l.0.0.0.header, label %l.0.0.latch +; CHECK: call void @llvm.memcpy.{{.*}}(i8* %[[AI8]], i8* %[[CI8]], i64 4 +; CHECK: call void @llvm.memcpy.{{.*}}(i8* %[[BI8]], i8* %[[CI8]], i64 4 +; CHECK: br + +l.0.0.latch: + br i1 %exitcond.0.0, label %l.0.0.header, label %l.0.latch +; CHECK: br + +l.0.latch: + %b.val = load i32, i32* %b + br i1 %exitcond.0, label %exit, label %l.0.header +; CHECK: %[[BV:.*]] = load i32, i32* %b +; CHECK: br + +exit: + %result.lcssa = phi i32 [ %b.val, %l.0.latch ] + ret i32 %b.val +; CHECK: %[[LCSSA:.*]] = phi i32 [ %[[BV]], %{{.*}} ] +; CHECK: ret i32 %[[LCSSA]] +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) |