aboutsummaryrefslogtreecommitdiff
path: root/test/Transforms/LICM
diff options
context:
space:
mode:
Diffstat (limited to 'test/Transforms/LICM')
-rw-r--r--test/Transforms/LICM/atomics.ll148
-rw-r--r--test/Transforms/LICM/constexpr.ll2
-rw-r--r--test/Transforms/LICM/hoist-bitcast-load.ll3
-rw-r--r--test/Transforms/LICM/hoist-deref-load.ll3
-rw-r--r--test/Transforms/LICM/hoist-fast-fdiv.ll34
-rw-r--r--test/Transforms/LICM/hoist-nounwind.ll2
-rw-r--r--test/Transforms/LICM/hoist-round.ll5
-rw-r--r--test/Transforms/LICM/hoisting.ll173
-rw-r--r--test/Transforms/LICM/loopsink.ll1
-rw-r--r--test/Transforms/LICM/opt-remarks.ll2
-rw-r--r--test/Transforms/LICM/pr32129.ll18
-rw-r--r--test/Transforms/LICM/scalar-promote-unwind.ll263
-rw-r--r--test/Transforms/LICM/scalar-promote.ll (renamed from test/Transforms/LICM/scalar_promote.ll)27
-rw-r--r--test/Transforms/LICM/scalar_promote-unwind.ll72
-rw-r--r--test/Transforms/LICM/sink.ll2
-rw-r--r--test/Transforms/LICM/unrolled-deeply-nested.ll76
16 files changed, 749 insertions, 82 deletions
diff --git a/test/Transforms/LICM/atomics.ll b/test/Transforms/LICM/atomics.ll
index d23cb49c5486..15c461aeca27 100644
--- a/test/Transforms/LICM/atomics.ll
+++ b/test/Transforms/LICM/atomics.ll
@@ -1,5 +1,5 @@
; RUN: opt < %s -S -basicaa -licm | FileCheck %s
-; RUN: opt -aa-pipeline=basic-aa -passes='lcssa,require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s
; Check that we can hoist unordered loads
define i32 @test1(i32* nocapture %y) nounwind uwtable ssp {
@@ -60,8 +60,7 @@ end:
; CHECK-NEXT: br label %loop
}
-; Don't try to "sink" unordered stores yet; it is legal, but the machinery
-; isn't there.
+; We can sink an unordered store
define i32 @test4(i32* nocapture noalias %x, i32* nocapture %y) nounwind uwtable ssp {
entry:
br label %loop
@@ -75,6 +74,149 @@ loop:
end:
ret i32 %vala
; CHECK-LABEL: define i32 @test4(
+; CHECK-LABEL: loop:
+; CHECK: load atomic i32, i32* %y monotonic
+; CHECK-NOT: store
+; CHECK-LABEL: end:
+; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %vala
+; CHECK: store atomic i32 %[[LCSSAPHI]], i32* %x unordered, align 4
+}
+
+; We currently don't handle ordered atomics.
+define i32 @test5(i32* nocapture noalias %x, i32* nocapture %y) nounwind uwtable ssp {
+entry:
+ br label %loop
+
+loop:
+ %vala = load atomic i32, i32* %y monotonic, align 4
+ store atomic i32 %vala, i32* %x release, align 4
+ %exitcond = icmp ne i32 %vala, 0
+ br i1 %exitcond, label %end, label %loop
+
+end:
+ ret i32 %vala
+; CHECK-LABEL: define i32 @test5(
; CHECK: load atomic i32, i32* %y monotonic
; CHECK-NEXT: store atomic
}
+
+; We currently don't touch volatiles
+define i32 @test6(i32* nocapture noalias %x, i32* nocapture %y) nounwind uwtable ssp {
+entry:
+ br label %loop
+
+loop:
+ %vala = load atomic i32, i32* %y monotonic, align 4
+ store volatile i32 %vala, i32* %x, align 4
+ %exitcond = icmp ne i32 %vala, 0
+ br i1 %exitcond, label %end, label %loop
+
+end:
+ ret i32 %vala
+; CHECK-LABEL: define i32 @test6(
+; CHECK: load atomic i32, i32* %y monotonic
+; CHECK-NEXT: store volatile
+}
+
+; We currently don't touch volatiles
+define i32 @test6b(i32* nocapture noalias %x, i32* nocapture %y) nounwind uwtable ssp {
+entry:
+ br label %loop
+
+loop:
+ %vala = load atomic i32, i32* %y monotonic, align 4
+ store atomic volatile i32 %vala, i32* %x unordered, align 4
+ %exitcond = icmp ne i32 %vala, 0
+ br i1 %exitcond, label %end, label %loop
+
+end:
+ ret i32 %vala
+; CHECK-LABEL: define i32 @test6b(
+; CHECK: load atomic i32, i32* %y monotonic
+; CHECK-NEXT: store atomic volatile
+}
+
+; Mixing unordered atomics and normal loads/stores is
+; currently unimplemented
+define i32 @test7(i32* nocapture noalias %x, i32* nocapture %y) nounwind uwtable ssp {
+entry:
+ br label %loop
+
+loop:
+ store i32 5, i32* %x
+ %vala = load atomic i32, i32* %y monotonic, align 4
+ store atomic i32 %vala, i32* %x unordered, align 4
+ %exitcond = icmp ne i32 %vala, 0
+ br i1 %exitcond, label %end, label %loop
+
+end:
+ ret i32 %vala
+; CHECK-LABEL: define i32 @test7(
+; CHECK: store i32 5, i32* %x
+; CHECK-NEXT: load atomic i32, i32* %y
+; CHECK-NEXT: store atomic i32
+}
+
+; Three provably noalias locations - we can sink normal and unordered, but
+; not monotonic
+define i32 @test7b(i32* nocapture noalias %x, i32* nocapture %y, i32* noalias nocapture %z) nounwind uwtable ssp {
+entry:
+ br label %loop
+
+loop:
+ store i32 5, i32* %x
+ %vala = load atomic i32, i32* %y monotonic, align 4
+ store atomic i32 %vala, i32* %z unordered, align 4
+ %exitcond = icmp ne i32 %vala, 0
+ br i1 %exitcond, label %end, label %loop
+
+end:
+ ret i32 %vala
+; CHECK-LABEL: define i32 @test7b(
+; CHECK: load atomic i32, i32* %y monotonic
+
+; CHECK-LABEL: end:
+; CHECK: store i32 5, i32* %x
+; CHECK: store atomic i32 %{{.+}}, i32* %z unordered, align 4
+}
+
+
+define i32 @test8(i32* nocapture noalias %x, i32* nocapture %y) {
+entry:
+ br label %loop
+
+loop:
+ %vala = load atomic i32, i32* %y monotonic, align 4
+ store atomic i32 %vala, i32* %x unordered, align 4
+ fence release
+ %exitcond = icmp ne i32 %vala, 0
+ br i1 %exitcond, label %end, label %loop
+
+end:
+ ret i32 %vala
+; CHECK-LABEL: define i32 @test8(
+; CHECK-LABEL: loop:
+; CHECK: load atomic i32, i32* %y monotonic
+; CHECK-NEXT: store atomic
+; CHECK-NEXT: fence
+}
+
+; Exact semantics of monotonic accesses are a bit vague in the C++ spec,
+; for the moment, be conservative and don't touch them.
+define i32 @test9(i32* nocapture noalias %x, i32* nocapture %y) {
+entry:
+ br label %loop
+
+loop:
+ %vala = load atomic i32, i32* %y monotonic, align 4
+ store atomic i32 %vala, i32* %x monotonic, align 4
+ %exitcond = icmp ne i32 %vala, 0
+ br i1 %exitcond, label %end, label %loop
+
+end:
+ ret i32 %vala
+; CHECK-LABEL: define i32 @test9(
+; CHECK-LABEL: loop:
+; CHECK: load atomic i32, i32* %y monotonic
+; CHECK-NEXT: store atomic i32 %vala, i32* %x monotonic, align 4
+}
diff --git a/test/Transforms/LICM/constexpr.ll b/test/Transforms/LICM/constexpr.ll
index 8ffc73513600..488821ac8fd4 100644
--- a/test/Transforms/LICM/constexpr.ll
+++ b/test/Transforms/LICM/constexpr.ll
@@ -1,5 +1,5 @@
; RUN: opt < %s -S -basicaa -licm | FileCheck %s
-; RUN: opt -aa-pipeline=basic-aa -passes='lcssa,require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s
; This fixes PR22460
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/test/Transforms/LICM/hoist-bitcast-load.ll b/test/Transforms/LICM/hoist-bitcast-load.ll
index 6ef00738820e..956c7283be31 100644
--- a/test/Transforms/LICM/hoist-bitcast-load.ll
+++ b/test/Transforms/LICM/hoist-bitcast-load.ll
@@ -1,5 +1,6 @@
; RUN: opt -S -basicaa -licm < %s | FileCheck %s
-; RUN: opt -aa-pipeline=basic-aa -passes='loop-simplify,require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(simplify-cfg,licm)' -S < %s | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop(simplify-cfg,licm)' -S < %s | FileCheck %s
+
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/LICM/hoist-deref-load.ll b/test/Transforms/LICM/hoist-deref-load.ll
index e67becdeb5e4..b48c9e5c7b14 100644
--- a/test/Transforms/LICM/hoist-deref-load.ll
+++ b/test/Transforms/LICM/hoist-deref-load.ll
@@ -1,5 +1,6 @@
; RUN: opt -S -basicaa -licm < %s | FileCheck %s
-; RUN: opt -aa-pipeline=basic-aa -passes='loop-simplify,require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(simplify-cfg,licm)' -S < %s | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop(simplify-cfg,licm)' -S < %s | FileCheck %s
+
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/LICM/hoist-fast-fdiv.ll b/test/Transforms/LICM/hoist-fast-fdiv.ll
new file mode 100644
index 000000000000..f61564fd726c
--- /dev/null
+++ b/test/Transforms/LICM/hoist-fast-fdiv.ll
@@ -0,0 +1,34 @@
+; RUN: opt -licm -S < %s | FileCheck %s
+
+; Function Attrs: noinline norecurse nounwind readnone ssp uwtable
+define zeroext i1 @f(double %v) #0 {
+entry:
+; CHECK-LABEL: @f(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: fdiv fast double 1.000000e+00, %v
+ br label %loop
+
+loop: ; preds = %entry, %loop
+ %v3 = phi i32 [ 0, %entry ], [ %v11, %loop ]
+ %v4 = phi i32 [ 0, %entry ], [ %v12, %loop ]
+ %v5 = uitofp i32 %v4 to double
+
+; CHECK-LABEL: loop:
+; CHECK: fmul fast double
+; CHECK-NOT: fdiv
+ %v6 = fdiv fast double %v5, %v
+ %v7 = fptoui double %v6 to i64
+ %v8 = and i64 %v7, 1
+ %v9 = xor i64 %v8, 1
+ %v10 = trunc i64 %v9 to i32
+ %v11 = add i32 %v10, %v3
+ %v12 = add nuw i32 %v4, 1
+ %v13 = icmp eq i32 %v12, -1
+ br i1 %v13, label %end, label %loop
+
+end: ; preds = %loop
+ %v15 = phi i32 [ %v11, %loop ]
+ %v16 = icmp ne i32 %v15, 0
+ ret i1 %v16
+}
+
diff --git a/test/Transforms/LICM/hoist-nounwind.ll b/test/Transforms/LICM/hoist-nounwind.ll
index e9720235893a..9fc4903b8302 100644
--- a/test/Transforms/LICM/hoist-nounwind.ll
+++ b/test/Transforms/LICM/hoist-nounwind.ll
@@ -1,5 +1,5 @@
; RUN: opt -S -basicaa -licm < %s | FileCheck %s
-; RUN: opt -aa-pipeline=basic-aa -passes='lcssa,require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S %s | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop(licm)' -S %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/LICM/hoist-round.ll b/test/Transforms/LICM/hoist-round.ll
index 9c6a3a180b50..87a7050668de 100644
--- a/test/Transforms/LICM/hoist-round.ll
+++ b/test/Transforms/LICM/hoist-round.ll
@@ -18,6 +18,7 @@ target datalayout = "E-m:e-p:32:32-i8:8:8-i16:16:16-i64:32:32-f64:32:32-v64:32:3
; CHECK: call float @llvm.copysign.f32
; CHECK: call float @llvm.minnum.f32
; CHECK: call float @llvm.maxnum.f32
+; CHECK: call float @llvm.powi.f32
; CHECK: for.body:
define void @test(float %arg1, float %arg2) {
@@ -40,7 +41,8 @@ for.body:
%tmp.8 = call float @llvm.copysign.f32(float %tmp.7, float %arg2)
%tmp.9 = call float @llvm.minnum.f32(float %tmp.8, float %arg2)
%tmp.10 = call float @llvm.maxnum.f32(float %tmp.9, float %arg2)
- call void @consume(float %tmp.10)
+ %tmp.11 = call float @llvm.powi.f32(float %tmp.10, i32 4)
+ call void @consume(float %tmp.11)
%IND.new = add i32 %IND, 1
br label %for.head
@@ -60,3 +62,4 @@ declare float @llvm.fabs.f32(float)
declare float @llvm.copysign.f32(float, float)
declare float @llvm.minnum.f32(float, float)
declare float @llvm.maxnum.f32(float, float)
+declare float @llvm.powi.f32(float, i32)
diff --git a/test/Transforms/LICM/hoisting.ll b/test/Transforms/LICM/hoisting.ll
index 29595b3e1cc0..cbd17689e939 100644
--- a/test/Transforms/LICM/hoisting.ll
+++ b/test/Transforms/LICM/hoisting.ll
@@ -1,5 +1,5 @@
; RUN: opt < %s -licm -S | FileCheck %s
-; RUN: opt -lcssa %s | opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S | FileCheck %s
+; RUN: opt < %s -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop(licm)' -S | FileCheck %s
@X = global i32 0 ; <i32*> [#uses=1]
@@ -149,3 +149,174 @@ latch:
return:
ret i32 %sum
}
+
+declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly
+declare void @llvm.invariant.end.p0i8({}*, i64, i8* nocapture) nounwind
+declare void @escaping.invariant.start({}*) nounwind
+; invariant.start dominates the load, and in this scope, the
+; load is invariant. So, we can hoist the `addrld` load out of the loop.
+define i32 @test_fence(i8* %addr, i32 %n, i8* %volatile) {
+; CHECK-LABEL: @test_fence
+; CHECK-LABEL: entry
+; CHECK: invariant.start
+; CHECK: %addrld = load atomic i32, i32* %addr.i unordered, align 8
+; CHECK: br label %loop
+entry:
+ %gep = getelementptr inbounds i8, i8* %addr, i64 8
+ %addr.i = bitcast i8* %gep to i32 *
+ store atomic i32 5, i32 * %addr.i unordered, align 8
+ fence release
+ %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
+ br label %loop
+
+loop:
+ %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
+ %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
+ %volload = load atomic i8, i8* %volatile unordered, align 8
+ fence acquire
+ %volchk = icmp eq i8 %volload, 0
+ %addrld = load atomic i32, i32* %addr.i unordered, align 8
+ %sel = select i1 %volchk, i32 0, i32 %addrld
+ %sum.next = add i32 %sel, %sum
+ %indvar.next = add i32 %indvar, 1
+ %cond = icmp slt i32 %indvar.next, %n
+ br i1 %cond, label %loop, label %loopexit
+
+loopexit:
+ ret i32 %sum
+}
+
+
+
+; Same as test above, but the load is no longer invariant (presence of
+; invariant.end). We cannot hoist the addrld out of loop.
+define i32 @test_fence1(i8* %addr, i32 %n, i8* %volatile) {
+; CHECK-LABEL: @test_fence1
+; CHECK-LABEL: entry
+; CHECK: invariant.start
+; CHECK-NEXT: invariant.end
+; CHECK-NEXT: br label %loop
+entry:
+ %gep = getelementptr inbounds i8, i8* %addr, i64 8
+ %addr.i = bitcast i8* %gep to i32 *
+ store atomic i32 5, i32 * %addr.i unordered, align 8
+ fence release
+ %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
+ call void @llvm.invariant.end.p0i8({}* %invst, i64 4, i8* %gep)
+ br label %loop
+
+loop:
+ %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
+ %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
+ %volload = load atomic i8, i8* %volatile unordered, align 8
+ fence acquire
+ %volchk = icmp eq i8 %volload, 0
+ %addrld = load atomic i32, i32* %addr.i unordered, align 8
+ %sel = select i1 %volchk, i32 0, i32 %addrld
+ %sum.next = add i32 %sel, %sum
+ %indvar.next = add i32 %indvar, 1
+ %cond = icmp slt i32 %indvar.next, %n
+ br i1 %cond, label %loop, label %loopexit
+
+loopexit:
+ ret i32 %sum
+}
+
+; same as test above, but instead of invariant.end, we have the result of
+; invariant.start escaping through a call. We cannot hoist the load.
+define i32 @test_fence2(i8* %addr, i32 %n, i8* %volatile) {
+; CHECK-LABEL: @test_fence2
+; CHECK-LABEL: entry
+; CHECK-NOT: load
+; CHECK: br label %loop
+entry:
+ %gep = getelementptr inbounds i8, i8* %addr, i64 8
+ %addr.i = bitcast i8* %gep to i32 *
+ store atomic i32 5, i32 * %addr.i unordered, align 8
+ fence release
+ %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
+ call void @escaping.invariant.start({}* %invst)
+ br label %loop
+
+loop:
+ %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
+ %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
+ %volload = load atomic i8, i8* %volatile unordered, align 8
+ fence acquire
+ %volchk = icmp eq i8 %volload, 0
+ %addrld = load atomic i32, i32* %addr.i unordered, align 8
+ %sel = select i1 %volchk, i32 0, i32 %addrld
+ %sum.next = add i32 %sel, %sum
+ %indvar.next = add i32 %indvar, 1
+ %cond = icmp slt i32 %indvar.next, %n
+ br i1 %cond, label %loop, label %loopexit
+
+loopexit:
+ ret i32 %sum
+}
+
+; FIXME: invariant.start dominates the load, and in this scope, the
+; load is invariant. So, we can hoist the `addrld` load out of the loop.
+; The load operand %addr.i is bitcast before being passed to
+; invariant.start
+define i32 @test_fence3(i32* %addr, i32 %n, i8* %volatile) {
+; CHECK-LABEL: @test_fence3
+; CHECK-LABEL: entry
+; CHECK: invariant.start
+; CHECK-NOT: %addrld = load atomic i32, i32* %addr.i unordered, align 8
+; CHECK: br label %loop
+entry:
+ %addr.i = getelementptr inbounds i32, i32* %addr, i64 8
+ %gep = bitcast i32* %addr.i to i8 *
+ store atomic i32 5, i32 * %addr.i unordered, align 8
+ fence release
+ %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
+ br label %loop
+
+loop:
+ %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
+ %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
+ %volload = load atomic i8, i8* %volatile unordered, align 8
+ fence acquire
+ %volchk = icmp eq i8 %volload, 0
+ %addrld = load atomic i32, i32* %addr.i unordered, align 8
+ %sel = select i1 %volchk, i32 0, i32 %addrld
+ %sum.next = add i32 %sel, %sum
+ %indvar.next = add i32 %indvar, 1
+ %cond = icmp slt i32 %indvar.next, %n
+ br i1 %cond, label %loop, label %loopexit
+
+loopexit:
+ ret i32 %sum
+}
+
+; We should not hoist the addrld out of the loop.
+define i32 @test_fence4(i32* %addr, i32 %n, i8* %volatile) {
+; CHECK-LABEL: @test_fence4
+; CHECK-LABEL: entry
+; CHECK-NOT: %addrld = load atomic i32, i32* %addr.i unordered, align 8
+; CHECK: br label %loop
+entry:
+ %addr.i = getelementptr inbounds i32, i32* %addr, i64 8
+ %gep = bitcast i32* %addr.i to i8 *
+ br label %loop
+
+loop:
+ %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
+ %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
+ store atomic i32 5, i32 * %addr.i unordered, align 8
+ fence release
+ %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep)
+ %volload = load atomic i8, i8* %volatile unordered, align 8
+ fence acquire
+ %volchk = icmp eq i8 %volload, 0
+ %addrld = load atomic i32, i32* %addr.i unordered, align 8
+ %sel = select i1 %volchk, i32 0, i32 %addrld
+ %sum.next = add i32 %sel, %sum
+ %indvar.next = add i32 %indvar, 1
+ %cond = icmp slt i32 %indvar.next, %n
+ br i1 %cond, label %loop, label %loopexit
+
+loopexit:
+ ret i32 %sum
+}
diff --git a/test/Transforms/LICM/loopsink.ll b/test/Transforms/LICM/loopsink.ll
index 5004752d1031..b203ea8b51ad 100644
--- a/test/Transforms/LICM/loopsink.ll
+++ b/test/Transforms/LICM/loopsink.ll
@@ -1,4 +1,5 @@
; RUN: opt -S -loop-sink < %s | FileCheck %s
+; RUN: opt -S -passes=loop-sink < %s | FileCheck %s
@g = global i32 0, align 4
diff --git a/test/Transforms/LICM/opt-remarks.ll b/test/Transforms/LICM/opt-remarks.ll
index f0ef386c9f9a..b44fc57131a5 100644
--- a/test/Transforms/LICM/opt-remarks.ll
+++ b/test/Transforms/LICM/opt-remarks.ll
@@ -10,7 +10,7 @@ Loop:
%j = phi i32 [ 0, %Entry ], [ %Next, %Loop ]
%addr = getelementptr i32, i32* %array, i32 %j
%a = load i32, i32* %addr
-; CHECK: remark: /tmp/kk.c:2:20: hosting load
+; CHECK: remark: /tmp/kk.c:2:20: hoisting load
%b = load i32, i32* %p, !dbg !8
%a2 = add i32 %a, %b
store i32 %a2, i32* %addr
diff --git a/test/Transforms/LICM/pr32129.ll b/test/Transforms/LICM/pr32129.ll
new file mode 100644
index 000000000000..2618afe46322
--- /dev/null
+++ b/test/Transforms/LICM/pr32129.ll
@@ -0,0 +1,18 @@
+; RUN: opt -S -licm -loop-unswitch -licm < %s | FileCheck %s
+
+declare void @llvm.experimental.guard(i1, ...)
+
+define void @test() {
+; CHECK-LABEL: @test(
+; CHECK-NOT: guard
+entry:
+ br label %header
+
+header:
+ br label %loop
+
+loop:
+ %0 = icmp ult i32 0, 400
+ call void (i1, ...) @llvm.experimental.guard(i1 %0, i32 9) [ "deopt"() ]
+ br i1 undef, label %header, label %loop
+}
diff --git a/test/Transforms/LICM/scalar-promote-unwind.ll b/test/Transforms/LICM/scalar-promote-unwind.ll
new file mode 100644
index 000000000000..f1f52eed1d4c
--- /dev/null
+++ b/test/Transforms/LICM/scalar-promote-unwind.ll
@@ -0,0 +1,263 @@
+; RUN: opt < %s -basicaa -licm -S | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Make sure we don't hoist the store out of the loop; %a would
+; have the wrong value if f() unwinds
+
+define void @test1(i32* nocapture noalias %a, i1 zeroext %y) uwtable {
+entry:
+ br label %for.body
+
+for.body:
+ %i.03 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ %0 = load i32, i32* %a, align 4
+ %add = add nsw i32 %0, 1
+ store i32 %add, i32* %a, align 4
+ br i1 %y, label %if.then, label %for.inc
+
+; CHECK: define void @test1
+; CHECK: load i32, i32*
+; CHECK-NEXT: add
+; CHECK-NEXT: store i32
+
+if.then:
+ tail call void @f()
+ br label %for.inc
+
+for.inc:
+ %inc = add nuw nsw i32 %i.03, 1
+ %exitcond = icmp eq i32 %inc, 10000
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+ ret void
+}
+
+; We can hoist the store out of the loop here; if f() unwinds,
+; the lifetime of %a ends.
+
+define void @test2(i1 zeroext %y) uwtable {
+entry:
+ %a = alloca i32
+ br label %for.body
+
+for.body:
+ %i.03 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ %0 = load i32, i32* %a, align 4
+ %add = add nsw i32 %0, 1
+ store i32 %add, i32* %a, align 4
+ br i1 %y, label %if.then, label %for.inc
+
+if.then:
+ tail call void @f()
+ br label %for.inc
+
+for.inc:
+ %inc = add nuw nsw i32 %i.03, 1
+ %exitcond = icmp eq i32 %inc, 10000
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+; CHECK: define void @test2
+; CHECK: store i32
+; CHECK-NEXT: ret void
+ ret void
+}
+
+@_ZTIi = external constant i8*
+
+; In this test, the loop is within a try block. There is an explicit unwind edge out of the loop.
+; Make sure this edge is treated as a loop exit, and that the loads and stores are promoted as
+; expected
+define void @loop_within_tryblock() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+ %a = alloca i32, align 4
+ store i32 0, i32* %a, align 4
+ br label %for.cond
+
+for.cond:
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ %cmp = icmp slt i32 %i.0, 1024
+ br i1 %cmp, label %for.body, label %for.end
+
+; CHECK: for.body:
+; CHECK-NOT: load
+; CHECK-NOT: store
+; CHECK: invoke
+for.body:
+ %0 = load i32, i32* %a, align 4
+ %add = add nsw i32 %0, 1
+ store i32 %add, i32* %a, align 4
+ invoke void @boo()
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+ br label %for.inc
+
+for.inc:
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+
+; CHECK: lpad:
+; CHECK: store
+; CHECK: br
+lpad:
+ %1 = landingpad { i8*, i32 }
+ catch i8* bitcast (i8** @_ZTIi to i8*)
+ %2 = extractvalue { i8*, i32 } %1, 0
+ %3 = extractvalue { i8*, i32 } %1, 1
+ br label %catch.dispatch
+
+catch.dispatch:
+ %4 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) #3
+ %matches = icmp eq i32 %3, %4
+ br i1 %matches, label %catch, label %eh.resume
+
+catch:
+ %5 = call i8* @__cxa_begin_catch(i8* %2) #3
+ %6 = bitcast i8* %5 to i32*
+ %7 = load i32, i32* %6, align 4
+ call void @__cxa_end_catch() #3
+ br label %try.cont
+
+try.cont:
+ ret void
+
+for.end:
+ br label %try.cont
+
+eh.resume:
+ %lpad.val = insertvalue { i8*, i32 } undef, i8* %2, 0
+ %lpad.val3 = insertvalue { i8*, i32 } %lpad.val, i32 %3, 1
+ resume { i8*, i32 } %lpad.val3
+}
+
+
+; The malloc'ed memory is not captured and therefore promoted.
+define void @malloc_no_capture() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+ %call = call i8* @malloc(i64 4)
+ %0 = bitcast i8* %call to i32*
+ br label %for.body
+
+; CHECK: for.body:
+; CHECK-NOT: load
+; CHECK-NOT: store
+; CHECK: br
+for.body:
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.latch ]
+ %1 = load i32, i32* %0, align 4
+ %add = add nsw i32 %1, 1
+ store i32 %add, i32* %0, align 4
+ br label %for.call
+
+for.call:
+ invoke void @boo()
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+ br label %for.latch
+
+for.latch:
+ %inc = add i32 %i.0, 1
+ %cmp = icmp slt i32 %i.0, 1024
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+ br label %fun.ret
+
+lpad:
+ %2 = landingpad { i8*, i32 }
+ catch i8* null
+ %3 = extractvalue { i8*, i32 } %2, 0
+ %4 = extractvalue { i8*, i32 } %2, 1
+ br label %catch
+
+catch:
+ %5 = call i8* @__cxa_begin_catch(i8* %3) #4
+ %6 = bitcast i32* %0 to i8*
+ call void @free(i8* %6)
+ call void @__cxa_end_catch()
+ br label %fun.ret
+
+fun.ret:
+ ret void
+}
+
+; The malloc'ed memory can be captured and therefore not promoted.
+define void @malloc_capture(i32** noalias %A) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+ %call = call i8* @malloc(i64 4)
+ %0 = bitcast i8* %call to i32*
+ br label %for.body
+
+; CHECK: for.body:
+; CHECK: load
+; CHECK: store
+; CHECK: br
+for.body:
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.latch ]
+ %1 = load i32, i32* %0, align 4
+ %add = add nsw i32 %1, 1
+ store i32 %add, i32* %0, align 4
+ br label %for.call
+
+for.call:
+ invoke void @boo_readnone()
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+ br label %for.latch
+
+for.latch:
+ store i32* %0, i32** %A
+ %inc = add i32 %i.0, 1
+ %cmp = icmp slt i32 %i.0, 1024
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+ br label %fun.ret
+
+lpad:
+ %2 = landingpad { i8*, i32 }
+ catch i8* null
+ %3 = extractvalue { i8*, i32 } %2, 0
+ %4 = extractvalue { i8*, i32 } %2, 1
+ br label %catch
+
+catch:
+ %5 = call i8* @__cxa_begin_catch(i8* %3) #4
+ %6 = bitcast i32* %0 to i8*
+ call void @free(i8* %6)
+ call void @__cxa_end_catch()
+ br label %fun.ret
+
+fun.ret:
+ ret void
+}
+
+; Function Attrs: nounwind
+declare noalias i8* @malloc(i64)
+
+; Function Attrs: nounwind
+declare void @free(i8* nocapture)
+
+declare void @boo()
+
+; This is an artificial example, readnone functions by definition cannot unwind
+; exceptions by calling the C++ exception throwing methods
+; This function should only be used to test malloc_capture.
+declare void @boo_readnone() readnone
+
+declare i32 @__gxx_personality_v0(...)
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @__cxa_end_catch()
+
+declare i32 @llvm.eh.typeid.for(i8*)
+
+declare void @f() uwtable
diff --git a/test/Transforms/LICM/scalar_promote.ll b/test/Transforms/LICM/scalar-promote.ll
index c88701154b8f..89888546494f 100644
--- a/test/Transforms/LICM/scalar_promote.ll
+++ b/test/Transforms/LICM/scalar-promote.ll
@@ -378,6 +378,33 @@ exit:
ret i32 %ret
}
+define void @test10(i32 %i) {
+Entry:
+ br label %Loop
+; CHECK-LABEL: @test10(
+; CHECK: Entry:
+; CHECK-NEXT: load atomic i32, i32* @X unordered, align 4
+; CHECK-NEXT: br label %Loop
+
+
+Loop: ; preds = %Loop, %0
+ %j = phi i32 [ 0, %Entry ], [ %Next, %Loop ] ; <i32> [#uses=1]
+ %x = load atomic i32, i32* @X unordered, align 4
+ %x2 = add i32 %x, 1
+ store atomic i32 %x2, i32* @X unordered, align 4
+ %Next = add i32 %j, 1
+ %cond = icmp eq i32 %Next, 0
+ br i1 %cond, label %Out, label %Loop
+
+Out:
+ ret void
+; CHECK: Out:
+; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %x2
+; CHECK-NEXT: store atomic i32 %[[LCSSAPHI]], i32* @X unordered, align 4
+; CHECK-NEXT: ret void
+
+}
+
!0 = !{!4, !4, i64 0}
!1 = !{!"omnipotent char", !2}
!2 = !{!"Simple C/C++ TBAA"}
diff --git a/test/Transforms/LICM/scalar_promote-unwind.ll b/test/Transforms/LICM/scalar_promote-unwind.ll
deleted file mode 100644
index dd3693b4af63..000000000000
--- a/test/Transforms/LICM/scalar_promote-unwind.ll
+++ /dev/null
@@ -1,72 +0,0 @@
-; RUN: opt < %s -basicaa -licm -S | FileCheck %s
-; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S %s | FileCheck %s
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-; Make sure we don't hoist the store out of the loop; %a would
-; have the wrong value if f() unwinds
-
-define void @test1(i32* nocapture noalias %a, i1 zeroext %y) uwtable {
-entry:
- br label %for.body
-
-for.body:
- %i.03 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
- %0 = load i32, i32* %a, align 4
- %add = add nsw i32 %0, 1
- store i32 %add, i32* %a, align 4
- br i1 %y, label %if.then, label %for.inc
-
-; CHECK: define void @test1
-; CHECK: load i32, i32*
-; CHECK-NEXT: add
-; CHECK-NEXT: store i32
-
-if.then:
- tail call void @f()
- br label %for.inc
-
-for.inc:
- %inc = add nuw nsw i32 %i.03, 1
- %exitcond = icmp eq i32 %inc, 10000
- br i1 %exitcond, label %for.cond.cleanup, label %for.body
-
-for.cond.cleanup:
- ret void
-}
-
-; We can hoist the store out of the loop here; if f() unwinds,
-; the lifetime of %a ends.
-
-define void @test2(i1 zeroext %y) uwtable {
-entry:
- %a = alloca i32
- br label %for.body
-
-for.body:
- %i.03 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
- %0 = load i32, i32* %a, align 4
- %add = add nsw i32 %0, 1
- store i32 %add, i32* %a, align 4
- br i1 %y, label %if.then, label %for.inc
-
-if.then:
- tail call void @f()
- br label %for.inc
-
-for.inc:
- %inc = add nuw nsw i32 %i.03, 1
- %exitcond = icmp eq i32 %inc, 10000
- br i1 %exitcond, label %for.cond.cleanup, label %for.body
-
-for.cond.cleanup:
- ret void
-
-; CHECK: define void @test2
-; CHECK: store i32
-; CHECK-NEXT: ret void
- ret void
-}
-
-declare void @f() uwtable
diff --git a/test/Transforms/LICM/sink.ll b/test/Transforms/LICM/sink.ll
index cf169ddc12a9..70fa6fa13e3e 100644
--- a/test/Transforms/LICM/sink.ll
+++ b/test/Transforms/LICM/sink.ll
@@ -1,5 +1,7 @@
; RUN: opt -S -licm < %s | FileCheck %s --check-prefix=CHECK-LICM
; RUN: opt -S -licm < %s | opt -S -loop-sink | FileCheck %s --check-prefix=CHECK-SINK
+; RUN: opt -S < %s -passes='require<opt-remark-emit>,loop(licm),loop-sink' \
+; RUN: | FileCheck %s --check-prefix=CHECK-SINK
; Original source code:
; int g;
diff --git a/test/Transforms/LICM/unrolled-deeply-nested.ll b/test/Transforms/LICM/unrolled-deeply-nested.ll
new file mode 100644
index 000000000000..c0f2c9818000
--- /dev/null
+++ b/test/Transforms/LICM/unrolled-deeply-nested.ll
@@ -0,0 +1,76 @@
+; Test that LICM correctly detects conflicting accesses to memory in deeply
+; nested subloops. This works in the legacy PM due to a special retained map of
+; alias information for inner loops, and in the new PM it is recomputed for each
+; loop.
+;
+; RUN: opt -S -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop(licm)' < %s | FileCheck %s
+; RUN: opt -S -basicaa -licm < %s | FileCheck %s
+
+define i32 @test(i32* %a, i64 %n.0, i64 %n.0.0, i64 %n.0.0.0, i64 %n.0.0.0.0) nounwind uwtable readonly {
+; CHECK-LABEL: define i32 @test
+entry:
+ %b = alloca i32
+ %c = alloca i32
+ %a.i8 = bitcast i32* %a to i8*
+ %b.i8 = bitcast i32* %b to i8*
+ %c.i8 = bitcast i32* %c to i8*
+ br label %l.0.header
+; CHECK: %b = alloca i32
+; CHECK: %c = alloca i32
+; CHECK: %[[AI8:.*]] = bitcast i32* %a to i8*
+; CHECK: %[[BI8:.*]] = bitcast i32* %b to i8*
+; CHECK: %[[CI8:.*]] = bitcast i32* %c to i8*
+; CHECK-NOT: load
+; CHECK: br
+
+l.0.header:
+ %iv.0 = phi i64 [ %iv.0.next, %l.0.latch ], [ 0, %entry ]
+ %iv.0.next = add i64 %iv.0, 1
+ %exitcond.0 = icmp eq i64 %iv.0.next, %n.0
+ %a.val = load i32, i32* %a
+ store i32 %a.val, i32* %b
+ %c.val = trunc i64 %iv.0 to i32
+ store i32 %c.val, i32* %c
+ br label %l.0.0.header
+; CHECK: %[[AV:.*]] = load i32, i32* %a
+; CHECK: store i32 %[[AV]], i32* %b
+; CHECK: %[[CT:.*]] = trunc i64 {{.*}} to i32
+; CHECK: store i32 %[[CT]], i32* %c
+; CHECK: br
+
+l.0.0.header:
+ %iv.0.0 = phi i64 [ %iv.0.0.next, %l.0.0.latch ], [ 0, %l.0.header ]
+ %iv.0.0.next = add i64 %iv.0.0, 1
+ %exitcond.0.0 = icmp eq i64 %iv.0.0.next, %n.0.0
+ br label %l.0.0.0.header
+; CHECK: br
+
+l.0.0.0.header:
+ %iv.0.0.0 = phi i64 [ %iv.0.0.0.next, %l.0.0.0.header ], [ 0, %l.0.0.header ]
+ %iv.0.0.0.next = add i64 %iv.0.0.0, 1
+ %exitcond.0.0.0 = icmp eq i64 %iv.0.0.0.next, %n.0.0.0
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a.i8, i8* %c.i8, i64 4, i32 1, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b.i8, i8* %c.i8, i64 4, i32 1, i1 false)
+ br i1 %exitcond.0.0.0, label %l.0.0.0.header, label %l.0.0.latch
+; CHECK: call void @llvm.memcpy.{{.*}}(i8* %[[AI8]], i8* %[[CI8]], i64 4
+; CHECK: call void @llvm.memcpy.{{.*}}(i8* %[[BI8]], i8* %[[CI8]], i64 4
+; CHECK: br
+
+l.0.0.latch:
+ br i1 %exitcond.0.0, label %l.0.0.header, label %l.0.latch
+; CHECK: br
+
+l.0.latch:
+ %b.val = load i32, i32* %b
+ br i1 %exitcond.0, label %exit, label %l.0.header
+; CHECK: %[[BV:.*]] = load i32, i32* %b
+; CHECK: br
+
+exit:
+ %result.lcssa = phi i32 [ %b.val, %l.0.latch ]
+ ret i32 %b.val
+; CHECK: %[[LCSSA:.*]] = phi i32 [ %[[BV]], %{{.*}} ]
+; CHECK: ret i32 %[[LCSSA]]
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)