aboutsummaryrefslogtreecommitdiff
path: root/test/Transforms/Inline
diff options
context:
space:
mode:
Diffstat (limited to 'test/Transforms/Inline')
-rw-r--r--test/Transforms/Inline/AArch64/gep-cost.ll30
-rw-r--r--test/Transforms/Inline/AArch64/lit.local.cfg2
-rw-r--r--test/Transforms/Inline/alloca-bonus.ll8
-rw-r--r--test/Transforms/Inline/arg-attr-propagation.ll50
-rw-r--r--test/Transforms/Inline/bfi-update.ll93
-rw-r--r--test/Transforms/Inline/cgscc-incremental-invalidate.ll111
-rw-r--r--test/Transforms/Inline/cgscc-invalidate.ll32
-rw-r--r--test/Transforms/Inline/clear-analyses.ll32
-rw-r--r--test/Transforms/Inline/crash-lifetime-marker.ll4
-rw-r--r--test/Transforms/Inline/function-count-update-2.ll33
-rw-r--r--test/Transforms/Inline/function-count-update-3.ll78
-rw-r--r--test/Transforms/Inline/function-count-update.ll50
-rw-r--r--test/Transforms/Inline/inline-cold-callee.ll1
-rw-r--r--test/Transforms/Inline/inline-cold-callsite.ll54
-rw-r--r--test/Transforms/Inline/inline-hot-callsite-2.ll56
-rw-r--r--test/Transforms/Inline/inline-hot-callsite.ll2
-rw-r--r--test/Transforms/Inline/inline_stats.ll3
-rw-r--r--test/Transforms/Inline/internal-scc-members.ll31
-rw-r--r--test/Transforms/Inline/last-call-bonus.ll52
-rw-r--r--test/Transforms/Inline/lifetime-no-datalayout.ll4
-rw-r--r--test/Transforms/Inline/lifetime.ll40
-rw-r--r--test/Transforms/Inline/monster_scc.ll460
-rw-r--r--test/Transforms/Inline/optimization-remarks-with-hotness.ll3
-rw-r--r--test/Transforms/Inline/optimization-remarks.ll3
-rw-r--r--test/Transforms/Inline/prof-update.ll39
25 files changed, 1221 insertions, 50 deletions
diff --git a/test/Transforms/Inline/AArch64/gep-cost.ll b/test/Transforms/Inline/AArch64/gep-cost.ll
new file mode 100644
index 000000000000..204958f082dd
--- /dev/null
+++ b/test/Transforms/Inline/AArch64/gep-cost.ll
@@ -0,0 +1,30 @@
+; REQUIRES: asserts
+; RUN: opt -inline -mtriple=aarch64--linux-gnu -mcpu=kryo -S -debug-only=inline-cost < %s 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+define void @outer([4 x i32]* %ptr, i32 %i) {
+ call void @inner1([4 x i32]* %ptr, i32 %i)
+ call void @inner2([4 x i32]* %ptr, i32 %i)
+ ret void
+}
+; The gep in inner1() is reg+reg, which is a legal addressing mode for AArch64.
+; Thus, both the gep and ret can be simplified.
+; CHECK: Analyzing call of inner1
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 2
+define void @inner1([4 x i32]* %ptr, i32 %i) {
+ %G = getelementptr inbounds [4 x i32], [4 x i32]* %ptr, i32 0, i32 %i
+ ret void
+}
+
+; The gep in inner2() is reg+imm+reg, which is not a legal addressing mode for
+; AArch64. Thus, only the ret can be simplified and not the gep.
+; CHECK: Analyzing call of inner2
+; CHECK: NumInstructionsSimplified: 1
+; CHECK: NumInstructions: 2
+define void @inner2([4 x i32]* %ptr, i32 %i) {
+ %G = getelementptr inbounds [4 x i32], [4 x i32]* %ptr, i32 1, i32 %i
+ ret void
+}
diff --git a/test/Transforms/Inline/AArch64/lit.local.cfg b/test/Transforms/Inline/AArch64/lit.local.cfg
new file mode 100644
index 000000000000..7184443994b6
--- /dev/null
+++ b/test/Transforms/Inline/AArch64/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'AArch64' in config.root.targets:
+ config.unsupported = True
diff --git a/test/Transforms/Inline/alloca-bonus.ll b/test/Transforms/Inline/alloca-bonus.ll
index 542dcee0fcb2..c5c2ce11cc5b 100644
--- a/test/Transforms/Inline/alloca-bonus.ll
+++ b/test/Transforms/Inline/alloca-bonus.ll
@@ -3,7 +3,7 @@
target datalayout = "p:32:32"
-declare void @llvm.lifetime.start(i64 %size, i8* nocapture %ptr)
+declare void @llvm.lifetime.start.p0i8(i64 %size, i8* nocapture %ptr)
@glbl = external global i32
@@ -22,7 +22,7 @@ define void @inner1(i32 *%ptr) {
%D = getelementptr inbounds i32, i32* %ptr, i32 1
%E = bitcast i32* %ptr to i8*
%F = select i1 false, i32* %ptr, i32* @glbl
- call void @llvm.lifetime.start(i64 0, i8* %E)
+ call void @llvm.lifetime.start.p0i8(i64 0, i8* %E)
call void @extern()
ret void
}
@@ -43,7 +43,7 @@ define void @inner2(i32 *%ptr) {
%D = getelementptr inbounds i32, i32* %ptr, i32 %A
%E = bitcast i32* %ptr to i8*
%F = select i1 false, i32* %ptr, i32* @glbl
- call void @llvm.lifetime.start(i64 0, i8* %E)
+ call void @llvm.lifetime.start.p0i8(i64 0, i8* %E)
call void @extern()
ret void
}
@@ -152,7 +152,7 @@ if.then:
%D = getelementptr inbounds i32, i32* %ptr, i32 %A
%E = bitcast i32* %ptr to i8*
%F = select i1 false, i32* %ptr, i32* @glbl
- call void @llvm.lifetime.start(i64 0, i8* %E)
+ call void @llvm.lifetime.start.p0i8(i64 0, i8* %E)
ret void
exit:
diff --git a/test/Transforms/Inline/arg-attr-propagation.ll b/test/Transforms/Inline/arg-attr-propagation.ll
new file mode 100644
index 000000000000..3d18e8047e5b
--- /dev/null
+++ b/test/Transforms/Inline/arg-attr-propagation.ll
@@ -0,0 +1,50 @@
+; RUN: opt -inline -S < %s | FileCheck %s
+
+; The callee guarantees that the pointer argument is nonnull and dereferenceable.
+; That information should transfer to the caller.
+
+define i32 @callee(i32* dereferenceable(32) %t1) {
+; CHECK-LABEL: @callee(i32* dereferenceable(32) %t1)
+; CHECK-NEXT: [[T2:%.*]] = load i32, i32* %t1
+; CHECK-NEXT: ret i32 [[T2]]
+;
+ %t2 = load i32, i32* %t1
+ ret i32 %t2
+}
+
+; FIXME: All dereferenceability information is lost.
+; The caller argument could be known nonnull and dereferenceable(32).
+
+define i32 @caller1(i32* %t1) {
+; CHECK-LABEL: @caller1(i32* %t1)
+; CHECK-NEXT: [[T2_I:%.*]] = load i32, i32* %t1
+; CHECK-NEXT: ret i32 [[T2_I]]
+;
+ %t2 = tail call i32 @callee(i32* dereferenceable(32) %t1)
+ ret i32 %t2
+}
+
+; The caller argument is nonnull, but that can be explicit.
+; The dereferenceable amount could be increased.
+
+define i32 @caller2(i32* dereferenceable(31) %t1) {
+; CHECK-LABEL: @caller2(i32* dereferenceable(31) %t1)
+; CHECK-NEXT: [[T2_I:%.*]] = load i32, i32* %t1
+; CHECK-NEXT: ret i32 [[T2_I]]
+;
+ %t2 = tail call i32 @callee(i32* dereferenceable(32) %t1)
+ ret i32 %t2
+}
+
+; The caller argument is nonnull, but that can be explicit.
+; Make sure that we don't propagate a smaller dereferenceable amount.
+
+define i32 @caller3(i32* dereferenceable(33) %t1) {
+; CHECK-LABEL: @caller3(i32* dereferenceable(33) %t1)
+; CHECK-NEXT: [[T2_I:%.*]] = load i32, i32* %t1
+; CHECK-NEXT: ret i32 [[T2_I]]
+;
+ %t2 = tail call i32 @callee(i32* dereferenceable(32) %t1)
+ ret i32 %t2
+}
+
diff --git a/test/Transforms/Inline/bfi-update.ll b/test/Transforms/Inline/bfi-update.ll
new file mode 100644
index 000000000000..94584e2e6ce5
--- /dev/null
+++ b/test/Transforms/Inline/bfi-update.ll
@@ -0,0 +1,93 @@
+; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -S -inline-threshold=50 -inline-cold-callsite-threshold=0 -hot-callsite-threshold=50 | FileCheck %s
+; This tests incremental updates to caller's BFI as a callee gets inlined.
+; In bottom-up inlining, first c->e inlining is considered and fails because
+; e's size exceeds the threshold of 50. Then a->c inlining is considered and it
+; succeeds. a's BFI is updated incrementally. As c's blocks get pruned, the
+; block with label cond_false is removed and since the remanining code is
+; straight-line a single block gets cloned into a. This block should get the
+; maximum block frequency among the original blocks in c. If it gets the
+; frequency of the block with label cond_true in @c, its frequency will be
+; 1/10th of function a's entry block frequency, resulting in a callsite count of
+; 2 (since a's entry count is 20) which means that a->e callsite will be
+; considered cold and not inlined.
+
+@data = external global i32
+; CHECK-LABEL: define i32 @a(
+define i32 @a(i32 %a1) !prof !21 {
+; CHECK-NOT: call i32 @c
+; CHECK-NOT: call i32 @e
+; CHECK: ret
+entry:
+ %cond = icmp sle i32 %a1, 1
+ %a2 = call i32 @c(i32 1)
+ br label %exit
+exit:
+ ret i32 %a2
+}
+
+declare void @ext();
+
+; CHECK: @c(i32 %c1) !prof [[COUNT1:![0-9]+]]
+define i32 @c(i32 %c1) !prof !23 {
+ call void @ext()
+ %cond = icmp sle i32 %c1, 1
+ br i1 %cond, label %cond_true, label %cond_false, !prof !25
+
+cond_false:
+ br label %exit
+
+cond_true:
+ %c11 = call i32 @e(i32 %c1)
+ br label %exit
+exit:
+ %c12 = phi i32 [ 0, %cond_false], [ %c11, %cond_true ]
+ ret i32 %c12
+}
+
+
+; CHECK: @e(i32 %c1) !prof [[COUNT2:![0-9]+]]
+define i32 @e(i32 %c1) !prof !24 {
+ call void @ext()
+ call void @ext()
+ %cond = icmp sle i32 %c1, 1
+ br i1 %cond, label %cond_true, label %cond_false
+
+cond_false:
+ call void @ext()
+ %c2 = load i32, i32* @data, align 4
+ %c3 = add i32 %c1, %c2
+ %c4 = mul i32 %c3, %c2
+ %c5 = add i32 %c4, %c2
+ %c6 = mul i32 %c5, %c2
+ %c7 = add i32 %c6, %c2
+ %c8 = mul i32 %c7, %c2
+ %c9 = add i32 %c8, %c2
+ %c10 = mul i32 %c9, %c2
+ ret i32 %c10
+
+cond_true:
+ ret i32 0
+}
+
+; CHECK: [[COUNT1]] = !{!"function_entry_count", i64 480}
+; CHECK: [[COUNT2]] = !{!"function_entry_count", i64 80}
+!21 = !{!"function_entry_count", i64 20}
+!23 = !{!"function_entry_count", i64 500}
+!24 = !{!"function_entry_count", i64 100}
+!25 = !{!"branch_weights", i32 1, i32 9}
+
+!llvm.module.flags = !{!1}
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxCount", i64 1000}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 1000}
+!8 = !{!"NumCounts", i64 3}
+!9 = !{!"NumFunctions", i64 3}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 1000, i32 1}
+!13 = !{i32 999000, i64 1000, i32 1}
+!14 = !{i32 999999, i64 5, i32 2}
diff --git a/test/Transforms/Inline/cgscc-incremental-invalidate.ll b/test/Transforms/Inline/cgscc-incremental-invalidate.ll
new file mode 100644
index 000000000000..82d321ccf225
--- /dev/null
+++ b/test/Transforms/Inline/cgscc-incremental-invalidate.ll
@@ -0,0 +1,111 @@
+; Test for a subtle bug when computing analyses during inlining and mutating
+; the SCC structure. Without care, this can fail to invalidate analyses.
+;
+; RUN: opt < %s -passes='cgscc(inline,function(verify<domtree>))' -debug-pass-manager -S 2>&1 | FileCheck %s
+
+; First we check that the passes run in the way we expect. Otherwise this test
+; may stop testing anything.
+;
+; CHECK-LABEL: Starting llvm::Module pass manager run.
+; CHECK: Running pass: InlinerPass on (test1_f, test1_g, test1_h)
+; CHECK: Running analysis: FunctionAnalysisManagerCGSCCProxy on (test1_f, test1_g, test1_h)
+; CHECK: Running analysis: DominatorTreeAnalysis on test1_f
+; CHECK: Running analysis: DominatorTreeAnalysis on test1_g
+; CHECK: Invalidating all non-preserved analyses for: (test1_f, test1_g, test1_h)
+; CHECK: Invalidating all non-preserved analyses for: test1_f
+; CHECK: Invalidating analysis: DominatorTreeAnalysis on test1_f
+; CHECK: Invalidating all non-preserved analyses for: test1_g
+; CHECK: Invalidating analysis: DominatorTreeAnalysis on test1_g
+; CHECK: Invalidating all non-preserved analyses for: test1_h
+; CHECK-NOT: Invalidating anaylsis:
+; CHECK: Running analysis: DominatorTreeAnalysis on test1_h
+; CHECK: Invalidating all non-preserved analyses for: (test1_g, test1_h)
+; CHECK: Invalidating all non-preserved analyses for: test1_h
+; CHECK: Invalidating analysis: DominatorTreeAnalysis on test1_h
+
+; An external function used to control branches.
+declare i1 @flag()
+; CHECK-LABEL: declare i1 @flag()
+
+; The utility function with interesting control flow that gets inlined below to
+; perturb the dominator tree.
+define internal void @callee() {
+entry:
+ %ptr = alloca i8
+ %flag = call i1 @flag()
+ br i1 %flag, label %then, label %else
+
+then:
+ store volatile i8 42, i8* %ptr
+ br label %return
+
+else:
+ store volatile i8 -42, i8* %ptr
+ br label %return
+
+return:
+ ret void
+}
+
+; The 'test1_' prefixed functions work to carefully test that incrementally
+; reducing an SCC in the inliner cannot accidentially leave stale function
+; analysis results due to failing to invalidate them for all the functions.
+
+; The inliner visits this last function. It can't actually break any cycles
+; here, but because we visit this function we compute fresh analyses for it.
+; These analyses are then invalidated when we inline callee disrupting the
+; CFG, and it is important that they be freed.
+define void @test1_h() {
+; CHECK-LABEL: define void @test1_h()
+entry:
+ call void @test1_g()
+; CHECK: call void @test1_g()
+
+ ; Pull interesting CFG into this function.
+ call void @callee()
+; CHECK-NOT: call void @callee()
+
+ ret void
+; CHECK: ret void
+}
+
+; We visit this function second and here we inline the edge to 'test1_f'
+; separating it into its own SCC. The current SCC is now just 'test1_g' and
+; 'test1_h'.
+define void @test1_g() {
+; CHECK-LABEL: define void @test1_g()
+entry:
+ ; This edge gets inlined away.
+ call void @test1_f()
+; CHECK-NOT: call void @test1_f()
+; CHECK: call void @test1_g()
+
+ ; We force this edge to survive inlining.
+ call void @test1_h() noinline
+; CHECK: call void @test1_h()
+
+ ; Pull interesting CFG into this function.
+ call void @callee()
+; CHECK-NOT: call void @callee()
+
+ ret void
+; CHECK: ret void
+}
+
+; We visit this function first in the inliner, and while we inline callee
+; perturbing the CFG, we don't inline anything else and the SCC structure
+; remains in tact.
+define void @test1_f() {
+; CHECK-LABEL: define void @test1_f()
+entry:
+ ; We force this edge to survive inlining.
+ call void @test1_g() noinline
+; CHECK: call void @test1_g()
+
+ ; Pull interesting CFG into this function.
+ call void @callee()
+; CHECK-NOT: call void @callee()
+
+ ret void
+; CHECK: ret void
+}
diff --git a/test/Transforms/Inline/cgscc-invalidate.ll b/test/Transforms/Inline/cgscc-invalidate.ll
index 60315cda771d..69d84f65e251 100644
--- a/test/Transforms/Inline/cgscc-invalidate.ll
+++ b/test/Transforms/Inline/cgscc-invalidate.ll
@@ -65,15 +65,15 @@ entry:
; The 'test3_' prefixed functions test the scenario of not inlining preserving
; dominators after splitting an SCC into two smaller SCCs.
-; The first function gets visited first and we end up inlining everything we
-; can into this routine. That splits test3_g into a separate SCC that is enqued
-; for later processing.
-define void @test3_f() {
-; CHECK-LABEL: define void @test3_f()
+; This function ends up split into a separate SCC, which can cause its analyses
+; to become stale if the splitting doesn't properly invalidate things. Also, as
+; a consequence of being split out, test3_f is too large to inline by the time
+; we get here.
+define void @test3_g() {
+; CHECK-LABEL: define void @test3_g()
entry:
- ; Create the first edge in the SCC cycle.
- call void @test3_g()
-; CHECK-NOT: @test3_g()
+ ; Create the second edge in the SCC cycle.
+ call void @test3_f()
; CHECK: call void @test3_f()
; Pull interesting CFG into this function.
@@ -84,15 +84,15 @@ entry:
; CHECK: ret void
}
-; This function ends up split into a separate SCC, which can cause its analyses
-; to become stale if the splitting doesn't properly invalidate things. Also, as
-; a consequence of being split out, test3_f is too large to inline by the time
-; we get here.
-define void @test3_g() {
-; CHECK-LABEL: define void @test3_g()
+; The second function gets visited first and we end up inlining everything we
+; can into this routine. That splits test3_g into a separate SCC that is enqued
+; for later processing.
+define void @test3_f() {
+; CHECK-LABEL: define void @test3_f()
entry:
- ; Create the second edge in the SCC cycle.
- call void @test3_f()
+ ; Create the first edge in the SCC cycle.
+ call void @test3_g()
+; CHECK-NOT: @test3_g()
; CHECK: call void @test3_f()
; Pull interesting CFG into this function.
diff --git a/test/Transforms/Inline/clear-analyses.ll b/test/Transforms/Inline/clear-analyses.ll
new file mode 100644
index 000000000000..4b1d37ca29a9
--- /dev/null
+++ b/test/Transforms/Inline/clear-analyses.ll
@@ -0,0 +1,32 @@
+; Test that when a pass like correlated-propagation populates an analysis such
+; as LVI with references back into the IR of a function that the inliner will
+; delete, this doesn't crash or go awry despite the inliner clearing the analyses
+; separately from when it deletes the function.
+;
+; RUN: opt -debug-pass-manager -S < %s 2>&1 \
+; RUN: -passes='cgscc(inline,function(correlated-propagation))' \
+; RUN: | FileCheck %s
+;
+; CHECK-LABEL: Starting llvm::Module pass manager run.
+; CHECK: Running pass: InlinerPass on (callee)
+; CHECK: Running pass: CorrelatedValuePropagationPass on callee
+; CHECK: Running analysis: LazyValueAnalysis
+; CHECK: Running pass: InlinerPass on (caller)
+; CHECK: Clearing all analysis results for: callee
+; CHECK: Running pass: CorrelatedValuePropagationPass on caller
+; CHECK: Running analysis: LazyValueAnalysis
+
+define internal i32 @callee(i32 %x) {
+; CHECK-NOT: @callee
+entry:
+ ret i32 %x
+}
+
+define i32 @caller(i32 %x) {
+; CHECK-LABEL: define i32 @caller
+entry:
+ %call = call i32 @callee(i32 %x)
+; CHECK-NOT: call
+ ret i32 %call
+; CHECK: ret i32 %x
+}
diff --git a/test/Transforms/Inline/crash-lifetime-marker.ll b/test/Transforms/Inline/crash-lifetime-marker.ll
index e7a594cdb5e4..7196616521e9 100644
--- a/test/Transforms/Inline/crash-lifetime-marker.ll
+++ b/test/Transforms/Inline/crash-lifetime-marker.ll
@@ -15,9 +15,9 @@ define i32 @callee1(i32 %count) {
; CHECK-LABEL: define i32 @caller1(
; CHECK: [[ALLOCA:%[a-z0-9\.]+]] = alloca i8
-; CHECK-NOT: call void @llvm.lifetime.start(
+; CHECK-NOT: call void @llvm.lifetime.start.p0i8(
; CHECK: call i32 @callee2(i8* [[ALLOCA]])
-; CHECK-NOT: call void @llvm.lifetime.end(
+; CHECK-NOT: call void @llvm.lifetime.end.p0i8(
define i32 @caller1(i32 %count) {
%call0 = call i32 @callee1(i32 0)
diff --git a/test/Transforms/Inline/function-count-update-2.ll b/test/Transforms/Inline/function-count-update-2.ll
new file mode 100644
index 000000000000..702fa6292c29
--- /dev/null
+++ b/test/Transforms/Inline/function-count-update-2.ll
@@ -0,0 +1,33 @@
+; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -S | FileCheck %s
+
+; This tests that the function count of a callee gets correctly updated after it
+; has been inlined into a two callsites.
+
+; CHECK: @callee() !prof [[COUNT:![0-9]+]]
+define i32 @callee() !prof !1 {
+ ret i32 0
+}
+
+define i32 @caller1() !prof !2 {
+; CHECK-LABEL: @caller1
+; CHECK-NOT: callee
+; CHECK: ret
+ %i = call i32 @callee()
+ ret i32 %i
+}
+
+define i32 @caller2() !prof !3 {
+; CHECK-LABEL: @caller2
+; CHECK-NOT: callee
+; CHECK: ret
+ %i = call i32 @callee()
+ ret i32 %i
+}
+
+!llvm.module.flags = !{!0}
+; CHECK: [[COUNT]] = !{!"function_entry_count", i64 0}
+!0 = !{i32 1, !"MaxFunctionCount", i32 1000}
+!1 = !{!"function_entry_count", i64 1000}
+!2 = !{!"function_entry_count", i64 600}
+!3 = !{!"function_entry_count", i64 400}
+
diff --git a/test/Transforms/Inline/function-count-update-3.ll b/test/Transforms/Inline/function-count-update-3.ll
new file mode 100644
index 000000000000..215d64175faf
--- /dev/null
+++ b/test/Transforms/Inline/function-count-update-3.ll
@@ -0,0 +1,78 @@
+; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -S -inline-threshold=50 | FileCheck %s
+
+; This tests that the function count of a function gets properly scaled after
+; inlining a call chain leading to the function.
+; Function a calls c with count 200 (C1)
+; Function c calls e with count 250 (C2)
+; Entry count of e is 500 (C3)
+; Entry count of c is 500 (C4)
+; Function b calls c with count 300 (C5)
+; c->e inlining does not happen since the cost exceeds threshold.
+; c then inlined into a.
+; e now gets inlined into a (through c) since the branch condition in e is now
+; known and hence the cost gets reduced.
+; Estimated count of a->e callsite = C2 * (C1 / C4)
+; Estimated count of a->e callsite = 250 * (200 / 500) = 100
+; Remaining count of e = C3 - 100 = 500 - 100 = 400
+; Remaining count of c = C4 - C1 - C5 = 500 - 200 - 300 = 0
+
+@data = external global i32
+
+define i32 @a(i32 %a1) !prof !1 {
+ %a2 = call i32 @c(i32 %a1, i32 1)
+ ret i32 %a2
+}
+
+define i32 @b(i32 %b1) !prof !2 {
+ %b2 = call i32 @c(i32 %b1, i32 %b1)
+ ret i32 %b2
+}
+
+declare void @ext();
+
+; CHECK: @c(i32 %c1, i32 %c100) !prof [[COUNT1:![0-9]+]]
+define i32 @c(i32 %c1, i32 %c100) !prof !3 {
+ call void @ext()
+ %cond = icmp sle i32 %c1, 1
+ br i1 %cond, label %cond_true, label %cond_false
+
+cond_false:
+ ret i32 0
+
+cond_true:
+ %c11 = call i32 @e(i32 %c100)
+ ret i32 %c11
+}
+
+
+; CHECK: @e(i32 %c1) !prof [[COUNT2:![0-9]+]]
+define i32 @e(i32 %c1) !prof !4 {
+ %cond = icmp sle i32 %c1, 1
+ br i1 %cond, label %cond_true, label %cond_false
+
+cond_false:
+ call void @ext()
+ %c2 = load i32, i32* @data, align 4
+ %c3 = add i32 %c1, %c2
+ %c4 = mul i32 %c3, %c2
+ %c5 = add i32 %c4, %c2
+ %c6 = mul i32 %c5, %c2
+ %c7 = add i32 %c6, %c2
+ %c8 = mul i32 %c7, %c2
+ %c9 = add i32 %c8, %c2
+ %c10 = mul i32 %c9, %c2
+ ret i32 %c10
+
+cond_true:
+ ret i32 0
+}
+
+!llvm.module.flags = !{!0}
+; CHECK: [[COUNT1]] = !{!"function_entry_count", i64 0}
+; CHECK: [[COUNT2]] = !{!"function_entry_count", i64 400}
+!0 = !{i32 1, !"MaxFunctionCount", i32 5000}
+!1 = !{!"function_entry_count", i64 200}
+!2 = !{!"function_entry_count", i64 300}
+!3 = !{!"function_entry_count", i64 500}
+!4 = !{!"function_entry_count", i64 500}
+
diff --git a/test/Transforms/Inline/function-count-update.ll b/test/Transforms/Inline/function-count-update.ll
new file mode 100644
index 000000000000..094ad5a2ae67
--- /dev/null
+++ b/test/Transforms/Inline/function-count-update.ll
@@ -0,0 +1,50 @@
+; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -S | FileCheck %s
+
+; This tests that the function count of two callees get correctly updated after
+; they have been inlined into two back-to-back callsites in a single basic block
+; in the caller. The callees have the alwaysinline attribute and so they get
+; inlined both with the regular inliner pass and the always inline pass. In
+; both cases, the new count of each callee is the original count minus callsite
+; count which is 200 (since the caller's entry count is 400 and the block
+; containing the calls have a relative block frequency of 0.5).
+
+; CHECK: @callee1(i32 %n) #0 !prof [[COUNT1:![0-9]+]]
+define i32 @callee1(i32 %n) #0 !prof !1 {
+ %cond = icmp sle i32 %n, 10
+ br i1 %cond, label %cond_true, label %cond_false
+
+cond_true:
+ %r1 = add i32 %n, 1
+ ret i32 %r1
+cond_false:
+ %r2 = add i32 %n, 2
+ ret i32 %r2
+}
+
+; CHECK: @callee2(i32 %n) #0 !prof [[COUNT2:![0-9]+]]
+define i32 @callee2(i32 %n) #0 !prof !2 {
+ %r1 = add i32 %n, 1
+ ret i32 %r1
+}
+
+define i32 @caller(i32 %n) !prof !3 {
+ %cond = icmp sle i32 %n, 100
+ br i1 %cond, label %cond_true, label %cond_false
+
+cond_true:
+ %i = call i32 @callee1(i32 %n)
+ %j = call i32 @callee2(i32 %i)
+ ret i32 %j
+cond_false:
+ ret i32 0
+}
+
+!llvm.module.flags = !{!0}
+; CHECK: [[COUNT1]] = !{!"function_entry_count", i64 800}
+; CHECK: [[COUNT2]] = !{!"function_entry_count", i64 1800}
+!0 = !{i32 1, !"MaxFunctionCount", i32 1000}
+!1 = !{!"function_entry_count", i64 1000}
+!2 = !{!"function_entry_count", i64 2000}
+!3 = !{!"function_entry_count", i64 400}
+attributes #0 = { alwaysinline }
+
diff --git a/test/Transforms/Inline/inline-cold-callee.ll b/test/Transforms/Inline/inline-cold-callee.ll
index 153f446c5c2e..404c537b297f 100644
--- a/test/Transforms/Inline/inline-cold-callee.ll
+++ b/test/Transforms/Inline/inline-cold-callee.ll
@@ -1,5 +1,4 @@
; RUN: opt < %s -inline -inlinecold-threshold=0 -S | FileCheck %s
-; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -inlinecold-threshold=0 -S | FileCheck %s
; This tests that a cold callee gets the (lower) inlinecold-threshold even without
; Cold hint and does not get inlined because the cost exceeds the inlinecold-threshold.
diff --git a/test/Transforms/Inline/inline-cold-callsite.ll b/test/Transforms/Inline/inline-cold-callsite.ll
new file mode 100644
index 000000000000..26ea8e50eaf1
--- /dev/null
+++ b/test/Transforms/Inline/inline-cold-callsite.ll
@@ -0,0 +1,54 @@
+; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -inline-threshold=100 -inline-cold-callsite-threshold=0 -S | FileCheck %s
+
+; This tests that a cold callsite gets the inline-cold-callsite-threshold
+; and does not get inlined. Another callsite to an identical callee that
+; is not cold gets inlined because cost is below the inline-threshold.
+
+define i32 @callee1(i32 %x) !prof !21 {
+ %x1 = add i32 %x, 1
+ %x2 = add i32 %x1, 1
+ %x3 = add i32 %x2, 1
+ call void @extern()
+ ret i32 %x3
+}
+
+define i32 @caller(i32 %n) !prof !22 {
+; CHECK-LABEL: @caller(
+ %cond = icmp sle i32 %n, 100
+ br i1 %cond, label %cond_true, label %cond_false, !prof !0
+
+cond_true:
+; CHECK-LABEL: cond_true:
+; CHECK-NOT: call i32 @callee1
+; CHECK: ret i32 %x3.i
+ %i = call i32 @callee1(i32 %n)
+ ret i32 %i
+cond_false:
+; CHECK-LABEL: cond_false:
+; CHECK: call i32 @callee1
+; CHECK: ret i32 %j
+ %j = call i32 @callee1(i32 %n)
+ ret i32 %j
+}
+declare void @extern()
+
+!0 = !{!"branch_weights", i32 200, i32 1}
+
+!llvm.module.flags = !{!1}
+!21 = !{!"function_entry_count", i64 200}
+!22 = !{!"function_entry_count", i64 200}
+
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxCount", i64 1000}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 1000}
+!8 = !{!"NumCounts", i64 3}
+!9 = !{!"NumFunctions", i64 3}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 1000, i32 1}
+!13 = !{i32 999000, i64 1000, i32 1}
+!14 = !{i32 999999, i64 1, i32 2}
diff --git a/test/Transforms/Inline/inline-hot-callsite-2.ll b/test/Transforms/Inline/inline-hot-callsite-2.ll
new file mode 100644
index 000000000000..ccfe2f0b5dec
--- /dev/null
+++ b/test/Transforms/Inline/inline-hot-callsite-2.ll
@@ -0,0 +1,56 @@
+; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -inline-threshold=0 -inlinehint-threshold=0 -hot-callsite-threshold=100 -S | FileCheck %s
+
+; This tests that a callsite which is determined to be hot based on the caller's
+; entry count and the callsite block frequency gets the hot-callsite-threshold.
+; Another callsite with the same callee that is not hot does not get inlined
+; because cost exceeds the inline-threshold. inlinthint-threshold is set to 0
+; to ensure callee's hotness is not used to boost the threshold.
+
+define i32 @callee1(i32 %x) !prof !21 {
+ %x1 = add i32 %x, 1
+ %x2 = add i32 %x1, 1
+ %x3 = add i32 %x2, 1
+ call void @extern()
+ ret i32 %x3
+}
+
+define i32 @caller(i32 %n) !prof !22 {
+; CHECK-LABEL: @caller(
+ %cond = icmp sle i32 %n, 100
+ br i1 %cond, label %cond_true, label %cond_false, !prof !0
+
+cond_true:
+; CHECK-LABEL: cond_true:
+; CHECK-NOT: call i32 @callee1
+; CHECK: ret i32 %x3.i
+ %i = call i32 @callee1(i32 %n)
+ ret i32 %i
+cond_false:
+; CHECK-LABEL: cond_false:
+; CHECK: call i32 @callee1
+; CHECK: ret i32 %j
+ %j = call i32 @callee1(i32 %n)
+ ret i32 %j
+}
+declare void @extern()
+
+!0 = !{!"branch_weights", i32 64, i32 4}
+
+!llvm.module.flags = !{!1}
+!21 = !{!"function_entry_count", i64 200}
+!22 = !{!"function_entry_count", i64 200}
+
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxCount", i64 1000}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 1000}
+!8 = !{!"NumCounts", i64 3}
+!9 = !{!"NumFunctions", i64 3}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 100, i32 1}
+!13 = !{i32 999000, i64 100, i32 1}
+!14 = !{i32 999999, i64 1, i32 2}
diff --git a/test/Transforms/Inline/inline-hot-callsite.ll b/test/Transforms/Inline/inline-hot-callsite.ll
index bdd7175b3eea..ebf4030d3d10 100644
--- a/test/Transforms/Inline/inline-hot-callsite.ll
+++ b/test/Transforms/Inline/inline-hot-callsite.ll
@@ -41,7 +41,7 @@ declare void @extern()
!1 = !{i32 1, !"ProfileSummary", !2}
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
-!3 = !{!"ProfileFormat", !"InstrProf"}
+!3 = !{!"ProfileFormat", !"SampleProfile"}
!4 = !{!"TotalCount", i64 10000}
!5 = !{!"MaxCount", i64 1000}
!6 = !{!"MaxInternalCount", i64 1}
diff --git a/test/Transforms/Inline/inline_stats.ll b/test/Transforms/Inline/inline_stats.ll
index cf0d43e9215b..bc005b6afd51 100644
--- a/test/Transforms/Inline/inline_stats.ll
+++ b/test/Transforms/Inline/inline_stats.ll
@@ -36,9 +36,12 @@ define void @internal3() {
ret void
}
+declare void @external_decl()
+
define void @external1() alwaysinline !thinlto_src_module !0 {
call fastcc void @internal2()
call fastcc void @external2();
+ call void @external_decl();
ret void
}
diff --git a/test/Transforms/Inline/internal-scc-members.ll b/test/Transforms/Inline/internal-scc-members.ll
new file mode 100644
index 000000000000..258ce00744c5
--- /dev/null
+++ b/test/Transforms/Inline/internal-scc-members.ll
@@ -0,0 +1,31 @@
+; Test that the inliner can handle deleting functions within an SCC while still
+; processing the calls in that SCC.
+;
+; RUN: opt < %s -S -inline | FileCheck %s
+; RUN: opt < %s -S -passes=inline | FileCheck %s
+
+; CHECK-LABEL: define internal void @test1_scc0()
+; CHECK-NOT: call
+; CHECK: call void @test1_scc0()
+; CHECK-NOT: call
+; CHECK: ret
+define internal void @test1_scc0() {
+entry:
+ call void @test1_scc1()
+ ret void
+}
+
+; CHECK-NOT: @test1_scc1
+define internal void @test1_scc1() {
+entry:
+ call void @test1_scc0()
+ ret void
+}
+
+; CHECK-LABEL: define void @test1()
+; CHECK: call void @test1_scc0()
+define void @test1() {
+entry:
+ call void @test1_scc0() noinline
+ ret void
+}
diff --git a/test/Transforms/Inline/last-call-bonus.ll b/test/Transforms/Inline/last-call-bonus.ll
new file mode 100644
index 000000000000..0088d316848f
--- /dev/null
+++ b/test/Transforms/Inline/last-call-bonus.ll
@@ -0,0 +1,52 @@
+; The goal of this test is checking if LastCallToStaticBonus is applied
+; correctly while deciding inline deferral. For the test code below, when
+; inliner evaluates the callsite of bar->baz, it checks if inlining of bar->baz
+; prevents ininling of foo->bar, even when foo->bar inlining is more beneficial
+; than bar->baz inlining. As LastCallToStaticBonus has a massive value, and
+; both baz and bar has only one caller, the cost of foo->bar inlining and
+; bar->baz inlining should be non-trivial for inliner to compute that bar->baz
+; inlining can actaully prevent foo->bar inlining. To make the cost of these
+; callsites big enough, loop unrolling pass with very high threshold is used to
+; preprocess the test.
+
+; RUN: opt < %s -loop-unroll -inline -unroll-threshold=15000 -inline-threshold=250 -S | FileCheck %s
+; CHECK-LABEL: define internal i32 @bar()
+
+define internal i32 @baz() {
+entry:
+ br label %bb1
+
+bb1:
+ %ind = phi i32 [ 0, %entry ], [ %inc, %bb1 ]
+ call void @extern()
+ %inc = add nsw i32 %ind, 1
+ %cmp = icmp sgt i32 %inc, 510
+ br i1 %cmp, label %ret, label %bb1
+
+ret:
+ ret i32 0
+}
+
+define internal i32 @bar() {
+entry:
+ br label %bb1
+
+bb1:
+ %ind = phi i32 [ 0, %entry ], [ %inc, %bb1 ]
+ call void @extern()
+ %inc = add nsw i32 %ind, 1
+ %cmp = icmp sgt i32 %inc, 510
+ br i1 %cmp, label %ret, label %bb1
+
+ret:
+ call i32 @baz()
+ ret i32 0
+}
+
+define i32 @foo() {
+entry:
+ call i32 @bar()
+ ret i32 0
+}
+
+declare void @extern()
diff --git a/test/Transforms/Inline/lifetime-no-datalayout.ll b/test/Transforms/Inline/lifetime-no-datalayout.ll
index 0212e69d624a..5d1872c6a244 100644
--- a/test/Transforms/Inline/lifetime-no-datalayout.ll
+++ b/test/Transforms/Inline/lifetime-no-datalayout.ll
@@ -13,9 +13,9 @@ define void @helper() {
define void @test() {
; CHECK-LABEL: @test(
; CHECK-NOT: lifetime
-; CHECK: llvm.lifetime.start(i64 1
+; CHECK: llvm.lifetime.start.p0i8(i64 1
; CHECK-NOT: lifetime
-; CHECK: llvm.lifetime.end(i64 1
+; CHECK: llvm.lifetime.end.p0i8(i64 1
call void @helper()
; CHECK-NOT: lifetime
; CHECK: ret void
diff --git a/test/Transforms/Inline/lifetime.ll b/test/Transforms/Inline/lifetime.ll
index 4f415e58f1bf..c47091395fce 100644
--- a/test/Transforms/Inline/lifetime.ll
+++ b/test/Transforms/Inline/lifetime.ll
@@ -2,25 +2,25 @@
; RUN: opt -passes='cgscc(inline)' -S < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-declare void @llvm.lifetime.start(i64, i8*)
-declare void @llvm.lifetime.end(i64, i8*)
+declare void @llvm.lifetime.start.p0i8(i64, i8*)
+declare void @llvm.lifetime.end.p0i8(i64, i8*)
define void @helper_both_markers() {
%a = alloca i8
; Size in llvm.lifetime.start / llvm.lifetime.end differs from
; allocation size. We should use the former.
- call void @llvm.lifetime.start(i64 2, i8* %a)
- call void @llvm.lifetime.end(i64 2, i8* %a)
+ call void @llvm.lifetime.start.p0i8(i64 2, i8* %a)
+ call void @llvm.lifetime.end.p0i8(i64 2, i8* %a)
ret void
}
define void @test_both_markers() {
; CHECK-LABEL: @test_both_markers(
-; CHECK: llvm.lifetime.start(i64 2
-; CHECK-NEXT: llvm.lifetime.end(i64 2
+; CHECK: llvm.lifetime.start.p0i8(i64 2
+; CHECK-NEXT: llvm.lifetime.end.p0i8(i64 2
call void @helper_both_markers()
-; CHECK-NEXT: llvm.lifetime.start(i64 2
-; CHECK-NEXT: llvm.lifetime.end(i64 2
+; CHECK-NEXT: llvm.lifetime.start.p0i8(i64 2
+; CHECK-NEXT: llvm.lifetime.end.p0i8(i64 2
call void @helper_both_markers()
; CHECK-NEXT: ret void
ret void
@@ -41,14 +41,14 @@ define void @helper_no_markers() {
define void @test_no_marker() {
; CHECK-LABEL: @test_no_marker(
; CHECK-NOT: lifetime
-; CHECK: llvm.lifetime.start(i64 1
+; CHECK: llvm.lifetime.start.p0i8(i64 1
; CHECK-NOT: lifetime
-; CHECK: llvm.lifetime.end(i64 1
+; CHECK: llvm.lifetime.end.p0i8(i64 1
call void @helper_no_markers()
; CHECK-NOT: lifetime
-; CHECK: llvm.lifetime.start(i64 1
+; CHECK: llvm.lifetime.start.p0i8(i64 1
; CHECK-NOT: lifetime
-; CHECK: llvm.lifetime.end(i64 1
+; CHECK: llvm.lifetime.end.p0i8(i64 1
call void @helper_no_markers()
; CHECK-NOT: lifetime
; CHECK: ret void
@@ -58,23 +58,23 @@ define void @test_no_marker() {
define void @helper_two_casts() {
%a = alloca i32
%b = bitcast i32* %a to i8*
- call void @llvm.lifetime.start(i64 4, i8* %b)
+ call void @llvm.lifetime.start.p0i8(i64 4, i8* %b)
%c = bitcast i32* %a to i8*
- call void @llvm.lifetime.end(i64 4, i8* %c)
+ call void @llvm.lifetime.end.p0i8(i64 4, i8* %c)
ret void
}
define void @test_two_casts() {
; CHECK-LABEL: @test_two_casts(
; CHECK-NOT: lifetime
-; CHECK: llvm.lifetime.start(i64 4
+; CHECK: llvm.lifetime.start.p0i8(i64 4
; CHECK-NOT: lifetime
-; CHECK: llvm.lifetime.end(i64 4
+; CHECK: llvm.lifetime.end.p0i8(i64 4
call void @helper_two_casts()
; CHECK-NOT: lifetime
-; CHECK: llvm.lifetime.start(i64 4
+; CHECK: llvm.lifetime.start.p0i8(i64 4
; CHECK-NOT: lifetime
-; CHECK: llvm.lifetime.end(i64 4
+; CHECK: llvm.lifetime.end.p0i8(i64 4
call void @helper_two_casts()
; CHECK-NOT: lifetime
; CHECK: ret void
@@ -91,9 +91,9 @@ define void @helper_arrays_alloca() {
define void @test_arrays_alloca() {
; CHECK-LABEL: @test_arrays_alloca(
; CHECK-NOT: lifetime
-; CHECK: llvm.lifetime.start(i64 40,
+; CHECK: llvm.lifetime.start.p0i8(i64 40,
; CHECK-NOT: lifetime
-; CHECK: llvm.lifetime.end(i64 40,
+; CHECK: llvm.lifetime.end.p0i8(i64 40,
call void @helper_arrays_alloca()
; CHECK-NOT: lifetime
; CHECK: ret void
diff --git a/test/Transforms/Inline/monster_scc.ll b/test/Transforms/Inline/monster_scc.ll
new file mode 100644
index 000000000000..0f8f1f21c8b5
--- /dev/null
+++ b/test/Transforms/Inline/monster_scc.ll
@@ -0,0 +1,460 @@
+; This test creates a monster SCC with a very pernicious call graph. It builds
+; a cycle of cross-connected pairs of functions with interesting inlining
+; decisions throughout, but ultimately trivial code complexity.
+;
+; Typically, a greedy approach to inlining works well for bottom-up inliners
+; such as LLVM's. However, there is no way to be bottom-up over an SCC: it's
+; a cycle! Greedily inlining as much as possible into each function of this
+; *SCC* will have the disasterous effect of inlining all N-1 functions into the
+; first one visited, N-2 functions into the second one visited, N-3 into the
+; third, and so on. This is because until inlining occurs, each function in
+; isolation appears to be an excellent inline candidate.
+;
+; Note that the exact number of calls in each function doesn't really matter.
+; It is mostly a function of cost thresholds and visit order. Because this is an
+; SCC there is no "right" or "wrong" answer here as long as no function blows up
+; to be *huge*. The specific concerning pattern is if one or more functions get
+; more than 16 calls in them.
+;
+; This test is extracted from the following C++ program compiled with Clang.
+; The IR is simplified with SROA, instcombine, and simplify-cfg. Then C++
+; linkage stuff, attributes, target specific things, metadata and comments were
+; removed. The order of the fuctions is also made more predictable than Clang's
+; output order.
+;
+; void g(int);
+;
+; template <bool K, int N> void f(bool *B, bool *E) {
+; if (K)
+; g(N);
+; if (B == E)
+; return;
+; if (*B)
+; f<true, N + 1>(B + 1, E);
+; else
+; f<false, N + 1>(B + 1, E);
+; }
+; template <> void f<false, MAX>(bool *B, bool *E) { return f<false, 0>(B, E); }
+; template <> void f<true, MAX>(bool *B, bool *E) { return f<true, 0>(B, E); }
+;
+; void test(bool *B, bool *E) { f<false, 0>(B, E); }
+;
+; RUN: opt -S < %s -inline -inline-threshold=150 | FileCheck %s --check-prefixes=CHECK,OLD
+; RUN: opt -S < %s -passes=inline -inline-threshold=150 | FileCheck %s --check-prefixes=CHECK,NEW
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+declare void @_Z1gi(i32)
+
+; CHECK-LABEL: define void @_Z1fILb0ELi0EEvPbS0_(
+; OLD-NOT: call
+; OLD: call void @_Z1gi(
+; OLD-NOT: call
+; OLD: call void @_Z1fILb1ELi2EEvPbS0_(
+; OLD-NOT: call
+; OLD: call void @_Z1fILb0ELi2EEvPbS0_(
+; OLD-NOT: call
+; OLD: call void @_Z1fILb0ELi1EEvPbS0_(
+; OLD-NOT: call
+; NEW-NOT: call
+; NEW: call void @_Z1gi(
+; NEW-NOT: call
+; NEW: call void @_Z1fILb1ELi2EEvPbS0_(
+; NEW-NOT: call
+; NEW: call void @_Z1fILb0ELi2EEvPbS0_(
+; NEW-NOT: call
+; NEW: call void @_Z1fILb1ELi2EEvPbS0_(
+; NEW-NOT: call
+; NEW: call void @_Z1fILb0ELi2EEvPbS0_(
+; NEW-NOT: call
+define void @_Z1fILb0ELi0EEvPbS0_(i8* %B, i8* %E) {
+entry:
+ %cmp = icmp eq i8* %B, %E
+ br i1 %cmp, label %if.end3, label %if.end
+
+if.end:
+ %0 = load i8, i8* %B, align 1
+ %tobool = icmp eq i8 %0, 0
+ %add.ptr2 = getelementptr inbounds i8, i8* %B, i64 1
+ br i1 %tobool, label %if.else, label %if.then1
+
+if.then1:
+ call void @_Z1fILb1ELi1EEvPbS0_(i8* %add.ptr2, i8* %E)
+ br label %if.end3
+
+if.else:
+ call void @_Z1fILb0ELi1EEvPbS0_(i8* %add.ptr2, i8* %E)
+ br label %if.end3
+
+if.end3:
+ ret void
+}
+
+; CHECK-LABEL: define void @_Z1fILb1ELi0EEvPbS0_(
+; OLD-NOT: call
+; OLD: call void @_Z1gi(
+; OLD-NOT: call
+; OLD: call void @_Z1gi(
+; OLD-NOT: call
+; OLD: call void @_Z1fILb1ELi2EEvPbS0_(
+; OLD-NOT: call
+; OLD: call void @_Z1fILb0ELi2EEvPbS0_(
+; OLD-NOT: call
+; OLD: call void @_Z1fILb0ELi1EEvPbS0_(
+; OLD-NOT: call
+; NEW-NOT: call
+; NEW: call void @_Z1gi(
+; NEW-NOT: call
+; NEW: call void @_Z1fILb1ELi1EEvPbS0_(
+; NEW-NOT: call
+; NEW: call void @_Z1fILb1ELi2EEvPbS0_(
+; NEW-NOT: call
+; NEW: call void @_Z1fILb0ELi2EEvPbS0_(
+; NEW-NOT: call
+define void @_Z1fILb1ELi0EEvPbS0_(i8* %B, i8* %E) {
+entry:
+ call void @_Z1gi(i32 0)
+ %cmp = icmp eq i8* %B, %E
+ br i1 %cmp, label %if.end3, label %if.end
+
+if.end:
+ %0 = load i8, i8* %B, align 1
+ %tobool = icmp eq i8 %0, 0
+ %add.ptr2 = getelementptr inbounds i8, i8* %B, i64 1
+ br i1 %tobool, label %if.else, label %if.then1
+
+if.then1:
+ call void @_Z1fILb1ELi1EEvPbS0_(i8* %add.ptr2, i8* %E)
+ br label %if.end3
+
+if.else:
+ call void @_Z1fILb0ELi1EEvPbS0_(i8* %add.ptr2, i8* %E)
+ br label %if.end3
+
+if.end3:
+ ret void
+}
+
+; CHECK-LABEL: define void @_Z1fILb0ELi1EEvPbS0_(
+; OLD-NOT: call
+; OLD: call void @_Z1gi(
+; OLD-NOT: call
+; OLD: call void @_Z1gi(
+; OLD-NOT: call
+; OLD: call void @_Z1fILb1ELi0EEvPbS0_(
+; OLD-NOT: call
+; OLD: call void @_Z1fILb0ELi0EEvPbS0_(
+; OLD-NOT: call
+; OLD: call void @_Z1fILb1ELi0EEvPbS0_(
+; OLD-NOT: call
+; OLD: call void @_Z1fILb0ELi0EEvPbS0_(
+; OLD-NOT: call
+; OLD: call void @_Z1fILb0ELi2EEvPbS0_(
+; OLD-NOT: call
+; NEW-NOT: call
+; NEW: call void @_Z1fILb1ELi2EEvPbS0_(
+; NEW-NOT: call
+; NEW: call void @_Z1gi(
+; NEW-NOT: call
+; NEW: call void @_Z1fILb1ELi0EEvPbS0_(
+; NEW-NOT: call
+; NEW: call void @_Z1fILb0ELi0EEvPbS0_(
+; NEW-NOT: call
+; NEW: call void @_Z1fILb0ELi3EEvPbS0_(
+; NEW-NOT: call
+define void @_Z1fILb0ELi1EEvPbS0_(i8* %B, i8* %E) {
+entry:
+ %cmp = icmp eq i8* %B, %E
+ br i1 %cmp, label %if.end3, label %if.end
+
+if.end:
+ %0 = load i8, i8* %B, align 1
+ %tobool = icmp eq i8 %0, 0
+ %add.ptr2 = getelementptr inbounds i8, i8* %B, i64 1
+ br i1 %tobool, label %if.else, label %if.then1
+
+if.then1:
+ call void @_Z1fILb1ELi2EEvPbS0_(i8* %add.ptr2, i8* %E)
+ br label %if.end3
+
+if.else:
+ call void @_Z1fILb0ELi2EEvPbS0_(i8* %add.ptr2, i8* %E)
+ br label %if.end3
+
+if.end3:
+ ret void
+}
+
+; CHECK-LABEL: define void @_Z1fILb1ELi1EEvPbS0_(
+; OLD-NOT: call
+; OLD: call void @_Z1gi(
+; OLD-NOT: call
+; OLD: call void @_Z1fILb1ELi2EEvPbS0_(
+; OLD-NOT: call
+; OLD: call void @_Z1fILb0ELi2EEvPbS0_(
+; OLD-NOT: call
+; NEW-NOT: call
+; NEW: call void @_Z1gi(
+; NEW-NOT: call
+; NEW: call void @_Z1gi(
+; NEW-NOT: call
+; NEW: call void @_Z1gi(
+; NEW-NOT: call
+; NEW: call void @_Z1fILb1ELi0EEvPbS0_(
+; NEW-NOT: call
+; NEW: call void @_Z1fILb0ELi0EEvPbS0_(
+; NEW-NOT: call
+; NEW: call void @_Z1fILb0ELi3EEvPbS0_(
+; NEW-NOT: call
+; NEW: call void @_Z1gi(
+; NEW-NOT: call
+; NEW: call void @_Z1fILb1ELi0EEvPbS0_(
+; NEW-NOT: call
+; NEW: call void @_Z1fILb0ELi0EEvPbS0_(
+; NEW-NOT: call
+; NEW: call void @_Z1fILb0ELi3EEvPbS0_(
+; NEW-NOT: call
+define void @_Z1fILb1ELi1EEvPbS0_(i8* %B, i8* %E) {
+entry:
+ call void @_Z1gi(i32 1)
+ %cmp = icmp eq i8* %B, %E
+; CHECK-NOT: call
+ br i1 %cmp, label %if.end3, label %if.end
+
+if.end:
+ %0 = load i8, i8* %B, align 1
+ %tobool = icmp eq i8 %0, 0
+ %add.ptr2 = getelementptr inbounds i8, i8* %B, i64 1
+ br i1 %tobool, label %if.else, label %if.then1
+
+if.then1:
+ call void @_Z1fILb1ELi2EEvPbS0_(i8* %add.ptr2, i8* %E)
+ br label %if.end3
+
+if.else:
+ call void @_Z1fILb0ELi2EEvPbS0_(i8* %add.ptr2, i8* %E)
+ br label %if.end3
+
+if.end3:
+ ret void
+}
+
+; CHECK-LABEL: define void @_Z1fILb0ELi2EEvPbS0_(
+; OLD-NOT: call
+; OLD: call void @_Z1gi(
+; OLD-NOT: call
+; OLD: call void @_Z1fILb1ELi0EEvPbS0_(
+; OLD-NOT: call
+; OLD: call void @_Z1fILb0ELi0EEvPbS0_(
+; OLD-NOT: call
+; OLD: call void @_Z1fILb1ELi0EEvPbS0_(
+; OLD-NOT: call
+; OLD: call void @_Z1fILb0ELi0EEvPbS0_(
+; OLD-NOT: call
+; NEW-NOT: call
+; NEW: call void @_Z1gi(
+; NEW-NOT: call
+; NEW: call void @_Z1fILb1ELi0EEvPbS0_(
+; NEW-NOT: call
+; NEW: call void @_Z1fILb0ELi0EEvPbS0_(
+; NEW-NOT: call
+; NEW: call void @_Z1fILb1ELi4EEvPbS0_(
+; NEW-NOT: call
+; NEW: call void @_Z1fILb0ELi0EEvPbS0_(
+; NEW-NOT: call
+define void @_Z1fILb0ELi2EEvPbS0_(i8* %B, i8* %E) {
+entry:
+ %cmp = icmp eq i8* %B, %E
+ br i1 %cmp, label %if.end3, label %if.end
+
+if.end:
+ %0 = load i8, i8* %B, align 1
+ %tobool = icmp eq i8 %0, 0
+ %add.ptr2 = getelementptr inbounds i8, i8* %B, i64 1
+ br i1 %tobool, label %if.else, label %if.then1
+
+if.then1:
+ call void @_Z1fILb1ELi3EEvPbS0_(i8* %add.ptr2, i8* %E)
+ br label %if.end3
+
+if.else:
+ call void @_Z1fILb0ELi3EEvPbS0_(i8* %add.ptr2, i8* %E)
+ br label %if.end3
+
+if.end3:
+ ret void
+}
+
+; CHECK-LABEL: define void @_Z1fILb1ELi2EEvPbS0_(
+; OLD-NOT: call
+; OLD: call void @_Z1gi(
+; OLD-NOT: call
+; OLD: call void @_Z1gi(
+; OLD-NOT: call
+; OLD: call void @_Z1fILb1ELi0EEvPbS0_(
+; OLD-NOT: call
+; OLD: call void @_Z1fILb0ELi0EEvPbS0_(
+; OLD-NOT: call
+; OLD: call void @_Z1fILb1ELi0EEvPbS0_(
+; OLD-NOT: call
+; OLD: call void @_Z1fILb0ELi0EEvPbS0_(
+; OLD-NOT: call
+; NEW-NOT: call
+; NEW: call void @_Z1gi(
+; NEW-NOT: call
+; NEW: call void @_Z1gi(
+; NEW-NOT: call
+; NEW: call void @_Z1gi(
+; NEW-NOT: call
+; NEW: call void @_Z1fILb1ELi1EEvPbS0_(
+; NEW-NOT: call
+; NEW: call void @_Z1fILb0ELi1EEvPbS0_(
+; NEW-NOT: call
+; NEW: call void @_Z1fILb0ELi0EEvPbS0_(
+; NEW-NOT: call
+; NEW: call void @_Z1gi(
+; NEW-NOT: call
+; NEW: call void @_Z1fILb1ELi1EEvPbS0_(
+; NEW-NOT: call
+; NEW: call void @_Z1fILb0ELi1EEvPbS0_(
+; NEW-NOT: call
+; NEW: call void @_Z1fILb0ELi0EEvPbS0_(
+; NEW-NOT: call
+define void @_Z1fILb1ELi2EEvPbS0_(i8* %B, i8* %E) {
+entry:
+ call void @_Z1gi(i32 2)
+ %cmp = icmp eq i8* %B, %E
+ br i1 %cmp, label %if.end3, label %if.end
+
+if.end:
+ %0 = load i8, i8* %B, align 1
+ %tobool = icmp eq i8 %0, 0
+ %add.ptr2 = getelementptr inbounds i8, i8* %B, i64 1
+ br i1 %tobool, label %if.else, label %if.then1
+
+if.then1:
+ call void @_Z1fILb1ELi3EEvPbS0_(i8* %add.ptr2, i8* %E)
+ br label %if.end3
+
+if.else:
+ call void @_Z1fILb0ELi3EEvPbS0_(i8* %add.ptr2, i8* %E)
+ br label %if.end3
+
+if.end3:
+ ret void
+}
+
+; CHECK-LABEL: define void @_Z1fILb0ELi3EEvPbS0_(
+; OLD-NOT: call
+; OLD: call void @_Z1fILb1ELi0EEvPbS0_(
+; OLD-NOT: call
+; OLD: call void @_Z1fILb0ELi0EEvPbS0_(
+; OLD-NOT: call
+; NEW-NOT: call
+; NEW: call void @_Z1gi(
+; NEW-NOT: call
+; NEW: call void @_Z1fILb1ELi1EEvPbS0_(
+; NEW-NOT: call
+; NEW: call void @_Z1fILb0ELi1EEvPbS0_(
+; NEW-NOT: call
+; NEW: call void @_Z1fILb0ELi0EEvPbS0_(
+; NEW-NOT: call
+define void @_Z1fILb0ELi3EEvPbS0_(i8* %B, i8* %E) {
+entry:
+ %cmp = icmp eq i8* %B, %E
+ br i1 %cmp, label %if.end3, label %if.end
+
+if.end:
+ %0 = load i8, i8* %B, align 1
+ %tobool = icmp eq i8 %0, 0
+ %add.ptr2 = getelementptr inbounds i8, i8* %B, i64 1
+ br i1 %tobool, label %if.else, label %if.then1
+
+if.then1:
+ call void @_Z1fILb1ELi4EEvPbS0_(i8* %add.ptr2, i8* %E)
+ br label %if.end3
+
+if.else:
+ call void @_Z1fILb0ELi4EEvPbS0_(i8* %add.ptr2, i8* %E)
+ br label %if.end3
+
+if.end3:
+ ret void
+}
+
+; CHECK-LABEL: define void @_Z1fILb1ELi3EEvPbS0_(
+; CHECK-NOT: call
+; CHECK: call void @_Z1gi(
+; CHECK-NOT: call
+; CHECK: call void @_Z1fILb1ELi0EEvPbS0_(
+; CHECK-NOT: call
+; CHECK: call void @_Z1fILb0ELi0EEvPbS0_(
+; CHECK-NOT: call
+define void @_Z1fILb1ELi3EEvPbS0_(i8* %B, i8* %E) {
+entry:
+ call void @_Z1gi(i32 3)
+ %cmp = icmp eq i8* %B, %E
+ br i1 %cmp, label %if.end3, label %if.end
+
+if.end:
+ %0 = load i8, i8* %B, align 1
+ %tobool = icmp eq i8 %0, 0
+ %add.ptr2 = getelementptr inbounds i8, i8* %B, i64 1
+ br i1 %tobool, label %if.else, label %if.then1
+
+if.then1:
+ call void @_Z1fILb1ELi4EEvPbS0_(i8* %add.ptr2, i8* %E)
+ br label %if.end3
+
+if.else:
+ call void @_Z1fILb0ELi4EEvPbS0_(i8* %add.ptr2, i8* %E)
+ br label %if.end3
+
+if.end3:
+ ret void
+}
+
+; CHECK-LABEL: define void @_Z1fILb0ELi4EEvPbS0_(
+; CHECK-NOT: call
+; CHECK: call void @_Z1fILb0ELi0EEvPbS0_(
+; CHECK-NOT: call
+define void @_Z1fILb0ELi4EEvPbS0_(i8* %B, i8* %E) {
+entry:
+ call void @_Z1fILb0ELi0EEvPbS0_(i8* %B, i8* %E)
+ ret void
+}
+
+; CHECK-LABEL: define void @_Z1fILb1ELi4EEvPbS0_(
+; OLD-NOT: call
+; OLD: call void @_Z1fILb1ELi0EEvPbS0_(
+; OLD-NOT: call
+; NEW-NOT: call
+; NEW: call void @_Z1gi(
+; NEW-NOT: call
+; NEW: call void @_Z1fILb1ELi1EEvPbS0_(
+; NEW-NOT: call
+; NEW: call void @_Z1fILb1ELi2EEvPbS0_(
+; NEW-NOT: call
+; NEW: call void @_Z1gi(
+; NEW-NOT: call
+; NEW: call void @_Z1fILb1ELi0EEvPbS0_(
+; NEW-NOT: call
+; NEW: call void @_Z1fILb0ELi0EEvPbS0_(
+; NEW-NOT: call
+; NEW: call void @_Z1fILb0ELi3EEvPbS0_(
+; NEW-NOT: call
+define void @_Z1fILb1ELi4EEvPbS0_(i8* %B, i8* %E) {
+entry:
+ call void @_Z1fILb1ELi0EEvPbS0_(i8* %B, i8* %E)
+ ret void
+}
+
+; CHECK-LABEL: define void @_Z4testPbS_(
+; CHECK: call
+; CHECK-NOT: call
+define void @_Z4testPbS_(i8* %B, i8* %E) {
+entry:
+ call void @_Z1fILb0ELi0EEvPbS0_(i8* %B, i8* %E)
+ ret void
+}
+
diff --git a/test/Transforms/Inline/optimization-remarks-with-hotness.ll b/test/Transforms/Inline/optimization-remarks-with-hotness.ll
index 9611a2dd1bd4..1d6d135bdda8 100644
--- a/test/Transforms/Inline/optimization-remarks-with-hotness.ll
+++ b/test/Transforms/Inline/optimization-remarks-with-hotness.ll
@@ -4,8 +4,7 @@
; CHECK: foo should always be inlined (cost=always) (hotness: 30)
; CHECK: foo inlined into bar (hotness: 30)
-; CHECK: foz should never be inlined (cost=never) (hotness: 30)
-; CHECK: foz will not be inlined into bar (hotness: 30)
+; CHECK: foz not inlined into bar because it should never be inlined (cost=never) (hotness: 30)
; Function Attrs: alwaysinline nounwind uwtable
define i32 @foo() #0 !prof !1 {
diff --git a/test/Transforms/Inline/optimization-remarks.ll b/test/Transforms/Inline/optimization-remarks.ll
index 59cf08327350..61e270cff76c 100644
--- a/test/Transforms/Inline/optimization-remarks.ll
+++ b/test/Transforms/Inline/optimization-remarks.ll
@@ -9,8 +9,7 @@
; NO_HOTNESS-NOT: fox will not be inlined into bar because its definition is unavailable
; CHECK: foo should always be inlined (cost=always)
; CHECK: foo inlined into bar
-; CHECK: foz should never be inlined (cost=never)
-; CHECK: foz will not be inlined into bar
+; CHECK: foz not inlined into bar because it should never be inlined (cost=never)
; Function Attrs: alwaysinline nounwind uwtable
define i32 @foo(i32 %x, i32 %y) #0 {
diff --git a/test/Transforms/Inline/prof-update.ll b/test/Transforms/Inline/prof-update.ll
new file mode 100644
index 000000000000..38fcc7e45996
--- /dev/null
+++ b/test/Transforms/Inline/prof-update.ll
@@ -0,0 +1,39 @@
+; RUN: opt < %s -inline -S | FileCheck %s
+; Checks if inliner updates branch_weights annotation for call instructions.
+
+declare void @ext();
+declare void @ext1();
+
+; CHECK: define void @callee(i32 %n) !prof ![[ENTRY_COUNT:[0-9]*]]
+define void @callee(i32 %n) !prof !1 {
+ %cond = icmp sle i32 %n, 10
+ br i1 %cond, label %cond_true, label %cond_false
+cond_true:
+; ext1 is optimized away, thus not updated.
+; CHECK: call void @ext1(), !prof ![[COUNT_CALLEE1:[0-9]*]]
+ call void @ext1(), !prof !2
+ ret void
+cond_false:
+; ext is cloned and updated.
+; CHECK: call void @ext(), !prof ![[COUNT_CALLEE:[0-9]*]]
+ call void @ext(), !prof !2
+ ret void
+}
+
+; CHECK: define void @caller()
+define void @caller() {
+; CHECK: call void @ext(), !prof ![[COUNT_CALLER:[0-9]*]]
+ call void @callee(i32 15), !prof !3
+ ret void
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"MaxFunctionCount", i32 2000}
+!1 = !{!"function_entry_count", i64 1000}
+!2 = !{!"branch_weights", i64 2000}
+!3 = !{!"branch_weights", i64 400}
+attributes #0 = { alwaysinline }
+; CHECK: ![[ENTRY_COUNT]] = !{!"function_entry_count", i64 600}
+; CHECK: ![[COUNT_CALLEE1]] = !{!"branch_weights", i64 2000}
+; CHECK: ![[COUNT_CALLEE]] = !{!"branch_weights", i32 1200}
+; CHECK: ![[COUNT_CALLER]] = !{!"branch_weights", i32 800}