diff options
Diffstat (limited to 'test/Transforms/Inline')
25 files changed, 1221 insertions, 50 deletions
diff --git a/test/Transforms/Inline/AArch64/gep-cost.ll b/test/Transforms/Inline/AArch64/gep-cost.ll new file mode 100644 index 000000000000..204958f082dd --- /dev/null +++ b/test/Transforms/Inline/AArch64/gep-cost.ll @@ -0,0 +1,30 @@ +; REQUIRES: asserts +; RUN: opt -inline -mtriple=aarch64--linux-gnu -mcpu=kryo -S -debug-only=inline-cost < %s 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-gnu" + +define void @outer([4 x i32]* %ptr, i32 %i) { + call void @inner1([4 x i32]* %ptr, i32 %i) + call void @inner2([4 x i32]* %ptr, i32 %i) + ret void +} +; The gep in inner1() is reg+reg, which is a legal addressing mode for AArch64. +; Thus, both the gep and ret can be simplified. +; CHECK: Analyzing call of inner1 +; CHECK: NumInstructionsSimplified: 2 +; CHECK: NumInstructions: 2 +define void @inner1([4 x i32]* %ptr, i32 %i) { + %G = getelementptr inbounds [4 x i32], [4 x i32]* %ptr, i32 0, i32 %i + ret void +} + +; The gep in inner2() is reg+imm+reg, which is not a legal addressing mode for +; AArch64. Thus, only the ret can be simplified and not the gep. 
+; CHECK: Analyzing call of inner2 +; CHECK: NumInstructionsSimplified: 1 +; CHECK: NumInstructions: 2 +define void @inner2([4 x i32]* %ptr, i32 %i) { + %G = getelementptr inbounds [4 x i32], [4 x i32]* %ptr, i32 1, i32 %i + ret void +} diff --git a/test/Transforms/Inline/AArch64/lit.local.cfg b/test/Transforms/Inline/AArch64/lit.local.cfg new file mode 100644 index 000000000000..7184443994b6 --- /dev/null +++ b/test/Transforms/Inline/AArch64/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'AArch64' in config.root.targets: + config.unsupported = True diff --git a/test/Transforms/Inline/alloca-bonus.ll b/test/Transforms/Inline/alloca-bonus.ll index 542dcee0fcb2..c5c2ce11cc5b 100644 --- a/test/Transforms/Inline/alloca-bonus.ll +++ b/test/Transforms/Inline/alloca-bonus.ll @@ -3,7 +3,7 @@ target datalayout = "p:32:32" -declare void @llvm.lifetime.start(i64 %size, i8* nocapture %ptr) +declare void @llvm.lifetime.start.p0i8(i64 %size, i8* nocapture %ptr) @glbl = external global i32 @@ -22,7 +22,7 @@ define void @inner1(i32 *%ptr) { %D = getelementptr inbounds i32, i32* %ptr, i32 1 %E = bitcast i32* %ptr to i8* %F = select i1 false, i32* %ptr, i32* @glbl - call void @llvm.lifetime.start(i64 0, i8* %E) + call void @llvm.lifetime.start.p0i8(i64 0, i8* %E) call void @extern() ret void } @@ -43,7 +43,7 @@ define void @inner2(i32 *%ptr) { %D = getelementptr inbounds i32, i32* %ptr, i32 %A %E = bitcast i32* %ptr to i8* %F = select i1 false, i32* %ptr, i32* @glbl - call void @llvm.lifetime.start(i64 0, i8* %E) + call void @llvm.lifetime.start.p0i8(i64 0, i8* %E) call void @extern() ret void } @@ -152,7 +152,7 @@ if.then: %D = getelementptr inbounds i32, i32* %ptr, i32 %A %E = bitcast i32* %ptr to i8* %F = select i1 false, i32* %ptr, i32* @glbl - call void @llvm.lifetime.start(i64 0, i8* %E) + call void @llvm.lifetime.start.p0i8(i64 0, i8* %E) ret void exit: diff --git a/test/Transforms/Inline/arg-attr-propagation.ll b/test/Transforms/Inline/arg-attr-propagation.ll new file mode 100644 
index 000000000000..3d18e8047e5b --- /dev/null +++ b/test/Transforms/Inline/arg-attr-propagation.ll @@ -0,0 +1,50 @@ +; RUN: opt -inline -S < %s | FileCheck %s + +; The callee guarantees that the pointer argument is nonnull and dereferenceable. +; That information should transfer to the caller. + +define i32 @callee(i32* dereferenceable(32) %t1) { +; CHECK-LABEL: @callee(i32* dereferenceable(32) %t1) +; CHECK-NEXT: [[T2:%.*]] = load i32, i32* %t1 +; CHECK-NEXT: ret i32 [[T2]] +; + %t2 = load i32, i32* %t1 + ret i32 %t2 +} + +; FIXME: All dereferenceability information is lost. +; The caller argument could be known nonnull and dereferenceable(32). + +define i32 @caller1(i32* %t1) { +; CHECK-LABEL: @caller1(i32* %t1) +; CHECK-NEXT: [[T2_I:%.*]] = load i32, i32* %t1 +; CHECK-NEXT: ret i32 [[T2_I]] +; + %t2 = tail call i32 @callee(i32* dereferenceable(32) %t1) + ret i32 %t2 +} + +; The caller argument is nonnull, but that can be explicit. +; The dereferenceable amount could be increased. + +define i32 @caller2(i32* dereferenceable(31) %t1) { +; CHECK-LABEL: @caller2(i32* dereferenceable(31) %t1) +; CHECK-NEXT: [[T2_I:%.*]] = load i32, i32* %t1 +; CHECK-NEXT: ret i32 [[T2_I]] +; + %t2 = tail call i32 @callee(i32* dereferenceable(32) %t1) + ret i32 %t2 +} + +; The caller argument is nonnull, but that can be explicit. +; Make sure that we don't propagate a smaller dereferenceable amount. 
+ +define i32 @caller3(i32* dereferenceable(33) %t1) { +; CHECK-LABEL: @caller3(i32* dereferenceable(33) %t1) +; CHECK-NEXT: [[T2_I:%.*]] = load i32, i32* %t1 +; CHECK-NEXT: ret i32 [[T2_I]] +; + %t2 = tail call i32 @callee(i32* dereferenceable(32) %t1) + ret i32 %t2 +} + diff --git a/test/Transforms/Inline/bfi-update.ll b/test/Transforms/Inline/bfi-update.ll new file mode 100644 index 000000000000..94584e2e6ce5 --- /dev/null +++ b/test/Transforms/Inline/bfi-update.ll @@ -0,0 +1,93 @@ +; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -S -inline-threshold=50 -inline-cold-callsite-threshold=0 -hot-callsite-threshold=50 | FileCheck %s +; This tests incremental updates to caller's BFI as a callee gets inlined. +; In bottom-up inlining, first c->e inlining is considered and fails because +; e's size exceeds the threshold of 50. Then a->c inlining is considered and it +; succeeds. a's BFI is updated incrementally. As c's blocks get pruned, the +; block with label cond_false is removed and since the remaining code is +; straight-line a single block gets cloned into a. This block should get the +; maximum block frequency among the original blocks in c. If it gets the +; frequency of the block with label cond_true in @c, its frequency will be +; 1/10th of function a's entry block frequency, resulting in a callsite count of +; 2 (since a's entry count is 20) which means that a->e callsite will be +; considered cold and not inlined. 
+ +@data = external global i32 +; CHECK-LABEL: define i32 @a( +define i32 @a(i32 %a1) !prof !21 { +; CHECK-NOT: call i32 @c +; CHECK-NOT: call i32 @e +; CHECK: ret +entry: + %cond = icmp sle i32 %a1, 1 + %a2 = call i32 @c(i32 1) + br label %exit +exit: + ret i32 %a2 +} + +declare void @ext(); + +; CHECK: @c(i32 %c1) !prof [[COUNT1:![0-9]+]] +define i32 @c(i32 %c1) !prof !23 { + call void @ext() + %cond = icmp sle i32 %c1, 1 + br i1 %cond, label %cond_true, label %cond_false, !prof !25 + +cond_false: + br label %exit + +cond_true: + %c11 = call i32 @e(i32 %c1) + br label %exit +exit: + %c12 = phi i32 [ 0, %cond_false], [ %c11, %cond_true ] + ret i32 %c12 +} + + +; CHECK: @e(i32 %c1) !prof [[COUNT2:![0-9]+]] +define i32 @e(i32 %c1) !prof !24 { + call void @ext() + call void @ext() + %cond = icmp sle i32 %c1, 1 + br i1 %cond, label %cond_true, label %cond_false + +cond_false: + call void @ext() + %c2 = load i32, i32* @data, align 4 + %c3 = add i32 %c1, %c2 + %c4 = mul i32 %c3, %c2 + %c5 = add i32 %c4, %c2 + %c6 = mul i32 %c5, %c2 + %c7 = add i32 %c6, %c2 + %c8 = mul i32 %c7, %c2 + %c9 = add i32 %c8, %c2 + %c10 = mul i32 %c9, %c2 + ret i32 %c10 + +cond_true: + ret i32 0 +} + +; CHECK: [[COUNT1]] = !{!"function_entry_count", i64 480} +; CHECK: [[COUNT2]] = !{!"function_entry_count", i64 80} +!21 = !{!"function_entry_count", i64 20} +!23 = !{!"function_entry_count", i64 500} +!24 = !{!"function_entry_count", i64 100} +!25 = !{!"branch_weights", i32 1, i32 9} + +!llvm.module.flags = !{!1} +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 10000} +!5 = !{!"MaxCount", i64 1000} +!6 = !{!"MaxInternalCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 1000} +!8 = !{!"NumCounts", i64 3} +!9 = !{!"NumFunctions", i64 3} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14} +!12 = !{i32 10000, i64 1000, i32 1} +!13 = !{i32 999000, i64 1000, i32 1} +!14 = !{i32 999999, i64 5, i32 2} diff 
--git a/test/Transforms/Inline/cgscc-incremental-invalidate.ll b/test/Transforms/Inline/cgscc-incremental-invalidate.ll new file mode 100644 index 000000000000..82d321ccf225 --- /dev/null +++ b/test/Transforms/Inline/cgscc-incremental-invalidate.ll @@ -0,0 +1,111 @@ +; Test for a subtle bug when computing analyses during inlining and mutating +; the SCC structure. Without care, this can fail to invalidate analyses. +; +; RUN: opt < %s -passes='cgscc(inline,function(verify<domtree>))' -debug-pass-manager -S 2>&1 | FileCheck %s + +; First we check that the passes run in the way we expect. Otherwise this test +; may stop testing anything. +; +; CHECK-LABEL: Starting llvm::Module pass manager run. +; CHECK: Running pass: InlinerPass on (test1_f, test1_g, test1_h) +; CHECK: Running analysis: FunctionAnalysisManagerCGSCCProxy on (test1_f, test1_g, test1_h) +; CHECK: Running analysis: DominatorTreeAnalysis on test1_f +; CHECK: Running analysis: DominatorTreeAnalysis on test1_g +; CHECK: Invalidating all non-preserved analyses for: (test1_f, test1_g, test1_h) +; CHECK: Invalidating all non-preserved analyses for: test1_f +; CHECK: Invalidating analysis: DominatorTreeAnalysis on test1_f +; CHECK: Invalidating all non-preserved analyses for: test1_g +; CHECK: Invalidating analysis: DominatorTreeAnalysis on test1_g +; CHECK: Invalidating all non-preserved analyses for: test1_h +; CHECK-NOT: Invalidating anaylsis: +; CHECK: Running analysis: DominatorTreeAnalysis on test1_h +; CHECK: Invalidating all non-preserved analyses for: (test1_g, test1_h) +; CHECK: Invalidating all non-preserved analyses for: test1_h +; CHECK: Invalidating analysis: DominatorTreeAnalysis on test1_h + +; An external function used to control branches. +declare i1 @flag() +; CHECK-LABEL: declare i1 @flag() + +; The utility function with interesting control flow that gets inlined below to +; perturb the dominator tree. 
+define internal void @callee() { +entry: + %ptr = alloca i8 + %flag = call i1 @flag() + br i1 %flag, label %then, label %else + +then: + store volatile i8 42, i8* %ptr + br label %return + +else: + store volatile i8 -42, i8* %ptr + br label %return + +return: + ret void +} + +; The 'test1_' prefixed functions work to carefully test that incrementally +; reducing an SCC in the inliner cannot accidentally leave stale function +; analysis results due to failing to invalidate them for all the functions. + +; The inliner visits this last function. It can't actually break any cycles +; here, but because we visit this function we compute fresh analyses for it. +; These analyses are then invalidated when we inline callee disrupting the +; CFG, and it is important that they be freed. +define void @test1_h() { +; CHECK-LABEL: define void @test1_h() +entry: + call void @test1_g() +; CHECK: call void @test1_g() + + ; Pull interesting CFG into this function. + call void @callee() +; CHECK-NOT: call void @callee() + + ret void +; CHECK: ret void +} + +; We visit this function second and here we inline the edge to 'test1_f' +; separating it into its own SCC. The current SCC is now just 'test1_g' and +; 'test1_h'. +define void @test1_g() { +; CHECK-LABEL: define void @test1_g() +entry: + ; This edge gets inlined away. + call void @test1_f() +; CHECK-NOT: call void @test1_f() +; CHECK: call void @test1_g() + + ; We force this edge to survive inlining. + call void @test1_h() noinline +; CHECK: call void @test1_h() + + ; Pull interesting CFG into this function. + call void @callee() +; CHECK-NOT: call void @callee() + + ret void +; CHECK: ret void +} + +; We visit this function first in the inliner, and while we inline callee +; perturbing the CFG, we don't inline anything else and the SCC structure +; remains intact. +define void @test1_f() { +; CHECK-LABEL: define void @test1_f() +entry: + ; We force this edge to survive inlining. 
+ call void @test1_g() noinline +; CHECK: call void @test1_g() + + ; Pull interesting CFG into this function. + call void @callee() +; CHECK-NOT: call void @callee() + + ret void +; CHECK: ret void +} diff --git a/test/Transforms/Inline/cgscc-invalidate.ll b/test/Transforms/Inline/cgscc-invalidate.ll index 60315cda771d..69d84f65e251 100644 --- a/test/Transforms/Inline/cgscc-invalidate.ll +++ b/test/Transforms/Inline/cgscc-invalidate.ll @@ -65,15 +65,15 @@ entry: ; The 'test3_' prefixed functions test the scenario of not inlining preserving ; dominators after splitting an SCC into two smaller SCCs. -; The first function gets visited first and we end up inlining everything we -; can into this routine. That splits test3_g into a separate SCC that is enqued -; for later processing. -define void @test3_f() { -; CHECK-LABEL: define void @test3_f() +; This function ends up split into a separate SCC, which can cause its analyses +; to become stale if the splitting doesn't properly invalidate things. Also, as +; a consequence of being split out, test3_f is too large to inline by the time +; we get here. +define void @test3_g() { +; CHECK-LABEL: define void @test3_g() entry: - ; Create the first edge in the SCC cycle. - call void @test3_g() -; CHECK-NOT: @test3_g() + ; Create the second edge in the SCC cycle. + call void @test3_f() ; CHECK: call void @test3_f() ; Pull interesting CFG into this function. @@ -84,15 +84,15 @@ entry: ; CHECK: ret void } -; This function ends up split into a separate SCC, which can cause its analyses -; to become stale if the splitting doesn't properly invalidate things. Also, as -; a consequence of being split out, test3_f is too large to inline by the time -; we get here. -define void @test3_g() { -; CHECK-LABEL: define void @test3_g() +; The second function gets visited first and we end up inlining everything we +; can into this routine. That splits test3_g into a separate SCC that is enqued +; for later processing. 
+define void @test3_f() { +; CHECK-LABEL: define void @test3_f() entry: - ; Create the second edge in the SCC cycle. - call void @test3_f() + ; Create the first edge in the SCC cycle. + call void @test3_g() +; CHECK-NOT: @test3_g() ; CHECK: call void @test3_f() ; Pull interesting CFG into this function. diff --git a/test/Transforms/Inline/clear-analyses.ll b/test/Transforms/Inline/clear-analyses.ll new file mode 100644 index 000000000000..4b1d37ca29a9 --- /dev/null +++ b/test/Transforms/Inline/clear-analyses.ll @@ -0,0 +1,32 @@ +; Test that when a pass like correlated-propagation populates an analysis such +; as LVI with references back into the IR of a function that the inliner will +; delete, this doesn't crash or go awry despite the inliner clearing the analyses +; separately from when it deletes the function. +; +; RUN: opt -debug-pass-manager -S < %s 2>&1 \ +; RUN: -passes='cgscc(inline,function(correlated-propagation))' \ +; RUN: | FileCheck %s +; +; CHECK-LABEL: Starting llvm::Module pass manager run. 
+; CHECK: Running pass: InlinerPass on (callee) +; CHECK: Running pass: CorrelatedValuePropagationPass on callee +; CHECK: Running analysis: LazyValueAnalysis +; CHECK: Running pass: InlinerPass on (caller) +; CHECK: Clearing all analysis results for: callee +; CHECK: Running pass: CorrelatedValuePropagationPass on caller +; CHECK: Running analysis: LazyValueAnalysis + +define internal i32 @callee(i32 %x) { +; CHECK-NOT: @callee +entry: + ret i32 %x +} + +define i32 @caller(i32 %x) { +; CHECK-LABEL: define i32 @caller +entry: + %call = call i32 @callee(i32 %x) +; CHECK-NOT: call + ret i32 %call +; CHECK: ret i32 %x +} diff --git a/test/Transforms/Inline/crash-lifetime-marker.ll b/test/Transforms/Inline/crash-lifetime-marker.ll index e7a594cdb5e4..7196616521e9 100644 --- a/test/Transforms/Inline/crash-lifetime-marker.ll +++ b/test/Transforms/Inline/crash-lifetime-marker.ll @@ -15,9 +15,9 @@ define i32 @callee1(i32 %count) { ; CHECK-LABEL: define i32 @caller1( ; CHECK: [[ALLOCA:%[a-z0-9\.]+]] = alloca i8 -; CHECK-NOT: call void @llvm.lifetime.start( +; CHECK-NOT: call void @llvm.lifetime.start.p0i8( ; CHECK: call i32 @callee2(i8* [[ALLOCA]]) -; CHECK-NOT: call void @llvm.lifetime.end( +; CHECK-NOT: call void @llvm.lifetime.end.p0i8( define i32 @caller1(i32 %count) { %call0 = call i32 @callee1(i32 0) diff --git a/test/Transforms/Inline/function-count-update-2.ll b/test/Transforms/Inline/function-count-update-2.ll new file mode 100644 index 000000000000..702fa6292c29 --- /dev/null +++ b/test/Transforms/Inline/function-count-update-2.ll @@ -0,0 +1,33 @@ +; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -S | FileCheck %s + +; This tests that the function count of a callee gets correctly updated after it +; has been inlined into two callsites. 
+ +; CHECK: @callee() !prof [[COUNT:![0-9]+]] +define i32 @callee() !prof !1 { + ret i32 0 +} + +define i32 @caller1() !prof !2 { +; CHECK-LABEL: @caller1 +; CHECK-NOT: callee +; CHECK: ret + %i = call i32 @callee() + ret i32 %i +} + +define i32 @caller2() !prof !3 { +; CHECK-LABEL: @caller2 +; CHECK-NOT: callee +; CHECK: ret + %i = call i32 @callee() + ret i32 %i +} + +!llvm.module.flags = !{!0} +; CHECK: [[COUNT]] = !{!"function_entry_count", i64 0} +!0 = !{i32 1, !"MaxFunctionCount", i32 1000} +!1 = !{!"function_entry_count", i64 1000} +!2 = !{!"function_entry_count", i64 600} +!3 = !{!"function_entry_count", i64 400} + diff --git a/test/Transforms/Inline/function-count-update-3.ll b/test/Transforms/Inline/function-count-update-3.ll new file mode 100644 index 000000000000..215d64175faf --- /dev/null +++ b/test/Transforms/Inline/function-count-update-3.ll @@ -0,0 +1,78 @@ +; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -S -inline-threshold=50 | FileCheck %s + +; This tests that the function count of a function gets properly scaled after +; inlining a call chain leading to the function. +; Function a calls c with count 200 (C1) +; Function c calls e with count 250 (C2) +; Entry count of e is 500 (C3) +; Entry count of c is 500 (C4) +; Function b calls c with count 300 (C5) +; c->e inlining does not happen since the cost exceeds threshold. +; c then inlined into a. +; e now gets inlined into a (through c) since the branch condition in e is now +; known and hence the cost gets reduced. 
+; Estimated count of a->e callsite = C2 * (C1 / C4) +; Estimated count of a->e callsite = 250 * (200 / 500) = 100 +; Remaining count of e = C3 - 100 = 500 - 100 = 400 +; Remaining count of c = C4 - C1 - C5 = 500 - 200 - 300 = 0 + +@data = external global i32 + +define i32 @a(i32 %a1) !prof !1 { + %a2 = call i32 @c(i32 %a1, i32 1) + ret i32 %a2 +} + +define i32 @b(i32 %b1) !prof !2 { + %b2 = call i32 @c(i32 %b1, i32 %b1) + ret i32 %b2 +} + +declare void @ext(); + +; CHECK: @c(i32 %c1, i32 %c100) !prof [[COUNT1:![0-9]+]] +define i32 @c(i32 %c1, i32 %c100) !prof !3 { + call void @ext() + %cond = icmp sle i32 %c1, 1 + br i1 %cond, label %cond_true, label %cond_false + +cond_false: + ret i32 0 + +cond_true: + %c11 = call i32 @e(i32 %c100) + ret i32 %c11 +} + + +; CHECK: @e(i32 %c1) !prof [[COUNT2:![0-9]+]] +define i32 @e(i32 %c1) !prof !4 { + %cond = icmp sle i32 %c1, 1 + br i1 %cond, label %cond_true, label %cond_false + +cond_false: + call void @ext() + %c2 = load i32, i32* @data, align 4 + %c3 = add i32 %c1, %c2 + %c4 = mul i32 %c3, %c2 + %c5 = add i32 %c4, %c2 + %c6 = mul i32 %c5, %c2 + %c7 = add i32 %c6, %c2 + %c8 = mul i32 %c7, %c2 + %c9 = add i32 %c8, %c2 + %c10 = mul i32 %c9, %c2 + ret i32 %c10 + +cond_true: + ret i32 0 +} + +!llvm.module.flags = !{!0} +; CHECK: [[COUNT1]] = !{!"function_entry_count", i64 0} +; CHECK: [[COUNT2]] = !{!"function_entry_count", i64 400} +!0 = !{i32 1, !"MaxFunctionCount", i32 5000} +!1 = !{!"function_entry_count", i64 200} +!2 = !{!"function_entry_count", i64 300} +!3 = !{!"function_entry_count", i64 500} +!4 = !{!"function_entry_count", i64 500} + diff --git a/test/Transforms/Inline/function-count-update.ll b/test/Transforms/Inline/function-count-update.ll new file mode 100644 index 000000000000..094ad5a2ae67 --- /dev/null +++ b/test/Transforms/Inline/function-count-update.ll @@ -0,0 +1,50 @@ +; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -S | FileCheck %s + +; This tests that the function count of two callees 
get correctly updated after +; they have been inlined into two back-to-back callsites in a single basic block +; in the caller. The callees have the alwaysinline attribute and so they get +; inlined both with the regular inliner pass and the always inline pass. In +; both cases, the new count of each callee is the original count minus callsite +; count which is 200 (since the caller's entry count is 400 and the block +; containing the calls have a relative block frequency of 0.5). + +; CHECK: @callee1(i32 %n) #0 !prof [[COUNT1:![0-9]+]] +define i32 @callee1(i32 %n) #0 !prof !1 { + %cond = icmp sle i32 %n, 10 + br i1 %cond, label %cond_true, label %cond_false + +cond_true: + %r1 = add i32 %n, 1 + ret i32 %r1 +cond_false: + %r2 = add i32 %n, 2 + ret i32 %r2 +} + +; CHECK: @callee2(i32 %n) #0 !prof [[COUNT2:![0-9]+]] +define i32 @callee2(i32 %n) #0 !prof !2 { + %r1 = add i32 %n, 1 + ret i32 %r1 +} + +define i32 @caller(i32 %n) !prof !3 { + %cond = icmp sle i32 %n, 100 + br i1 %cond, label %cond_true, label %cond_false + +cond_true: + %i = call i32 @callee1(i32 %n) + %j = call i32 @callee2(i32 %i) + ret i32 %j +cond_false: + ret i32 0 +} + +!llvm.module.flags = !{!0} +; CHECK: [[COUNT1]] = !{!"function_entry_count", i64 800} +; CHECK: [[COUNT2]] = !{!"function_entry_count", i64 1800} +!0 = !{i32 1, !"MaxFunctionCount", i32 1000} +!1 = !{!"function_entry_count", i64 1000} +!2 = !{!"function_entry_count", i64 2000} +!3 = !{!"function_entry_count", i64 400} +attributes #0 = { alwaysinline } + diff --git a/test/Transforms/Inline/inline-cold-callee.ll b/test/Transforms/Inline/inline-cold-callee.ll index 153f446c5c2e..404c537b297f 100644 --- a/test/Transforms/Inline/inline-cold-callee.ll +++ b/test/Transforms/Inline/inline-cold-callee.ll @@ -1,5 +1,4 @@ ; RUN: opt < %s -inline -inlinecold-threshold=0 -S | FileCheck %s -; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -inlinecold-threshold=0 -S | FileCheck %s ; This tests that a cold callee gets the (lower) 
inlinecold-threshold even without ; Cold hint and does not get inlined because the cost exceeds the inlinecold-threshold. diff --git a/test/Transforms/Inline/inline-cold-callsite.ll b/test/Transforms/Inline/inline-cold-callsite.ll new file mode 100644 index 000000000000..26ea8e50eaf1 --- /dev/null +++ b/test/Transforms/Inline/inline-cold-callsite.ll @@ -0,0 +1,54 @@ +; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -inline-threshold=100 -inline-cold-callsite-threshold=0 -S | FileCheck %s + +; This tests that a cold callsite gets the inline-cold-callsite-threshold +; and does not get inlined. Another callsite to an identical callee that +; is not cold gets inlined because cost is below the inline-threshold. + +define i32 @callee1(i32 %x) !prof !21 { + %x1 = add i32 %x, 1 + %x2 = add i32 %x1, 1 + %x3 = add i32 %x2, 1 + call void @extern() + ret i32 %x3 +} + +define i32 @caller(i32 %n) !prof !22 { +; CHECK-LABEL: @caller( + %cond = icmp sle i32 %n, 100 + br i1 %cond, label %cond_true, label %cond_false, !prof !0 + +cond_true: +; CHECK-LABEL: cond_true: +; CHECK-NOT: call i32 @callee1 +; CHECK: ret i32 %x3.i + %i = call i32 @callee1(i32 %n) + ret i32 %i +cond_false: +; CHECK-LABEL: cond_false: +; CHECK: call i32 @callee1 +; CHECK: ret i32 %j + %j = call i32 @callee1(i32 %n) + ret i32 %j +} +declare void @extern() + +!0 = !{!"branch_weights", i32 200, i32 1} + +!llvm.module.flags = !{!1} +!21 = !{!"function_entry_count", i64 200} +!22 = !{!"function_entry_count", i64 200} + +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 10000} +!5 = !{!"MaxCount", i64 1000} +!6 = !{!"MaxInternalCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 1000} +!8 = !{!"NumCounts", i64 3} +!9 = !{!"NumFunctions", i64 3} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14} +!12 = !{i32 10000, i64 1000, i32 1} +!13 = !{i32 999000, i64 1000, i32 1} +!14 = !{i32 999999, i64 1, i32 2} 
diff --git a/test/Transforms/Inline/inline-hot-callsite-2.ll b/test/Transforms/Inline/inline-hot-callsite-2.ll new file mode 100644 index 000000000000..ccfe2f0b5dec --- /dev/null +++ b/test/Transforms/Inline/inline-hot-callsite-2.ll @@ -0,0 +1,56 @@ +; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -inline-threshold=0 -inlinehint-threshold=0 -hot-callsite-threshold=100 -S | FileCheck %s + +; This tests that a callsite which is determined to be hot based on the caller's +; entry count and the callsite block frequency gets the hot-callsite-threshold. +; Another callsite with the same callee that is not hot does not get inlined +; because cost exceeds the inline-threshold. inlinehint-threshold is set to 0 +; to ensure callee's hotness is not used to boost the threshold. + +define i32 @callee1(i32 %x) !prof !21 { + %x1 = add i32 %x, 1 + %x2 = add i32 %x1, 1 + %x3 = add i32 %x2, 1 + call void @extern() + ret i32 %x3 +} + +define i32 @caller(i32 %n) !prof !22 { +; CHECK-LABEL: @caller( + %cond = icmp sle i32 %n, 100 + br i1 %cond, label %cond_true, label %cond_false, !prof !0 + +cond_true: +; CHECK-LABEL: cond_true: +; CHECK-NOT: call i32 @callee1 +; CHECK: ret i32 %x3.i + %i = call i32 @callee1(i32 %n) + ret i32 %i +cond_false: +; CHECK-LABEL: cond_false: +; CHECK: call i32 @callee1 +; CHECK: ret i32 %j + %j = call i32 @callee1(i32 %n) + ret i32 %j +} +declare void @extern() + +!0 = !{!"branch_weights", i32 64, i32 4} + +!llvm.module.flags = !{!1} +!21 = !{!"function_entry_count", i64 200} +!22 = !{!"function_entry_count", i64 200} + +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 10000} +!5 = !{!"MaxCount", i64 1000} +!6 = !{!"MaxInternalCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 1000} +!8 = !{!"NumCounts", i64 3} +!9 = !{!"NumFunctions", i64 3} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14} +!12 = !{i32 10000, i64 100, i32 1} +!13 = !{i32 
999000, i64 100, i32 1} +!14 = !{i32 999999, i64 1, i32 2} diff --git a/test/Transforms/Inline/inline-hot-callsite.ll b/test/Transforms/Inline/inline-hot-callsite.ll index bdd7175b3eea..ebf4030d3d10 100644 --- a/test/Transforms/Inline/inline-hot-callsite.ll +++ b/test/Transforms/Inline/inline-hot-callsite.ll @@ -41,7 +41,7 @@ declare void @extern() !1 = !{i32 1, !"ProfileSummary", !2} !2 = !{!3, !4, !5, !6, !7, !8, !9, !10} -!3 = !{!"ProfileFormat", !"InstrProf"} +!3 = !{!"ProfileFormat", !"SampleProfile"} !4 = !{!"TotalCount", i64 10000} !5 = !{!"MaxCount", i64 1000} !6 = !{!"MaxInternalCount", i64 1} diff --git a/test/Transforms/Inline/inline_stats.ll b/test/Transforms/Inline/inline_stats.ll index cf0d43e9215b..bc005b6afd51 100644 --- a/test/Transforms/Inline/inline_stats.ll +++ b/test/Transforms/Inline/inline_stats.ll @@ -36,9 +36,12 @@ define void @internal3() { ret void } +declare void @external_decl() + define void @external1() alwaysinline !thinlto_src_module !0 { call fastcc void @internal2() call fastcc void @external2(); + call void @external_decl(); ret void } diff --git a/test/Transforms/Inline/internal-scc-members.ll b/test/Transforms/Inline/internal-scc-members.ll new file mode 100644 index 000000000000..258ce00744c5 --- /dev/null +++ b/test/Transforms/Inline/internal-scc-members.ll @@ -0,0 +1,31 @@ +; Test that the inliner can handle deleting functions within an SCC while still +; processing the calls in that SCC. 
+; +; RUN: opt < %s -S -inline | FileCheck %s +; RUN: opt < %s -S -passes=inline | FileCheck %s + +; CHECK-LABEL: define internal void @test1_scc0() +; CHECK-NOT: call +; CHECK: call void @test1_scc0() +; CHECK-NOT: call +; CHECK: ret +define internal void @test1_scc0() { +entry: + call void @test1_scc1() + ret void +} + +; CHECK-NOT: @test1_scc1 +define internal void @test1_scc1() { +entry: + call void @test1_scc0() + ret void +} + +; CHECK-LABEL: define void @test1() +; CHECK: call void @test1_scc0() +define void @test1() { +entry: + call void @test1_scc0() noinline + ret void +} diff --git a/test/Transforms/Inline/last-call-bonus.ll b/test/Transforms/Inline/last-call-bonus.ll new file mode 100644 index 000000000000..0088d316848f --- /dev/null +++ b/test/Transforms/Inline/last-call-bonus.ll @@ -0,0 +1,52 @@ +; The goal of this test is checking if LastCallToStaticBonus is applied +; correctly while deciding inline deferral. For the test code below, when +; inliner evaluates the callsite of bar->baz, it checks if inlining of bar->baz +; prevents inlining of foo->bar, even when foo->bar inlining is more beneficial +; than bar->baz inlining. As LastCallToStaticBonus has a massive value, and +; both baz and bar have only one caller, the cost of foo->bar inlining and +; bar->baz inlining should be non-trivial for inliner to compute that bar->baz +; inlining can actually prevent foo->bar inlining. To make the cost of these +; callsites big enough, loop unrolling pass with very high threshold is used to +; preprocess the test. 
+ +; RUN: opt < %s -loop-unroll -inline -unroll-threshold=15000 -inline-threshold=250 -S | FileCheck %s +; CHECK-LABEL: define internal i32 @bar() + +define internal i32 @baz() { +entry: + br label %bb1 + +bb1: + %ind = phi i32 [ 0, %entry ], [ %inc, %bb1 ] + call void @extern() + %inc = add nsw i32 %ind, 1 + %cmp = icmp sgt i32 %inc, 510 + br i1 %cmp, label %ret, label %bb1 + +ret: + ret i32 0 +} + +define internal i32 @bar() { +entry: + br label %bb1 + +bb1: + %ind = phi i32 [ 0, %entry ], [ %inc, %bb1 ] + call void @extern() + %inc = add nsw i32 %ind, 1 + %cmp = icmp sgt i32 %inc, 510 + br i1 %cmp, label %ret, label %bb1 + +ret: + call i32 @baz() + ret i32 0 +} + +define i32 @foo() { +entry: + call i32 @bar() + ret i32 0 +} + +declare void @extern() diff --git a/test/Transforms/Inline/lifetime-no-datalayout.ll b/test/Transforms/Inline/lifetime-no-datalayout.ll index 0212e69d624a..5d1872c6a244 100644 --- a/test/Transforms/Inline/lifetime-no-datalayout.ll +++ b/test/Transforms/Inline/lifetime-no-datalayout.ll @@ -13,9 +13,9 @@ define void @helper() { define void @test() { ; CHECK-LABEL: @test( ; CHECK-NOT: lifetime -; CHECK: llvm.lifetime.start(i64 1 +; CHECK: llvm.lifetime.start.p0i8(i64 1 ; CHECK-NOT: lifetime -; CHECK: llvm.lifetime.end(i64 1 +; CHECK: llvm.lifetime.end.p0i8(i64 1 call void @helper() ; CHECK-NOT: lifetime ; CHECK: ret void diff --git a/test/Transforms/Inline/lifetime.ll b/test/Transforms/Inline/lifetime.ll index 4f415e58f1bf..c47091395fce 100644 --- a/test/Transforms/Inline/lifetime.ll +++ b/test/Transforms/Inline/lifetime.ll @@ -2,25 +2,25 @@ ; RUN: opt -passes='cgscc(inline)' -S < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -declare void @llvm.lifetime.start(i64, i8*) -declare void @llvm.lifetime.end(i64, i8*) +declare void @llvm.lifetime.start.p0i8(i64, i8*) +declare void @llvm.lifetime.end.p0i8(i64, 
i8*) define void @helper_both_markers() { %a = alloca i8 ; Size in llvm.lifetime.start / llvm.lifetime.end differs from ; allocation size. We should use the former. - call void @llvm.lifetime.start(i64 2, i8* %a) - call void @llvm.lifetime.end(i64 2, i8* %a) + call void @llvm.lifetime.start.p0i8(i64 2, i8* %a) + call void @llvm.lifetime.end.p0i8(i64 2, i8* %a) ret void } define void @test_both_markers() { ; CHECK-LABEL: @test_both_markers( -; CHECK: llvm.lifetime.start(i64 2 -; CHECK-NEXT: llvm.lifetime.end(i64 2 +; CHECK: llvm.lifetime.start.p0i8(i64 2 +; CHECK-NEXT: llvm.lifetime.end.p0i8(i64 2 call void @helper_both_markers() -; CHECK-NEXT: llvm.lifetime.start(i64 2 -; CHECK-NEXT: llvm.lifetime.end(i64 2 +; CHECK-NEXT: llvm.lifetime.start.p0i8(i64 2 +; CHECK-NEXT: llvm.lifetime.end.p0i8(i64 2 call void @helper_both_markers() ; CHECK-NEXT: ret void ret void @@ -41,14 +41,14 @@ define void @helper_no_markers() { define void @test_no_marker() { ; CHECK-LABEL: @test_no_marker( ; CHECK-NOT: lifetime -; CHECK: llvm.lifetime.start(i64 1 +; CHECK: llvm.lifetime.start.p0i8(i64 1 ; CHECK-NOT: lifetime -; CHECK: llvm.lifetime.end(i64 1 +; CHECK: llvm.lifetime.end.p0i8(i64 1 call void @helper_no_markers() ; CHECK-NOT: lifetime -; CHECK: llvm.lifetime.start(i64 1 +; CHECK: llvm.lifetime.start.p0i8(i64 1 ; CHECK-NOT: lifetime -; CHECK: llvm.lifetime.end(i64 1 +; CHECK: llvm.lifetime.end.p0i8(i64 1 call void @helper_no_markers() ; CHECK-NOT: lifetime ; CHECK: ret void @@ -58,23 +58,23 @@ define void @test_no_marker() { define void @helper_two_casts() { %a = alloca i32 %b = bitcast i32* %a to i8* - call void @llvm.lifetime.start(i64 4, i8* %b) + call void @llvm.lifetime.start.p0i8(i64 4, i8* %b) %c = bitcast i32* %a to i8* - call void @llvm.lifetime.end(i64 4, i8* %c) + call void @llvm.lifetime.end.p0i8(i64 4, i8* %c) ret void } define void @test_two_casts() { ; CHECK-LABEL: @test_two_casts( ; CHECK-NOT: lifetime -; CHECK: llvm.lifetime.start(i64 4 +; CHECK: 
llvm.lifetime.start.p0i8(i64 4 ; CHECK-NOT: lifetime -; CHECK: llvm.lifetime.end(i64 4 +; CHECK: llvm.lifetime.end.p0i8(i64 4 call void @helper_two_casts() ; CHECK-NOT: lifetime -; CHECK: llvm.lifetime.start(i64 4 +; CHECK: llvm.lifetime.start.p0i8(i64 4 ; CHECK-NOT: lifetime -; CHECK: llvm.lifetime.end(i64 4 +; CHECK: llvm.lifetime.end.p0i8(i64 4 call void @helper_two_casts() ; CHECK-NOT: lifetime ; CHECK: ret void @@ -91,9 +91,9 @@ define void @helper_arrays_alloca() { define void @test_arrays_alloca() { ; CHECK-LABEL: @test_arrays_alloca( ; CHECK-NOT: lifetime -; CHECK: llvm.lifetime.start(i64 40, +; CHECK: llvm.lifetime.start.p0i8(i64 40, ; CHECK-NOT: lifetime -; CHECK: llvm.lifetime.end(i64 40, +; CHECK: llvm.lifetime.end.p0i8(i64 40, call void @helper_arrays_alloca() ; CHECK-NOT: lifetime ; CHECK: ret void diff --git a/test/Transforms/Inline/monster_scc.ll b/test/Transforms/Inline/monster_scc.ll new file mode 100644 index 000000000000..0f8f1f21c8b5 --- /dev/null +++ b/test/Transforms/Inline/monster_scc.ll @@ -0,0 +1,460 @@ +; This test creates a monster SCC with a very pernicious call graph. It builds +; a cycle of cross-connected pairs of functions with interesting inlining +; decisions throughout, but ultimately trivial code complexity. +; +; Typically, a greedy approach to inlining works well for bottom-up inliners +; such as LLVM's. However, there is no way to be bottom-up over an SCC: it's +; a cycle! Greedily inlining as much as possible into each function of this +; *SCC* will have the disastrous effect of inlining all N-1 functions into the +; first one visited, N-2 functions into the second one visited, N-3 into the +; third, and so on. This is because until inlining occurs, each function in +; isolation appears to be an excellent inline candidate. +; +; Note that the exact number of calls in each function doesn't really matter. +; It is mostly a function of cost thresholds and visit order.
Because this is an +; SCC there is no "right" or "wrong" answer here as long as no function blows up +; to be *huge*. The specific concerning pattern is if one or more functions get +; more than 16 calls in them. +; +; This test is extracted from the following C++ program compiled with Clang. +; The IR is simplified with SROA, instcombine, and simplify-cfg. Then C++ +; linkage stuff, attributes, target specific things, metadata and comments were +; removed. The order of the functions is also made more predictable than Clang's +; output order. +; +; void g(int); +; +; template <bool K, int N> void f(bool *B, bool *E) { +; if (K) +; g(N); +; if (B == E) +; return; +; if (*B) +; f<true, N + 1>(B + 1, E); +; else +; f<false, N + 1>(B + 1, E); +; } +; template <> void f<false, MAX>(bool *B, bool *E) { return f<false, 0>(B, E); } +; template <> void f<true, MAX>(bool *B, bool *E) { return f<true, 0>(B, E); } +; +; void test(bool *B, bool *E) { f<false, 0>(B, E); } +; +; RUN: opt -S < %s -inline -inline-threshold=150 | FileCheck %s --check-prefixes=CHECK,OLD +; RUN: opt -S < %s -passes=inline -inline-threshold=150 | FileCheck %s --check-prefixes=CHECK,NEW + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +declare void @_Z1gi(i32) + +; CHECK-LABEL: define void @_Z1fILb0ELi0EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1gi( +; OLD-NOT: call +; OLD: call void @_Z1fILb1ELi2EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1fILb0ELi2EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1fILb0ELi1EEvPbS0_( +; OLD-NOT: call +; NEW-NOT: call +; NEW: call void @_Z1gi( +; NEW-NOT: call +; NEW: call void @_Z1fILb1ELi2EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1fILb0ELi2EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1fILb1ELi2EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1fILb0ELi2EEvPbS0_( +; NEW-NOT: call +define void @_Z1fILb0ELi0EEvPbS0_(i8* %B, i8* %E) { +entry: + %cmp = icmp eq i8* %B, %E + br i1 %cmp, label %if.end3, label %if.end + +if.end: + %0 = load i8, i8*
%B, align 1 + %tobool = icmp eq i8 %0, 0 + %add.ptr2 = getelementptr inbounds i8, i8* %B, i64 1 + br i1 %tobool, label %if.else, label %if.then1 + +if.then1: + call void @_Z1fILb1ELi1EEvPbS0_(i8* %add.ptr2, i8* %E) + br label %if.end3 + +if.else: + call void @_Z1fILb0ELi1EEvPbS0_(i8* %add.ptr2, i8* %E) + br label %if.end3 + +if.end3: + ret void +} + +; CHECK-LABEL: define void @_Z1fILb1ELi0EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1gi( +; OLD-NOT: call +; OLD: call void @_Z1gi( +; OLD-NOT: call +; OLD: call void @_Z1fILb1ELi2EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1fILb0ELi2EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1fILb0ELi1EEvPbS0_( +; OLD-NOT: call +; NEW-NOT: call +; NEW: call void @_Z1gi( +; NEW-NOT: call +; NEW: call void @_Z1fILb1ELi1EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1fILb1ELi2EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1fILb0ELi2EEvPbS0_( +; NEW-NOT: call +define void @_Z1fILb1ELi0EEvPbS0_(i8* %B, i8* %E) { +entry: + call void @_Z1gi(i32 0) + %cmp = icmp eq i8* %B, %E + br i1 %cmp, label %if.end3, label %if.end + +if.end: + %0 = load i8, i8* %B, align 1 + %tobool = icmp eq i8 %0, 0 + %add.ptr2 = getelementptr inbounds i8, i8* %B, i64 1 + br i1 %tobool, label %if.else, label %if.then1 + +if.then1: + call void @_Z1fILb1ELi1EEvPbS0_(i8* %add.ptr2, i8* %E) + br label %if.end3 + +if.else: + call void @_Z1fILb0ELi1EEvPbS0_(i8* %add.ptr2, i8* %E) + br label %if.end3 + +if.end3: + ret void +} + +; CHECK-LABEL: define void @_Z1fILb0ELi1EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1gi( +; OLD-NOT: call +; OLD: call void @_Z1gi( +; OLD-NOT: call +; OLD: call void @_Z1fILb1ELi0EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1fILb0ELi0EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1fILb1ELi0EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1fILb0ELi0EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1fILb0ELi2EEvPbS0_( +; OLD-NOT: call +; NEW-NOT: call +; NEW: call void @_Z1fILb1ELi2EEvPbS0_( +; NEW-NOT: call +; NEW: call void 
@_Z1gi( +; NEW-NOT: call +; NEW: call void @_Z1fILb1ELi0EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1fILb0ELi0EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1fILb0ELi3EEvPbS0_( +; NEW-NOT: call +define void @_Z1fILb0ELi1EEvPbS0_(i8* %B, i8* %E) { +entry: + %cmp = icmp eq i8* %B, %E + br i1 %cmp, label %if.end3, label %if.end + +if.end: + %0 = load i8, i8* %B, align 1 + %tobool = icmp eq i8 %0, 0 + %add.ptr2 = getelementptr inbounds i8, i8* %B, i64 1 + br i1 %tobool, label %if.else, label %if.then1 + +if.then1: + call void @_Z1fILb1ELi2EEvPbS0_(i8* %add.ptr2, i8* %E) + br label %if.end3 + +if.else: + call void @_Z1fILb0ELi2EEvPbS0_(i8* %add.ptr2, i8* %E) + br label %if.end3 + +if.end3: + ret void +} + +; CHECK-LABEL: define void @_Z1fILb1ELi1EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1gi( +; OLD-NOT: call +; OLD: call void @_Z1fILb1ELi2EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1fILb0ELi2EEvPbS0_( +; OLD-NOT: call +; NEW-NOT: call +; NEW: call void @_Z1gi( +; NEW-NOT: call +; NEW: call void @_Z1gi( +; NEW-NOT: call +; NEW: call void @_Z1gi( +; NEW-NOT: call +; NEW: call void @_Z1fILb1ELi0EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1fILb0ELi0EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1fILb0ELi3EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1gi( +; NEW-NOT: call +; NEW: call void @_Z1fILb1ELi0EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1fILb0ELi0EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1fILb0ELi3EEvPbS0_( +; NEW-NOT: call +define void @_Z1fILb1ELi1EEvPbS0_(i8* %B, i8* %E) { +entry: + call void @_Z1gi(i32 1) + %cmp = icmp eq i8* %B, %E +; CHECK-NOT: call + br i1 %cmp, label %if.end3, label %if.end + +if.end: + %0 = load i8, i8* %B, align 1 + %tobool = icmp eq i8 %0, 0 + %add.ptr2 = getelementptr inbounds i8, i8* %B, i64 1 + br i1 %tobool, label %if.else, label %if.then1 + +if.then1: + call void @_Z1fILb1ELi2EEvPbS0_(i8* %add.ptr2, i8* %E) + br label %if.end3 + +if.else: + call void @_Z1fILb0ELi2EEvPbS0_(i8* %add.ptr2, i8* %E) + br 
label %if.end3 + +if.end3: + ret void +} + +; CHECK-LABEL: define void @_Z1fILb0ELi2EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1gi( +; OLD-NOT: call +; OLD: call void @_Z1fILb1ELi0EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1fILb0ELi0EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1fILb1ELi0EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1fILb0ELi0EEvPbS0_( +; OLD-NOT: call +; NEW-NOT: call +; NEW: call void @_Z1gi( +; NEW-NOT: call +; NEW: call void @_Z1fILb1ELi0EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1fILb0ELi0EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1fILb1ELi4EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1fILb0ELi0EEvPbS0_( +; NEW-NOT: call +define void @_Z1fILb0ELi2EEvPbS0_(i8* %B, i8* %E) { +entry: + %cmp = icmp eq i8* %B, %E + br i1 %cmp, label %if.end3, label %if.end + +if.end: + %0 = load i8, i8* %B, align 1 + %tobool = icmp eq i8 %0, 0 + %add.ptr2 = getelementptr inbounds i8, i8* %B, i64 1 + br i1 %tobool, label %if.else, label %if.then1 + +if.then1: + call void @_Z1fILb1ELi3EEvPbS0_(i8* %add.ptr2, i8* %E) + br label %if.end3 + +if.else: + call void @_Z1fILb0ELi3EEvPbS0_(i8* %add.ptr2, i8* %E) + br label %if.end3 + +if.end3: + ret void +} + +; CHECK-LABEL: define void @_Z1fILb1ELi2EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1gi( +; OLD-NOT: call +; OLD: call void @_Z1gi( +; OLD-NOT: call +; OLD: call void @_Z1fILb1ELi0EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1fILb0ELi0EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1fILb1ELi0EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1fILb0ELi0EEvPbS0_( +; OLD-NOT: call +; NEW-NOT: call +; NEW: call void @_Z1gi( +; NEW-NOT: call +; NEW: call void @_Z1gi( +; NEW-NOT: call +; NEW: call void @_Z1gi( +; NEW-NOT: call +; NEW: call void @_Z1fILb1ELi1EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1fILb0ELi1EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1fILb0ELi0EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1gi( +; NEW-NOT: call +; NEW: call void @_Z1fILb1ELi1EEvPbS0_( +; NEW-NOT: 
call +; NEW: call void @_Z1fILb0ELi1EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1fILb0ELi0EEvPbS0_( +; NEW-NOT: call +define void @_Z1fILb1ELi2EEvPbS0_(i8* %B, i8* %E) { +entry: + call void @_Z1gi(i32 2) + %cmp = icmp eq i8* %B, %E + br i1 %cmp, label %if.end3, label %if.end + +if.end: + %0 = load i8, i8* %B, align 1 + %tobool = icmp eq i8 %0, 0 + %add.ptr2 = getelementptr inbounds i8, i8* %B, i64 1 + br i1 %tobool, label %if.else, label %if.then1 + +if.then1: + call void @_Z1fILb1ELi3EEvPbS0_(i8* %add.ptr2, i8* %E) + br label %if.end3 + +if.else: + call void @_Z1fILb0ELi3EEvPbS0_(i8* %add.ptr2, i8* %E) + br label %if.end3 + +if.end3: + ret void +} + +; CHECK-LABEL: define void @_Z1fILb0ELi3EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1fILb1ELi0EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1fILb0ELi0EEvPbS0_( +; OLD-NOT: call +; NEW-NOT: call +; NEW: call void @_Z1gi( +; NEW-NOT: call +; NEW: call void @_Z1fILb1ELi1EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1fILb0ELi1EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1fILb0ELi0EEvPbS0_( +; NEW-NOT: call +define void @_Z1fILb0ELi3EEvPbS0_(i8* %B, i8* %E) { +entry: + %cmp = icmp eq i8* %B, %E + br i1 %cmp, label %if.end3, label %if.end + +if.end: + %0 = load i8, i8* %B, align 1 + %tobool = icmp eq i8 %0, 0 + %add.ptr2 = getelementptr inbounds i8, i8* %B, i64 1 + br i1 %tobool, label %if.else, label %if.then1 + +if.then1: + call void @_Z1fILb1ELi4EEvPbS0_(i8* %add.ptr2, i8* %E) + br label %if.end3 + +if.else: + call void @_Z1fILb0ELi4EEvPbS0_(i8* %add.ptr2, i8* %E) + br label %if.end3 + +if.end3: + ret void +} + +; CHECK-LABEL: define void @_Z1fILb1ELi3EEvPbS0_( +; CHECK-NOT: call +; CHECK: call void @_Z1gi( +; CHECK-NOT: call +; CHECK: call void @_Z1fILb1ELi0EEvPbS0_( +; CHECK-NOT: call +; CHECK: call void @_Z1fILb0ELi0EEvPbS0_( +; CHECK-NOT: call +define void @_Z1fILb1ELi3EEvPbS0_(i8* %B, i8* %E) { +entry: + call void @_Z1gi(i32 3) + %cmp = icmp eq i8* %B, %E + br i1 %cmp, label %if.end3, label %if.end + 
+if.end: + %0 = load i8, i8* %B, align 1 + %tobool = icmp eq i8 %0, 0 + %add.ptr2 = getelementptr inbounds i8, i8* %B, i64 1 + br i1 %tobool, label %if.else, label %if.then1 + +if.then1: + call void @_Z1fILb1ELi4EEvPbS0_(i8* %add.ptr2, i8* %E) + br label %if.end3 + +if.else: + call void @_Z1fILb0ELi4EEvPbS0_(i8* %add.ptr2, i8* %E) + br label %if.end3 + +if.end3: + ret void +} + +; CHECK-LABEL: define void @_Z1fILb0ELi4EEvPbS0_( +; CHECK-NOT: call +; CHECK: call void @_Z1fILb0ELi0EEvPbS0_( +; CHECK-NOT: call +define void @_Z1fILb0ELi4EEvPbS0_(i8* %B, i8* %E) { +entry: + call void @_Z1fILb0ELi0EEvPbS0_(i8* %B, i8* %E) + ret void +} + +; CHECK-LABEL: define void @_Z1fILb1ELi4EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1fILb1ELi0EEvPbS0_( +; OLD-NOT: call +; NEW-NOT: call +; NEW: call void @_Z1gi( +; NEW-NOT: call +; NEW: call void @_Z1fILb1ELi1EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1fILb1ELi2EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1gi( +; NEW-NOT: call +; NEW: call void @_Z1fILb1ELi0EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1fILb0ELi0EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1fILb0ELi3EEvPbS0_( +; NEW-NOT: call +define void @_Z1fILb1ELi4EEvPbS0_(i8* %B, i8* %E) { +entry: + call void @_Z1fILb1ELi0EEvPbS0_(i8* %B, i8* %E) + ret void +} + +; CHECK-LABEL: define void @_Z4testPbS_( +; CHECK: call +; CHECK-NOT: call +define void @_Z4testPbS_(i8* %B, i8* %E) { +entry: + call void @_Z1fILb0ELi0EEvPbS0_(i8* %B, i8* %E) + ret void +} + diff --git a/test/Transforms/Inline/optimization-remarks-with-hotness.ll b/test/Transforms/Inline/optimization-remarks-with-hotness.ll index 9611a2dd1bd4..1d6d135bdda8 100644 --- a/test/Transforms/Inline/optimization-remarks-with-hotness.ll +++ b/test/Transforms/Inline/optimization-remarks-with-hotness.ll @@ -4,8 +4,7 @@ ; CHECK: foo should always be inlined (cost=always) (hotness: 30) ; CHECK: foo inlined into bar (hotness: 30) -; CHECK: foz should never be inlined (cost=never) (hotness: 30) -; CHECK: foz will 
not be inlined into bar (hotness: 30) +; CHECK: foz not inlined into bar because it should never be inlined (cost=never) (hotness: 30) ; Function Attrs: alwaysinline nounwind uwtable define i32 @foo() #0 !prof !1 { diff --git a/test/Transforms/Inline/optimization-remarks.ll b/test/Transforms/Inline/optimization-remarks.ll index 59cf08327350..61e270cff76c 100644 --- a/test/Transforms/Inline/optimization-remarks.ll +++ b/test/Transforms/Inline/optimization-remarks.ll @@ -9,8 +9,7 @@ ; NO_HOTNESS-NOT: fox will not be inlined into bar because its definition is unavailable ; CHECK: foo should always be inlined (cost=always) ; CHECK: foo inlined into bar -; CHECK: foz should never be inlined (cost=never) -; CHECK: foz will not be inlined into bar +; CHECK: foz not inlined into bar because it should never be inlined (cost=never) ; Function Attrs: alwaysinline nounwind uwtable define i32 @foo(i32 %x, i32 %y) #0 { diff --git a/test/Transforms/Inline/prof-update.ll b/test/Transforms/Inline/prof-update.ll new file mode 100644 index 000000000000..38fcc7e45996 --- /dev/null +++ b/test/Transforms/Inline/prof-update.ll @@ -0,0 +1,39 @@ +; RUN: opt < %s -inline -S | FileCheck %s +; Checks if inliner updates branch_weights annotation for call instructions. + +declare void @ext(); +declare void @ext1(); + +; CHECK: define void @callee(i32 %n) !prof ![[ENTRY_COUNT:[0-9]*]] +define void @callee(i32 %n) !prof !1 { + %cond = icmp sle i32 %n, 10 + br i1 %cond, label %cond_true, label %cond_false +cond_true: +; ext1 is optimized away, thus not updated. +; CHECK: call void @ext1(), !prof ![[COUNT_CALLEE1:[0-9]*]] + call void @ext1(), !prof !2 + ret void +cond_false: +; ext is cloned and updated. 
+; CHECK: call void @ext(), !prof ![[COUNT_CALLEE:[0-9]*]] + call void @ext(), !prof !2 + ret void +} + +; CHECK: define void @caller() +define void @caller() { +; CHECK: call void @ext(), !prof ![[COUNT_CALLER:[0-9]*]] + call void @callee(i32 15), !prof !3 + ret void +} + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"MaxFunctionCount", i32 2000} +!1 = !{!"function_entry_count", i64 1000} +!2 = !{!"branch_weights", i64 2000} +!3 = !{!"branch_weights", i64 400} +attributes #0 = { alwaysinline } +; CHECK: ![[ENTRY_COUNT]] = !{!"function_entry_count", i64 600} +; CHECK: ![[COUNT_CALLEE1]] = !{!"branch_weights", i64 2000} +; CHECK: ![[COUNT_CALLEE]] = !{!"branch_weights", i32 1200} +; CHECK: ![[COUNT_CALLER]] = !{!"branch_weights", i32 800} |