aboutsummaryrefslogtreecommitdiff
path: root/test/Analysis
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2017-01-02 19:17:04 +0000
committerDimitry Andric <dim@FreeBSD.org>2017-01-02 19:17:04 +0000
commitb915e9e0fc85ba6f398b3fab0db6a81a8913af94 (patch)
tree98b8f811c7aff2547cab8642daf372d6c59502fb /test/Analysis
parent6421cca32f69ac849537a3cff78c352195e99f1b (diff)
Vendor import of llvm trunk r290819:vendor/llvm/llvm-trunk-r290819
Notes
Notes: svn path=/vendor/llvm/dist/; revision=311116 svn path=/vendor/llvm/llvm-trunk-r290819/; revision=311117; tag=vendor/llvm/llvm-trunk-r290819
Diffstat (limited to 'test/Analysis')
-rw-r--r--test/Analysis/AliasSet/intrinsics.ll19
-rw-r--r--test/Analysis/AliasSet/memtransfer.ll114
-rw-r--r--test/Analysis/AliasSet/saturation.ll53
-rw-r--r--test/Analysis/BasicAA/assume.ll4
-rw-r--r--test/Analysis/BasicAA/cs-cs.ll103
-rw-r--r--test/Analysis/BasicAA/full-store-partial-alias.ll2
-rw-r--r--test/Analysis/BasicAA/gep-and-alias.ll43
-rw-r--r--test/Analysis/BasicAA/guards.ll4
-rw-r--r--test/Analysis/BasicAA/invalidation.ll47
-rw-r--r--test/Analysis/BranchProbabilityInfo/basic.ll94
-rw-r--r--test/Analysis/CFLAliasAnalysis/Andersen/basic-interproc.ll22
-rw-r--r--test/Analysis/CFLAliasAnalysis/Andersen/interproc-arg-deref-escape.ll33
-rw-r--r--test/Analysis/CFLAliasAnalysis/Andersen/interproc-arg-escape.ll31
-rw-r--r--test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-arg.ll26
-rw-r--r--test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-deref-arg-multilevel.ll52
-rw-r--r--test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-deref-arg.ll37
-rw-r--r--test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-escape.ll33
-rw-r--r--test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-ref-arg-multilevel.ll53
-rw-r--r--test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-ref-arg.ll37
-rw-r--r--test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-unknown.ll38
-rw-r--r--test/Analysis/CFLAliasAnalysis/Andersen/interproc-store-arg-multilevel.ll45
-rw-r--r--test/Analysis/CFLAliasAnalysis/Andersen/interproc-store-arg-unknown.ll32
-rw-r--r--test/Analysis/CFLAliasAnalysis/Andersen/interproc-store-arg.ll40
-rw-r--r--test/Analysis/CFLAliasAnalysis/Steensgaard/full-store-partial-alias.ll2
-rw-r--r--test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-ret-arg.ll3
-rw-r--r--test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-store-arg-multilevel.ll4
-rw-r--r--test/Analysis/ConstantFolding/gep.ll30
-rw-r--r--test/Analysis/ConstantFolding/vectorgep-crash.ll40
-rw-r--r--test/Analysis/CostModel/AArch64/gep.ll292
-rw-r--r--test/Analysis/CostModel/AArch64/store.ll18
-rw-r--r--test/Analysis/CostModel/ARM/gep.ll22
-rw-r--r--test/Analysis/CostModel/PowerPC/vsr_load_32_64.ll19
-rw-r--r--test/Analysis/CostModel/X86/arith-fp.ll544
-rw-r--r--test/Analysis/CostModel/X86/arith.ll618
-rw-r--r--test/Analysis/CostModel/X86/ctbits-cost.ll264
-rw-r--r--test/Analysis/CostModel/X86/div.ll400
-rw-r--r--test/Analysis/CostModel/X86/fptosi.ll261
-rw-r--r--test/Analysis/CostModel/X86/fptoui.ll262
-rwxr-xr-xtest/Analysis/CostModel/X86/interleave-load-i32.ll85
-rwxr-xr-xtest/Analysis/CostModel/X86/interleave-store-i32.ll85
-rw-r--r--test/Analysis/CostModel/X86/reduction.ll4
-rw-r--r--test/Analysis/CostModel/X86/rem.ll116
-rw-r--r--test/Analysis/CostModel/X86/scalarize.ll6
-rw-r--r--test/Analysis/CostModel/X86/shuffle-broadcast.ll31
-rw-r--r--test/Analysis/CostModel/X86/shuffle-reverse.ll168
-rw-r--r--test/Analysis/CostModel/X86/shuffle-single-src.ll94
-rw-r--r--test/Analysis/CostModel/X86/shuffle-two-src.ll68
-rw-r--r--test/Analysis/CostModel/X86/sitofp.ll920
-rwxr-xr-xtest/Analysis/CostModel/X86/strided-load-i16.ll113
-rwxr-xr-xtest/Analysis/CostModel/X86/strided-load-i32.ll110
-rwxr-xr-xtest/Analysis/CostModel/X86/strided-load-i64.ll81
-rwxr-xr-xtest/Analysis/CostModel/X86/strided-load-i8.ll117
-rw-r--r--test/Analysis/CostModel/X86/trunc.ll141
-rw-r--r--test/Analysis/CostModel/X86/uitofp.ll951
-rw-r--r--test/Analysis/CostModel/X86/uniformshift.ll39
-rw-r--r--test/Analysis/CostModel/X86/vshift-ashr-cost.ll84
-rw-r--r--test/Analysis/CostModel/X86/vshift-lshr-cost.ll84
-rw-r--r--test/Analysis/CostModel/X86/vshift-shl-cost.ll50
-rw-r--r--test/Analysis/Delinearization/terms_with_identity_factor.ll64
-rw-r--r--test/Analysis/Dominators/2006-10-02-BreakCritEdges.ll1
-rw-r--r--test/Analysis/Dominators/2007-01-14-BreakCritEdges.ll3
-rw-r--r--test/Analysis/GlobalsModRef/dead-uses.ll54
-rw-r--r--test/Analysis/GlobalsModRef/func-memattributes.ll24
-rw-r--r--test/Analysis/GlobalsModRef/global-used-by-global.ll54
-rw-r--r--test/Analysis/MemoryDependenceAnalysis/invalidation.ll76
-rw-r--r--test/Analysis/RegionInfo/infinite_loop_5_a.ll24
-rw-r--r--test/Analysis/RegionInfo/infinite_loop_5_b.ll25
-rw-r--r--test/Analysis/RegionInfo/infinite_loop_5_c.ll22
-rw-r--r--test/Analysis/ScalarEvolution/max-mulops-inline.ll29
-rw-r--r--test/Analysis/ScalarEvolution/no-wrap-unknown-becount.ll66
-rw-r--r--test/Analysis/ScalarEvolution/pr18606.ll67
-rw-r--r--test/Analysis/ScalarEvolution/pr28705.ll41
-rw-r--r--test/Analysis/ScalarEvolution/scev-expander-existing-value-offset.ll44
-rw-r--r--test/Analysis/ScalarEvolution/scev-expander-reuse-gep.ll36
-rw-r--r--test/Analysis/ScalarEvolution/scev-expander-reuse-unroll.ll35
-rw-r--r--test/Analysis/ScalarEvolution/scev-expander-reuse-vect.ll (renamed from test/Analysis/ScalarEvolution/scev-expander-existing-value.ll)2
-rw-r--r--test/Analysis/ScalarEvolution/sext-inreg.ll33
-rw-r--r--test/Analysis/ScalarEvolution/smax-br-phi-idioms.ll28
-rw-r--r--test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll62
-rw-r--r--test/Analysis/ScalarEvolution/trip-count13.ll8
-rw-r--r--test/Analysis/ScalarEvolution/trip-count14.ll177
-rw-r--r--test/Analysis/ScalarEvolution/trip-count5.ll59
-rw-r--r--test/Analysis/TypeBasedAliasAnalysis/aliastest.ll3
-rw-r--r--test/Analysis/TypeBasedAliasAnalysis/cyclic.ll2
-rw-r--r--test/Analysis/TypeBasedAliasAnalysis/dse.ll3
-rw-r--r--test/Analysis/TypeBasedAliasAnalysis/dynamic-indices.ll2
-rw-r--r--test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll3
-rw-r--r--test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll2
-rw-r--r--test/Analysis/TypeBasedAliasAnalysis/licm.ll7
-rw-r--r--test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll2
-rw-r--r--test/Analysis/ValueTracking/dereferenceable-and-aligned.ll21
-rw-r--r--test/Analysis/ValueTracking/get-pointer-base-with-const-off.ll26
-rw-r--r--test/Analysis/ValueTracking/known-nonnull-at.ll57
-rw-r--r--test/Analysis/ValueTracking/known-signbit-shift.ll55
-rw-r--r--test/Analysis/ValueTracking/knownzero-addrspacecast.ll24
-rw-r--r--test/Analysis/ValueTracking/knownzero-shift.ll65
-rw-r--r--test/Analysis/ValueTracking/signbits-extract-elt.ll28
97 files changed, 6520 insertions, 1902 deletions
diff --git a/test/Analysis/AliasSet/intrinsics.ll b/test/Analysis/AliasSet/intrinsics.ll
new file mode 100644
index 000000000000..100b5a101346
--- /dev/null
+++ b/test/Analysis/AliasSet/intrinsics.ll
@@ -0,0 +1,19 @@
+; RUN: opt -basicaa -print-alias-sets -S -o - < %s 2>&1 | FileCheck %s
+
+; CHECK: Alias sets for function 'test1':
+; CHECK: Alias Set Tracker: 2 alias sets for 2 pointer values.
+; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Pointers: (i8* %a, 1)
+; CHECK-NOT: 1 Unknown instruction
+; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Pointers: (i8* %b, 1)
+define void @test1(i32 %c) {
+entry:
+ %a = alloca i8, align 1
+ %b = alloca i8, align 1
+ store i8 1, i8* %a, align 1
+ %cond1 = icmp ne i32 %c, 0
+ call void @llvm.assume(i1 %cond1)
+ store i8 1, i8* %b, align 1
+ ret void
+}
+
+declare void @llvm.assume(i1)
diff --git a/test/Analysis/AliasSet/memtransfer.ll b/test/Analysis/AliasSet/memtransfer.ll
new file mode 100644
index 000000000000..9f1ed63edf22
--- /dev/null
+++ b/test/Analysis/AliasSet/memtransfer.ll
@@ -0,0 +1,114 @@
+; RUN: opt -basicaa -print-alias-sets -S -o - < %s 2>&1 | FileCheck %s
+
+@s = global i8 1, align 1
+@d = global i8 2, align 1
+
+; CHECK: Alias sets for function 'test1':
+; CHECK: Alias Set Tracker: 3 alias sets for 4 pointer values.
+; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Pointers: (i8* %a, 1)
+; CHECK-NOT: 1 Unknown instructions
+; CHECK: AliasSet[0x{{[0-9a-f]+}}, 2] may alias, Mod/Ref Pointers: (i8* %s, 1), (i8* %d, 1)
+; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Pointers: (i8* %b, 1)
+define void @test1(i8* %s, i8* %d) {
+entry:
+ %a = alloca i8, align 1
+ %b = alloca i8, align 1
+ store i8 1, i8* %a, align 1
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 1, i32 1, i1 false)
+ store i8 1, i8* %b, align 1
+ ret void
+}
+
+; CHECK: Alias sets for function 'test2':
+; CHECK: Alias Set Tracker: 3 alias sets for 4 pointer values.
+; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Pointers: (i8* %a, 1)
+; CHECK-NOT: 1 Unknown instructions
+; CHECK: AliasSet[0x{{[0-9a-f]+}}, 2] may alias, Mod/Ref [volatile] Pointers: (i8* %s, 1), (i8* %d, 1)
+; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Pointers: (i8* %b, 1)
+define void @test2(i8* %s, i8* %d) {
+entry:
+ %a = alloca i8, align 1
+ %b = alloca i8, align 1
+ store i8 1, i8* %a, align 1
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 1, i32 1, i1 true)
+ store i8 1, i8* %b, align 1
+ ret void
+}
+
+; CHECK: Alias sets for function 'test3':
+; CHECK: Alias Set Tracker: 3 alias sets for 4 pointer values.
+; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Pointers: (i8* %a, 1)
+; CHECK-NOT: 1 Unknown instructions
+; CHECK: AliasSet[0x{{[0-9a-f]+}}, 2] may alias, Mod/Ref Pointers: (i8* %s, 1), (i8* %d, 1)
+; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Pointers: (i8* %b, 1)
+define void @test3(i8* %s, i8* %d) {
+entry:
+ %a = alloca i8, align 1
+ %b = alloca i8, align 1
+ store i8 1, i8* %a, align 1
+ call void @llvm.memmove.p0i8.p0i8.i64(i8* %d, i8* %s, i64 1, i32 1, i1 false)
+ store i8 1, i8* %b, align 1
+ ret void
+}
+
+; CHECK: Alias sets for function 'test4':
+; CHECK: Alias Set Tracker: 3 alias sets for 4 pointer values.
+; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Pointers: (i8* %a, 1)
+; CHECK-NOT: 1 Unknown instructions
+; CHECK: AliasSet[0x{{[0-9a-f]+}}, 2] may alias, Mod/Ref [volatile] Pointers: (i8* %s, 1), (i8* %d, 1)
+; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Pointers: (i8* %b, 1)
+define void @test4(i8* %s, i8* %d) {
+entry:
+ %a = alloca i8, align 1
+ %b = alloca i8, align 1
+ store i8 1, i8* %a, align 1
+ call void @llvm.memmove.p0i8.p0i8.i64(i8* %d, i8* %s, i64 1, i32 1, i1 true)
+ store i8 1, i8* %b, align 1
+ ret void
+}
+
+; CHECK: Alias sets for function 'test5':
+; CHECK: Alias Set Tracker: 2 alias sets for 2 pointer values.
+; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod/Ref Pointers: (i8* %a, 1)
+; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Pointers: (i8* %b, 1)
+define void @test5() {
+entry:
+ %a = alloca i8, align 1
+ %b = alloca i8, align 1
+ store i8 1, i8* %a, align 1
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %a, i64 1, i32 1, i1 false)
+ store i8 1, i8* %b, align 1
+ ret void
+}
+
+; CHECK: Alias sets for function 'test6':
+; CHECK: Alias Set Tracker: 2 alias sets for 2 pointer values.
+; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod/Ref Pointers: (i8* %a, 1)
+; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Pointers: (i8* %b, 1)
+define void @test6() {
+entry:
+ %a = alloca i8, align 1
+ %b = alloca i8, align 1
+ store i8 1, i8* %a, align 1
+ call void @llvm.memmove.p0i8.p0i8.i64(i8* %b, i8* %a, i64 1, i32 1, i1 false)
+ store i8 1, i8* %b, align 1
+ ret void
+}
+
+; CHECK: Alias sets for function 'test7':
+; CHECK: Alias Set Tracker: 2 alias sets for 2 pointer values.
+; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod/Ref Pointers: (i8* %a, 1)
+; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod/Ref Pointers: (i8* %b, 1)
+define void @test7() {
+entry:
+ %a = alloca i8, align 1
+ %b = alloca i8, align 1
+ store i8 1, i8* %a, align 1
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %a, i64 1, i32 1, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 1, i32 1, i1 false)
+ store i8 1, i8* %b, align 1
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
+declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
diff --git a/test/Analysis/AliasSet/saturation.ll b/test/Analysis/AliasSet/saturation.ll
new file mode 100644
index 000000000000..406ecbca1c21
--- /dev/null
+++ b/test/Analysis/AliasSet/saturation.ll
@@ -0,0 +1,53 @@
+; RUN: opt -basicaa -print-alias-sets -alias-set-saturation-threshold=2 -S -o - < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=NOSAT
+; RUN: opt -basicaa -print-alias-sets -alias-set-saturation-threshold=1 -S -o - < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=SAT
+
+; CHECK-LABEL: 'allmust'
+; CHECK: AliasSet[{{.*}}, 1] must alias, Mod Pointers: (i32* %a, 4)
+; CHECK: AliasSet[{{.*}}, 1] must alias, Mod Pointers: (i32* %b, 4)
+; CHECK: AliasSet[{{.*}}, 1] must alias, Mod Pointers: (i32* %c, 4)
+; CHECK: AliasSet[{{.*}}, 1] must alias, Mod Pointers: (i32* %d, 4)
+define void @allmust() {
+ %a = alloca i32
+ %b = alloca i32
+ %c = alloca i32
+ %d = alloca i32
+ store i32 1, i32* %a
+ store i32 2, i32* %b
+ store i32 3, i32* %c
+ store i32 4, i32* %d
+ ret void
+}
+
+; CHECK-LABEL :'mergemay'
+; NOSAT: AliasSet[{{.*}}, 2] may alias, Mod Pointers: (i32* %a, 4), (i32* %a1, 4)
+; NOSAT: AliasSet[{{.*}}, 1] must alias, Mod Pointers: (i32* %b, 4)
+; SAT: AliasSet[{{.*}}, 2] may alias, Mod forwarding to 0x[[FWD:[0-9a-f]*]]
+; SAT: AliasSet[{{.*}}, 1] must alias, Mod forwarding to 0x[[FWD]]
+; SAT: AliasSet[0x[[FWD]], 2] may alias, Mod/Ref Pointers: (i32* %a, 4), (i32* %a1, 4), (i32* %b, 4)
+define void @mergemay(i32 %k) {
+ %a = alloca i32
+ %b = alloca i32
+ store i32 1, i32* %a
+ store i32 2, i32* %b
+ %a1 = getelementptr i32, i32 *%a, i32 %k
+ store i32 2, i32* %a1
+ ret void
+}
+
+; CHECK-LABEL: 'mergemust'
+; NOSAT: AliasSet[{{.*}}, 1] must alias, Mod Pointers: (i32* %a, 4)
+; NOSAT: AliasSet[{{.*}}, 1] must alias, Mod Pointers: (i32* %b, 4)
+; NOSAT: AliasSet[{{.*}}, 2] may alias, Mod Pointers: (i32* %c, 4), (i32* %d, 4)
+; SAT: AliasSet[{{.*}}, 1] must alias, Mod forwarding to 0x[[FWD:[0-9a-f]*]]
+; SAT: AliasSet[{{.*}}, 1] must alias, Mod forwarding to 0x[[FWD]]
+; SAT: AliasSet[{{.*}}, 2] may alias, Mod forwarding to 0x[[FWD]]
+; SAT: AliasSet[0x[[FWD]], 3] may alias, Mod/Ref Pointers: (i32* %a, 4), (i32* %b, 4), (i32* %c, 4), (i32* %d, 4)
+define void @mergemust(i32* %c, i32* %d) {
+ %a = alloca i32
+ %b = alloca i32
+ store i32 1, i32* %a
+ store i32 2, i32* %b
+ store i32 3, i32* %c
+ store i32 4, i32* %d
+ ret void
+}
diff --git a/test/Analysis/BasicAA/assume.ll b/test/Analysis/BasicAA/assume.ll
index e163b5a4161c..f9f5353a4528 100644
--- a/test/Analysis/BasicAA/assume.ll
+++ b/test/Analysis/BasicAA/assume.ll
@@ -14,8 +14,8 @@ define void @test1(i8* %P, i8* %Q) nounwind ssp {
; CHECK: MayAlias: i8* %P, i8* %Q
; CHECK: NoModRef: Ptr: i8* %P <-> tail call void @llvm.assume(i1 true)
; CHECK: NoModRef: Ptr: i8* %Q <-> tail call void @llvm.assume(i1 true)
-; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
-; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
; CHECK: NoModRef: tail call void @llvm.assume(i1 true) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.assume(i1 true)
}
diff --git a/test/Analysis/BasicAA/cs-cs.ll b/test/Analysis/BasicAA/cs-cs.ll
index 3bc4d72eab35..0f74dbd92bbd 100644
--- a/test/Analysis/BasicAA/cs-cs.ll
+++ b/test/Analysis/BasicAA/cs-cs.ll
@@ -23,18 +23,18 @@ entry:
; CHECK-LABEL: Function: test1:
; CHECK: NoAlias: i8* %p, i8* %q
-; CHECK: Just Ref: Ptr: i8* %p <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #5
-; CHECK: NoModRef: Ptr: i8* %q <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #5
+; CHECK: Just Ref: Ptr: i8* %p <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16)
+; CHECK: NoModRef: Ptr: i8* %q <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16)
; CHECK: NoModRef: Ptr: i8* %p <-> call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16)
; CHECK: Both ModRef: Ptr: i8* %q <-> call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16)
-; CHECK: Just Ref: Ptr: i8* %p <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #5
-; CHECK: NoModRef: Ptr: i8* %q <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #5
-; CHECK: NoModRef: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #5 <-> call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16)
-; CHECK: NoModRef: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #5 <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #5
-; CHECK: NoModRef: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #5
-; CHECK: NoModRef: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #5
-; CHECK: NoModRef: %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #5 <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #5
-; CHECK: NoModRef: %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #5 <-> call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16)
+; CHECK: Just Ref: Ptr: i8* %p <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16)
+; CHECK: NoModRef: Ptr: i8* %q <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16)
+; CHECK: NoModRef: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #{{[0-9]+}} <-> call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16)
+; CHECK: NoModRef: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #{{[0-9]+}} <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16)
+; CHECK: NoModRef: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16)
+; CHECK: NoModRef: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16)
+; CHECK: NoModRef: %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #{{[0-9]+}} <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16)
+; CHECK: NoModRef: %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #{{[0-9]+}} <-> call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16)
}
define void @test2(i8* %P, i8* %Q) nounwind ssp {
@@ -45,12 +45,12 @@ define void @test2(i8* %P, i8* %Q) nounwind ssp {
; CHECK-LABEL: Function: test2:
; CHECK: MayAlias: i8* %P, i8* %Q
-; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
-; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
-; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
-; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
-; CHECK: Both ModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
-; CHECK: Both ModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
}
define void @test2a(i8* noalias %P, i8* noalias %Q) nounwind ssp {
@@ -161,12 +161,12 @@ define void @test3(i8* %P, i8* %Q) nounwind ssp {
; CHECK-LABEL: Function: test3:
; CHECK: MayAlias: i8* %P, i8* %Q
-; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false)
-; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false)
-; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
-; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
-; CHECK: Both ModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
-; CHECK: Both ModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false)
+; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false)
+; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false)
+; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false)
}
define void @test3a(i8* noalias %P, i8* noalias %Q) nounwind ssp {
@@ -211,14 +211,14 @@ define void @test5(i8* %P, i8* %Q, i8* %R) nounwind ssp {
; CHECK: MayAlias: i8* %P, i8* %Q
; CHECK: MayAlias: i8* %P, i8* %R
; CHECK: MayAlias: i8* %Q, i8* %R
-; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
-; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
; CHECK: Both ModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
-; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false)
+; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false)
; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false)
-; CHECK: Both ModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false)
-; CHECK: Both ModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false)
-; CHECK: Both ModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Ref: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false)
+; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false)
+; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
}
define void @test6(i8* %P) nounwind ssp {
@@ -247,10 +247,45 @@ define void @test7(i8* %P) nounwind ssp {
; CHECK: Just Ref: call void @a_readonly_func(i8* %P) <-> call void @a_writeonly_func(i8* %P)
}
-attributes #0 = { argmemonly nounwind readonly }
-attributes #1 = { argmemonly nounwind }
-attributes #2 = { noinline nounwind readonly }
-attributes #3 = { noinline nounwind writeonly }
-attributes #4 = { nounwind ssp }
-attributes #5 = { nounwind }
+declare void @an_inaccessiblememonly_func() nounwind inaccessiblememonly
+declare void @an_inaccessibleorargmemonly_func(i8 *) nounwind inaccessiblemem_or_argmemonly
+declare void @an_argmemonly_func(i8 *) nounwind argmemonly
+define void @test8(i8* %p) {
+entry:
+ %q = getelementptr i8, i8* %p, i64 16
+ call void @a_readonly_func(i8* %p)
+ call void @an_inaccessiblememonly_func()
+ call void @a_writeonly_func(i8* %q)
+ call void @an_inaccessiblememonly_func()
+ call void @an_inaccessibleorargmemonly_func(i8* %q)
+ call void @an_argmemonly_func(i8* %q)
+ ret void
+
+; CHECK-LABEL: Function: test8
+; CHECK: NoModRef: Ptr: i8* %p <-> call void @an_inaccessiblememonly_func()
+; CHECK: NoModRef: Ptr: i8* %q <-> call void @an_inaccessiblememonly_func()
+; CHECK: NoModRef: Ptr: i8* %p <-> call void @an_inaccessibleorargmemonly_func(i8* %q)
+; CHECK: Both ModRef: Ptr: i8* %q <-> call void @an_inaccessibleorargmemonly_func(i8* %q)
+; CHECK: NoModRef: Ptr: i8* %p <-> call void @an_argmemonly_func(i8* %q)
+; CHECK: Both ModRef: Ptr: i8* %q <-> call void @an_argmemonly_func(i8* %q)
+; CHECK: Just Ref: call void @a_readonly_func(i8* %p) <-> call void @an_inaccessiblememonly_func()
+; CHECK: Just Ref: call void @a_readonly_func(i8* %p) <-> call void @an_inaccessibleorargmemonly_func(i8* %q)
+; CHECK: Just Ref: call void @a_readonly_func(i8* %p) <-> call void @an_argmemonly_func(i8* %q)
+; CHECK: Both ModRef: call void @an_inaccessiblememonly_func() <-> call void @a_readonly_func(i8* %p)
+; CHECK: Both ModRef: call void @an_inaccessiblememonly_func() <-> call void @a_writeonly_func(i8* %q)
+; CHECK: Both ModRef: call void @an_inaccessiblememonly_func() <-> call void @an_inaccessiblememonly_func()
+; CHECK: Both ModRef: call void @an_inaccessiblememonly_func() <-> call void @an_inaccessibleorargmemonly_func(i8* %q)
+; CHECK: NoModRef: call void @an_inaccessiblememonly_func() <-> call void @an_argmemonly_func(i8* %q)
+; CHECK: Just Mod: call void @a_writeonly_func(i8* %q) <-> call void @an_inaccessiblememonly_func()
+; CHECK: Just Mod: call void @a_writeonly_func(i8* %q) <-> call void @an_inaccessibleorargmemonly_func(i8* %q)
+; CHECK: Just Mod: call void @a_writeonly_func(i8* %q) <-> call void @an_argmemonly_func(i8* %q)
+; CHECK: Both ModRef: call void @an_inaccessibleorargmemonly_func(i8* %q) <-> call void @a_readonly_func(i8* %p)
+; CHECK: Both ModRef: call void @an_inaccessibleorargmemonly_func(i8* %q) <-> call void @a_writeonly_func(i8* %q)
+; CHECK: Both ModRef: call void @an_inaccessibleorargmemonly_func(i8* %q) <-> call void @an_inaccessiblememonly_func()
+; CHECK: Both ModRef: call void @an_inaccessibleorargmemonly_func(i8* %q) <-> call void @an_argmemonly_func(i8* %q)
+; CHECK: Both ModRef: call void @an_argmemonly_func(i8* %q) <-> call void @a_readonly_func(i8* %p)
+; CHECK: Both ModRef: call void @an_argmemonly_func(i8* %q) <-> call void @a_writeonly_func(i8* %q)
+; CHECK: NoModRef: call void @an_argmemonly_func(i8* %q) <-> call void @an_inaccessiblememonly_func()
+; CHECK: Both ModRef: call void @an_argmemonly_func(i8* %q) <-> call void @an_inaccessibleorargmemonly_func(i8* %q)
+}
diff --git a/test/Analysis/BasicAA/full-store-partial-alias.ll b/test/Analysis/BasicAA/full-store-partial-alias.ll
index 20f6f7ec4ad0..e1337d6805b7 100644
--- a/test/Analysis/BasicAA/full-store-partial-alias.ll
+++ b/test/Analysis/BasicAA/full-store-partial-alias.ll
@@ -31,7 +31,7 @@ entry:
!0 = !{!4, !4, i64 0}
!1 = !{!"omnipotent char", !2}
-!2 = !{!"Simple C/C++ TBAA", null}
+!2 = !{!"Simple C/C++ TBAA"}
!3 = !{!5, !5, i64 0}
!4 = !{!"double", !1}
!5 = !{!"int", !1}
diff --git a/test/Analysis/BasicAA/gep-and-alias.ll b/test/Analysis/BasicAA/gep-and-alias.ll
new file mode 100644
index 000000000000..4ec64305900d
--- /dev/null
+++ b/test/Analysis/BasicAA/gep-and-alias.ll
@@ -0,0 +1,43 @@
+; RUN: opt -S -basicaa -gvn < %s | FileCheck %s
+
+target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.6.0"
+
+; The load and store address in the loop body could alias so the load
+; can't be hoisted above the store and out of the loop.
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i32, i1)
+
+define i32 @foo(i32 %x, i32 %z, i32 %n) {
+entry:
+ %pool = alloca [59 x i32], align 4
+ %tmp = bitcast [59 x i32]* %pool to i8*
+ call void @llvm.memset.p0i8.i32(i8* nonnull %tmp, i8 0, i32 236, i32 4, i1 false)
+ %cmp3 = icmp eq i32 %n, 0
+ br i1 %cmp3, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph: ; preds = %entry
+ %add = add i32 %z, %x
+ %and = and i32 %add, 2147483647
+ %sub = add nsw i32 %and, -2137521902
+ %arrayidx = getelementptr inbounds [59 x i32], [59 x i32]* %pool, i32 0, i32 %sub
+ %arrayidx1 = getelementptr inbounds [59 x i32], [59 x i32]* %pool, i32 0, i32 42
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+ store i32 %i.04, i32* %arrayidx, align 4
+ %tmp1 = load i32, i32* %arrayidx1, align 4
+ %inc = add nuw i32 %i.04, 1
+ %exitcond = icmp ne i32 %inc, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ %lcssa = phi i32 [ %tmp1, %for.body ]
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ %s = phi i32 [ 0, %entry ], [ %lcssa, %for.end.loopexit ]
+; CHECK: ret i32 %s
+ ret i32 %s
+}
diff --git a/test/Analysis/BasicAA/guards.ll b/test/Analysis/BasicAA/guards.ll
index 66cfb156b7d9..e90328255252 100644
--- a/test/Analysis/BasicAA/guards.ll
+++ b/test/Analysis/BasicAA/guards.ll
@@ -14,8 +14,8 @@ define void @test1(i8* %P, i8* %Q) {
; CHECK: Just Ref: Ptr: i8* %P <-> tail call void (i1, ...) @llvm.experimental.guard(i1 true) [ "deopt"() ]
; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void (i1, ...) @llvm.experimental.guard(i1 true) [ "deopt"() ]
-; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
-; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
; CHECK: Just Ref: tail call void (i1, ...) @llvm.experimental.guard(i1 true) [ "deopt"() ] <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void (i1, ...) @llvm.experimental.guard(i1 true) [ "deopt"() ]
}
diff --git a/test/Analysis/BasicAA/invalidation.ll b/test/Analysis/BasicAA/invalidation.ll
new file mode 100644
index 000000000000..0eaf7752f89b
--- /dev/null
+++ b/test/Analysis/BasicAA/invalidation.ll
@@ -0,0 +1,47 @@
+; Test that the BasicAA analysis gets invalidated when its dependencies go
+; away.
+;
+; Check DomTree specifically.
+; RUN: opt -disable-output -disable-verify -debug-pass-manager %s 2>&1 \
+; RUN: -passes='require<aa>,invalidate<domtree>,aa-eval' -aa-pipeline='basic-aa' \
+; RUN: | FileCheck %s --check-prefix=CHECK-DT-INVALIDATE
+; CHECK-DT-INVALIDATE: Running pass: RequireAnalysisPass
+; CHECK-DT-INVALIDATE: Running analysis: BasicAA
+; CHECK-DT-INVALIDATE: Running pass: InvalidateAnalysisPass
+; CHECK-DT-INVALIDATE: Invalidating analysis: DominatorTreeAnalysis
+; CHECK-DT-INVALIDATE: Invalidating analysis: BasicAA
+; CHECK-DT-INVALIDATE: Running pass: AAEvaluator
+; CHECK-DT-INVALIDATE: Running analysis: BasicAA
+;
+; Check LoopInfo specifically.
+; RUN: opt -disable-output -disable-verify -debug-pass-manager %s 2>&1 \
+; RUN: -passes='require<loops>,require<aa>,invalidate<loops>,aa-eval' -aa-pipeline='basic-aa' \
+; RUN: | FileCheck %s --check-prefix=CHECK-LI-INVALIDATE
+; CHECK-LI-INVALIDATE: Running pass: RequireAnalysisPass
+; CHECK-LI-INVALIDATE: Running analysis: BasicAA
+; CHECK-LI-INVALIDATE: Running pass: InvalidateAnalysisPass
+; CHECK-LI-INVALIDATE: Invalidating analysis: LoopAnalysis
+; CHECK-LI-INVALIDATE: Invalidating analysis: BasicAA
+; CHECK-LI-INVALIDATE: Running pass: AAEvaluator
+; CHECK-LI-INVALIDATE: Running analysis: BasicAA
+
+; Some code that will result in actual AA queries, including inside of a loop.
+; FIXME: Sadly, none of these queries managed to use either the domtree or
+; loopinfo that basic-aa cache. But nor does any other test in LLVM. It would
+; be good to enhance this to actually use these other analyses to make this
+; a more thorough test.
+define void @foo(i1 %x, i8* %p1, i8* %p2) {
+entry:
+ %p3 = alloca i8
+ store i8 42, i8* %p1
+ %gep2 = getelementptr i8, i8* %p2, i32 0
+ br i1 %x, label %loop, label %exit
+
+loop:
+ store i8 13, i8* %p3
+ %tmp1 = load i8, i8* %gep2
+ br label %loop
+
+exit:
+ ret void
+}
diff --git a/test/Analysis/BranchProbabilityInfo/basic.ll b/test/Analysis/BranchProbabilityInfo/basic.ll
index d86709130f34..67d3e9e850c3 100644
--- a/test/Analysis/BranchProbabilityInfo/basic.ll
+++ b/test/Analysis/BranchProbabilityInfo/basic.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -analyze -branch-prob | FileCheck %s
+; RUN: opt < %s -analyze -lazy-branch-prob | FileCheck %s
; RUN: opt < %s -passes='print<branch-prob>' -disable-output 2>&1 | FileCheck %s
define i32 @test1(i32 %i, i32* %a) {
@@ -175,6 +176,99 @@ exit:
ret i32 %ret
}
+; CHECK-LABEL: test_invoke_code_callsite1
+define i32 @test_invoke_code_callsite1(i1 %c) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+ br i1 %c, label %if.then, label %if.end
+; Edge "entry->if.end" should have higher probability based on the cold call
+; heuristic which treat %if.then as a cold block because the normal destination
+; of the invoke instruction in %if.then is post-dominated by ColdFunc().
+; CHECK: edge entry -> if.then probability is 0x07878788 / 0x80000000 = 5.88%
+; CHECK: edge entry -> if.end probability is 0x78787878 / 0x80000000 = 94.12% [HOT edge]
+
+if.then:
+ invoke i32 @InvokeCall()
+ to label %invoke.cont unwind label %lpad
+; CHECK: edge if.then -> invoke.cont probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge]
+; CHECK: edge if.then -> lpad probability is 0x00000800 / 0x80000000 = 0.00%
+
+invoke.cont:
+ call void @ColdFunc() #0
+ br label %if.end
+
+lpad:
+ %ll = landingpad { i8*, i32 }
+ cleanup
+ br label %if.end
+
+if.end:
+ ret i32 0
+}
+
+; CHECK-LABEL: test_invoke_code_callsite2
+define i32 @test_invoke_code_callsite2(i1 %c) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+ br i1 %c, label %if.then, label %if.end
+
+; CHECK: edge entry -> if.then probability is 0x40000000 / 0x80000000 = 50.00%
+; CHECK: edge entry -> if.end probability is 0x40000000 / 0x80000000 = 50.00%
+
+if.then:
+ invoke i32 @InvokeCall()
+ to label %invoke.cont unwind label %lpad
+; The cold call heuristic should not kick in when the cold callsite is in EH path.
+; CHECK: edge if.then -> invoke.cont probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge]
+; CHECK: edge if.then -> lpad probability is 0x00000800 / 0x80000000 = 0.00%
+
+invoke.cont:
+ br label %if.end
+
+lpad:
+ %ll = landingpad { i8*, i32 }
+ cleanup
+ call void @ColdFunc() #0
+ br label %if.end
+
+if.end:
+ ret i32 0
+}
+
+; CHECK-LABEL: test_invoke_code_callsite3
+define i32 @test_invoke_code_callsite3(i1 %c) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+ br i1 %c, label %if.then, label %if.end
+; CHECK: edge entry -> if.then probability is 0x07878788 / 0x80000000 = 5.88%
+; CHECK: edge entry -> if.end probability is 0x78787878 / 0x80000000 = 94.12% [HOT edge]
+
+if.then:
+ invoke i32 @InvokeCall()
+ to label %invoke.cont unwind label %lpad
+; Regardless of cold calls, edge weights from a invoke instruction should be
+; determined by the invoke heuristic.
+; CHECK: edge if.then -> invoke.cont probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge]
+; CHECK: edge if.then -> lpad probability is 0x00000800 / 0x80000000 = 0.00%
+
+invoke.cont:
+ call void @ColdFunc() #0
+ br label %if.end
+
+lpad:
+ %ll = landingpad { i8*, i32 }
+ cleanup
+ call void @ColdFunc() #0
+ br label %if.end
+
+if.end:
+ ret i32 0
+}
+
+declare i32 @__gxx_personality_v0(...)
+declare void @ColdFunc()
+declare i32 @InvokeCall()
+
+attributes #0 = { cold }
+
+
define i32 @zero1(i32 %i, i32 %a, i32 %b) {
; CHECK: Printing analysis {{.*}} for function 'zero1'
entry:
diff --git a/test/Analysis/CFLAliasAnalysis/Andersen/basic-interproc.ll b/test/Analysis/CFLAliasAnalysis/Andersen/basic-interproc.ll
new file mode 100644
index 000000000000..9ec52521a046
--- /dev/null
+++ b/test/Analysis/CFLAliasAnalysis/Andersen/basic-interproc.ll
@@ -0,0 +1,22 @@
+; This testcase ensures that CFL AA won't be too conservative when trying to do
+; interprocedural analysis on simple callee
+
+; RUN: opt < %s -disable-basicaa -cfl-anders-aa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+; RUN: opt < %s -aa-pipeline=cfl-anders-aa -passes=aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+
+; CHECK-LABEL: Function: noop_callee
+; CHECK: MayAlias: i32* %arg1, i32* %arg2
+define void @noop_callee(i32* %arg1, i32* %arg2) {
+ store i32 0, i32* %arg1
+ store i32 0, i32* %arg2
+ ret void
+}
+; CHECK-LABEL: Function: test_noop
+; CHECK: NoAlias: i32* %a, i32* %b
+define void @test_noop() {
+ %a = alloca i32, align 4
+ %b = alloca i32, align 4
+ call void @noop_callee(i32* %a, i32* %b)
+
+ ret void
+}
diff --git a/test/Analysis/CFLAliasAnalysis/Andersen/interproc-arg-deref-escape.ll b/test/Analysis/CFLAliasAnalysis/Andersen/interproc-arg-deref-escape.ll
new file mode 100644
index 000000000000..c71f40403f6e
--- /dev/null
+++ b/test/Analysis/CFLAliasAnalysis/Andersen/interproc-arg-deref-escape.ll
@@ -0,0 +1,33 @@
+; This testcase ensures that CFL AA answers queries soundly when callee tries
+; to escape the memory pointed to by its parameters
+
+; RUN: opt < %s -disable-basicaa -cfl-anders-aa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+; RUN: opt < %s -aa-pipeline=cfl-anders-aa -passes=aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+
+declare void @opaque(i32*)
+define void @escape_arg_deref(i32** %arg) {
+ %arg_deref = load i32*, i32** %arg
+ call void @opaque(i32* %arg_deref)
+ ret void
+}
+; CHECK-LABEL: Function: test_arg_deref_escape
+; CHECK: NoAlias: i32* %a, i32** %x
+; CHECK: NoAlias: i32* %b, i32** %x
+; CHECK: NoAlias: i32* %a, i32* %b
+; CHECK: NoAlias: i32** %p, i32** %x
+; CHECK: NoAlias: i32* %a, i32** %p
+; CHECK: NoAlias: i32* %b, i32** %p
+; CHECK: MayAlias: i32* %a, i32* %c
+; CHECK: NoAlias: i32* %b, i32* %c
+; CHECK: NoAlias: i32* %c, i32** %p
+define void @test_arg_deref_escape(i32** %x) {
+ %a = alloca i32, align 4
+ %b = alloca i32, align 4
+ %p = alloca i32*, align 4
+
+ store i32* %a, i32** %p
+ call void @escape_arg_deref(i32** %p)
+ %c = load i32*, i32** %x
+
+ ret void
+} \ No newline at end of file
diff --git a/test/Analysis/CFLAliasAnalysis/Andersen/interproc-arg-escape.ll b/test/Analysis/CFLAliasAnalysis/Andersen/interproc-arg-escape.ll
new file mode 100644
index 000000000000..a87764fa7f71
--- /dev/null
+++ b/test/Analysis/CFLAliasAnalysis/Andersen/interproc-arg-escape.ll
@@ -0,0 +1,31 @@
+; This testcase ensures that CFL AA answers queries soundly when callee tries
+; to escape its parameters
+
+; RUN: opt < %s -disable-basicaa -cfl-anders-aa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+; RUN: opt < %s -aa-pipeline=cfl-anders-aa -passes=aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+
+declare void @opaque(i32*)
+define void @escape_arg(i32* %arg) {
+ call void @opaque(i32* %arg)
+ ret void
+}
+; CHECK-LABEL: Function: test_arg_escape
+; CHECK: NoAlias: i32* %a, i32** %x
+; CHECK: NoAlias: i32* %b, i32** %x
+; CHECK: NoAlias: i32* %a, i32* %b
+; CHECK: NoAlias: i32* %c, i32** %x
+; CHECK: NoAlias: i32* %a, i32* %c
+; CHECK: NoAlias: i32* %b, i32* %c
+; CHECK: MayAlias: i32* %a, i32* %d
+; CHECK: MayAlias: i32* %b, i32* %d
+; CHECK: NoAlias: i32* %c, i32* %d
+define void @test_arg_escape(i32** %x) {
+ %a = alloca i32, align 4
+ %b = alloca i32, align 4
+ %c = alloca i32, align 4
+ call void @escape_arg(i32* %a)
+ call void @escape_arg(i32* %b)
+ %d = load i32*, i32** %x
+
+ ret void
+} \ No newline at end of file
diff --git a/test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-arg.ll b/test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-arg.ll
new file mode 100644
index 000000000000..002ff173e7c6
--- /dev/null
+++ b/test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-arg.ll
@@ -0,0 +1,26 @@
+; This testcase ensures that CFL AA answers queries soundly when callee tries
+; to return one of its parameters
+
+; RUN: opt < %s -disable-basicaa -cfl-anders-aa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+; RUN: opt < %s -aa-pipeline=cfl-anders-aa -passes=aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+
+define i32* @return_arg_callee(i32* %arg1, i32* %arg2) {
+ ret i32* %arg1
+}
+; CHECK-LABEL: Function: test_return_arg
+; CHECK: NoAlias: i32* %a, i32* %b
+; CHECK: MayAlias: i32* %a, i32* %c
+; CHECK: NoAlias: i32* %b, i32* %c
+
+; Temporarily disable modref checks
+; NoModRef: Ptr: i32* %a <-> %c = call i32* @return_arg_callee(i32* %a, i32* %b)
+; NoModRef: Ptr: i32* %b <-> %c = call i32* @return_arg_callee(i32* %a, i32* %b)
+; NoModRef: Ptr: i32* %c <-> %c = call i32* @return_arg_callee(i32* %a, i32* %b)
+define void @test_return_arg() {
+ %a = alloca i32, align 4
+ %b = alloca i32, align 4
+
+ %c = call i32* @return_arg_callee(i32* %a, i32* %b)
+
+ ret void
+} \ No newline at end of file
diff --git a/test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-deref-arg-multilevel.ll b/test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-deref-arg-multilevel.ll
new file mode 100644
index 000000000000..764d14449104
--- /dev/null
+++ b/test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-deref-arg-multilevel.ll
@@ -0,0 +1,52 @@
+; This testcase ensures that CFL AA answers queries soundly when callee tries
+; to return the multi-level dereference of one of its parameters
+
+; RUN: opt < %s -disable-basicaa -cfl-anders-aa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+; RUN: opt < %s -aa-pipeline=cfl-anders-aa -passes=aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+
+define i32* @return_deref_arg_multilevel_callee(i32*** %arg1) {
+ %deref = load i32**, i32*** %arg1
+ %deref2 = load i32*, i32** %deref
+ ret i32* %deref2
+}
+; CHECK-LABEL: Function: test_return_deref_arg_multilevel
+; CHECK: NoAlias: i32* %a, i32* %b
+; CHECK: MayAlias: i32* %a, i32* %c
+; CHECK: NoAlias: i32* %b, i32* %c
+; CHECK: NoAlias: i32* %c, i32** %p
+; CHECK: NoAlias: i32* %c, i32*** %pp
+; CHECK: MayAlias: i32** %lpp, i32** %p
+; CHECK: NoAlias: i32** %lpp, i32*** %pp
+; CHECK: NoAlias: i32* %c, i32** %lpp
+; CHECK: MayAlias: i32* %a, i32* %lpp_deref
+; CHECK: NoAlias: i32* %b, i32* %lpp_deref
+; CHECK: NoAlias: i32* %lpp_deref, i32*** %pp
+; CHECK: MayAlias: i32* %a, i32* %lp
+; CHECK: NoAlias: i32* %b, i32* %lp
+; CHECK: NoAlias: i32* %lp, i32** %p
+; CHECK: NoAlias: i32* %lp, i32*** %pp
+; CHECK: MayAlias: i32* %c, i32* %lp
+; CHECK: NoAlias: i32* %lp, i32** %lpp
+; CHECK: MayAlias: i32* %lp, i32* %lpp_deref
+
+; Temporarily disable modref checks
+; Just Ref: Ptr: i32** %p <-> %c = call i32* @return_deref_arg_multilevel_callee(i32*** %pp)
+; Just Ref: Ptr: i32*** %pp <-> %c = call i32* @return_deref_arg_multilevel_callee(i32*** %pp)
+; Just Ref: Ptr: i32** %lpp <-> %c = call i32* @return_deref_arg_multilevel_callee(i32*** %pp)
+
+define void @test_return_deref_arg_multilevel() {
+ %a = alloca i32, align 4
+ %b = alloca i32, align 4
+ %p = alloca i32*, align 8
+ %pp = alloca i32**, align 8
+
+ store i32* %a, i32** %p
+ store i32** %p, i32*** %pp
+ %c = call i32* @return_deref_arg_multilevel_callee(i32*** %pp)
+
+ %lpp = load i32**, i32*** %pp
+ %lpp_deref = load i32*, i32** %lpp
+ %lp = load i32*, i32** %p
+
+ ret void
+} \ No newline at end of file
diff --git a/test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-deref-arg.ll b/test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-deref-arg.ll
new file mode 100644
index 000000000000..76e7c77bf87b
--- /dev/null
+++ b/test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-deref-arg.ll
@@ -0,0 +1,37 @@
+; This testcase ensures that CFL AA answers queries soundly when callee tries
+; to return the dereference of one of its parameters
+
+; RUN: opt < %s -disable-basicaa -cfl-anders-aa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+; RUN: opt < %s -aa-pipeline=cfl-anders-aa -passes=aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+
+define i32* @return_deref_arg_callee(i32** %arg1) {
+ %deref = load i32*, i32** %arg1
+ ret i32* %deref
+}
+; CHECK-LABEL: Function: test_return_deref_arg
+; CHECK: NoAlias: i32* %a, i32* %b
+; CHECK: MayAlias: i32* %a, i32* %c
+; CHECK: NoAlias: i32* %b, i32* %c
+; CHECK: MayAlias: i32* %a, i32* %lp
+; CHECK: NoAlias: i32* %b, i32* %lp
+; CHECK: NoAlias: i32* %lp, i32** %p
+; CHECK: MayAlias: i32* %c, i32* %lp
+
+; Temporarily disable modref checks
+; NoModRef: Ptr: i32* %a <-> %c = call i32* @return_deref_arg_callee(i32** %p)
+; NoModRef: Ptr: i32* %b <-> %c = call i32* @return_deref_arg_callee(i32** %p)
+; Just Ref: Ptr: i32** %p <-> %c = call i32* @return_deref_arg_callee(i32** %p)
+; NoModRef: Ptr: i32* %c <-> %c = call i32* @return_deref_arg_callee(i32** %p)
+; NoModRef: Ptr: i32* %lp <-> %c = call i32* @return_deref_arg_callee(i32** %p)
+define void @test_return_deref_arg() {
+ %a = alloca i32, align 4
+ %b = alloca i32, align 4
+ %p = alloca i32*, align 8
+
+ store i32* %a, i32** %p
+ %c = call i32* @return_deref_arg_callee(i32** %p)
+
+ %lp = load i32*, i32** %p
+
+ ret void
+} \ No newline at end of file
diff --git a/test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-escape.ll b/test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-escape.ll
new file mode 100644
index 000000000000..5eb8f30dbaab
--- /dev/null
+++ b/test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-escape.ll
@@ -0,0 +1,33 @@
+; This testcase ensures that CFL AA answers queries soundly when callee tries
+; to return an escaped pointer
+
+; RUN: opt < %s -disable-basicaa -cfl-anders-aa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+; RUN: opt < %s -aa-pipeline=cfl-anders-aa -passes=aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+
+declare noalias i8* @malloc(i64)
+declare void @opaque(i32*)
+
+define i32* @return_escaped_callee() {
+ %ptr = call noalias i8* @malloc(i64 8)
+ %ptr_cast = bitcast i8* %ptr to i32*
+ call void @opaque(i32* %ptr_cast)
+ ret i32* %ptr_cast
+}
+; CHECK-LABEL: Function: test_return_escape
+; CHECK: NoAlias: i32* %a, i32** %x
+; CHECK: NoAlias: i32* %b, i32** %x
+; CHECK: NoAlias: i32* %a, i32* %b
+; CHECK: NoAlias: i32* %c, i32** %x
+; CHECK: NoAlias: i32* %a, i32* %c
+; CHECK: NoAlias: i32* %b, i32* %c
+; CHECK: NoAlias: i32* %a, i32* %d
+; CHECK: MayAlias: i32* %b, i32* %d
+; CHECK: MayAlias: i32* %c, i32* %d
+define void @test_return_escape(i32** %x) {
+ %a = alloca i32, align 4
+ %b = call i32* @return_escaped_callee()
+ %c = call i32* @return_escaped_callee()
+ %d = load i32*, i32** %x
+
+ ret void
+} \ No newline at end of file
diff --git a/test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-ref-arg-multilevel.ll b/test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-ref-arg-multilevel.ll
new file mode 100644
index 000000000000..4e3222dff526
--- /dev/null
+++ b/test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-ref-arg-multilevel.ll
@@ -0,0 +1,53 @@
+; This testcase ensures that CFL AA answers queries soundly when callee tries
+; to return the multi-level reference of one of its parameters
+
+; RUN: opt < %s -disable-basicaa -cfl-anders-aa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+; RUN: opt < %s -aa-pipeline=cfl-anders-aa -passes=aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+
+declare noalias i8* @malloc(i64)
+
+define i32*** @return_ref_arg_multilevel_callee(i32* %arg1) {
+ %ptr = call noalias i8* @malloc(i64 8)
+ %ptr_cast = bitcast i8* %ptr to i32***
+ %ptr2 = call noalias i8* @malloc(i64 8)
+ %ptr_cast2 = bitcast i8* %ptr2 to i32**
+ store i32* %arg1, i32** %ptr_cast2
+ store i32** %ptr_cast2, i32*** %ptr_cast
+ ret i32*** %ptr_cast
+}
+; CHECK-LABEL: Function: test_return_ref_arg_multilevel
+; CHECK: NoAlias: i32* %a, i32*** %b
+; CHECK: NoAlias: i32** %p, i32*** %b
+; CHECK: NoAlias: i32*** %b, i32*** %pp
+; CHECK: NoAlias: i32* %a, i32** %lb
+; CHECK: NoAlias: i32** %lb, i32** %p
+; CHECK: NoAlias: i32** %lb, i32*** %pp
+; CHECK: NoAlias: i32** %lb, i32*** %b
+; CHECK: MayAlias: i32* %a, i32* %lb_deref
+; CHECK: NoAlias: i32* %lb_deref, i32** %lpp
+; CHECK: MayAlias: i32* %lb_deref, i32* %lpp_deref
+; CHECK: NoAlias: i32* %lpp_deref, i32** %lpp
+; CHECK: MayAlias: i32* %lb_deref, i32* %lp
+; CHECK: NoAlias: i32* %lp, i32** %lpp
+; CHECK: MayAlias: i32* %lp, i32* %lpp_deref
+
+; Temporarily disable modref checks
+; Just Mod: Ptr: i32*** %b <-> %b = call i32*** @return_ref_arg_multilevel_callee(i32* %a)
+; Just Mod: Ptr: i32** %lb <-> %b = call i32*** @return_ref_arg_multilevel_callee(i32* %a)
+define void @test_return_ref_arg_multilevel() {
+ %a = alloca i32, align 4
+ %p = alloca i32*, align 8
+ %pp = alloca i32**, align 8
+
+ store i32* %a, i32** %p
+ store i32** %p, i32*** %pp
+ %b = call i32*** @return_ref_arg_multilevel_callee(i32* %a)
+
+ %lb = load i32**, i32*** %b
+ %lb_deref = load i32*, i32** %lb
+ %lpp = load i32**, i32*** %pp
+ %lpp_deref = load i32*, i32** %lpp
+ %lp = load i32*, i32** %p
+
+ ret void
+} \ No newline at end of file
diff --git a/test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-ref-arg.ll b/test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-ref-arg.ll
new file mode 100644
index 000000000000..623468585162
--- /dev/null
+++ b/test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-ref-arg.ll
@@ -0,0 +1,37 @@
+; This testcase ensures that CFL AA answers queries soundly when callee tries
+; to return the reference of one of its parameters
+
+; RUN: opt < %s -disable-basicaa -cfl-anders-aa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+; RUN: opt < %s -aa-pipeline=cfl-anders-aa -passes=aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+
+declare noalias i8* @malloc(i64)
+
+define i32** @return_ref_arg_callee(i32* %arg1) {
+ %ptr = call noalias i8* @malloc(i64 8)
+ %ptr_cast = bitcast i8* %ptr to i32**
+ store i32* %arg1, i32** %ptr_cast
+ ret i32** %ptr_cast
+}
+; CHECK-LABEL: Function: test_return_ref_arg
+; CHECK: NoAlias: i32** %b, i32** %p
+; CHECK: MayAlias: i32* %a, i32* %lb
+; CHECK: NoAlias: i32* %lb, i32** %p
+; CHECK: NoAlias: i32* %lb, i32** %b
+; CHECK: NoAlias: i32* %lp, i32** %p
+; CHECK: NoAlias: i32* %lp, i32** %b
+; CHECK: MayAlias: i32* %lb, i32* %lp
+
+; Temporarily disable modref checks
+; Just Mod: Ptr: i32** %b <-> %b = call i32** @return_ref_arg_callee(i32* %a)
+define void @test_return_ref_arg() {
+ %a = alloca i32, align 4
+ %p = alloca i32*, align 8
+
+ store i32* %a, i32** %p
+ %b = call i32** @return_ref_arg_callee(i32* %a)
+
+ %lb = load i32*, i32** %b
+ %lp = load i32*, i32** %p
+
+ ret void
+} \ No newline at end of file
diff --git a/test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-unknown.ll b/test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-unknown.ll
new file mode 100644
index 000000000000..4c20269896ab
--- /dev/null
+++ b/test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-unknown.ll
@@ -0,0 +1,38 @@
+; This testcase ensures that CFL AA answers queries soundly when callee tries
+; to return an unknown pointer
+
+; RUN: opt < %s -disable-basicaa -cfl-anders-aa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+; RUN: opt < %s -aa-pipeline=cfl-anders-aa -passes=aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+
+@g = external global i32
+define i32* @return_unknown_callee(i32* %arg1, i32* %arg2) {
+ ret i32* @g
+}
+; CHECK-LABEL: Function: test_return_unknown
+; CHECK: NoAlias: i32* %a, i32* %b
+; CHECK: MayAlias: i32* %c, i32* %x
+; CHECK: NoAlias: i32* %a, i32* %c
+; CHECK: NoAlias: i32* %b, i32* %c
+define void @test_return_unknown(i32* %x) {
+ %a = alloca i32, align 4
+ %b = alloca i32, align 4
+
+ %c = call i32* @return_unknown_callee(i32* %a, i32* %b)
+
+ ret void
+}
+
+@g2 = external global i32*
+define i32** @return_unknown_callee2() {
+ ret i32** @g2
+}
+; CHECK-LABEL: Function: test_return_unknown2
+; CHECK: MayAlias: i32* %x, i32** %a
+; CHECK: MayAlias: i32* %b, i32* %x
+; CHECK: MayAlias: i32* %b, i32** %a
+define void @test_return_unknown2(i32* %x) {
+ %a = call i32** @return_unknown_callee2()
+ %b = load i32*, i32** %a
+
+ ret void
+} \ No newline at end of file
diff --git a/test/Analysis/CFLAliasAnalysis/Andersen/interproc-store-arg-multilevel.ll b/test/Analysis/CFLAliasAnalysis/Andersen/interproc-store-arg-multilevel.ll
new file mode 100644
index 000000000000..bf19e270b646
--- /dev/null
+++ b/test/Analysis/CFLAliasAnalysis/Andersen/interproc-store-arg-multilevel.ll
@@ -0,0 +1,45 @@
+; This testcase ensures that CFL AA answers queries soundly when callee tries
+; to mutate the memory pointed to by its parameters
+
+; RUN: opt < %s -disable-basicaa -cfl-anders-aa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+; RUN: opt < %s -aa-pipeline=cfl-anders-aa -passes=aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+
+declare noalias i8* @malloc(i64)
+
+define void @store_arg_multilevel_callee(i32*** %arg1, i32* %arg2) {
+ %ptr = call noalias i8* @malloc(i64 8)
+ %ptr_cast = bitcast i8* %ptr to i32**
+ store i32* %arg2, i32** %ptr_cast
+ store i32** %ptr_cast, i32*** %arg1
+ ret void
+}
+; CHECK-LABEL: Function: test_store_arg_multilevel
+; CHECK: NoAlias: i32* %a, i32* %b
+; CHECK: NoAlias: i32* %a, i32** %lpp
+; CHECK: NoAlias: i32* %b, i32** %lpp
+; CHECK: MayAlias: i32** %lpp, i32** %p
+; CHECK: MayAlias: i32* %a, i32* %lpp_deref
+; CHECK: MayAlias: i32* %b, i32* %lpp_deref
+; CHECK: NoAlias: i32* %lpp_deref, i32** %p
+; CHECK: NoAlias: i32* %lpp_deref, i32*** %pp
+; CHECK: NoAlias: i32* %lpp_deref, i32** %lpp
+; CHECK: MayAlias: i32* %a, i32* %lp
+; CHECK: NoAlias: i32* %lp, i32*** %pp
+; CHECK: NoAlias: i32* %lp, i32** %lpp
+; CHECK: MayAlias: i32* %lp, i32* %lpp_deref
+define void @test_store_arg_multilevel() {
+ %a = alloca i32, align 4
+ %b = alloca i32, align 4
+ %p = alloca i32*, align 8
+ %pp = alloca i32**, align 8
+
+ store i32* %a, i32** %p
+ store i32** %p, i32*** %pp
+ call void @store_arg_multilevel_callee(i32*** %pp, i32* %b)
+
+ %lpp = load i32**, i32*** %pp
+ %lpp_deref = load i32*, i32** %lpp
+ %lp = load i32*, i32** %p
+
+ ret void
+} \ No newline at end of file
diff --git a/test/Analysis/CFLAliasAnalysis/Andersen/interproc-store-arg-unknown.ll b/test/Analysis/CFLAliasAnalysis/Andersen/interproc-store-arg-unknown.ll
new file mode 100644
index 000000000000..05717b5ddb30
--- /dev/null
+++ b/test/Analysis/CFLAliasAnalysis/Andersen/interproc-store-arg-unknown.ll
@@ -0,0 +1,32 @@
+; This testcase ensures that CFL AA answers queries soundly when callee tries
+; to mutate the memory pointed to by its parameters
+
+; RUN: opt < %s -disable-basicaa -cfl-anders-aa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+; RUN: opt < %s -aa-pipeline=cfl-anders-aa -passes=aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+
+@g = external global i32
+
+define void @store_arg_unknown_callee(i32** %arg1) {
+ store i32* @g, i32** %arg1
+ ret void
+}
+; CHECK-LABEL: Function: test_store_arg_unknown
+; CHECK: NoAlias: i32* %x, i32** %p
+; CHECK: NoAlias: i32* %a, i32** %p
+; CHECK: NoAlias: i32* %b, i32** %p
+; CHECK: MayAlias: i32* %lp, i32* %x
+; CHECK: MayAlias: i32* %a, i32* %lp
+; CHECK: NoAlias: i32* %b, i32* %lp
+; CHECK: NoAlias: i32* %lp, i32** %p
+define void @test_store_arg_unknown(i32* %x) {
+ %a = alloca i32, align 4
+ %b = alloca i32, align 4
+ %p = alloca i32*, align 8
+
+ store i32* %a, i32** %p
+ call void @store_arg_unknown_callee(i32** %p)
+
+ %lp = load i32*, i32** %p
+
+ ret void
+} \ No newline at end of file
diff --git a/test/Analysis/CFLAliasAnalysis/Andersen/interproc-store-arg.ll b/test/Analysis/CFLAliasAnalysis/Andersen/interproc-store-arg.ll
new file mode 100644
index 000000000000..89c02198f73b
--- /dev/null
+++ b/test/Analysis/CFLAliasAnalysis/Andersen/interproc-store-arg.ll
@@ -0,0 +1,40 @@
+; This testcase ensures that CFL AA answers queries soundly when callee tries
+; to mutate the memory pointed to by its parameters
+
+; RUN: opt < %s -disable-basicaa -cfl-anders-aa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+; RUN: opt < %s -aa-pipeline=cfl-anders-aa -passes=aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+
+define void @store_arg_callee(i32** %arg1, i32* %arg2) {
+ store i32* %arg2, i32** %arg1
+ ret void
+}
+; CHECK-LABEL: Function: test_store_arg
+; CHECK: NoAlias: i32* %a, i32* %b
+; CHECK: NoAlias: i32* %a, i32** %p
+; CHECK: NoAlias: i32* %b, i32** %p
+; CHECK: MayAlias: i32* %a, i32* %lp
+; CHECK: MayAlias: i32* %b, i32* %lp
+; CHECK: NoAlias: i32* %a, i32* %lq
+; CHECK: MayAlias: i32* %b, i32* %lq
+; CHECK: MayAlias: i32* %lp, i32* %lq
+
+; Temporarily disable modref checks
+; NoModRef: Ptr: i32* %a <-> call void @store_arg_callee(i32** %p, i32* %b)
+; Just Ref: Ptr: i32* %b <-> call void @store_arg_callee(i32** %p, i32* %b)
+; Just Mod: Ptr: i32** %p <-> call void @store_arg_callee(i32** %p, i32* %b)
+; NoModRef: Ptr: i32** %q <-> call void @store_arg_callee(i32** %p, i32* %b)
+define void @test_store_arg() {
+ %a = alloca i32, align 4
+ %b = alloca i32, align 4
+ %p = alloca i32*, align 8
+ %q = alloca i32*, align 8
+
+ store i32* %a, i32** %p
+ store i32* %b, i32** %q
+ call void @store_arg_callee(i32** %p, i32* %b)
+
+ %lp = load i32*, i32** %p
+ %lq = load i32*, i32** %q
+
+ ret void
+} \ No newline at end of file
diff --git a/test/Analysis/CFLAliasAnalysis/Steensgaard/full-store-partial-alias.ll b/test/Analysis/CFLAliasAnalysis/Steensgaard/full-store-partial-alias.ll
index 39ea845f2a3a..2f9ac96c0cc6 100644
--- a/test/Analysis/CFLAliasAnalysis/Steensgaard/full-store-partial-alias.ll
+++ b/test/Analysis/CFLAliasAnalysis/Steensgaard/full-store-partial-alias.ll
@@ -33,7 +33,7 @@ entry:
!0 = !{!4, !4, i64 0}
!1 = !{!"omnipotent char", !2}
-!2 = !{!"Simple C/C++ TBAA", null}
+!2 = !{!"Simple C/C++ TBAA"}
!3 = !{!5, !5, i64 0}
!4 = !{!"double", !1}
!5 = !{!"int", !1}
diff --git a/test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-ret-arg.ll b/test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-ret-arg.ll
index e071e7e46c25..e6e2065e7310 100644
--- a/test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-ret-arg.ll
+++ b/test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-ret-arg.ll
@@ -12,7 +12,8 @@ define i32* @return_arg_callee(i32* %arg1, i32* %arg2) {
; CHECK: MayAlias: i32* %a, i32* %c
; CHECK: NoAlias: i32* %b, i32* %c
-; CHECK: NoModRef: Ptr: i32* %b <-> %c = call i32* @return_arg_callee(i32* %a, i32* %b)
+; Temporarily disable modref checks
+; NoModRef: Ptr: i32* %b <-> %c = call i32* @return_arg_callee(i32* %a, i32* %b)
define void @test_return_arg() {
%a = alloca i32, align 4
%b = alloca i32, align 4
diff --git a/test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-store-arg-multilevel.ll b/test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-store-arg-multilevel.ll
index f5b61687ad70..54e39d87127d 100644
--- a/test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-store-arg-multilevel.ll
+++ b/test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-store-arg-multilevel.ll
@@ -29,7 +29,6 @@ define void @store_arg_multilevel_callee(i32*** %arg1, i32* %arg2) {
; We could've proven the following facts if the analysis were inclusion-based:
; NoAlias: i32* %a, i32* %b
-; NoAlias: i32* %b, i32* %lp
define void @test_store_arg_multilevel() {
%a = alloca i32, align 4
%b = alloca i32, align 4
@@ -45,4 +44,5 @@ define void @test_store_arg_multilevel() {
%lp = load i32*, i32** %p
ret void
-} \ No newline at end of file
+}
+
diff --git a/test/Analysis/ConstantFolding/gep.ll b/test/Analysis/ConstantFolding/gep.ll
new file mode 100644
index 000000000000..caa7f15f36ed
--- /dev/null
+++ b/test/Analysis/ConstantFolding/gep.ll
@@ -0,0 +1,30 @@
+; RUN: opt -instcombine -S -o - %s | FileCheck %s
+; Tests that we preserve the inrange attribute on indices where possible.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.A = type { i32 (...)** }
+
+@vt = external global [3 x i8*]
+
+; CHECK: define i32 (...)* @f0()
+define i32 (...)* @f0() {
+ ; CHECK-NEXT: load i32 (...)*, i32 (...)** bitcast (i8** getelementptr inbounds ([3 x i8*], [3 x i8*]* @vt, inrange i64 0, i64 2) to i32 (...)**)
+ %load = load i32 (...)*, i32 (...)** getelementptr (i32 (...)*, i32 (...)** bitcast (i8** getelementptr inbounds ([3 x i8*], [3 x i8*]* @vt, inrange i64 0, i64 1) to i32 (...)**), i64 1)
+ ret i32 (...)* %load
+}
+
+; CHECK: define i32 (...)* @f1()
+define i32 (...)* @f1() {
+ ; CHECK-NEXT: load i32 (...)*, i32 (...)** bitcast (i8** getelementptr inbounds ([3 x i8*], [3 x i8*]* @vt, i64 0, i64 2) to i32 (...)**)
+ %load = load i32 (...)*, i32 (...)** getelementptr (i32 (...)*, i32 (...)** bitcast (i8** getelementptr inbounds ([3 x i8*], [3 x i8*]* @vt, i64 0, inrange i64 1) to i32 (...)**), i64 1)
+ ret i32 (...)* %load
+}
+
+; CHECK: define i32 (...)* @f2()
+define i32 (...)* @f2() {
+ ; CHECK-NEXT: load i32 (...)*, i32 (...)** bitcast (i8** getelementptr ([3 x i8*], [3 x i8*]* @vt, i64 1, i64 1) to i32 (...)**)
+ %load = load i32 (...)*, i32 (...)** getelementptr (i32 (...)*, i32 (...)** bitcast (i8** getelementptr inbounds ([3 x i8*], [3 x i8*]* @vt, i64 0, inrange i64 1) to i32 (...)**), i64 3)
+ ret i32 (...)* %load
+}
diff --git a/test/Analysis/ConstantFolding/vectorgep-crash.ll b/test/Analysis/ConstantFolding/vectorgep-crash.ll
new file mode 100644
index 000000000000..e7a5117d6ed2
--- /dev/null
+++ b/test/Analysis/ConstantFolding/vectorgep-crash.ll
@@ -0,0 +1,40 @@
+; RUN: opt -instcombine -S -o - %s | FileCheck %s
+; Tests that we don't crash upon encountering a vector GEP
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%Dual = type { %Dual.72, %Partials.73 }
+%Dual.72 = type { double, %Partials }
+%Partials = type { [2 x double] }
+%Partials.73 = type { [2 x %Dual.72] }
+
+; Function Attrs: sspreq
+define <8 x i64*> @"julia_axpy!_65480"(%Dual* %arg1, <8 x i64> %arg2) {
+top:
+; CHECK: %VectorGep14 = getelementptr inbounds %Dual, %Dual* %arg1, <8 x i64> %arg2, i32 1, i32 0, i64 0, i32 1, i32 0, i64 0
+ %VectorGep14 = getelementptr inbounds %Dual, %Dual* %arg1, <8 x i64> %arg2, i32 1, i32 0, i64 0, i32 1, i32 0, i64 0
+ %0 = bitcast <8 x double*> %VectorGep14 to <8 x i64*>
+ ret <8 x i64*> %0
+}
+
+%struct.A = type { i32, %struct.B* }
+%struct.B = type { i64, %struct.C* }
+%struct.C = type { i64 }
+
+@G = internal global [65 x %struct.A] zeroinitializer, align 16
+; CHECK-LABEL: @test
+; CHECK: ret <16 x i32*> getelementptr ([65 x %struct.A], [65 x %struct.A]* @G, <16 x i64> zeroinitializer, <16 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16>, <16 x i32> zeroinitializer)
+define <16 x i32*> @test() {
+vector.body:
+ %VectorGep = getelementptr [65 x %struct.A], [65 x %struct.A]* @G, <16 x i64> zeroinitializer, <16 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16>, <16 x i32> zeroinitializer
+ ret <16 x i32*> %VectorGep
+}
+
+; CHECK-LABEL: @test2
+; CHECK: ret <16 x i32*> getelementptr ([65 x %struct.A], [65 x %struct.A]* @G, <16 x i64> zeroinitializer, <16 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9,
+define <16 x i32*> @test2() {
+vector.body:
+ %VectorGep = getelementptr [65 x %struct.A], [65 x %struct.A]* @G, <16 x i32> zeroinitializer, <16 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16>, <16 x i32> zeroinitializer
+ ret <16 x i32*> %VectorGep
+}
diff --git a/test/Analysis/CostModel/AArch64/gep.ll b/test/Analysis/CostModel/AArch64/gep.ll
new file mode 100644
index 000000000000..f3d83c133027
--- /dev/null
+++ b/test/Analysis/CostModel/AArch64/gep.ll
@@ -0,0 +1,292 @@
+; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mcpu=kryo < %s | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+define i8 @test1(i8* %p, i32 %i) {
+; CHECK-LABEL: test1
+; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8*
+ %a = getelementptr inbounds i8, i8* %p, i32 0
+ %v = load i8, i8* %a
+ ret i8 %v
+}
+
+define i16 @test2(i16* %p, i32 %i) {
+; CHECK-LABEL: test2
+; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, i16*
+ %a = getelementptr inbounds i16, i16* %p, i32 0
+ %v = load i16, i16* %a
+ ret i16 %v
+}
+
+define i32 @test3(i32* %p, i32 %i) {
+; CHECK-LABEL: test3
+; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32, i32*
+ %a = getelementptr inbounds i32, i32* %p, i32 0
+ %v = load i32, i32* %a
+ ret i32 %v
+}
+
+define i64 @test4(i64* %p, i32 %i) {
+; CHECK-LABEL: test4
+; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i64, i64*
+ %a = getelementptr inbounds i64, i64* %p, i32 0
+ %v = load i64, i64* %a
+ ret i64 %v
+}
+
+define i8 @test5(i8* %p, i32 %i) {
+; CHECK-LABEL: test5
+; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8*
+ %a = getelementptr inbounds i8, i8* %p, i32 1024
+ %v = load i8, i8* %a
+ ret i8 %v
+}
+
+define i16 @test6(i16* %p, i32 %i) {
+; CHECK-LABEL: test6
+; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, i16*
+ %a = getelementptr inbounds i16, i16* %p, i32 1024
+ %v = load i16, i16* %a
+ ret i16 %v
+}
+
+define i32 @test7(i32* %p, i32 %i) {
+; CHECK-LABEL: test7
+; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32, i32*
+ %a = getelementptr inbounds i32, i32* %p, i32 1024
+ %v = load i32, i32* %a
+ ret i32 %v
+}
+
+define i64 @test8(i64* %p, i32 %i) {
+; CHECK-LABEL: test8
+; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i64, i64*
+ %a = getelementptr inbounds i64, i64* %p, i32 1024
+ %v = load i64, i64* %a
+ ret i64 %v
+}
+
+define i8 @test9(i8* %p, i32 %i) {
+; CHECK-LABEL: test9
+; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i8, i8*
+ %a = getelementptr inbounds i8, i8* %p, i32 4096
+ %v = load i8, i8* %a
+ ret i8 %v
+}
+
+define i16 @test10(i16* %p, i32 %i) {
+; CHECK-LABEL: test10
+; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i16, i16*
+ %a = getelementptr inbounds i16, i16* %p, i32 4096
+ %v = load i16, i16* %a
+ ret i16 %v
+}
+
+define i32 @test11(i32* %p, i32 %i) {
+; CHECK-LABEL: test11
+; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32, i32*
+ %a = getelementptr inbounds i32, i32* %p, i32 4096
+ %v = load i32, i32* %a
+ ret i32 %v
+}
+
+define i64 @test12(i64* %p, i32 %i) {
+; CHECK-LABEL: test12
+; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, i64*
+ %a = getelementptr inbounds i64, i64* %p, i32 4096
+ %v = load i64, i64* %a
+ ret i64 %v
+}
+
+define i8 @test13(i8* %p, i32 %i) {
+; CHECK-LABEL: test13
+; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8*
+ %a = getelementptr inbounds i8, i8* %p, i32 -64
+ %v = load i8, i8* %a
+ ret i8 %v
+}
+
+define i16 @test14(i16* %p, i32 %i) {
+; CHECK-LABEL: test14
+; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, i16*
+ %a = getelementptr inbounds i16, i16* %p, i32 -64
+ %v = load i16, i16* %a
+ ret i16 %v
+}
+
+define i32 @test15(i32* %p, i32 %i) {
+; CHECK-LABEL: test15
+; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32, i32*
+ %a = getelementptr inbounds i32, i32* %p, i32 -64
+ %v = load i32, i32* %a
+ ret i32 %v
+}
+
+define i64 @test16(i64* %p, i32 %i) {
+; CHECK-LABEL: test16
+; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, i64*
+ %a = getelementptr inbounds i64, i64* %p, i32 -64
+ %v = load i64, i64* %a
+ ret i64 %v
+}
+
+define i8 @test17(i8* %p, i32 %i) {
+; CHECK-LABEL: test17
+; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i8, i8*
+ %a = getelementptr inbounds i8, i8* %p, i32 -1024
+ %v = load i8, i8* %a
+ ret i8 %v
+}
+
+define i16 @test18(i16* %p, i32 %i) {
+; CHECK-LABEL: test18
+; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i16, i16*
+ %a = getelementptr inbounds i16, i16* %p, i32 -1024
+ %v = load i16, i16* %a
+ ret i16 %v
+}
+
+define i32 @test19(i32* %p, i32 %i) {
+; CHECK-LABEL: test19
+; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32, i32*
+ %a = getelementptr inbounds i32, i32* %p, i32 -1024
+ %v = load i32, i32* %a
+ ret i32 %v
+}
+
+define i64 @test20(i64* %p, i32 %i) {
+; CHECK-LABEL: test20
+; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, i64*
+ %a = getelementptr inbounds i64, i64* %p, i32 -1024
+ %v = load i64, i64* %a
+ ret i64 %v
+}
+
+define i8 @test21(i8* %p, i32 %i) {
+; CHECK-LABEL: test21
+; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8*
+ %a = getelementptr inbounds i8, i8* %p, i32 %i
+ %v = load i8, i8* %a
+ ret i8 %v
+}
+
+define i16 @test22(i16* %p, i32 %i) {
+; CHECK-LABEL: test22
+; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, i16*
+ %a = getelementptr inbounds i16, i16* %p, i32 %i
+ %v = load i16, i16* %a
+ ret i16 %v
+}
+
+define i32 @test23(i32* %p, i32 %i) {
+; CHECK-LABEL: test23
+; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32, i32*
+ %a = getelementptr inbounds i32, i32* %p, i32 %i
+ %v = load i32, i32* %a
+ ret i32 %v
+}
+
+define i64 @test24(i64* %p, i32 %i) {
+; CHECK-LABEL: test24
+; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i64, i64*
+ %a = getelementptr inbounds i64, i64* %p, i32 %i
+ %v = load i64, i64* %a
+ ret i64 %v
+}
+
+define i8 @test25(i8* %p, i32 %i) {
+; CHECK-LABEL: test25
+; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8*
+ %a = getelementptr inbounds i8, i8* %p, i32 -128
+ %v = load i8, i8* %a
+ ret i8 %v
+}
+
+define i16 @test26(i16* %p, i32 %i) {
+; CHECK-LABEL: test26
+; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, i16*
+ %a = getelementptr inbounds i16, i16* %p, i32 -128
+ %v = load i16, i16* %a
+ ret i16 %v
+}
+
+define i32 @test27(i32* %p, i32 %i) {
+; CHECK-LABEL: test27
+; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32, i32*
+ %a = getelementptr inbounds i32, i32* %p, i32 -128
+ %v = load i32, i32* %a
+ ret i32 %v
+}
+
+define i64 @test28(i64* %p, i32 %i) {
+; CHECK-LABEL: test28
+; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, i64*
+ %a = getelementptr inbounds i64, i64* %p, i32 -128
+ %v = load i64, i64* %a
+ ret i64 %v
+}
+
+define i8 @test29(i8* %p, i32 %i) {
+; CHECK-LABEL: test29
+; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8*
+ %a = getelementptr inbounds i8, i8* %p, i32 -256
+ %v = load i8, i8* %a
+ ret i8 %v
+}
+
+define i16 @test30(i16* %p, i32 %i) {
+; CHECK-LABEL: test30
+; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i16, i16*
+ %a = getelementptr inbounds i16, i16* %p, i32 -256
+ %v = load i16, i16* %a
+ ret i16 %v
+}
+
+define i32 @test31(i32* %p, i32 %i) {
+; CHECK-LABEL: test31
+; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32, i32*
+ %a = getelementptr inbounds i32, i32* %p, i32 -256
+ %v = load i32, i32* %a
+ ret i32 %v
+}
+
+define i64 @test32(i64* %p, i32 %i) {
+; CHECK-LABEL: test32
+; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, i64*
+ %a = getelementptr inbounds i64, i64* %p, i32 -256
+ %v = load i64, i64* %a
+ ret i64 %v
+}
+
+define i8 @test33(i8* %p, i32 %i) {
+; CHECK-LABEL: test33
+; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i8, i8*
+ %a = getelementptr inbounds i8, i8* %p, i32 -512
+ %v = load i8, i8* %a
+ ret i8 %v
+}
+
+define i16 @test34(i16* %p, i32 %i) {
+; CHECK-LABEL: test34
+; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i16, i16*
+ %a = getelementptr inbounds i16, i16* %p, i32 -512
+ %v = load i16, i16* %a
+ ret i16 %v
+}
+
+define i32 @test35(i32* %p, i32 %i) {
+; CHECK-LABEL: test35
+; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32, i32*
+ %a = getelementptr inbounds i32, i32* %p, i32 -512
+ %v = load i32, i32* %a
+ ret i32 %v
+}
+
+define i64 @test36(i64* %p, i32 %i) {
+; CHECK-LABEL: test36
+; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, i64*
+ %a = getelementptr inbounds i64, i64* %p, i32 -512
+ %v = load i64, i64* %a
+ ret i64 %v
+}
diff --git a/test/Analysis/CostModel/AArch64/store.ll b/test/Analysis/CostModel/AArch64/store.ll
index 307f8f8ee974..58750721cb97 100644
--- a/test/Analysis/CostModel/AArch64/store.ll
+++ b/test/Analysis/CostModel/AArch64/store.ll
@@ -1,10 +1,16 @@
-; RUN: opt < %s -cost-model -analyze -mtriple=arm64-apple-ios -mcpu=cyclone | FileCheck %s
+; RUN: opt < %s -cost-model -analyze -mtriple=aarch64-apple-ios | FileCheck %s
+; RUN: opt < %s -cost-model -analyze -mtriple=aarch64-apple-ios -mattr=slow-misaligned-128store | FileCheck %s --check-prefix=SLOW_MISALIGNED_128_STORE
+
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
-; CHECK-LABEL: store
-define void @store() {
- ; Stores of <2 x i64> should be expensive because we don't split them and
- ; and unaligned 16b stores have bad performance.
- ; CHECK: cost of 12 {{.*}} store
+; CHECK-LABEL: getMemoryOpCost
+; SLOW_MISALIGNED_128_STORE-LABEL: getMemoryOpCost
+define void @getMemoryOpCost() {
+ ; If FeatureSlowMisaligned128Store is set, we penalize <2 x i64> stores. On
+ ; Cyclone, for example, such stores should be expensive because we don't
+ ; split them and misaligned 16b stores have bad performance.
+ ;
+ ; CHECK: cost of 1 {{.*}} store
+ ; SLOW_MISALIGNED_128_STORE: cost of 12 {{.*}} store
store <2 x i64> undef, <2 x i64> * undef
; We scalarize the loads/stores because there is no vector register name for
diff --git a/test/Analysis/CostModel/ARM/gep.ll b/test/Analysis/CostModel/ARM/gep.ll
index a70d6d42b61b..9d74da4c2d3b 100644
--- a/test/Analysis/CostModel/ARM/gep.ll
+++ b/test/Analysis/CostModel/ARM/gep.ll
@@ -44,17 +44,17 @@ define void @test_geps(i32 %i) {
%b4 = getelementptr inbounds float, float* undef, i32 1024
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds double, double*
%b5 = getelementptr inbounds double, double* undef, i32 1024
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i8>, <4 x i8>*
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i8>, <4 x i8>*
%b7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 1
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i16>, <4 x i16>*
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i16>, <4 x i16>*
%b8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 1
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i32>, <4 x i32>*
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i32>, <4 x i32>*
%b9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 1
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i64>, <4 x i64>*
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i64>, <4 x i64>*
%b10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 1
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x float>, <4 x float>*
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x float>, <4 x float>*
%b11 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 1
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x double>, <4 x double>*
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x double>, <4 x double>*
%b12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 1
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8*
@@ -63,15 +63,15 @@ define void @test_geps(i32 %i) {
%c1 = getelementptr inbounds i16, i16* undef, i32 %i
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32, i32*
%c2 = getelementptr inbounds i32, i32* undef, i32 %i
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i64, i64*
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, i64*
%c3 = getelementptr inbounds i64, i64* undef, i32 %i
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds float, float*
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds float, float*
%c4 = getelementptr inbounds float, float* undef, i32 %i
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds double, double*
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds double, double*
%c5 = getelementptr inbounds double, double* undef, i32 %i
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i8>, <4 x i8>*
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i8>, <4 x i8>*
%c7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 %i
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i16>, <4 x i16>*
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i16>, <4 x i16>*
%c8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 %i
; Thumb-2 cannot fold scales larger than 8 to address computation.
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i32>, <4 x i32>*
diff --git a/test/Analysis/CostModel/PowerPC/vsr_load_32_64.ll b/test/Analysis/CostModel/PowerPC/vsr_load_32_64.ll
new file mode 100644
index 000000000000..4afeabca00ad
--- /dev/null
+++ b/test/Analysis/CostModel/PowerPC/vsr_load_32_64.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=+vsx | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i32 @loads(i32 %arg) {
+ ; CHECK: cost of 1 {{.*}} load
+ load <4 x i8>, <4 x i8>* undef, align 1
+
+ ; CHECK: cost of 1 {{.*}} load
+ load <8 x i8>, <8 x i8>* undef, align 1
+
+ ; CHECK: cost of 1 {{.*}} load
+ load <2 x i16>, <2 x i16>* undef, align 2
+
+ ; CHECK: cost of 1 {{.*}} load
+ load <4 x i16>, <4 x i16>* undef, align 2
+
+ ret i32 undef
+}
diff --git a/test/Analysis/CostModel/X86/arith-fp.ll b/test/Analysis/CostModel/X86/arith-fp.ll
new file mode 100644
index 000000000000..689442f67a13
--- /dev/null
+++ b/test/Analysis/CostModel/X86/arith-fp.ll
@@ -0,0 +1,544 @@
+; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2
+; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE42
+; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx,+fma | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
+; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2,+fma | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
+; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; CHECK-LABEL: 'fadd'
+define i32 @fadd(i32 %arg) {
+ ; SSE2: cost of 2 {{.*}} %F32 = fadd
+ ; SSE42: cost of 2 {{.*}} %F32 = fadd
+ ; AVX: cost of 2 {{.*}} %F32 = fadd
+ ; AVX2: cost of 2 {{.*}} %F32 = fadd
+ ; AVX512: cost of 2 {{.*}} %F32 = fadd
+ %F32 = fadd float undef, undef
+ ; SSE2: cost of 2 {{.*}} %V4F32 = fadd
+ ; SSE42: cost of 2 {{.*}} %V4F32 = fadd
+ ; AVX: cost of 2 {{.*}} %V4F32 = fadd
+ ; AVX2: cost of 2 {{.*}} %V4F32 = fadd
+ ; AVX512: cost of 2 {{.*}} %V4F32 = fadd
+ %V4F32 = fadd <4 x float> undef, undef
+ ; SSE2: cost of 4 {{.*}} %V8F32 = fadd
+ ; SSE42: cost of 4 {{.*}} %V8F32 = fadd
+ ; AVX: cost of 2 {{.*}} %V8F32 = fadd
+ ; AVX2: cost of 2 {{.*}} %V8F32 = fadd
+ ; AVX512: cost of 2 {{.*}} %V8F32 = fadd
+ %V8F32 = fadd <8 x float> undef, undef
+ ; SSE2: cost of 8 {{.*}} %V16F32 = fadd
+ ; SSE42: cost of 8 {{.*}} %V16F32 = fadd
+ ; AVX: cost of 4 {{.*}} %V16F32 = fadd
+ ; AVX2: cost of 4 {{.*}} %V16F32 = fadd
+ ; AVX512: cost of 2 {{.*}} %V16F32 = fadd
+ %V16F32 = fadd <16 x float> undef, undef
+
+ ; SSE2: cost of 2 {{.*}} %F64 = fadd
+ ; SSE42: cost of 2 {{.*}} %F64 = fadd
+ ; AVX: cost of 2 {{.*}} %F64 = fadd
+ ; AVX2: cost of 2 {{.*}} %F64 = fadd
+ ; AVX512: cost of 2 {{.*}} %F64 = fadd
+ %F64 = fadd double undef, undef
+ ; SSE2: cost of 2 {{.*}} %V2F64 = fadd
+ ; SSE42: cost of 2 {{.*}} %V2F64 = fadd
+ ; AVX: cost of 2 {{.*}} %V2F64 = fadd
+ ; AVX2: cost of 2 {{.*}} %V2F64 = fadd
+ ; AVX512: cost of 2 {{.*}} %V2F64 = fadd
+ %V2F64 = fadd <2 x double> undef, undef
+ ; SSE2: cost of 4 {{.*}} %V4F64 = fadd
+ ; SSE42: cost of 4 {{.*}} %V4F64 = fadd
+ ; AVX: cost of 2 {{.*}} %V4F64 = fadd
+ ; AVX2: cost of 2 {{.*}} %V4F64 = fadd
+ ; AVX512: cost of 2 {{.*}} %V4F64 = fadd
+ %V4F64 = fadd <4 x double> undef, undef
+ ; SSE2: cost of 8 {{.*}} %V8F64 = fadd
+ ; SSE42: cost of 8 {{.*}} %V8F64 = fadd
+ ; AVX: cost of 4 {{.*}} %V8F64 = fadd
+ ; AVX2: cost of 4 {{.*}} %V8F64 = fadd
+ ; AVX512: cost of 2 {{.*}} %V8F64 = fadd
+ %V8F64 = fadd <8 x double> undef, undef
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'fsub'
+define i32 @fsub(i32 %arg) {
+ ; SSE2: cost of 2 {{.*}} %F32 = fsub
+ ; SSE42: cost of 2 {{.*}} %F32 = fsub
+ ; AVX: cost of 2 {{.*}} %F32 = fsub
+ ; AVX2: cost of 2 {{.*}} %F32 = fsub
+ ; AVX512: cost of 2 {{.*}} %F32 = fsub
+ %F32 = fsub float undef, undef
+ ; SSE2: cost of 2 {{.*}} %V4F32 = fsub
+ ; SSE42: cost of 2 {{.*}} %V4F32 = fsub
+ ; AVX: cost of 2 {{.*}} %V4F32 = fsub
+ ; AVX2: cost of 2 {{.*}} %V4F32 = fsub
+ ; AVX512: cost of 2 {{.*}} %V4F32 = fsub
+ %V4F32 = fsub <4 x float> undef, undef
+ ; SSE2: cost of 4 {{.*}} %V8F32 = fsub
+ ; SSE42: cost of 4 {{.*}} %V8F32 = fsub
+ ; AVX: cost of 2 {{.*}} %V8F32 = fsub
+ ; AVX2: cost of 2 {{.*}} %V8F32 = fsub
+ ; AVX512: cost of 2 {{.*}} %V8F32 = fsub
+ %V8F32 = fsub <8 x float> undef, undef
+ ; SSE2: cost of 8 {{.*}} %V16F32 = fsub
+ ; SSE42: cost of 8 {{.*}} %V16F32 = fsub
+ ; AVX: cost of 4 {{.*}} %V16F32 = fsub
+ ; AVX2: cost of 4 {{.*}} %V16F32 = fsub
+ ; AVX512: cost of 2 {{.*}} %V16F32 = fsub
+ %V16F32 = fsub <16 x float> undef, undef
+
+ ; SSE2: cost of 2 {{.*}} %F64 = fsub
+ ; SSE42: cost of 2 {{.*}} %F64 = fsub
+ ; AVX: cost of 2 {{.*}} %F64 = fsub
+ ; AVX2: cost of 2 {{.*}} %F64 = fsub
+ ; AVX512: cost of 2 {{.*}} %F64 = fsub
+ %F64 = fsub double undef, undef
+ ; SSE2: cost of 2 {{.*}} %V2F64 = fsub
+ ; SSE42: cost of 2 {{.*}} %V2F64 = fsub
+ ; AVX: cost of 2 {{.*}} %V2F64 = fsub
+ ; AVX2: cost of 2 {{.*}} %V2F64 = fsub
+ ; AVX512: cost of 2 {{.*}} %V2F64 = fsub
+ %V2F64 = fsub <2 x double> undef, undef
+ ; SSE2: cost of 4 {{.*}} %V4F64 = fsub
+ ; SSE42: cost of 4 {{.*}} %V4F64 = fsub
+ ; AVX: cost of 2 {{.*}} %V4F64 = fsub
+ ; AVX2: cost of 2 {{.*}} %V4F64 = fsub
+ ; AVX512: cost of 2 {{.*}} %V4F64 = fsub
+ %V4F64 = fsub <4 x double> undef, undef
+ ; SSE2: cost of 8 {{.*}} %V8F64 = fsub
+ ; SSE42: cost of 8 {{.*}} %V8F64 = fsub
+ ; AVX: cost of 4 {{.*}} %V8F64 = fsub
+ ; AVX2: cost of 4 {{.*}} %V8F64 = fsub
+ ; AVX512: cost of 2 {{.*}} %V8F64 = fsub
+ %V8F64 = fsub <8 x double> undef, undef
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'fmul'
+define i32 @fmul(i32 %arg) {
+ ; SSE2: cost of 2 {{.*}} %F32 = fmul
+ ; SSE42: cost of 2 {{.*}} %F32 = fmul
+ ; AVX: cost of 2 {{.*}} %F32 = fmul
+ ; AVX2: cost of 2 {{.*}} %F32 = fmul
+ ; AVX512: cost of 2 {{.*}} %F32 = fmul
+ %F32 = fmul float undef, undef
+ ; SSE2: cost of 2 {{.*}} %V4F32 = fmul
+ ; SSE42: cost of 2 {{.*}} %V4F32 = fmul
+ ; AVX: cost of 2 {{.*}} %V4F32 = fmul
+ ; AVX2: cost of 2 {{.*}} %V4F32 = fmul
+ ; AVX512: cost of 2 {{.*}} %V4F32 = fmul
+ %V4F32 = fmul <4 x float> undef, undef
+ ; SSE2: cost of 4 {{.*}} %V8F32 = fmul
+ ; SSE42: cost of 4 {{.*}} %V8F32 = fmul
+ ; AVX: cost of 2 {{.*}} %V8F32 = fmul
+ ; AVX2: cost of 2 {{.*}} %V8F32 = fmul
+ ; AVX512: cost of 2 {{.*}} %V8F32 = fmul
+ %V8F32 = fmul <8 x float> undef, undef
+ ; SSE2: cost of 8 {{.*}} %V16F32 = fmul
+ ; SSE42: cost of 8 {{.*}} %V16F32 = fmul
+ ; AVX: cost of 4 {{.*}} %V16F32 = fmul
+ ; AVX2: cost of 4 {{.*}} %V16F32 = fmul
+ ; AVX512: cost of 2 {{.*}} %V16F32 = fmul
+ %V16F32 = fmul <16 x float> undef, undef
+
+ ; SSE2: cost of 2 {{.*}} %F64 = fmul
+ ; SSE42: cost of 2 {{.*}} %F64 = fmul
+ ; AVX: cost of 2 {{.*}} %F64 = fmul
+ ; AVX2: cost of 2 {{.*}} %F64 = fmul
+ ; AVX512: cost of 2 {{.*}} %F64 = fmul
+ %F64 = fmul double undef, undef
+ ; SSE2: cost of 2 {{.*}} %V2F64 = fmul
+ ; SSE42: cost of 2 {{.*}} %V2F64 = fmul
+ ; AVX: cost of 2 {{.*}} %V2F64 = fmul
+ ; AVX2: cost of 2 {{.*}} %V2F64 = fmul
+ ; AVX512: cost of 2 {{.*}} %V2F64 = fmul
+ %V2F64 = fmul <2 x double> undef, undef
+ ; SSE2: cost of 4 {{.*}} %V4F64 = fmul
+ ; SSE42: cost of 4 {{.*}} %V4F64 = fmul
+ ; AVX: cost of 2 {{.*}} %V4F64 = fmul
+ ; AVX2: cost of 2 {{.*}} %V4F64 = fmul
+ ; AVX512: cost of 2 {{.*}} %V4F64 = fmul
+ %V4F64 = fmul <4 x double> undef, undef
+ ; SSE2: cost of 8 {{.*}} %V8F64 = fmul
+ ; SSE42: cost of 8 {{.*}} %V8F64 = fmul
+ ; AVX: cost of 4 {{.*}} %V8F64 = fmul
+ ; AVX2: cost of 4 {{.*}} %V8F64 = fmul
+ ; AVX512: cost of 2 {{.*}} %V8F64 = fmul
+ %V8F64 = fmul <8 x double> undef, undef
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'fdiv'
+define i32 @fdiv(i32 %arg) {
+ ; SSE2: cost of 23 {{.*}} %F32 = fdiv
+ ; SSE42: cost of 14 {{.*}} %F32 = fdiv
+ ; AVX: cost of 14 {{.*}} %F32 = fdiv
+ ; AVX2: cost of 7 {{.*}} %F32 = fdiv
+ ; AVX512: cost of 7 {{.*}} %F32 = fdiv
+ %F32 = fdiv float undef, undef
+ ; SSE2: cost of 39 {{.*}} %V4F32 = fdiv
+ ; SSE42: cost of 14 {{.*}} %V4F32 = fdiv
+ ; AVX: cost of 14 {{.*}} %V4F32 = fdiv
+ ; AVX2: cost of 7 {{.*}} %V4F32 = fdiv
+ ; AVX512: cost of 7 {{.*}} %V4F32 = fdiv
+ %V4F32 = fdiv <4 x float> undef, undef
+ ; SSE2: cost of 78 {{.*}} %V8F32 = fdiv
+ ; SSE42: cost of 28 {{.*}} %V8F32 = fdiv
+ ; AVX: cost of 28 {{.*}} %V8F32 = fdiv
+ ; AVX2: cost of 14 {{.*}} %V8F32 = fdiv
+ ; AVX512: cost of 14 {{.*}} %V8F32 = fdiv
+ %V8F32 = fdiv <8 x float> undef, undef
+ ; SSE2: cost of 156 {{.*}} %V16F32 = fdiv
+ ; SSE42: cost of 56 {{.*}} %V16F32 = fdiv
+ ; AVX: cost of 56 {{.*}} %V16F32 = fdiv
+ ; AVX2: cost of 28 {{.*}} %V16F32 = fdiv
+ ; AVX512: cost of 2 {{.*}} %V16F32 = fdiv
+ %V16F32 = fdiv <16 x float> undef, undef
+
+ ; SSE2: cost of 38 {{.*}} %F64 = fdiv
+ ; SSE42: cost of 22 {{.*}} %F64 = fdiv
+ ; AVX: cost of 22 {{.*}} %F64 = fdiv
+ ; AVX2: cost of 14 {{.*}} %F64 = fdiv
+ ; AVX512: cost of 14 {{.*}} %F64 = fdiv
+ %F64 = fdiv double undef, undef
+ ; SSE2: cost of 69 {{.*}} %V2F64 = fdiv
+ ; SSE42: cost of 22 {{.*}} %V2F64 = fdiv
+ ; AVX: cost of 22 {{.*}} %V2F64 = fdiv
+ ; AVX2: cost of 14 {{.*}} %V2F64 = fdiv
+ ; AVX512: cost of 14 {{.*}} %V2F64 = fdiv
+ %V2F64 = fdiv <2 x double> undef, undef
+ ; SSE2: cost of 138 {{.*}} %V4F64 = fdiv
+ ; SSE42: cost of 44 {{.*}} %V4F64 = fdiv
+ ; AVX: cost of 44 {{.*}} %V4F64 = fdiv
+ ; AVX2: cost of 28 {{.*}} %V4F64 = fdiv
+ ; AVX512: cost of 28 {{.*}} %V4F64 = fdiv
+ %V4F64 = fdiv <4 x double> undef, undef
+ ; SSE2: cost of 276 {{.*}} %V8F64 = fdiv
+ ; SSE42: cost of 88 {{.*}} %V8F64 = fdiv
+ ; AVX: cost of 88 {{.*}} %V8F64 = fdiv
+ ; AVX2: cost of 56 {{.*}} %V8F64 = fdiv
+ ; AVX512: cost of 2 {{.*}} %V8F64 = fdiv
+ %V8F64 = fdiv <8 x double> undef, undef
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'frem'
+define i32 @frem(i32 %arg) {
+ ; SSE2: cost of 2 {{.*}} %F32 = frem
+ ; SSE42: cost of 2 {{.*}} %F32 = frem
+ ; AVX: cost of 2 {{.*}} %F32 = frem
+ ; AVX2: cost of 2 {{.*}} %F32 = frem
+ ; AVX512: cost of 2 {{.*}} %F32 = frem
+ %F32 = frem float undef, undef
+ ; SSE2: cost of 14 {{.*}} %V4F32 = frem
+ ; SSE42: cost of 14 {{.*}} %V4F32 = frem
+ ; AVX: cost of 14 {{.*}} %V4F32 = frem
+ ; AVX2: cost of 14 {{.*}} %V4F32 = frem
+ ; AVX512: cost of 14 {{.*}} %V4F32 = frem
+ %V4F32 = frem <4 x float> undef, undef
+ ; SSE2: cost of 28 {{.*}} %V8F32 = frem
+ ; SSE42: cost of 28 {{.*}} %V8F32 = frem
+ ; AVX: cost of 30 {{.*}} %V8F32 = frem
+ ; AVX2: cost of 30 {{.*}} %V8F32 = frem
+ ; AVX512: cost of 30 {{.*}} %V8F32 = frem
+ %V8F32 = frem <8 x float> undef, undef
+ ; SSE2: cost of 56 {{.*}} %V16F32 = frem
+ ; SSE42: cost of 56 {{.*}} %V16F32 = frem
+ ; AVX: cost of 60 {{.*}} %V16F32 = frem
+ ; AVX2: cost of 60 {{.*}} %V16F32 = frem
+ ; AVX512: cost of 62 {{.*}} %V16F32 = frem
+ %V16F32 = frem <16 x float> undef, undef
+
+ ; SSE2: cost of 2 {{.*}} %F64 = frem
+ ; SSE42: cost of 2 {{.*}} %F64 = frem
+ ; AVX: cost of 2 {{.*}} %F64 = frem
+ ; AVX2: cost of 2 {{.*}} %F64 = frem
+ ; AVX512: cost of 2 {{.*}} %F64 = frem
+ %F64 = frem double undef, undef
+ ; SSE2: cost of 6 {{.*}} %V2F64 = frem
+ ; SSE42: cost of 6 {{.*}} %V2F64 = frem
+ ; AVX: cost of 6 {{.*}} %V2F64 = frem
+ ; AVX2: cost of 6 {{.*}} %V2F64 = frem
+ ; AVX512: cost of 6 {{.*}} %V2F64 = frem
+ %V2F64 = frem <2 x double> undef, undef
+ ; SSE2: cost of 12 {{.*}} %V4F64 = frem
+ ; SSE42: cost of 12 {{.*}} %V4F64 = frem
+ ; AVX: cost of 14 {{.*}} %V4F64 = frem
+ ; AVX2: cost of 14 {{.*}} %V4F64 = frem
+ ; AVX512: cost of 14 {{.*}} %V4F64 = frem
+ %V4F64 = frem <4 x double> undef, undef
+ ; SSE2: cost of 24 {{.*}} %V8F64 = frem
+ ; SSE42: cost of 24 {{.*}} %V8F64 = frem
+ ; AVX: cost of 28 {{.*}} %V8F64 = frem
+ ; AVX2: cost of 28 {{.*}} %V8F64 = frem
+ ; AVX512: cost of 30 {{.*}} %V8F64 = frem
+ %V8F64 = frem <8 x double> undef, undef
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'fsqrt'
+define i32 @fsqrt(i32 %arg) {
+ ; SSE2: cost of 28 {{.*}} %F32 = call float @llvm.sqrt.f32
+ ; SSE42: cost of 18 {{.*}} %F32 = call float @llvm.sqrt.f32
+ ; AVX: cost of 14 {{.*}} %F32 = call float @llvm.sqrt.f32
+ ; AVX2: cost of 7 {{.*}} %F32 = call float @llvm.sqrt.f32
+ ; AVX512: cost of 7 {{.*}} %F32 = call float @llvm.sqrt.f32
+ %F32 = call float @llvm.sqrt.f32(float undef)
+ ; SSE2: cost of 56 {{.*}} %V4F32 = call <4 x float> @llvm.sqrt.v4f32
+ ; SSE42: cost of 18 {{.*}} %V4F32 = call <4 x float> @llvm.sqrt.v4f32
+ ; AVX: cost of 14 {{.*}} %V4F32 = call <4 x float> @llvm.sqrt.v4f32
+ ; AVX2: cost of 7 {{.*}} %V4F32 = call <4 x float> @llvm.sqrt.v4f32
+ ; AVX512: cost of 7 {{.*}} %V4F32 = call <4 x float> @llvm.sqrt.v4f32
+ %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+ ; SSE2: cost of 112 {{.*}} %V8F32 = call <8 x float> @llvm.sqrt.v8f32
+ ; SSE42: cost of 36 {{.*}} %V8F32 = call <8 x float> @llvm.sqrt.v8f32
+ ; AVX: cost of 28 {{.*}} %V8F32 = call <8 x float> @llvm.sqrt.v8f32
+ ; AVX2: cost of 14 {{.*}} %V8F32 = call <8 x float> @llvm.sqrt.v8f32
+ ; AVX512: cost of 14 {{.*}} %V8F32 = call <8 x float> @llvm.sqrt.v8f32
+ %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+ ; SSE2: cost of 224 {{.*}} %V16F32 = call <16 x float> @llvm.sqrt.v16f32
+ ; SSE42: cost of 72 {{.*}} %V16F32 = call <16 x float> @llvm.sqrt.v16f32
+ ; AVX: cost of 56 {{.*}} %V16F32 = call <16 x float> @llvm.sqrt.v16f32
+ ; AVX2: cost of 28 {{.*}} %V16F32 = call <16 x float> @llvm.sqrt.v16f32
+ ; AVX512: cost of 1 {{.*}} %V16F32 = call <16 x float> @llvm.sqrt.v16f32
+ %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
+
+ ; SSE2: cost of 32 {{.*}} %F64 = call double @llvm.sqrt.f64
+ ; SSE42: cost of 32 {{.*}} %F64 = call double @llvm.sqrt.f64
+ ; AVX: cost of 21 {{.*}} %F64 = call double @llvm.sqrt.f64
+ ; AVX2: cost of 14 {{.*}} %F64 = call double @llvm.sqrt.f64
+ ; AVX512: cost of 14 {{.*}} %F64 = call double @llvm.sqrt.f64
+ %F64 = call double @llvm.sqrt.f64(double undef)
+ ; SSE2: cost of 32 {{.*}} %V2F64 = call <2 x double> @llvm.sqrt.v2f64
+ ; SSE42: cost of 32 {{.*}} %V2F64 = call <2 x double> @llvm.sqrt.v2f64
+ ; AVX: cost of 21 {{.*}} %V2F64 = call <2 x double> @llvm.sqrt.v2f64
+ ; AVX2: cost of 14 {{.*}} %V2F64 = call <2 x double> @llvm.sqrt.v2f64
+ ; AVX512: cost of 14 {{.*}} %V2F64 = call <2 x double> @llvm.sqrt.v2f64
+ %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
+ ; SSE2: cost of 64 {{.*}} %V4F64 = call <4 x double> @llvm.sqrt.v4f64
+ ; SSE42: cost of 64 {{.*}} %V4F64 = call <4 x double> @llvm.sqrt.v4f64
+ ; AVX: cost of 43 {{.*}} %V4F64 = call <4 x double> @llvm.sqrt.v4f64
+ ; AVX2: cost of 28 {{.*}} %V4F64 = call <4 x double> @llvm.sqrt.v4f64
+ ; AVX512: cost of 28 {{.*}} %V4F64 = call <4 x double> @llvm.sqrt.v4f64
+ %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+ ; SSE2: cost of 128 {{.*}} %V8F64 = call <8 x double> @llvm.sqrt.v8f64
+ ; SSE42: cost of 128 {{.*}} %V8F64 = call <8 x double> @llvm.sqrt.v8f64
+ ; AVX: cost of 86 {{.*}} %V8F64 = call <8 x double> @llvm.sqrt.v8f64
+ ; AVX2: cost of 56 {{.*}} %V8F64 = call <8 x double> @llvm.sqrt.v8f64
+ ; AVX512: cost of 1 {{.*}} %V8F64 = call <8 x double> @llvm.sqrt.v8f64
+ %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'fabs'
+define i32 @fabs(i32 %arg) {
+ ; SSE2: cost of 2 {{.*}} %F32 = call float @llvm.fabs.f32
+ ; SSE42: cost of 2 {{.*}} %F32 = call float @llvm.fabs.f32
+ ; AVX: cost of 2 {{.*}} %F32 = call float @llvm.fabs.f32
+ ; AVX2: cost of 2 {{.*}} %F32 = call float @llvm.fabs.f32
+ ; AVX512: cost of 2 {{.*}} %F32 = call float @llvm.fabs.f32
+ %F32 = call float @llvm.fabs.f32(float undef)
+ ; SSE2: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.fabs.v4f32
+ ; SSE42: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.fabs.v4f32
+ ; AVX: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.fabs.v4f32
+ ; AVX2: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.fabs.v4f32
+ ; AVX512: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.fabs.v4f32
+ %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
+ ; SSE2: cost of 4 {{.*}} %V8F32 = call <8 x float> @llvm.fabs.v8f32
+ ; SSE42: cost of 4 {{.*}} %V8F32 = call <8 x float> @llvm.fabs.v8f32
+ ; AVX: cost of 2 {{.*}} %V8F32 = call <8 x float> @llvm.fabs.v8f32
+ ; AVX2: cost of 2 {{.*}} %V8F32 = call <8 x float> @llvm.fabs.v8f32
+ ; AVX512: cost of 2 {{.*}} %V8F32 = call <8 x float> @llvm.fabs.v8f32
+ %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef)
+ ; SSE2: cost of 8 {{.*}} %V16F32 = call <16 x float> @llvm.fabs.v16f32
+ ; SSE42: cost of 8 {{.*}} %V16F32 = call <16 x float> @llvm.fabs.v16f32
+ ; AVX: cost of 4 {{.*}} %V16F32 = call <16 x float> @llvm.fabs.v16f32
+ ; AVX2: cost of 4 {{.*}} %V16F32 = call <16 x float> @llvm.fabs.v16f32
+ ; AVX512: cost of 2 {{.*}} %V16F32 = call <16 x float> @llvm.fabs.v16f32
+ %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef)
+
+ ; SSE2: cost of 2 {{.*}} %F64 = call double @llvm.fabs.f64
+ ; SSE42: cost of 2 {{.*}} %F64 = call double @llvm.fabs.f64
+ ; AVX: cost of 2 {{.*}} %F64 = call double @llvm.fabs.f64
+ ; AVX2: cost of 2 {{.*}} %F64 = call double @llvm.fabs.f64
+ ; AVX512: cost of 2 {{.*}} %F64 = call double @llvm.fabs.f64
+ %F64 = call double @llvm.fabs.f64(double undef)
+ ; SSE2: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.fabs.v2f64
+ ; SSE42: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.fabs.v2f64
+ ; AVX: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.fabs.v2f64
+ ; AVX2: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.fabs.v2f64
+ ; AVX512: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.fabs.v2f64
+ %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef)
+ ; SSE2: cost of 4 {{.*}} %V4F64 = call <4 x double> @llvm.fabs.v4f64
+ ; SSE42: cost of 4 {{.*}} %V4F64 = call <4 x double> @llvm.fabs.v4f64
+ ; AVX: cost of 2 {{.*}} %V4F64 = call <4 x double> @llvm.fabs.v4f64
+ ; AVX2: cost of 2 {{.*}} %V4F64 = call <4 x double> @llvm.fabs.v4f64
+ ; AVX512: cost of 2 {{.*}} %V4F64 = call <4 x double> @llvm.fabs.v4f64
+ %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef)
+ ; SSE2: cost of 8 {{.*}} %V8F64 = call <8 x double> @llvm.fabs.v8f64
+ ; SSE42: cost of 8 {{.*}} %V8F64 = call <8 x double> @llvm.fabs.v8f64
+ ; AVX: cost of 4 {{.*}} %V8F64 = call <8 x double> @llvm.fabs.v8f64
+ ; AVX2: cost of 4 {{.*}} %V8F64 = call <8 x double> @llvm.fabs.v8f64
+ ; AVX512: cost of 2 {{.*}} %V8F64 = call <8 x double> @llvm.fabs.v8f64
+ %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef)
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'fcopysign'
+define i32 @fcopysign(i32 %arg) {
+ ; SSE2: cost of 2 {{.*}} %F32 = call float @llvm.copysign.f32
+ ; SSE42: cost of 2 {{.*}} %F32 = call float @llvm.copysign.f32
+ ; AVX: cost of 2 {{.*}} %F32 = call float @llvm.copysign.f32
+ ; AVX2: cost of 2 {{.*}} %F32 = call float @llvm.copysign.f32
+ ; AVX512: cost of 2 {{.*}} %F32 = call float @llvm.copysign.f32
+ %F32 = call float @llvm.copysign.f32(float undef, float undef)
+ ; SSE2: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.copysign.v4f32
+ ; SSE42: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.copysign.v4f32
+ ; AVX: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.copysign.v4f32
+ ; AVX2: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.copysign.v4f32
+ ; AVX512: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.copysign.v4f32
+ %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef)
+ ; SSE2: cost of 4 {{.*}} %V8F32 = call <8 x float> @llvm.copysign.v8f32
+ ; SSE42: cost of 4 {{.*}} %V8F32 = call <8 x float> @llvm.copysign.v8f32
+ ; AVX: cost of 2 {{.*}} %V8F32 = call <8 x float> @llvm.copysign.v8f32
+ ; AVX2: cost of 2 {{.*}} %V8F32 = call <8 x float> @llvm.copysign.v8f32
+ ; AVX512: cost of 2 {{.*}} %V8F32 = call <8 x float> @llvm.copysign.v8f32
+ %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef)
+ ; SSE2: cost of 8 {{.*}} %V16F32 = call <16 x float> @llvm.copysign.v16f32
+ ; SSE42: cost of 8 {{.*}} %V16F32 = call <16 x float> @llvm.copysign.v16f32
+ ; AVX: cost of 4 {{.*}} %V16F32 = call <16 x float> @llvm.copysign.v16f32
+ ; AVX2: cost of 4 {{.*}} %V16F32 = call <16 x float> @llvm.copysign.v16f32
+ ; AVX512: cost of 2 {{.*}} %V16F32 = call <16 x float> @llvm.copysign.v16f32
+ %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef)
+
+ ; SSE2: cost of 2 {{.*}} %F64 = call double @llvm.copysign.f64
+ ; SSE42: cost of 2 {{.*}} %F64 = call double @llvm.copysign.f64
+ ; AVX: cost of 2 {{.*}} %F64 = call double @llvm.copysign.f64
+ ; AVX2: cost of 2 {{.*}} %F64 = call double @llvm.copysign.f64
+ ; AVX512: cost of 2 {{.*}} %F64 = call double @llvm.copysign.f64
+ %F64 = call double @llvm.copysign.f64(double undef, double undef)
+ ; SSE2: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.copysign.v2f64
+ ; SSE42: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.copysign.v2f64
+ ; AVX: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.copysign.v2f64
+ ; AVX2: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.copysign.v2f64
+ ; AVX512: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.copysign.v2f64
+ %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
+ ; SSE2: cost of 4 {{.*}} %V4F64 = call <4 x double> @llvm.copysign.v4f64
+ ; SSE42: cost of 4 {{.*}} %V4F64 = call <4 x double> @llvm.copysign.v4f64
+ ; AVX: cost of 2 {{.*}} %V4F64 = call <4 x double> @llvm.copysign.v4f64
+ ; AVX2: cost of 2 {{.*}} %V4F64 = call <4 x double> @llvm.copysign.v4f64
+ ; AVX512: cost of 2 {{.*}} %V4F64 = call <4 x double> @llvm.copysign.v4f64
+ %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
+ ; SSE2: cost of 8 {{.*}} %V8F64 = call <8 x double> @llvm.copysign.v8f64
+ ; SSE42: cost of 8 {{.*}} %V8F64 = call <8 x double> @llvm.copysign.v8f64
+ ; AVX: cost of 4 {{.*}} %V8F64 = call <8 x double> @llvm.copysign.v8f64
+ ; AVX2: cost of 4 {{.*}} %V8F64 = call <8 x double> @llvm.copysign.v8f64
+ ; AVX512: cost of 2 {{.*}} %V8F64 = call <8 x double> @llvm.copysign.v8f64
+ %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'fma'
+define i32 @fma(i32 %arg) {
+ ; SSE2: cost of 10 {{.*}} %F32 = call float @llvm.fma.f32
+ ; SSE42: cost of 10 {{.*}} %F32 = call float @llvm.fma.f32
+ ; AVX: cost of 1 {{.*}} %F32 = call float @llvm.fma.f32
+ ; AVX2: cost of 1 {{.*}} %F32 = call float @llvm.fma.f32
+ ; AVX512: cost of 1 {{.*}} %F32 = call float @llvm.fma.f32
+ %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
+ ; SSE2: cost of 52 {{.*}} %V4F32 = call <4 x float> @llvm.fma.v4f32
+ ; SSE42: cost of 52 {{.*}} %V4F32 = call <4 x float> @llvm.fma.v4f32
+ ; AVX: cost of 1 {{.*}} %V4F32 = call <4 x float> @llvm.fma.v4f32
+ ; AVX2: cost of 1 {{.*}} %V4F32 = call <4 x float> @llvm.fma.v4f32
+ ; AVX512: cost of 1 {{.*}} %V4F32 = call <4 x float> @llvm.fma.v4f32
+ %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
+ ; SSE2: cost of 104 {{.*}} %V8F32 = call <8 x float> @llvm.fma.v8f32
+ ; SSE42: cost of 104 {{.*}} %V8F32 = call <8 x float> @llvm.fma.v8f32
+ ; AVX: cost of 1 {{.*}} %V8F32 = call <8 x float> @llvm.fma.v8f32
+ ; AVX2: cost of 1 {{.*}} %V8F32 = call <8 x float> @llvm.fma.v8f32
+ ; AVX512: cost of 1 {{.*}} %V8F32 = call <8 x float> @llvm.fma.v8f32
+ %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
+ ; SSE2: cost of 208 {{.*}} %V16F32 = call <16 x float> @llvm.fma.v16f32
+ ; SSE42: cost of 208 {{.*}} %V16F32 = call <16 x float> @llvm.fma.v16f32
+ ; AVX: cost of 4 {{.*}} %V16F32 = call <16 x float> @llvm.fma.v16f32
+ ; AVX2: cost of 4 {{.*}} %V16F32 = call <16 x float> @llvm.fma.v16f32
+ ; AVX512: cost of 1 {{.*}} %V16F32 = call <16 x float> @llvm.fma.v16f32
+ %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
+
+ ; SSE2: cost of 10 {{.*}} %F64 = call double @llvm.fma.f64
+ ; SSE42: cost of 10 {{.*}} %F64 = call double @llvm.fma.f64
+ ; AVX: cost of 1 {{.*}} %F64 = call double @llvm.fma.f64
+ ; AVX2: cost of 1 {{.*}} %F64 = call double @llvm.fma.f64
+ ; AVX512: cost of 1 {{.*}} %F64 = call double @llvm.fma.f64
+ %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
+ ; SSE2: cost of 24 {{.*}} %V2F64 = call <2 x double> @llvm.fma.v2f64
+ ; SSE42: cost of 24 {{.*}} %V2F64 = call <2 x double> @llvm.fma.v2f64
+ ; AVX: cost of 1 {{.*}} %V2F64 = call <2 x double> @llvm.fma.v2f64
+ ; AVX2: cost of 1 {{.*}} %V2F64 = call <2 x double> @llvm.fma.v2f64
+ ; AVX512: cost of 1 {{.*}} %V2F64 = call <2 x double> @llvm.fma.v2f64
+ %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
+ ; SSE2: cost of 48 {{.*}} %V4F64 = call <4 x double> @llvm.fma.v4f64
+ ; SSE42: cost of 48 {{.*}} %V4F64 = call <4 x double> @llvm.fma.v4f64
+ ; AVX: cost of 1 {{.*}} %V4F64 = call <4 x double> @llvm.fma.v4f64
+ ; AVX2: cost of 1 {{.*}} %V4F64 = call <4 x double> @llvm.fma.v4f64
+ ; AVX512: cost of 1 {{.*}} %V4F64 = call <4 x double> @llvm.fma.v4f64
+ %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
+ ; SSE2: cost of 96 {{.*}} %V8F64 = call <8 x double> @llvm.fma.v8f64
+ ; SSE42: cost of 96 {{.*}} %V8F64 = call <8 x double> @llvm.fma.v8f64
+ ; AVX: cost of 4 {{.*}} %V8F64 = call <8 x double> @llvm.fma.v8f64
+ ; AVX2: cost of 4 {{.*}} %V8F64 = call <8 x double> @llvm.fma.v8f64
+ ; AVX512: cost of 1 {{.*}} %V8F64 = call <8 x double> @llvm.fma.v8f64
+ %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
+
+ ret i32 undef
+}
+
+declare float @llvm.sqrt.f32(float)
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
+declare <8 x float> @llvm.sqrt.v8f32(<8 x float>)
+declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
+
+declare double @llvm.sqrt.f64(double)
+declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
+declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)
+declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
+
+declare float @llvm.fabs.f32(float)
+declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
+declare <8 x float> @llvm.fabs.v8f32(<8 x float>)
+declare <16 x float> @llvm.fabs.v16f32(<16 x float>)
+
+declare double @llvm.fabs.f64(double)
+declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
+declare <4 x double> @llvm.fabs.v4f64(<4 x double>)
+declare <8 x double> @llvm.fabs.v8f64(<8 x double>)
+
+declare float @llvm.copysign.f32(float, float)
+declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>)
+declare <8 x float> @llvm.copysign.v8f32(<8 x float>, <8 x float>)
+declare <16 x float> @llvm.copysign.v16f32(<16 x float>, <16 x float>)
+
+declare double @llvm.copysign.f64(double, double)
+declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>)
+declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>)
+declare <8 x double> @llvm.copysign.v8f64(<8 x double>, <8 x double>)
+
+declare float @llvm.fma.f32(float, float, float)
+declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
+declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>)
+declare <16 x float> @llvm.fma.v16f32(<16 x float>, <16 x float>, <16 x float>)
+
+declare double @llvm.fma.f64(double, double, double)
+declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
+declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>)
+declare <8 x double> @llvm.fma.v8f64(<8 x double>, <8 x double>, <8 x double>)
diff --git a/test/Analysis/CostModel/X86/arith.ll b/test/Analysis/CostModel/X86/arith.ll
index a35db9c68ffb..7319efb413d6 100644
--- a/test/Analysis/CostModel/X86/arith.ll
+++ b/test/Analysis/CostModel/X86/arith.ll
@@ -2,6 +2,9 @@
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE42
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512DQ
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
@@ -12,27 +15,246 @@ define i32 @add(i32 %arg) {
; SSE42: cost of 1 {{.*}} %A = add
; AVX: cost of 1 {{.*}} %A = add
; AVX2: cost of 1 {{.*}} %A = add
- %A = add <4 x i32> undef, undef
+ ; AVX512: cost of 1 {{.*}} %A = add
+ %A = add <2 x i64> undef, undef
; SSSE3: cost of 2 {{.*}} %B = add
; SSE42: cost of 2 {{.*}} %B = add
- ; AVX: cost of 4 {{.*}} %B = add
+ ; AVX: cost of 4 {{.*}} %B = add
; AVX2: cost of 1 {{.*}} %B = add
- %B = add <8 x i32> undef, undef
- ; SSSE3: cost of 1 {{.*}} %C = add
- ; SSE42: cost of 1 {{.*}} %C = add
- ; AVX: cost of 1 {{.*}} %C = add
- ; AVX2: cost of 1 {{.*}} %C = add
- %C = add <2 x i64> undef, undef
- ; SSSE3: cost of 2 {{.*}} %D = add
- ; SSE42: cost of 2 {{.*}} %D = add
- ; AVX: cost of 4 {{.*}} %D = add
+ ; AVX512: cost of 1 {{.*}} %B = add
+ %B = add <4 x i64> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %C = add
+ ; SSE42: cost of 4 {{.*}} %C = add
+ ; AVX: cost of 8 {{.*}} %C = add
+ ; AVX2: cost of 2 {{.*}} %C = add
+ ; AVX512: cost of 1 {{.*}} %C = add
+ %C = add <8 x i64> undef, undef
+
+ ; SSSE3: cost of 1 {{.*}} %D = add
+ ; SSE42: cost of 1 {{.*}} %D = add
+ ; AVX: cost of 1 {{.*}} %D = add
; AVX2: cost of 1 {{.*}} %D = add
- %D = add <4 x i64> undef, undef
- ; SSSE3: cost of 4 {{.*}} %E = add
- ; SSE42: cost of 4 {{.*}} %E = add
- ; AVX: cost of 8 {{.*}} %E = add
- ; AVX2: cost of 2 {{.*}} %E = add
- %E = add <8 x i64> undef, undef
+ ; AVX512: cost of 1 {{.*}} %D = add
+ %D = add <4 x i32> undef, undef
+ ; SSSE3: cost of 2 {{.*}} %E = add
+ ; SSE42: cost of 2 {{.*}} %E = add
+ ; AVX: cost of 4 {{.*}} %E = add
+ ; AVX2: cost of 1 {{.*}} %E = add
+ ; AVX512: cost of 1 {{.*}} %E = add
+ %E = add <8 x i32> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %F = add
+ ; SSE42: cost of 4 {{.*}} %F = add
+ ; AVX: cost of 8 {{.*}} %F = add
+ ; AVX2: cost of 2 {{.*}} %F = add
+ ; AVX512: cost of 1 {{.*}} %F = add
+ %F = add <16 x i32> undef, undef
+
+ ; SSSE3: cost of 1 {{.*}} %G = add
+ ; SSE42: cost of 1 {{.*}} %G = add
+ ; AVX: cost of 1 {{.*}} %G = add
+ ; AVX2: cost of 1 {{.*}} %G = add
+ ; AVX512: cost of 1 {{.*}} %G = add
+ %G = add <8 x i16> undef, undef
+ ; SSSE3: cost of 2 {{.*}} %H = add
+ ; SSE42: cost of 2 {{.*}} %H = add
+ ; AVX: cost of 4 {{.*}} %H = add
+ ; AVX2: cost of 1 {{.*}} %H = add
+ ; AVX512: cost of 1 {{.*}} %H = add
+ %H = add <16 x i16> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %I = add
+ ; SSE42: cost of 4 {{.*}} %I = add
+ ; AVX: cost of 8 {{.*}} %I = add
+ ; AVX2: cost of 2 {{.*}} %I = add
+ ; AVX512F: cost of 2 {{.*}} %I = add
+ ; AVX512BW: cost of 1 {{.*}} %I = add
+ %I = add <32 x i16> undef, undef
+
+ ; SSSE3: cost of 1 {{.*}} %J = add
+ ; SSE42: cost of 1 {{.*}} %J = add
+ ; AVX: cost of 1 {{.*}} %J = add
+ ; AVX2: cost of 1 {{.*}} %J = add
+ ; AVX512: cost of 1 {{.*}} %J = add
+ %J = add <16 x i8> undef, undef
+ ; SSSE3: cost of 2 {{.*}} %K = add
+ ; SSE42: cost of 2 {{.*}} %K = add
+ ; AVX: cost of 4 {{.*}} %K = add
+ ; AVX2: cost of 1 {{.*}} %K = add
+ ; AVX512: cost of 1 {{.*}} %K = add
+ %K = add <32 x i8> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %L = add
+ ; SSE42: cost of 4 {{.*}} %L = add
+ ; AVX: cost of 8 {{.*}} %L = add
+ ; AVX2: cost of 2 {{.*}} %L = add
+ ; AVX512F: cost of 2 {{.*}} %L = add
+ ; AVX512BW: cost of 1 {{.*}} %L = add
+ %L = add <64 x i8> undef, undef
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'sub'
+define i32 @sub(i32 %arg) {
+ ; SSSE3: cost of 1 {{.*}} %A = sub
+ ; SSE42: cost of 1 {{.*}} %A = sub
+ ; AVX: cost of 1 {{.*}} %A = sub
+ ; AVX2: cost of 1 {{.*}} %A = sub
+ ; AVX512: cost of 1 {{.*}} %A = sub
+ %A = sub <2 x i64> undef, undef
+ ; SSSE3: cost of 2 {{.*}} %B = sub
+ ; SSE42: cost of 2 {{.*}} %B = sub
+ ; AVX: cost of 4 {{.*}} %B = sub
+ ; AVX2: cost of 1 {{.*}} %B = sub
+ ; AVX512: cost of 1 {{.*}} %B = sub
+ %B = sub <4 x i64> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %C = sub
+ ; SSE42: cost of 4 {{.*}} %C = sub
+ ; AVX: cost of 8 {{.*}} %C = sub
+ ; AVX2: cost of 2 {{.*}} %C = sub
+ ; AVX512: cost of 1 {{.*}} %C = sub
+ %C = sub <8 x i64> undef, undef
+
+ ; SSSE3: cost of 1 {{.*}} %D = sub
+ ; SSE42: cost of 1 {{.*}} %D = sub
+ ; AVX: cost of 1 {{.*}} %D = sub
+ ; AVX2: cost of 1 {{.*}} %D = sub
+ ; AVX512: cost of 1 {{.*}} %D = sub
+ %D = sub <4 x i32> undef, undef
+ ; SSSE3: cost of 2 {{.*}} %E = sub
+ ; SSE42: cost of 2 {{.*}} %E = sub
+ ; AVX: cost of 4 {{.*}} %E = sub
+ ; AVX2: cost of 1 {{.*}} %E = sub
+ ; AVX512: cost of 1 {{.*}} %E = sub
+ %E = sub <8 x i32> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %F = sub
+ ; SSE42: cost of 4 {{.*}} %F = sub
+ ; AVX: cost of 8 {{.*}} %F = sub
+ ; AVX2: cost of 2 {{.*}} %F = sub
+ ; AVX512: cost of 1 {{.*}} %F = sub
+ %F = sub <16 x i32> undef, undef
+
+ ; SSSE3: cost of 1 {{.*}} %G = sub
+ ; SSE42: cost of 1 {{.*}} %G = sub
+ ; AVX: cost of 1 {{.*}} %G = sub
+ ; AVX2: cost of 1 {{.*}} %G = sub
+ ; AVX512: cost of 1 {{.*}} %G = sub
+ %G = sub <8 x i16> undef, undef
+ ; SSSE3: cost of 2 {{.*}} %H = sub
+ ; SSE42: cost of 2 {{.*}} %H = sub
+ ; AVX: cost of 4 {{.*}} %H = sub
+ ; AVX2: cost of 1 {{.*}} %H = sub
+ ; AVX512: cost of 1 {{.*}} %H = sub
+ %H = sub <16 x i16> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %I = sub
+ ; SSE42: cost of 4 {{.*}} %I = sub
+ ; AVX: cost of 8 {{.*}} %I = sub
+ ; AVX2: cost of 2 {{.*}} %I = sub
+ ; AVX512F: cost of 2 {{.*}} %I = sub
+ ; AVX512BW: cost of 1 {{.*}} %I = sub
+ %I = sub <32 x i16> undef, undef
+
+ ; SSSE3: cost of 1 {{.*}} %J = sub
+ ; SSE42: cost of 1 {{.*}} %J = sub
+ ; AVX: cost of 1 {{.*}} %J = sub
+ ; AVX2: cost of 1 {{.*}} %J = sub
+ ; AVX512: cost of 1 {{.*}} %J = sub
+ %J = sub <16 x i8> undef, undef
+ ; SSSE3: cost of 2 {{.*}} %K = sub
+ ; SSE42: cost of 2 {{.*}} %K = sub
+ ; AVX: cost of 4 {{.*}} %K = sub
+ ; AVX2: cost of 1 {{.*}} %K = sub
+ ; AVX512: cost of 1 {{.*}} %K = sub
+ %K = sub <32 x i8> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %L = sub
+ ; SSE42: cost of 4 {{.*}} %L = sub
+ ; AVX: cost of 8 {{.*}} %L = sub
+ ; AVX2: cost of 2 {{.*}} %L = sub
+ ; AVX512F: cost of 2 {{.*}} %L = sub
+ ; AVX512BW: cost of 1 {{.*}} %L = sub
+ %L = sub <64 x i8> undef, undef
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'or'
+define i32 @or(i32 %arg) {
+ ; SSSE3: cost of 1 {{.*}} %A = or
+ ; SSE42: cost of 1 {{.*}} %A = or
+ ; AVX: cost of 1 {{.*}} %A = or
+ ; AVX2: cost of 1 {{.*}} %A = or
+ ; AVX512: cost of 1 {{.*}} %A = or
+ %A = or <2 x i64> undef, undef
+ ; SSSE3: cost of 2 {{.*}} %B = or
+ ; SSE42: cost of 2 {{.*}} %B = or
+ ; AVX: cost of 1 {{.*}} %B = or
+ ; AVX2: cost of 1 {{.*}} %B = or
+ ; AVX512: cost of 1 {{.*}} %B = or
+ %B = or <4 x i64> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %C = or
+ ; SSE42: cost of 4 {{.*}} %C = or
+ ; AVX: cost of 2 {{.*}} %C = or
+ ; AVX2: cost of 2 {{.*}} %C = or
+ ; AVX512: cost of 1 {{.*}} %C = or
+ %C = or <8 x i64> undef, undef
+
+ ; SSSE3: cost of 1 {{.*}} %D = or
+ ; SSE42: cost of 1 {{.*}} %D = or
+ ; AVX: cost of 1 {{.*}} %D = or
+ ; AVX2: cost of 1 {{.*}} %D = or
+ ; AVX512: cost of 1 {{.*}} %D = or
+ %D = or <4 x i32> undef, undef
+ ; SSSE3: cost of 2 {{.*}} %E = or
+ ; SSE42: cost of 2 {{.*}} %E = or
+ ; AVX: cost of 1 {{.*}} %E = or
+ ; AVX2: cost of 1 {{.*}} %E = or
+ ; AVX512: cost of 1 {{.*}} %E = or
+ %E = or <8 x i32> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %F = or
+ ; SSE42: cost of 4 {{.*}} %F = or
+ ; AVX: cost of 2 {{.*}} %F = or
+ ; AVX2: cost of 2 {{.*}} %F = or
+ ; AVX512: cost of 1 {{.*}} %F = or
+ %F = or <16 x i32> undef, undef
+
+ ; SSSE3: cost of 1 {{.*}} %G = or
+ ; SSE42: cost of 1 {{.*}} %G = or
+ ; AVX: cost of 1 {{.*}} %G = or
+ ; AVX2: cost of 1 {{.*}} %G = or
+ ; AVX512: cost of 1 {{.*}} %G = or
+ %G = or <8 x i16> undef, undef
+ ; SSSE3: cost of 2 {{.*}} %H = or
+ ; SSE42: cost of 2 {{.*}} %H = or
+ ; AVX: cost of 1 {{.*}} %H = or
+ ; AVX2: cost of 1 {{.*}} %H = or
+ ; AVX512: cost of 1 {{.*}} %H = or
+ %H = or <16 x i16> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %I = or
+ ; SSE42: cost of 4 {{.*}} %I = or
+ ; AVX: cost of 2 {{.*}} %I = or
+ ; AVX2: cost of 2 {{.*}} %I = or
+ ; AVX512F: cost of 2 {{.*}} %I = or
+ ; AVX512BW: cost of 1 {{.*}} %I = or
+ %I = or <32 x i16> undef, undef
+
+ ; SSSE3: cost of 1 {{.*}} %J = or
+ ; SSE42: cost of 1 {{.*}} %J = or
+ ; AVX: cost of 1 {{.*}} %J = or
+ ; AVX2: cost of 1 {{.*}} %J = or
+ ; AVX512: cost of 1 {{.*}} %J = or
+ %J = or <16 x i8> undef, undef
+ ; SSSE3: cost of 2 {{.*}} %K = or
+ ; SSE42: cost of 2 {{.*}} %K = or
+ ; AVX: cost of 1 {{.*}} %K = or
+ ; AVX2: cost of 1 {{.*}} %K = or
+ ; AVX512: cost of 1 {{.*}} %K = or
+ %K = or <32 x i8> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %L = or
+ ; SSE42: cost of 4 {{.*}} %L = or
+ ; AVX: cost of 2 {{.*}} %L = or
+ ; AVX2: cost of 2 {{.*}} %L = or
+ ; AVX512F: cost of 2 {{.*}} %L = or
+ ; AVX512BW: cost of 1 {{.*}} %L = or
+ %L = or <64 x i8> undef, undef
+
ret i32 undef
}
@@ -42,132 +264,270 @@ define i32 @xor(i32 %arg) {
; SSE42: cost of 1 {{.*}} %A = xor
; AVX: cost of 1 {{.*}} %A = xor
; AVX2: cost of 1 {{.*}} %A = xor
- %A = xor <4 x i32> undef, undef
+ ; AVX512: cost of 1 {{.*}} %A = xor
+ %A = xor <2 x i64> undef, undef
; SSSE3: cost of 2 {{.*}} %B = xor
; SSE42: cost of 2 {{.*}} %B = xor
; AVX: cost of 1 {{.*}} %B = xor
; AVX2: cost of 1 {{.*}} %B = xor
- %B = xor <8 x i32> undef, undef
- ; SSSE3: cost of 1 {{.*}} %C = xor
- ; SSE42: cost of 1 {{.*}} %C = xor
- ; AVX: cost of 1 {{.*}} %C = xor
- ; AVX2: cost of 1 {{.*}} %C = xor
- %C = xor <2 x i64> undef, undef
- ; SSSE3: cost of 2 {{.*}} %D = xor
- ; SSE42: cost of 2 {{.*}} %D = xor
+ ; AVX512: cost of 1 {{.*}} %B = xor
+ %B = xor <4 x i64> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %C = xor
+ ; SSE42: cost of 4 {{.*}} %C = xor
+ ; AVX: cost of 2 {{.*}} %C = xor
+ ; AVX2: cost of 2 {{.*}} %C = xor
+ ; AVX512: cost of 1 {{.*}} %C = xor
+ %C = xor <8 x i64> undef, undef
+
+ ; SSSE3: cost of 1 {{.*}} %D = xor
+ ; SSE42: cost of 1 {{.*}} %D = xor
; AVX: cost of 1 {{.*}} %D = xor
; AVX2: cost of 1 {{.*}} %D = xor
- %D = xor <4 x i64> undef, undef
+ ; AVX512: cost of 1 {{.*}} %D = xor
+ %D = xor <4 x i32> undef, undef
+ ; SSSE3: cost of 2 {{.*}} %E = xor
+ ; SSE42: cost of 2 {{.*}} %E = xor
+ ; AVX: cost of 1 {{.*}} %E = xor
+ ; AVX2: cost of 1 {{.*}} %E = xor
+ ; AVX512: cost of 1 {{.*}} %E = xor
+ %E = xor <8 x i32> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %F = xor
+ ; SSE42: cost of 4 {{.*}} %F = xor
+ ; AVX: cost of 2 {{.*}} %F = xor
+ ; AVX2: cost of 2 {{.*}} %F = xor
+ ; AVX512: cost of 1 {{.*}} %F = xor
+ %F = xor <16 x i32> undef, undef
+
+ ; SSSE3: cost of 1 {{.*}} %G = xor
+ ; SSE42: cost of 1 {{.*}} %G = xor
+ ; AVX: cost of 1 {{.*}} %G = xor
+ ; AVX2: cost of 1 {{.*}} %G = xor
+ ; AVX512: cost of 1 {{.*}} %G = xor
+ %G = xor <8 x i16> undef, undef
+ ; SSSE3: cost of 2 {{.*}} %H = xor
+ ; SSE42: cost of 2 {{.*}} %H = xor
+ ; AVX: cost of 1 {{.*}} %H = xor
+ ; AVX2: cost of 1 {{.*}} %H = xor
+ ; AVX512: cost of 1 {{.*}} %H = xor
+ %H = xor <16 x i16> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %I = xor
+ ; SSE42: cost of 4 {{.*}} %I = xor
+ ; AVX: cost of 2 {{.*}} %I = xor
+ ; AVX2: cost of 2 {{.*}} %I = xor
+ ; AVX512F: cost of 2 {{.*}} %I = xor
+ ; AVX512BW: cost of 1 {{.*}} %I = xor
+ %I = xor <32 x i16> undef, undef
+
+ ; SSSE3: cost of 1 {{.*}} %J = xor
+ ; SSE42: cost of 1 {{.*}} %J = xor
+ ; AVX: cost of 1 {{.*}} %J = xor
+ ; AVX2: cost of 1 {{.*}} %J = xor
+ ; AVX512: cost of 1 {{.*}} %J = xor
+ %J = xor <16 x i8> undef, undef
+ ; SSSE3: cost of 2 {{.*}} %K = xor
+ ; SSE42: cost of 2 {{.*}} %K = xor
+ ; AVX: cost of 1 {{.*}} %K = xor
+ ; AVX2: cost of 1 {{.*}} %K = xor
+ ; AVX512: cost of 1 {{.*}} %K = xor
+ %K = xor <32 x i8> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %L = xor
+ ; SSE42: cost of 4 {{.*}} %L = xor
+ ; AVX: cost of 2 {{.*}} %L = xor
+ ; AVX2: cost of 2 {{.*}} %L = xor
+ ; AVX512F: cost of 2 {{.*}} %L = xor
+ ; AVX512BW: cost of 1 {{.*}} %L = xor
+ %L = xor <64 x i8> undef, undef
+
ret i32 undef
}
-; CHECK-LABEL: 'mul'
-define void @mul() {
- ; A <2 x i32> gets expanded to a <2 x i64> vector.
- ; A <2 x i64> vector multiply is implemented using
- ; 3 PMULUDQ and 2 PADDS and 4 shifts.
- ; SSSE3: cost of 9 {{.*}} %A0 = mul
- ; SSE42: cost of 9 {{.*}} %A0 = mul
- ; AVX: cost of 9 {{.*}} %A0 = mul
- ; AVX2: cost of 9 {{.*}} %A0 = mul
- %A0 = mul <2 x i32> undef, undef
- ; SSSE3: cost of 6 {{.*}} %A1 = mul
- ; SSE42: cost of 1 {{.*}} %A1 = mul
- ; AVX: cost of 1 {{.*}} %A1 = mul
- ; AVX2: cost of 1 {{.*}} %A1 = mul
- %A1 = mul <4 x i32> undef, undef
- ; SSSE3: cost of 9 {{.*}} %A2 = mul
- ; SSE42: cost of 9 {{.*}} %A2 = mul
- ; AVX: cost of 9 {{.*}} %A2 = mul
- ; AVX2: cost of 9 {{.*}} %A2 = mul
- %A2 = mul <2 x i64> undef, undef
- ; SSSE3: cost of 18 {{.*}} %A3 = mul
- ; SSE42: cost of 18 {{.*}} %A3 = mul
- ; AVX: cost of 18 {{.*}} %A3 = mul
- ; AVX2: cost of 9 {{.*}} %A3 = mul
- %A3 = mul <4 x i64> undef, undef
- ret void
+; CHECK-LABEL: 'and'
+define i32 @and(i32 %arg) {
+ ; SSSE3: cost of 1 {{.*}} %A = and
+ ; SSE42: cost of 1 {{.*}} %A = and
+ ; AVX: cost of 1 {{.*}} %A = and
+ ; AVX2: cost of 1 {{.*}} %A = and
+ ; AVX512: cost of 1 {{.*}} %A = and
+ %A = and <2 x i64> undef, undef
+ ; SSSE3: cost of 2 {{.*}} %B = and
+ ; SSE42: cost of 2 {{.*}} %B = and
+ ; AVX: cost of 1 {{.*}} %B = and
+ ; AVX2: cost of 1 {{.*}} %B = and
+ ; AVX512: cost of 1 {{.*}} %B = and
+ %B = and <4 x i64> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %C = and
+ ; SSE42: cost of 4 {{.*}} %C = and
+ ; AVX: cost of 2 {{.*}} %C = and
+ ; AVX2: cost of 2 {{.*}} %C = and
+ ; AVX512: cost of 1 {{.*}} %C = and
+ %C = and <8 x i64> undef, undef
+
+ ; SSSE3: cost of 1 {{.*}} %D = and
+ ; SSE42: cost of 1 {{.*}} %D = and
+ ; AVX: cost of 1 {{.*}} %D = and
+ ; AVX2: cost of 1 {{.*}} %D = and
+ ; AVX512: cost of 1 {{.*}} %D = and
+ %D = and <4 x i32> undef, undef
+ ; SSSE3: cost of 2 {{.*}} %E = and
+ ; SSE42: cost of 2 {{.*}} %E = and
+ ; AVX: cost of 1 {{.*}} %E = and
+ ; AVX2: cost of 1 {{.*}} %E = and
+ ; AVX512: cost of 1 {{.*}} %E = and
+ %E = and <8 x i32> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %F = and
+ ; SSE42: cost of 4 {{.*}} %F = and
+ ; AVX: cost of 2 {{.*}} %F = and
+ ; AVX2: cost of 2 {{.*}} %F = and
+ ; AVX512: cost of 1 {{.*}} %F = and
+ %F = and <16 x i32> undef, undef
+
+ ; SSSE3: cost of 1 {{.*}} %G = and
+ ; SSE42: cost of 1 {{.*}} %G = and
+ ; AVX: cost of 1 {{.*}} %G = and
+ ; AVX2: cost of 1 {{.*}} %G = and
+ ; AVX512: cost of 1 {{.*}} %G = and
+ %G = and <8 x i16> undef, undef
+ ; SSSE3: cost of 2 {{.*}} %H = and
+ ; SSE42: cost of 2 {{.*}} %H = and
+ ; AVX: cost of 1 {{.*}} %H = and
+ ; AVX2: cost of 1 {{.*}} %H = and
+ ; AVX512: cost of 1 {{.*}} %H = and
+ %H = and <16 x i16> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %I = and
+ ; SSE42: cost of 4 {{.*}} %I = and
+ ; AVX: cost of 2 {{.*}} %I = and
+ ; AVX2: cost of 2 {{.*}} %I = and
+ ; AVX512F: cost of 2 {{.*}} %I = and
+ ; AVX512BW: cost of 1 {{.*}} %I = and
+ %I = and <32 x i16> undef, undef
+
+ ; SSSE3: cost of 1 {{.*}} %J = and
+ ; SSE42: cost of 1 {{.*}} %J = and
+ ; AVX: cost of 1 {{.*}} %J = and
+ ; AVX2: cost of 1 {{.*}} %J = and
+ ; AVX512: cost of 1 {{.*}} %J = and
+ %J = and <16 x i8> undef, undef
+ ; SSSE3: cost of 2 {{.*}} %K = and
+ ; SSE42: cost of 2 {{.*}} %K = and
+ ; AVX: cost of 1 {{.*}} %K = and
+ ; AVX2: cost of 1 {{.*}} %K = and
+ ; AVX512: cost of 1 {{.*}} %K = and
+ %K = and <32 x i8> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %L = and
+ ; SSE42: cost of 4 {{.*}} %L = and
+ ; AVX: cost of 2 {{.*}} %L = and
+ ; AVX2: cost of 2 {{.*}} %L = and
+ ; AVX512F: cost of 2 {{.*}} %L = and
+ ; AVX512BW: cost of 1 {{.*}} %L = and
+ %L = and <64 x i8> undef, undef
+
+ ret i32 undef
}
-; CHECK-LABEL: 'fmul'
-define i32 @fmul(i32 %arg) {
- ; SSSE3: cost of 2 {{.*}} %A = fmul
- ; SSE42: cost of 2 {{.*}} %A = fmul
- ; AVX: cost of 2 {{.*}} %A = fmul
- ; AVX2: cost of 2 {{.*}} %A = fmul
- %A = fmul <4 x float> undef, undef
- ; SSSE3: cost of 4 {{.*}} %B = fmul
- ; SSE42: cost of 4 {{.*}} %B = fmul
- ; AVX: cost of 2 {{.*}} %B = fmul
- ; AVX2: cost of 2 {{.*}} %B = fmul
- %B = fmul <8 x float> undef, undef
+; CHECK-LABEL: 'mul'
+define i32 @mul(i32 %arg) {
+ ; SSSE3: cost of 8 {{.*}} %A = mul
+ ; SSE42: cost of 8 {{.*}} %A = mul
+ ; AVX: cost of 8 {{.*}} %A = mul
+ ; AVX2: cost of 8 {{.*}} %A = mul
+ ; AVX512F: cost of 8 {{.*}} %A = mul
+ ; AVX512BW: cost of 8 {{.*}} %A = mul
+ ; AVX512DQ: cost of 1 {{.*}} %A = mul
+ %A = mul <2 x i64> undef, undef
+ ; SSSE3: cost of 16 {{.*}} %B = mul
+ ; SSE42: cost of 16 {{.*}} %B = mul
+ ; AVX: cost of 16 {{.*}} %B = mul
+ ; AVX2: cost of 8 {{.*}} %B = mul
+ ; AVX512F: cost of 8 {{.*}} %B = mul
+ ; AVX512BW: cost of 8 {{.*}} %B = mul
+ ; AVX512DQ: cost of 1 {{.*}} %B = mul
+ %B = mul <4 x i64> undef, undef
+ ; SSSE3: cost of 32 {{.*}} %C = mul
+ ; SSE42: cost of 32 {{.*}} %C = mul
+ ; AVX: cost of 32 {{.*}} %C = mul
+ ; AVX2: cost of 16 {{.*}} %C = mul
+ ; AVX512F: cost of 8 {{.*}} %C = mul
+ ; AVX512BW: cost of 8 {{.*}} %C = mul
+ ; AVX512DQ: cost of 1 {{.*}} %C = mul
+ %C = mul <8 x i64> undef, undef
+
+ ; SSSE3: cost of 6 {{.*}} %D = mul
+ ; SSE42: cost of 1 {{.*}} %D = mul
+ ; AVX: cost of 1 {{.*}} %D = mul
+ ; AVX2: cost of 1 {{.*}} %D = mul
+ ; AVX512: cost of 1 {{.*}} %D = mul
+ %D = mul <4 x i32> undef, undef
+ ; SSSE3: cost of 12 {{.*}} %E = mul
+ ; SSE42: cost of 2 {{.*}} %E = mul
+ ; AVX: cost of 4 {{.*}} %E = mul
+ ; AVX2: cost of 1 {{.*}} %E = mul
+ ; AVX512: cost of 1 {{.*}} %E = mul
+ %E = mul <8 x i32> undef, undef
+ ; SSSE3: cost of 24 {{.*}} %F = mul
+ ; SSE42: cost of 4 {{.*}} %F = mul
+ ; AVX: cost of 8 {{.*}} %F = mul
+ ; AVX2: cost of 2 {{.*}} %F = mul
+ ; AVX512: cost of 1 {{.*}} %F = mul
+ %F = mul <16 x i32> undef, undef
+
+ ; SSSE3: cost of 1 {{.*}} %G = mul
+ ; SSE42: cost of 1 {{.*}} %G = mul
+ ; AVX: cost of 1 {{.*}} %G = mul
+ ; AVX2: cost of 1 {{.*}} %G = mul
+ ; AVX512: cost of 1 {{.*}} %G = mul
+ %G = mul <8 x i16> undef, undef
+ ; SSSE3: cost of 2 {{.*}} %H = mul
+ ; SSE42: cost of 2 {{.*}} %H = mul
+ ; AVX: cost of 4 {{.*}} %H = mul
+ ; AVX2: cost of 1 {{.*}} %H = mul
+ ; AVX512: cost of 1 {{.*}} %H = mul
+ %H = mul <16 x i16> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %I = mul
+ ; SSE42: cost of 4 {{.*}} %I = mul
+ ; AVX: cost of 8 {{.*}} %I = mul
+ ; AVX2: cost of 2 {{.*}} %I = mul
+ ; AVX512F: cost of 2 {{.*}} %I = mul
+ ; AVX512BW: cost of 1 {{.*}} %I = mul
+ %I = mul <32 x i16> undef, undef
+
+ ; SSSE3: cost of 12 {{.*}} %J = mul
+ ; SSE42: cost of 12 {{.*}} %J = mul
+ ; AVX: cost of 12 {{.*}} %J = mul
+ ; AVX2: cost of 7 {{.*}} %J = mul
+ ; AVX512F: cost of 5 {{.*}} %J = mul
+ ; AVX512BW: cost of 4 {{.*}} %J = mul
+ %J = mul <16 x i8> undef, undef
+ ; SSSE3: cost of 24 {{.*}} %K = mul
+ ; SSE42: cost of 24 {{.*}} %K = mul
+ ; AVX: cost of 26 {{.*}} %K = mul
+ ; AVX2: cost of 17 {{.*}} %K = mul
+ ; AVX512F: cost of 13 {{.*}} %K = mul
+ ; AVX512BW: cost of 4 {{.*}} %K = mul
+ %K = mul <32 x i8> undef, undef
+ ; SSSE3: cost of 48 {{.*}} %L = mul
+ ; SSE42: cost of 48 {{.*}} %L = mul
+ ; AVX: cost of 52 {{.*}} %L = mul
+ ; AVX2: cost of 34 {{.*}} %L = mul
+ ; AVX512F: cost of 26 {{.*}} %L = mul
+ ; AVX512BW: cost of 11 {{.*}} %L = mul
+ %L = mul <64 x i8> undef, undef
+
ret i32 undef
}
-; CHECK-LABEL: 'shift'
-define void @shift() {
- ; SSSE3: cost of 10 {{.*}} %A0 = shl
- ; SSE42: cost of 10 {{.*}} %A0 = shl
- ; AVX: cost of 10 {{.*}} %A0 = shl
- ; AVX2: cost of 1 {{.*}} %A0 = shl
- %A0 = shl <4 x i32> undef, undef
- ; SSSE3: cost of 4 {{.*}} %A1 = shl
- ; SSE42: cost of 4 {{.*}} %A1 = shl
- ; AVX: cost of 4 {{.*}} %A1 = shl
- ; AVX2: cost of 1 {{.*}} %A1 = shl
- %A1 = shl <2 x i64> undef, undef
- ; SSSE3: cost of 20 {{.*}} %A2 = shl
- ; SSE42: cost of 20 {{.*}} %A2 = shl
- ; AVX: cost of 20 {{.*}} %A2 = shl
- ; AVX2: cost of 1 {{.*}} %A2 = shl
- %A2 = shl <8 x i32> undef, undef
- ; SSSE3: cost of 8 {{.*}} %A3 = shl
- ; SSE42: cost of 8 {{.*}} %A3 = shl
- ; AVX: cost of 8 {{.*}} %A3 = shl
- ; AVX2: cost of 1 {{.*}} %A3 = shl
- %A3 = shl <4 x i64> undef, undef
-
- ; SSSE3: cost of 16 {{.*}} %B0 = lshr
- ; SSE42: cost of 16 {{.*}} %B0 = lshr
- ; AVX: cost of 16 {{.*}} %B0 = lshr
- ; AVX2: cost of 1 {{.*}} %B0 = lshr
- %B0 = lshr <4 x i32> undef, undef
- ; SSSE3: cost of 4 {{.*}} %B1 = lshr
- ; SSE42: cost of 4 {{.*}} %B1 = lshr
- ; AVX: cost of 4 {{.*}} %B1 = lshr
- ; AVX2: cost of 1 {{.*}} %B1 = lshr
- %B1 = lshr <2 x i64> undef, undef
- ; SSSE3: cost of 32 {{.*}} %B2 = lshr
- ; SSE42: cost of 32 {{.*}} %B2 = lshr
- ; AVX: cost of 32 {{.*}} %B2 = lshr
- ; AVX2: cost of 1 {{.*}} %B2 = lshr
- %B2 = lshr <8 x i32> undef, undef
- ; SSSE3: cost of 8 {{.*}} %B3 = lshr
- ; SSE42: cost of 8 {{.*}} %B3 = lshr
- ; AVX: cost of 8 {{.*}} %B3 = lshr
- ; AVX2: cost of 1 {{.*}} %B3 = lshr
- %B3 = lshr <4 x i64> undef, undef
-
- ; SSSE3: cost of 16 {{.*}} %C0 = ashr
- ; SSE42: cost of 16 {{.*}} %C0 = ashr
- ; AVX: cost of 16 {{.*}} %C0 = ashr
- ; AVX2: cost of 1 {{.*}} %C0 = ashr
- %C0 = ashr <4 x i32> undef, undef
- ; SSSE3: cost of 12 {{.*}} %C1 = ashr
- ; SSE42: cost of 12 {{.*}} %C1 = ashr
- ; AVX: cost of 12 {{.*}} %C1 = ashr
- ; AVX2: cost of 4 {{.*}} %C1 = ashr
- %C1 = ashr <2 x i64> undef, undef
- ; SSSE3: cost of 32 {{.*}} %C2 = ashr
- ; SSE42: cost of 32 {{.*}} %C2 = ashr
- ; AVX: cost of 32 {{.*}} %C2 = ashr
- ; AVX2: cost of 1 {{.*}} %C2 = ashr
- %C2 = ashr <8 x i32> undef, undef
- ; SSSE3: cost of 24 {{.*}} %C3 = ashr
- ; SSE42: cost of 24 {{.*}} %C3 = ashr
- ; AVX: cost of 24 {{.*}} %C3 = ashr
- ; AVX2: cost of 4 {{.*}} %C3 = ashr
- %C3 = ashr <4 x i64> undef, undef
+; CHECK-LABEL: 'mul_2i32'
+define void @mul_2i32() {
+ ; A <2 x i32> gets expanded to a <2 x i64> vector.
+ ; A <2 x i64> vector multiply is implemented using
+ ; 3 PMULUDQ and 2 PADDS and 4 shifts.
+ ; SSSE3: cost of 8 {{.*}} %A0 = mul
+ ; SSE42: cost of 8 {{.*}} %A0 = mul
+ ; AVX: cost of 8 {{.*}} %A0 = mul
+ ; AVX2: cost of 8 {{.*}} %A0 = mul
+ ; AVX512F: cost of 8 {{.*}} %A0 = mul
+ ; AVX512BW: cost of 8 {{.*}} %A0 = mul
+ ; AVX512DQ: cost of 1 {{.*}} %A0 = mul
+ %A0 = mul <2 x i32> undef, undef
ret void
}
diff --git a/test/Analysis/CostModel/X86/ctbits-cost.ll b/test/Analysis/CostModel/X86/ctbits-cost.ll
index 23bfafd8bc94..8c7fa9d73151 100644
--- a/test/Analysis/CostModel/X86/ctbits-cost.ll
+++ b/test/Analysis/CostModel/X86/ctbits-cost.ll
@@ -2,8 +2,8 @@
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE42 -check-prefix=POPCNT
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 -check-prefix=POPCNT
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 -check-prefix=POPCNT
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX1 -check-prefix=POPCNT
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX2 -check-prefix=POPCNT
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 -check-prefix=POPCNT
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 -check-prefix=POPCNT
; Verify the cost of scalar population count instructions.
@@ -58,72 +58,76 @@ declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>)
define <2 x i64> @var_ctpop_v2i64(<2 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v2i64':
-; SSE: Found an estimated cost of 2 for instruction: %ctpop
-; AVX: Found an estimated cost of 2 for instruction: %ctpop
-; XOP: Found an estimated cost of 2 for instruction: %ctpop
+; SSE2: Found an estimated cost of 12 for instruction: %ctpop
+; SSE42: Found an estimated cost of 7 for instruction: %ctpop
+; AVX: Found an estimated cost of 7 for instruction: %ctpop
%ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
ret <2 x i64> %ctpop
}
define <4 x i64> @var_ctpop_v4i64(<4 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v4i64':
-; SSE: Found an estimated cost of 4 for instruction: %ctpop
-; AVX: Found an estimated cost of 2 for instruction: %ctpop
-; XOP: Found an estimated cost of 2 for instruction: %ctpop
+; SSE2: Found an estimated cost of 24 for instruction: %ctpop
+; SSE42: Found an estimated cost of 14 for instruction: %ctpop
+; AVX1: Found an estimated cost of 14 for instruction: %ctpop
+; AVX2: Found an estimated cost of 7 for instruction: %ctpop
%ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
ret <4 x i64> %ctpop
}
define <4 x i32> @var_ctpop_v4i32(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v4i32':
-; SSE: Found an estimated cost of 2 for instruction: %ctpop
-; AVX: Found an estimated cost of 2 for instruction: %ctpop
-; XOP: Found an estimated cost of 2 for instruction: %ctpop
+; SSE2: Found an estimated cost of 15 for instruction: %ctpop
+; SSE42: Found an estimated cost of 11 for instruction: %ctpop
+; AVX: Found an estimated cost of 11 for instruction: %ctpop
%ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
ret <4 x i32> %ctpop
}
define <8 x i32> @var_ctpop_v8i32(<8 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v8i32':
-; SSE: Found an estimated cost of 4 for instruction: %ctpop
-; AVX: Found an estimated cost of 2 for instruction: %ctpop
-; XOP: Found an estimated cost of 2 for instruction: %ctpop
+; SSE2: Found an estimated cost of 30 for instruction: %ctpop
+; SSE42: Found an estimated cost of 22 for instruction: %ctpop
+; AVX1: Found an estimated cost of 22 for instruction: %ctpop
+; AVX2: Found an estimated cost of 11 for instruction: %ctpop
%ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
ret <8 x i32> %ctpop
}
define <8 x i16> @var_ctpop_v8i16(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v8i16':
-; SSE: Found an estimated cost of 2 for instruction: %ctpop
-; AVX: Found an estimated cost of 2 for instruction: %ctpop
-; XOP: Found an estimated cost of 2 for instruction: %ctpop
+; SSE2: Found an estimated cost of 13 for instruction: %ctpop
+; SSE42: Found an estimated cost of 9 for instruction: %ctpop
+; AVX: Found an estimated cost of 9 for instruction: %ctpop
%ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
ret <8 x i16> %ctpop
}
define <16 x i16> @var_ctpop_v16i16(<16 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v16i16':
-; SSE: Found an estimated cost of 4 for instruction: %ctpop
-; AVX: Found an estimated cost of 2 for instruction: %ctpop
-; XOP: Found an estimated cost of 2 for instruction: %ctpop
+; SSE2: Found an estimated cost of 26 for instruction: %ctpop
+; SSE42: Found an estimated cost of 18 for instruction: %ctpop
+; AVX1: Found an estimated cost of 18 for instruction: %ctpop
+; AVX2: Found an estimated cost of 9 for instruction: %ctpop
%ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
ret <16 x i16> %ctpop
}
define <16 x i8> @var_ctpop_v16i8(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v16i8':
-; SSE: Found an estimated cost of 2 for instruction: %ctpop
-; AVX: Found an estimated cost of 2 for instruction: %ctpop
-; XOP: Found an estimated cost of 2 for instruction: %ctpop
+; SSE2: Found an estimated cost of 10 for instruction: %ctpop
+; SSE42: Found an estimated cost of 6 for instruction: %ctpop
+; AVX: Found an estimated cost of 6 for instruction: %ctpop
%ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
ret <16 x i8> %ctpop
}
define <32 x i8> @var_ctpop_v32i8(<32 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v32i8':
-; SSE: Found an estimated cost of 4 for instruction: %ctpop
-; AVX: Found an estimated cost of 2 for instruction: %ctpop
-; XOP: Found an estimated cost of 2 for instruction: %ctpop
+; SSE2: Found an estimated cost of 20 for instruction: %ctpop
+; SSE42: Found an estimated cost of 12 for instruction: %ctpop
+; AVX1: Found an estimated cost of 12 for instruction: %ctpop
+; AVX2: Found an estimated cost of 6 for instruction: %ctpop
%ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
ret <32 x i8> %ctpop
}
@@ -205,144 +209,152 @@ declare <32 x i8> @llvm.ctlz.v32i8(<32 x i8>, i1)
define <2 x i64> @var_ctlz_v2i64(<2 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v2i64':
-; SSE: Found an estimated cost of 6 for instruction: %ctlz
-; AVX: Found an estimated cost of 6 for instruction: %ctlz
-; XOP: Found an estimated cost of 6 for instruction: %ctlz
+; SSE2: Found an estimated cost of 25 for instruction: %ctlz
+; SSE42: Found an estimated cost of 23 for instruction: %ctlz
+; AVX: Found an estimated cost of 23 for instruction: %ctlz
%ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 0)
ret <2 x i64> %ctlz
}
define <2 x i64> @var_ctlz_v2i64u(<2 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v2i64u':
-; SSE: Found an estimated cost of 6 for instruction: %ctlz
-; AVX: Found an estimated cost of 6 for instruction: %ctlz
-; XOP: Found an estimated cost of 6 for instruction: %ctlz
+; SSE2: Found an estimated cost of 25 for instruction: %ctlz
+; SSE42: Found an estimated cost of 23 for instruction: %ctlz
+; AVX: Found an estimated cost of 23 for instruction: %ctlz
%ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 1)
ret <2 x i64> %ctlz
}
define <4 x i64> @var_ctlz_v4i64(<4 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v4i64':
-; SSE: Found an estimated cost of 12 for instruction: %ctlz
-; AVX: Found an estimated cost of 12 for instruction: %ctlz
-; XOP: Found an estimated cost of 12 for instruction: %ctlz
+; SSE2: Found an estimated cost of 50 for instruction: %ctlz
+; SSE42: Found an estimated cost of 46 for instruction: %ctlz
+; AVX1: Found an estimated cost of 46 for instruction: %ctlz
+; AVX2: Found an estimated cost of 23 for instruction: %ctlz
%ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 0)
ret <4 x i64> %ctlz
}
define <4 x i64> @var_ctlz_v4i64u(<4 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v4i64u':
-; SSE: Found an estimated cost of 12 for instruction: %ctlz
-; AVX: Found an estimated cost of 12 for instruction: %ctlz
-; XOP: Found an estimated cost of 12 for instruction: %ctlz
+; SSE2: Found an estimated cost of 50 for instruction: %ctlz
+; SSE42: Found an estimated cost of 46 for instruction: %ctlz
+; AVX1: Found an estimated cost of 46 for instruction: %ctlz
+; AVX2: Found an estimated cost of 23 for instruction: %ctlz
%ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 1)
ret <4 x i64> %ctlz
}
define <4 x i32> @var_ctlz_v4i32(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v4i32':
-; SSE: Found an estimated cost of 12 for instruction: %ctlz
-; AVX: Found an estimated cost of 12 for instruction: %ctlz
-; XOP: Found an estimated cost of 12 for instruction: %ctlz
+; SSE2: Found an estimated cost of 26 for instruction: %ctlz
+; SSE42: Found an estimated cost of 18 for instruction: %ctlz
+; AVX: Found an estimated cost of 18 for instruction: %ctlz
%ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 0)
ret <4 x i32> %ctlz
}
define <4 x i32> @var_ctlz_v4i32u(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v4i32u':
-; SSE: Found an estimated cost of 12 for instruction: %ctlz
-; AVX: Found an estimated cost of 12 for instruction: %ctlz
-; XOP: Found an estimated cost of 12 for instruction: %ctlz
+; SSE2: Found an estimated cost of 26 for instruction: %ctlz
+; SSE42: Found an estimated cost of 18 for instruction: %ctlz
+; AVX: Found an estimated cost of 18 for instruction: %ctlz
%ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 1)
ret <4 x i32> %ctlz
}
define <8 x i32> @var_ctlz_v8i32(<8 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i32':
-; SSE: Found an estimated cost of 24 for instruction: %ctlz
-; AVX: Found an estimated cost of 24 for instruction: %ctlz
-; XOP: Found an estimated cost of 24 for instruction: %ctlz
+; SSE2: Found an estimated cost of 52 for instruction: %ctlz
+; SSE42: Found an estimated cost of 36 for instruction: %ctlz
+; AVX1: Found an estimated cost of 36 for instruction: %ctlz
+; AVX2: Found an estimated cost of 18 for instruction: %ctlz
%ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 0)
ret <8 x i32> %ctlz
}
define <8 x i32> @var_ctlz_v8i32u(<8 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i32u':
-; SSE: Found an estimated cost of 24 for instruction: %ctlz
-; AVX: Found an estimated cost of 24 for instruction: %ctlz
-; XOP: Found an estimated cost of 24 for instruction: %ctlz
+; SSE2: Found an estimated cost of 52 for instruction: %ctlz
+; SSE42: Found an estimated cost of 36 for instruction: %ctlz
+; AVX1: Found an estimated cost of 36 for instruction: %ctlz
+; AVX2: Found an estimated cost of 18 for instruction: %ctlz
%ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 1)
ret <8 x i32> %ctlz
}
define <8 x i16> @var_ctlz_v8i16(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i16':
-; SSE: Found an estimated cost of 24 for instruction: %ctlz
-; AVX: Found an estimated cost of 24 for instruction: %ctlz
-; XOP: Found an estimated cost of 24 for instruction: %ctlz
+; SSE2: Found an estimated cost of 20 for instruction: %ctlz
+; SSE42: Found an estimated cost of 14 for instruction: %ctlz
+; AVX: Found an estimated cost of 14 for instruction: %ctlz
%ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 0)
ret <8 x i16> %ctlz
}
define <8 x i16> @var_ctlz_v8i16u(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i16u':
-; SSE: Found an estimated cost of 24 for instruction: %ctlz
-; AVX: Found an estimated cost of 24 for instruction: %ctlz
-; XOP: Found an estimated cost of 24 for instruction: %ctlz
+; SSE2: Found an estimated cost of 20 for instruction: %ctlz
+; SSE42: Found an estimated cost of 14 for instruction: %ctlz
+; AVX: Found an estimated cost of 14 for instruction: %ctlz
%ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 1)
ret <8 x i16> %ctlz
}
define <16 x i16> @var_ctlz_v16i16(<16 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i16':
-; SSE: Found an estimated cost of 48 for instruction: %ctlz
-; AVX: Found an estimated cost of 48 for instruction: %ctlz
-; XOP: Found an estimated cost of 48 for instruction: %ctlz
+; SSE2: Found an estimated cost of 40 for instruction: %ctlz
+; SSE42: Found an estimated cost of 28 for instruction: %ctlz
+; AVX1: Found an estimated cost of 28 for instruction: %ctlz
+; AVX2: Found an estimated cost of 14 for instruction: %ctlz
%ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 0)
ret <16 x i16> %ctlz
}
define <16 x i16> @var_ctlz_v16i16u(<16 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i16u':
-; SSE: Found an estimated cost of 48 for instruction: %ctlz
-; AVX: Found an estimated cost of 48 for instruction: %ctlz
-; XOP: Found an estimated cost of 48 for instruction: %ctlz
+; SSE2: Found an estimated cost of 40 for instruction: %ctlz
+; SSE42: Found an estimated cost of 28 for instruction: %ctlz
+; AVX1: Found an estimated cost of 28 for instruction: %ctlz
+; AVX2: Found an estimated cost of 14 for instruction: %ctlz
%ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 1)
ret <16 x i16> %ctlz
}
define <16 x i8> @var_ctlz_v16i8(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i8':
-; SSE: Found an estimated cost of 48 for instruction: %ctlz
-; AVX: Found an estimated cost of 48 for instruction: %ctlz
-; XOP: Found an estimated cost of 48 for instruction: %ctlz
+; SSE2: Found an estimated cost of 17 for instruction: %ctlz
+; SSE42: Found an estimated cost of 9 for instruction: %ctlz
+; AVX: Found an estimated cost of 9 for instruction: %ctlz
%ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 0)
ret <16 x i8> %ctlz
}
define <16 x i8> @var_ctlz_v16i8u(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i8u':
-; SSE: Found an estimated cost of 48 for instruction: %ctlz
-; AVX: Found an estimated cost of 48 for instruction: %ctlz
-; XOP: Found an estimated cost of 48 for instruction: %ctlz
+; SSE2: Found an estimated cost of 17 for instruction: %ctlz
+; SSE42: Found an estimated cost of 9 for instruction: %ctlz
+; AVX: Found an estimated cost of 9 for instruction: %ctlz
%ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 1)
ret <16 x i8> %ctlz
}
define <32 x i8> @var_ctlz_v32i8(<32 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v32i8':
-; SSE: Found an estimated cost of 96 for instruction: %ctlz
-; AVX: Found an estimated cost of 96 for instruction: %ctlz
-; XOP: Found an estimated cost of 96 for instruction: %ctlz
+; SSE2: Found an estimated cost of 34 for instruction: %ctlz
+; SSE42: Found an estimated cost of 18 for instruction: %ctlz
+; AVX1: Found an estimated cost of 18 for instruction: %ctlz
+; AVX2: Found an estimated cost of 9 for instruction: %ctlz
%ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 0)
ret <32 x i8> %ctlz
}
define <32 x i8> @var_ctlz_v32i8u(<32 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v32i8u':
-; SSE: Found an estimated cost of 96 for instruction: %ctlz
-; AVX: Found an estimated cost of 96 for instruction: %ctlz
-; XOP: Found an estimated cost of 96 for instruction: %ctlz
+; SSE2: Found an estimated cost of 34 for instruction: %ctlz
+; SSE42: Found an estimated cost of 18 for instruction: %ctlz
+; AVX1: Found an estimated cost of 18 for instruction: %ctlz
+; AVX2: Found an estimated cost of 9 for instruction: %ctlz
%ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 1)
ret <32 x i8> %ctlz
}
@@ -424,144 +436,152 @@ declare <32 x i8> @llvm.cttz.v32i8(<32 x i8>, i1)
define <2 x i64> @var_cttz_v2i64(<2 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v2i64':
-; SSE: Found an estimated cost of 6 for instruction: %cttz
-; AVX: Found an estimated cost of 6 for instruction: %cttz
-; XOP: Found an estimated cost of 6 for instruction: %cttz
+; SSE2: Found an estimated cost of 14 for instruction: %cttz
+; SSE42: Found an estimated cost of 10 for instruction: %cttz
+; AVX: Found an estimated cost of 10 for instruction: %cttz
%cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 0)
ret <2 x i64> %cttz
}
define <2 x i64> @var_cttz_v2i64u(<2 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v2i64u':
-; SSE: Found an estimated cost of 6 for instruction: %cttz
-; AVX: Found an estimated cost of 6 for instruction: %cttz
-; XOP: Found an estimated cost of 6 for instruction: %cttz
+; SSE2: Found an estimated cost of 14 for instruction: %cttz
+; SSE42: Found an estimated cost of 10 for instruction: %cttz
+; AVX: Found an estimated cost of 10 for instruction: %cttz
%cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 1)
ret <2 x i64> %cttz
}
define <4 x i64> @var_cttz_v4i64(<4 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v4i64':
-; SSE: Found an estimated cost of 12 for instruction: %cttz
-; AVX: Found an estimated cost of 12 for instruction: %cttz
-; XOP: Found an estimated cost of 12 for instruction: %cttz
+; SSE2: Found an estimated cost of 28 for instruction: %cttz
+; SSE42: Found an estimated cost of 20 for instruction: %cttz
+; AVX1: Found an estimated cost of 20 for instruction: %cttz
+; AVX2: Found an estimated cost of 10 for instruction: %cttz
%cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 0)
ret <4 x i64> %cttz
}
define <4 x i64> @var_cttz_v4i64u(<4 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v4i64u':
-; SSE: Found an estimated cost of 12 for instruction: %cttz
-; AVX: Found an estimated cost of 12 for instruction: %cttz
-; XOP: Found an estimated cost of 12 for instruction: %cttz
+; SSE2: Found an estimated cost of 28 for instruction: %cttz
+; SSE42: Found an estimated cost of 20 for instruction: %cttz
+; AVX1: Found an estimated cost of 20 for instruction: %cttz
+; AVX2: Found an estimated cost of 10 for instruction: %cttz
%cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 1)
ret <4 x i64> %cttz
}
define <4 x i32> @var_cttz_v4i32(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v4i32':
-; SSE: Found an estimated cost of 12 for instruction: %cttz
-; AVX: Found an estimated cost of 12 for instruction: %cttz
-; XOP: Found an estimated cost of 12 for instruction: %cttz
+; SSE2: Found an estimated cost of 18 for instruction: %cttz
+; SSE42: Found an estimated cost of 14 for instruction: %cttz
+; AVX: Found an estimated cost of 14 for instruction: %cttz
%cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 0)
ret <4 x i32> %cttz
}
define <4 x i32> @var_cttz_v4i32u(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v4i32u':
-; SSE: Found an estimated cost of 12 for instruction: %cttz
-; AVX: Found an estimated cost of 12 for instruction: %cttz
-; XOP: Found an estimated cost of 12 for instruction: %cttz
+; SSE2: Found an estimated cost of 18 for instruction: %cttz
+; SSE42: Found an estimated cost of 14 for instruction: %cttz
+; AVX: Found an estimated cost of 14 for instruction: %cttz
%cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 1)
ret <4 x i32> %cttz
}
define <8 x i32> @var_cttz_v8i32(<8 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i32':
-; SSE: Found an estimated cost of 24 for instruction: %cttz
-; AVX: Found an estimated cost of 24 for instruction: %cttz
-; XOP: Found an estimated cost of 24 for instruction: %cttz
+; SSE2: Found an estimated cost of 36 for instruction: %cttz
+; SSE42: Found an estimated cost of 28 for instruction: %cttz
+; AVX1: Found an estimated cost of 28 for instruction: %cttz
+; AVX2: Found an estimated cost of 14 for instruction: %cttz
%cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 0)
ret <8 x i32> %cttz
}
define <8 x i32> @var_cttz_v8i32u(<8 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i32u':
-; SSE: Found an estimated cost of 24 for instruction: %cttz
-; AVX: Found an estimated cost of 24 for instruction: %cttz
-; XOP: Found an estimated cost of 24 for instruction: %cttz
+; SSE2: Found an estimated cost of 36 for instruction: %cttz
+; SSE42: Found an estimated cost of 28 for instruction: %cttz
+; AVX1: Found an estimated cost of 28 for instruction: %cttz
+; AVX2: Found an estimated cost of 14 for instruction: %cttz
%cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 1)
ret <8 x i32> %cttz
}
define <8 x i16> @var_cttz_v8i16(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i16':
-; SSE: Found an estimated cost of 24 for instruction: %cttz
-; AVX: Found an estimated cost of 24 for instruction: %cttz
-; XOP: Found an estimated cost of 24 for instruction: %cttz
+; SSE2: Found an estimated cost of 16 for instruction: %cttz
+; SSE42: Found an estimated cost of 12 for instruction: %cttz
+; AVX: Found an estimated cost of 12 for instruction: %cttz
%cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 0)
ret <8 x i16> %cttz
}
define <8 x i16> @var_cttz_v8i16u(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i16u':
-; SSE: Found an estimated cost of 24 for instruction: %cttz
-; AVX: Found an estimated cost of 24 for instruction: %cttz
-; XOP: Found an estimated cost of 24 for instruction: %cttz
+; SSE2: Found an estimated cost of 16 for instruction: %cttz
+; SSE42: Found an estimated cost of 12 for instruction: %cttz
+; AVX: Found an estimated cost of 12 for instruction: %cttz
%cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 1)
ret <8 x i16> %cttz
}
define <16 x i16> @var_cttz_v16i16(<16 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i16':
-; SSE: Found an estimated cost of 48 for instruction: %cttz
-; AVX: Found an estimated cost of 48 for instruction: %cttz
-; XOP: Found an estimated cost of 48 for instruction: %cttz
+; SSE2: Found an estimated cost of 32 for instruction: %cttz
+; SSE42: Found an estimated cost of 24 for instruction: %cttz
+; AVX1: Found an estimated cost of 24 for instruction: %cttz
+; AVX2: Found an estimated cost of 12 for instruction: %cttz
%cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 0)
ret <16 x i16> %cttz
}
define <16 x i16> @var_cttz_v16i16u(<16 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i16u':
-; SSE: Found an estimated cost of 48 for instruction: %cttz
-; AVX: Found an estimated cost of 48 for instruction: %cttz
-; XOP: Found an estimated cost of 48 for instruction: %cttz
+; SSE2: Found an estimated cost of 32 for instruction: %cttz
+; SSE42: Found an estimated cost of 24 for instruction: %cttz
+; AVX1: Found an estimated cost of 24 for instruction: %cttz
+; AVX2: Found an estimated cost of 12 for instruction: %cttz
%cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 1)
ret <16 x i16> %cttz
}
define <16 x i8> @var_cttz_v16i8(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i8':
-; SSE: Found an estimated cost of 48 for instruction: %cttz
-; AVX: Found an estimated cost of 48 for instruction: %cttz
-; XOP: Found an estimated cost of 48 for instruction: %cttz
+; SSE2: Found an estimated cost of 13 for instruction: %cttz
+; SSE42: Found an estimated cost of 9 for instruction: %cttz
+; AVX: Found an estimated cost of 9 for instruction: %cttz
%cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 0)
ret <16 x i8> %cttz
}
define <16 x i8> @var_cttz_v16i8u(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i8u':
-; SSE: Found an estimated cost of 48 for instruction: %cttz
-; AVX: Found an estimated cost of 48 for instruction: %cttz
-; XOP: Found an estimated cost of 48 for instruction: %cttz
+; SSE2: Found an estimated cost of 13 for instruction: %cttz
+; SSE42: Found an estimated cost of 9 for instruction: %cttz
+; AVX: Found an estimated cost of 9 for instruction: %cttz
%cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 1)
ret <16 x i8> %cttz
}
define <32 x i8> @var_cttz_v32i8(<32 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v32i8':
-; SSE: Found an estimated cost of 96 for instruction: %cttz
-; AVX: Found an estimated cost of 96 for instruction: %cttz
-; XOP: Found an estimated cost of 96 for instruction: %cttz
+; SSE2: Found an estimated cost of 26 for instruction: %cttz
+; SSE42: Found an estimated cost of 18 for instruction: %cttz
+; AVX1: Found an estimated cost of 18 for instruction: %cttz
+; AVX2: Found an estimated cost of 9 for instruction: %cttz
%cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 0)
ret <32 x i8> %cttz
}
define <32 x i8> @var_cttz_v32i8u(<32 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v32i8u':
-; SSE: Found an estimated cost of 96 for instruction: %cttz
-; AVX: Found an estimated cost of 96 for instruction: %cttz
-; XOP: Found an estimated cost of 96 for instruction: %cttz
+; SSE2: Found an estimated cost of 26 for instruction: %cttz
+; SSE42: Found an estimated cost of 18 for instruction: %cttz
+; AVX1: Found an estimated cost of 18 for instruction: %cttz
+; AVX2: Found an estimated cost of 9 for instruction: %cttz
%cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 1)
ret <32 x i8> %cttz
}
diff --git a/test/Analysis/CostModel/X86/div.ll b/test/Analysis/CostModel/X86/div.ll
index c7d6517c7f03..0ac06ff75ebe 100644
--- a/test/Analysis/CostModel/X86/div.ll
+++ b/test/Analysis/CostModel/X86/div.ll
@@ -1,32 +1,376 @@
-; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s
-; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -cost-model -analyze < %s | FileCheck --check-prefix=AVX2 %s
-
-
-define void @div_sse() {
- ; SSE2: div_sse
- ; SSE2: cost of 320 {{.*}} sdiv
- %a0 = sdiv <16 x i8> undef, undef
- ; SSE2: cost of 160 {{.*}} sdiv
- %a1 = sdiv <8 x i16> undef, undef
- ; SSE2: cost of 80 {{.*}} sdiv
- %a2 = sdiv <4 x i32> undef, undef
- ; SSE2: cost of 40 {{.*}} sdiv
- %a3 = sdiv <2 x i32> undef, undef
- ret void
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE2
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSSE3
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE42
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; CHECK-LABEL: 'sdiv'
+define i32 @sdiv() {
+ ; CHECK: cost of 1 {{.*}} %I64 = sdiv
+ %I64 = sdiv i64 undef, undef
+ ; SSE: cost of 40 {{.*}} %V2i64 = sdiv
+ ; AVX: cost of 40 {{.*}} %V2i64 = sdiv
+ %V2i64 = sdiv <2 x i64> undef, undef
+ ; SSE: cost of 80 {{.*}} %V4i64 = sdiv
+ ; AVX: cost of 80 {{.*}} %V4i64 = sdiv
+ %V4i64 = sdiv <4 x i64> undef, undef
+ ; SSE: cost of 160 {{.*}} %V8i64 = sdiv
+ ; AVX: cost of 160 {{.*}} %V8i64 = sdiv
+ %V8i64 = sdiv <8 x i64> undef, undef
+
+ ; CHECK: cost of 1 {{.*}} %I32 = sdiv
+ %I32 = sdiv i32 undef, undef
+ ; SSE: cost of 80 {{.*}} %V4i32 = sdiv
+ ; AVX: cost of 80 {{.*}} %V4i32 = sdiv
+ %V4i32 = sdiv <4 x i32> undef, undef
+ ; SSE: cost of 160 {{.*}} %V8i32 = sdiv
+ ; AVX: cost of 160 {{.*}} %V8i32 = sdiv
+ %V8i32 = sdiv <8 x i32> undef, undef
+ ; SSE: cost of 320 {{.*}} %V16i32 = sdiv
+ ; AVX: cost of 320 {{.*}} %V16i32 = sdiv
+ %V16i32 = sdiv <16 x i32> undef, undef
+
+ ; CHECK: cost of 1 {{.*}} %I16 = sdiv
+ %I16 = sdiv i16 undef, undef
+ ; SSE: cost of 160 {{.*}} %V8i16 = sdiv
+ ; AVX: cost of 160 {{.*}} %V8i16 = sdiv
+ %V8i16 = sdiv <8 x i16> undef, undef
+ ; SSE: cost of 320 {{.*}} %V16i16 = sdiv
+ ; AVX: cost of 320 {{.*}} %V16i16 = sdiv
+ %V16i16 = sdiv <16 x i16> undef, undef
+ ; SSE: cost of 640 {{.*}} %V32i16 = sdiv
+ ; AVX: cost of 640 {{.*}} %V32i16 = sdiv
+ %V32i16 = sdiv <32 x i16> undef, undef
+
+ ; CHECK: cost of 1 {{.*}} %I8 = sdiv
+ %I8 = sdiv i8 undef, undef
+ ; SSE: cost of 320 {{.*}} %V16i8 = sdiv
+ ; AVX: cost of 320 {{.*}} %V16i8 = sdiv
+ %V16i8 = sdiv <16 x i8> undef, undef
+ ; SSE: cost of 640 {{.*}} %V32i8 = sdiv
+ ; AVX: cost of 640 {{.*}} %V32i8 = sdiv
+ %V32i8 = sdiv <32 x i8> undef, undef
+ ; SSE: cost of 1280 {{.*}} %V64i8 = sdiv
+ ; AVX: cost of 1280 {{.*}} %V64i8 = sdiv
+ %V64i8 = sdiv <64 x i8> undef, undef
+
+ ret i32 undef
}
-; SSE2: div_avx
-
-define void @div_avx() {
- ; AVX2: div_avx
- ; AVX2: cost of 640 {{.*}} sdiv
- %a0 = sdiv <32 x i8> undef, undef
- ; AVX2: cost of 320 {{.*}} sdiv
- %a1 = sdiv <16 x i16> undef, undef
- ; AVX2: cost of 160 {{.*}} sdiv
- %a2 = sdiv <8 x i32> undef, undef
- ; AVX2: cost of 80 {{.*}} sdiv
- %a3 = sdiv <4 x i32> undef, undef
- ret void
+
+; CHECK-LABEL: 'udiv'
+define i32 @udiv() {
+ ; CHECK: cost of 1 {{.*}} %I64 = udiv
+ %I64 = udiv i64 undef, undef
+ ; SSE: cost of 40 {{.*}} %V2i64 = udiv
+ ; AVX: cost of 40 {{.*}} %V2i64 = udiv
+ %V2i64 = udiv <2 x i64> undef, undef
+ ; SSE: cost of 80 {{.*}} %V4i64 = udiv
+ ; AVX: cost of 80 {{.*}} %V4i64 = udiv
+ %V4i64 = udiv <4 x i64> undef, undef
+ ; SSE: cost of 160 {{.*}} %V8i64 = udiv
+ ; AVX: cost of 160 {{.*}} %V8i64 = udiv
+ %V8i64 = udiv <8 x i64> undef, undef
+
+ ; CHECK: cost of 1 {{.*}} %I32 = udiv
+ %I32 = udiv i32 undef, undef
+ ; SSE: cost of 80 {{.*}} %V4i32 = udiv
+ ; AVX: cost of 80 {{.*}} %V4i32 = udiv
+ %V4i32 = udiv <4 x i32> undef, undef
+ ; SSE: cost of 160 {{.*}} %V8i32 = udiv
+ ; AVX: cost of 160 {{.*}} %V8i32 = udiv
+ %V8i32 = udiv <8 x i32> undef, undef
+ ; SSE: cost of 320 {{.*}} %V16i32 = udiv
+ ; AVX: cost of 320 {{.*}} %V16i32 = udiv
+ %V16i32 = udiv <16 x i32> undef, undef
+
+ ; CHECK: cost of 1 {{.*}} %I16 = udiv
+ %I16 = udiv i16 undef, undef
+ ; SSE: cost of 160 {{.*}} %V8i16 = udiv
+ ; AVX: cost of 160 {{.*}} %V8i16 = udiv
+ %V8i16 = udiv <8 x i16> undef, undef
+ ; SSE: cost of 320 {{.*}} %V16i16 = udiv
+ ; AVX: cost of 320 {{.*}} %V16i16 = udiv
+ %V16i16 = udiv <16 x i16> undef, undef
+ ; SSE: cost of 640 {{.*}} %V32i16 = udiv
+ ; AVX: cost of 640 {{.*}} %V32i16 = udiv
+ %V32i16 = udiv <32 x i16> undef, undef
+
+ ; CHECK: cost of 1 {{.*}} %I8 = udiv
+ %I8 = udiv i8 undef, undef
+ ; SSE: cost of 320 {{.*}} %V16i8 = udiv
+ ; AVX: cost of 320 {{.*}} %V16i8 = udiv
+ %V16i8 = udiv <16 x i8> undef, undef
+ ; SSE: cost of 640 {{.*}} %V32i8 = udiv
+ ; AVX: cost of 640 {{.*}} %V32i8 = udiv
+ %V32i8 = udiv <32 x i8> undef, undef
+ ; SSE: cost of 1280 {{.*}} %V64i8 = udiv
+ ; AVX: cost of 1280 {{.*}} %V64i8 = udiv
+ %V64i8 = udiv <64 x i8> undef, undef
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'sdiv_uniformconst'
+define i32 @sdiv_uniformconst() {
+ ; CHECK: cost of 1 {{.*}} %I64 = sdiv
+ %I64 = sdiv i64 undef, 7
+ ; SSE: cost of 40 {{.*}} %V2i64 = sdiv
+ ; AVX: cost of 40 {{.*}} %V2i64 = sdiv
+ %V2i64 = sdiv <2 x i64> undef, <i64 7, i64 7>
+ ; SSE: cost of 80 {{.*}} %V4i64 = sdiv
+ ; AVX: cost of 80 {{.*}} %V4i64 = sdiv
+ %V4i64 = sdiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
+ ; SSE: cost of 160 {{.*}} %V8i64 = sdiv
+ ; AVX: cost of 160 {{.*}} %V8i64 = sdiv
+ %V8i64 = sdiv <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+
+ ; CHECK: cost of 1 {{.*}} %I32 = sdiv
+ %I32 = sdiv i32 undef, 7
+ ; SSE2: cost of 19 {{.*}} %V4i32 = sdiv
+ ; SSSE3: cost of 19 {{.*}} %V4i32 = sdiv
+ ; SSE42: cost of 15 {{.*}} %V4i32 = sdiv
+ ; AVX: cost of 15 {{.*}} %V4i32 = sdiv
+ %V4i32 = sdiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
+ ; SSE2: cost of 38 {{.*}} %V8i32 = sdiv
+ ; SSSE3: cost of 38 {{.*}} %V8i32 = sdiv
+ ; SSE42: cost of 30 {{.*}} %V8i32 = sdiv
+ ; AVX1: cost of 30 {{.*}} %V8i32 = sdiv
+ ; AVX2: cost of 15 {{.*}} %V8i32 = sdiv
+ ; AVX512: cost of 15 {{.*}} %V8i32 = sdiv
+ %V8i32 = sdiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+ ; SSE2: cost of 76 {{.*}} %V16i32 = sdiv
+ ; SSSE3: cost of 76 {{.*}} %V16i32 = sdiv
+ ; SSE42: cost of 60 {{.*}} %V16i32 = sdiv
+ ; AVX1: cost of 60 {{.*}} %V16i32 = sdiv
+ ; AVX2: cost of 30 {{.*}} %V16i32 = sdiv
+ ; AVX512: cost of 15 {{.*}} %V16i32 = sdiv
+ %V16i32 = sdiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+
+ ; CHECK: cost of 1 {{.*}} %I16 = sdiv
+ %I16 = sdiv i16 undef, 7
+ ; SSE: cost of 6 {{.*}} %V8i16 = sdiv
+ ; AVX: cost of 6 {{.*}} %V8i16 = sdiv
+ %V8i16 = sdiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+ ; SSE: cost of 12 {{.*}} %V16i16 = sdiv
+ ; AVX1: cost of 12 {{.*}} %V16i16 = sdiv
+ ; AVX2: cost of 6 {{.*}} %V16i16 = sdiv
+ ; AVX512: cost of 6 {{.*}} %V16i16 = sdiv
+ %V16i16 = sdiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+ ; SSE: cost of 24 {{.*}} %V32i16 = sdiv
+ ; AVX1: cost of 24 {{.*}} %V32i16 = sdiv
+ ; AVX2: cost of 12 {{.*}} %V32i16 = sdiv
+ ; AVX512F: cost of 12 {{.*}} %V32i16 = sdiv
+ ; AVX512BW: cost of 6 {{.*}} %V32i16 = sdiv
+ %V32i16 = sdiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+
+ ; CHECK: cost of 1 {{.*}} %I8 = sdiv
+ %I8 = sdiv i8 undef, 7
+ ; SSE: cost of 320 {{.*}} %V16i8 = sdiv
+ ; AVX: cost of 320 {{.*}} %V16i8 = sdiv
+ %V16i8 = sdiv <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+ ; SSE: cost of 640 {{.*}} %V32i8 = sdiv
+ ; AVX: cost of 640 {{.*}} %V32i8 = sdiv
+ %V32i8 = sdiv <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+ ; SSE: cost of 1280 {{.*}} %V64i8 = sdiv
+ ; AVX: cost of 1280 {{.*}} %V64i8 = sdiv
+ %V64i8 = sdiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+
+ ret i32 undef
}
+; CHECK-LABEL: 'udiv_uniformconst'
+define i32 @udiv_uniformconst() {
+ ; CHECK: cost of 1 {{.*}} %I64 = udiv
+ %I64 = udiv i64 undef, 7
+ ; SSE: cost of 40 {{.*}} %V2i64 = udiv
+ ; AVX: cost of 40 {{.*}} %V2i64 = udiv
+ %V2i64 = udiv <2 x i64> undef, <i64 7, i64 7>
+ ; SSE: cost of 80 {{.*}} %V4i64 = udiv
+ ; AVX: cost of 80 {{.*}} %V4i64 = udiv
+ %V4i64 = udiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
+ ; SSE: cost of 160 {{.*}} %V8i64 = udiv
+ ; AVX: cost of 160 {{.*}} %V8i64 = udiv
+ %V8i64 = udiv <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+ ; CHECK: cost of 1 {{.*}} %I32 = udiv
+ %I32 = udiv i32 undef, 7
+ ; SSE: cost of 15 {{.*}} %V4i32 = udiv
+ ; AVX: cost of 15 {{.*}} %V4i32 = udiv
+ %V4i32 = udiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
+ ; SSE: cost of 30 {{.*}} %V8i32 = udiv
+ ; AVX1: cost of 30 {{.*}} %V8i32 = udiv
+ ; AVX2: cost of 15 {{.*}} %V8i32 = udiv
+ ; AVX512: cost of 15 {{.*}} %V8i32 = udiv
+ %V8i32 = udiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+ ; SSE: cost of 60 {{.*}} %V16i32 = udiv
+ ; AVX1: cost of 60 {{.*}} %V16i32 = udiv
+ ; AVX2: cost of 30 {{.*}} %V16i32 = udiv
+ ; AVX512: cost of 15 {{.*}} %V16i32 = udiv
+ %V16i32 = udiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+
+ ; CHECK: cost of 1 {{.*}} %I16 = udiv
+ %I16 = udiv i16 undef, 7
+ ; SSE: cost of 6 {{.*}} %V8i16 = udiv
+ ; AVX: cost of 6 {{.*}} %V8i16 = udiv
+ %V8i16 = udiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+ ; SSE: cost of 12 {{.*}} %V16i16 = udiv
+ ; AVX1: cost of 12 {{.*}} %V16i16 = udiv
+ ; AVX2: cost of 6 {{.*}} %V16i16 = udiv
+ ; AVX512: cost of 6 {{.*}} %V16i16 = udiv
+ %V16i16 = udiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+ ; SSE: cost of 24 {{.*}} %V32i16 = udiv
+ ; AVX1: cost of 24 {{.*}} %V32i16 = udiv
+ ; AVX2: cost of 12 {{.*}} %V32i16 = udiv
+ ; AVX512F: cost of 12 {{.*}} %V32i16 = udiv
+ ; AVX512BW: cost of 6 {{.*}} %V32i16 = udiv
+ %V32i16 = udiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+
+ ; CHECK: cost of 1 {{.*}} %I8 = udiv
+ %I8 = udiv i8 undef, 7
+ ; SSE: cost of 320 {{.*}} %V16i8 = udiv
+ ; AVX: cost of 320 {{.*}} %V16i8 = udiv
+ %V16i8 = udiv <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+ ; SSE: cost of 640 {{.*}} %V32i8 = udiv
+ ; AVX: cost of 640 {{.*}} %V32i8 = udiv
+ %V32i8 = udiv <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+ ; SSE: cost of 1280 {{.*}} %V64i8 = udiv
+ ; AVX: cost of 1280 {{.*}} %V64i8 = udiv
+ %V64i8 = udiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'sdiv_uniformconstpow2'
+define i32 @sdiv_uniformconstpow2() {
+ ; CHECK: cost of 1 {{.*}} %I64 = sdiv
+ %I64 = sdiv i64 undef, 16
+ ; SSE: cost of 40 {{.*}} %V2i64 = sdiv
+ ; AVX: cost of 40 {{.*}} %V2i64 = sdiv
+ %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
+ ; SSE: cost of 80 {{.*}} %V4i64 = sdiv
+ ; AVX: cost of 80 {{.*}} %V4i64 = sdiv
+ %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+ ; SSE: cost of 160 {{.*}} %V8i64 = sdiv
+ ; AVX: cost of 160 {{.*}} %V8i64 = sdiv
+ %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+
+ ; CHECK: cost of 1 {{.*}} %I32 = sdiv
+ %I32 = sdiv i32 undef, 16
+ ; SSE2: cost of 19 {{.*}} %V4i32 = sdiv
+ ; SSSE3: cost of 19 {{.*}} %V4i32 = sdiv
+ ; SSE42: cost of 15 {{.*}} %V4i32 = sdiv
+ ; AVX: cost of 15 {{.*}} %V4i32 = sdiv
+ %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
+ ; SSE2: cost of 38 {{.*}} %V8i32 = sdiv
+ ; SSSE3: cost of 38 {{.*}} %V8i32 = sdiv
+ ; SSE42: cost of 30 {{.*}} %V8i32 = sdiv
+ ; AVX1: cost of 30 {{.*}} %V8i32 = sdiv
+ ; AVX2: cost of 15 {{.*}} %V8i32 = sdiv
+ ; AVX512: cost of 15 {{.*}} %V8i32 = sdiv
+ %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+ ; SSE2: cost of 76 {{.*}} %V16i32 = sdiv
+ ; SSSE3: cost of 76 {{.*}} %V16i32 = sdiv
+ ; SSE42: cost of 60 {{.*}} %V16i32 = sdiv
+ ; AVX1: cost of 60 {{.*}} %V16i32 = sdiv
+ ; AVX2: cost of 30 {{.*}} %V16i32 = sdiv
+ ; AVX512: cost of 15 {{.*}} %V16i32 = sdiv
+ %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+
+ ; CHECK: cost of 1 {{.*}} %I16 = sdiv
+ %I16 = sdiv i16 undef, 16
+ ; SSE: cost of 6 {{.*}} %V8i16 = sdiv
+ ; AVX: cost of 6 {{.*}} %V8i16 = sdiv
+ %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+ ; SSE: cost of 12 {{.*}} %V16i16 = sdiv
+ ; AVX1: cost of 12 {{.*}} %V16i16 = sdiv
+ ; AVX2: cost of 6 {{.*}} %V16i16 = sdiv
+ ; AVX512: cost of 6 {{.*}} %V16i16 = sdiv
+ %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+ ; SSE: cost of 24 {{.*}} %V32i16 = sdiv
+ ; AVX1: cost of 24 {{.*}} %V32i16 = sdiv
+ ; AVX2: cost of 12 {{.*}} %V32i16 = sdiv
+ ; AVX512F: cost of 12 {{.*}} %V32i16 = sdiv
+ ; AVX512BW: cost of 6 {{.*}} %V32i16 = sdiv
+ %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+
+ ; CHECK: cost of 1 {{.*}} %I8 = sdiv
+ %I8 = sdiv i8 undef, 16
+ ; SSE: cost of 320 {{.*}} %V16i8 = sdiv
+ ; AVX: cost of 320 {{.*}} %V16i8 = sdiv
+ %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+ ; SSE: cost of 640 {{.*}} %V32i8 = sdiv
+ ; AVX: cost of 640 {{.*}} %V32i8 = sdiv
+ %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+ ; SSE: cost of 1280 {{.*}} %V64i8 = sdiv
+ ; AVX: cost of 1280 {{.*}} %V64i8 = sdiv
+ %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'udiv_uniformconstpow2'
+define i32 @udiv_uniformconstpow2() {
+ ; CHECK: cost of 1 {{.*}} %I64 = udiv
+ %I64 = udiv i64 undef, 16
+ ; SSE: cost of 40 {{.*}} %V2i64 = udiv
+ ; AVX: cost of 40 {{.*}} %V2i64 = udiv
+ %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
+ ; SSE: cost of 80 {{.*}} %V4i64 = udiv
+ ; AVX: cost of 80 {{.*}} %V4i64 = udiv
+ %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+ ; SSE: cost of 160 {{.*}} %V8i64 = udiv
+ ; AVX: cost of 160 {{.*}} %V8i64 = udiv
+ %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+
+ ; CHECK: cost of 1 {{.*}} %I32 = udiv
+ %I32 = udiv i32 undef, 16
+ ; SSE: cost of 15 {{.*}} %V4i32 = udiv
+ ; AVX: cost of 15 {{.*}} %V4i32 = udiv
+ %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
+ ; SSE: cost of 30 {{.*}} %V8i32 = udiv
+ ; AVX1: cost of 30 {{.*}} %V8i32 = udiv
+ ; AVX2: cost of 15 {{.*}} %V8i32 = udiv
+ ; AVX512: cost of 15 {{.*}} %V8i32 = udiv
+ %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+ ; SSE: cost of 60 {{.*}} %V16i32 = udiv
+ ; AVX1: cost of 60 {{.*}} %V16i32 = udiv
+ ; AVX2: cost of 30 {{.*}} %V16i32 = udiv
+ ; AVX512: cost of 15 {{.*}} %V16i32 = udiv
+ %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+
+ ; CHECK: cost of 1 {{.*}} %I16 = udiv
+ %I16 = udiv i16 undef, 16
+ ; SSE: cost of 6 {{.*}} %V8i16 = udiv
+ ; AVX: cost of 6 {{.*}} %V8i16 = udiv
+ %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+ ; SSE: cost of 12 {{.*}} %V16i16 = udiv
+ ; AVX1: cost of 12 {{.*}} %V16i16 = udiv
+ ; AVX2: cost of 6 {{.*}} %V16i16 = udiv
+ ; AVX512: cost of 6 {{.*}} %V16i16 = udiv
+ %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+ ; SSE: cost of 24 {{.*}} %V32i16 = udiv
+ ; AVX1: cost of 24 {{.*}} %V32i16 = udiv
+ ; AVX2: cost of 12 {{.*}} %V32i16 = udiv
+ ; AVX512F: cost of 12 {{.*}} %V32i16 = udiv
+ ; AVX512BW: cost of 6 {{.*}} %V32i16 = udiv
+ %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+
+ ; CHECK: cost of 1 {{.*}} %I8 = udiv
+ %I8 = udiv i8 undef, 16
+ ; SSE: cost of 320 {{.*}} %V16i8 = udiv
+ ; AVX: cost of 320 {{.*}} %V16i8 = udiv
+ %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+ ; SSE: cost of 640 {{.*}} %V32i8 = udiv
+ ; AVX: cost of 640 {{.*}} %V32i8 = udiv
+ %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+ ; SSE: cost of 1280 {{.*}} %V64i8 = udiv
+ ; AVX: cost of 1280 {{.*}} %V64i8 = udiv
+ %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+
+ ret i32 undef
+}
diff --git a/test/Analysis/CostModel/X86/fptosi.ll b/test/Analysis/CostModel/X86/fptosi.ll
new file mode 100644
index 000000000000..d5e21f8685a7
--- /dev/null
+++ b/test/Analysis/CostModel/X86/fptosi.ll
@@ -0,0 +1,261 @@
+; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+sse2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE --check-prefix=SSE2 %s
+; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+sse4.2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE --check-prefix=SSE42 %s
+; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+avx -cost-model -analyze < %s | FileCheck --check-prefix=AVX --check-prefix=AVX1 %s
+; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+avx2 -cost-model -analyze < %s | FileCheck --check-prefix=AVX --check-prefix=AVX2 %s
+; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+avx512f -cost-model -analyze < %s | FileCheck --check-prefix=AVX512 --check-prefix=AVX512F %s
+; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512dq -cost-model -analyze < %s | FileCheck --check-prefix=AVX512 --check-prefix=AVX512DQ %s
+
+; CHECK-LABEL: 'fptosi_double_i64'
+define i32 @fptosi_double_i64(i32 %arg) {
+ ; SSE2: cost of 1 {{.*}} %I64 = fptosi
+ ; SSE42: cost of 1 {{.*}} %I64 = fptosi
+ ; AVX1: cost of 1 {{.*}} %I64 = fptosi
+ ; AVX2: cost of 1 {{.*}} %I64 = fptosi
+ ; AVX512: cost of 1 {{.*}} %I64 = fptosi
+ %I64 = fptosi double undef to i64
+ ; SSE2: cost of 6 {{.*}} %V2I64 = fptosi
+ ; SSE42: cost of 6 {{.*}} %V2I64 = fptosi
+ ; AVX1: cost of 6 {{.*}} %V2I64 = fptosi
+ ; AVX2: cost of 6 {{.*}} %V2I64 = fptosi
+ ; AVX512F: cost of 6 {{.*}} %V2I64 = fptosi
+ ; AVX512DQ: cost of 1 {{.*}} %V2I64 = fptosi
+ %V2I64 = fptosi <2 x double> undef to <2 x i64>
+ ; SSE2: cost of 13 {{.*}} %V4I64 = fptosi
+ ; SSE42: cost of 13 {{.*}} %V4I64 = fptosi
+ ; AVX1: cost of 12 {{.*}} %V4I64 = fptosi
+ ; AVX2: cost of 12 {{.*}} %V4I64 = fptosi
+ ; AVX512F: cost of 12 {{.*}} %V4I64 = fptosi
+ ; AVX512DQ: cost of 1 {{.*}} %V4I64 = fptosi
+ %V4I64 = fptosi <4 x double> undef to <4 x i64>
+ ; SSE2: cost of 27 {{.*}} %V8I64 = fptosi
+ ; SSE42: cost of 27 {{.*}} %V8I64 = fptosi
+ ; AVX1: cost of 25 {{.*}} %V8I64 = fptosi
+ ; AVX2: cost of 25 {{.*}} %V8I64 = fptosi
+ ; AVX512F: cost of 24 {{.*}} %V8I64 = fptosi
+ ; AVX512DQ: cost of 1 {{.*}} %V8I64 = fptosi
+ %V8I64 = fptosi <8 x double> undef to <8 x i64>
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'fptosi_double_i32'
+define i32 @fptosi_double_i32(i32 %arg) {
+ ; SSE2: cost of 1 {{.*}} %I32 = fptosi
+ ; SSE42: cost of 1 {{.*}} %I32 = fptosi
+ ; AVX1: cost of 1 {{.*}} %I32 = fptosi
+ ; AVX2: cost of 1 {{.*}} %I32 = fptosi
+ ; AVX512: cost of 1 {{.*}} %I32 = fptosi
+ %I32 = fptosi double undef to i32
+ ; SSE2: cost of 3 {{.*}} %V2I32 = fptosi
+ ; SSE42: cost of 3 {{.*}} %V2I32 = fptosi
+ ; AVX1: cost of 3 {{.*}} %V2I32 = fptosi
+ ; AVX2: cost of 3 {{.*}} %V2I32 = fptosi
+ ; AVX512: cost of 3 {{.*}} %V2I32 = fptosi
+ %V2I32 = fptosi <2 x double> undef to <2 x i32>
+ ; SSE2: cost of 7 {{.*}} %V4I32 = fptosi
+ ; SSE42: cost of 7 {{.*}} %V4I32 = fptosi
+ ; AVX1: cost of 1 {{.*}} %V4I32 = fptosi
+ ; AVX2: cost of 1 {{.*}} %V4I32 = fptosi
+ ; AVX512: cost of 1 {{.*}} %V4I32 = fptosi
+ %V4I32 = fptosi <4 x double> undef to <4 x i32>
+ ; SSE2: cost of 15 {{.*}} %V8I32 = fptosi
+ ; SSE42: cost of 15 {{.*}} %V8I32 = fptosi
+ ; AVX1: cost of 3 {{.*}} %V8I32 = fptosi
+ ; AVX2: cost of 3 {{.*}} %V8I32 = fptosi
+ ; AVX512: cost of 1 {{.*}} %V8I32 = fptosi
+ %V8I32 = fptosi <8 x double> undef to <8 x i32>
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'fptosi_double_i16'
+define i32 @fptosi_double_i16(i32 %arg) {
+ ; SSE2: cost of 1 {{.*}} %I16 = fptosi
+ ; SSE42: cost of 1 {{.*}} %I16 = fptosi
+ ; AVX1: cost of 1 {{.*}} %I16 = fptosi
+ ; AVX2: cost of 1 {{.*}} %I16 = fptosi
+ ; AVX512: cost of 1 {{.*}} %I16 = fptosi
+ %I16 = fptosi double undef to i16
+ ; SSE2: cost of 6 {{.*}} %V2I16 = fptosi
+ ; SSE42: cost of 6 {{.*}} %V2I16 = fptosi
+ ; AVX1: cost of 6 {{.*}} %V2I16 = fptosi
+ ; AVX2: cost of 6 {{.*}} %V2I16 = fptosi
+ ; AVX512F: cost of 6 {{.*}} %V2I16 = fptosi
+ ; AVX512DQ: cost of 1 {{.*}} %V2I16 = fptosi
+ %V2I16 = fptosi <2 x double> undef to <2 x i16>
+ ; SSE2: cost of 13 {{.*}} %V4I16 = fptosi
+ ; SSE42: cost of 13 {{.*}} %V4I16 = fptosi
+ ; AVX1: cost of 1 {{.*}} %V4I16 = fptosi
+ ; AVX2: cost of 1 {{.*}} %V4I16 = fptosi
+ ; AVX512: cost of 1 {{.*}} %V4I16 = fptosi
+ %V4I16 = fptosi <4 x double> undef to <4 x i16>
+ ; SSE2: cost of 27 {{.*}} %V8I16 = fptosi
+ ; SSE42: cost of 27 {{.*}} %V8I16 = fptosi
+ ; AVX1: cost of 3 {{.*}} %V8I16 = fptosi
+ ; AVX2: cost of 3 {{.*}} %V8I16 = fptosi
+ ; AVX512: cost of 1 {{.*}} %V8I16 = fptosi
+ %V8I16 = fptosi <8 x double> undef to <8 x i16>
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'fptosi_double_i8'
+define i32 @fptosi_double_i8(i32 %arg) {
+ ; SSE2: cost of 1 {{.*}} %I8 = fptosi
+ ; SSE42: cost of 1 {{.*}} %I8 = fptosi
+ ; AVX1: cost of 1 {{.*}} %I8 = fptosi
+ ; AVX2: cost of 1 {{.*}} %I8 = fptosi
+ ; AVX512: cost of 1 {{.*}} %I8 = fptosi
+ %I8 = fptosi double undef to i8
+ ; SSE2: cost of 6 {{.*}} %V2I8 = fptosi
+ ; SSE42: cost of 6 {{.*}} %V2I8 = fptosi
+ ; AVX1: cost of 6 {{.*}} %V2I8 = fptosi
+ ; AVX2: cost of 6 {{.*}} %V2I8 = fptosi
+ ; AVX512F: cost of 6 {{.*}} %V2I8 = fptosi
+ ; AVX512DQ: cost of 1 {{.*}} %V2I8 = fptosi
+ %V2I8 = fptosi <2 x double> undef to <2 x i8>
+ ; SSE2: cost of 13 {{.*}} %V4I8 = fptosi
+ ; SSE42: cost of 13 {{.*}} %V4I8 = fptosi
+ ; AVX1: cost of 1 {{.*}} %V4I8 = fptosi
+ ; AVX2: cost of 1 {{.*}} %V4I8 = fptosi
+ ; AVX512: cost of 1 {{.*}} %V4I8 = fptosi
+ %V4I8 = fptosi <4 x double> undef to <4 x i8>
+ ; SSE2: cost of 27 {{.*}} %V8I8 = fptosi
+ ; SSE42: cost of 27 {{.*}} %V8I8 = fptosi
+ ; AVX1: cost of 3 {{.*}} %V8I8 = fptosi
+ ; AVX2: cost of 3 {{.*}} %V8I8 = fptosi
+ ; AVX512: cost of 1 {{.*}} %V8I8 = fptosi
+ %V8I8 = fptosi <8 x double> undef to <8 x i8>
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'fptosi_float_i64'
+define i32 @fptosi_float_i64(i32 %arg) {
+ ; SSE2: cost of 1 {{.*}} %I64 = fptosi
+ ; SSE42: cost of 1 {{.*}} %I64 = fptosi
+ ; AVX1: cost of 1 {{.*}} %I64 = fptosi
+ ; AVX2: cost of 1 {{.*}} %I64 = fptosi
+ ; AVX512: cost of 1 {{.*}} %I64 = fptosi
+ %I64 = fptosi float undef to i64
+ ; SSE2: cost of 6 {{.*}} %V2I64 = fptosi
+ ; SSE42: cost of 6 {{.*}} %V2I64 = fptosi
+ ; AVX1: cost of 6 {{.*}} %V2I64 = fptosi
+ ; AVX2: cost of 6 {{.*}} %V2I64 = fptosi
+ ; AVX512F: cost of 6 {{.*}} %V2I64 = fptosi
+ ; AVX512DQ: cost of 1 {{.*}} %V2I64 = fptosi
+ %V2I64 = fptosi <2 x float> undef to <2 x i64>
+ ; SSE2: cost of 13 {{.*}} %V4I64 = fptosi
+ ; SSE42: cost of 13 {{.*}} %V4I64 = fptosi
+ ; AVX1: cost of 12 {{.*}} %V4I64 = fptosi
+ ; AVX2: cost of 12 {{.*}} %V4I64 = fptosi
+ ; AVX512F: cost of 12 {{.*}} %V4I64 = fptosi
+ ; AVX512DQ: cost of 1 {{.*}} %V4I64 = fptosi
+ %V4I64 = fptosi <4 x float> undef to <4 x i64>
+ ; SSE2: cost of 27 {{.*}} %V8I64 = fptosi
+ ; SSE42: cost of 27 {{.*}} %V8I64 = fptosi
+ ; AVX1: cost of 25 {{.*}} %V8I64 = fptosi
+ ; AVX2: cost of 25 {{.*}} %V8I64 = fptosi
+ ; AVX512F: cost of 24 {{.*}} %V8I64 = fptosi
+ ; AVX512DQ: cost of 1 {{.*}} %V8I64 = fptosi
+ %V8I64 = fptosi <8 x float> undef to <8 x i64>
+ ; SSE2: cost of 55 {{.*}} %V16I64 = fptosi
+ ; SSE42: cost of 55 {{.*}} %V16I64 = fptosi
+ ; AVX1: cost of 51 {{.*}} %V16I64 = fptosi
+ ; AVX2: cost of 51 {{.*}} %V16I64 = fptosi
+ ; AVX512F: cost of 49 {{.*}} %V16I64 = fptosi
+ ; AVX512DQ: cost of 3 {{.*}} %V16I64 = fptosi
+ %V16I64 = fptosi <16 x float> undef to <16 x i64>
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'fptosi_float_i32'
+define i32 @fptosi_float_i32(i32 %arg) {
+ ; SSE2: cost of 1 {{.*}} %I32 = fptosi
+ ; SSE42: cost of 1 {{.*}} %I32 = fptosi
+ ; AVX1: cost of 1 {{.*}} %I32 = fptosi
+ ; AVX2: cost of 1 {{.*}} %I32 = fptosi
+ ; AVX512: cost of 1 {{.*}} %I32 = fptosi
+ %I32 = fptosi float undef to i32
+ ; SSE2: cost of 1 {{.*}} %V4I32 = fptosi
+ ; SSE42: cost of 1 {{.*}} %V4I32 = fptosi
+ ; AVX1: cost of 1 {{.*}} %V4I32 = fptosi
+ ; AVX2: cost of 1 {{.*}} %V4I32 = fptosi
+ ; AVX512: cost of 1 {{.*}} %V4I32 = fptosi
+ %V4I32 = fptosi <4 x float> undef to <4 x i32>
+ ; SSE2: cost of 1 {{.*}} %V8I32 = fptosi
+ ; SSE42: cost of 1 {{.*}} %V8I32 = fptosi
+ ; AVX1: cost of 1 {{.*}} %V8I32 = fptosi
+ ; AVX2: cost of 1 {{.*}} %V8I32 = fptosi
+ ; AVX512: cost of 1 {{.*}} %V8I32 = fptosi
+ %V8I32 = fptosi <8 x float> undef to <8 x i32>
+ ; SSE2: cost of 1 {{.*}} %V16I32 = fptosi
+ ; SSE42: cost of 1 {{.*}} %V16I32 = fptosi
+ ; AVX1: cost of 1 {{.*}} %V16I32 = fptosi
+ ; AVX2: cost of 1 {{.*}} %V16I32 = fptosi
+ ; AVX512: cost of 1 {{.*}} %V16I32 = fptosi
+ %V16I32 = fptosi <16 x float> undef to <16 x i32>
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'fptosi_float_i16'
+define i32 @fptosi_float_i16(i32 %arg) {
+ ; SSE2: cost of 1 {{.*}} %I16 = fptosi
+ ; SSE42: cost of 1 {{.*}} %I16 = fptosi
+ ; AVX1: cost of 1 {{.*}} %I16 = fptosi
+ ; AVX2: cost of 1 {{.*}} %I16 = fptosi
+ ; AVX512: cost of 1 {{.*}} %I16 = fptosi
+ %I16 = fptosi float undef to i16
+ ; SSE2: cost of 1 {{.*}} %V4I16 = fptosi
+ ; SSE42: cost of 1 {{.*}} %V4I16 = fptosi
+ ; AVX1: cost of 1 {{.*}} %V4I16 = fptosi
+ ; AVX2: cost of 1 {{.*}} %V4I16 = fptosi
+ ; AVX512: cost of 1 {{.*}} %V4I16 = fptosi
+ %V4I16 = fptosi <4 x float> undef to <4 x i16>
+ ; SSE2: cost of 3 {{.*}} %V8I16 = fptosi
+ ; SSE42: cost of 3 {{.*}} %V8I16 = fptosi
+ ; AVX1: cost of 1 {{.*}} %V8I16 = fptosi
+ ; AVX2: cost of 1 {{.*}} %V8I16 = fptosi
+ ; AVX512: cost of 1 {{.*}} %V8I16 = fptosi
+ %V8I16 = fptosi <8 x float> undef to <8 x i16>
+ ; SSE2: cost of 7 {{.*}} %V16I16 = fptosi
+ ; SSE42: cost of 7 {{.*}} %V16I16 = fptosi
+ ; AVX1: cost of 3 {{.*}} %V16I16 = fptosi
+ ; AVX2: cost of 3 {{.*}} %V16I16 = fptosi
+ ; AVX512: cost of 48 {{.*}} %V16I16 = fptosi
+ %V16I16 = fptosi <16 x float> undef to <16 x i16>
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'fptosi_float_i8'
+define i32 @fptosi_float_i8(i32 %arg) {
+ ; SSE2: cost of 1 {{.*}} %I8 = fptosi
+ ; SSE42: cost of 1 {{.*}} %I8 = fptosi
+ ; AVX1: cost of 1 {{.*}} %I8 = fptosi
+ ; AVX2: cost of 1 {{.*}} %I8 = fptosi
+ ; AVX512: cost of 1 {{.*}} %I8 = fptosi
+ %I8 = fptosi float undef to i8
+ ; SSE2: cost of 1 {{.*}} %V4I8 = fptosi
+ ; SSE42: cost of 1 {{.*}} %V4I8 = fptosi
+ ; AVX1: cost of 1 {{.*}} %V4I8 = fptosi
+ ; AVX2: cost of 1 {{.*}} %V4I8 = fptosi
+ ; AVX512: cost of 1 {{.*}} %V4I8 = fptosi
+ %V4I8 = fptosi <4 x float> undef to <4 x i8>
+ ; SSE2: cost of 3 {{.*}} %V8I8 = fptosi
+ ; SSE42: cost of 3 {{.*}} %V8I8 = fptosi
+ ; AVX1: cost of 7 {{.*}} %V8I8 = fptosi
+ ; AVX2: cost of 7 {{.*}} %V8I8 = fptosi
+ ; AVX512: cost of 7 {{.*}} %V8I8 = fptosi
+ %V8I8 = fptosi <8 x float> undef to <8 x i8>
+ ; SSE2: cost of 7 {{.*}} %V16I8 = fptosi
+ ; SSE42: cost of 7 {{.*}} %V16I8 = fptosi
+ ; AVX1: cost of 15 {{.*}} %V16I8 = fptosi
+ ; AVX2: cost of 15 {{.*}} %V16I8 = fptosi
+ ; AVX512: cost of 48 {{.*}} %V16I8 = fptosi
+ %V16I8 = fptosi <16 x float> undef to <16 x i8>
+
+ ret i32 undef
+}
diff --git a/test/Analysis/CostModel/X86/fptoui.ll b/test/Analysis/CostModel/X86/fptoui.ll
new file mode 100644
index 000000000000..dbdba30357d9
--- /dev/null
+++ b/test/Analysis/CostModel/X86/fptoui.ll
@@ -0,0 +1,262 @@
+; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+sse2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE --check-prefix=SSE2 %s
+; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+sse4.2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE --check-prefix=SSE42 %s
+; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+avx -cost-model -analyze < %s | FileCheck --check-prefix=AVX --check-prefix=AVX1 %s
+; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+avx2 -cost-model -analyze < %s | FileCheck --check-prefix=AVX --check-prefix=AVX2 %s
+; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+avx512f -cost-model -analyze < %s | FileCheck --check-prefix=AVX512 --check-prefix=AVX512F %s
+; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512dq -cost-model -analyze < %s | FileCheck --check-prefix=AVX512 --check-prefix=AVX512DQ %s
+
+; CHECK-LABEL: 'fptoui_double_i64'
+define i32 @fptoui_double_i64(i32 %arg) {
+ ; SSE2: cost of 4 {{.*}} %I64 = fptoui
+ ; SSE42: cost of 4 {{.*}} %I64 = fptoui
+ ; AVX1: cost of 4 {{.*}} %I64 = fptoui
+ ; AVX2: cost of 4 {{.*}} %I64 = fptoui
+ ; AVX512: cost of 1 {{.*}} %I64 = fptoui
+ %I64 = fptoui double undef to i64
+ ; SSE2: cost of 12 {{.*}} %V2I64 = fptoui
+ ; SSE42: cost of 12 {{.*}} %V2I64 = fptoui
+ ; AVX1: cost of 12 {{.*}} %V2I64 = fptoui
+ ; AVX2: cost of 12 {{.*}} %V2I64 = fptoui
+ ; AVX512F: cost of 6 {{.*}} %V2I64 = fptoui
+ ; AVX512DQ: cost of 1 {{.*}} %V2I64 = fptoui
+ %V2I64 = fptoui <2 x double> undef to <2 x i64>
+ ; SSE2: cost of 25 {{.*}} %V4I64 = fptoui
+ ; SSE42: cost of 25 {{.*}} %V4I64 = fptoui
+ ; AVX1: cost of 24 {{.*}} %V4I64 = fptoui
+ ; AVX2: cost of 24 {{.*}} %V4I64 = fptoui
+ ; AVX512F: cost of 12 {{.*}} %V4I64 = fptoui
+ ; AVX512DQ: cost of 1 {{.*}} %V4I64 = fptoui
+ %V4I64 = fptoui <4 x double> undef to <4 x i64>
+ ; SSE2: cost of 51 {{.*}} %V8I64 = fptoui
+ ; SSE42: cost of 51 {{.*}} %V8I64 = fptoui
+ ; AVX1: cost of 49 {{.*}} %V8I64 = fptoui
+ ; AVX2: cost of 49 {{.*}} %V8I64 = fptoui
+ ; AVX512F: cost of 24 {{.*}} %V8I64 = fptoui
+ ; AVX512DQ: cost of 1 {{.*}} %V8I64 = fptoui
+ %V8I64 = fptoui <8 x double> undef to <8 x i64>
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'fptoui_double_i32'
+define i32 @fptoui_double_i32(i32 %arg) {
+ ; SSE2: cost of 1 {{.*}} %I32 = fptoui
+ ; SSE42: cost of 1 {{.*}} %I32 = fptoui
+ ; AVX1: cost of 1 {{.*}} %I32 = fptoui
+ ; AVX2: cost of 1 {{.*}} %I32 = fptoui
+ ; AVX512: cost of 1 {{.*}} %I32 = fptoui
+ %I32 = fptoui double undef to i32
+ ; SSE2: cost of 6 {{.*}} %V2I32 = fptoui
+ ; SSE42: cost of 6 {{.*}} %V2I32 = fptoui
+ ; AVX1: cost of 6 {{.*}} %V2I32 = fptoui
+ ; AVX2: cost of 6 {{.*}} %V2I32 = fptoui
+ ; AVX512F: cost of 6 {{.*}} %V2I32 = fptoui
+ ; AVX512DQ: cost of 1 {{.*}} %V2I32 = fptoui
+ %V2I32 = fptoui <2 x double> undef to <2 x i32>
+ ; SSE2: cost of 13 {{.*}} %V4I32 = fptoui
+ ; SSE42: cost of 13 {{.*}} %V4I32 = fptoui
+ ; AVX1: cost of 16 {{.*}} %V4I32 = fptoui
+ ; AVX2: cost of 16 {{.*}} %V4I32 = fptoui
+ ; AVX512: cost of 16 {{.*}} %V4I32 = fptoui
+ %V4I32 = fptoui <4 x double> undef to <4 x i32>
+ ; SSE2: cost of 27 {{.*}} %V8I32 = fptoui
+ ; SSE42: cost of 27 {{.*}} %V8I32 = fptoui
+ ; AVX1: cost of 33 {{.*}} %V8I32 = fptoui
+ ; AVX2: cost of 33 {{.*}} %V8I32 = fptoui
+ ; AVX512: cost of 1 {{.*}} %V8I32 = fptoui
+ %V8I32 = fptoui <8 x double> undef to <8 x i32>
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'fptoui_double_i16'
+define i32 @fptoui_double_i16(i32 %arg) {
+ ; SSE2: cost of 1 {{.*}} %I16 = fptoui
+ ; SSE42: cost of 1 {{.*}} %I16 = fptoui
+ ; AVX1: cost of 1 {{.*}} %I16 = fptoui
+ ; AVX2: cost of 1 {{.*}} %I16 = fptoui
+ ; AVX512: cost of 1 {{.*}} %I16 = fptoui
+ %I16 = fptoui double undef to i16
+ ; SSE2: cost of 6 {{.*}} %V2I16 = fptoui
+ ; SSE42: cost of 6 {{.*}} %V2I16 = fptoui
+ ; AVX1: cost of 6 {{.*}} %V2I16 = fptoui
+ ; AVX2: cost of 6 {{.*}} %V2I16 = fptoui
+ ; AVX512F: cost of 6 {{.*}} %V2I16 = fptoui
+ ; AVX512DQ: cost of 1 {{.*}} %V2I16 = fptoui
+ %V2I16 = fptoui <2 x double> undef to <2 x i16>
+ ; SSE2: cost of 13 {{.*}} %V4I16 = fptoui
+ ; SSE42: cost of 13 {{.*}} %V4I16 = fptoui
+ ; AVX1: cost of 12 {{.*}} %V4I16 = fptoui
+ ; AVX2: cost of 12 {{.*}} %V4I16 = fptoui
+ ; AVX512: cost of 1 {{.*}} %V4I16 = fptoui
+ %V4I16 = fptoui <4 x double> undef to <4 x i16>
+ ; SSE2: cost of 27 {{.*}} %V8I16 = fptoui
+ ; SSE42: cost of 27 {{.*}} %V8I16 = fptoui
+ ; AVX1: cost of 25 {{.*}} %V8I16 = fptoui
+ ; AVX2: cost of 25 {{.*}} %V8I16 = fptoui
+ ; AVX512: cost of 1 {{.*}} %V8I16 = fptoui
+ %V8I16 = fptoui <8 x double> undef to <8 x i16>
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'fptoui_double_i8'
+define i32 @fptoui_double_i8(i32 %arg) {
+ ; SSE2: cost of 1 {{.*}} %I8 = fptoui
+ ; SSE42: cost of 1 {{.*}} %I8 = fptoui
+ ; AVX1: cost of 1 {{.*}} %I8 = fptoui
+ ; AVX2: cost of 1 {{.*}} %I8 = fptoui
+ ; AVX512: cost of 1 {{.*}} %I8 = fptoui
+ %I8 = fptoui double undef to i8
+ ; SSE2: cost of 6 {{.*}} %V2I8 = fptoui
+ ; SSE42: cost of 6 {{.*}} %V2I8 = fptoui
+ ; AVX1: cost of 6 {{.*}} %V2I8 = fptoui
+ ; AVX2: cost of 6 {{.*}} %V2I8 = fptoui
+ ; AVX512F: cost of 6 {{.*}} %V2I8 = fptoui
+ ; AVX512DQ: cost of 1 {{.*}} %V2I8 = fptoui
+ %V2I8 = fptoui <2 x double> undef to <2 x i8>
+ ; SSE2: cost of 13 {{.*}} %V4I8 = fptoui
+ ; SSE42: cost of 13 {{.*}} %V4I8 = fptoui
+ ; AVX1: cost of 12 {{.*}} %V4I8 = fptoui
+ ; AVX2: cost of 12 {{.*}} %V4I8 = fptoui
+ ; AVX512: cost of 1 {{.*}} %V4I8 = fptoui
+ %V4I8 = fptoui <4 x double> undef to <4 x i8>
+ ; SSE2: cost of 27 {{.*}} %V8I8 = fptoui
+ ; SSE42: cost of 27 {{.*}} %V8I8 = fptoui
+ ; AVX1: cost of 25 {{.*}} %V8I8 = fptoui
+ ; AVX2: cost of 25 {{.*}} %V8I8 = fptoui
+ ; AVX512: cost of 1 {{.*}} %V8I8 = fptoui
+ %V8I8 = fptoui <8 x double> undef to <8 x i8>
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'fptoui_float_i64'
+define i32 @fptoui_float_i64(i32 %arg) {
+ ; SSE2: cost of 4 {{.*}} %I64 = fptoui
+ ; SSE42: cost of 4 {{.*}} %I64 = fptoui
+ ; AVX1: cost of 4 {{.*}} %I64 = fptoui
+ ; AVX2: cost of 4 {{.*}} %I64 = fptoui
+ ; AVX512: cost of 1 {{.*}} %I64 = fptoui
+ %I64 = fptoui float undef to i64
+ ; SSE2: cost of 12 {{.*}} %V2I64 = fptoui
+ ; SSE42: cost of 12 {{.*}} %V2I64 = fptoui
+ ; AVX1: cost of 12 {{.*}} %V2I64 = fptoui
+ ; AVX2: cost of 12 {{.*}} %V2I64 = fptoui
+ ; AVX512F: cost of 6 {{.*}} %V2I64 = fptoui
+ ; AVX512DQ: cost of 1 {{.*}} %V2I64 = fptoui
+ %V2I64 = fptoui <2 x float> undef to <2 x i64>
+ ; SSE2: cost of 25 {{.*}} %V4I64 = fptoui
+ ; SSE42: cost of 25 {{.*}} %V4I64 = fptoui
+ ; AVX1: cost of 24 {{.*}} %V4I64 = fptoui
+ ; AVX2: cost of 24 {{.*}} %V4I64 = fptoui
+ ; AVX512F: cost of 12 {{.*}} %V4I64 = fptoui
+ ; AVX512DQ: cost of 1 {{.*}} %V4I64 = fptoui
+ %V4I64 = fptoui <4 x float> undef to <4 x i64>
+ ; SSE2: cost of 51 {{.*}} %V8I64 = fptoui
+ ; SSE42: cost of 51 {{.*}} %V8I64 = fptoui
+ ; AVX1: cost of 49 {{.*}} %V8I64 = fptoui
+ ; AVX2: cost of 49 {{.*}} %V8I64 = fptoui
+ ; AVX512F: cost of 24 {{.*}} %V8I64 = fptoui
+ ; AVX512DQ: cost of 1 {{.*}} %V8I64 = fptoui
+ %V8I64 = fptoui <8 x float> undef to <8 x i64>
+ ; SSE2: cost of 103 {{.*}} %V16I64 = fptoui
+ ; SSE42: cost of 103 {{.*}} %V16I64 = fptoui
+ ; AVX1: cost of 99 {{.*}} %V16I64 = fptoui
+ ; AVX2: cost of 99 {{.*}} %V16I64 = fptoui
+ ; AVX512F: cost of 49 {{.*}} %V16I64 = fptoui
+ ; AVX512DQ: cost of 3 {{.*}} %V16I64 = fptoui
+ %V16I64 = fptoui <16 x float> undef to <16 x i64>
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'fptoui_float_i32'
+define i32 @fptoui_float_i32(i32 %arg) {
+ ; SSE2: cost of 1 {{.*}} %I32 = fptoui
+ ; SSE42: cost of 1 {{.*}} %I32 = fptoui
+ ; AVX1: cost of 1 {{.*}} %I32 = fptoui
+ ; AVX2: cost of 1 {{.*}} %I32 = fptoui
+ ; AVX512: cost of 1 {{.*}} %I32 = fptoui
+ %I32 = fptoui float undef to i32
+ ; SSE2: cost of 12 {{.*}} %V4I32 = fptoui
+ ; SSE42: cost of 12 {{.*}} %V4I32 = fptoui
+ ; AVX1: cost of 12 {{.*}} %V4I32 = fptoui
+ ; AVX2: cost of 12 {{.*}} %V4I32 = fptoui
+ ; AVX512: cost of 1 {{.*}} %V4I32 = fptoui
+ %V4I32 = fptoui <4 x float> undef to <4 x i32>
+ ; SSE2: cost of 25 {{.*}} %V8I32 = fptoui
+ ; SSE42: cost of 25 {{.*}} %V8I32 = fptoui
+ ; AVX1: cost of 32 {{.*}} %V8I32 = fptoui
+ ; AVX2: cost of 32 {{.*}} %V8I32 = fptoui
+ ; AVX512: cost of 1 {{.*}} %V8I32 = fptoui
+ %V8I32 = fptoui <8 x float> undef to <8 x i32>
+ ; SSE2: cost of 51 {{.*}} %V16I32 = fptoui
+ ; SSE42: cost of 51 {{.*}} %V16I32 = fptoui
+ ; AVX1: cost of 65 {{.*}} %V16I32 = fptoui
+ ; AVX2: cost of 65 {{.*}} %V16I32 = fptoui
+ ; AVX512: cost of 1 {{.*}} %V16I32 = fptoui
+ %V16I32 = fptoui <16 x float> undef to <16 x i32>
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'fptoui_float_i16'
+define i32 @fptoui_float_i16(i32 %arg) {
+ ; SSE2: cost of 1 {{.*}} %I16 = fptoui
+ ; SSE42: cost of 1 {{.*}} %I16 = fptoui
+ ; AVX1: cost of 1 {{.*}} %I16 = fptoui
+ ; AVX2: cost of 1 {{.*}} %I16 = fptoui
+ ; AVX512: cost of 1 {{.*}} %I16 = fptoui
+ %I16 = fptoui float undef to i16
+ ; SSE2: cost of 12 {{.*}} %V4I16 = fptoui
+ ; SSE42: cost of 12 {{.*}} %V4I16 = fptoui
+ ; AVX1: cost of 12 {{.*}} %V4I16 = fptoui
+ ; AVX2: cost of 12 {{.*}} %V4I16 = fptoui
+ ; AVX512: cost of 1 {{.*}} %V4I16 = fptoui
+ %V4I16 = fptoui <4 x float> undef to <4 x i16>
+ ; SSE2: cost of 25 {{.*}} %V8I16 = fptoui
+ ; SSE42: cost of 25 {{.*}} %V8I16 = fptoui
+ ; AVX1: cost of 1 {{.*}} %V8I16 = fptoui
+ ; AVX2: cost of 1 {{.*}} %V8I16 = fptoui
+ ; AVX512: cost of 1 {{.*}} %V8I16 = fptoui
+ %V8I16 = fptoui <8 x float> undef to <8 x i16>
+ ; SSE2: cost of 51 {{.*}} %V16I16 = fptoui
+ ; SSE42: cost of 51 {{.*}} %V16I16 = fptoui
+ ; AVX1: cost of 3 {{.*}} %V16I16 = fptoui
+ ; AVX2: cost of 3 {{.*}} %V16I16 = fptoui
+ ; AVX512: cost of 48 {{.*}} %V16I16 = fptoui
+ %V16I16 = fptoui <16 x float> undef to <16 x i16>
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'fptoui_float_i8'
+define i32 @fptoui_float_i8(i32 %arg) {
+ ; SSE2: cost of 1 {{.*}} %I8 = fptoui
+ ; SSE42: cost of 1 {{.*}} %I8 = fptoui
+ ; AVX1: cost of 1 {{.*}} %I8 = fptoui
+ ; AVX2: cost of 1 {{.*}} %I8 = fptoui
+ ; AVX512: cost of 1 {{.*}} %I8 = fptoui
+ %I8 = fptoui float undef to i8
+ ; SSE2: cost of 12 {{.*}} %V4I8 = fptoui
+ ; SSE42: cost of 12 {{.*}} %V4I8 = fptoui
+ ; AVX1: cost of 12 {{.*}} %V4I8 = fptoui
+ ; AVX2: cost of 12 {{.*}} %V4I8 = fptoui
+ ; AVX512: cost of 1 {{.*}} %V4I8 = fptoui
+ %V4I8 = fptoui <4 x float> undef to <4 x i8>
+ ; SSE2: cost of 25 {{.*}} %V8I8 = fptoui
+ ; SSE42: cost of 25 {{.*}} %V8I8 = fptoui
+ ; AVX1: cost of 1 {{.*}} %V8I8 = fptoui
+ ; AVX2: cost of 1 {{.*}} %V8I8 = fptoui
+ ; AVX512: cost of 1 {{.*}} %V8I8 = fptoui
+ %V8I8 = fptoui <8 x float> undef to <8 x i8>
+ ; SSE2: cost of 51 {{.*}} %V16I8 = fptoui
+ ; SSE42: cost of 51 {{.*}} %V16I8 = fptoui
+ ; AVX1: cost of 3 {{.*}} %V16I8 = fptoui
+ ; AVX2: cost of 3 {{.*}} %V16I8 = fptoui
+ ; AVX512: cost of 48 {{.*}} %V16I8 = fptoui
+ %V16I8 = fptoui <16 x float> undef to <16 x i8>
+
+ ret i32 undef
+}
diff --git a/test/Analysis/CostModel/X86/interleave-load-i32.ll b/test/Analysis/CostModel/X86/interleave-load-i32.ll
new file mode 100755
index 000000000000..3c94d8c446f9
--- /dev/null
+++ b/test/Analysis/CostModel/X86/interleave-load-i32.ll
@@ -0,0 +1,85 @@
+; REQUIRES: asserts
+; RUN: opt -loop-vectorize -S -mcpu=skx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = global [10240 x i32] zeroinitializer, align 16
+@B = global [10240 x i32] zeroinitializer, align 16
+
+; Function Attrs: nounwind uwtable
+define void @load_i32_interleave4() {
+;CHECK-LABEL: load_i32_interleave4
+;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %0 = load
+;CHECK: Found an estimated cost of 5 for VF 2 For instruction: %0 = load
+;CHECK: Found an estimated cost of 5 for VF 4 For instruction: %0 = load
+;CHECK: Found an estimated cost of 8 for VF 8 For instruction: %0 = load
+;CHECK: Found an estimated cost of 22 for VF 16 For instruction: %0 = load
+entry:
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body
+ ret void
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds [10240 x i32], [10240 x i32]* @A, i64 0, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 16
+ %1 = or i64 %indvars.iv, 1
+ %arrayidx2 = getelementptr inbounds [10240 x i32], [10240 x i32]* @A, i64 0, i64 %1
+ %2 = load i32, i32* %arrayidx2, align 4
+ %add3 = add nsw i32 %2, %0
+ %3 = or i64 %indvars.iv, 2
+ %arrayidx6 = getelementptr inbounds [10240 x i32], [10240 x i32]* @A, i64 0, i64 %3
+ %4 = load i32, i32* %arrayidx6, align 8
+ %add7 = add nsw i32 %add3, %4
+ %5 = or i64 %indvars.iv, 3
+ %arrayidx10 = getelementptr inbounds [10240 x i32], [10240 x i32]* @A, i64 0, i64 %5
+ %6 = load i32, i32* %arrayidx10, align 4
+ %add11 = add nsw i32 %add7, %6
+ %arrayidx13 = getelementptr inbounds [10240 x i32], [10240 x i32]* @B, i64 0, i64 %indvars.iv
+ store i32 %add11, i32* %arrayidx13, align 16
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 4
+ %cmp = icmp slt i64 %indvars.iv.next, 1024
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+
+define void @load_i32_interleave5() {
+;CHECK-LABEL: load_i32_interleave5
+;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %0 = load
+;CHECK: Found an estimated cost of 6 for VF 2 For instruction: %0 = load
+;CHECK: Found an estimated cost of 9 for VF 4 For instruction: %0 = load
+;CHECK: Found an estimated cost of 18 for VF 8 For instruction: %0 = load
+;CHECK: Found an estimated cost of 35 for VF 16 For instruction: %0 = load
+entry:
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body
+ ret void
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds [10240 x i32], [10240 x i32]* @A, i64 0, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
+ %1 = add nuw nsw i64 %indvars.iv, 1
+ %arrayidx2 = getelementptr inbounds [10240 x i32], [10240 x i32]* @A, i64 0, i64 %1
+ %2 = load i32, i32* %arrayidx2, align 4
+ %add3 = add nsw i32 %2, %0
+ %3 = add nuw nsw i64 %indvars.iv, 2
+ %arrayidx6 = getelementptr inbounds [10240 x i32], [10240 x i32]* @A, i64 0, i64 %3
+ %4 = load i32, i32* %arrayidx6, align 4
+ %add7 = add nsw i32 %add3, %4
+ %5 = add nuw nsw i64 %indvars.iv, 3
+ %arrayidx10 = getelementptr inbounds [10240 x i32], [10240 x i32]* @A, i64 0, i64 %5
+ %6 = load i32, i32* %arrayidx10, align 4
+ %add11 = add nsw i32 %add7, %6
+ %7 = add nuw nsw i64 %indvars.iv, 4
+ %arrayidx14 = getelementptr inbounds [10240 x i32], [10240 x i32]* @A, i64 0, i64 %7
+ %8 = load i32, i32* %arrayidx14, align 4
+ %add15 = add nsw i32 %add11, %8
+ %arrayidx17 = getelementptr inbounds [10240 x i32], [10240 x i32]* @B, i64 0, i64 %indvars.iv
+ store i32 %add15, i32* %arrayidx17, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 5
+ %cmp = icmp slt i64 %indvars.iv.next, 1024
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
diff --git a/test/Analysis/CostModel/X86/interleave-store-i32.ll b/test/Analysis/CostModel/X86/interleave-store-i32.ll
new file mode 100755
index 000000000000..e3076bfa294b
--- /dev/null
+++ b/test/Analysis/CostModel/X86/interleave-store-i32.ll
@@ -0,0 +1,85 @@
+; REQUIRES: asserts
+; RUN: opt -loop-vectorize -S -mcpu=skx --debug-only=loop-vectorize < %s 2>&1| FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = global [10240 x i32] zeroinitializer, align 16
+@B = global [10240 x i32] zeroinitializer, align 16
+
+; Function Attrs: nounwind uwtable
+define void @store_i32_interleave4() {
+;CHECK-LABEL: store_i32_interleave4
+;CHECK: Found an estimated cost of 1 for VF 1 For instruction: store i32 %add16
+;CHECK: Found an estimated cost of 5 for VF 2 For instruction: store i32 %add16
+;CHECK: Found an estimated cost of 5 for VF 4 For instruction: store i32 %add16
+;CHECK: Found an estimated cost of 11 for VF 8 For instruction: store i32 %add16
+;CHECK: Found an estimated cost of 22 for VF 16 For instruction: store i32 %add16
+entry:
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body
+ ret void
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds [10240 x i32], [10240 x i32]* @A, i64 0, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 16
+ %arrayidx2 = getelementptr inbounds [10240 x i32], [10240 x i32]* @B, i64 0, i64 %indvars.iv
+ store i32 %0, i32* %arrayidx2, align 16
+ %add = add nsw i32 %0, 1
+ %1 = or i64 %indvars.iv, 1
+ %arrayidx7 = getelementptr inbounds [10240 x i32], [10240 x i32]* @B, i64 0, i64 %1
+ store i32 %add, i32* %arrayidx7, align 4
+ %add10 = add nsw i32 %0, 2
+ %2 = or i64 %indvars.iv, 2
+ %arrayidx13 = getelementptr inbounds [10240 x i32], [10240 x i32]* @B, i64 0, i64 %2
+ store i32 %add10, i32* %arrayidx13, align 8
+ %add16 = add nsw i32 %0, 3
+ %3 = or i64 %indvars.iv, 3
+ %arrayidx19 = getelementptr inbounds [10240 x i32], [10240 x i32]* @B, i64 0, i64 %3
+ store i32 %add16, i32* %arrayidx19, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 4
+ %cmp = icmp slt i64 %indvars.iv.next, 1024
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+
+define void @store_i32_interleave5() {
+;CHECK-LABEL: store_i32_interleave5
+;CHECK: Found an estimated cost of 1 for VF 1 For instruction: store i32 %add22
+;CHECK: Found an estimated cost of 7 for VF 2 For instruction: store i32 %add22
+;CHECK: Found an estimated cost of 14 for VF 4 For instruction: store i32 %add22
+;CHECK: Found an estimated cost of 21 for VF 8 For instruction: store i32 %add22
+;CHECK: Found an estimated cost of 35 for VF 16 For instruction: store i32 %add22
+entry:
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body
+ ret void
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds [10240 x i32], [10240 x i32]* @A, i64 0, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds [10240 x i32], [10240 x i32]* @B, i64 0, i64 %indvars.iv
+ store i32 %0, i32* %arrayidx2, align 4
+ %add = add nsw i32 %0, 1
+ %1 = add nuw nsw i64 %indvars.iv, 1
+ %arrayidx7 = getelementptr inbounds [10240 x i32], [10240 x i32]* @B, i64 0, i64 %1
+ store i32 %add, i32* %arrayidx7, align 4
+ %add10 = add nsw i32 %0, 2
+ %2 = add nuw nsw i64 %indvars.iv, 2
+ %arrayidx13 = getelementptr inbounds [10240 x i32], [10240 x i32]* @B, i64 0, i64 %2
+ store i32 %add10, i32* %arrayidx13, align 4
+ %add16 = add nsw i32 %0, 3
+ %3 = add nuw nsw i64 %indvars.iv, 3
+ %arrayidx19 = getelementptr inbounds [10240 x i32], [10240 x i32]* @B, i64 0, i64 %3
+ store i32 %add16, i32* %arrayidx19, align 4
+ %add22 = add nsw i32 %0, 4
+ %4 = add nuw nsw i64 %indvars.iv, 4
+ %arrayidx25 = getelementptr inbounds [10240 x i32], [10240 x i32]* @B, i64 0, i64 %4
+ store i32 %add22, i32* %arrayidx25, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 5
+ %cmp = icmp slt i64 %indvars.iv.next, 1024
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
diff --git a/test/Analysis/CostModel/X86/reduction.ll b/test/Analysis/CostModel/X86/reduction.ll
index aaafe07c1eb8..45e2215cd36a 100644
--- a/test/Analysis/CostModel/X86/reduction.ll
+++ b/test/Analysis/CostModel/X86/reduction.ll
@@ -33,7 +33,9 @@ define fastcc i32 @reduction_cost_int(<8 x i32> %rdx) {
%bin.rdx.3 = add <8 x i32> %bin.rdx.2, %rdx.shuf.3
; CHECK-LABEL: reduction_cost_int
-; CHECK: cost of 17 {{.*}} extractelement
+; CHECK: cost of 11 {{.*}} extractelement
+; AVX-LABEL: reduction_cost_int
+; AVX: cost of 5 {{.*}} extractelement
%r = extractelement <8 x i32> %bin.rdx.3, i32 0
ret i32 %r
diff --git a/test/Analysis/CostModel/X86/rem.ll b/test/Analysis/CostModel/X86/rem.ll
new file mode 100644
index 000000000000..10ce6775576f
--- /dev/null
+++ b/test/Analysis/CostModel/X86/rem.ll
@@ -0,0 +1,116 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE2
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSSE3
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE42
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; CHECK-LABEL: 'srem'
+define i32 @srem() {
+ ; CHECK: cost of 1 {{.*}} %I64 = srem
+ %I64 = srem i64 undef, undef
+ ; SSE: cost of 6 {{.*}} %V2i64 = srem
+ ; AVX: cost of 6 {{.*}} %V2i64 = srem
+ %V2i64 = srem <2 x i64> undef, undef
+ ; SSE: cost of 12 {{.*}} %V4i64 = srem
+ ; AVX: cost of 12 {{.*}} %V4i64 = srem
+ %V4i64 = srem <4 x i64> undef, undef
+ ; SSE: cost of 24 {{.*}} %V8i64 = srem
+ ; AVX: cost of 24 {{.*}} %V8i64 = srem
+ %V8i64 = srem <8 x i64> undef, undef
+
+ ; CHECK: cost of 1 {{.*}} %I32 = srem
+ %I32 = srem i32 undef, undef
+ ; SSE: cost of 12 {{.*}} %V4i32 = srem
+ ; AVX: cost of 12 {{.*}} %V4i32 = srem
+ %V4i32 = srem <4 x i32> undef, undef
+ ; SSE: cost of 24 {{.*}} %V8i32 = srem
+ ; AVX: cost of 24 {{.*}} %V8i32 = srem
+ %V8i32 = srem <8 x i32> undef, undef
+ ; SSE: cost of 48 {{.*}} %V16i32 = srem
+ ; AVX: cost of 48 {{.*}} %V16i32 = srem
+ %V16i32 = srem <16 x i32> undef, undef
+
+ ; CHECK: cost of 1 {{.*}} %I16 = srem
+ %I16 = srem i16 undef, undef
+ ; SSE: cost of 24 {{.*}} %V8i16 = srem
+ ; AVX: cost of 24 {{.*}} %V8i16 = srem
+ %V8i16 = srem <8 x i16> undef, undef
+ ; SSE: cost of 48 {{.*}} %V16i16 = srem
+ ; AVX: cost of 48 {{.*}} %V16i16 = srem
+ %V16i16 = srem <16 x i16> undef, undef
+ ; SSE: cost of 96 {{.*}} %V32i16 = srem
+ ; AVX: cost of 96 {{.*}} %V32i16 = srem
+ %V32i16 = srem <32 x i16> undef, undef
+
+ ; CHECK: cost of 1 {{.*}} %I8 = srem
+ %I8 = srem i8 undef, undef
+ ; SSE: cost of 48 {{.*}} %V16i8 = srem
+ ; AVX: cost of 48 {{.*}} %V16i8 = srem
+ %V16i8 = srem <16 x i8> undef, undef
+ ; SSE: cost of 96 {{.*}} %V32i8 = srem
+ ; AVX: cost of 96 {{.*}} %V32i8 = srem
+ %V32i8 = srem <32 x i8> undef, undef
+ ; SSE: cost of 192 {{.*}} %V64i8 = srem
+ ; AVX: cost of 192 {{.*}} %V64i8 = srem
+ %V64i8 = srem <64 x i8> undef, undef
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'urem'
+define i32 @urem() {
+ ; CHECK: cost of 1 {{.*}} %I64 = urem
+ %I64 = urem i64 undef, undef
+ ; SSE: cost of 6 {{.*}} %V2i64 = urem
+ ; AVX: cost of 6 {{.*}} %V2i64 = urem
+ %V2i64 = urem <2 x i64> undef, undef
+ ; SSE: cost of 12 {{.*}} %V4i64 = urem
+ ; AVX: cost of 12 {{.*}} %V4i64 = urem
+ %V4i64 = urem <4 x i64> undef, undef
+ ; SSE: cost of 24 {{.*}} %V8i64 = urem
+ ; AVX: cost of 24 {{.*}} %V8i64 = urem
+ %V8i64 = urem <8 x i64> undef, undef
+
+ ; CHECK: cost of 1 {{.*}} %I32 = urem
+ %I32 = urem i32 undef, undef
+ ; SSE: cost of 12 {{.*}} %V4i32 = urem
+ ; AVX: cost of 12 {{.*}} %V4i32 = urem
+ %V4i32 = urem <4 x i32> undef, undef
+ ; SSE: cost of 24 {{.*}} %V8i32 = urem
+ ; AVX: cost of 24 {{.*}} %V8i32 = urem
+ %V8i32 = urem <8 x i32> undef, undef
+ ; SSE: cost of 48 {{.*}} %V16i32 = urem
+ ; AVX: cost of 48 {{.*}} %V16i32 = urem
+ %V16i32 = urem <16 x i32> undef, undef
+
+ ; CHECK: cost of 1 {{.*}} %I16 = urem
+ %I16 = urem i16 undef, undef
+ ; SSE: cost of 24 {{.*}} %V8i16 = urem
+ ; AVX: cost of 24 {{.*}} %V8i16 = urem
+ %V8i16 = urem <8 x i16> undef, undef
+ ; SSE: cost of 48 {{.*}} %V16i16 = urem
+ ; AVX: cost of 48 {{.*}} %V16i16 = urem
+ %V16i16 = urem <16 x i16> undef, undef
+ ; SSE: cost of 96 {{.*}} %V32i16 = urem
+ ; AVX: cost of 96 {{.*}} %V32i16 = urem
+ %V32i16 = urem <32 x i16> undef, undef
+
+ ; CHECK: cost of 1 {{.*}} %I8 = urem
+ %I8 = urem i8 undef, undef
+ ; SSE: cost of 48 {{.*}} %V16i8 = urem
+ ; AVX: cost of 48 {{.*}} %V16i8 = urem
+ %V16i8 = urem <16 x i8> undef, undef
+ ; SSE: cost of 96 {{.*}} %V32i8 = urem
+ ; AVX: cost of 96 {{.*}} %V32i8 = urem
+ %V32i8 = urem <32 x i8> undef, undef
+ ; SSE: cost of 192 {{.*}} %V64i8 = urem
+ ; AVX: cost of 192 {{.*}} %V64i8 = urem
+ %V64i8 = urem <64 x i8> undef, undef
+
+ ret i32 undef
+}
diff --git a/test/Analysis/CostModel/X86/scalarize.ll b/test/Analysis/CostModel/X86/scalarize.ll
index fd4e3005bd9f..53808871dd64 100644
--- a/test/Analysis/CostModel/X86/scalarize.ll
+++ b/test/Analysis/CostModel/X86/scalarize.ll
@@ -28,11 +28,11 @@ define void @test_scalarized_intrinsics() {
; CHECK64: cost of 1 {{.*}}bswap.v2i64
%r3 = call %i8 @llvm.bswap.v2i64(%i8 undef)
-; CHECK32: cost of 12 {{.*}}cttz.v4i32
-; CHECK64: cost of 12 {{.*}}cttz.v4i32
+; CHECK32: cost of 14 {{.*}}cttz.v4i32
+; CHECK64: cost of 14 {{.*}}cttz.v4i32
%r4 = call %i4 @llvm.cttz.v4i32(%i4 undef)
; CHECK32: cost of 10 {{.*}}cttz.v2i64
-; CHECK64: cost of 6 {{.*}}cttz.v2i64
+; CHECK64: cost of 10 {{.*}}cttz.v2i64
%r5 = call %i8 @llvm.cttz.v2i64(%i8 undef)
; CHECK32: ret
diff --git a/test/Analysis/CostModel/X86/shuffle-broadcast.ll b/test/Analysis/CostModel/X86/shuffle-broadcast.ll
new file mode 100644
index 000000000000..a829a47f89f2
--- /dev/null
+++ b/test/Analysis/CostModel/X86/shuffle-broadcast.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE2
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+ssse3 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSSE3
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.2 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE42
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
+
+;
+; Verify the cost model for broadcast shuffles.
+;
+
+; CHECK-LABEL: 'test_vXf64'
+define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> %src512) {
+ ; SSE: cost of 1 {{.*}} %V128 = shufflevector
+ ; AVX: cost of 1 {{.*}} %V128 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V128 = shufflevector
+ %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> zeroinitializer
+
+ ; SSE: cost of 1 {{.*}} %V256 = shufflevector
+ ; AVX: cost of 1 {{.*}} %V256 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
+ %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> zeroinitializer
+
+ ; SSE: cost of 1 {{.*}} %V512 = shufflevector
+ ; AVX: cost of 1 {{.*}} %V512 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
+ %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> zeroinitializer
+
+ ret void
+}
diff --git a/test/Analysis/CostModel/X86/shuffle-reverse.ll b/test/Analysis/CostModel/X86/shuffle-reverse.ll
new file mode 100644
index 000000000000..a1bdda0690aa
--- /dev/null
+++ b/test/Analysis/CostModel/X86/shuffle-reverse.ll
@@ -0,0 +1,168 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE2
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+ssse3 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSSE3
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.2 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE42
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
+
+;
+; Verify the cost model for reverse shuffles.
+;
+
+; CHECK-LABEL: 'test_vXf64'
+define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> %src512) {
+ ; SSE: cost of 1 {{.*}} %V128 = shufflevector
+ ; AVX: cost of 1 {{.*}} %V128 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V128 = shufflevector
+ %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 0>
+
+ ; SSE: cost of 2 {{.*}} %V256 = shufflevector
+ ; AVX1: cost of 2 {{.*}} %V256 = shufflevector
+ ; AVX2: cost of 1 {{.*}} %V256 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
+ %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+
+ ; SSE: cost of 4 {{.*}} %V512 = shufflevector
+ ; AVX1: cost of 4 {{.*}} %V512 = shufflevector
+ ; AVX2: cost of 2 {{.*}} %V512 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
+ %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+ ret void
+}
+
+; CHECK-LABEL: 'test_vXi64'
+define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) {
+ ; SSE: cost of 1 {{.*}} %V128 = shufflevector
+ ; AVX: cost of 1 {{.*}} %V128 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V128 = shufflevector
+ %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
+
+ ; SSE: cost of 2 {{.*}} %V256 = shufflevector
+ ; AVX1: cost of 2 {{.*}} %V256 = shufflevector
+ ; AVX2: cost of 1 {{.*}} %V256 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
+ %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+
+ ; SSE: cost of 4 {{.*}} %V512 = shufflevector
+ ; AVX1: cost of 4 {{.*}} %V512 = shufflevector
+ ; AVX2: cost of 2 {{.*}} %V512 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
+ %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+ ret void
+}
+
+; CHECK-LABEL: 'test_vXf32'
+define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %src256, <16 x float> %src512) {
+ ; SSE: cost of 1 {{.*}} %V64 = shufflevector
+ ; AVX: cost of 1 {{.*}} %V64 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V64 = shufflevector
+ %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 0>
+
+ ; SSE: cost of 1 {{.*}} %V128 = shufflevector
+ ; AVX: cost of 1 {{.*}} %V128 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V128 = shufflevector
+ %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+
+ ; SSE: cost of 2 {{.*}} %V256 = shufflevector
+ ; AVX1: cost of 2 {{.*}} %V256 = shufflevector
+ ; AVX2: cost of 1 {{.*}} %V256 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
+ %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+ ; SSE: cost of 4 {{.*}} %V512 = shufflevector
+ ; AVX1: cost of 4 {{.*}} %V512 = shufflevector
+ ; AVX2: cost of 2 {{.*}} %V512 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
+ %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+ ret void
+}
+
+; CHECK-LABEL: 'test_vXi32'
+define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512) {
+ ; SSE: cost of 1 {{.*}} %V64 = shufflevector
+ ; AVX: cost of 1 {{.*}} %V64 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V64 = shufflevector
+ %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
+
+ ; SSE: cost of 1 {{.*}} %V128 = shufflevector
+ ; AVX: cost of 1 {{.*}} %V128 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V128 = shufflevector
+ %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+
+ ; SSE: cost of 2 {{.*}} %V256 = shufflevector
+ ; AVX1: cost of 2 {{.*}} %V256 = shufflevector
+ ; AVX2: cost of 1 {{.*}} %V256 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
+ %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+ ; SSE: cost of 4 {{.*}} %V512 = shufflevector
+ ; AVX1: cost of 4 {{.*}} %V512 = shufflevector
+ ; AVX2: cost of 2 {{.*}} %V512 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
+ %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+ ret void
+}
+
+; CHECK-LABEL: 'test_vXi16'
+define void @test_vXi16(<8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src512) {
+ ; SSE2: cost of 3 {{.*}} %V128 = shufflevector
+ ; SSSE3: cost of 1 {{.*}} %V128 = shufflevector
+ ; SSE42: cost of 1 {{.*}} %V128 = shufflevector
+ ; AVX: cost of 1 {{.*}} %V128 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V128 = shufflevector
+ %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+ ; SSE2: cost of 6 {{.*}} %V256 = shufflevector
+ ; SSSE3: cost of 2 {{.*}} %V256 = shufflevector
+ ; SSE42: cost of 2 {{.*}} %V256 = shufflevector
+ ; AVX1: cost of 4 {{.*}} %V256 = shufflevector
+ ; AVX2: cost of 2 {{.*}} %V256 = shufflevector
+ ; AVX512F: cost of 2 {{.*}} %V256 = shufflevector
+ ; AVX512BW: cost of 1 {{.*}} %V256 = shufflevector
+ %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+ ; SSE2: cost of 12 {{.*}} %V512 = shufflevector
+ ; SSSE3: cost of 4 {{.*}} %V512 = shufflevector
+ ; SSE42: cost of 4 {{.*}} %V512 = shufflevector
+ ; AVX1: cost of 8 {{.*}} %V512 = shufflevector
+ ; AVX2: cost of 4 {{.*}} %V512 = shufflevector
+ ; AVX512F: cost of 4 {{.*}} %V512 = shufflevector
+ ; AVX512BW: cost of 1 {{.*}} %V512 = shufflevector
+ %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+ ret void
+}
+
+; CHECK-LABEL: 'test_vXi8'
+define void @test_vXi8(<16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) {
+ ; SSE2: cost of 9 {{.*}} %V128 = shufflevector
+ ; SSSE3: cost of 1 {{.*}} %V128 = shufflevector
+ ; SSE42: cost of 1 {{.*}} %V128 = shufflevector
+ ; AVX: cost of 1 {{.*}} %V128 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V128 = shufflevector
+ %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+ ; SSE2: cost of 18 {{.*}} %V256 = shufflevector
+ ; SSSE3: cost of 2 {{.*}} %V256 = shufflevector
+ ; SSE42: cost of 2 {{.*}} %V256 = shufflevector
+ ; AVX1: cost of 4 {{.*}} %V256 = shufflevector
+ ; AVX2: cost of 2 {{.*}} %V256 = shufflevector
+ ; AVX512: cost of 2 {{.*}} %V256 = shufflevector
+ %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+ ; SSE2: cost of 36 {{.*}} %V512 = shufflevector
+ ; SSSE3: cost of 4 {{.*}} %V512 = shufflevector
+ ; SSE42: cost of 4 {{.*}} %V512 = shufflevector
+ ; AVX1: cost of 8 {{.*}} %V512 = shufflevector
+ ; AVX2: cost of 4 {{.*}} %V512 = shufflevector
+ ; AVX512F: cost of 4 {{.*}} %V512 = shufflevector
+ ; AVX512BW: cost of 6 {{.*}} %V512 = shufflevector
+ %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+ ret void
+}
diff --git a/test/Analysis/CostModel/X86/shuffle-single-src.ll b/test/Analysis/CostModel/X86/shuffle-single-src.ll
new file mode 100644
index 000000000000..a953ec17d80f
--- /dev/null
+++ b/test/Analysis/CostModel/X86/shuffle-single-src.ll
@@ -0,0 +1,94 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake-avx512 | FileCheck %s --check-prefix=SKX
+
+;
+; Verify the cost model for 1 src shuffles
+;
+
+; SKX-LABEL: 'test_vXf64'
+define void @test_vXf64(<4 x double> %src256, <8 x double> %src512, <16 x double> %src1024) {
+ ; SKX: cost of 1 {{.*}} %V256 = shufflevector
+ %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
+
+ ; SKX: cost of 1 {{.*}} %V512 = shufflevector
+ %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+ ; SKX: cost of 2 {{.*}} %V1024 = shufflevector
+ %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+ ret void
+}
+
+; SKX-LABEL: 'test_vXi64'
+define void @test_vXi64(<4 x i64> %src256, <8 x i64> %src512) {
+
+ ; SKX: cost of 1 {{.*}} %V256 = shufflevector
+ %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
+
+ ; SKX: cost of 1 {{.*}} %V512 = shufflevector
+ %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+ ret void
+}
+
+; SKX-LABEL: 'test_vXf32'
+define void @test_vXf32(<4 x float> %src128, <8 x float> %src256, <16 x float> %src512) {
+
+ ; SKX: cost of 1 {{.*}} %V128 = shufflevector
+ %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
+
+ ; SKX: cost of 1 {{.*}} %V256 = shufflevector
+ %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+ ; SKX: cost of 1 {{.*}} %V512 = shufflevector
+ %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+ ret void
+}
+
+; SKX-LABEL: 'test_vXi32'
+define void @test_vXi32(<4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512, <32 x i32> %src1024) {
+
+ ; SKX: cost of 1 {{.*}} %V128 = shufflevector
+ %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
+
+ ; SKX: cost of 1 {{.*}} %V256 = shufflevector
+ %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 5, i32 3, i32 2, i32 1, i32 0>
+
+ ; SKX: cost of 1 {{.*}} %V512 = shufflevector
+ %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 13, i32 10, i32 9, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+ ; SKX: cost of 2 {{.*}} %V1024 = shufflevector
+ %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+ ret void
+}
+
+; SKX-LABEL: 'test_vXi16'
+define void @test_vXi16(<8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src512, <64 x i16> %src1024) {
+
+ ; SKX: cost of 1 {{.*}} %V128 = shufflevector
+ %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+ ; SKX: cost of 1 {{.*}} %V256 = shufflevector
+ %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+ ; SKX: cost of 1 {{.*}} %V512 = shufflevector
+ %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+ ; SKX: cost of 2 {{.*}} %V1024 = shufflevector
+ %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+ ret void
+}
+
+; SKX-LABEL: 'test_vXi8'
+define void @test_vXi8(<16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) {
+ ; SKX: cost of 1 {{.*}} %V128 = shufflevector
+ %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+ ; SKX: cost of 3 {{.*}} %V256 = shufflevector
+ %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+ ; SKX: cost of 8 {{.*}} %V512 = shufflevector
+ %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+ ret void
+}
diff --git a/test/Analysis/CostModel/X86/shuffle-two-src.ll b/test/Analysis/CostModel/X86/shuffle-two-src.ll
new file mode 100644
index 000000000000..de79a82e66ae
--- /dev/null
+++ b/test/Analysis/CostModel/X86/shuffle-two-src.ll
@@ -0,0 +1,68 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake-avx512 | FileCheck %s --check-prefix=SKX
+
+;
+; Verify the cost model for 2 src shuffles
+;
+
+; SKX-LABEL: 'test_vXf64'
+define void @test_vXf64(<4 x double> %src256, <8 x double> %src512, <16 x double> %src1024, <4 x double> %src256_1, <8 x double> %src512_1, <16 x double> %src1024_1) {
+ ; SKX: cost of 1 {{.*}} %V256 = shufflevector
+ %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> <i32 3, i32 3, i32 7, i32 6>
+
+ ; SKX: cost of 1 {{.*}} %V512 = shufflevector
+ %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> <i32 7, i32 6, i32 12, i32 4, i32 3, i32 2, i32 1, i32 15>
+
+ ; SKX: cost of 6 {{.*}} %V1024 = shufflevector
+ %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 12, i32 13, i32 10, i32 18, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+ ret void
+}
+
+; SKX-LABEL: 'test_vXf32'
+define void @test_vXf32(<4 x float> %src128, <8 x float> %src256, <16 x float> %src512, <32 x float> %src1024, <4 x float> %src128_1, <8 x float> %src256_1, <16 x float> %src512_1, <32 x float> %src1024_1) {
+
+ ; SKX: cost of 1 {{.*}} %V128 = shufflevector
+ %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
+
+ ; SKX: cost of 1 {{.*}} %V256 = shufflevector
+ %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> <i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0>
+
+ ; SKX: cost of 1 {{.*}} %V512 = shufflevector
+ %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 11, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+ ; SKX: cost of 6 {{.*}} %V1024 = shufflevector
+ %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 12, i32 11, i32 11, i32 9, i32 45, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+ ret void
+}
+
+; SKX-LABEL: 'test_vXi16'
+define void @test_vXi16(<8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src512, <64 x i16> %src1024, <8 x i16> %src128_1, <16 x i16> %src256_1, <32 x i16> %src512_1, <64 x i16> %src1024_1) {
+
+ ; SKX: cost of 1 {{.*}} %V128 = shufflevector
+ %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0>
+
+ ; SKX: cost of 1 {{.*}} %V256 = shufflevector
+ %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 14, i32 13, i32 20, i32 21, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+ ; SKX: cost of 1 {{.*}} %V512 = shufflevector
+ %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 38, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+ ; SKX: cost of 6 {{.*}} %V1024 = shufflevector
+ %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 63, i32 62, i32 71, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 66, i32 2, i32 1, i32 0>
+ ret void
+}
+
+; SKX-LABEL: 'test_vXi8'
+define void @test_vXi8(<16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512, <16 x i8> %src128_1, <32 x i8> %src256_1, <64 x i8> %src512_1) {
+ ; SKX: cost of 3 {{.*}} %V128 = shufflevector
+ %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> <i32 29, i32 14, i32 28, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+ ; SKX: cost of 3 {{.*}} %V256 = shufflevector
+ %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+ ; SKX: cost of 19 {{.*}} %V512 = shufflevector
+ %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 63, i32 100, i32 61, i32 96, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+ ret void
+}
diff --git a/test/Analysis/CostModel/X86/sitofp.ll b/test/Analysis/CostModel/X86/sitofp.ll
index fb390a2b17aa..a30cb5f7e823 100644
--- a/test/Analysis/CostModel/X86/sitofp.ll
+++ b/test/Analysis/CostModel/X86/sitofp.ll
@@ -1,678 +1,250 @@
; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+sse2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE --check-prefix=SSE2 %s
; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+avx -cost-model -analyze < %s | FileCheck --check-prefix=AVX --check-prefix=AVX1 %s
; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+avx2 -cost-model -analyze < %s | FileCheck --check-prefix=AVX --check-prefix=AVX2 %s
-; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+avx512f -cost-model -analyze < %s | FileCheck --check-prefix=AVX512F %s
-
-define <2 x double> @sitofpv2i8v2double(<2 x i8> %a) {
- ; SSE2-LABEL: sitofpv2i8v2double
- ; SSE2: cost of 20 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv2i8v2double
- ; AVX1: cost of 4 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv2i8v2double
- ; AVX2: cost of 4 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv2i8v2double
- ; AVX512F: cost of 4 {{.*}} sitofp
- %1 = sitofp <2 x i8> %a to <2 x double>
- ret <2 x double> %1
-}
-
-define <4 x double> @sitofpv4i8v4double(<4 x i8> %a) {
- ; SSE2-LABEL: sitofpv4i8v4double
- ; SSE2: cost of 40 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv4i8v4double
- ; AVX1: cost of 3 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv4i8v4double
- ; AVX2: cost of 3 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv4i8v4double
- ; AVX512F: cost of 3 {{.*}} sitofp
- %1 = sitofp <4 x i8> %a to <4 x double>
- ret <4 x double> %1
-}
-
-define <8 x double> @sitofpv8i8v8double(<8 x i8> %a) {
- ; SSE2-LABEL: sitofpv8i8v8double
- ; SSE2: cost of 80 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv8i8v8double
- ; AVX1: cost of 7 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv8i8v8double
- ; AVX2: cost of 7 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv8i8v8double
- ; AVX512F: cost of 2 {{.*}} sitofp
- %1 = sitofp <8 x i8> %a to <8 x double>
- ret <8 x double> %1
-}
-
-define <16 x double> @sitofpv16i8v16double(<16 x i8> %a) {
- ; SSE2-LABEL: sitofpv16i8v16double
- ; SSE2: cost of 160 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv16i8v16double
- ; AVX1: cost of 15 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv16i8v16double
- ; AVX2: cost of 15 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv16i8v16double
- ; AVX512F: cost of 5 {{.*}} sitofp
- %1 = sitofp <16 x i8> %a to <16 x double>
- ret <16 x double> %1
-}
-
-define <32 x double> @sitofpv32i8v32double(<32 x i8> %a) {
- ; SSE2-LABEL: sitofpv32i8v32double
- ; SSE2: cost of 320 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv32i8v32double
- ; AVX1: cost of 31 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv32i8v32double
- ; AVX2: cost of 31 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv32i8v32double
- ; AVX512F: cost of 11 {{.*}} sitofp
- %1 = sitofp <32 x i8> %a to <32 x double>
- ret <32 x double> %1
-}
-
-define <2 x double> @sitofpv2i16v2double(<2 x i16> %a) {
- ; SSE2-LABEL: sitofpv2i16v2double
- ; SSE2: cost of 20 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv2i16v2double
- ; AVX1: cost of 4 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv2i16v2double
- ; AVX2: cost of 4 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv2i16v2double
- ; AVX512F: cost of 4 {{.*}} sitofp
- %1 = sitofp <2 x i16> %a to <2 x double>
- ret <2 x double> %1
-}
-
-define <4 x double> @sitofpv4i16v4double(<4 x i16> %a) {
- ; SSE2-LABEL: sitofpv4i16v4double
- ; SSE2: cost of 40 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv4i16v4double
- ; AVX1: cost of 3 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv4i16v4double
- ; AVX2: cost of 3 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv4i16v4double
- ; AVX512F: cost of 3 {{.*}} sitofp
- %1 = sitofp <4 x i16> %a to <4 x double>
- ret <4 x double> %1
-}
-
-define <8 x double> @sitofpv8i16v8double(<8 x i16> %a) {
- ; SSE2-LABEL: sitofpv8i16v8double
- ; SSE2: cost of 80 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv8i16v8double
- ; AVX1: cost of 7 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv8i16v8double
- ; AVX2: cost of 7 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv8i16v8double
- ; AVX512F: cost of 2 {{.*}} sitofp
- %1 = sitofp <8 x i16> %a to <8 x double>
- ret <8 x double> %1
-}
-
-define <16 x double> @sitofpv16i16v16double(<16 x i16> %a) {
- ; SSE2-LABEL: sitofpv16i16v16double
- ; SSE2: cost of 160 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv16i16v16double
- ; AVX1: cost of 15 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv16i16v16double
- ; AVX2: cost of 15 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv16i16v16double
- ; AVX512F: cost of 5 {{.*}} sitofp
- %1 = sitofp <16 x i16> %a to <16 x double>
- ret <16 x double> %1
-}
-
-define <32 x double> @sitofpv32i16v32double(<32 x i16> %a) {
- ; SSE2-LABEL: sitofpv32i16v32double
- ; SSE2: cost of 320 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv32i16v32double
- ; AVX1: cost of 31 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv32i16v32double
- ; AVX2: cost of 31 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv32i16v32double
- ; AVX512F: cost of 11 {{.*}} sitofp
- %1 = sitofp <32 x i16> %a to <32 x double>
- ret <32 x double> %1
-}
-
-define <2 x double> @sitofpv2i32v2double(<2 x i32> %a) {
- ; SSE2-LABEL: sitofpv2i32v2double
- ; SSE2: cost of 20 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv2i32v2double
- ; AVX1: cost of 4 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv2i32v2double
- ; AVX2: cost of 4 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv2i32v2double
- ; AVX512F: cost of 4 {{.*}} sitofp
- %1 = sitofp <2 x i32> %a to <2 x double>
- ret <2 x double> %1
-}
-
-define <4 x double> @sitofpv4i32v4double(<4 x i32> %a) {
- ; SSE2-LABEL: sitofpv4i32v4double
- ; SSE2: cost of 40 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv4i32v4double
- ; AVX1: cost of 1 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv4i32v4double
- ; AVX2: cost of 1 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv4i32v4double
- ; AVX512F: cost of 1 {{.*}} sitofp
- %1 = sitofp <4 x i32> %a to <4 x double>
- ret <4 x double> %1
-}
-
-define <8 x double> @sitofpv8i32v8double(<8 x i32> %a) {
- ; SSE2-LABEL: sitofpv8i32v8double
- ; SSE2: cost of 80 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv8i32v8double
- ; AVX1: cost of 3 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv8i32v8double
- ; AVX2: cost of 3 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv8i32v8double
- ; AVX512F: cost of 1 {{.*}} sitofp
- %1 = sitofp <8 x i32> %a to <8 x double>
- ret <8 x double> %1
-}
-
-define <16 x double> @sitofpv16i32v16double(<16 x i32> %a) {
- ; SSE2-LABEL: sitofpv16i32v16double
- ; SSE2: cost of 160 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv16i32v16double
- ; AVX1: cost of 7 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv16i32v16double
- ; AVX2: cost of 7 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv16i32v16double
- ; AVX512F: cost of 3 {{.*}} sitofp
- %1 = sitofp <16 x i32> %a to <16 x double>
- ret <16 x double> %1
-}
-
-define <32 x double> @sitofpv32i32v32double(<32 x i32> %a) {
- ; SSE2-LABEL: sitofpv32i32v32double
- ; SSE2: cost of 320 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv32i32v32double
- ; AVX1: cost of 15 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv32i32v32double
- ; AVX2: cost of 15 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv32i32v32double
- ; AVX512F: cost of 7 {{.*}} sitofp
- %1 = sitofp <32 x i32> %a to <32 x double>
- ret <32 x double> %1
-}
-
-define <2 x double> @sitofpv2i64v2double(<2 x i64> %a) {
- ; SSE2-LABEL: sitofpv2i64v2double
- ; SSE2: cost of 20 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv2i64v2double
- ; AVX1: cost of 20 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv2i64v2double
- ; AVX2: cost of 20 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv2i64v2double
- ; AVX512F: cost of 20 {{.*}} sitofp
- %1 = sitofp <2 x i64> %a to <2 x double>
- ret <2 x double> %1
-}
-
-define <4 x double> @sitofpv4i64v4double(<4 x i64> %a) {
- ; SSE2-LABEL: sitofpv4i64v4double
- ; SSE2: cost of 40 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv4i64v4double
- ; AVX1: cost of 13 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv4i64v4double
- ; AVX2: cost of 13 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv4i64v4double
- ; AVX512F: cost of 13 {{.*}} sitofp
- %1 = sitofp <4 x i64> %a to <4 x double>
- ret <4 x double> %1
-}
-
-define <8 x double> @sitofpv8i64v8double(<8 x i64> %a) {
- ; SSE2-LABEL: sitofpv8i64v8double
- ; SSE2: cost of 80 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv8i64v8double
- ; AVX1: cost of 27 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv8i64v8double
- ; AVX2: cost of 27 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv8i64v8double
- ; AVX512F: cost of 22 {{.*}} sitofp
- %1 = sitofp <8 x i64> %a to <8 x double>
- ret <8 x double> %1
-}
-
-define <16 x double> @sitofpv16i64v16double(<16 x i64> %a) {
- ; SSE2-LABEL: sitofpv16i64v16double
- ; SSE2: cost of 160 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv16i64v16double
- ; AVX1: cost of 55 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv16i64v16double
- ; AVX2: cost of 55 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv16i64v16double
- ; AVX512F: cost of 45 {{.*}} sitofp
- %1 = sitofp <16 x i64> %a to <16 x double>
- ret <16 x double> %1
-}
-
-define <32 x double> @sitofpv32i64v32double(<32 x i64> %a) {
- ; SSE2-LABEL: sitofpv32i64v32double
- ; SSE2: cost of 320 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv32i64v32double
- ; AVX1: cost of 111 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv32i64v32double
- ; AVX2: cost of 111 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv32i64v32double
- ; AVX512F: cost of 91 {{.*}} sitofp
- %1 = sitofp <32 x i64> %a to <32 x double>
- ret <32 x double> %1
-}
-
-define <2 x float> @sitofpv2i8v2float(<2 x i8> %a) {
- ; SSE2-LABEL: sitofpv2i8v2float
- ; SSE2: cost of 15 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv2i8v2float
- ; AVX1: cost of 4 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv2i8v2float
- ; AVX2: cost of 4 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv2i8v2float
- ; AVX512F: cost of 4 {{.*}} sitofp
- %1 = sitofp <2 x i8> %a to <2 x float>
- ret <2 x float> %1
-}
-
-define <4 x float> @sitofpv4i8v4float(<4 x i8> %a) {
- ; SSE2-LABEL: sitofpv4i8v4float
- ; SSE2: cost of 5 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv4i8v4float
- ; AVX1: cost of 3 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv4i8v4float
- ; AVX2: cost of 3 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv4i8v4float
- ; AVX512F: cost of 3 {{.*}} sitofp
- %1 = sitofp <4 x i8> %a to <4 x float>
- ret <4 x float> %1
-}
-
-define <8 x float> @sitofpv8i8v8float(<8 x i8> %a) {
- ; SSE2-LABEL: sitofpv8i8v8float
- ; SSE2: cost of 15 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv8i8v8float
- ; AVX1: cost of 8 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv8i8v8float
- ; AVX2: cost of 8 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv8i8v8float
- ; AVX512F: cost of 8 {{.*}} sitofp
- %1 = sitofp <8 x i8> %a to <8 x float>
- ret <8 x float> %1
-}
-
-define <16 x float> @sitofpv16i8v16float(<16 x i8> %a) {
- ; SSE2-LABEL: sitofpv16i8v16float
- ; SSE2: cost of 8 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv16i8v16float
- ; AVX1: cost of 17 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv16i8v16float
- ; AVX2: cost of 17 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv16i8v16float
- ; AVX512F: cost of 2 {{.*}} sitofp
- %1 = sitofp <16 x i8> %a to <16 x float>
- ret <16 x float> %1
-}
-
-define <32 x float> @sitofpv32i8v32float(<32 x i8> %a) {
- ; SSE2-LABEL: sitofpv32i8v32float
- ; SSE2: cost of 16 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv32i8v32float
- ; AVX1: cost of 35 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv32i8v32float
- ; AVX2: cost of 35 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv32i8v32float
- ; AVX512F: cost of 5 {{.*}} sitofp
- %1 = sitofp <32 x i8> %a to <32 x float>
- ret <32 x float> %1
-}
-
-define <2 x float> @sitofpv2i16v2float(<2 x i16> %a) {
- ; SSE2-LABEL: sitofpv2i16v2float
- ; SSE2: cost of 15 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv2i16v2float
- ; AVX1: cost of 4 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv2i16v2float
- ; AVX2: cost of 4 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv2i16v2float
- ; AVX512F: cost of 4 {{.*}} sitofp
- %1 = sitofp <2 x i16> %a to <2 x float>
- ret <2 x float> %1
-}
-
-define <4 x float> @sitofpv4i16v4float(<4 x i16> %a) {
- ; SSE2-LABEL: sitofpv4i16v4float
- ; SSE2: cost of 5 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv4i16v4float
- ; AVX1: cost of 3 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv4i16v4float
- ; AVX2: cost of 3 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv4i16v4float
- ; AVX512F: cost of 3 {{.*}} sitofp
- %1 = sitofp <4 x i16> %a to <4 x float>
- ret <4 x float> %1
-}
-
-define <8 x float> @sitofpv8i16v8float(<8 x i16> %a) {
- ; SSE2-LABEL: sitofpv8i16v8float
- ; SSE2: cost of 15 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv8i16v8float
- ; AVX1: cost of 5 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv8i16v8float
- ; AVX2: cost of 5 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv8i16v8float
- ; AVX512F: cost of 5 {{.*}} sitofp
- %1 = sitofp <8 x i16> %a to <8 x float>
- ret <8 x float> %1
-}
-
-define <16 x float> @sitofpv16i16v16float(<16 x i16> %a) {
- ; SSE2-LABEL: sitofpv16i16v16float
- ; SSE2: cost of 30 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv16i16v16float
- ; AVX1: cost of 11 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv16i16v16float
- ; AVX2: cost of 11 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv16i16v16float
- ; AVX512F: cost of 2 {{.*}} sitofp
- %1 = sitofp <16 x i16> %a to <16 x float>
- ret <16 x float> %1
-}
-
-define <32 x float> @sitofpv32i16v32float(<32 x i16> %a) {
- ; SSE2-LABEL: sitofpv32i16v32float
- ; SSE2: cost of 60 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv32i16v32float
- ; AVX1: cost of 23 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv32i16v32float
- ; AVX2: cost of 23 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv32i16v32float
- ; AVX512F: cost of 5 {{.*}} sitofp
- %1 = sitofp <32 x i16> %a to <32 x float>
- ret <32 x float> %1
-}
-
-define <2 x float> @sitofpv2i32v2float(<2 x i32> %a) {
- ; SSE2-LABEL: sitofpv2i32v2float
- ; SSE2: cost of 15 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv2i32v2float
- ; AVX1: cost of 4 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv2i32v2float
- ; AVX2: cost of 4 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv2i32v2float
- ; AVX512F: cost of 4 {{.*}} sitofp
- %1 = sitofp <2 x i32> %a to <2 x float>
- ret <2 x float> %1
-}
-
-define <4 x float> @sitofpv4i32v4float(<4 x i32> %a) {
- ; SSE2-LABEL: sitofpv4i32v4float
- ; SSE2: cost of 5 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv4i32v4float
- ; AVX1: cost of 1 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv4i32v4float
- ; AVX2: cost of 1 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv4i32v4float
- ; AVX512F: cost of 1 {{.*}} sitofp
- %1 = sitofp <4 x i32> %a to <4 x float>
- ret <4 x float> %1
-}
-
-define <8 x float> @sitofpv8i32v8float(<8 x i32> %a) {
- ; SSE2-LABEL: sitofpv8i32v8float
- ; SSE2: cost of 10 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv8i32v8float
- ; AVX1: cost of 1 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv8i32v8float
- ; AVX2: cost of 1 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv8i32v8float
- ; AVX512F: cost of 1 {{.*}} sitofp
- %1 = sitofp <8 x i32> %a to <8 x float>
- ret <8 x float> %1
-}
-
-define <16 x float> @sitofpv16i32v16float(<16 x i32> %a) {
- ; SSE2-LABEL: sitofpv16i32v16float
- ; SSE2: cost of 20 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv16i32v16float
- ; AVX1: cost of 3 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv16i32v16float
- ; AVX2: cost of 3 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv16i32v16float
- ; AVX512F: cost of 1 {{.*}} sitofp
- %1 = sitofp <16 x i32> %a to <16 x float>
- ret <16 x float> %1
-}
-
-define <32 x float> @sitofpv32i32v32float(<32 x i32> %a) {
- ; SSE2-LABEL: sitofpv32i32v32float
- ; SSE2: cost of 40 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv32i32v32float
- ; AVX1: cost of 7 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv32i32v32float
- ; AVX2: cost of 7 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv32i32v32float
- ; AVX512F: cost of 3 {{.*}} sitofp
- %1 = sitofp <32 x i32> %a to <32 x float>
- ret <32 x float> %1
-}
-
-define <2 x float> @sitofpv2i64v2float(<2 x i64> %a) {
- ; SSE2-LABEL: sitofpv2i64v2float
- ; SSE2: cost of 15 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv2i64v2float
- ; AVX1: cost of 4 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv2i64v2float
- ; AVX2: cost of 4 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv2i64v2float
- ; AVX512F: cost of 4 {{.*}} sitofp
- %1 = sitofp <2 x i64> %a to <2 x float>
- ret <2 x float> %1
-}
-
-define <4 x float> @sitofpv4i64v4float(<4 x i64> %a) {
- ; SSE2-LABEL: sitofpv4i64v4float
- ; SSE2: cost of 30 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv4i64v4float
- ; AVX1: cost of 10 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv4i64v4float
- ; AVX2: cost of 10 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv4i64v4float
- ; AVX512F: cost of 10 {{.*}} sitofp
- %1 = sitofp <4 x i64> %a to <4 x float>
- ret <4 x float> %1
-}
-
-define <8 x float> @sitofpv8i64v8float(<8 x i64> %a) {
- ; SSE2-LABEL: sitofpv8i64v8float
- ; SSE2: cost of 60 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv8i64v8float
- ; AVX1: cost of 21 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv8i64v8float
- ; AVX2: cost of 21 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv8i64v8float
- ; AVX512F: cost of 22 {{.*}} sitofp
- %1 = sitofp <8 x i64> %a to <8 x float>
- ret <8 x float> %1
-}
-
-define <16 x float> @sitofpv16i64v16float(<16 x i64> %a) {
- ; SSE2-LABEL: sitofpv16i64v16float
- ; SSE2: cost of 120 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv16i64v16float
- ; AVX1: cost of 43 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv16i64v16float
- ; AVX2: cost of 43 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv16i64v16float
- ; AVX512F: cost of 45 {{.*}} sitofp
- %1 = sitofp <16 x i64> %a to <16 x float>
- ret <16 x float> %1
-}
-
-define <32 x float> @sitofpv32i64v32float(<32 x i64> %a) {
- ; SSE2-LABEL: sitofpv32i64v32float
- ; SSE2: cost of 240 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv32i64v32float
- ; AVX1: cost of 87 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv32i64v32float
- ; AVX2: cost of 87 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv32i64v32float
- ; AVX512F: cost of 91 {{.*}} sitofp
- %1 = sitofp <32 x i64> %a to <32 x float>
- ret <32 x float> %1
-}
-
-define <8 x double> @sitofpv8i1v8double(<8 x double> %a) {
- ; SSE2-LABEL: sitofpv8i1v8double
- ; SSE2: cost of 80 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv8i1v8double
- ; AVX1: cost of 7 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv8i1v8double
- ; AVX2: cost of 7 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv8i1v8double
- ; AVX512F: cost of 4 {{.*}} sitofp
- %cmpres = fcmp ogt <8 x double> %a, zeroinitializer
- %1 = sitofp <8 x i1> %cmpres to <8 x double>
- ret <8 x double> %1
-}
-
-define <16 x float> @sitofpv16i1v16float(<16 x float> %a) {
- ; SSE2-LABEL: sitofpv16i1v16float
- ; SSE2: cost of 8 {{.*}} sitofp
- ;
- ; AVX1-LABEL: sitofpv16i1v16float
- ; AVX1: cost of 17 {{.*}} sitofp
- ;
- ; AVX2-LABEL: sitofpv16i1v16float
- ; AVX2: cost of 17 {{.*}} sitofp
- ;
- ; AVX512F-LABEL: sitofpv16i1v16float
- ; AVX512F: cost of 3 {{.*}} sitofp
- %cmpres = fcmp ogt <16 x float> %a, zeroinitializer
- %1 = sitofp <16 x i1> %cmpres to <16 x float>
- ret <16 x float> %1
+; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+avx512f -cost-model -analyze < %s | FileCheck --check-prefix=AVX512 --check-prefix=AVX512F %s
+; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+avx512dq -cost-model -analyze < %s | FileCheck --check-prefix=AVX512 --check-prefix=AVX512DQ %s
+
+; CHECK-LABEL: 'sitofp_i8_double'
+define i32 @sitofp_i8_double() {
+ ; SSE2: cost of 1 {{.*}} sitofp i8
+ ; AVX1: cost of 1 {{.*}} sitofp i8
+ ; AVX2: cost of 1 {{.*}} sitofp i8
+ ; AVX512: cost of 1 {{.*}} sitofp i8
+ %cvt_i8_f64 = sitofp i8 undef to double
+
+ ; SSE2: cost of 20 {{.*}} sitofp <2 x i8>
+ ; AVX1: cost of 4 {{.*}} sitofp <2 x i8>
+ ; AVX2: cost of 4 {{.*}} sitofp <2 x i8>
+ ; AVX512: cost of 4 {{.*}} sitofp <2 x i8>
+ %cvt_v2i8_v2f64 = sitofp <2 x i8> undef to <2 x double>
+
+ ; SSE2: cost of 40 {{.*}} sitofp <4 x i8>
+ ; AVX1: cost of 3 {{.*}} sitofp <4 x i8>
+ ; AVX2: cost of 3 {{.*}} sitofp <4 x i8>
+ ; AVX512: cost of 3 {{.*}} sitofp <4 x i8>
+ %cvt_v4i8_v4f64 = sitofp <4 x i8> undef to <4 x double>
+
+ ; SSE2: cost of 80 {{.*}} sitofp <8 x i8>
+ ; AVX1: cost of 7 {{.*}} sitofp <8 x i8>
+ ; AVX2: cost of 7 {{.*}} sitofp <8 x i8>
+ ; AVX512: cost of 2 {{.*}} sitofp <8 x i8>
+ %cvt_v8i8_v8f64 = sitofp <8 x i8> undef to <8 x double>
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'sitofp_i16_double'
+define i32 @sitofp_i16_double() {
+ ; SSE2: cost of 1 {{.*}} sitofp i16
+ ; AVX1: cost of 1 {{.*}} sitofp i16
+ ; AVX2: cost of 1 {{.*}} sitofp i16
+ ; AVX512: cost of 1 {{.*}} sitofp i16
+ %cvt_i16_f64 = sitofp i16 undef to double
+
+ ; SSE2: cost of 20 {{.*}} sitofp <2 x i16>
+ ; AVX1: cost of 4 {{.*}} sitofp <2 x i16>
+ ; AVX2: cost of 4 {{.*}} sitofp <2 x i16>
+ ; AVX512: cost of 4 {{.*}} sitofp <2 x i16>
+ %cvt_v2i16_v2f64 = sitofp <2 x i16> undef to <2 x double>
+
+ ; SSE2: cost of 40 {{.*}} sitofp <4 x i16>
+ ; AVX1: cost of 3 {{.*}} sitofp <4 x i16>
+ ; AVX2: cost of 3 {{.*}} sitofp <4 x i16>
+ ; AVX512: cost of 3 {{.*}} sitofp <4 x i16>
+ %cvt_v4i16_v4f64 = sitofp <4 x i16> undef to <4 x double>
+
+ ; SSE2: cost of 80 {{.*}} sitofp <8 x i16>
+ ; AVX1: cost of 7 {{.*}} sitofp <8 x i16>
+ ; AVX2: cost of 7 {{.*}} sitofp <8 x i16>
+ ; AVX512: cost of 2 {{.*}} sitofp <8 x i16>
+ %cvt_v8i16_v8f64 = sitofp <8 x i16> undef to <8 x double>
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'sitofp_i32_double'
+define i32 @sitofp_i32_double() {
+ ; SSE2: cost of 1 {{.*}} sitofp i32
+ ; AVX1: cost of 1 {{.*}} sitofp i32
+ ; AVX2: cost of 1 {{.*}} sitofp i32
+ ; AVX512: cost of 1 {{.*}} sitofp i32
+ %cvt_i32_f64 = sitofp i32 undef to double
+
+ ; SSE2: cost of 20 {{.*}} sitofp <2 x i32>
+ ; AVX1: cost of 4 {{.*}} sitofp <2 x i32>
+ ; AVX2: cost of 4 {{.*}} sitofp <2 x i32>
+ ; AVX512: cost of 4 {{.*}} sitofp <2 x i32>
+ %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
+
+ ; SSE2: cost of 40 {{.*}} sitofp <4 x i32>
+ ; AVX1: cost of 1 {{.*}} sitofp <4 x i32>
+ ; AVX2: cost of 1 {{.*}} sitofp <4 x i32>
+ ; AVX512: cost of 1 {{.*}} sitofp <4 x i32>
+ %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double>
+
+ ; SSE2: cost of 80 {{.*}} sitofp <8 x i32>
+ ; AVX1: cost of 3 {{.*}} sitofp <8 x i32>
+ ; AVX2: cost of 3 {{.*}} sitofp <8 x i32>
+ ; AVX512: cost of 1 {{.*}} sitofp <8 x i32>
+ %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double>
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'sitofp_i64_double'
+define i32 @sitofp_i64_double() {
+ ; SSE2: cost of 1 {{.*}} sitofp i64
+ ; AVX1: cost of 1 {{.*}} sitofp i64
+ ; AVX2: cost of 1 {{.*}} sitofp i64
+ ; AVX512: cost of 1 {{.*}} sitofp i64
+ %cvt_i64_f64 = sitofp i64 undef to double
+
+ ; SSE2: cost of 20 {{.*}} sitofp <2 x i64>
+ ; AVX1: cost of 20 {{.*}} sitofp <2 x i64>
+ ; AVX2: cost of 20 {{.*}} sitofp <2 x i64>
+ ; AVX512F: cost of 20 {{.*}} sitofp <2 x i64>
+ ; AVX512DQ: cost of 1 {{.*}} sitofp <2 x i64>
+ %cvt_v2i64_v2f64 = sitofp <2 x i64> undef to <2 x double>
+
+ ; SSE2: cost of 40 {{.*}} sitofp <4 x i64>
+ ; AVX1: cost of 13 {{.*}} sitofp <4 x i64>
+ ; AVX2: cost of 13 {{.*}} sitofp <4 x i64>
+ ; AVX512F: cost of 13 {{.*}} sitofp <4 x i64>
+ ; AVX512DQ: cost of 1 {{.*}} sitofp <4 x i64>
+ %cvt_v4i64_v4f64 = sitofp <4 x i64> undef to <4 x double>
+
+ ; SSE2: cost of 80 {{.*}} sitofp <8 x i64>
+ ; AVX1: cost of 27 {{.*}} sitofp <8 x i64>
+ ; AVX2: cost of 27 {{.*}} sitofp <8 x i64>
+ ; AVX512F: cost of 22 {{.*}} sitofp <8 x i64>
+ ; AVX512DQ: cost of 1 {{.*}} sitofp <8 x i64>
+ %cvt_v8i64_v8f64 = sitofp <8 x i64> undef to <8 x double>
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'sitofp_i8_float'
+define i32 @sitofp_i8_float() {
+ ; SSE2: cost of 1 {{.*}} sitofp i8
+ ; AVX1: cost of 1 {{.*}} sitofp i8
+ ; AVX2: cost of 1 {{.*}} sitofp i8
+ ; AVX512: cost of 1 {{.*}} sitofp i8
+ %cvt_i8_f32 = sitofp i8 undef to float
+
+ ; SSE2: cost of 5 {{.*}} sitofp <4 x i8>
+ ; AVX1: cost of 3 {{.*}} sitofp <4 x i8>
+ ; AVX2: cost of 3 {{.*}} sitofp <4 x i8>
+ ; AVX512: cost of 3 {{.*}} sitofp <4 x i8>
+ %cvt_v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float>
+
+ ; SSE2: cost of 15 {{.*}} sitofp <8 x i8>
+ ; AVX1: cost of 8 {{.*}} sitofp <8 x i8>
+ ; AVX2: cost of 8 {{.*}} sitofp <8 x i8>
+ ; AVX512: cost of 8 {{.*}} sitofp <8 x i8>
+ %cvt_v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float>
+
+ ; SSE2: cost of 8 {{.*}} sitofp <16 x i8>
+ ; AVX1: cost of 17 {{.*}} sitofp <16 x i8>
+ ; AVX2: cost of 17 {{.*}} sitofp <16 x i8>
+ ; AVX512: cost of 2 {{.*}} sitofp <16 x i8>
+ %cvt_v16i8_v16f32 = sitofp <16 x i8> undef to <16 x float>
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'sitofp_i16_float'
+define i32 @sitofp_i16_float() {
+ ; SSE2: cost of 1 {{.*}} sitofp i16
+ ; AVX1: cost of 1 {{.*}} sitofp i16
+ ; AVX2: cost of 1 {{.*}} sitofp i16
+ ; AVX512: cost of 1 {{.*}} sitofp i16
+ %cvt_i16_f32 = sitofp i16 undef to float
+
+ ; SSE2: cost of 5 {{.*}} sitofp <4 x i16>
+ ; AVX1: cost of 3 {{.*}} sitofp <4 x i16>
+ ; AVX2: cost of 3 {{.*}} sitofp <4 x i16>
+ ; AVX512: cost of 3 {{.*}} sitofp <4 x i16>
+ %cvt_v4i16_v4f32 = sitofp <4 x i16> undef to <4 x float>
+
+ ; SSE2: cost of 15 {{.*}} sitofp <8 x i16>
+ ; AVX1: cost of 5 {{.*}} sitofp <8 x i16>
+ ; AVX2: cost of 5 {{.*}} sitofp <8 x i16>
+ ; AVX512: cost of 5 {{.*}} sitofp <8 x i16>
+ %cvt_v8i16_v8f32 = sitofp <8 x i16> undef to <8 x float>
+
+ ; SSE2: cost of 30 {{.*}} sitofp <16 x i16>
+ ; AVX1: cost of 11 {{.*}} sitofp <16 x i16>
+ ; AVX2: cost of 11 {{.*}} sitofp <16 x i16>
+ ; AVX512: cost of 2 {{.*}} sitofp <16 x i16>
+ %cvt_v16i16_v16f32 = sitofp <16 x i16> undef to <16 x float>
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'sitofp_i32_float'
+define i32 @sitofp_i32_float() {
+ ; SSE2: cost of 1 {{.*}} sitofp i32
+ ; AVX1: cost of 1 {{.*}} sitofp i32
+ ; AVX2: cost of 1 {{.*}} sitofp i32
+ ; AVX512: cost of 1 {{.*}} sitofp i32
+ %cvt_i32_f32 = sitofp i32 undef to float
+
+ ; SSE2: cost of 5 {{.*}} sitofp <4 x i32>
+ ; AVX1: cost of 1 {{.*}} sitofp <4 x i32>
+ ; AVX2: cost of 1 {{.*}} sitofp <4 x i32>
+ ; AVX512: cost of 1 {{.*}} sitofp <4 x i32>
+ %cvt_v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float>
+
+ ; SSE2: cost of 10 {{.*}} sitofp <8 x i32>
+ ; AVX1: cost of 1 {{.*}} sitofp <8 x i32>
+ ; AVX2: cost of 1 {{.*}} sitofp <8 x i32>
+ ; AVX512: cost of 1 {{.*}} sitofp <8 x i32>
+ %cvt_v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float>
+
+ ; SSE2: cost of 20 {{.*}} sitofp <16 x i32>
+ ; AVX1: cost of 3 {{.*}} sitofp <16 x i32>
+ ; AVX2: cost of 3 {{.*}} sitofp <16 x i32>
+ ; AVX512: cost of 1 {{.*}} sitofp <16 x i32>
+ %cvt_v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float>
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'sitofp_i64_float'
+define i32 @sitofp_i64_float() {
+ ; SSE2: cost of 1 {{.*}} sitofp i64
+ ; AVX1: cost of 1 {{.*}} sitofp i64
+ ; AVX2: cost of 1 {{.*}} sitofp i64
+ ; AVX512: cost of 1 {{.*}} sitofp i64
+ %cvt_i64_f32 = sitofp i64 undef to float
+
+ ; SSE2: cost of 15 {{.*}} sitofp <2 x i64>
+ ; AVX1: cost of 4 {{.*}} sitofp <2 x i64>
+ ; AVX2: cost of 4 {{.*}} sitofp <2 x i64>
+ ; AVX512F: cost of 4 {{.*}} sitofp <2 x i64>
+ ; AVX512DQ: cost of 1 {{.*}} sitofp <2 x i64>
+ %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float>
+
+ ; SSE2: cost of 30 {{.*}} sitofp <4 x i64>
+ ; AVX1: cost of 10 {{.*}} sitofp <4 x i64>
+ ; AVX2: cost of 10 {{.*}} sitofp <4 x i64>
+ ; AVX512F: cost of 10 {{.*}} sitofp <4 x i64>
+ ; AVX512DQ: cost of 1 {{.*}} sitofp <4 x i64>
+ %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float>
+
+ ; SSE2: cost of 60 {{.*}} sitofp <8 x i64>
+ ; AVX1: cost of 21 {{.*}} sitofp <8 x i64>
+ ; AVX2: cost of 21 {{.*}} sitofp <8 x i64>
+ ; AVX512F: cost of 22 {{.*}} sitofp <8 x i64>
+ ; AVX512DQ: cost of 1 {{.*}} sitofp <8 x i64>
+ %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float>
+
+ ; SSE2: cost of 120 {{.*}} sitofp <16 x i64>
+ ; AVX1: cost of 43 {{.*}} sitofp <16 x i64>
+ ; AVX2: cost of 43 {{.*}} sitofp <16 x i64>
+ ; AVX512F: cost of 45 {{.*}} sitofp <16 x i64>
+ ; AVX512DQ: cost of 3 {{.*}} sitofp <16 x i64>
+ %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float>
+
+ ret i32 undef
}
diff --git a/test/Analysis/CostModel/X86/strided-load-i16.ll b/test/Analysis/CostModel/X86/strided-load-i16.ll
new file mode 100755
index 000000000000..2c2cf3938bcb
--- /dev/null
+++ b/test/Analysis/CostModel/X86/strided-load-i16.ll
@@ -0,0 +1,113 @@
+; REQUIRES: asserts
+; RUN: opt -loop-vectorize -S -mcpu=skx --debug-only=loop-vectorize < %s 2>&1| FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = global [10240 x i16] zeroinitializer, align 16
+@B = global [10240 x i16] zeroinitializer, align 16
+
+; Function Attrs: nounwind uwtable
+define void @load_i16_stride2() {
+;CHECK-LABEL: load_i16_stride2
+;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load
+;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load
+;CHECK: Found an estimated cost of 1 for VF 4 For instruction: %1 = load
+;CHECK: Found an estimated cost of 1 for VF 8 For instruction: %1 = load
+;CHECK: Found an estimated cost of 1 for VF 16 For instruction: %1 = load
+;CHECK: Found an estimated cost of 2 for VF 32 For instruction: %1 = load
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %0 = shl nsw i64 %indvars.iv, 1
+ %arrayidx = getelementptr inbounds [10240 x i16], [10240 x i16]* @A, i64 0, i64 %0
+ %1 = load i16, i16* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds [10240 x i16], [10240 x i16]* @B, i64 0, i64 %indvars.iv
+ store i16 %1, i16* %arrayidx2, align 2
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1024
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+define void @load_i16_stride3() {
+;CHECK-LABEL: load_i16_stride3
+;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load
+;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load
+;CHECK: Found an estimated cost of 1 for VF 4 For instruction: %1 = load
+;CHECK: Found an estimated cost of 1 for VF 8 For instruction: %1 = load
+;CHECK: Found an estimated cost of 2 for VF 16 For instruction: %1 = load
+;CHECK: Found an estimated cost of 3 for VF 32 For instruction: %1 = load
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %0 = mul nsw i64 %indvars.iv, 3
+ %arrayidx = getelementptr inbounds [10240 x i16], [10240 x i16]* @A, i64 0, i64 %0
+ %1 = load i16, i16* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds [10240 x i16], [10240 x i16]* @B, i64 0, i64 %indvars.iv
+ store i16 %1, i16* %arrayidx2, align 2
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1024
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+define void @load_i16_stride4() {
+;CHECK-LABEL: load_i16_stride4
+;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load
+;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load
+;CHECK: Found an estimated cost of 1 for VF 4 For instruction: %1 = load
+;CHECK: Found an estimated cost of 1 for VF 8 For instruction: %1 = load
+;CHECK: Found an estimated cost of 2 for VF 16 For instruction: %1 = load
+;CHECK: Found an estimated cost of 5 for VF 32 For instruction: %1 = load
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %0 = shl nsw i64 %indvars.iv, 2
+ %arrayidx = getelementptr inbounds [10240 x i16], [10240 x i16]* @A, i64 0, i64 %0
+ %1 = load i16, i16* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds [10240 x i16], [10240 x i16]* @B, i64 0, i64 %indvars.iv
+ store i16 %1, i16* %arrayidx2, align 2
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1024
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+define void @load_i16_stride5() {
+;CHECK-LABEL: load_i16_stride5
+;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load
+;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load
+;CHECK: Found an estimated cost of 1 for VF 4 For instruction: %1 = load
+;CHECK: Found an estimated cost of 2 for VF 8 For instruction: %1 = load
+;CHECK: Found an estimated cost of 3 for VF 16 For instruction: %1 = load
+;CHECK: Found an estimated cost of 6 for VF 32 For instruction: %1 = load
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %0 = mul nsw i64 %indvars.iv, 5
+ %arrayidx = getelementptr inbounds [10240 x i16], [10240 x i16]* @A, i64 0, i64 %0
+ %1 = load i16, i16* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds [10240 x i16], [10240 x i16]* @B, i64 0, i64 %indvars.iv
+ store i16 %1, i16* %arrayidx2, align 2
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1024
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Analysis/CostModel/X86/strided-load-i32.ll b/test/Analysis/CostModel/X86/strided-load-i32.ll
new file mode 100755
index 000000000000..0dcd3929da7f
--- /dev/null
+++ b/test/Analysis/CostModel/X86/strided-load-i32.ll
@@ -0,0 +1,110 @@
+; REQUIRES: asserts
+; RUN: opt -loop-vectorize -S -mcpu=skx --debug-only=loop-vectorize < %s 2>&1| FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = global [10240 x i32] zeroinitializer, align 16
+@B = global [10240 x i32] zeroinitializer, align 16
+
+; Function Attrs: nounwind uwtable
+define void @load_int_stride2() {
+;CHECK-LABEL: load_int_stride2
+;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load
+;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load
+;CHECK: Found an estimated cost of 1 for VF 4 For instruction: %1 = load
+;CHECK: Found an estimated cost of 1 for VF 8 For instruction: %1 = load
+;CHECK: Found an estimated cost of 2 for VF 16 For instruction: %1 = load
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %0 = shl nsw i64 %indvars.iv, 1
+ %arrayidx = getelementptr inbounds [10240 x i32], [10240 x i32]* @A, i64 0, i64 %0
+ %1 = load i32, i32* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds [10240 x i32], [10240 x i32]* @B, i64 0, i64 %indvars.iv
+ store i32 %1, i32* %arrayidx2, align 2
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1024
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+define void @load_int_stride3() {
+;CHECK-LABEL: load_int_stride3
+;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load
+;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load
+;CHECK: Found an estimated cost of 1 for VF 4 For instruction: %1 = load
+;CHECK: Found an estimated cost of 2 for VF 8 For instruction: %1 = load
+;CHECK: Found an estimated cost of 3 for VF 16 For instruction: %1 = load
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %0 = mul nsw i64 %indvars.iv, 3
+ %arrayidx = getelementptr inbounds [10240 x i32], [10240 x i32]* @A, i64 0, i64 %0
+ %1 = load i32, i32* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds [10240 x i32], [10240 x i32]* @B, i64 0, i64 %indvars.iv
+ store i32 %1, i32* %arrayidx2, align 2
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1024
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+define void @load_int_stride4() {
+;CHECK-LABEL: load_int_stride4
+;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load
+;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load
+;CHECK: Found an estimated cost of 1 for VF 4 For instruction: %1 = load
+;CHECK: Found an estimated cost of 2 for VF 8 For instruction: %1 = load
+;CHECK: Found an estimated cost of 5 for VF 16 For instruction: %1 = load
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %0 = shl nsw i64 %indvars.iv, 2
+ %arrayidx = getelementptr inbounds [10240 x i32], [10240 x i32]* @A, i64 0, i64 %0
+ %1 = load i32, i32* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds [10240 x i32], [10240 x i32]* @B, i64 0, i64 %indvars.iv
+ store i32 %1, i32* %arrayidx2, align 2
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1024
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+define void @load_int_stride5() {
+;CHECK-LABEL: load_int_stride5
+;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load
+;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load
+;CHECK: Found an estimated cost of 2 for VF 4 For instruction: %1 = load
+;CHECK: Found an estimated cost of 3 for VF 8 For instruction: %1 = load
+;CHECK: Found an estimated cost of 6 for VF 16 For instruction: %1 = load
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %0 = mul nsw i64 %indvars.iv, 5
+ %arrayidx = getelementptr inbounds [10240 x i32], [10240 x i32]* @A, i64 0, i64 %0
+ %1 = load i32, i32* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds [10240 x i32], [10240 x i32]* @B, i64 0, i64 %indvars.iv
+ store i32 %1, i32* %arrayidx2, align 2
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1024
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
diff --git a/test/Analysis/CostModel/X86/strided-load-i64.ll b/test/Analysis/CostModel/X86/strided-load-i64.ll
new file mode 100755
index 000000000000..0370b6f80efd
--- /dev/null
+++ b/test/Analysis/CostModel/X86/strided-load-i64.ll
@@ -0,0 +1,81 @@
+; REQUIRES: asserts
+; RUN: opt -loop-vectorize -S -mcpu=skx --debug-only=loop-vectorize < %s 2>&1| FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = global [10240 x i64] zeroinitializer, align 16
+@B = global [10240 x i64] zeroinitializer, align 16
+
+; Function Attrs: nounwind uwtable
+define void @load_i64_stride2() {
+;CHECK-LABEL: load_i64_stride2
+;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load
+;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load
+;CHECK: Found an estimated cost of 1 for VF 4 For instruction: %1 = load
+;CHECK: Found an estimated cost of 2 for VF 8 For instruction: %1 = load
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %0 = shl nsw i64 %indvars.iv, 1
+ %arrayidx = getelementptr inbounds [10240 x i64], [10240 x i64]* @A, i64 0, i64 %0
+ %1 = load i64, i64* %arrayidx, align 16
+ %arrayidx2 = getelementptr inbounds [10240 x i64], [10240 x i64]* @B, i64 0, i64 %indvars.iv
+ store i64 %1, i64* %arrayidx2, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1024
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+define void @load_i64_stride3() {
+;CHECK-LABEL: load_i64_stride3
+;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load
+;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load
+;CHECK: Found an estimated cost of 2 for VF 4 For instruction: %1 = load
+;CHECK: Found an estimated cost of 3 for VF 8 For instruction: %1 = load
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %0 = mul nsw i64 %indvars.iv, 3
+ %arrayidx = getelementptr inbounds [10240 x i64], [10240 x i64]* @A, i64 0, i64 %0
+ %1 = load i64, i64* %arrayidx, align 16
+ %arrayidx2 = getelementptr inbounds [10240 x i64], [10240 x i64]* @B, i64 0, i64 %indvars.iv
+ store i64 %1, i64* %arrayidx2, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1024
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+define void @load_i64_stride4() {
+;CHECK-LABEL: load_i64_stride4
+;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load
+;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load
+;CHECK: Found an estimated cost of 2 for VF 4 For instruction: %1 = load
+;CHECK: Found an estimated cost of 5 for VF 8 For instruction: %1 = load
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %0 = mul nsw i64 %indvars.iv, 4
+ %arrayidx = getelementptr inbounds [10240 x i64], [10240 x i64]* @A, i64 0, i64 %0
+ %1 = load i64, i64* %arrayidx, align 16
+ %arrayidx2 = getelementptr inbounds [10240 x i64], [10240 x i64]* @B, i64 0, i64 %indvars.iv
+ store i64 %1, i64* %arrayidx2, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1024
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Analysis/CostModel/X86/strided-load-i8.ll b/test/Analysis/CostModel/X86/strided-load-i8.ll
new file mode 100755
index 000000000000..2a3a83864151
--- /dev/null
+++ b/test/Analysis/CostModel/X86/strided-load-i8.ll
@@ -0,0 +1,117 @@
+; REQUIRES: asserts
+; RUN: opt -loop-vectorize -S -mcpu=skx --debug-only=loop-vectorize < %s 2>&1| FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = global [10240 x i8] zeroinitializer, align 16
+@B = global [10240 x i8] zeroinitializer, align 16
+
+; Function Attrs: nounwind uwtable
+define void @load_i8_stride2() {
+;CHECK-LABEL: load_i8_stride2
+;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load
+;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load
+;CHECK: Found an estimated cost of 1 for VF 4 For instruction: %1 = load
+;CHECK: Found an estimated cost of 1 for VF 8 For instruction: %1 = load
+;CHECK: Found an estimated cost of 3 for VF 16 For instruction: %1 = load
+;CHECK: Found an estimated cost of 8 for VF 32 For instruction: %1 = load
+;CHECK: Found an estimated cost of 20 for VF 64 For instruction: %1 = load
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %0 = shl nsw i64 %indvars.iv, 1
+ %arrayidx = getelementptr inbounds [10240 x i8], [10240 x i8]* @A, i64 0, i64 %0
+ %1 = load i8, i8* %arrayidx, align 2
+ %arrayidx2 = getelementptr inbounds [10240 x i8], [10240 x i8]* @B, i64 0, i64 %indvars.iv
+ store i8 %1, i8* %arrayidx2, align 1
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1024
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+define void @load_i8_stride3() {
+;CHECK-LABEL: load_i8_stride3
+;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load
+;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load
+;CHECK: Found an estimated cost of 1 for VF 4 For instruction: %1 = load
+;CHECK: Found an estimated cost of 3 for VF 8 For instruction: %1 = load
+;CHECK: Found an estimated cost of 8 for VF 16 For instruction: %1 = load
+;CHECK: Found an estimated cost of 20 for VF 32 For instruction: %1 = load
+;CHECK: Found an estimated cost of 39 for VF 64 For instruction: %1 = load
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %0 = mul nsw i64 %indvars.iv, 3
+ %arrayidx = getelementptr inbounds [10240 x i8], [10240 x i8]* @A, i64 0, i64 %0
+ %1 = load i8, i8* %arrayidx, align 2
+ %arrayidx2 = getelementptr inbounds [10240 x i8], [10240 x i8]* @B, i64 0, i64 %indvars.iv
+ store i8 %1, i8* %arrayidx2, align 1
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1024
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+define void @load_i8_stride4() {
+;CHECK-LABEL: load_i8_stride4
+;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load
+;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load
+;CHECK: Found an estimated cost of 1 for VF 4 For instruction: %1 = load
+;CHECK: Found an estimated cost of 3 for VF 8 For instruction: %1 = load
+;CHECK: Found an estimated cost of 8 for VF 16 For instruction: %1 = load
+;CHECK: Found an estimated cost of 20 for VF 32 For instruction: %1 = load
+;CHECK: Found an estimated cost of 59 for VF 64 For instruction: %1 = load
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %0 = shl nsw i64 %indvars.iv, 2
+ %arrayidx = getelementptr inbounds [10240 x i8], [10240 x i8]* @A, i64 0, i64 %0
+ %1 = load i8, i8* %arrayidx, align 2
+ %arrayidx2 = getelementptr inbounds [10240 x i8], [10240 x i8]* @B, i64 0, i64 %indvars.iv
+ store i8 %1, i8* %arrayidx2, align 1
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1024
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+define void @load_i8_stride5() {
+;CHECK-LABEL: load_i8_stride5
+;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load
+;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load
+;CHECK: Found an estimated cost of 3 for VF 4 For instruction: %1 = load
+;CHECK: Found an estimated cost of 8 for VF 8 For instruction: %1 = load
+;CHECK: Found an estimated cost of 20 for VF 16 For instruction: %1 = load
+;CHECK: Found an estimated cost of 39 for VF 32 For instruction: %1 = load
+;CHECK: Found an estimated cost of 78 for VF 64 For instruction: %1 = load
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %0 = mul nsw i64 %indvars.iv, 5
+ %arrayidx = getelementptr inbounds [10240 x i8], [10240 x i8]* @A, i64 0, i64 %0
+ %1 = load i8, i8* %arrayidx, align 2
+ %arrayidx2 = getelementptr inbounds [10240 x i8], [10240 x i8]* @B, i64 0, i64 %indvars.iv
+ store i8 %1, i8* %arrayidx2, align 1
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1024
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Analysis/CostModel/X86/trunc.ll b/test/Analysis/CostModel/X86/trunc.ll
new file mode 100644
index 000000000000..a270251c2b17
--- /dev/null
+++ b/test/Analysis/CostModel/X86/trunc.ll
@@ -0,0 +1,141 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE2
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSSE3
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE42
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; CHECK-LABEL: 'trunc_vXi32'
+define i32 @trunc_vXi32() {
+ ; SSE: cost of 0 {{.*}} %V2i64 = trunc
+ ; AVX1: cost of 0 {{.*}} %V2i64 = trunc
+ ; AVX2: cost of 0 {{.*}} %V2i64 = trunc
+ ; AVX512: cost of 0 {{.*}} %V2i64 = trunc
+ %V2i64 = trunc <2 x i64> undef to <2 x i32>
+
+ ; SSE: cost of 1 {{.*}} %V4i64 = trunc
+ ; AVX1: cost of 4 {{.*}} %V4i64 = trunc
+ ; AVX2: cost of 2 {{.*}} %V4i64 = trunc
+ ; AVX512: cost of 2 {{.*}} %V4i64 = trunc
+ %V4i64 = trunc <4 x i64> undef to <4 x i32>
+
+ ; SSE: cost of 3 {{.*}} %V8i64 = trunc
+ ; AVX1: cost of 9 {{.*}} %V8i64 = trunc
+ ; AVX2: cost of 4 {{.*}} %V8i64 = trunc
+ ; AVX512: cost of 1 {{.*}} %V8i64 = trunc
+ %V8i64 = trunc <8 x i64> undef to <8 x i32>
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'trunc_vXi16'
+define i32 @trunc_vXi16() {
+ ; SSE: cost of 0 {{.*}} %V2i64 = trunc
+ ; AVX: cost of 0 {{.*}} %V2i64 = trunc
+ %V2i64 = trunc <2 x i64> undef to <2 x i16>
+
+ ; SSE: cost of 1 {{.*}} %V4i64 = trunc
+ ; AVX1: cost of 4 {{.*}} %V4i64 = trunc
+ ; AVX2: cost of 2 {{.*}} %V4i64 = trunc
+ ; AVX512: cost of 2 {{.*}} %V4i64 = trunc
+ %V4i64 = trunc <4 x i64> undef to <4 x i16>
+
+ ; SSE: cost of 3 {{.*}} %V8i64 = trunc
+ ; AVX: cost of 0 {{.*}} %V8i64 = trunc
+ %V8i64 = trunc <8 x i64> undef to <8 x i16>
+
+ ; SSE2: cost of 3 {{.*}} %V4i32 = trunc
+ ; SSSE3: cost of 3 {{.*}} %V4i32 = trunc
+ ; SSE42: cost of 1 {{.*}} %V4i32 = trunc
+ ; AVX1: cost of 1 {{.*}} %V4i32 = trunc
+ ; AVX2: cost of 1 {{.*}} %V4i32 = trunc
+ ; AVX512: cost of 1 {{.*}} %V4i32 = trunc
+ %V4i32 = trunc <4 x i32> undef to <4 x i16>
+
+ ; SSE2: cost of 5 {{.*}} %V8i32 = trunc
+ ; SSSE3: cost of 5 {{.*}} %V8i32 = trunc
+ ; SSE42: cost of 3 {{.*}} %V8i32 = trunc
+ ; AVX1: cost of 5 {{.*}} %V8i32 = trunc
+ ; AVX2: cost of 2 {{.*}} %V8i32 = trunc
+ ; AVX512: cost of 2 {{.*}} %V8i32 = trunc
+ %V8i32 = trunc <8 x i32> undef to <8 x i16>
+
+ ; SSE2: cost of 10 {{.*}} %V16i32 = trunc
+ ; SSSE3: cost of 10 {{.*}} %V16i32 = trunc
+ ; SSE42: cost of 6 {{.*}} %V16i32 = trunc
+ ; AVX1: cost of 6 {{.*}} %V16i32 = trunc
+ ; AVX2: cost of 6 {{.*}} %V16i32 = trunc
+ ; AVX512: cost of 1 {{.*}} %V16i32 = trunc
+ %V16i32 = trunc <16 x i32> undef to <16 x i16>
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'trunc_vXi8'
+define i32 @trunc_vXi8() {
+ ; SSE: cost of 0 {{.*}} %V2i64 = trunc
+ ; AVX: cost of 0 {{.*}} %V2i64 = trunc
+ %V2i64 = trunc <2 x i64> undef to <2 x i8>
+
+ ; SSE: cost of 1 {{.*}} %V4i64 = trunc
+ ; AVX1: cost of 4 {{.*}} %V4i64 = trunc
+ ; AVX2: cost of 2 {{.*}} %V4i64 = trunc
+ ; AVX512: cost of 2 {{.*}} %V4i64 = trunc
+ %V4i64 = trunc <4 x i64> undef to <4 x i8>
+
+ ; SSE: cost of 3 {{.*}} %V8i64 = trunc
+ ; AVX: cost of 0 {{.*}} %V8i64 = trunc
+ %V8i64 = trunc <8 x i64> undef to <8 x i8>
+
+ ; SSE: cost of 0 {{.*}} %V2i32 = trunc
+ ; AVX: cost of 0 {{.*}} %V2i32 = trunc
+ %V2i32 = trunc <2 x i32> undef to <2 x i8>
+
+ ; SSE2: cost of 3 {{.*}} %V4i32 = trunc
+ ; SSSE3: cost of 3 {{.*}} %V4i32 = trunc
+ ; SSE42: cost of 1 {{.*}} %V4i32 = trunc
+ ; AVX: cost of 1 {{.*}} %V4i32 = trunc
+ %V4i32 = trunc <4 x i32> undef to <4 x i8>
+
+ ; SSE2: cost of 4 {{.*}} %V8i32 = trunc
+ ; SSSE3: cost of 4 {{.*}} %V8i32 = trunc
+ ; SSE42: cost of 3 {{.*}} %V8i32 = trunc
+ ; AVX1: cost of 4 {{.*}} %V8i32 = trunc
+ ; AVX2: cost of 2 {{.*}} %V8i32 = trunc
+ ; AVX512: cost of 2 {{.*}} %V8i32 = trunc
+ %V8i32 = trunc <8 x i32> undef to <8 x i8>
+
+ ; SSE: cost of 7 {{.*}} %V16i32 = trunc
+ ; AVX: cost of 7 {{.*}} %V16i32 = trunc
+ %V16i32 = trunc <16 x i32> undef to <16 x i8>
+
+ ; SSE: cost of 0 {{.*}} %V2i16 = trunc
+ ; AVX: cost of 0 {{.*}} %V2i16 = trunc
+ %V2i16 = trunc <2 x i16> undef to <2 x i8>
+
+ ; SSE2: cost of 4 {{.*}} %V4i16 = trunc
+ ; SSSE3: cost of 4 {{.*}} %V4i16 = trunc
+ ; SSE42: cost of 2 {{.*}} %V4i16 = trunc
+ ; AVX: cost of 2 {{.*}} %V4i16 = trunc
+ %V4i16 = trunc <4 x i16> undef to <4 x i8>
+
+ ; SSE2: cost of 2 {{.*}} %V8i16 = trunc
+ ; SSSE3: cost of 2 {{.*}} %V8i16 = trunc
+ ; SSE42: cost of 1 {{.*}} %V8i16 = trunc
+ ; AVX: cost of 1 {{.*}} %V8i16 = trunc
+ %V8i16 = trunc <8 x i16> undef to <8 x i8>
+
+ ; SSE: cost of 3 {{.*}} %V16i16 = trunc
+ ; AVX: cost of 4 {{.*}} %V16i16 = trunc
+ %V16i16 = trunc <16 x i16> undef to <16 x i8>
+
+ ; SSE: cost of 7 {{.*}} %V32i16 = trunc
+ ; AVX: cost of 9 {{.*}} %V32i16 = trunc
+ %V32i16 = trunc <32 x i16> undef to <32 x i8>
+
+ ret i32 undef
+}
diff --git a/test/Analysis/CostModel/X86/uitofp.ll b/test/Analysis/CostModel/X86/uitofp.ll
index 2eb8407974f7..a0b48c0b4501 100644
--- a/test/Analysis/CostModel/X86/uitofp.ll
+++ b/test/Analysis/CostModel/X86/uitofp.ll
@@ -1,709 +1,250 @@
; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+sse2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE --check-prefix=SSE2 %s
; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+avx -cost-model -analyze < %s | FileCheck --check-prefix=AVX --check-prefix=AVX1 %s
; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+avx2 -cost-model -analyze < %s | FileCheck --check-prefix=AVX --check-prefix=AVX2 %s
-; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+avx512f -cost-model -analyze < %s | FileCheck --check-prefix=AVX512F %s
-; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+avx512dq -cost-model -analyze < %s | FileCheck --check-prefix=AVX512DQ %s
-
-define <2 x double> @uitofpv2i8v2double(<2 x i8> %a) {
- ; SSE2-LABEL: uitofpv2i8v2double
- ; SSE2: cost of 20 {{.*}} uitofp
- ;
- ; AVX1-LABEL: uitofpv2i8v2double
- ; AVX1: cost of 4 {{.*}} uitofp
- ;
- ; AVX2-LABEL: uitofpv2i8v2double
- ; AVX2: cost of 4 {{.*}} uitofp
- ;
- ; AVX512F-LABEL: uitofpv2i8v2double
- ; AVX512F: cost of 2 {{.*}} uitofp
- %1 = uitofp <2 x i8> %a to <2 x double>
- ret <2 x double> %1
-}
-
-define <4 x double> @uitofpv4i8v4double(<4 x i8> %a) {
- ; SSE2-LABEL: uitofpv4i8v4double
- ; SSE2: cost of 40 {{.*}} uitofp
- ;
- ; AVX1-LABEL: uitofpv4i8v4double
- ; AVX1: cost of 2 {{.*}} uitofp
- ;
- ; AVX2-LABEL: uitofpv4i8v4double
- ; AVX2: cost of 2 {{.*}} uitofp
- ;
- ; AVX512F-LABEL: uitofpv4i8v4double
- ; AVX512F: cost of 2 {{.*}} uitofp
- %1 = uitofp <4 x i8> %a to <4 x double>
- ret <4 x double> %1
-}
-
-define <8 x double> @uitofpv8i8v8double(<8 x i8> %a) {
- ; SSE2-LABEL: uitofpv8i8v8double
- ; SSE2: cost of 80 {{.*}} uitofp
- ;
- ; AVX1-LABEL: uitofpv8i8v8double
- ; AVX1: cost of 5 {{.*}} uitofp
- ;
- ; AVX2-LABEL: uitofpv8i8v8double
- ; AVX2: cost of 5 {{.*}} uitofp
- ;
- ; AVX512F-LABEL: uitofpv8i8v8double
- ; AVX512F: cost of 2 {{.*}} uitofp
- %1 = uitofp <8 x i8> %a to <8 x double>
- ret <8 x double> %1
-}
-
-define <16 x double> @uitofpv16i8v16double(<16 x i8> %a) {
- ; SSE2-LABEL: uitofpv16i8v16double
- ; SSE2: cost of 160 {{.*}} uitofp
- ;
- ; AVX1-LABEL: uitofpv16i8v16double
- ; AVX1: cost of 11 {{.*}} uitofp
- ;
- ; AVX2-LABEL: uitofpv16i8v16double
- ; AVX2: cost of 11 {{.*}} uitofp
- ;
- ; AVX512F-LABEL: uitofpv16i8v16double
- ; AVX512F: cost of 5 {{.*}} uitofp
- %1 = uitofp <16 x i8> %a to <16 x double>
- ret <16 x double> %1
-}
-
-define <32 x double> @uitofpv32i8v32double(<32 x i8> %a) {
- ; SSE2-LABEL: uitofpv32i8v32double
- ; SSE2: cost of 320 {{.*}} uitofp
- ;
- ; AVX1-LABEL: uitofpv32i8v32double
- ; AVX1: cost of 23 {{.*}} uitofp
- ;
- ; AVX2-LABEL: uitofpv32i8v32double
- ; AVX2: cost of 23 {{.*}} uitofp
- ;
- ; AVX512F-LABEL: uitofpv32i8v32double
- ; AVX512F: cost of 11 {{.*}} uitofp
- %1 = uitofp <32 x i8> %a to <32 x double>
- ret <32 x double> %1
-}
-
-define <2 x double> @uitofpv2i16v2double(<2 x i16> %a) {
- ; SSE2-LABEL: uitofpv2i16v2double
- ; SSE2: cost of 20 {{.*}} uitofp
- ;
- ; AVX1-LABEL: uitofpv2i16v2double
- ; AVX1: cost of 4 {{.*}} uitofp
- ;
- ; AVX2-LABEL: uitofpv2i16v2double
- ; AVX2: cost of 4 {{.*}} uitofp
- ;
- ; AVX512F-LABEL: uitofpv2i16v2double
- ; AVX512F: cost of 5 {{.*}} uitofp
- %1 = uitofp <2 x i16> %a to <2 x double>
- ret <2 x double> %1
-}
-
-define <4 x double> @uitofpv4i16v4double(<4 x i16> %a) {
- ; SSE2-LABEL: uitofpv4i16v4double
- ; SSE2: cost of 40 {{.*}} uitofp
- ;
- ; AVX1-LABEL: uitofpv4i16v4double
- ; AVX1: cost of 2 {{.*}} uitofp
- ;
- ; AVX2-LABEL: uitofpv4i16v4double
- ; AVX2: cost of 2 {{.*}} uitofp
- ;
- ; AVX512F-LABEL: uitofpv4i16v4double
- ; AVX512F: cost of 2 {{.*}} uitofp
- %1 = uitofp <4 x i16> %a to <4 x double>
- ret <4 x double> %1
-}
-
-define <8 x double> @uitofpv8i16v8double(<8 x i16> %a) {
- ; SSE2-LABEL: uitofpv8i16v8double
- ; SSE2: cost of 80 {{.*}} uitofp
- ;
- ; AVX1-LABEL: uitofpv8i16v8double
- ; AVX1: cost of 5 {{.*}} uitofp
- ;
- ; AVX2-LABEL: uitofpv8i16v8double
- ; AVX2: cost of 5 {{.*}} uitofp
- ;
- ; AVX512F-LABEL: uitofpv8i16v8double
- ; AVX512F: cost of 2 {{.*}} uitofp
- %1 = uitofp <8 x i16> %a to <8 x double>
- ret <8 x double> %1
-}
-
-define <16 x double> @uitofpv16i16v16double(<16 x i16> %a) {
- ; SSE2-LABEL: uitofpv16i16v16double
- ; SSE2: cost of 160 {{.*}} uitofp
- ;
- ; AVX1-LABEL: uitofpv16i16v16double
- ; AVX1: cost of 11 {{.*}} uitofp
- ;
- ; AVX2-LABEL: uitofpv16i16v16double
- ; AVX2: cost of 11 {{.*}} uitofp
- ;
- ; AVX512F-LABEL: uitofpv16i16v16double
- ; AVX512F: cost of 5 {{.*}} uitofp
- %1 = uitofp <16 x i16> %a to <16 x double>
- ret <16 x double> %1
-}
-
-define <32 x double> @uitofpv32i16v32double(<32 x i16> %a) {
- ; SSE2-LABEL: uitofpv32i16v32double
- ; SSE2: cost of 320 {{.*}} uitofp
- ;
- ; AVX1-LABEL: uitofpv32i16v32double
- ; AVX1: cost of 23 {{.*}} uitofp
- ;
- ; AVX2-LABEL: uitofpv32i16v32double
- ; AVX2: cost of 23 {{.*}} uitofp
- ;
- ; AVX512F-LABEL: uitofpv32i16v32double
- ; AVX512F: cost of 11 {{.*}} uitofp
- %1 = uitofp <32 x i16> %a to <32 x double>
- ret <32 x double> %1
-}
-
-define <2 x double> @uitofpv2i32v2double(<2 x i32> %a) {
- ; SSE2-LABEL: uitofpv2i32v2double
- ; SSE2: cost of 20 {{.*}} uitofp
- ;
- ; AVX1-LABEL: uitofpv2i32v2double
- ; AVX1: cost of 6 {{.*}} uitofp
- ;
- ; AVX2-LABEL: uitofpv2i32v2double
- ; AVX2: cost of 6 {{.*}} uitofp
- ;
- ; AVX512F-LABEL: uitofpv2i32v2double
- ; AVX512F: cost of 1 {{.*}} uitofp
- %1 = uitofp <2 x i32> %a to <2 x double>
- ret <2 x double> %1
-}
-
-define <4 x double> @uitofpv4i32v4double(<4 x i32> %a) {
- ; SSE2-LABEL: uitofpv4i32v4double
- ; SSE2: cost of 40 {{.*}} uitofp
- ;
- ; AVX1-LABEL: uitofpv4i32v4double
- ; AVX1: cost of 6 {{.*}} uitofp
- ;
- ; AVX2-LABEL: uitofpv4i32v4double
- ; AVX2: cost of 6 {{.*}} uitofp
- ;
- ; AVX512F-LABEL: uitofpv4i32v4double
- ; AVX512F: cost of 1 {{.*}} uitofp
- %1 = uitofp <4 x i32> %a to <4 x double>
- ret <4 x double> %1
-}
-
-define <8 x double> @uitofpv8i32v8double(<8 x i32> %a) {
- ; SSE2-LABEL: uitofpv8i32v8double
- ; SSE2: cost of 80 {{.*}} uitofp
- ;
- ; AVX1-LABEL: uitofpv8i32v8double
- ; AVX1: cost of 13 {{.*}} uitofp
- ;
- ; AVX2-LABEL: uitofpv8i32v8double
- ; AVX2: cost of 13 {{.*}} uitofp
- ;
- ; AVX512F-LABEL: uitofpv8i32v8double
- ; AVX512F: cost of 1 {{.*}} uitofp
- %1 = uitofp <8 x i32> %a to <8 x double>
- ret <8 x double> %1
-}
-
-define <16 x double> @uitofpv16i32v16double(<16 x i32> %a) {
- ; SSE2-LABEL: uitofpv16i32v16double
- ; SSE2: cost of 160 {{.*}} uitofp
- ;
- ; AVX1-LABEL: uitofpv16i32v16double
- ; AVX1: cost of 27 {{.*}} uitofp
- ;
- ; AVX2-LABEL: uitofpv16i32v16double
- ; AVX2: cost of 27 {{.*}} uitofp
- ;
- ; AVX512F-LABEL: uitofpv16i32v16double
- ; AVX512F: cost of 3 {{.*}} uitofp
- %1 = uitofp <16 x i32> %a to <16 x double>
- ret <16 x double> %1
-}
-
-define <32 x double> @uitofpv32i32v32double(<32 x i32> %a) {
- ; SSE2-LABEL: uitofpv32i32v32double
- ; SSE2: cost of 320 {{.*}} uitofp
- ;
- ; AVX1-LABEL: uitofpv32i32v32double
- ; AVX1: cost of 55 {{.*}} uitofp
- ;
- ; AVX2-LABEL: uitofpv32i32v32double
- ; AVX2: cost of 55 {{.*}} uitofp
- ;
- ; AVX512F-LABEL: uitofpv32i32v32double
- ; AVX512F: cost of 7 {{.*}} uitofp
- %1 = uitofp <32 x i32> %a to <32 x double>
- ret <32 x double> %1
-}
-
-define <2 x double> @uitofpv2i64v2double(<2 x i64> %a) {
- ; SSE2-LABEL: uitofpv2i64v2double
- ; SSE2: cost of 20 {{.*}} uitofp
- ;
- ; AVX1-LABEL: uitofpv2i64v2double
- ; AVX1: cost of 10 {{.*}} uitofp
- ;
- ; AVX2-LABEL: uitofpv2i64v2double
- ; AVX2: cost of 10 {{.*}} uitofp
- ;
- ; AVX512F-LABEL: uitofpv2i64v2double
- ; AVX512F: cost of 5 {{.*}} uitofp
- ;
- ; AVX512DQ-LABEL: uitofpv2i64v2double
- ; AVX512DQ: cost of 1 {{.*}} uitofp
- %1 = uitofp <2 x i64> %a to <2 x double>
- ret <2 x double> %1
-}
-
-define <4 x double> @uitofpv4i64v4double(<4 x i64> %a) {
- ; SSE2-LABEL: uitofpv4i64v4double
- ; SSE2: cost of 40 {{.*}} uitofp
- ;
- ; AVX1-LABEL: uitofpv4i64v4double
- ; AVX1: cost of 20 {{.*}} uitofp
- ;
- ; AVX2-LABEL: uitofpv4i64v4double
- ; AVX2: cost of 20 {{.*}} uitofp
- ;
- ; AVX512F-LABEL: uitofpv4i64v4double
- ; AVX512F: cost of 12 {{.*}} uitofp
- ;
- ; AVX512DQ-LABEL: uitofpv4i64v4double
- ; AVX512DQ: cost of 1 {{.*}} uitofp
- %1 = uitofp <4 x i64> %a to <4 x double>
- ret <4 x double> %1
-}
-
-define <8 x double> @uitofpv8i64v8double(<8 x i64> %a) {
- ; SSE2-LABEL: uitofpv8i64v8double
- ; SSE2: cost of 80 {{.*}} uitofp
- ;
- ; AVX1-LABEL: uitofpv8i64v8double
- ; AVX1: cost of 41 {{.*}} uitofp
- ;
- ; AVX2-LABEL: uitofpv8i64v8double
- ; AVX2: cost of 41 {{.*}} uitofp
- ;
- ; AVX512F-LABEL: uitofpv8i64v8double
- ; AVX512F: cost of 26 {{.*}} uitofp
- ;
- ; AVX512DQ-LABEL: uitofpv8i64v8double
- ; AVX512DQ: cost of 1 {{.*}} uitofp
- %1 = uitofp <8 x i64> %a to <8 x double>
- ret <8 x double> %1
-}
-
-define <16 x double> @uitofpv16i64v16double(<16 x i64> %a) {
- ; SSE2-LABEL: uitofpv16i64v16double
- ; SSE2: cost of 160 {{.*}} uitofp
- ;
- ; AVX1-LABEL: uitofpv16i64v16double
- ; AVX1: cost of 83 {{.*}} uitofp
- ;
- ; AVX2-LABEL: uitofpv16i64v16double
- ; AVX2: cost of 83 {{.*}} uitofp
- ;
- ; AVX512F-LABEL: uitofpv16i64v16double
- ; AVX512F: cost of 53 {{.*}} uitofp
- ;
- ; AVX512DQ-LABEL: uitofpv16i64v16double
- ; AVX512DQ: cost of 3 {{.*}} uitofp
- %1 = uitofp <16 x i64> %a to <16 x double>
- ret <16 x double> %1
-}
-
-define <32 x double> @uitofpv32i64v32double(<32 x i64> %a) {
- ; SSE2-LABEL: uitofpv32i64v32double
- ; SSE2: cost of 320 {{.*}} uitofp
- ;
- ; AVX1-LABEL: uitofpv32i64v32double
- ; AVX1: cost of 167 {{.*}} uitofp
- ;
- ; AVX2-LABEL: uitofpv32i64v32double
- ; AVX2: cost of 167 {{.*}} uitofp
- ;
- ; AVX512F-LABEL: uitofpv32i64v32double
- ; AVX512F: cost of 107 {{.*}} uitofp
- ;
- ; AVX512DQ-LABEL: uitofpv32i64v32double
- ; AVX512DQ: cost of 2 {{.*}} uitofp
- %1 = uitofp <32 x i64> %a to <32 x double>
- ret <32 x double> %1
-}
-
-define <2 x float> @uitofpv2i8v2float(<2 x i8> %a) {
- ; SSE2-LABEL: uitofpv2i8v2float
- ; SSE2: cost of 15 {{.*}} uitofp
- ;
- ; AVX1-LABEL: uitofpv2i8v2float
- ; AVX1: cost of 4 {{.*}} uitofp
- ;
- ; AVX2-LABEL: uitofpv2i8v2float
- ; AVX2: cost of 4 {{.*}} uitofp
- ;
- ; AVX512F-LABEL: uitofpv2i8v2float
- ; AVX512F: cost of 4 {{.*}} uitofp
- %1 = uitofp <2 x i8> %a to <2 x float>
- ret <2 x float> %1
-}
-
-define <4 x float> @uitofpv4i8v4float(<4 x i8> %a) {
- ; SSE2-LABEL: uitofpv4i8v4float
- ; SSE2: cost of 8 {{.*}} uitofp
- ;
- ; AVX1-LABEL: uitofpv4i8v4float
- ; AVX1: cost of 2 {{.*}} uitofp
- ;
- ; AVX2-LABEL: uitofpv4i8v4float
- ; AVX2: cost of 2 {{.*}} uitofp
- ;
- ; AVX512F-LABEL: uitofpv4i8v4float
- ; AVX512F: cost of 2 {{.*}} uitofp
- %1 = uitofp <4 x i8> %a to <4 x float>
- ret <4 x float> %1
-}
-
-define <8 x float> @uitofpv8i8v8float(<8 x i8> %a) {
- ; SSE2-LABEL: uitofpv8i8v8float
- ; SSE2: cost of 15 {{.*}} uitofp
- ;
- ; AVX1-LABEL: uitofpv8i8v8float
- ; AVX1: cost of 5 {{.*}} uitofp
- ;
- ; AVX2-LABEL: uitofpv8i8v8float
- ; AVX2: cost of 5 {{.*}} uitofp
- ;
- ; AVX512F-LABEL: uitofpv8i8v8float
- ; AVX512F: cost of 2 {{.*}} uitofp
- %1 = uitofp <8 x i8> %a to <8 x float>
- ret <8 x float> %1
-}
-
-define <16 x float> @uitofpv16i8v16float(<16 x i8> %a) {
- ; SSE2-LABEL: uitofpv16i8v16float
- ; SSE2: cost of 8 {{.*}} uitofp
- ;
- ; AVX1-LABEL: uitofpv16i8v16float
- ; AVX1: cost of 11 {{.*}} uitofp
- ;
- ; AVX2-LABEL: uitofpv16i8v16float
- ; AVX2: cost of 11 {{.*}} uitofp
- ;
- ; AVX512F-LABEL: uitofpv16i8v16float
- ; AVX512F: cost of 2 {{.*}} uitofp
- %1 = uitofp <16 x i8> %a to <16 x float>
- ret <16 x float> %1
-}
-
-define <32 x float> @uitofpv32i8v32float(<32 x i8> %a) {
- ; SSE2-LABEL: uitofpv32i8v32float
- ; SSE2: cost of 16 {{.*}} uitofp
- ;
- ; AVX1-LABEL: uitofpv32i8v32float
- ; AVX1: cost of 23 {{.*}} uitofp
- ;
- ; AVX2-LABEL: uitofpv32i8v32float
- ; AVX2: cost of 23 {{.*}} uitofp
- ;
- ; AVX512F-LABEL: uitofpv32i8v32float
- ; AVX512F: cost of 5 {{.*}} uitofp
- %1 = uitofp <32 x i8> %a to <32 x float>
- ret <32 x float> %1
-}
-
-define <2 x float> @uitofpv2i16v2float(<2 x i16> %a) {
- ; SSE2-LABEL: uitofpv2i16v2float
- ; SSE2: cost of 15 {{.*}} uitofp
- ;
- ; AVX1-LABEL: uitofpv2i16v2float
- ; AVX1: cost of 4 {{.*}} uitofp
- ;
- ; AVX2-LABEL: uitofpv2i16v2float
- ; AVX2: cost of 4 {{.*}} uitofp
- ;
- ; AVX512F-LABEL: uitofpv2i16v2float
- ; AVX512F: cost of 4 {{.*}} uitofp
- %1 = uitofp <2 x i16> %a to <2 x float>
- ret <2 x float> %1
-}
-
-define <4 x float> @uitofpv4i16v4float(<4 x i16> %a) {
- ; SSE2-LABEL: uitofpv4i16v4float
- ; SSE2: cost of 8 {{.*}} uitofp
- ;
- ; AVX1-LABEL: uitofpv4i16v4float
- ; AVX1: cost of 2 {{.*}} uitofp
- ;
- ; AVX2-LABEL: uitofpv4i16v4float
- ; AVX2: cost of 2 {{.*}} uitofp
- ;
- ; AVX512F-LABEL: uitofpv4i16v4float
- ; AVX512F: cost of 2 {{.*}} uitofp
- %1 = uitofp <4 x i16> %a to <4 x float>
- ret <4 x float> %1
-}
-
-define <8 x float> @uitofpv8i16v8float(<8 x i16> %a) {
- ; SSE2-LABEL: uitofpv8i16v8float
- ; SSE2: cost of 15 {{.*}} uitofp
- ;
- ; AVX1-LABEL: uitofpv8i16v8float
- ; AVX1: cost of 5 {{.*}} uitofp
- ;
- ; AVX2-LABEL: uitofpv8i16v8float
- ; AVX2: cost of 5 {{.*}} uitofp
- ;
- ; AVX512F-LABEL: uitofpv8i16v8float
- ; AVX512F: cost of 2 {{.*}} uitofp
- %1 = uitofp <8 x i16> %a to <8 x float>
- ret <8 x float> %1
-}
-
-define <16 x float> @uitofpv16i16v16float(<16 x i16> %a) {
- ; SSE2-LABEL: uitofpv16i16v16float
- ; SSE2: cost of 30 {{.*}} uitofp
- ;
- ; AVX1-LABEL: uitofpv16i16v16float
- ; AVX1: cost of 11 {{.*}} uitofp
- ;
- ; AVX2-LABEL: uitofpv16i16v16float
- ; AVX2: cost of 11 {{.*}} uitofp
- ;
- ; AVX512F-LABEL: uitofpv16i16v16float
- ; AVX512F: cost of 2 {{.*}} uitofp
- %1 = uitofp <16 x i16> %a to <16 x float>
- ret <16 x float> %1
-}
-
-define <32 x float> @uitofpv32i16v32float(<32 x i16> %a) {
- ; SSE2-LABEL: uitofpv32i16v32float
- ; SSE2: cost of 60 {{.*}} uitofp
- ;
- ; AVX1-LABEL: uitofpv32i16v32float
- ; AVX1: cost of 23 {{.*}} uitofp
- ;
- ; AVX2-LABEL: uitofpv32i16v32float
- ; AVX2: cost of 23 {{.*}} uitofp
- ;
- ; AVX512F-LABEL: uitofpv32i16v32float
- ; AVX512F: cost of 5 {{.*}} uitofp
- %1 = uitofp <32 x i16> %a to <32 x float>
- ret <32 x float> %1
-}
-
-define <2 x float> @uitofpv2i32v2float(<2 x i32> %a) {
- ; SSE2-LABEL: uitofpv2i32v2float
- ; SSE2: cost of 15 {{.*}} uitofp
- ;
- ; AVX1-LABEL: uitofpv2i32v2float
- ; AVX1: cost of 4 {{.*}} uitofp
- ;
- ; AVX2-LABEL: uitofpv2i32v2float
- ; AVX2: cost of 4 {{.*}} uitofp
- ;
- ; AVX512F-LABEL: uitofpv2i32v2float
- ; AVX512F: cost of 2 {{.*}} uitofp
- %1 = uitofp <2 x i32> %a to <2 x float>
- ret <2 x float> %1
-}
-
-define <4 x float> @uitofpv4i32v4float(<4 x i32> %a) {
- ; SSE2-LABEL: uitofpv4i32v4float
- ; SSE2: cost of 8 {{.*}} uitofp
- ;
- ; AVX1-LABEL: uitofpv4i32v4float
- ; AVX1: cost of 6 {{.*}} uitofp
- ;
- ; AVX2-LABEL: uitofpv4i32v4float
- ; AVX2: cost of 6 {{.*}} uitofp
- ;
- ; AVX512F-LABEL: uitofpv4i32v4float
- ; AVX512F: cost of 1 {{.*}} uitofp
- %1 = uitofp <4 x i32> %a to <4 x float>
- ret <4 x float> %1
-}
-
-define <8 x float> @uitofpv8i32v8float(<8 x i32> %a) {
- ; SSE2-LABEL: uitofpv8i32v8float
- ; SSE2: cost of 16 {{.*}} uitofp
- ;
- ; AVX1-LABEL: uitofpv8i32v8float
- ; AVX1: cost of 9 {{.*}} uitofp
- ;
- ; AVX2-LABEL: uitofpv8i32v8float
- ; AVX2: cost of 8 {{.*}} uitofp
- ;
- ; AVX512F-LABEL: uitofpv8i32v8float
- ; AVX512F: cost of 1 {{.*}} uitofp
- %1 = uitofp <8 x i32> %a to <8 x float>
- ret <8 x float> %1
-}
-
-define <16 x float> @uitofpv16i32v16float(<16 x i32> %a) {
- ; SSE2-LABEL: uitofpv16i32v16float
- ; SSE2: cost of 32 {{.*}} uitofp
- ;
- ; AVX1-LABEL: uitofpv16i32v16float
- ; AVX1: cost of 19 {{.*}} uitofp
- ;
- ; AVX2-LABEL: uitofpv16i32v16float
- ; AVX2: cost of 17 {{.*}} uitofp
- ;
- ; AVX512F-LABEL: uitofpv16i32v16float
- ; AVX512F: cost of 1 {{.*}} uitofp
- %1 = uitofp <16 x i32> %a to <16 x float>
- ret <16 x float> %1
-}
-
-define <32 x float> @uitofpv32i32v32float(<32 x i32> %a) {
- ; SSE2-LABEL: uitofpv32i32v32float
- ; SSE2: cost of 64 {{.*}} uitofp
- ;
- ; AVX1-LABEL: uitofpv32i32v32float
- ; AVX1: cost of 39 {{.*}} uitofp
- ;
- ; AVX2-LABEL: uitofpv32i32v32float
- ; AVX2: cost of 35 {{.*}} uitofp
- ;
- ; AVX512F-LABEL: uitofpv32i32v32float
- ; AVX512F: cost of 3 {{.*}} uitofp
- %1 = uitofp <32 x i32> %a to <32 x float>
- ret <32 x float> %1
-}
-
-define <2 x float> @uitofpv2i64v2float(<2 x i64> %a) {
- ; SSE2-LABEL: uitofpv2i64v2float
- ; SSE2: cost of 15 {{.*}} uitofp
- ;
- ; AVX1-LABEL: uitofpv2i64v2float
- ; AVX1: cost of 4 {{.*}} uitofp
- ;
- ; AVX2-LABEL: uitofpv2i64v2float
- ; AVX2: cost of 4 {{.*}} uitofp
- ;
- ; AVX512F-LABEL: uitofpv2i64v2float
- ; AVX512F: cost of 5 {{.*}} uitofp
- %1 = uitofp <2 x i64> %a to <2 x float>
- ret <2 x float> %1
-}
-
-define <4 x float> @uitofpv4i64v4float(<4 x i64> %a) {
- ; SSE2-LABEL: uitofpv4i64v4float
- ; SSE2: cost of 30 {{.*}} uitofp
- ;
- ; AVX1-LABEL: uitofpv4i64v4float
- ; AVX1: cost of 10 {{.*}} uitofp
- ;
- ; AVX2-LABEL: uitofpv4i64v4float
- ; AVX2: cost of 10 {{.*}} uitofp
- ;
- ; AVX512F-LABEL: uitofpv4i64v4float
- ; AVX512F: cost of 10 {{.*}} uitofp
- %1 = uitofp <4 x i64> %a to <4 x float>
- ret <4 x float> %1
-}
-
-define <8 x float> @uitofpv8i64v8float(<8 x i64> %a) {
- ; SSE2-LABEL: uitofpv8i64v8float
- ; SSE2: cost of 60 {{.*}} uitofp
- ;
- ; AVX1-LABEL: uitofpv8i64v8float
- ; AVX1: cost of 21 {{.*}} uitofp
- ;
- ; AVX2-LABEL: uitofpv8i64v8float
- ; AVX2: cost of 21 {{.*}} uitofp
- ;
- ; AVX512F-LABEL: uitofpv8i64v8float
- ; AVX512F: cost of 26 {{.*}} uitofp
- %1 = uitofp <8 x i64> %a to <8 x float>
- ret <8 x float> %1
-}
-
-define <16 x float> @uitofpv16i64v16float(<16 x i64> %a) {
- ; SSE2-LABEL: uitofpv16i64v16float
- ; SSE2: cost of 120 {{.*}} uitofp
- ;
- ; AVX1-LABEL: uitofpv16i64v16float
- ; AVX1: cost of 43 {{.*}} uitofp
- ;
- ; AVX2-LABEL: uitofpv16i64v16float
- ; AVX2: cost of 43 {{.*}} uitofp
- ;
- ; AVX512F-LABEL: uitofpv16i64v16float
- ; AVX512F: cost of 53 {{.*}} uitofp
- %1 = uitofp <16 x i64> %a to <16 x float>
- ret <16 x float> %1
-}
-
-define <32 x float> @uitofpv32i64v32float(<32 x i64> %a) {
- ; SSE2-LABEL: uitofpv32i64v32float
- ; SSE2: cost of 240 {{.*}} uitofp
- ;
- ; AVX1-LABEL: uitofpv32i64v32float
- ; AVX1: cost of 87 {{.*}} uitofp
- ;
- ; AVX2-LABEL: uitofpv32i64v32float
- ; AVX2: cost of 87 {{.*}} uitofp
- ;
- ; AVX512F-LABEL: uitofpv32i64v32float
- ; AVX512F: cost of 107 {{.*}} uitofp
- %1 = uitofp <32 x i64> %a to <32 x float>
- ret <32 x float> %1
-}
-
-define <8 x i32> @fptouiv8f32v8i32(<8 x float> %a) {
- ; AVX512F-LABEL: fptouiv8f32v8i32
- ; AVX512F: cost of 1 {{.*}} fptoui
- %1 = fptoui <8 x float> %a to <8 x i32>
- ret <8 x i32> %1
-}
-
-define <4 x i32> @fptouiv4f32v4i32(<4 x float> %a) {
- ; AVX512F-LABEL: fptouiv4f32v4i32
- ; AVX512F: cost of 1 {{.*}} fptoui
- %1 = fptoui <4 x float> %a to <4 x i32>
- ret <4 x i32> %1
-}
-
-define <2 x i32> @fptouiv2f32v2i32(<2 x float> %a) {
- ; AVX512F-LABEL: fptouiv2f32v2i32
- ; AVX512F: cost of 1 {{.*}} fptoui
- %1 = fptoui <2 x float> %a to <2 x i32>
- ret <2 x i32> %1
-}
-
-define <16 x i32> @fptouiv16f32v16i32(<16 x float> %a) {
- ; AVX512F-LABEL: fptouiv16f32v16i32
- ; AVX512F: cost of 1 {{.*}} fptoui
- %1 = fptoui <16 x float> %a to <16 x i32>
- ret <16 x i32> %1
-}
-
-define <8 x i64> @fptouiv8f32v8i64(<8 x float> %a) {
- ; AVX512DQ-LABEL: fptouiv8f32v8i64
- ; AVX512DQ: cost of 1 {{.*}} fptoui
- %1 = fptoui <8 x float> %a to <8 x i64>
- ret <8 x i64> %1
-}
-
-define <4 x i64> @fptouiv4f32v4i64(<4 x float> %a) {
- ; AVX512DQ-LABEL: fptouiv4f32v4i64
- ; AVX512DQ: cost of 1 {{.*}} fptoui
- %1 = fptoui <4 x float> %a to <4 x i64>
- ret <4 x i64> %1
-}
-
-define <2 x i64> @fptouiv2f32v2i64(<2 x float> %a) {
- ; AVX512DQ-LABEL: fptouiv2f32v2i64
- ; AVX512DQ: cost of 1 {{.*}} fptoui
- %1 = fptoui <2 x float> %a to <2 x i64>
- ret <2 x i64> %1
+; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+avx512f -cost-model -analyze < %s | FileCheck --check-prefix=AVX512 --check-prefix=AVX512F %s
+; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+avx512dq -cost-model -analyze < %s | FileCheck --check-prefix=AVX512 --check-prefix=AVX512DQ %s
+
+; CHECK-LABEL: 'uitofp_i8_double'
+define i32 @uitofp_i8_double() {
+ ; SSE2: cost of 1 {{.*}} uitofp i8
+ ; AVX1: cost of 1 {{.*}} uitofp i8
+ ; AVX2: cost of 1 {{.*}} uitofp i8
+ ; AVX512: cost of 1 {{.*}} uitofp i8
+ %cvt_i8_f64 = uitofp i8 undef to double
+
+ ; SSE2: cost of 20 {{.*}} uitofp <2 x i8>
+ ; AVX1: cost of 4 {{.*}} uitofp <2 x i8>
+ ; AVX2: cost of 4 {{.*}} uitofp <2 x i8>
+ ; AVX512: cost of 2 {{.*}} uitofp <2 x i8>
+ %cvt_v2i8_v2f64 = uitofp <2 x i8> undef to <2 x double>
+
+ ; SSE2: cost of 40 {{.*}} uitofp <4 x i8>
+ ; AVX1: cost of 2 {{.*}} uitofp <4 x i8>
+ ; AVX2: cost of 2 {{.*}} uitofp <4 x i8>
+ ; AVX512: cost of 2 {{.*}} uitofp <4 x i8>
+ %cvt_v4i8_v4f64 = uitofp <4 x i8> undef to <4 x double>
+
+ ; SSE2: cost of 80 {{.*}} uitofp <8 x i8>
+ ; AVX1: cost of 5 {{.*}} uitofp <8 x i8>
+ ; AVX2: cost of 5 {{.*}} uitofp <8 x i8>
+ ; AVX512: cost of 2 {{.*}} uitofp <8 x i8>
+ %cvt_v8i8_v8f64 = uitofp <8 x i8> undef to <8 x double>
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'uitofp_i16_double'
+define i32 @uitofp_i16_double() {
+ ; SSE2: cost of 1 {{.*}} uitofp i16
+ ; AVX1: cost of 1 {{.*}} uitofp i16
+ ; AVX2: cost of 1 {{.*}} uitofp i16
+ ; AVX512: cost of 1 {{.*}} uitofp i16
+ %cvt_i16_f64 = uitofp i16 undef to double
+
+ ; SSE2: cost of 20 {{.*}} uitofp <2 x i16>
+ ; AVX1: cost of 4 {{.*}} uitofp <2 x i16>
+ ; AVX2: cost of 4 {{.*}} uitofp <2 x i16>
+ ; AVX512: cost of 5 {{.*}} uitofp <2 x i16>
+ %cvt_v2i16_v2f64 = uitofp <2 x i16> undef to <2 x double>
+
+ ; SSE2: cost of 40 {{.*}} uitofp <4 x i16>
+ ; AVX1: cost of 2 {{.*}} uitofp <4 x i16>
+ ; AVX2: cost of 2 {{.*}} uitofp <4 x i16>
+ ; AVX512: cost of 2 {{.*}} uitofp <4 x i16>
+ %cvt_v4i16_v4f64 = uitofp <4 x i16> undef to <4 x double>
+
+ ; SSE2: cost of 80 {{.*}} uitofp <8 x i16>
+ ; AVX1: cost of 5 {{.*}} uitofp <8 x i16>
+ ; AVX2: cost of 5 {{.*}} uitofp <8 x i16>
+ ; AVX512: cost of 2 {{.*}} uitofp <8 x i16>
+ %cvt_v8i16_v8f64 = uitofp <8 x i16> undef to <8 x double>
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'uitofp_i32_double'
+define i32 @uitofp_i32_double() {
+ ; SSE2: cost of 1 {{.*}} uitofp i32
+ ; AVX1: cost of 1 {{.*}} uitofp i32
+ ; AVX2: cost of 1 {{.*}} uitofp i32
+ ; AVX512: cost of 1 {{.*}} uitofp i32
+ %cvt_i32_f64 = uitofp i32 undef to double
+
+ ; SSE2: cost of 20 {{.*}} uitofp <2 x i32>
+ ; AVX1: cost of 6 {{.*}} uitofp <2 x i32>
+ ; AVX2: cost of 6 {{.*}} uitofp <2 x i32>
+ ; AVX512: cost of 1 {{.*}} uitofp <2 x i32>
+ %cvt_v2i32_v2f64 = uitofp <2 x i32> undef to <2 x double>
+
+ ; SSE2: cost of 40 {{.*}} uitofp <4 x i32>
+ ; AVX1: cost of 6 {{.*}} uitofp <4 x i32>
+ ; AVX2: cost of 6 {{.*}} uitofp <4 x i32>
+ ; AVX512: cost of 1 {{.*}} uitofp <4 x i32>
+ %cvt_v4i32_v4f64 = uitofp <4 x i32> undef to <4 x double>
+
+ ; SSE2: cost of 80 {{.*}} uitofp <8 x i32>
+ ; AVX1: cost of 13 {{.*}} uitofp <8 x i32>
+ ; AVX2: cost of 13 {{.*}} uitofp <8 x i32>
+ ; AVX512: cost of 1 {{.*}} uitofp <8 x i32>
+ %cvt_v8i32_v8f64 = uitofp <8 x i32> undef to <8 x double>
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'uitofp_i64_double'
+define i32 @uitofp_i64_double() {
+ ; SSE2: cost of 1 {{.*}} uitofp i64
+ ; AVX1: cost of 1 {{.*}} uitofp i64
+ ; AVX2: cost of 1 {{.*}} uitofp i64
+ ; AVX512: cost of 1 {{.*}} uitofp i64
+ %cvt_i64_f64 = uitofp i64 undef to double
+
+ ; SSE2: cost of 20 {{.*}} uitofp <2 x i64>
+ ; AVX1: cost of 10 {{.*}} uitofp <2 x i64>
+ ; AVX2: cost of 10 {{.*}} uitofp <2 x i64>
+ ; AVX512F: cost of 5 {{.*}} uitofp <2 x i64>
+ ; AVX512DQ: cost of 1 {{.*}} uitofp <2 x i64>
+ %cvt_v2i64_v2f64 = uitofp <2 x i64> undef to <2 x double>
+
+ ; SSE2: cost of 40 {{.*}} uitofp <4 x i64>
+ ; AVX1: cost of 20 {{.*}} uitofp <4 x i64>
+ ; AVX2: cost of 20 {{.*}} uitofp <4 x i64>
+ ; AVX512F: cost of 12 {{.*}} uitofp <4 x i64>
+ ; AVX512DQ: cost of 1 {{.*}} uitofp <4 x i64>
+ %cvt_v4i64_v4f64 = uitofp <4 x i64> undef to <4 x double>
+
+ ; SSE2: cost of 80 {{.*}} uitofp <8 x i64>
+ ; AVX1: cost of 41 {{.*}} uitofp <8 x i64>
+ ; AVX2: cost of 41 {{.*}} uitofp <8 x i64>
+ ; AVX512F: cost of 26 {{.*}} uitofp <8 x i64>
+ ; AVX512DQ: cost of 1 {{.*}} uitofp <8 x i64>
+ %cvt_v8i64_v8f64 = uitofp <8 x i64> undef to <8 x double>
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'uitofp_i8_float'
+define i32 @uitofp_i8_float() {
+ ; SSE2: cost of 1 {{.*}} uitofp i8
+ ; AVX1: cost of 1 {{.*}} uitofp i8
+ ; AVX2: cost of 1 {{.*}} uitofp i8
+ ; AVX512: cost of 1 {{.*}} uitofp i8
+ %cvt_i8_f32 = uitofp i8 undef to float
+
+ ; SSE2: cost of 8 {{.*}} uitofp <4 x i8>
+ ; AVX1: cost of 2 {{.*}} uitofp <4 x i8>
+ ; AVX2: cost of 2 {{.*}} uitofp <4 x i8>
+ ; AVX512: cost of 2 {{.*}} uitofp <4 x i8>
+ %cvt_v4i8_v4f32 = uitofp <4 x i8> undef to <4 x float>
+
+ ; SSE2: cost of 15 {{.*}} uitofp <8 x i8>
+ ; AVX1: cost of 5 {{.*}} uitofp <8 x i8>
+ ; AVX2: cost of 5 {{.*}} uitofp <8 x i8>
+ ; AVX512: cost of 2 {{.*}} uitofp <8 x i8>
+ %cvt_v8i8_v8f32 = uitofp <8 x i8> undef to <8 x float>
+
+ ; SSE2: cost of 8 {{.*}} uitofp <16 x i8>
+ ; AVX1: cost of 11 {{.*}} uitofp <16 x i8>
+  ; AVX2: cost of 11 {{.*}} uitofp <16 x i8>
+ ; AVX512: cost of 2 {{.*}} uitofp <16 x i8>
+ %cvt_v16i8_v16f32 = uitofp <16 x i8> undef to <16 x float>
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'uitofp_i16_float'
+define i32 @uitofp_i16_float() {
+ ; SSE2: cost of 1 {{.*}} uitofp i16
+ ; AVX1: cost of 1 {{.*}} uitofp i16
+ ; AVX2: cost of 1 {{.*}} uitofp i16
+ ; AVX512: cost of 1 {{.*}} uitofp i16
+ %cvt_i16_f32 = uitofp i16 undef to float
+
+ ; SSE2: cost of 8 {{.*}} uitofp <4 x i16>
+ ; AVX1: cost of 2 {{.*}} uitofp <4 x i16>
+ ; AVX2: cost of 2 {{.*}} uitofp <4 x i16>
+ ; AVX512: cost of 2 {{.*}} uitofp <4 x i16>
+ %cvt_v4i16_v4f32 = uitofp <4 x i16> undef to <4 x float>
+
+ ; SSE2: cost of 15 {{.*}} uitofp <8 x i16>
+ ; AVX1: cost of 5 {{.*}} uitofp <8 x i16>
+ ; AVX2: cost of 5 {{.*}} uitofp <8 x i16>
+ ; AVX512: cost of 2 {{.*}} uitofp <8 x i16>
+ %cvt_v8i16_v8f32 = uitofp <8 x i16> undef to <8 x float>
+
+ ; SSE2: cost of 30 {{.*}} uitofp <16 x i16>
+ ; AVX1: cost of 11 {{.*}} uitofp <16 x i16>
+  ; AVX2: cost of 11 {{.*}} uitofp <16 x i16>
+ ; AVX512: cost of 2 {{.*}} uitofp <16 x i16>
+ %cvt_v16i16_v16f32 = uitofp <16 x i16> undef to <16 x float>
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'uitofp_i32_float'
+define i32 @uitofp_i32_float() {
+ ; SSE2: cost of 1 {{.*}} uitofp i32
+ ; AVX1: cost of 1 {{.*}} uitofp i32
+ ; AVX2: cost of 1 {{.*}} uitofp i32
+ ; AVX512: cost of 1 {{.*}} uitofp i32
+ %cvt_i32_f32 = uitofp i32 undef to float
+
+ ; SSE2: cost of 8 {{.*}} uitofp <4 x i32>
+ ; AVX1: cost of 6 {{.*}} uitofp <4 x i32>
+ ; AVX2: cost of 6 {{.*}} uitofp <4 x i32>
+ ; AVX512: cost of 1 {{.*}} uitofp <4 x i32>
+ %cvt_v4i32_v4f32 = uitofp <4 x i32> undef to <4 x float>
+
+ ; SSE2: cost of 16 {{.*}} uitofp <8 x i32>
+ ; AVX1: cost of 9 {{.*}} uitofp <8 x i32>
+ ; AVX2: cost of 8 {{.*}} uitofp <8 x i32>
+ ; AVX512: cost of 1 {{.*}} uitofp <8 x i32>
+ %cvt_v8i32_v8f32 = uitofp <8 x i32> undef to <8 x float>
+
+ ; SSE2: cost of 32 {{.*}} uitofp <16 x i32>
+ ; AVX1: cost of 19 {{.*}} uitofp <16 x i32>
+ ; AVX2: cost of 17 {{.*}} uitofp <16 x i32>
+ ; AVX512: cost of 1 {{.*}} uitofp <16 x i32>
+ %cvt_v16i32_v16f32 = uitofp <16 x i32> undef to <16 x float>
+
+ ret i32 undef
+}
+
+; CHECK-LABEL: 'uitofp_i64_float'
+define i32 @uitofp_i64_float() {
+ ; SSE2: cost of 1 {{.*}} uitofp i64
+ ; AVX1: cost of 1 {{.*}} uitofp i64
+ ; AVX2: cost of 1 {{.*}} uitofp i64
+ ; AVX512: cost of 1 {{.*}} uitofp i64
+ %cvt_i64_f32 = uitofp i64 undef to float
+
+ ; SSE2: cost of 15 {{.*}} uitofp <2 x i64>
+ ; AVX1: cost of 4 {{.*}} uitofp <2 x i64>
+ ; AVX2: cost of 4 {{.*}} uitofp <2 x i64>
+ ; AVX512F: cost of 5 {{.*}} uitofp <2 x i64>
+ ; AVX512DQ: cost of 1 {{.*}} uitofp <2 x i64>
+ %cvt_v2i64_v2f32 = uitofp <2 x i64> undef to <2 x float>
+
+ ; SSE2: cost of 30 {{.*}} uitofp <4 x i64>
+ ; AVX1: cost of 10 {{.*}} uitofp <4 x i64>
+ ; AVX2: cost of 10 {{.*}} uitofp <4 x i64>
+ ; AVX512F: cost of 10 {{.*}} uitofp <4 x i64>
+ ; AVX512DQ: cost of 1 {{.*}} uitofp <4 x i64>
+ %cvt_v4i64_v4f32 = uitofp <4 x i64> undef to <4 x float>
+
+ ; SSE2: cost of 60 {{.*}} uitofp <8 x i64>
+ ; AVX1: cost of 21 {{.*}} uitofp <8 x i64>
+ ; AVX2: cost of 21 {{.*}} uitofp <8 x i64>
+ ; AVX512F: cost of 26 {{.*}} uitofp <8 x i64>
+ ; AVX512DQ: cost of 1 {{.*}} uitofp <8 x i64>
+ %cvt_v8i64_v8f32 = uitofp <8 x i64> undef to <8 x float>
+
+ ; SSE2: cost of 120 {{.*}} uitofp <16 x i64>
+ ; AVX1: cost of 43 {{.*}} uitofp <16 x i64>
+ ; AVX2: cost of 43 {{.*}} uitofp <16 x i64>
+ ; AVX512F: cost of 53 {{.*}} uitofp <16 x i64>
+ ; AVX512DQ: cost of 3 {{.*}} uitofp <16 x i64>
+ %cvt_v16i64_v16f32 = uitofp <16 x i64> undef to <16 x float>
+
+ ret i32 undef
}
diff --git a/test/Analysis/CostModel/X86/uniformshift.ll b/test/Analysis/CostModel/X86/uniformshift.ll
new file mode 100644
index 000000000000..4fef50f2bf1d
--- /dev/null
+++ b/test/Analysis/CostModel/X86/uniformshift.ll
@@ -0,0 +1,39 @@
+; RUN: llc -mtriple=x86_64-apple-darwin -mattr=+sse2 < %s | FileCheck --check-prefix=SSE2-CODEGEN %s
+; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+sse2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s
+
+define <4 x i32> @shl(<4 x i32> %vector, i32 %scalar) {
+entry:
+ ; SSE2: 'shl'
+ ; SSE2: cost of 1 {{.*}} shl
+ ; SSE2-CODEGEN: movd %edi, %xmm1
+ ; SSE2-CODEGEN: pslld %xmm1, %xmm0
+ %insert = insertelement <4 x i32> undef, i32 %scalar, i32 0
+ %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
+ %ret = shl <4 x i32> %vector , %splat
+ ret <4 x i32> %ret
+}
+
+define <4 x i32> @ashr(<4 x i32> %vector, i32 %scalar) {
+entry:
+ ; SSE2: 'ashr'
+ ; SSE2: cost of 1 {{.*}} ashr
+ ; SSE2-CODEGEN: movd %edi, %xmm1
+ ; SSE2-CODEGEN: psrad %xmm1, %xmm0
+ %insert = insertelement <4 x i32> undef, i32 %scalar, i32 0
+ %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
+ %ret = ashr <4 x i32> %vector , %splat
+ ret <4 x i32> %ret
+}
+
+define <4 x i32> @lshr(<4 x i32> %vector, i32 %scalar) {
+entry:
+ ; SSE2: 'lshr'
+ ; SSE2: cost of 1 {{.*}} lshr
+ ; SSE2-CODEGEN: movd %edi, %xmm1
+ ; SSE2-CODEGEN: psrld %xmm1, %xmm0
+ %insert = insertelement <4 x i32> undef, i32 %scalar, i32 0
+ %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
+ %ret = lshr <4 x i32> %vector , %splat
+ ret <4 x i32> %ret
+}
+
diff --git a/test/Analysis/CostModel/X86/vshift-ashr-cost.ll b/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
index a0d07d7b6ec0..e53e40b57e1d 100644
--- a/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
+++ b/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
@@ -36,8 +36,8 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i32':
; SSE2: Found an estimated cost of 16 for instruction: %shift
-; SSE41: Found an estimated cost of 16 for instruction: %shift
-; AVX: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 12 for instruction: %shift
+; AVX: Found an estimated cost of 12 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
@@ -48,8 +48,8 @@ define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 24 for instruction: %shift
+; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
@@ -60,9 +60,9 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
-; AVX2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 14 for instruction: %shift
+; AVX: Found an estimated cost of 14 for instruction: %shift
+; AVX2: Found an estimated cost of 14 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = ashr <8 x i16> %a, %b
ret <8 x i16> %shift
@@ -71,8 +71,8 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16':
; SSE2: Found an estimated cost of 64 for instruction: %shift
-; SSE41: Found an estimated cost of 64 for instruction: %shift
-; AVX: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 28 for instruction: %shift
+; AVX: Found an estimated cost of 28 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = ashr <16 x i16> %a, %b
@@ -82,9 +82,9 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i8':
; SSE2: Found an estimated cost of 54 for instruction: %shift
-; SSE41: Found an estimated cost of 54 for instruction: %shift
-; AVX: Found an estimated cost of 54 for instruction: %shift
-; AVX2: Found an estimated cost of 54 for instruction: %shift
+; SSE41: Found an estimated cost of 24 for instruction: %shift
+; AVX: Found an estimated cost of 24 for instruction: %shift
+; AVX2: Found an estimated cost of 24 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = ashr <16 x i8> %a, %b
ret <16 x i8> %shift
@@ -93,8 +93,8 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8':
; SSE2: Found an estimated cost of 108 for instruction: %shift
-; SSE41: Found an estimated cost of 108 for instruction: %shift
-; AVX: Found an estimated cost of 108 for instruction: %shift
+; SSE41: Found an estimated cost of 48 for instruction: %shift
+; AVX: Found an estimated cost of 48 for instruction: %shift
; AVX2: Found an estimated cost of 24 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = ashr <32 x i8> %a, %b
@@ -132,8 +132,8 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i32':
; SSE2: Found an estimated cost of 16 for instruction: %shift
-; SSE41: Found an estimated cost of 16 for instruction: %shift
-; AVX: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 12 for instruction: %shift
+; AVX: Found an estimated cost of 12 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
@@ -145,8 +145,8 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i32':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 24 for instruction: %shift
+; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
@@ -158,9 +158,9 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
-; AVX2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 14 for instruction: %shift
+; AVX: Found an estimated cost of 14 for instruction: %shift
+; AVX2: Found an estimated cost of 14 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
%shift = ashr <8 x i16> %a, %splat
@@ -170,8 +170,8 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16':
; SSE2: Found an estimated cost of 64 for instruction: %shift
-; SSE41: Found an estimated cost of 64 for instruction: %shift
-; AVX: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 28 for instruction: %shift
+; AVX: Found an estimated cost of 28 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
@@ -182,9 +182,9 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i8':
; SSE2: Found an estimated cost of 54 for instruction: %shift
-; SSE41: Found an estimated cost of 54 for instruction: %shift
-; AVX: Found an estimated cost of 54 for instruction: %shift
-; AVX2: Found an estimated cost of 54 for instruction: %shift
+; SSE41: Found an estimated cost of 24 for instruction: %shift
+; AVX: Found an estimated cost of 24 for instruction: %shift
+; AVX2: Found an estimated cost of 24 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
%shift = ashr <16 x i8> %a, %splat
@@ -194,8 +194,8 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8':
; SSE2: Found an estimated cost of 108 for instruction: %shift
-; SSE41: Found an estimated cost of 108 for instruction: %shift
-; AVX: Found an estimated cost of 108 for instruction: %shift
+; SSE41: Found an estimated cost of 48 for instruction: %shift
+; AVX: Found an estimated cost of 48 for instruction: %shift
; AVX2: Found an estimated cost of 24 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
@@ -232,8 +232,8 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i32':
; SSE2: Found an estimated cost of 16 for instruction: %shift
-; SSE41: Found an estimated cost of 16 for instruction: %shift
-; AVX: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 12 for instruction: %shift
+; AVX: Found an estimated cost of 12 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
@@ -244,8 +244,8 @@ define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i32':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 24 for instruction: %shift
+; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
@@ -256,9 +256,9 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
-; AVX2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 14 for instruction: %shift
+; AVX: Found an estimated cost of 14 for instruction: %shift
+; AVX2: Found an estimated cost of 14 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = ashr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
ret <8 x i16> %shift
@@ -267,8 +267,8 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i16':
; SSE2: Found an estimated cost of 64 for instruction: %shift
-; SSE41: Found an estimated cost of 64 for instruction: %shift
-; AVX: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 28 for instruction: %shift
+; AVX: Found an estimated cost of 28 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = ashr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
@@ -278,9 +278,9 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i8':
; SSE2: Found an estimated cost of 54 for instruction: %shift
-; SSE41: Found an estimated cost of 54 for instruction: %shift
-; AVX: Found an estimated cost of 54 for instruction: %shift
-; AVX2: Found an estimated cost of 54 for instruction: %shift
+; SSE41: Found an estimated cost of 24 for instruction: %shift
+; AVX: Found an estimated cost of 24 for instruction: %shift
+; AVX2: Found an estimated cost of 24 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = ashr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
ret <16 x i8> %shift
@@ -289,8 +289,8 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8':
; SSE2: Found an estimated cost of 108 for instruction: %shift
-; SSE41: Found an estimated cost of 108 for instruction: %shift
-; AVX: Found an estimated cost of 108 for instruction: %shift
+; SSE41: Found an estimated cost of 48 for instruction: %shift
+; AVX: Found an estimated cost of 48 for instruction: %shift
; AVX2: Found an estimated cost of 24 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = ashr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
diff --git a/test/Analysis/CostModel/X86/vshift-lshr-cost.ll b/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
index a686b4368f21..6d028268ea55 100644
--- a/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
+++ b/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
@@ -38,8 +38,8 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i32':
; SSE2: Found an estimated cost of 16 for instruction: %shift
-; SSE41: Found an estimated cost of 16 for instruction: %shift
-; AVX: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 11 for instruction: %shift
+; AVX: Found an estimated cost of 11 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
@@ -50,8 +50,8 @@ define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 22 for instruction: %shift
+; AVX: Found an estimated cost of 22 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
@@ -62,9 +62,9 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
-; AVX2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 14 for instruction: %shift
+; AVX: Found an estimated cost of 14 for instruction: %shift
+; AVX2: Found an estimated cost of 14 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = lshr <8 x i16> %a, %b
ret <8 x i16> %shift
@@ -73,8 +73,8 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16':
; SSE2: Found an estimated cost of 64 for instruction: %shift
-; SSE41: Found an estimated cost of 64 for instruction: %shift
-; AVX: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 28 for instruction: %shift
+; AVX: Found an estimated cost of 28 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = lshr <16 x i16> %a, %b
@@ -84,9 +84,9 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i8':
; SSE2: Found an estimated cost of 26 for instruction: %shift
-; SSE41: Found an estimated cost of 26 for instruction: %shift
-; AVX: Found an estimated cost of 26 for instruction: %shift
-; AVX2: Found an estimated cost of 26 for instruction: %shift
+; SSE41: Found an estimated cost of 12 for instruction: %shift
+; AVX: Found an estimated cost of 12 for instruction: %shift
+; AVX2: Found an estimated cost of 12 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = lshr <16 x i8> %a, %b
ret <16 x i8> %shift
@@ -95,8 +95,8 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8':
; SSE2: Found an estimated cost of 52 for instruction: %shift
-; SSE41: Found an estimated cost of 52 for instruction: %shift
-; AVX: Found an estimated cost of 52 for instruction: %shift
+; SSE41: Found an estimated cost of 24 for instruction: %shift
+; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = lshr <32 x i8> %a, %b
@@ -136,8 +136,8 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i32':
; SSE2: Found an estimated cost of 16 for instruction: %shift
-; SSE41: Found an estimated cost of 16 for instruction: %shift
-; AVX: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 11 for instruction: %shift
+; AVX: Found an estimated cost of 11 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
@@ -149,8 +149,8 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i32':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 22 for instruction: %shift
+; AVX: Found an estimated cost of 22 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
@@ -162,9 +162,9 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
-; AVX2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 14 for instruction: %shift
+; AVX: Found an estimated cost of 14 for instruction: %shift
+; AVX2: Found an estimated cost of 14 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
%shift = lshr <8 x i16> %a, %splat
@@ -174,8 +174,8 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16':
; SSE2: Found an estimated cost of 64 for instruction: %shift
-; SSE41: Found an estimated cost of 64 for instruction: %shift
-; AVX: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 28 for instruction: %shift
+; AVX: Found an estimated cost of 28 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
@@ -186,9 +186,9 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i8':
; SSE2: Found an estimated cost of 26 for instruction: %shift
-; SSE41: Found an estimated cost of 26 for instruction: %shift
-; AVX: Found an estimated cost of 26 for instruction: %shift
-; AVX2: Found an estimated cost of 26 for instruction: %shift
+; SSE41: Found an estimated cost of 12 for instruction: %shift
+; AVX: Found an estimated cost of 12 for instruction: %shift
+; AVX2: Found an estimated cost of 12 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
%shift = lshr <16 x i8> %a, %splat
@@ -198,8 +198,8 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8':
; SSE2: Found an estimated cost of 52 for instruction: %shift
-; SSE41: Found an estimated cost of 52 for instruction: %shift
-; AVX: Found an estimated cost of 52 for instruction: %shift
+; SSE41: Found an estimated cost of 24 for instruction: %shift
+; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
@@ -238,8 +238,8 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i32':
; SSE2: Found an estimated cost of 16 for instruction: %shift
-; SSE41: Found an estimated cost of 16 for instruction: %shift
-; AVX: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 11 for instruction: %shift
+; AVX: Found an estimated cost of 11 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
@@ -250,8 +250,8 @@ define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i32':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 22 for instruction: %shift
+; AVX: Found an estimated cost of 22 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
@@ -262,9 +262,9 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
-; AVX2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 14 for instruction: %shift
+; AVX: Found an estimated cost of 14 for instruction: %shift
+; AVX2: Found an estimated cost of 14 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
ret <8 x i16> %shift
@@ -273,8 +273,8 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i16':
; SSE2: Found an estimated cost of 64 for instruction: %shift
-; SSE41: Found an estimated cost of 64 for instruction: %shift
-; AVX: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 28 for instruction: %shift
+; AVX: Found an estimated cost of 28 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = lshr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
@@ -284,9 +284,9 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i8':
; SSE2: Found an estimated cost of 26 for instruction: %shift
-; SSE41: Found an estimated cost of 26 for instruction: %shift
-; AVX: Found an estimated cost of 26 for instruction: %shift
-; AVX2: Found an estimated cost of 26 for instruction: %shift
+; SSE41: Found an estimated cost of 12 for instruction: %shift
+; AVX: Found an estimated cost of 12 for instruction: %shift
+; AVX2: Found an estimated cost of 12 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = lshr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
ret <16 x i8> %shift
@@ -295,8 +295,8 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8':
; SSE2: Found an estimated cost of 52 for instruction: %shift
-; SSE41: Found an estimated cost of 52 for instruction: %shift
-; AVX: Found an estimated cost of 52 for instruction: %shift
+; SSE41: Found an estimated cost of 24 for instruction: %shift
+; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = lshr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
diff --git a/test/Analysis/CostModel/X86/vshift-shl-cost.ll b/test/Analysis/CostModel/X86/vshift-shl-cost.ll
index 85ca5a5a7f32..60ba3adea42a 100644
--- a/test/Analysis/CostModel/X86/vshift-shl-cost.ll
+++ b/test/Analysis/CostModel/X86/vshift-shl-cost.ll
@@ -63,9 +63,9 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
-; AVX2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 14 for instruction: %shift
+; AVX: Found an estimated cost of 14 for instruction: %shift
+; AVX2: Found an estimated cost of 14 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
%shift = shl <8 x i16> %a, %b
ret <8 x i16> %shift
@@ -74,8 +74,8 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16':
; SSE2: Found an estimated cost of 64 for instruction: %shift
-; SSE41: Found an estimated cost of 64 for instruction: %shift
-; AVX: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 28 for instruction: %shift
+; AVX: Found an estimated cost of 28 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = shl <16 x i16> %a, %b
@@ -85,9 +85,9 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i8':
; SSE2: Found an estimated cost of 26 for instruction: %shift
-; SSE41: Found an estimated cost of 26 for instruction: %shift
-; AVX: Found an estimated cost of 26 for instruction: %shift
-; AVX2: Found an estimated cost of 26 for instruction: %shift
+; SSE41: Found an estimated cost of 11 for instruction: %shift
+; AVX: Found an estimated cost of 11 for instruction: %shift
+; AVX2: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
%shift = shl <16 x i8> %a, %b
ret <16 x i8> %shift
@@ -96,8 +96,8 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8':
; SSE2: Found an estimated cost of 52 for instruction: %shift
-; SSE41: Found an estimated cost of 52 for instruction: %shift
-; AVX: Found an estimated cost of 52 for instruction: %shift
+; SSE41: Found an estimated cost of 22 for instruction: %shift
+; AVX: Found an estimated cost of 22 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = shl <32 x i8> %a, %b
@@ -163,9 +163,9 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
-; AVX2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 14 for instruction: %shift
+; AVX: Found an estimated cost of 14 for instruction: %shift
+; AVX2: Found an estimated cost of 14 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
%splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
%shift = shl <8 x i16> %a, %splat
@@ -175,8 +175,8 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16':
; SSE2: Found an estimated cost of 64 for instruction: %shift
-; SSE41: Found an estimated cost of 64 for instruction: %shift
-; AVX: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 28 for instruction: %shift
+; AVX: Found an estimated cost of 28 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
@@ -187,9 +187,9 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i8':
; SSE2: Found an estimated cost of 26 for instruction: %shift
-; SSE41: Found an estimated cost of 26 for instruction: %shift
-; AVX: Found an estimated cost of 26 for instruction: %shift
-; AVX2: Found an estimated cost of 26 for instruction: %shift
+; SSE41: Found an estimated cost of 11 for instruction: %shift
+; AVX: Found an estimated cost of 11 for instruction: %shift
+; AVX2: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
%splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
%shift = shl <16 x i8> %a, %splat
@@ -199,8 +199,8 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8':
; SSE2: Found an estimated cost of 52 for instruction: %shift
-; SSE41: Found an estimated cost of 52 for instruction: %shift
-; AVX: Found an estimated cost of 52 for instruction: %shift
+; SSE41: Found an estimated cost of 22 for instruction: %shift
+; AVX: Found an estimated cost of 22 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
@@ -286,9 +286,9 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i8':
; SSE2: Found an estimated cost of 26 for instruction: %shift
-; SSE41: Found an estimated cost of 26 for instruction: %shift
-; AVX: Found an estimated cost of 26 for instruction: %shift
-; AVX2: Found an estimated cost of 26 for instruction: %shift
+; SSE41: Found an estimated cost of 11 for instruction: %shift
+; AVX: Found an estimated cost of 11 for instruction: %shift
+; AVX2: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
%shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
ret <16 x i8> %shift
@@ -297,8 +297,8 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8':
; SSE2: Found an estimated cost of 52 for instruction: %shift
-; SSE41: Found an estimated cost of 52 for instruction: %shift
-; AVX: Found an estimated cost of 52 for instruction: %shift
+; SSE41: Found an estimated cost of 22 for instruction: %shift
+; AVX: Found an estimated cost of 22 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
diff --git a/test/Analysis/Delinearization/terms_with_identity_factor.ll b/test/Analysis/Delinearization/terms_with_identity_factor.ll
new file mode 100644
index 000000000000..677b079c5969
--- /dev/null
+++ b/test/Analysis/Delinearization/terms_with_identity_factor.ll
@@ -0,0 +1,64 @@
+; REQUIRES: asserts
+; RUN: opt < %s -analyze -delinearize -debug 2>&1 | FileCheck %s
+; void foo (int m, int n, char *A) {
+; for (int i=0; i < m; i++)
+; for(int j=0; j< n; j++)
+; A[i*n+j] += 1;
+;}
+
+; ModuleID = 'delin.cpp'
+;target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+;target triple = "aarch64--linux-gnu"
+
+; CHECK-LABEL: Delinearization on function foo
+; CHECK: Inst: %4 = load i8, i8* %arrayidx.us, align 1
+; CHECK: Subscripts
+; CHECK-NEXT: {0,+,1}<nuw><nsw><%for.body3.lr.ph.us>
+; CHECK-NEXT: {0,+,1}<nuw><nsw><%for.body3.us>
+; CHECK: succeeded to delinearize
+
+define void @foo(i32 %m, i32 %n, i8* nocapture %A) #0 {
+entry:
+ br label %entry.split
+
+entry.split: ; preds = %entry
+ %cmp15 = icmp sgt i32 %m, 0
+ %cmp213 = icmp sgt i32 %n, 0
+ %or.cond = and i1 %cmp15, %cmp213
+ br i1 %or.cond, label %for.cond1.preheader.lr.ph.split.us, label %for.end8
+
+for.cond1.preheader.lr.ph.split.us: ; preds = %entry.split
+ %0 = add i32 %n, -1
+ %1 = sext i32 %n to i64
+ %2 = add i32 %m, -1
+ br label %for.body3.lr.ph.us
+
+for.body3.us: ; preds = %for.body3.us, %for.body3.lr.ph.us
+ %indvars.iv = phi i64 [ 0, %for.body3.lr.ph.us ], [ %indvars.iv.next, %for.body3.us ]
+ %3 = add nsw i64 %indvars.iv, %5
+ %arrayidx.us = getelementptr inbounds i8, i8* %A, i64 %3
+ %4 = load i8, i8* %arrayidx.us, align 1
+ %add4.us = add i8 %4, 1
+ store i8 %add4.us, i8* %arrayidx.us, align 1
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %0
+ br i1 %exitcond, label %for.cond1.for.inc6_crit_edge.us, label %for.body3.us
+
+for.body3.lr.ph.us: ; preds = %for.cond1.for.inc6_crit_edge.us, %for.cond1.preheader.lr.ph.split.us
+ %indvars.iv19 = phi i64 [ %indvars.iv.next20, %for.cond1.for.inc6_crit_edge.us ], [ 0, %for.cond1.preheader.lr.ph.split.us ]
+ %5 = mul nsw i64 %indvars.iv19, %1
+ br label %for.body3.us
+
+for.cond1.for.inc6_crit_edge.us: ; preds = %for.body3.us
+ %indvars.iv.next20 = add nuw nsw i64 %indvars.iv19, 1
+ %lftr.wideiv22 = trunc i64 %indvars.iv19 to i32
+ %exitcond23 = icmp eq i32 %lftr.wideiv22, %2
+ br i1 %exitcond23, label %for.end8.loopexit, label %for.body3.lr.ph.us
+
+for.end8.loopexit: ; preds = %for.cond1.for.inc6_crit_edge.us
+ br label %for.end8
+
+for.end8: ; preds = %for.end8.loopexit, %entry.split
+ ret void
+}
diff --git a/test/Analysis/Dominators/2006-10-02-BreakCritEdges.ll b/test/Analysis/Dominators/2006-10-02-BreakCritEdges.ll
index 45efc4238114..a8013176977d 100644
--- a/test/Analysis/Dominators/2006-10-02-BreakCritEdges.ll
+++ b/test/Analysis/Dominators/2006-10-02-BreakCritEdges.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -domtree -break-crit-edges -analyze -domtree | FileCheck %s
+; RUN: opt < %s -passes='require<domtree>,break-crit-edges,print<domtree>' -disable-output 2>&1| FileCheck %s
; PR932
; CHECK: [3] %brtrue {1,2}
diff --git a/test/Analysis/Dominators/2007-01-14-BreakCritEdges.ll b/test/Analysis/Dominators/2007-01-14-BreakCritEdges.ll
index 96dc73929d1c..f73b12e0864b 100644
--- a/test/Analysis/Dominators/2007-01-14-BreakCritEdges.ll
+++ b/test/Analysis/Dominators/2007-01-14-BreakCritEdges.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -domtree -break-crit-edges -domtree -disable-output
+; RUN: opt < %s -domtree -break-crit-edges -disable-output
+; RUN: opt < %s -passes='require<domtree>,break-crit-edges' -disable-output
; PR1110
%struct.OggVorbis_File = type { i8*, i32, i64, i64, %struct.ogg_sync_state, i32, i64*, i64*, i32*, i64*, %struct.vorbis_info*, %struct.vorbis_comment*, i64, i32, i32, i32, double, double, %struct.ogg_stream_state, %struct.vorbis_dsp_state, %struct.vorbis_block, %struct.ov_callbacks }
diff --git a/test/Analysis/GlobalsModRef/dead-uses.ll b/test/Analysis/GlobalsModRef/dead-uses.ll
new file mode 100644
index 000000000000..a96655d48bfd
--- /dev/null
+++ b/test/Analysis/GlobalsModRef/dead-uses.ll
@@ -0,0 +1,54 @@
+; RUN: opt < %s -instcombine -globals-aa -licm -S | FileCheck %s
+
+; Make sure -globals-aa ignores dead uses of globals.
+
+@a = internal global i32 0, align 4
+@c = common global i32 0, align 4
+
+; Function Attrs: nounwind
+define i32 @g() {
+; Make sure the load of @a is hoisted.
+; CHECK-LABEL: define i32 @g()
+; CHECK: entry:
+; CHECK-NEXT: load i32, i32* @a, align 4
+; CHECK-NEXT: br label %for.cond
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ %sum.0 = phi i32 [ 0, %entry ], [ %add, %for.inc ]
+ %cmp = icmp slt i32 %i.0, 1000
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %0 = load i32, i32* @a, align 4
+ %add = add nsw i32 %sum.0, %0
+ call void @f()
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret i32 %sum.0
+}
+
+; Function Attrs: nounwind
+define internal void @f() {
+entry:
+ %tobool = icmp ne i32 0, 0
+ br i1 %tobool, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ store i32 ptrtoint (i32* @a to i32), i32* @c, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ %0 = load i32, i32* @c, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* @c, align 4
+ ret void
+}
+
diff --git a/test/Analysis/GlobalsModRef/func-memattributes.ll b/test/Analysis/GlobalsModRef/func-memattributes.ll
index 5494512592e3..c1c269118739 100644
--- a/test/Analysis/GlobalsModRef/func-memattributes.ll
+++ b/test/Analysis/GlobalsModRef/func-memattributes.ll
@@ -2,30 +2,30 @@
@X = internal global i32 4
-define void @test0() {
+define i32 @test0() {
; CHECK-LABEL: @test0
; CHECK: store i32 0, i32* @X
-; CHECK-NEXT: call void @func_readonly() #0
+; CHECK-NEXT: call i32 @func_readonly() #0
; CHECK-NEXT: store i32 1, i32* @X
store i32 0, i32* @X
- call void @func_readonly() #0
+ %x = call i32 @func_readonly() #0
store i32 1, i32* @X
- ret void
+ ret i32 %x
}
-define void @test1() {
+define i32 @test1() {
; CHECK-LABEL: @test1
; CHECK-NOT: store
-; CHECK: call void @func_read_argmem_only() #1
+; CHECK: call i32 @func_read_argmem_only() #1
; CHECK-NEXT: store i32 3, i32* @X
store i32 2, i32* @X
- call void @func_read_argmem_only() #1
+ %x = call i32 @func_read_argmem_only() #1
store i32 3, i32* @X
- ret void
+ ret i32 %x
}
-declare void @func_readonly() #0
-declare void @func_read_argmem_only() #1
+declare i32 @func_readonly() #0
+declare i32 @func_read_argmem_only() #1
-attributes #0 = { readonly }
-attributes #1 = { readonly argmemonly }
+attributes #0 = { readonly nounwind }
+attributes #1 = { readonly argmemonly nounwind }
diff --git a/test/Analysis/GlobalsModRef/global-used-by-global.ll b/test/Analysis/GlobalsModRef/global-used-by-global.ll
new file mode 100644
index 000000000000..98a8e3a834c1
--- /dev/null
+++ b/test/Analysis/GlobalsModRef/global-used-by-global.ll
@@ -0,0 +1,54 @@
+; RUN: opt < %s -globals-aa -gvn -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@a = internal global i32* null, align 8
+@b = global i32** @a, align 8
+@c = global i32** @a, align 8
+@d = common global i32 0, align 4
+
+; Make sure we globals-aa doesn't get confused and allow hoisting
+; the load from @a out of the loop.
+
+; CHECK-LABEL: define i32 @main()
+; CHECK: for.body:
+; CHECK-NEXT: %2 = load i32**, i32*** @b, align 8
+; CHECK-NEXT: store i32* @d, i32** %2, align 8
+; CHECK-NEXT: %3 = load i32*, i32** @a, align 8
+; CHECK-NEXT: %cmp1 = icmp ne i32* %3, @d
+; CHECK-NEXT: br i1 %cmp1, label %if.then, label %if.end
+
+define i32 @main() {
+entry:
+ %0 = load i32, i32* @d, align 4
+ br label %for.cond
+
+for.cond: ; preds = %if.end, %entry
+ %1 = phi i32 [ %inc, %if.end ], [ %0, %entry ]
+ %cmp = icmp slt i32 %1, 1
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %2 = load i32**, i32*** @b, align 8
+ store i32* @d, i32** %2, align 8
+ %3 = load i32*, i32** @a, align 8
+ %cmp1 = icmp ne i32* %3, @d
+ br i1 %cmp1, label %if.then, label %if.end
+
+if.then: ; preds = %for.body
+ br label %return
+
+if.end: ; preds = %for.body
+ %4 = load i32, i32* @d, align 4
+ %inc = add nsw i32 %4, 1
+ store i32 %inc, i32* @d, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ br label %return
+
+return: ; preds = %for.end, %if.then
+ %retval.0 = phi i32 [ 1, %if.then ], [ 0, %for.end ]
+ ret i32 %retval.0
+}
+
diff --git a/test/Analysis/MemoryDependenceAnalysis/invalidation.ll b/test/Analysis/MemoryDependenceAnalysis/invalidation.ll
new file mode 100644
index 000000000000..6e5d4a4bf846
--- /dev/null
+++ b/test/Analysis/MemoryDependenceAnalysis/invalidation.ll
@@ -0,0 +1,76 @@
+; Test that memdep gets invalidated when the analyses it depends on are
+; invalidated.
+;
+; Check AA specifically.
+; RUN: opt -disable-output -debug-pass-manager -aa-pipeline='basic-aa' %s 2>&1 \
+; RUN: -passes='require<memdep>,invalidate<aa>,gvn' \
+; RUN: | FileCheck %s --check-prefix=CHECK-AA-INVALIDATE
+; CHECK-AA-INVALIDATE: Running pass: RequireAnalysisPass
+; CHECK-AA-INVALIDATE: Running analysis: MemoryDependenceAnalysis
+; CHECK-AA-INVALIDATE: Running pass: InvalidateAnalysisPass
+; CHECK-AA-INVALIDATE: Invalidating analysis: AAManager
+; CHECK-AA-INVALIDATE: Invalidating analysis: MemoryDependenceAnalysis
+; CHECK-AA-INVALIDATE: Running pass: GVN
+; CHECK-AA-INVALIDATE: Running analysis: MemoryDependenceAnalysis
+;
+; Check the assumptions analysis specifically.
+; FIXME: We don't have any test cases that actually fail if the assumption
+; cache becomes stale. This just tests what we believe to be correct.
+; RUN: opt -disable-output -debug-pass-manager %s 2>&1 \
+; RUN: -passes='require<memdep>,invalidate<assumptions>,gvn' \
+; RUN: | FileCheck %s --check-prefix=CHECK-ASSUMPTIONS-INVALIDATE
+; CHECK-ASSUMPTIONS-INVALIDATE: Running pass: RequireAnalysisPass
+; CHECK-ASSUMPTIONS-INVALIDATE: Running analysis: MemoryDependenceAnalysis
+; CHECK-ASSUMPTIONS-INVALIDATE: Running pass: InvalidateAnalysisPass
+; CHECK-ASSUMPTIONS-INVALIDATE: Invalidating analysis: AssumptionAnalysis
+; CHECK-ASSUMPTIONS-INVALIDATE: Invalidating analysis: MemoryDependenceAnalysis
+; CHECK-ASSUMPTIONS-INVALIDATE: Running pass: GVN
+; CHECK-ASSUMPTIONS-INVALIDATE: Running analysis: MemoryDependenceAnalysis
+;
+; Check domtree specifically.
+; RUN: opt -disable-output -debug-pass-manager %s 2>&1 \
+; RUN: -passes='require<memdep>,invalidate<domtree>,gvn' \
+; RUN: | FileCheck %s --check-prefix=CHECK-DT-INVALIDATE
+; CHECK-DT-INVALIDATE: Running pass: RequireAnalysisPass
+; CHECK-DT-INVALIDATE: Running analysis: MemoryDependenceAnalysis
+; CHECK-DT-INVALIDATE: Running pass: InvalidateAnalysisPass
+; CHECK-DT-INVALIDATE: Invalidating analysis: DominatorTreeAnalysis
+; CHECK-DT-INVALIDATE: Invalidating analysis: MemoryDependenceAnalysis
+; CHECK-DT-INVALIDATE: Running pass: GVN
+; CHECK-DT-INVALIDATE: Running analysis: MemoryDependenceAnalysis
+;
+
+define void @test_use_domtree(i32* nocapture %bufUInt, i32* nocapture %pattern) nounwind {
+entry:
+ br label %for.body
+
+for.exit: ; preds = %for.body
+ ret void
+
+for.body: ; preds = %for.body, %entry
+ %i.01 = phi i32 [ 0, %entry ], [ %tmp8.7, %for.body ]
+ %arrayidx = getelementptr i32, i32* %bufUInt, i32 %i.01
+ %arrayidx5 = getelementptr i32, i32* %pattern, i32 %i.01
+ %tmp6 = load i32, i32* %arrayidx5, align 4
+ store i32 %tmp6, i32* %arrayidx, align 4
+ %tmp8.7 = add i32 %i.01, 8
+ %cmp.7 = icmp ult i32 %tmp8.7, 1024
+ br i1 %cmp.7, label %for.body, label %for.exit
+}
+
+%t = type { i32 }
+declare void @foo(i8*)
+
+define void @test_use_aa(%t* noalias %stuff ) {
+entry:
+ %p = getelementptr inbounds %t, %t* %stuff, i32 0, i32 0
+ %before = load i32, i32* %p
+
+ call void @foo(i8* null)
+
+ %after = load i32, i32* %p
+ %sum = add i32 %before, %after
+
+ store i32 %sum, i32* %p
+ ret void
+}
diff --git a/test/Analysis/RegionInfo/infinite_loop_5_a.ll b/test/Analysis/RegionInfo/infinite_loop_5_a.ll
new file mode 100644
index 000000000000..b0e52861b7c4
--- /dev/null
+++ b/test/Analysis/RegionInfo/infinite_loop_5_a.ll
@@ -0,0 +1,24 @@
+; RUN: opt -regions -analyze < %s | FileCheck %s
+
+define void @normal_condition() nounwind {
+0:
+ br label %"7"
+7:
+ br i1 1, label %"1", label %"8"
+1:
+ br i1 1, label %"6", label %"3"
+6:
+ br label %"8"
+8:
+ br label %"8"
+3:
+ br label %"4"
+4:
+ ret void
+}
+
+; CHECK: Region tree:
+; CHECK-NEXT: [0] 0 => <Function Return>
+; CHECK-NEXT: [1] 7 => 3
+; CHECK-NEXT: End region tree
+
diff --git a/test/Analysis/RegionInfo/infinite_loop_5_b.ll b/test/Analysis/RegionInfo/infinite_loop_5_b.ll
new file mode 100644
index 000000000000..49580c9de3de
--- /dev/null
+++ b/test/Analysis/RegionInfo/infinite_loop_5_b.ll
@@ -0,0 +1,25 @@
+; RUN: opt -regions -analyze < %s | FileCheck %s
+
+define void @normal_condition() nounwind {
+0:
+ br label %"7"
+7:
+ br i1 1, label %"1", label %"9"
+9:
+ br label %"8"
+1:
+ br i1 1, label %"6", label %"3"
+6:
+ br label %"9"
+8:
+ br label %"8"
+3:
+ br label %"4"
+4:
+ ret void
+}
+
+; CHECK: Region tree:
+; CHECK-NEXT: [0] 0 => <Function Return>
+; CHECK-NEXT: [1] 7 => 3
+; CHECK-NEXT: End region tree
diff --git a/test/Analysis/RegionInfo/infinite_loop_5_c.ll b/test/Analysis/RegionInfo/infinite_loop_5_c.ll
new file mode 100644
index 000000000000..51b48c90b4a8
--- /dev/null
+++ b/test/Analysis/RegionInfo/infinite_loop_5_c.ll
@@ -0,0 +1,22 @@
+; RUN: opt -regions -analyze < %s | FileCheck %s
+
+define void @normal_condition() nounwind {
+0:
+ br label %"7"
+7:
+ br i1 1, label %"1", label %"8"
+1:
+ br i1 1, label %"6", label %"3"
+6:
+ br label %"8"
+8:
+ br i1 1, label %"8", label %"7"
+3:
+ br label %"4"
+4:
+ ret void
+}
+
+; CHECK: [0] 0 => <Function Return>
+; CHECK-NEXT: [1] 7 => 3
+; CHECK-NEXT: [2] 8 => 7
diff --git a/test/Analysis/ScalarEvolution/max-mulops-inline.ll b/test/Analysis/ScalarEvolution/max-mulops-inline.ll
new file mode 100644
index 000000000000..c0dc6e012c12
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/max-mulops-inline.ll
@@ -0,0 +1,29 @@
+; RUN: opt -analyze -scalar-evolution -scev-mulops-inline-threshold=1 < %s | FileCheck --check-prefix=CHECK1 %s
+; RUN: opt -analyze -scalar-evolution -scev-mulops-inline-threshold=10 < %s | FileCheck --check-prefix=CHECK10 %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@a = local_unnamed_addr global i32 0, align 4
+@b = local_unnamed_addr global i32 0, align 4
+
+define i32 @main() local_unnamed_addr {
+
+; CHECK1: %mul.1 = mul nsw i32 %mul, %mul
+; CHECK1: --> ((%a.promoted * %a.promoted) * (%a.promoted * %a.promoted))
+
+; CHECK10: %mul.1 = mul nsw i32 %mul, %mul
+; CHECK10: --> (%a.promoted * %a.promoted * %a.promoted * %a.promoted)
+
+entry:
+ %a.promoted = load i32, i32* @a, align 4
+ %mul = mul nsw i32 %a.promoted, %a.promoted
+ %mul.1 = mul nsw i32 %mul, %mul
+ %mul.2 = mul nsw i32 %mul.1, %mul.1
+ %mul.3 = mul nsw i32 %mul.2, %mul.2
+ %mul.4 = mul nsw i32 %mul.3, %mul.3
+ %mul.5 = mul nsw i32 %mul.4, %mul.4
+ store i32 %mul.5, i32* @a, align 4
+ store i32 31, i32* @b, align 4
+ ret i32 0
+}
diff --git a/test/Analysis/ScalarEvolution/no-wrap-unknown-becount.ll b/test/Analysis/ScalarEvolution/no-wrap-unknown-becount.ll
index 1f972f3f6d93..bc01f22b3f30 100644
--- a/test/Analysis/ScalarEvolution/no-wrap-unknown-becount.ll
+++ b/test/Analysis/ScalarEvolution/no-wrap-unknown-becount.ll
@@ -63,6 +63,50 @@ leave:
ret void
}
+define void @s_3(i32 %start, i1* %cond) {
+; CHECK-LABEL: Classifying expressions for: @s_3
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ %start, %entry ], [ %iv.inc, %be ]
+ %cmp = icmp slt i32 %iv, 10000
+ br i1 %cmp, label %be, label %leave
+
+be:
+ %iv.inc = add i32 %iv, 3
+ %iv.inc.sext = sext i32 %iv.inc to i64
+; CHECK: %iv.inc.sext = sext i32 %iv.inc to i64
+; CHECK-NEXT: --> {(sext i32 (3 + %start) to i64),+,3}<nsw><%loop>
+ %c = load volatile i1, i1* %cond
+ br i1 %c, label %loop, label %leave
+
+leave:
+ ret void
+}
+
+define void @s_4(i32 %start, i1* %cond) {
+; CHECK-LABEL: Classifying expressions for: @s_4
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ %start, %entry ], [ %iv.inc, %be ]
+ %cmp = icmp sgt i32 %iv, -1000
+ br i1 %cmp, label %be, label %leave
+
+be:
+ %iv.inc = add i32 %iv, -3
+ %iv.inc.sext = sext i32 %iv.inc to i64
+; CHECK: %iv.inc.sext = sext i32 %iv.inc to i64
+; CHECK-NEXT: --> {(sext i32 (-3 + %start) to i64),+,-3}<nsw><%loop>
+ %c = load volatile i1, i1* %cond
+ br i1 %c, label %loop, label %leave
+
+leave:
+ ret void
+}
+
define void @u_0(i32 %n, i1* %cond) {
; CHECK-LABEL: Classifying expressions for: @u_0
entry:
@@ -122,3 +166,25 @@ loop:
leave:
ret void
}
+
+define void @u_3(i32 %start, i1* %cond) {
+; CHECK-LABEL: Classifying expressions for: @u_3
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ %start, %entry ], [ %iv.inc, %be ]
+ %cmp = icmp ult i32 %iv, 10000
+ br i1 %cmp, label %be, label %leave
+
+be:
+ %iv.inc = add i32 %iv, 3
+ %iv.inc.zext = zext i32 %iv.inc to i64
+; CHECK: %iv.inc.zext = zext i32 %iv.inc to i64
+; CHECK-NEXT: --> {(zext i32 (3 + %start) to i64),+,3}<nuw><%loop>
+ %c = load volatile i1, i1* %cond
+ br i1 %c, label %loop, label %leave
+
+leave:
+ ret void
+}
diff --git a/test/Analysis/ScalarEvolution/pr18606.ll b/test/Analysis/ScalarEvolution/pr18606.ll
new file mode 100644
index 000000000000..6154a0f7d424
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/pr18606.ll
@@ -0,0 +1,67 @@
+; RUN: opt -S -indvars < %s | FileCheck %s
+
+; CHECK: @main
+; CHECK: %mul.lcssa5 = phi i32 [ %a.promoted4, %entry ], [ %mul.30, %for.body3 ]
+; CEHCK: %mul = mul nsw i32 %mul.lcssa5, %mul.lcssa5
+; CHECK: %mul.30 = mul nsw i32 %mul.29, %mul.29
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@a = local_unnamed_addr global i32 0, align 4
+@b = local_unnamed_addr global i32 0, align 4
+
+; Function Attrs: norecurse nounwind uwtable
+define i32 @main() local_unnamed_addr {
+entry:
+ %a.promoted4 = load i32, i32* @a, align 4
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %entry, %for.body3
+ %mul.lcssa5 = phi i32 [ %a.promoted4, %entry ], [ %mul.30, %for.body3 ]
+ %i.03 = phi i32 [ 0, %entry ], [ %inc5, %for.body3 ]
+ br label %for.body3
+
+for.body3: ; preds = %for.cond1.preheader
+ %mul = mul nsw i32 %mul.lcssa5, %mul.lcssa5
+ %mul.1 = mul nsw i32 %mul, %mul
+ %mul.2 = mul nsw i32 %mul.1, %mul.1
+ %mul.3 = mul nsw i32 %mul.2, %mul.2
+ %mul.4 = mul nsw i32 %mul.3, %mul.3
+ %mul.5 = mul nsw i32 %mul.4, %mul.4
+ %mul.6 = mul nsw i32 %mul.5, %mul.5
+ %mul.7 = mul nsw i32 %mul.6, %mul.6
+ %mul.8 = mul nsw i32 %mul.7, %mul.7
+ %mul.9 = mul nsw i32 %mul.8, %mul.8
+ %mul.10 = mul nsw i32 %mul.9, %mul.9
+ %mul.11 = mul nsw i32 %mul.10, %mul.10
+ %mul.12 = mul nsw i32 %mul.11, %mul.11
+ %mul.13 = mul nsw i32 %mul.12, %mul.12
+ %mul.14 = mul nsw i32 %mul.13, %mul.13
+ %mul.15 = mul nsw i32 %mul.14, %mul.14
+ %mul.16 = mul nsw i32 %mul.15, %mul.15
+ %mul.17 = mul nsw i32 %mul.16, %mul.16
+ %mul.18 = mul nsw i32 %mul.17, %mul.17
+ %mul.19 = mul nsw i32 %mul.18, %mul.18
+ %mul.20 = mul nsw i32 %mul.19, %mul.19
+ %mul.21 = mul nsw i32 %mul.20, %mul.20
+ %mul.22 = mul nsw i32 %mul.21, %mul.21
+ %mul.23 = mul nsw i32 %mul.22, %mul.22
+ %mul.24 = mul nsw i32 %mul.23, %mul.23
+ %mul.25 = mul nsw i32 %mul.24, %mul.24
+ %mul.26 = mul nsw i32 %mul.25, %mul.25
+ %mul.27 = mul nsw i32 %mul.26, %mul.26
+ %mul.28 = mul nsw i32 %mul.27, %mul.27
+ %mul.29 = mul nsw i32 %mul.28, %mul.28
+ %mul.30 = mul nsw i32 %mul.29, %mul.29
+ %inc5 = add nuw nsw i32 %i.03, 1
+ %exitcond = icmp ne i32 %inc5, 10
+ br i1 %exitcond, label %for.cond1.preheader, label %for.end6
+
+for.end6: ; preds = %for.body3
+ %mul.lcssa.lcssa = phi i32 [ %mul.30, %for.body3 ]
+ %inc.lcssa.lcssa = phi i32 [ 31, %for.body3 ]
+ store i32 %mul.lcssa.lcssa, i32* @a, align 4
+ store i32 %inc.lcssa.lcssa, i32* @b, align 4
+ ret i32 0
+}
diff --git a/test/Analysis/ScalarEvolution/pr28705.ll b/test/Analysis/ScalarEvolution/pr28705.ll
new file mode 100644
index 000000000000..8fbc08e3ca63
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/pr28705.ll
@@ -0,0 +1,41 @@
+; PR28705
+; RUN: opt < %s -indvars -S | FileCheck %s
+
+; Check IndVarSimplify replaces the exitval use of the induction var "%inc.i.i"
+; with "%.sroa.speculated + 1".
+;
+; CHECK-LABEL: @foo(
+; CHECK: %[[EXIT:.+]] = sub i32 %.sroa.speculated, -1
+; CHECK: %DB.sroa.9.0.lcssa = phi i32 [ 1, %entry ], [ %[[EXIT]], %loopexit ]
+;
+define void @foo(i32 %sub.ptr.div.i, i8* %ref.i1174) local_unnamed_addr {
+entry:
+ %cmp.i1137 = icmp ugt i32 %sub.ptr.div.i, 3
+ %.sroa.speculated = select i1 %cmp.i1137, i32 3, i32 %sub.ptr.div.i
+ %cmp6483126 = icmp eq i32 %.sroa.speculated, 0
+ br i1 %cmp6483126, label %XZ.exit, label %for.body650.lr.ph
+
+for.body650.lr.ph:
+ br label %for.body650
+
+loopexit:
+ %inc.i.i.lcssa = phi i32 [ %inc.i.i, %for.body650 ]
+ br label %XZ.exit
+
+XZ.exit:
+ %DB.sroa.9.0.lcssa = phi i32 [ 1, %entry ], [ %inc.i.i.lcssa, %loopexit ]
+ br label %end
+
+for.body650:
+ %iv = phi i32 [ 0, %for.body650.lr.ph ], [ %inc655, %for.body650 ]
+ %iv2 = phi i32 [ 1, %for.body650.lr.ph ], [ %inc.i.i, %for.body650 ]
+ %arrayidx.i.i1105 = getelementptr inbounds i8, i8* %ref.i1174, i32 %iv2
+ store i8 7, i8* %arrayidx.i.i1105, align 1
+ %inc.i.i = add i32 %iv2, 1
+ %inc655 = add i32 %iv, 1
+ %cmp648 = icmp eq i32 %inc655, %.sroa.speculated
+ br i1 %cmp648, label %loopexit, label %for.body650
+
+end:
+ ret void
+}
diff --git a/test/Analysis/ScalarEvolution/scev-expander-existing-value-offset.ll b/test/Analysis/ScalarEvolution/scev-expander-existing-value-offset.ll
new file mode 100644
index 000000000000..20f822e1ad2c
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/scev-expander-existing-value-offset.ll
@@ -0,0 +1,44 @@
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S |FileCheck %s
+; SCEV expansion uses existing value or value + offset to reduce duplicate code expansion so foo should only generate one select inst after loop vectorization.
+; CHECK-LABEL: @foo(
+; CHECK: select
+; CHECK-NOT: select
+
+@ySrcL = common global i8* null, align 8
+@smL = common global i32 0, align 4
+
+define void @foo(i32 %rwL, i32 %kL, i32 %xfL) {
+entry:
+ %sub = add nsw i32 %rwL, -1
+ %shr = ashr i32 %xfL, 6
+ %cmp.i = icmp slt i32 %sub, %shr
+ %cond.i = select i1 %cmp.i, i32 %sub, i32 %shr
+ %cmp6 = icmp sgt i32 %cond.i, %kL
+ br i1 %cmp6, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ %tmp = load i8*, i8** @ySrcL, align 8
+ %tmp1 = sext i32 %kL to i64
+ %tmp2 = sext i32 %cond.i to i64
+ br label %for.body
+
+for.body: ; preds = %for.body, %for.body.lr.ph
+ %indvars.iv = phi i64 [ %tmp1, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+ %reduct.07 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
+ %arrayidx = getelementptr inbounds i8, i8* %tmp, i64 %indvars.iv
+ %tmp3 = load i8, i8* %arrayidx, align 1
+ %conv = zext i8 %tmp3 to i32
+ %add = add nsw i32 %conv, %reduct.07
+ %indvars.iv.next = add nsw i64 %indvars.iv, 1
+ %cmp = icmp slt i64 %indvars.iv.next, %tmp2
+ br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ %add.lcssa = phi i32 [ %add, %for.body ]
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ %reduct.0.lcssa = phi i32 [ 0, %entry ], [ %add.lcssa, %for.end.loopexit ]
+ store i32 %reduct.0.lcssa, i32* @smL, align 4
+ ret void
+}
diff --git a/test/Analysis/ScalarEvolution/scev-expander-reuse-gep.ll b/test/Analysis/ScalarEvolution/scev-expander-reuse-gep.ll
new file mode 100644
index 000000000000..fd47b0854bb2
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/scev-expander-reuse-gep.ll
@@ -0,0 +1,36 @@
+; PR30213. Check scev expand will generate correct code if the value
+; in ValueOffsetPair is of pointer type.
+; RUN: opt -mtriple=i386-apple-macosx10.12.0 < %s -loop-reduce -S | FileCheck %s
+
+; CHECK: %ptr4.ptr1 = select i1 %cmp.i, i8* %ptr4, i8* %ptr1
+; CHECK-NEXT: %scevgep = getelementptr i8, i8* %ptr4.ptr1, i32 1
+; CHECK-NEXT: br label %while.cond.i
+
+target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.12.0"
+
+; Function Attrs: nounwind optsize ssp
+define void @Foo() {
+entry:
+ switch i2 undef, label %sw.epilog102 [
+ i2 -2, label %sw.bb28
+ ]
+
+sw.bb28: ; preds = %entry
+ %0 = load i8*, i8** undef, align 2
+ %ptr1 = getelementptr inbounds i8, i8* undef, i32 -1
+ %ptr4 = getelementptr inbounds i8, i8* %0, i32 -1
+ %cmp.i = icmp ult i8* undef, %0
+ %ptr4.ptr1 = select i1 %cmp.i, i8* %ptr4, i8* %ptr1
+ br label %while.cond.i
+
+while.cond.i: ; preds = %while.cond.i, %sw.bb28
+ %currPtr.1.i = phi i8* [ %incdec.ptr.i, %while.cond.i ], [ %ptr4.ptr1, %sw.bb28 ]
+ %incdec.ptr.i = getelementptr inbounds i8, i8* %currPtr.1.i, i32 1
+ %1 = load i8, i8* %incdec.ptr.i, align 1
+ br label %while.cond.i
+
+sw.epilog102: ; preds = %entry
+ unreachable
+}
+
diff --git a/test/Analysis/ScalarEvolution/scev-expander-reuse-unroll.ll b/test/Analysis/ScalarEvolution/scev-expander-reuse-unroll.ll
new file mode 100644
index 000000000000..e74b004c6f8d
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/scev-expander-reuse-unroll.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -loop-unroll -unroll-runtime -unroll-count=2 -verify-scev-maps -S | FileCheck %s
+
+; Check SCEV expansion uses existing value when unrolling an inner loop with runtime trip count in a loop nest.
+; CHECK-LABEL: @foo(
+; CHECK: select
+; CHECK-NOT: select
+; CHECK: ret
+
+define void @foo(i32 %xfL, i32 %scaleL) local_unnamed_addr {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body5, %for.body, %entry
+ %xfL.addr.033 = phi i32 [ %xfL, %entry ], [ %add, %for.body5 ]
+ %add = add nsw i32 %xfL.addr.033, %scaleL
+ %shr = ashr i32 %add, 16
+ %cmp.i = icmp slt i32 0, %shr
+ %.sroa.speculated = select i1 %cmp.i, i32 0, i32 %shr
+ %cmp425 = icmp slt i32 0, %.sroa.speculated
+ br i1 %cmp425, label %for.body5.preheader, label %for.end
+
+for.body5.preheader: ; preds = %for.body
+ %tmp0 = sext i32 %.sroa.speculated to i64
+ br label %for.body5
+
+for.body5: ; preds = %for.body5, %for.body5.preheader
+ %indvars.iv = phi i64 [ 0, %for.body5.preheader ], [ %indvars.iv.next, %for.body5 ]
+ %indvars.iv.next = add nsw i64 %indvars.iv, 1
+ %cmp4 = icmp slt i64 %indvars.iv.next, %tmp0
+ br i1 %cmp4, label %for.body5, label %for.body
+
+for.end:
+ ret void
+}
+
diff --git a/test/Analysis/ScalarEvolution/scev-expander-existing-value.ll b/test/Analysis/ScalarEvolution/scev-expander-reuse-vect.ll
index 0c1d6766869d..a818b47523e5 100644
--- a/test/Analysis/ScalarEvolution/scev-expander-existing-value.ll
+++ b/test/Analysis/ScalarEvolution/scev-expander-reuse-vect.ll
@@ -1,8 +1,10 @@
; RUN: opt < %s -loop-vectorize -force-vector-width=4 -verify-scev-maps -S |FileCheck %s
; SCEV expansion uses existing value when the SCEV has no AddRec expr.
+; CHECK-LABEL: @foo(
; CHECK: select
; CHECK-NOT: select
+; CHECK: ret
@a = common global [1000 x i16] zeroinitializer, align 16
diff --git a/test/Analysis/ScalarEvolution/sext-inreg.ll b/test/Analysis/ScalarEvolution/sext-inreg.ll
index 8f1d5bdbebad..2201d633f20e 100644
--- a/test/Analysis/ScalarEvolution/sext-inreg.ll
+++ b/test/Analysis/ScalarEvolution/sext-inreg.ll
@@ -1,28 +1,33 @@
-; RUN: opt < %s -analyze -scalar-evolution > %t
-; RUN: grep "sext i57 {0,+,199}<%bb> to i64" %t | count 1
-; RUN: grep "sext i59 {0,+,199}<%bb> to i64" %t | count 1
+; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9.6"
define i64 @foo(i64* nocapture %x, i64 %n) nounwind {
+; CHECK-LABEL: Classifying expressions for: @foo
entry:
- %t0 = icmp sgt i64 %n, 0 ; <i1> [#uses=1]
+ %t0 = icmp sgt i64 %n, 0
br i1 %t0, label %bb, label %return
-bb: ; preds = %bb, %entry
- %i.01 = phi i64 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
- %t1 = shl i64 %i.01, 7 ; <i32> [#uses=1]
- %t2 = ashr i64 %t1, 7 ; <i32> [#uses=1]
- %s1 = shl i64 %i.01, 5 ; <i32> [#uses=1]
- %s2 = ashr i64 %s1, 5 ; <i32> [#uses=1]
- %t3 = getelementptr i64, i64* %x, i64 %i.01 ; <i64*> [#uses=1]
+bb:
+ %i.01 = phi i64 [ 0, %entry ], [ %indvar.next, %bb ]
+ %t1 = shl i64 %i.01, 7
+ %t2 = ashr i64 %t1, 7
+; CHECK: %t2 = ashr i64 %t1, 7
+; CHECK-NEXT: sext i57 {0,+,199}<%bb> to i64
+; CHECK-NOT: i57
+; CHECK: %s2 = ashr i64 %s1, 5
+; CHECK-NEXT: sext i59 {0,+,199}<%bb> to i64
+; CHECK-NOT: i59
+ %s1 = shl i64 %i.01, 5
+ %s2 = ashr i64 %s1, 5
+ %t3 = getelementptr i64, i64* %x, i64 %i.01
store i64 0, i64* %t3, align 1
- %indvar.next = add i64 %i.01, 199 ; <i32> [#uses=2]
- %exitcond = icmp eq i64 %indvar.next, %n ; <i1> [#uses=1]
+ %indvar.next = add i64 %i.01, 199
+ %exitcond = icmp eq i64 %indvar.next, %n
br i1 %exitcond, label %return, label %bb
-return: ; preds = %bb, %entry
+return:
%p = phi i64 [ 0, %entry ], [ %t2, %bb ]
%q = phi i64 [ 0, %entry ], [ %s2, %bb ]
%v = xor i64 %p, %q
diff --git a/test/Analysis/ScalarEvolution/smax-br-phi-idioms.ll b/test/Analysis/ScalarEvolution/smax-br-phi-idioms.ll
index 500f3e16c8f5..dc24bd1b8047 100644
--- a/test/Analysis/ScalarEvolution/smax-br-phi-idioms.ll
+++ b/test/Analysis/ScalarEvolution/smax-br-phi-idioms.ll
@@ -126,3 +126,31 @@ for.cond.0:
ret i32 %init
}
+
+define i32 @f6(i32 %x, i32 %y) {
+; Do the right thing for unreachable code:
+
+; CHECK-LABEL: Classifying expressions for: @f6
+ entry:
+ %c0 = icmp sgt i32 %y, 0
+ %sum = add i32 %x, %y
+ br i1 %c0, label %merge, label %leave_1
+
+ merge:
+ %v0 = phi i32 [ %sum, %entry ], [ %v1, %unreachable ]
+ %c1 = icmp slt i32 %y, 0
+ br i1 %c1, label %leave_0, label %leave_0_cond
+
+leave_0_cond:
+ br label %leave_0
+
+leave_0:
+ %v1 = phi i32 [ %v0, %merge ], [ 0, %leave_0_cond ]
+ ret i32 0
+
+leave_1:
+ ret i32 0
+
+unreachable:
+ br label %merge
+}
diff --git a/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll b/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll
new file mode 100644
index 000000000000..60370d63e036
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll
@@ -0,0 +1,62 @@
+; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
+
+; ScalarEvolution should be able to compute trip count of the loop by proving
+; that this is not an infinite loop with side effects.
+
+; CHECK: Determining loop execution counts for: @foo1
+; CHECK: backedge-taken count is ((-1 + %n) /u %s)
+
+; We should have a conservative estimate for the max backedge taken count for
+; loops with unknown stride.
+; CHECK: max backedge-taken count is -1
+
+target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
+
+; Function Attrs: norecurse nounwind
+define void @foo1(i32* nocapture %A, i32 %n, i32 %s) #0 {
+entry:
+ %cmp4 = icmp sgt i32 %n, 0
+ br i1 %cmp4, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.05
+ %0 = load i32, i32* %arrayidx, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* %arrayidx, align 4
+ %add = add nsw i32 %i.05, %s
+ %cmp = icmp slt i32 %add, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+; Check that we are able to compute trip count of a loop without an entry guard.
+; CHECK: Determining loop execution counts for: @foo2
+; CHECK: backedge-taken count is ((-1 + (%n smax %s)) /u %s)
+
+; We should have a conservative estimate for the max backedge taken count for
+; loops with unknown stride.
+; CHECK: max backedge-taken count is -1
+
+; Function Attrs: norecurse nounwind
+define void @foo2(i32* nocapture %A, i32 %n, i32 %s) #0 {
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.05
+ %0 = load i32, i32* %arrayidx, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* %arrayidx, align 4
+ %add = add nsw i32 %i.05, %s
+ %cmp = icmp slt i32 %add, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
diff --git a/test/Analysis/ScalarEvolution/trip-count13.ll b/test/Analysis/ScalarEvolution/trip-count13.ll
index 37ef2fd500a0..3e1009748f1c 100644
--- a/test/Analysis/ScalarEvolution/trip-count13.ll
+++ b/test/Analysis/ScalarEvolution/trip-count13.ll
@@ -14,7 +14,7 @@ loop:
; CHECK-LABEL: Determining loop execution counts for: @u_0
; CHECK-NEXT: Loop %loop: backedge-taken count is (-100 + (-1 * %rhs) + ((100 + %rhs) umax %rhs))
-; CHECK-NEXT: Loop %loop: max backedge-taken count is -1
+; CHECK-NEXT: Loop %loop: max backedge-taken count is -100, actual taken count either this or zero.
leave:
ret void
@@ -34,7 +34,7 @@ loop:
; CHECK-LABEL: Determining loop execution counts for: @u_1
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-1 * %start) + ((-100 + %start) umax %start))
-; CHECK-NEXT: Loop %loop: max backedge-taken count is -1
+; CHECK-NEXT: Loop %loop: max backedge-taken count is -100, actual taken count either this or zero.
leave:
ret void
@@ -54,7 +54,7 @@ loop:
; CHECK-LABEL: Determining loop execution counts for: @s_0
; CHECK-NEXT: Loop %loop: backedge-taken count is (-100 + (-1 * %rhs) + ((100 + %rhs) smax %rhs))
-; CHECK-NEXT: Loop %loop: max backedge-taken count is -1
+; CHECK-NEXT: Loop %loop: max backedge-taken count is -100, actual taken count either this or zero.
leave:
ret void
@@ -74,7 +74,7 @@ loop:
; CHECK-LABEL: Determining loop execution counts for: @s_1
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-1 * %start) + ((-100 + %start) smax %start))
-; CHECK-NEXT: Loop %loop: max backedge-taken count is -1
+; CHECK-NEXT: Loop %loop: max backedge-taken count is -100, actual taken count either this or zero.
leave:
ret void
diff --git a/test/Analysis/ScalarEvolution/trip-count14.ll b/test/Analysis/ScalarEvolution/trip-count14.ll
new file mode 100644
index 000000000000..0f935d749836
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/trip-count14.ll
@@ -0,0 +1,177 @@
+; RUN: opt -S -analyze -scalar-evolution < %s | FileCheck %s
+
+define void @s32_max1(i32 %n, i32* %p) {
+entry:
+ %add = add i32 %n, 1
+ br label %do.body
+
+do.body:
+ %i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ]
+ %arrayidx = getelementptr i32, i32* %p, i32 %i.0
+ store i32 %i.0, i32* %arrayidx, align 4
+ %inc = add i32 %i.0, 1
+ %cmp = icmp slt i32 %i.0, %add
+ br i1 %cmp, label %do.body, label %do.end ; taken either 0 or 1 times
+
+; CHECK-LABEL: Determining loop execution counts for: @s32_max1
+; CHECK-NEXT: Loop %do.body: backedge-taken count is ((-1 * %n) + ((1 + %n) smax %n))
+; CHECK-NEXT: Loop %do.body: max backedge-taken count is 1, actual taken count either this or zero.
+
+do.end:
+ ret void
+}
+
+define void @s32_max2(i32 %n, i32* %p) {
+entry:
+ %add = add i32 %n, 2
+ br label %do.body
+
+do.body:
+ %i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ]
+ %arrayidx = getelementptr i32, i32* %p, i32 %i.0
+ store i32 %i.0, i32* %arrayidx, align 4
+ %inc = add i32 %i.0, 1
+ %cmp = icmp slt i32 %i.0, %add
+ br i1 %cmp, label %do.body, label %do.end ; taken either 0 or 2 times
+
+; CHECK-LABEL: Determining loop execution counts for: @s32_max2
+; CHECK-NEXT: Loop %do.body: backedge-taken count is ((-1 * %n) + ((2 + %n) smax %n))
+; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2, actual taken count either this or zero.
+
+do.end:
+ ret void
+}
+
+define void @s32_maxx(i32 %n, i32 %x, i32* %p) {
+entry:
+ %add = add i32 %x, %n
+ br label %do.body
+
+do.body:
+ %i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ]
+ %arrayidx = getelementptr i32, i32* %p, i32 %i.0
+ store i32 %i.0, i32* %arrayidx, align 4
+ %inc = add i32 %i.0, 1
+ %cmp = icmp slt i32 %i.0, %add
+ br i1 %cmp, label %do.body, label %do.end ; taken either 0 or x times
+
+; CHECK-LABEL: Determining loop execution counts for: @s32_maxx
+; CHECK-NEXT: Loop %do.body: backedge-taken count is ((-1 * %n) + ((%n + %x) smax %n))
+; CHECK-NEXT: Loop %do.body: max backedge-taken count is -1{{$}}
+
+do.end:
+ ret void
+}
+
+define void @s32_max2_unpredictable_exit(i32 %n, i32 %x, i32* %p) {
+entry:
+ %add = add i32 %n, 2
+ br label %do.body
+
+do.body:
+ %i.0 = phi i32 [ %n, %entry ], [ %inc, %if.end ]
+ %cmp = icmp eq i32 %i.0, %x
+ br i1 %cmp, label %do.end, label %if.end ; unpredictable
+
+if.end:
+ %arrayidx = getelementptr i32, i32* %p, i32 %i.0
+ store i32 %i.0, i32* %arrayidx, align 4
+ %inc = add i32 %i.0, 1
+ %cmp1 = icmp slt i32 %i.0, %add
+ br i1 %cmp1, label %do.body, label %do.end ; taken either 0 or 2 times
+
+; CHECK-LABEL: Determining loop execution counts for: @s32_max2_unpredictable_exit
+; CHECK-NEXT: Loop %do.body: <multiple exits> Unpredictable backedge-taken count.
+; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2{{$}}
+
+do.end:
+ ret void
+}
+
+define void @u32_max1(i32 %n, i32* %p) {
+entry:
+ %add = add i32 %n, 1
+ br label %do.body
+
+do.body:
+ %i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ]
+ %arrayidx = getelementptr i32, i32* %p, i32 %i.0
+ store i32 %i.0, i32* %arrayidx, align 4
+ %inc = add i32 %i.0, 1
+ %cmp = icmp ult i32 %i.0, %add
+ br i1 %cmp, label %do.body, label %do.end ; taken either 0 or 1 times
+
+; CHECK-LABEL: Determining loop execution counts for: @u32_max1
+; CHECK-NEXT: Loop %do.body: backedge-taken count is ((-1 * %n) + ((1 + %n) umax %n))
+; CHECK-NEXT: Loop %do.body: max backedge-taken count is 1, actual taken count either this or zero.
+
+do.end:
+ ret void
+}
+
+define void @u32_max2(i32 %n, i32* %p) {
+entry:
+ %add = add i32 %n, 2
+ br label %do.body
+
+do.body:
+ %i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ]
+ %arrayidx = getelementptr i32, i32* %p, i32 %i.0
+ store i32 %i.0, i32* %arrayidx, align 4
+ %inc = add i32 %i.0, 1
+ %cmp = icmp ult i32 %i.0, %add
+ br i1 %cmp, label %do.body, label %do.end ; taken either 0 or 2 times
+
+; CHECK-LABEL: Determining loop execution counts for: @u32_max2
+; CHECK-NEXT: Loop %do.body: backedge-taken count is ((-1 * %n) + ((2 + %n) umax %n))
+; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2, actual taken count either this or zero.
+
+do.end:
+ ret void
+}
+
+define void @u32_maxx(i32 %n, i32 %x, i32* %p) {
+entry:
+ %add = add i32 %x, %n
+ br label %do.body
+
+do.body:
+ %i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ]
+ %arrayidx = getelementptr i32, i32* %p, i32 %i.0
+ store i32 %i.0, i32* %arrayidx, align 4
+ %inc = add i32 %i.0, 1
+ %cmp = icmp ult i32 %i.0, %add
+ br i1 %cmp, label %do.body, label %do.end ; taken either 0 or x times
+
+; CHECK-LABEL: Determining loop execution counts for: @u32_maxx
+; CHECK-NEXT: Loop %do.body: backedge-taken count is ((-1 * %n) + ((%n + %x) umax %n))
+; CHECK-NEXT: Loop %do.body: max backedge-taken count is -1{{$}}
+
+do.end:
+ ret void
+}
+
+define void @u32_max2_unpredictable_exit(i32 %n, i32 %x, i32* %p) {
+entry:
+ %add = add i32 %n, 2
+ br label %do.body
+
+do.body:
+ %i.0 = phi i32 [ %n, %entry ], [ %inc, %if.end ]
+ %cmp = icmp eq i32 %i.0, %x
+ br i1 %cmp, label %do.end, label %if.end ; unpredictable
+
+if.end:
+ %arrayidx = getelementptr i32, i32* %p, i32 %i.0
+ store i32 %i.0, i32* %arrayidx, align 4
+ %inc = add i32 %i.0, 1
+ %cmp1 = icmp ult i32 %i.0, %add
+ br i1 %cmp1, label %do.body, label %do.end ; taken either 0 or 2 times
+
+; CHECK-LABEL: Determining loop execution counts for: @u32_max2_unpredictable_exit
+; CHECK-NEXT: Loop %do.body: <multiple exits> Unpredictable backedge-taken count.
+; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2{{$}}
+
+do.end:
+ ret void
+}
diff --git a/test/Analysis/ScalarEvolution/trip-count5.ll b/test/Analysis/ScalarEvolution/trip-count5.ll
index dc02fedd1342..f3ca343da6f1 100644
--- a/test/Analysis/ScalarEvolution/trip-count5.ll
+++ b/test/Analysis/ScalarEvolution/trip-count5.ll
@@ -1,6 +1,4 @@
-; RUN: opt < %s -analyze -scalar-evolution > %t
-; RUN: grep sext %t | count 2
-; RUN: not grep "(sext" %t
+; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
; ScalarEvolution should be able to compute a maximum trip count
; value sufficient to fold away both sext casts.
@@ -8,41 +6,46 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
define float @t(float* %pTmp1, float* %peakWeight, float* %nrgReducePeakrate, i32 %bim) nounwind {
+; CHECK-LABEL: Classifying expressions for: @t
entry:
- %tmp3 = load float, float* %peakWeight, align 4 ; <float> [#uses=2]
- %tmp2538 = icmp sgt i32 %bim, 0 ; <i1> [#uses=1]
+ %tmp3 = load float, float* %peakWeight, align 4
+ %tmp2538 = icmp sgt i32 %bim, 0
br i1 %tmp2538, label %bb.nph, label %bb4
-bb.nph: ; preds = %entry
+bb.nph:
br label %bb
-bb: ; preds = %bb1, %bb.nph
- %distERBhi.036 = phi float [ %tmp10, %bb1 ], [ 0.000000e+00, %bb.nph ] ; <float> [#uses=1]
- %hiPart.035 = phi i32 [ %tmp12, %bb1 ], [ 0, %bb.nph ] ; <i32> [#uses=2]
- %peakCount.034 = phi float [ %tmp19, %bb1 ], [ %tmp3, %bb.nph ] ; <float> [#uses=1]
- %tmp6 = sext i32 %hiPart.035 to i64 ; <i64> [#uses=1]
- %tmp7 = getelementptr float, float* %pTmp1, i64 %tmp6 ; <float*> [#uses=1]
- %tmp8 = load float, float* %tmp7, align 4 ; <float> [#uses=1]
- %tmp10 = fadd float %tmp8, %distERBhi.036 ; <float> [#uses=3]
- %tmp12 = add i32 %hiPart.035, 1 ; <i32> [#uses=3]
- %tmp15 = sext i32 %tmp12 to i64 ; <i64> [#uses=1]
- %tmp16 = getelementptr float, float* %peakWeight, i64 %tmp15 ; <float*> [#uses=1]
- %tmp17 = load float, float* %tmp16, align 4 ; <float> [#uses=1]
- %tmp19 = fadd float %tmp17, %peakCount.034 ; <float> [#uses=2]
+bb:
+ %distERBhi.036 = phi float [ %tmp10, %bb1 ], [ 0.000000e+00, %bb.nph ]
+ %hiPart.035 = phi i32 [ %tmp12, %bb1 ], [ 0, %bb.nph ]
+ %peakCount.034 = phi float [ %tmp19, %bb1 ], [ %tmp3, %bb.nph ]
+ %tmp6 = sext i32 %hiPart.035 to i64
+ %tmp7 = getelementptr float, float* %pTmp1, i64 %tmp6
+; CHECK: %tmp6 = sext i32 %hiPart.035 to i64
+; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%bb>
+ %tmp8 = load float, float* %tmp7, align 4
+ %tmp10 = fadd float %tmp8, %distERBhi.036
+ %tmp12 = add i32 %hiPart.035, 1
+ %tmp15 = sext i32 %tmp12 to i64
+ %tmp16 = getelementptr float, float* %peakWeight, i64 %tmp15
+; CHECK: %tmp15 = sext i32 %tmp12 to i64
+; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%bb>
+ %tmp17 = load float, float* %tmp16, align 4
+ %tmp19 = fadd float %tmp17, %peakCount.034
br label %bb1
-bb1: ; preds = %bb
- %tmp21 = fcmp olt float %tmp10, 2.500000e+00 ; <i1> [#uses=1]
- %tmp25 = icmp slt i32 %tmp12, %bim ; <i1> [#uses=1]
- %tmp27 = and i1 %tmp21, %tmp25 ; <i1> [#uses=1]
+bb1:
+ %tmp21 = fcmp olt float %tmp10, 2.500000e+00
+ %tmp25 = icmp slt i32 %tmp12, %bim
+ %tmp27 = and i1 %tmp21, %tmp25
br i1 %tmp27, label %bb, label %bb1.bb4_crit_edge
-bb1.bb4_crit_edge: ; preds = %bb1
+bb1.bb4_crit_edge:
br label %bb4
-bb4: ; preds = %bb1.bb4_crit_edge, %entry
- %distERBhi.0.lcssa = phi float [ %tmp10, %bb1.bb4_crit_edge ], [ 0.000000e+00, %entry ] ; <float> [#uses=1]
- %peakCount.0.lcssa = phi float [ %tmp19, %bb1.bb4_crit_edge ], [ %tmp3, %entry ] ; <float> [#uses=1]
- %tmp31 = fdiv float %peakCount.0.lcssa, %distERBhi.0.lcssa ; <float> [#uses=1]
+bb4:
+ %distERBhi.0.lcssa = phi float [ %tmp10, %bb1.bb4_crit_edge ], [ 0.000000e+00, %entry ]
+ %peakCount.0.lcssa = phi float [ %tmp19, %bb1.bb4_crit_edge ], [ %tmp3, %entry ]
+ %tmp31 = fdiv float %peakCount.0.lcssa, %distERBhi.0.lcssa
ret float %tmp31
}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/aliastest.ll b/test/Analysis/TypeBasedAliasAnalysis/aliastest.ll
index 93c34f9503ce..3bdd9d8a2ef5 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/aliastest.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/aliastest.ll
@@ -63,5 +63,6 @@ define i8 @test1_no(i8* %a, i8* %b) nounwind {
!7 = !{ !"foo", !0 }
!8 = !{ !"bar", !0 }
!9 = !{ !"foo", !0 }
-!10 = !{ !"bar", !"different" }
+!10 = !{ !"bar", !12 }
!11 = !{ !"qux", !0}
+!12 = !{!"different"}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/cyclic.ll b/test/Analysis/TypeBasedAliasAnalysis/cyclic.ll
index a88e26c5cd52..f50f870c53b1 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/cyclic.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/cyclic.ll
@@ -1,5 +1,5 @@
; RUN: not opt -instcombine < %s 2>&1 | FileCheck %s
-; CHECK: Cycle found in TBAA metadata.
+; CHECK: Access type node must be a valid scalar type
define void @test6(i32* %gi) #0 {
entry:
diff --git a/test/Analysis/TypeBasedAliasAnalysis/dse.ll b/test/Analysis/TypeBasedAliasAnalysis/dse.ll
index b6dc9b298eb0..8c51e99e3101 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/dse.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/dse.ll
@@ -68,5 +68,6 @@ define i8 @test1_no(i8* %a, i8* %b) nounwind {
!7 = !{ !"foo", !0 }
!8 = !{ !"bar", !0 }
!9 = !{ !"foo", !0 }
-!10 = !{ !"bar", !"different" }
+!10 = !{ !"bar", !12}
!11 = !{ !"qux", !0}
+!12 = !{!"different"}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/dynamic-indices.ll b/test/Analysis/TypeBasedAliasAnalysis/dynamic-indices.ll
index afc83c9f4f57..455968d7a401 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/dynamic-indices.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/dynamic-indices.ll
@@ -127,7 +127,7 @@ for.end: ; preds = %for.body
; CHECK: [[TYPE_LL]] = !{!"long long", {{!.*}}}
!0 = !{!6, !6, i64 0}
!1 = !{!"omnipotent char", !2}
-!2 = !{!"Simple C/C++ TBAA", null}
+!2 = !{!"Simple C/C++ TBAA"}
!3 = !{!7, !7, i64 0}
!4 = !{!8, !8, i64 0}
!5 = !{!9, !9, i64 0}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll b/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll
index aaa43a460900..1b541a528436 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll
@@ -91,5 +91,6 @@ if.else:
!4 = !{!8, !8, i64 0}
!5 = !{!"red", !0}
!6 = !{!"blu", !0}
-!7 = !{!"outer space"}
+!7 = !{!"outer space", !9}
!8 = !{!"brick red", !5}
+!9 = !{!"observable universe"}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll b/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll
index 197ef7e5196f..eab314eaa9c2 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll
@@ -26,7 +26,7 @@ declare void @llvm.arm.neon.vst1.p0i8.v8i16(i8*, <8 x i16>, i32) nounwind
; CHECK: attributes #1 = { argmemonly nounwind }
; CHECK: attributes [[NUW]] = { nounwind }
-!0 = !{!"tbaa root", null}
+!0 = !{!"tbaa root"}
!1 = !{!3, !3, i64 0}
!2 = !{!4, !4, i64 0}
!3 = !{!"A", !0}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/licm.ll b/test/Analysis/TypeBasedAliasAnalysis/licm.ll
index d2aee58204df..03c64fff0969 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/licm.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/licm.ll
@@ -29,7 +29,7 @@ for.end: ; preds = %for.body, %entry
ret void
}
-!0 = !{!"root", null}
+!0 = !{!"root"}
!1 = !{!6, !6, i64 0}
!2 = !{!7, !7, i64 0}
@@ -58,8 +58,9 @@ loop:
!3 = !{!"pointer", !8}
!4 = !{!8, !8, i64 0}
-!5 = !{!9, !9, i64 0}
+!5 = !{!10, !10, i64 0}
!6 = !{!"pointer", !0}
!7 = !{!"double", !0}
!8 = !{!"char", !9}
-!9 = !{!"root", null}
+!9 = !{!"root"}
+!10 = !{!"scalar-type", !9}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll b/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll
index 9fc9e42fc6cb..64e35788429b 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll
@@ -20,7 +20,7 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32,
; CHECK: [[TAGA]] = !{[[TYPEA:!.*]], [[TYPEA]], i64 0}
; CHECK: [[TYPEA]] = !{!"A", !{{.*}}}
-!0 = !{!"tbaa root", null}
+!0 = !{!"tbaa root"}
!1 = !{!3, !3, i64 0}
!2 = !{!4, !4, i64 0}
!3 = !{!"A", !0}
diff --git a/test/Analysis/ValueTracking/dereferenceable-and-aligned.ll b/test/Analysis/ValueTracking/dereferenceable-and-aligned.ll
new file mode 100644
index 000000000000..8af0548db111
--- /dev/null
+++ b/test/Analysis/ValueTracking/dereferenceable-and-aligned.ll
@@ -0,0 +1,21 @@
+; RUN: opt < %s -licm -S | FileCheck %s
+
+target datalayout = "e-p:32:32-p1:64:64-p4:64:64"
+
+; Make sure isDereferenceableAndAlignedPointer() doesn't crash when
+; walking pointer defs with an addrspacecast that changes pointer size.
+; CHECK-LABEL: @addrspacecast_crash
+define void @addrspacecast_crash() {
+bb:
+ %tmp = alloca [256 x i32]
+ br label %bb1
+
+bb1:
+ %tmp2 = getelementptr inbounds [256 x i32], [256 x i32]* %tmp, i32 0, i32 36
+ %tmp3 = bitcast i32* %tmp2 to <4 x i32>*
+ %tmp4 = addrspacecast <4 x i32>* %tmp3 to <4 x i32> addrspace(4)*
+ %tmp5 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp4
+ %tmp6 = xor <4 x i32> %tmp5, undef
+ store <4 x i32> %tmp6, <4 x i32> addrspace(1)* undef
+ br label %bb1
+}
diff --git a/test/Analysis/ValueTracking/get-pointer-base-with-const-off.ll b/test/Analysis/ValueTracking/get-pointer-base-with-const-off.ll
new file mode 100644
index 000000000000..6752b08f0005
--- /dev/null
+++ b/test/Analysis/ValueTracking/get-pointer-base-with-const-off.ll
@@ -0,0 +1,26 @@
+; RUN: opt -gvn -S < %s | FileCheck %s
+
+; Make sure we don't crash when analyzing an addrspacecast in
+; GetPointerBaseWithConstantOffset()
+
+target datalayout = "e-p:32:32-p4:64:64"
+
+define i32 @addrspacecast-crash() {
+; CHECK-LABEL: @addrspacecast-crash
+; CHECK: %tmp = alloca [25 x i64]
+; CHECK: %tmp1 = getelementptr inbounds [25 x i64], [25 x i64]* %tmp, i32 0, i32 0
+; CHECK: %tmp2 = addrspacecast i64* %tmp1 to <8 x i64> addrspace(4)*
+; CHECK: store <8 x i64> zeroinitializer, <8 x i64> addrspace(4)* %tmp2
+; CHECK-NOT: load
+bb:
+ %tmp = alloca [25 x i64]
+ %tmp1 = getelementptr inbounds [25 x i64], [25 x i64]* %tmp, i32 0, i32 0
+ %tmp2 = addrspacecast i64* %tmp1 to <8 x i64> addrspace(4)*
+ %tmp3 = getelementptr inbounds <8 x i64>, <8 x i64> addrspace(4)* %tmp2, i64 0
+ store <8 x i64> zeroinitializer, <8 x i64> addrspace(4)* %tmp3
+ %tmp4 = getelementptr inbounds [25 x i64], [25 x i64]* %tmp, i32 0, i32 0
+ %tmp5 = addrspacecast i64* %tmp4 to i32 addrspace(4)*
+ %tmp6 = getelementptr inbounds i32, i32 addrspace(4)* %tmp5, i64 10
+ %tmp7 = load i32, i32 addrspace(4)* %tmp6, align 4
+ ret i32 %tmp7
+}
diff --git a/test/Analysis/ValueTracking/known-nonnull-at.ll b/test/Analysis/ValueTracking/known-nonnull-at.ll
new file mode 100644
index 000000000000..8a0d1f3aff3b
--- /dev/null
+++ b/test/Analysis/ValueTracking/known-nonnull-at.ll
@@ -0,0 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instsimplify < %s | FileCheck %s
+
+declare void @bar(i8* %a, i8* nonnull %b)
+
+; 'y' must be nonnull.
+
+define i1 @caller1(i8* %x, i8* %y) {
+; CHECK-LABEL: @caller1(
+; CHECK-NEXT: call void @bar(i8* %x, i8* %y)
+; CHECK-NEXT: [[NULL_CHECK:%.*]] = icmp eq i8* %y, null
+; CHECK-NEXT: ret i1 [[NULL_CHECK]]
+;
+ call void @bar(i8* %x, i8* %y)
+ %null_check = icmp eq i8* %y, null
+ ret i1 %null_check
+}
+
+; Don't know anything about 'y'.
+
+define i1 @caller2(i8* %x, i8* %y) {
+; CHECK-LABEL: @caller2(
+; CHECK-NEXT: call void @bar(i8* %y, i8* %x)
+; CHECK-NEXT: [[NULL_CHECK:%.*]] = icmp eq i8* %y, null
+; CHECK-NEXT: ret i1 [[NULL_CHECK]]
+;
+ call void @bar(i8* %y, i8* %x)
+ %null_check = icmp eq i8* %y, null
+ ret i1 %null_check
+}
+
+; 'y' must be nonnull.
+
+define i1 @caller3(i8* %x, i8* %y) {
+; CHECK-LABEL: @caller3(
+; CHECK-NEXT: call void @bar(i8* %x, i8* %y)
+; CHECK-NEXT: [[NULL_CHECK:%.*]] = icmp ne i8* %y, null
+; CHECK-NEXT: ret i1 [[NULL_CHECK]]
+;
+ call void @bar(i8* %x, i8* %y)
+ %null_check = icmp ne i8* %y, null
+ ret i1 %null_check
+}
+
+; Don't know anything about 'y'.
+
+define i1 @caller4(i8* %x, i8* %y) {
+; CHECK-LABEL: @caller4(
+; CHECK-NEXT: call void @bar(i8* %y, i8* %x)
+; CHECK-NEXT: [[NULL_CHECK:%.*]] = icmp ne i8* %y, null
+; CHECK-NEXT: ret i1 [[NULL_CHECK]]
+;
+ call void @bar(i8* %y, i8* %x)
+ %null_check = icmp ne i8* %y, null
+ ret i1 %null_check
+}
+
diff --git a/test/Analysis/ValueTracking/known-signbit-shift.ll b/test/Analysis/ValueTracking/known-signbit-shift.ll
new file mode 100644
index 000000000000..bf984cb7474a
--- /dev/null
+++ b/test/Analysis/ValueTracking/known-signbit-shift.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Result of left shifting a non-negative integer
+; with nsw flag should also be non-negative
+define i1 @test_shift_nonnegative(i32 %a) {
+; CHECK-LABEL: @test_shift_nonnegative(
+; CHECK-NEXT: ret i1 true
+;
+ %b = lshr i32 %a, 2
+ %shift = shl nsw i32 %b, 3
+ %cmp = icmp sge i32 %shift, 0
+ ret i1 %cmp
+}
+
+; Result of left shifting a negative integer with
+; nsw flag should also be negative
+define i1 @test_shift_negative(i32 %a, i32 %b) {
+; CHECK-LABEL: @test_shift_negative(
+; CHECK-NEXT: ret i1 true
+;
+ %c = or i32 %a, -2147483648
+ %d = and i32 %b, 7
+ %shift = shl nsw i32 %c, %d
+ %cmp = icmp slt i32 %shift, 0
+ ret i1 %cmp
+}
+
+; If sign bit is a known zero, it cannot be a known one.
+; This test should not crash opt.
+define i32 @test_no_sign_bit_conflict1(i1 %b) {
+; CHECK-LABEL: @test_no_sign_bit_conflict1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SEL:%.*]] = select i1 %b, i32 -2147221504, i32 -2147483648
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+entry:
+ %sel = select i1 %b, i32 8193, i32 8192
+ %mul = shl nsw i32 %sel, 18
+ ret i32 %mul
+}
+
+; If sign bit is a known one, it cannot be a known zero.
+; This test should not crash opt.
+define i32 @test_no_sign_bit_conflict2(i1 %b) {
+; CHECK-LABEL: @test_no_sign_bit_conflict2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SEL:%.*]] = select i1 %b, i32 2147221504, i32 2146959360
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+entry:
+ %sel = select i1 %b, i32 -8193, i32 -8194
+ %mul = shl nsw i32 %sel, 18
+ ret i32 %mul
+}
diff --git a/test/Analysis/ValueTracking/knownzero-addrspacecast.ll b/test/Analysis/ValueTracking/knownzero-addrspacecast.ll
new file mode 100644
index 000000000000..94ba209e2072
--- /dev/null
+++ b/test/Analysis/ValueTracking/knownzero-addrspacecast.ll
@@ -0,0 +1,24 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+; When a pointer is addrspacecast to another addr space, we cannot assume
+; anything about the new bits.
+
+target datalayout = "p:32:32-p3:32:32-p4:64:64"
+
+; CHECK-LABEL: @test_shift
+; CHECK-NOT: ret i64 0
+define i64 @test_shift(i8* %p) {
+ %g = addrspacecast i8* %p to i8 addrspace(4)*
+ %i = ptrtoint i8 addrspace(4)* %g to i64
+ %shift = lshr i64 %i, 32
+ ret i64 %shift
+}
+
+; CHECK-LABEL: @test_null
+; A null pointer casted to another addr space may no longer have null value.
+; CHECK-NOT: ret i32 0
+define i32 @test_null() {
+ %g = addrspacecast i8* null to i8 addrspace(3)*
+ %i = ptrtoint i8 addrspace(3)* %g to i32
+ ret i32 %i
+}
diff --git a/test/Analysis/ValueTracking/knownzero-shift.ll b/test/Analysis/ValueTracking/knownzero-shift.ll
index 835d87a9d9c1..4ceb822afa18 100644
--- a/test/Analysis/ValueTracking/knownzero-shift.ll
+++ b/test/Analysis/ValueTracking/knownzero-shift.ll
@@ -1,14 +1,75 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -instsimplify -S < %s | FileCheck %s
-; CHECK-LABEL: @test
define i1 @test(i8 %p, i8* %pq) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: ret i1 false
+;
%q = load i8, i8* %pq, !range !0 ; %q is known nonzero; no known bits
%1 = or i8 %p, 2 ; %1[1] = 1
%2 = and i8 %1, 254 ; %2[0] = 0, %2[1] = 1
%A = lshr i8 %2, 1 ; We should know that %A is nonzero.
%x = icmp eq i8 %A, 0
- ; CHECK: ret i1 false
ret i1 %x
}
!0 = !{ i8 1, i8 5 }
+
+define i32 @shl_shl(i32 %A) {
+; CHECK-LABEL: @shl_shl(
+; CHECK-NEXT: ret i32 0
+;
+ %B = shl i32 %A, 6
+ %C = shl i32 %B, 28
+ ret i32 %C
+}
+
+define <2 x i33> @shl_shl_splat_vec(<2 x i33> %A) {
+; CHECK-LABEL: @shl_shl_splat_vec(
+; CHECK-NEXT: ret <2 x i33> zeroinitializer
+;
+ %B = shl <2 x i33> %A, <i33 5, i33 5>
+ %C = shl <2 x i33> %B, <i33 28, i33 28>
+ ret <2 x i33> %C
+}
+
+; FIXME
+
+define <2 x i33> @shl_shl_vec(<2 x i33> %A) {
+; CHECK-LABEL: @shl_shl_vec(
+; CHECK-NEXT: [[B:%.*]] = shl <2 x i33> %A, <i33 6, i33 5>
+; CHECK-NEXT: [[C:%.*]] = shl <2 x i33> [[B]], <i33 27, i33 28>
+; CHECK-NEXT: ret <2 x i33> [[C]]
+;
+ %B = shl <2 x i33> %A, <i33 6, i33 5>
+ %C = shl <2 x i33> %B, <i33 27, i33 28>
+ ret <2 x i33> %C
+}
+
+define i232 @lshr_lshr(i232 %A) {
+; CHECK-LABEL: @lshr_lshr(
+; CHECK-NEXT: ret i232 0
+;
+ %B = lshr i232 %A, 231
+ %C = lshr i232 %B, 1
+ ret i232 %C
+}
+
+define <2 x i32> @lshr_lshr_splat_vec(<2 x i32> %A) {
+; CHECK-LABEL: @lshr_lshr_splat_vec(
+; CHECK-NEXT: ret <2 x i32> zeroinitializer
+;
+ %B = lshr <2 x i32> %A, <i32 28, i32 28>
+ %C = lshr <2 x i32> %B, <i32 4, i32 4>
+ ret <2 x i32> %C
+}
+
+define <2 x i32> @lshr_lshr_vec(<2 x i32> %A) {
+; CHECK-LABEL: @lshr_lshr_vec(
+; CHECK-NEXT: ret <2 x i32> zeroinitializer
+;
+ %B = lshr <2 x i32> %A, <i32 29, i32 28>
+ %C = lshr <2 x i32> %B, <i32 4, i32 5>
+ ret <2 x i32> %C
+}
+
diff --git a/test/Analysis/ValueTracking/signbits-extract-elt.ll b/test/Analysis/ValueTracking/signbits-extract-elt.ll
new file mode 100644
index 000000000000..e46c01256998
--- /dev/null
+++ b/test/Analysis/ValueTracking/signbits-extract-elt.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+; If computeKnownBits can do a simple look-through for extractelement
+; then instsimplify will know that %elt1 is non-negative at icmp.
+define i1 @computeKnownBits_look_through_extractelt(<2 x i8> %vecin) {
+; CHECK-LABEL: @computeKnownBits_look_through_extractelt(
+; CHECK-NEXT: ret i1 false
+;
+ %vec = zext <2 x i8> %vecin to <2 x i32>
+ %elt1 = extractelement <2 x i32> %vec, i32 1
+ %bool = icmp slt i32 %elt1, 0
+ ret i1 %bool
+}
+
+; If computeNumSignBits can do a simple look-through for extractelement
+; then instsimplify will remove the ashr.
+define i32 @computeNumSignBits_look_through_extractelt(<2 x i1> %vecin) {
+; CHECK-LABEL: @computeNumSignBits_look_through_extractelt(
+; CHECK-NEXT: [[VEC:%.*]] = sext <2 x i1> [[VEC:%.*]]in to <2 x i32>
+; CHECK-NEXT: [[ELT0:%.*]] = extractelement <2 x i32> [[VEC]], i32 0
+; CHECK-NEXT: ret i32 [[ELT0]]
+;
+ %vec = sext <2 x i1> %vecin to <2 x i32>
+ %elt0 = extractelement <2 x i32> %vec, i32 0
+ %ashr = ashr i32 %elt0, 5
+ ret i32 %ashr
+}